1 package com.opencsv;
2
3 import com.opencsv.enums.CSVReaderNullFieldIndicator;
4 import org.apache.commons.lang3.StringUtils;
5
6 import java.io.IOException;
7 import java.util.regex.Pattern;
8 import java.util.stream.Collectors;
9 import java.util.stream.Stream;
10
11 /**
12 * An abstract class representing the basic functionality required for parsing CSV files.
13 * Implements the ICSVParser interface and provides common shared behavior for various CSV
14 * parsing implementations.
15 */
16 public abstract class AbstractCSVParser implements ICSVParser {
17 /**
18 * This is the character that the CSVParser will treat as the separator.
19 */
20 protected final char separator;
21 /**
22 * This is the separator in String form to reduce the number of calls to toString.
23 */
24 private final String separatorAsString;
25 /**
26 * This is the character that the CSVParser will treat as the quotation character.
27 */
28 protected final char quotechar;
29 /**
30 * This is the quotechar in String form to reduce the number of calls to toString.
31 */
32 private final String quotecharAsString;
33
34
35 /**
36 * Returns the doubled quote character as a string representation.
37 *
38 * @return The string representation of the doubled quote character.
39 */
40 protected final String getQuoteDoubledAsString() {
41 return quoteDoubledAsString;
42 }
43
44 /**
45 * This is quotecharAsString+quotecharAsString - used in replaceAll to reduce the number of strings being created.
46 */
47 private final String quoteDoubledAsString;
48
49 /**
50 * pattern created to match quotechars - optimizaion of the String.replaceAll.
51 */
52 private final Pattern quoteMatcherPattern;
53
54
55 /**
56 * Determines the handling of null fields.
57 *
58 * @see CSVReaderNullFieldIndicator
59 */
60 protected final CSVReaderNullFieldIndicator nullFieldIndicator;
61
62 /**
63 * Value to be appended to string to process.
64 */
65 protected String pending;
66
67 /**
68 * Common constructor.
69 *
70 * @param separator The delimiter to use for separating entries
71 * @param quotechar The character to use for quoted elements
72 * @param nullFieldIndicator Indicate what should be considered null
73 */
74 public AbstractCSVParser(char separator, char quotechar, CSVReaderNullFieldIndicator nullFieldIndicator) {
75 this.separator = separator;
76 this.separatorAsString = Character.toString(separator);
77
78 this.quotechar = quotechar;
79 this.quotecharAsString = Character.toString(quotechar);
80 this.quoteDoubledAsString = this.quotecharAsString + this.quotecharAsString;
81 this.quoteMatcherPattern = Pattern.compile(quotecharAsString);
82
83 this.nullFieldIndicator = nullFieldIndicator;
84 }
85
86
87 /**
88 * Retrieves the compiled {@link Pattern} used for matching quoted elements in a CSV input.
89 *
90 * @return The {@link Pattern} used for identifying quoted elements.
91 */
92 protected final Pattern getQuoteMatcherPattern() {
93 return quoteMatcherPattern;
94 }
95
96 @Override
97 public char getSeparator() {
98 return separator;
99 }
100
101 /**
102 * @return String version of separator to reduce number of calls to toString.
103 */
104 public String getSeparatorAsString() {
105 return separatorAsString;
106 }
107
108 @Override
109 public char getQuotechar() {
110 return quotechar;
111 }
112
113 /**
114 * @return String version of quotechar to reduce the number of calls to toString.
115 */
116 public String getQuotecharAsString() {
117 return quotecharAsString;
118 }
119
120 @Override
121 public boolean isPending() {
122 return pending != null;
123 }
124
125
126 @Override
127 public String[] parseLineMulti(String nextLine) throws IOException {
128 return parseLine(nextLine, true);
129 }
130
131 @Override
132 public String[] parseLine(String nextLine) throws IOException {
133 return parseLine(nextLine, false);
134 }
135
136 @Override
137 public String parseToLine(String[] values, boolean applyQuotesToAll) {
138 return Stream.of(values)
139 .map(v -> convertToCsvValue(v, applyQuotesToAll))
140 .collect(Collectors.joining(getSeparatorAsString()));
141 }
142
143 @Override
144 public void parseToLine(String[] values, boolean applyQuotesToAll, Appendable appendable) throws IOException {
145 boolean first = true;
146 for (String value : values) {
147 if (!first) {
148 appendable.append(getSeparator());
149 } else {
150 first = false;
151 }
152 convertToCsvValue(value, applyQuotesToAll, appendable);
153 }
154 }
155
156 /**
157 * Used when reverse parsing an array of strings to a single string. Handles the application of quotes around
158 * the string and handling any quotes within the string.
159 *
160 * @param value String to be converted
161 * @param applyQuotestoAll All values should be surrounded with quotes
162 * @return String that will go into the CSV string
163 */
164 protected abstract String convertToCsvValue(String value, boolean applyQuotestoAll);
165
166 /**
167 * Used when reverse parsing an array of strings to a single string. Handles the application of quotes around
168 * the string and handling any quotes within the string.
169 * <p>
170 * NOTE: as of 5.7.2 most objects will be inheriting a solution that calls the existing convertToCsvValue and thus
171 * will not receive much benefit.
172 *
173 * @param value String to be converted
174 * @param applyQuotesToAll All values should be surrounded with quotes
175 * @param appendable Appendable object that the converted values are added to.
176 */
177 protected void convertToCsvValue(String value, boolean applyQuotesToAll, Appendable appendable) throws IOException {
178 appendable.append(convertToCsvValue(value, applyQuotesToAll));
179 }
180
181 /**
182 * Used by reverse parsing to determine if a value should be surrounded by quote characters.
183 *
184 * @param value String to be tested
185 * @param forceSurround If the value is not {@code null} it will be surrounded with quotes
186 * @return True if the string should be surrounded with quotes, false otherwise
187 */
188 protected boolean isSurroundWithQuotes(String value, boolean forceSurround) {
189 if (value == null) {
190 return nullFieldIndicator.equals(CSVReaderNullFieldIndicator.EMPTY_QUOTES);
191 } else if (value.isEmpty() && nullFieldIndicator.equals(CSVReaderNullFieldIndicator.EMPTY_SEPARATORS)) {
192 return true;
193 }
194
195 return forceSurround || value.contains(getSeparatorAsString()) || value.contains(NEWLINE);
196 }
197
198 /**
199 * Parses an incoming {@link java.lang.String} and returns an array of elements.
200 *
201 * @param nextLine The string to parse
202 * @param multi Whether it takes multiple lines to form a single record
203 * @return The list of elements, or {@code null} if {@code nextLine} is {@code null}
204 * @throws IOException If bad things happen during the read
205 */
206 protected abstract String[] parseLine(String nextLine, boolean multi) throws IOException;
207
208 @Override
209 public CSVReaderNullFieldIndicator nullFieldIndicator() {
210 return nullFieldIndicator;
211 }
212
213 @Override
214 public String getPendingText() {
215 return StringUtils.defaultString(pending);
216 }
217 }