1 package com.opencsv;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 import com.opencsv.enums.CSVReaderNullFieldIndicator;
20 import org.apache.commons.lang3.ArrayUtils;
21 import org.apache.commons.lang3.ObjectUtils;
22 import org.apache.commons.lang3.StringUtils;
23
24 import java.io.IOException;
25 import java.util.ArrayList;
26 import java.util.List;
27 import java.util.Locale;
28 import java.util.ResourceBundle;
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45 public class CSVParser extends AbstractCSVParser {
46
47 private static final int BEGINNING_OF_LINE = 3;
48
49
50
51 private final char escape;
52
53
54
55
56 private final String escapeAsString;
57
58
59
60
61 private final String escapeDoubleAsString;
62
63
64
65
66 private final boolean strictQuotes;
67
68
69
70 private final boolean ignoreLeadingWhiteSpace;
71
72
73
74 private final boolean ignoreQuotations;
75 private int tokensOnLastCompleteLine = -1;
76 private boolean inField = false;
77
78
79 private Locale errorLocale;
80
81
82
83
84 public CSVParser() {
85 this(DEFAULT_SEPARATOR, DEFAULT_QUOTE_CHARACTER,
86 DEFAULT_ESCAPE_CHARACTER, DEFAULT_STRICT_QUOTES,
87 DEFAULT_IGNORE_LEADING_WHITESPACE,
88 DEFAULT_IGNORE_QUOTATIONS,
89 DEFAULT_NULL_FIELD_INDICATOR, Locale.getDefault());
90 }
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107 CSVParser(char separator, char quotechar, char escape, boolean strictQuotes, boolean ignoreLeadingWhiteSpace,
108 boolean ignoreQuotations, CSVReaderNullFieldIndicator nullFieldIndicator, Locale errorLocale) {
109 super(separator, quotechar, nullFieldIndicator);
110 this.errorLocale = ObjectUtils.defaultIfNull(errorLocale, Locale.getDefault());
111 if (anyCharactersAreTheSame(separator, quotechar, escape)) {
112 throw new UnsupportedOperationException(ResourceBundle.getBundle(DEFAULT_BUNDLE_NAME, this.errorLocale).getString("special.characters.must.differ"));
113 }
114 if (separator == NULL_CHARACTER) {
115 throw new UnsupportedOperationException(ResourceBundle.getBundle(DEFAULT_BUNDLE_NAME, this.errorLocale).getString("define.separator"));
116 }
117 this.escape = escape;
118 this.escapeAsString = Character.toString(escape);
119 this.escapeDoubleAsString = escapeAsString + escapeAsString;
120 this.strictQuotes = strictQuotes;
121 this.ignoreLeadingWhiteSpace = ignoreLeadingWhiteSpace;
122 this.ignoreQuotations = ignoreQuotations;
123 }
124
125
126
127
128 public char getEscape() {
129 return escape;
130 }
131
132
133
134
135 public boolean isStrictQuotes() {
136 return strictQuotes;
137 }
138
139
140
141
142 public boolean isIgnoreLeadingWhiteSpace() {
143 return ignoreLeadingWhiteSpace;
144 }
145
146
147
148
149 public boolean isIgnoreQuotations() {
150 return ignoreQuotations;
151 }
152
153
154
155
156
157
158
159
160
161
162
163 private boolean anyCharactersAreTheSame(char separator, char quotechar, char escape) {
164 return isSameCharacter(separator, quotechar) || isSameCharacter(separator, escape) || isSameCharacter(quotechar, escape);
165 }
166
167
168
169
170
171
172
173 private boolean isSameCharacter(char c1, char c2) {
174 return c1 != NULL_CHARACTER && c1 == c2;
175 }
176
177 @Override
178 protected String convertToCsvValue(String value, boolean applyQuotestoAll) {
179 String testValue = (value == null && !nullFieldIndicator.equals(CSVReaderNullFieldIndicator.NEITHER)) ? "" : value;
180 StringBuilder builder = new StringBuilder(testValue == null ? MAX_SIZE_FOR_EMPTY_FIELD : (testValue.length() * 2));
181 boolean containsQuoteChar = StringUtils.contains(testValue, getQuotechar());
182 boolean containsEscapeChar = StringUtils.contains(testValue, getEscape());
183 boolean containsSeparatorChar = StringUtils.contains(testValue, getSeparator());
184 boolean surroundWithQuotes = applyQuotestoAll || isSurroundWithQuotes(value, containsSeparatorChar);
185
186 String convertedString = !containsQuoteChar ? testValue : quoteMatcherPattern.matcher(testValue).replaceAll(quoteDoubledAsString);
187 convertedString = !containsEscapeChar ? convertedString : convertedString.replace(escapeAsString, escapeDoubleAsString);
188
189 if (surroundWithQuotes) {
190 builder.append(getQuotechar());
191 }
192
193 builder.append(convertedString);
194
195 if (surroundWithQuotes) {
196 builder.append(getQuotechar());
197 }
198
199 return builder.toString();
200 }
201
202 @Override
203 protected String[] parseLine(String nextLine, boolean multi) throws IOException {
204
205 if (!multi && pending != null) {
206 pending = null;
207 }
208
209 if (nextLine == null) {
210 if (pending != null) {
211 String s = pending;
212 pending = null;
213 return new String[]{s};
214 }
215 return null;
216 }
217 final List<String> tokensOnThisLine = tokensOnLastCompleteLine <= 0 ? new ArrayList<>() : new ArrayList<>((tokensOnLastCompleteLine + 1) * 2);
218 final StringFragmentCopier sfc = new StringFragmentCopier(nextLine);
219 boolean inQuotes = false;
220 boolean fromQuotedField = false;
221 if (pending != null) {
222 sfc.append(pending);
223 pending = null;
224 inQuotes = !this.ignoreQuotations;
225 }
226
227 while (!sfc.isEmptyInput()) {
228 final char c = sfc.takeInput();
229 if (c == this.escape) {
230 if (!strictQuotes) {
231 inField = true;
232 }
233 handleEscapeCharacter(nextLine, sfc, inQuotes);
234 } else if (c == quotechar) {
235 if (isNextCharacterEscapedQuote(nextLine, inQuotes(inQuotes), sfc.i - 1)) {
236 sfc.takeInput();
237 sfc.appendPrev();
238 } else {
239
240 inQuotes = !inQuotes;
241 if (sfc.isEmptyOutput()) {
242 fromQuotedField = true;
243 }
244
245
246 handleQuoteCharButNotStrictQuotes(nextLine, sfc);
247 }
248 inField = !inField;
249 } else if (c == separator && !(inQuotes && !ignoreQuotations)) {
250 tokensOnThisLine.add(convertEmptyToNullIfNeeded(sfc.takeOutput(), fromQuotedField));
251 fromQuotedField = false;
252 inField = false;
253 } else {
254 if (!strictQuotes || (inQuotes && !ignoreQuotations)) {
255 sfc.appendPrev();
256 inField = true;
257 fromQuotedField = true;
258 }
259 }
260
261 }
262
263 line_done: {
264 if (inQuotes && !ignoreQuotations) {
265 if (multi) {
266
267 sfc.append('\n');
268 pending = sfc.peekOutput();
269 break line_done;
270 } else {
271 throw new IOException(String.format(
272 ResourceBundle.getBundle(DEFAULT_BUNDLE_NAME, errorLocale).getString("unterminated.quote"),
273 sfc.peekOutput()));
274 }
275 } else {
276 inField = false;
277 }
278
279 tokensOnThisLine.add(convertEmptyToNullIfNeeded(sfc.takeOutput(), fromQuotedField));
280 }
281
282 tokensOnLastCompleteLine = tokensOnThisLine.size();
283 return tokensOnThisLine.toArray(ArrayUtils.EMPTY_STRING_ARRAY);
284
285 }
286
287 private void handleQuoteCharButNotStrictQuotes(String nextLine, StringFragmentCopier sfc) {
288 if (!strictQuotes) {
289 final int i = sfc.i;
290 if (i > BEGINNING_OF_LINE
291 && nextLine.charAt(i - 2) != this.separator
292 && nextLine.length() > (i) &&
293 nextLine.charAt(i) != this.separator
294 ) {
295
296 if (ignoreLeadingWhiteSpace && !sfc.isEmptyOutput() && StringUtils.isWhitespace(sfc.peekOutput())) {
297 sfc.clearOutput();
298 } else {
299 sfc.appendPrev();
300 }
301 }
302 }
303 }
304
305 private void handleEscapeCharacter(String nextLine, StringFragmentCopier sfc, boolean inQuotes) {
306 if (isNextCharacterEscapable(nextLine, inQuotes(inQuotes), sfc.i - 1)) {
307 sfc.takeInput();
308 sfc.appendPrev();
309 }
310 }
311
312 private String convertEmptyToNullIfNeeded(String s, boolean fromQuotedField) {
313 if (s.isEmpty() && shouldConvertEmptyToNull(fromQuotedField)) {
314 return null;
315 }
316 return s;
317 }
318
319 private boolean shouldConvertEmptyToNull(boolean fromQuotedField) {
320 switch (nullFieldIndicator) {
321 case BOTH:
322 return true;
323 case EMPTY_SEPARATORS:
324 return !fromQuotedField;
325 case EMPTY_QUOTES:
326 return fromQuotedField;
327 default:
328 return false;
329 }
330 }
331
332
333
334
335
336
337
338 private boolean inQuotes(boolean inQuotes) {
339 return (inQuotes && !ignoreQuotations) || inField;
340 }
341
342
343
344
345
346
347
348
349
350
351
352 private boolean isNextCharacterEscapedQuote(String nextLine, boolean inQuotes, int i) {
353 return inQuotes
354 && nextLine.length() > (i + 1)
355 && isCharacterQuoteCharacter(nextLine.charAt(i + 1));
356 }
357
358
359
360
361
362
363
364 private boolean isCharacterQuoteCharacter(char c) {
365 return c == quotechar;
366 }
367
368
369
370
371
372
373
374 private boolean isCharacterEscapeCharacter(char c) {
375 return c == escape;
376 }
377
378
379
380
381
382
383
384 private boolean isCharacterSeparator(char c) {
385 return c == separator;
386 }
387
388
389
390
391
392
393
394
395
396 private boolean isCharacterEscapable(char c) {
397 return isCharacterQuoteCharacter(c) || isCharacterEscapeCharacter(c) || isCharacterSeparator(c);
398 }
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416 protected boolean isNextCharacterEscapable(String nextLine, boolean inQuotes, int i) {
417 return inQuotes
418 && nextLine.length() > (i + 1)
419 && isCharacterEscapable(nextLine.charAt(i + 1));
420 }
421
422 @Override
423 public void setErrorLocale(Locale errorLocale) {
424 this.errorLocale = ObjectUtils.defaultIfNull(errorLocale, Locale.getDefault());
425 }
426
427
428
429
430
431 private static class StringFragmentCopier {
432 private final String input;
433
434 private int i = 0;
435
436
437
438 private StringBuilder sb;
439
440
441 private int pendingSubstrFrom = 0;
442 private int pendingSubstrTo = 0;
443
444 StringFragmentCopier(String input) {
445 this.input = input;
446 }
447
448 public boolean isEmptyInput() {
449 return i >= input.length();
450 }
451
452 public char takeInput() {
453 return input.charAt(i++);
454 }
455
456 private StringBuilder materializeBuilder() {
457 if (sb == null) {
458 sb = new StringBuilder(input.length() + READ_BUFFER_SIZE);
459 }
460
461 if (pendingSubstrFrom < pendingSubstrTo) {
462 sb.append(input, pendingSubstrFrom, pendingSubstrTo);
463 pendingSubstrFrom = pendingSubstrTo = i;
464 }
465
466 return sb;
467 }
468
469 public void append(String pending) {
470 materializeBuilder().append(pending);
471 }
472
473 public void append(char pending) {
474 materializeBuilder().append(pending);
475 }
476
477 public void appendPrev() {
478 if (pendingSubstrTo == pendingSubstrFrom) {
479 pendingSubstrFrom = i - 1;
480 pendingSubstrTo = i;
481 } else if (pendingSubstrTo == i - 1) {
482 pendingSubstrTo++;
483 } else {
484 materializeBuilder().append(input.charAt(i - 1));
485 }
486 }
487
488 public boolean isEmptyOutput() {
489 return pendingSubstrFrom >= pendingSubstrTo && (sb == null || sb.length() == 0);
490 }
491
492 public void clearOutput() {
493 if (sb != null) {
494 sb.setLength(0);
495 }
496
497 pendingSubstrFrom = pendingSubstrTo = i;
498 }
499
500 public String peekOutput() {
501 if (sb == null || sb.length() == 0) {
502 return input.substring(pendingSubstrFrom, pendingSubstrTo);
503 } else {
504 return materializeBuilder().toString();
505 }
506 }
507
508 public String takeOutput() {
509 final String result = peekOutput();
510 clearOutput();
511 return result;
512 }
513 }
514 }