1 package com.opencsv;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 import com.opencsv.bean.util.OrderedObject;
20 import com.opencsv.exceptions.*;
21 import com.opencsv.processor.RowProcessor;
22 import com.opencsv.stream.reader.LineReader;
23 import com.opencsv.validators.LineValidatorAggregator;
24 import com.opencsv.validators.RowValidatorAggregator;
25 import org.apache.commons.lang3.ObjectUtils;
26 import org.apache.commons.lang3.StringUtils;
27
28 import java.io.*;
29 import java.nio.charset.CharacterCodingException;
30 import java.nio.charset.MalformedInputException;
31 import java.util.*;
32 import java.util.zip.ZipException;
33
34
35
36
37
38
39 public class CSVReader implements Closeable, Iterable<String[]> {
40
41 public static final boolean DEFAULT_KEEP_CR = false;
42 public static final boolean DEFAULT_VERIFY_READER = true;
43
44 static final int CONTEXT_MULTILINE_EXCEPTION_MESSAGE_SIZE = 100;
45
46
47
48
49 public static final int DEFAULT_SKIP_LINES = 0;
50
51
52
53
54
55 public static final int DEFAULT_MULTILINE_LIMIT = 0;
56
57 protected static final List<Class<? extends IOException>> PASSTHROUGH_EXCEPTIONS =
58 Collections.unmodifiableList(
59 Arrays.asList(CharacterCodingException.class, CharConversionException.class,
60 UnsupportedEncodingException.class, UTFDataFormatException.class,
61 ZipException.class, FileNotFoundException.class, MalformedInputException.class));
62
63 public static final int READ_AHEAD_LIMIT = Character.SIZE / Byte.SIZE;
64 private static final int MAX_WIDTH = 100;
65 protected ICSVParser parser;
66 protected int skipLines;
67 protected BufferedReader br;
68 protected LineReader lineReader;
69 protected boolean hasNext = true;
70 protected boolean linesSkipped;
71 protected boolean keepCR;
72 protected boolean verifyReader;
73 protected int multilineLimit = DEFAULT_MULTILINE_LIMIT;
74 protected Locale errorLocale;
75
76 protected long linesRead = 0;
77 protected long recordsRead = 0;
78 protected String[] peekedLine = null;
79 final protected Queue<OrderedObject<String>> peekedLines = new LinkedList<>();
80
81 private final LineValidatorAggregator lineValidatorAggregator;
82 private final RowValidatorAggregator rowValidatorAggregator;
83 private final RowProcessor rowProcessor;
84
85
86
87
88
89
90 public CSVReader(Reader reader) {
91 this(reader, DEFAULT_SKIP_LINES,
92 new CSVParser(ICSVParser.DEFAULT_SEPARATOR,
93 ICSVParser.DEFAULT_QUOTE_CHARACTER,
94 ICSVParser.DEFAULT_ESCAPE_CHARACTER,
95 ICSVParser.DEFAULT_STRICT_QUOTES,
96 ICSVParser.DEFAULT_IGNORE_LEADING_WHITESPACE,
97 ICSVParser.DEFAULT_IGNORE_QUOTATIONS,
98 ICSVParser.DEFAULT_NULL_FIELD_INDICATOR,
99 Locale.getDefault()),
100 DEFAULT_KEEP_CR,
101 DEFAULT_VERIFY_READER,
102 DEFAULT_MULTILINE_LIMIT,
103 Locale.getDefault(),
104 new LineValidatorAggregator(),
105 new RowValidatorAggregator(),
106 null);
107 }
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125 CSVReader(Reader reader, int line, ICSVParser icsvParser, boolean keepCR, boolean verifyReader, int multilineLimit,
126 Locale errorLocale, LineValidatorAggregator lineValidatorAggregator, RowValidatorAggregator rowValidatorAggregator,
127 RowProcessor rowProcessor) {
128 this.br =
129 (reader instanceof BufferedReader ?
130 (BufferedReader) reader :
131 new BufferedReader(reader));
132 this.lineReader = new LineReader(br, keepCR);
133 this.skipLines = line;
134 this.parser = icsvParser;
135 this.keepCR = keepCR;
136 this.verifyReader = verifyReader;
137 this.multilineLimit = multilineLimit;
138 this.errorLocale = ObjectUtils.defaultIfNull(errorLocale, Locale.getDefault());
139 this.lineValidatorAggregator = lineValidatorAggregator;
140 this.rowValidatorAggregator = rowValidatorAggregator;
141 this.rowProcessor = rowProcessor;
142 }
143
144
145
146
147 public ICSVParser getParser() {
148 return parser;
149 }
150
151
152
153
154
155
156
157 public int getSkipLines() {
158 return skipLines;
159 }
160
161
162
163
164
165
166 public boolean keepCarriageReturns() {
167 return keepCR;
168 }
169
170
171
172
173
174
175
176
177
178
179
180
181
182 public List<String[]> readAll() throws IOException, CsvException {
183
184 List<String[]> allElements = new LinkedList<>();
185 while (hasNext) {
186 String[] nextLineAsTokens = readNext();
187 if (nextLineAsTokens != null) {
188 allElements.add(nextLineAsTokens);
189 }
190 }
191 return allElements;
192
193 }
194
195
196
197
198
199
200
201
202
203 public String[] readNext() throws IOException, CsvValidationException {
204 return flexibleRead(true, true);
205 }
206
207
208
209
210
211
212
213
214
215
216 public String[] readNextSilently() throws IOException {
217 try {
218 return flexibleRead(true, false);
219 } catch (CsvValidationException e) {
220 throw new CsvRuntimeException("A CSValidationException was thrown from the runNextSilently method which should not happen", e);
221 }
222 }
223
224
225
226
227
228
229
230 private void primeNextRecord() throws IOException {
231
232 int linesInThisRecord = 0;
233 long lastSuccessfulLineRead = linesRead+1;
234 do {
235 String nextLine = getNextLine();
236 peekedLines.add(new OrderedObject<>(lastSuccessfulLineRead, nextLine));
237 linesInThisRecord++;
238
239
240
241 if (!hasNext) {
242 if (parser.isPending()) {
243 throw new CsvMalformedLineException(String.format(
244 ResourceBundle.getBundle(ICSVParser.DEFAULT_BUNDLE_NAME, errorLocale).getString("unterminated.quote"),
245 StringUtils.abbreviate(parser.getPendingText(), MAX_WIDTH)), lastSuccessfulLineRead, parser.getPendingText());
246 }
247 return;
248 }
249
250
251
252 if (multilineLimit > 0 && linesInThisRecord > multilineLimit) {
253
254
255 long row = this.recordsRead + 1L;
256
257 String context = parser.getPendingText();
258
259
260
261 if (context.length() > CONTEXT_MULTILINE_EXCEPTION_MESSAGE_SIZE) {
262 context = context.substring(0, CONTEXT_MULTILINE_EXCEPTION_MESSAGE_SIZE);
263 }
264
265 String messageFormat = ResourceBundle.getBundle(ICSVParser.DEFAULT_BUNDLE_NAME, errorLocale).getString("multiline.limit.broken");
266 String message = String.format(errorLocale, messageFormat, multilineLimit, row, context);
267 throw new CsvMultilineLimitBrokenException(message, row, parser.getPendingText(), multilineLimit);
268 }
269
270
271 String[] r = parser.parseLineMulti(nextLine);
272 if (r.length > 0) {
273 if (peekedLine == null) {
274 peekedLine = r;
275 } else {
276 peekedLine = combineResultsFromMultipleReads(peekedLine, r);
277 }
278 }
279
280 } while (parser.isPending());
281
282
283
284
285
286
287 if (keepCR) {
288 int lastItemIndex = peekedLine.length - 1;
289 if (peekedLine[lastItemIndex] != null && peekedLine[lastItemIndex].endsWith("\r")) {
290 peekedLine[lastItemIndex] = peekedLine[lastItemIndex].substring(0, peekedLine[lastItemIndex].length() - 1);
291 }
292 }
293 }
294
295
296
297
298
299
300
301
302
303 private void validateLine(long lastSuccessfulLineRead, String nextLine) throws CsvValidationException {
304 try {
305 lineValidatorAggregator.validate(nextLine);
306 } catch (CsvValidationException cve) {
307 cve.setLineNumber(lastSuccessfulLineRead);
308 throw cve;
309 }
310 }
311
312
313
314
315
316
317
318
319 protected void validateResult(String[] result, long lineStartOfRow) throws CsvValidationException {
320 if (result != null) {
321 if (rowProcessor != null) {
322 rowProcessor.processRow(result);
323 }
324 try {
325 rowValidatorAggregator.validate(result);
326 } catch (CsvValidationException cve) {
327 cve.setLineNumber(lineStartOfRow);
328 throw cve;
329 }
330 }
331 }
332
333
334
335
336
337
338
339
340 protected String[] combineResultsFromMultipleReads(String[] buffer, String[] lastRead) {
341 String[] t = new String[buffer.length + lastRead.length];
342 System.arraycopy(buffer, 0, t, 0, buffer.length);
343 System.arraycopy(lastRead, 0, t, buffer.length, lastRead.length);
344 return t;
345 }
346
347
348
349
350
351
352
353
354 protected String getNextLine() throws IOException {
355 if (isClosed()) {
356 hasNext = false;
357 return null;
358 }
359
360 if (!this.linesSkipped) {
361 for (int i = 0; i < skipLines; i++) {
362 lineReader.readLine();
363 linesRead++;
364 }
365 this.linesSkipped = true;
366 }
367 String nextLine = lineReader.readLine();
368 if (nextLine == null) {
369 hasNext = false;
370 } else {
371 linesRead++;
372 }
373
374 return hasNext ? nextLine : null;
375 }
376
377
378
379
380
381
382 public int getMultilineLimit() {
383 return multilineLimit;
384 }
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405 protected boolean isClosed() throws IOException {
406 if (!verifyReader) {
407 return false;
408 }
409 try {
410 br.mark(READ_AHEAD_LIMIT);
411 int nextByte = br.read();
412 br.reset();
413 return nextByte == -1;
414 } catch (IOException e) {
415 if (PASSTHROUGH_EXCEPTIONS.contains(e.getClass())) {
416 throw e;
417 }
418
419 return true;
420 }
421 }
422
423
424
425
426
427
428 @Override
429 public void close() throws IOException {
430 br.close();
431 }
432
433
434
435
436
437
438 @Override
439 public Iterator<String[]> iterator() {
440 try {
441 CSVIterator it = new CSVIterator(this);
442 it.setErrorLocale(errorLocale);
443 return it;
444 } catch (IOException | CsvValidationException e) {
445 throw new RuntimeException(e);
446 }
447 }
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466 public boolean verifyReader() {
467 return this.verifyReader;
468 }
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500 public long getLinesRead() {
501 return linesRead;
502 }
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538 public long getRecordsRead() {
539 return recordsRead;
540 }
541
542
543
544
545
546
547
548
549 public void skip(int numberOfLinesToSkip) throws IOException {
550 for (int j = 0; j < numberOfLinesToSkip; j++) {
551 readNextSilently();
552 }
553 }
554
555
556
557
558
559
560
561
562 public void setErrorLocale(Locale errorLocale) {
563 this.errorLocale = ObjectUtils.defaultIfNull(errorLocale, Locale.getDefault());
564 if (parser != null) {
565 parser.setErrorLocale(this.errorLocale);
566 }
567 }
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583 public String[] peek() throws IOException {
584 String[] result = null;
585 try {
586 result = flexibleRead(false, false);
587 } catch (CsvValidationException e) {
588
589 }
590 return result;
591 }
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607 private String[] flexibleRead(boolean popLine, boolean validate) throws IOException, CsvValidationException {
608
609 if(peekedLines.isEmpty()) {
610 primeNextRecord();
611 }
612
613 if(validate) {
614 for(OrderedObject<String> orderedObject : peekedLines) {
615 validateLine(orderedObject.getOrdinal(), orderedObject.getElement());
616 }
617 validateResult(peekedLine, linesRead);
618 }
619
620 String[] result = peekedLine;
621
622 if(popLine) {
623 peekedLines.clear();
624 peekedLine = null;
625 if(result != null) {
626 recordsRead++;
627 }
628 }
629
630 return result;
631 }
632 }