RFC4180Parser.java
package com.opencsv;
import com.opencsv.enums.CSVReaderNullFieldIndicator;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
/**
* This Parser is meant to parse according to the RFC4180 specification.
* <p>Since it shares the same interface with the CSVParser there are methods here that will do nothing.
* For example the RFC4180 specification does not have an concept of an escape character so the getEscape method
* will return char 0. The methods that are not supported are noted in the Javadocs.</p>
* <p>Another departure from the CSVParser is that there is only two constructors and only one is available publicly.
* The intent is that if you want to create anything other than a default RFC4180Parser you should use the
* CSVParserBuilder. This way the code will not become cluttered with constructors as the CSVParser did.</p>
* <p>You can view the RFC4180 specification at <a href="https://tools.ietf.org/html/rfc4180">the Internet Engineering
* Task Force (IETF) website</a>.</p>
* <p>Examples:</p>
* {@code
* ICSVParser parser = new RFC4180Parser();
* }
* <p>or</p>
* {@code
* CSVParserBuilder builder = new CSVParserBuilder()
* ICSVParser parser = builder.withParserType(ParserType.RFC4180Parser).build()
* }
*
* @author Scott Conway
* @since 3.9
*/
public class RFC4180Parser extends AbstractCSVParser {
/**
* Default constructor for the RFC4180Parser. Uses values from the ICSVParser.
*/
public RFC4180Parser() {
this(ICSVParser.DEFAULT_QUOTE_CHARACTER, ICSVParser.DEFAULT_SEPARATOR, CSVReaderNullFieldIndicator.NEITHER);
}
/**
* Constructor used by the CSVParserBuilder.
*
* @param separator The delimiter to use for separating entries
* @param quoteChar The character to use for quoted elements
* @param nullFieldIndicator Indicate what should be considered null
*/
RFC4180Parser(char quoteChar, char separator, CSVReaderNullFieldIndicator nullFieldIndicator) {
super(separator, quoteChar, nullFieldIndicator);
}
@Override
protected String convertToCsvValue(String value, boolean applyQuotesToAll) {
String testValue = (value == null && !nullFieldIndicator.equals(CSVReaderNullFieldIndicator.NEITHER)) ? "" : value;
StringBuilder builder = new StringBuilder(testValue == null ? MAX_SIZE_FOR_EMPTY_FIELD : (testValue.length() * 2));
boolean containsQuoteChar = testValue != null && testValue.contains(getQuotecharAsString());
boolean surroundWithQuotes = applyQuotesToAll || isSurroundWithQuotes(value, containsQuoteChar);
String convertedString = !containsQuoteChar ? testValue : quoteMatcherPattern.matcher(testValue).replaceAll(quoteDoubledAsString);
if (surroundWithQuotes) {
builder.append(getQuotechar());
}
builder.append(convertedString);
if (surroundWithQuotes) {
builder.append(getQuotechar());
}
return builder.toString();
}
/**
* Parses an incoming String and returns an array of elements.
*
* @param nextLine The string to parse
* @param multi Does it take multiple lines to form a single record?
* @return The list of elements, or null if nextLine is null
*/
protected String[] parseLine(String nextLine, boolean multi) {
String[] elements;
if (!multi && pending != null) {
pending = null;
}
if (nextLine == null) {
if (pending != null) {
String s = pending;
pending = null;
return new String[]{s};
}
return null;
}
String lineToProcess = multi && pending != null ? pending + nextLine : nextLine;
pending = null;
if (!StringUtils.contains(lineToProcess, quotechar)) {
elements = handleEmptySeparators(tokenizeStringIntoArray(lineToProcess));
} else {
elements = handleEmptySeparators(splitWhileNotInQuotes(lineToProcess, multi));
for (int i = 0; i < elements.length; i++) {
if (StringUtils.contains(elements[i], quotechar)) {
elements[i] = handleQuotes(elements[i]);
}
}
}
return elements;
}
private String[] tokenizeStringIntoArray(String nextLine) {
return nextLine.split(separatorAsString, -1);
}
private String[] handleEmptySeparators(String[] strings) {
if (nullFieldIndicator == CSVReaderNullFieldIndicator.EMPTY_SEPARATORS || nullFieldIndicator == CSVReaderNullFieldIndicator.BOTH) {
for (int i = 0; i < strings.length; i++) {
if (strings[i].isEmpty()) {
strings[i] = null;
}
}
}
return strings;
}
private String[] splitWhileNotInQuotes(String nextLine, boolean multi) {
int currentPosition = 0;
List<String> elements = new ArrayList<>();
int nextSeparator;
int nextQuote;
while (currentPosition < nextLine.length()) {
nextSeparator = nextLine.indexOf(separator, currentPosition);
nextQuote = nextLine.indexOf(quotechar, currentPosition);
if (nextSeparator == -1) {
elements.add(nextLine.substring(currentPosition));
currentPosition = nextLine.length();
} else if (nextQuote == -1 || nextQuote > nextSeparator || nextQuote != currentPosition) {
elements.add(nextLine.substring(currentPosition, nextSeparator));
currentPosition = nextSeparator + 1;
} else {
int fieldEnd = findEndOfFieldFromPosition(nextLine, currentPosition);
elements.add(fieldEnd >= nextLine.length() ? nextLine.substring(currentPosition) : nextLine.substring(currentPosition, fieldEnd));
currentPosition = fieldEnd + 1;
}
}
if (multi && lastElementStartedWithQuoteButDidNotEndInOne(elements)) {
pending = elements.get(elements.size() - 1) + NEWLINE;
elements.remove(elements.size() - 1);
} else if (nextLine.lastIndexOf(separator) == nextLine.length() - 1) {
elements.add("");
}
return elements.toArray(ArrayUtils.EMPTY_STRING_ARRAY);
}
private boolean lastElementStartedWithQuoteButDidNotEndInOne(List<String> elements) {
String lastElement = elements.get(elements.size() - 1);
return startsButDoesNotEndWithQuote(lastElement) || hasOnlyOneQuote(lastElement) || hasOddQuotes(lastElement);
}
private boolean hasOddQuotes(String lastElement) {
return StringUtils.countMatches(lastElement, quotechar) % 2 != 0;
}
private boolean hasOnlyOneQuote(String lastElement) {
return StringUtils.countMatches(lastElement, quotechar) == 1;
}
private boolean startsButDoesNotEndWithQuote(String lastElement) {
return lastElement.startsWith(getQuotecharAsString()) && !lastElement.endsWith(getQuotecharAsString());
}
private int findEndOfFieldFromPosition(String nextLine, int currentPosition) {
int nextQuote = nextLine.indexOf(quotechar, currentPosition + 1);
boolean inQuote = false;
while (haveNotFoundLastQuote(nextLine, nextQuote)) {
if (!inQuote && nextLine.charAt(nextQuote + 1) == separator) {
return nextQuote + 1;
}
do {
nextQuote = nextLine.indexOf(quotechar, nextQuote + 1);
inQuote = !inQuote;
} while (haveNotFoundLastQuote(nextLine, nextQuote) && nextLine.charAt(nextQuote + 1) == quotechar);
}
return nextLine.length();
}
private boolean haveNotFoundLastQuote(String nextLine, int nextQuote) {
return nextQuote != -1 && nextQuote < nextLine.length() - 1;
}
private String handleQuotes(String element) {
String ret = element;
if (!hasOnlyOneQuote(ret) && ret.startsWith(getQuotecharAsString())) {
ret = StringUtils.removeStart(ret, getQuotecharAsString());
ret = StringUtils.removeEnd(ret, getQuotecharAsString());
}
ret = StringUtils.replace(ret, getQuotecharAsString() + getQuotecharAsString(), getQuotecharAsString());
if (ret.isEmpty() && (nullFieldIndicator == CSVReaderNullFieldIndicator.BOTH || nullFieldIndicator == CSVReaderNullFieldIndicator.EMPTY_QUOTES)) {
ret = null;
}
return ret;
}
@Override
public void setErrorLocale(Locale errorLocale) {
// Curiously enough, this implementation never throws exceptions and so
// has no need of translations.
}
}