// FuzzyMappingStrategy.java
package com.opencsv.bean;
import com.opencsv.CSVReader;
import com.opencsv.exceptions.CsvRequiredFieldEmptyException;
import org.apache.commons.collections4.ListValuedMap;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.similarity.LevenshteinDistance;
import java.io.IOException;
import java.lang.reflect.Field;
import java.util.LinkedList;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
 * A mapping strategy that pairs header names which are not yet mapped with
 * non-annotated member variables by choosing the closest names.
 * <p>Closeness is the Levenshtein distance between the upper-cased header
 * name and the upper-cased member variable name. Pairings with the smallest
 * distance are bound first, and once a header or a member variable has been
 * bound by this step it is not considered again.</p>
 *
 * @param <T> The type of bean being processed
 * @author Andrew Rucker Jones
 * @since 5.0
 */
public class FuzzyMappingStrategy<T> extends HeaderColumnNameMappingStrategy<T> {

    /**
     * Nullary constructor. Considered stable.
     *
     * @see FuzzyMappingStrategyBuilder
     */
    public FuzzyMappingStrategy() {
    }

    /**
     * Constructor that accepts options for header name mapping.
     * Not considered stable: new options for the mapping strategy will be
     * added here as they are introduced. Prefer
     * {@link FuzzyMappingStrategyBuilder}.
     *
     * @param forceCorrectRecordLength If set, every record will be shortened
     *                                 or lengthened to match the number of
     *                                 headers
     * @see FuzzyMappingStrategyBuilder
     */
    public FuzzyMappingStrategy(boolean forceCorrectRecordLength) {
        super(forceCorrectRecordLength);
    }

    /**
     * Deliberately a no-op, so that fuzzy matching remains possible when the
     * bean class carries no annotations at all.
     *
     * @param fields Ignored by this implementation
     */
    @Override
    protected void loadUnadornedFieldMap(ListValuedMap<Class<?>, Field> fields) {
        // Intentionally empty.
    }

    /**
     * Captures the header through the superclass and then binds every
     * header that is still unmapped to the non-annotated member variable
     * whose name is closest to it.
     * <p>Every remaining header is scored against every remaining member
     * variable, the scores are sorted in ascending order, and the list is
     * consumed from the front: the best pairing is registered in the field
     * map and every other pairing involving the same header or the same
     * member variable is discarded.</p>
     *
     * @param reader The reader handed on to the superclass implementation
     * @throws IOException As declared by the superclass implementation
     * @throws CsvRequiredFieldEmptyException As declared by the superclass
     *   implementation
     */
    @Override
    public void captureHeader(CSVReader reader) throws IOException, CsvRequiredFieldEmptyException {
        super.captureHeader(reader);

        // Headers for which the field map holds no entry yet
        final Set<String> orphanHeaders = Stream.of(headerIndex.getHeaderIndex())
                .filter(name -> name != null && fieldMap.get(name.toUpperCase()) == null)
                .collect(Collectors.toSet());

        // Member variables that carry no binding annotation
        final ListValuedMap<Class<?>, Field> orphanFields = partitionFields().get(Boolean.FALSE);

        // Score every header against every candidate member variable
        final LevenshteinDistance metric = LevenshteinDistance.getDefaultInstance();
        final List<FuzzyComparison> candidates = new LinkedList<>();
        for (String header : orphanHeaders) {
            final String headerKey = header.toUpperCase();
            orphanFields.entries().forEach(entry -> {
                final Field member = entry.getValue();
                final Integer distance = metric.apply(headerKey, member.getName().toUpperCase());
                candidates.add(new FuzzyComparison(distance, header, entry.getKey(), member));
            });
        }

        // Smallest distance first; the sort is stable, so ties keep insertion order
        candidates.sort(FuzzyComparison::compareTo);

        // Greedily consume the closest remaining pairing
        while (!candidates.isEmpty()) {
            final FuzzyComparison best = candidates.get(0);
            final String matchedHeader = best.header;
            final Field matchedField = best.field;

            // Register the binding under the upper-cased header name
            final CsvConverter converter = determineConverter(
                    matchedField, matchedField.getType(), null, null, null);
            fieldMap.put(matchedHeader.toUpperCase(), new BeanFieldSingleValue<>(
                    best.type, matchedField, false, errorLocale, converter, null, null));

            // Drop every pairing that reuses this header or this member
            // variable; this also removes the pairing that was just bound
            candidates.removeIf(other ->
                    StringUtils.equals(other.header, matchedHeader)
                            || Objects.equals(other.field, matchedField));
        }
    }

    /**
     * Groups a header name, a member variable with the type it was listed
     * under, and their fuzzy-matching distance into one sortable value.
     * Ordering, equality and hash code are all based on the distance alone.
     */
    private static class FuzzyComparison implements Comparable<FuzzyComparison> {

        final Integer distance;
        final String header;
        final Class<?> type;
        final Field field;

        FuzzyComparison(Integer distance, String header, Class<?> type, Field field) {
            this.distance = distance;
            this.header = header;
            this.type = type;
            this.field = field;
        }

        @Override
        public int compareTo(FuzzyComparison o) {
            return distance.compareTo(o.distance);
        }

        @Override
        public boolean equals(Object o) {
            return this == o
                    || (o instanceof FuzzyComparison
                    && Objects.equals(distance, ((FuzzyComparison) o).distance));
        }

        @Override
        public int hashCode() {
            return Objects.hash(distance);
        }
    }
}