View Javadoc
1   package com.opencsv.bean;
2   
3   import com.opencsv.CSVReader;
4   import com.opencsv.exceptions.CsvRequiredFieldEmptyException;
5   import org.apache.commons.collections4.ListValuedMap;
6   import org.apache.commons.lang3.StringUtils;
7   import org.apache.commons.text.similarity.LevenshteinDistance;
8   
9   import java.io.IOException;
10  import java.lang.reflect.Field;
11  import java.util.LinkedList;
12  import java.util.List;
13  import java.util.Objects;
14  import java.util.Set;
15  import java.util.stream.Collectors;
16  import java.util.stream.Stream;
17  
18  /**
19   * A mapping strategy that tries to make the best match between header names
20   * and non-annotated member variables.
21   *
22   * @param <T> The type of bean being processed
23   * @author Andrew Rucker Jones
24   * @since 5.0
25   */
26  public class FuzzyMappingStrategy<T> extends HeaderColumnNameMappingStrategy<T> {
27  
28      /**
29       * Nullary constructor. Considered stable.
30       * @see FuzzyMappingStrategyBuilder
31       */
32      public FuzzyMappingStrategy() {
33      }
34  
35      /**
36       * Constructor to allow setting options for header name mapping.
37       * Not considered stable. As new options are introduced for the mapping
38       * strategy, they will be introduced here. You are encouraged to use
39       * {@link FuzzyMappingStrategyBuilder}.
40       *
41       * @param forceCorrectRecordLength If set, every record will be shortened
42       *                                 or lengthened to match the number of
43       *                                 headers
44       * @see FuzzyMappingStrategyBuilder
45       */
46      public FuzzyMappingStrategy(boolean forceCorrectRecordLength) {
47          super(forceCorrectRecordLength);
48      }
49  
50      /**
51       * This implementation intentionally does nothing in order to allow fuzzy
52       * matching in case there are no annotations at all in the class in
53       * question.
54       */
55      @Override
56      protected void loadUnadornedFieldMap(ListValuedMap<Class<?>, Field> fields) {}
57  
58      @Override
59      public void captureHeader(CSVReader reader) throws IOException, CsvRequiredFieldEmptyException {
60          super.captureHeader(reader);
61  
62          // Find all headers not mapped
63          final Set<String> unusedHeaders = Stream.of(headerIndex.getHeaderIndex())
64                  .filter(Objects::nonNull)
65                  .filter(k -> fieldMap.get(k.toUpperCase()) == null)
66                  .collect(Collectors.toSet());
67  
68          // Find all non-annotated fields
69          final ListValuedMap<Class<?>, Field> unusedFields = partitionFields().get(Boolean.FALSE);
70  
71          // Calculate distances and sort
72          LevenshteinDistance levenshtein = LevenshteinDistance.getDefaultInstance();
73          List<FuzzyComparison> comparisons = new LinkedList<>();
74          unusedHeaders.forEach(h -> {
75              unusedFields.entries().forEach(f -> {
76                  comparisons.add(new FuzzyComparison(
77                          levenshtein.apply(h.toUpperCase(), f.getValue().getName().toUpperCase()),
78                          h, f.getKey(), f.getValue()));
79              });
80          });
81          comparisons.sort(null);
82  
83          // Use the best matches
84          while (!comparisons.isEmpty()) {
85              FuzzyComparison fc = comparisons.get(0);
86  
87              // Add the mapping
88              CsvConverter converter = determineConverter(
89                      fc.field, fc.field.getType(), null, null, null);
90              fieldMap.put(fc.header.toUpperCase(), new BeanFieldSingleValue<>(
91                      fc.type, fc.field, false, errorLocale, converter, null,
92                      null));
93  
94              // Remove any other comparisons for the header or field
95              comparisons.removeIf(e ->
96                      StringUtils.equals(e.header, fc.header)
97                              || Objects.equals(e.field, fc.field));
98          }
99      }
100 
101     /**
102      * This is a simple class for grouping header name, member variable name,
103      * and the result of fuzzy matching in one sortable place.
104      */
105     private static class FuzzyComparison implements Comparable<FuzzyComparison> {
106 
107         final Integer distance;
108         final String header;
109         final Class<?> type;
110         final Field field;
111 
112         FuzzyComparison(Integer distance, String header, Class<?> type, Field field) {
113             this.distance = distance;
114             this.header = header;
115             this.type = type;
116             this.field = field;
117         }
118 
119         @Override
120         public int compareTo(FuzzyComparison o) {
121             return Integer.compare(distance, o.distance);
122         }
123 
124         @Override
125         public boolean equals(Object o) {
126             if (this == o) {
127                 return true;
128             }
129             if (!(o instanceof FuzzyComparison)) {
130                 return false;
131             }
132             FuzzyComparison that = (FuzzyComparison) o;
133             return Objects.equals(distance, that.distance);
134         }
135 
136         @Override
137         public int hashCode() {
138             return Objects.hash(distance);
139         }
140     }
141 }