Skip to main content

polars_utils/
levenshtein.rs

1// Implementation from: https://github.com/wooorm/levenshtein-rs/blob/9c4730b1973d4e61e187f8fe6d5f299ad5c991fc/src/lib.rs
2// (MIT licensed. Copyright (c) 2016 Titus Wormer) <tituswormer@gmail.com>
3pub fn levenshtein(a: &str, b: &str) -> usize {
4    let mut result = 0;
5
6    if a == b {
7        return result;
8    }
9
10    let length_a = a.chars().count();
11    let length_b = b.chars().count();
12
13    if length_a == 0 {
14        return length_b;
15    }
16
17    if length_b == 0 {
18        return length_a;
19    }
20
21    let mut cache: Vec<usize> = (1..).take(length_a).collect();
22
23    for (index_b, code_b) in b.chars().enumerate() {
24        result = index_b;
25        let mut distance_a = index_b;
26
27        for (index_a, code_a) in a.chars().enumerate() {
28            let distance_b = if code_a == code_b {
29                distance_a
30            } else {
31                distance_a + 1
32            };
33
34            distance_a = cache[index_a];
35
36            result = if distance_a > result {
37                if distance_b > result {
38                    result + 1
39                } else {
40                    distance_b
41                }
42            } else if distance_b > distance_a {
43                distance_a + 1
44            } else {
45                distance_b
46            };
47
48            cache[index_a] = result;
49        }
50    }
51
52    result
53}
54
55/// Return the closest candidate to `name` whose edit distance is within
56/// `max(1, name.len() / 3)`, or `None` if no candidate is close enough.
57pub fn did_you_mean<'a>(name: &str, candidates: impl Iterator<Item = &'a str>) -> Option<&'a str> {
58    let threshold = (name.len() / 3).max(1);
59    candidates
60        .filter_map(|c| {
61            let d = levenshtein(name, c);
62            if d <= threshold { Some((c, d)) } else { None }
63        })
64        .min_by_key(|&(_, d)| d)
65        .map(|(c, _)| c)
66}