Rollup merge of #67849 - cjkenn:check-sorted-words, r=estebank

Add a check for swapped words when we can't find an identifier

Fixes #66968

Couple things here:
1. The matches take the precedence of case insensitive match, then levenshtein match, then swapped words match. Doing this allows us to not even check for swapped words unless the other checks return `None`.
2. I've assumed that the swapped words check is not held to the limits of the max levenshtein distance threshold (ie. we want to try and find a match even if the levenshtein distance is very high). This means that we cannot perform this check in the `fold` that occurs after the `filter_map` call, because the candidate will be filtered out. So, I've split this into two separate `fold` calls, and had to collect the original iterator into a vec so it can be copied (I don't think we want to change the function signature to take a vec or require the `Copy` trait). An alternative implemenation may be to remove the `filter_map`, `fold` over the entire iterator, and do a check against `max_dist` inside the relevant cases there.

r? @estebank
This commit is contained in:
Yuki Okushi 2020-01-09 00:29:12 +09:00 committed by GitHub
commit 429a7e7522
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 47 additions and 5 deletions

View file

@ -54,14 +54,16 @@ where
T: Iterator<Item = &'a Symbol>,
{
let max_dist = dist.map_or_else(|| cmp::max(lookup.len(), 3) / 3, |d| d);
let name_vec: Vec<&Symbol> = iter_names.collect();
let (case_insensitive_match, levenstein_match) = iter_names
let (case_insensitive_match, levenshtein_match) = name_vec
.iter()
.filter_map(|&name| {
let dist = lev_distance(lookup, &name.as_str());
if dist <= max_dist { Some((name, dist)) } else { None }
})
// Here we are collecting the next structure:
// (case_insensitive_match, (levenstein_match, levenstein_distance))
// (case_insensitive_match, (levenshtein_match, levenshtein_distance))
.fold((None, None), |result, (candidate, dist)| {
(
if candidate.as_str().to_uppercase() == lookup.to_uppercase() {
@ -75,10 +77,31 @@ where
},
)
});
// Priority of matches:
// 1. Exact case insensitive match
// 2. Levenshtein distance match
// 3. Sorted word match
if let Some(candidate) = case_insensitive_match {
Some(candidate) // exact case insensitive match has a higher priority
Some(*candidate)
} else if levenshtein_match.is_some() {
levenshtein_match.map(|(candidate, _)| *candidate)
} else {
levenstein_match.map(|(candidate, _)| candidate)
find_match_by_sorted_words(name_vec, lookup)
}
}
fn find_match_by_sorted_words<'a>(iter_names: Vec<&'a Symbol>, lookup: &str) -> Option<Symbol> {
iter_names.iter().fold(None, |result, candidate| {
if sort_by_words(&candidate.as_str()) == sort_by_words(lookup) {
Some(**candidate)
} else {
result
}
})
}
fn sort_by_words(name: &str) -> String {
let mut split_words: Vec<&str> = name.split('_').collect();
split_words.sort();
split_words.join("_")
}

View file

@ -46,5 +46,11 @@ fn test_find_best_match_for_name() {
find_best_match_for_name(input.iter(), "aaaa", Some(4)),
Some(Symbol::intern("AAAA"))
);
let input = vec![Symbol::intern("a_longer_variable_name")];
assert_eq!(
find_best_match_for_name(input.iter(), "a_variable_longer_name", None),
Some(Symbol::intern("a_longer_variable_name"))
);
})
}

View file

@ -0,0 +1,4 @@
fn main() {
let a_longer_variable_name = 1;
println!("{}", a_variable_longer_name); //~ ERROR E0425
}

View file

@ -0,0 +1,9 @@
error[E0425]: cannot find value `a_variable_longer_name` in this scope
--> $DIR/issue-66968-suggest-sorted-words.rs:3:20
|
LL | println!("{}", a_variable_longer_name);
| ^^^^^^^^^^^^^^^^^^^^^^ help: a local variable with a similar name exists: `a_longer_variable_name`
error: aborting due to previous error
For more information about this error, try `rustc --explain E0425`.