From 7517ae2fb3bc29e91f0f7c2daaac8cd8c8887bb2 Mon Sep 17 00:00:00 2001 From: xFrednet Date: Sun, 24 Oct 2021 22:47:39 +0200 Subject: [PATCH] Refactored some string handling to prevent ICEs and FNs --- clippy_lints/src/enum_variants.rs | 44 ++++++-------------- clippy_utils/src/str_utils.rs | 69 +++++++++++++++++++++++++++++++ tests/ui/crashes/ice-7869.rs | 7 ++++ tests/ui/crashes/ice-7869.stderr | 15 +++++++ tests/ui/enum_variants.stderr | 2 +- tests/ui/match_ref_pats.rs | 2 +- 6 files changed, 106 insertions(+), 33 deletions(-) create mode 100644 tests/ui/crashes/ice-7869.rs create mode 100644 tests/ui/crashes/ice-7869.stderr diff --git a/clippy_lints/src/enum_variants.rs b/clippy_lints/src/enum_variants.rs index 19f1781d0b04..404b67c8f29f 100644 --- a/clippy_lints/src/enum_variants.rs +++ b/clippy_lints/src/enum_variants.rs @@ -2,7 +2,7 @@ use clippy_utils::diagnostics::{span_lint, span_lint_and_help}; use clippy_utils::source::is_present_in_source; -use clippy_utils::str_utils; +use clippy_utils::str_utils::{self, count_match_end, count_match_start}; use rustc_hir::{EnumDef, Item, ItemKind}; use rustc_lint::{LateContext, LateLintPass}; use rustc_session::{declare_tool_lint, impl_lint_pass}; @@ -117,26 +117,6 @@ impl_lint_pass!(EnumVariantNames => [ MODULE_INCEPTION ]); -/// Returns the number of chars that match from the start -#[must_use] -fn partial_match(pre: &str, name: &str) -> usize { - let mut name_iter = name.chars(); - let _ = name_iter.next_back(); // make sure the name is never fully matched - pre.chars().zip(name_iter).take_while(|&(l, r)| l == r).count() -} - -/// Returns the number of chars that match from the end -#[must_use] -fn partial_rmatch(post: &str, name: &str) -> usize { - let mut name_iter = name.chars(); - let _ = name_iter.next(); // make sure the name is never fully matched - post.chars() - .rev() - .zip(name_iter.rev()) - .take_while(|&(l, r)| l == r) - .count() -} - fn check_variant( cx: &LateContext<'_>, threshold: u64, @@ -150,7 +130,7 @@ fn check_variant( } for var in def.variants { let name = var.ident.name.as_str(); - if partial_match(item_name, &name) == item_name_chars + if count_match_start(item_name, &name).char_count == item_name_chars && name.chars().nth(item_name_chars).map_or(false, |c| !c.is_lowercase()) && name.chars().nth(item_name_chars + 1).map_or(false, |c| !c.is_numeric()) { @@ -161,7 +141,7 @@ fn check_variant( "variant name starts with the enum's name", ); } - if partial_rmatch(item_name, &name) == item_name_chars { + if count_match_end(item_name, &name).char_count == item_name_chars { span_lint( cx, ENUM_VARIANT_NAMES, @@ -176,7 +156,7 @@ fn check_variant( for var in def.variants { let name = var.ident.name.as_str(); - let pre_match = partial_match(pre, &name); + let pre_match = count_match_start(pre, &name).byte_count; pre = &pre[..pre_match]; let pre_camel = str_utils::camel_case_until(pre).byte_index; pre = &pre[..pre_camel]; @@ -193,8 +173,8 @@ fn check_variant( } } - let post_match = partial_rmatch(post, &name); - let post_end = post.len() - post_match; + let post_match = count_match_end(post, &name); + let post_end = post.len() - post_match.byte_count; post = &post[post_end..]; let post_camel = str_utils::camel_case_start(post); post = &post[post_camel.byte_index..]; @@ -266,14 +246,16 @@ impl LateLintPass<'_> for EnumVariantNames { ); } } - if item.vis.node.is_pub() { - let matching = partial_match(mod_camel, &item_camel); - let rmatching = partial_rmatch(mod_camel, &item_camel); + // The `module_name_repetitions` lint should only trigger if the item has the module in its + // name. Having the same name is accepted. + if item.vis.node.is_pub() && item_camel.len() > mod_camel.len() { + let matching = count_match_start(mod_camel, &item_camel); + let rmatching = count_match_end(mod_camel, &item_camel); let nchars = mod_camel.chars().count(); let is_word_beginning = |c: char| c == '_' || c.is_uppercase() || c.is_numeric(); - if matching == nchars { + if matching.char_count == nchars { match item_camel.chars().nth(nchars) { Some(c) if is_word_beginning(c) => span_lint( cx, @@ -284,7 +266,7 @@ impl LateLintPass<'_> for EnumVariantNames { _ => (), } } - if rmatching == nchars { + if rmatching.char_count == nchars { span_lint( cx, MODULE_NAME_REPETITIONS, diff --git a/clippy_utils/src/str_utils.rs b/clippy_utils/src/str_utils.rs index f3de1250d5d8..cba96e05a241 100644 --- a/clippy_utils/src/str_utils.rs +++ b/clippy_utils/src/str_utils.rs @@ -99,6 +99,75 @@ pub fn camel_case_start(s: &str) -> StrIndex { last_index } +/// Dealing with sting comparison can be complicated, this struct ensures that both the +/// character and byte count are provided for correct indexing. +#[derive(Debug, Default, PartialEq, Eq)] +pub struct StrCount { + pub char_count: usize, + pub byte_count: usize, +} + +impl StrCount { + pub fn new(char_count: usize, byte_count: usize) -> Self { + Self { char_count, byte_count } + } +} + +/// Returns the number of chars that match from the start +/// +/// ``` +/// assert_eq!(count_match_start("hello_mouse", "hello_penguin"), StrCount::new(6, 6)); +/// assert_eq!(count_match_start("hello_clippy", "bye_bugs"), StrCount::new(0, 0)); +/// assert_eq!(count_match_start("hello_world", "hello_world"), StrCount::new(11, 11)); +/// assert_eq!(count_match_start("T\u{f6}ffT\u{f6}ff", "T\u{f6}ff"), StrCount::new(4, 5)); +/// ``` +#[must_use] +pub fn count_match_start(str1: &str, str2: &str) -> StrCount { + // (char_index, char1) + let char_count = str1.chars().count(); + let iter1 = (0..=char_count).zip(str1.chars()); + // (byte_index, char2) + let iter2 = str2.char_indices(); + + iter1 + .zip(iter2) + .take_while(|((_, c1), (_, c2))| c1 == c2) + .last() + .map_or_else(StrCount::default, |((char_index, _), (byte_index, character))| { + StrCount::new(char_index + 1, byte_index + character.len_utf8()) + }) +} + +/// Returns the number of chars and bytes that match from the end +/// +/// ``` +/// assert_eq!(count_match_end("hello_cat", "bye_cat"), StrCount::new(4, 4)); +/// assert_eq!(count_match_end("if_item_thing", "enum_value"), StrCount::new(0, 0)); +/// assert_eq!(count_match_end("Clippy", "Clippy"), StrCount::new(6, 6)); +/// assert_eq!(count_match_end("MyT\u{f6}ff", "YourT\u{f6}ff"), StrCount::new(4, 5)); +/// ``` +#[must_use] +pub fn count_match_end(str1: &str, str2: &str) -> StrCount { + let char_count = str1.chars().count(); + if char_count == 0 { + return StrCount::default(); + } + + // (char_index, char1) + let iter1 = (0..char_count).rev().zip(str1.chars().rev()); + // (byte_index, char2) + let byte_count = str2.len(); + let iter2 = str2.char_indices().rev(); + + iter1 + .zip(iter2) + .take_while(|((_, c1), (_, c2))| c1 == c2) + .last() + .map_or_else(StrCount::default, |((char_index, _), (byte_index, _))| { + StrCount::new(char_count - char_index, byte_count - byte_index) + }) +} + #[cfg(test)] mod test { use super::*; diff --git a/tests/ui/crashes/ice-7869.rs b/tests/ui/crashes/ice-7869.rs new file mode 100644 index 000000000000..8f97a063a9a9 --- /dev/null +++ b/tests/ui/crashes/ice-7869.rs @@ -0,0 +1,7 @@ +enum Tila { + TyöAlkoi, + TyöKeskeytyi, + TyöValmis, +} + +fn main() {} diff --git a/tests/ui/crashes/ice-7869.stderr b/tests/ui/crashes/ice-7869.stderr new file mode 100644 index 000000000000..4fa9fb27e765 --- /dev/null +++ b/tests/ui/crashes/ice-7869.stderr @@ -0,0 +1,15 @@ +error: all variants have the same prefix: `Työ` + --> $DIR/ice-7869.rs:1:1 + | +LL | / enum Tila { +LL | | TyöAlkoi, +LL | | TyöKeskeytyi, +LL | | TyöValmis, +LL | | } + | |_^ + | + = note: `-D clippy::enum-variant-names` implied by `-D warnings` + = help: remove the prefixes and use full paths to the variants instead of glob imports + +error: aborting due to previous error + diff --git a/tests/ui/enum_variants.stderr b/tests/ui/enum_variants.stderr index 447fbb9e1bff..add8a91e26b8 100644 --- a/tests/ui/enum_variants.stderr +++ b/tests/ui/enum_variants.stderr @@ -60,7 +60,7 @@ LL | | } | = help: remove the prefixes and use full paths to the variants instead of glob imports -error: all variants have the same prefix: `With` +error: all variants have the same prefix: `WithOut` --> $DIR/enum_variants.rs:81:1 | LL | / enum Seallll { diff --git a/tests/ui/match_ref_pats.rs b/tests/ui/match_ref_pats.rs index 50246486bb6f..7e3674ab8c9f 100644 --- a/tests/ui/match_ref_pats.rs +++ b/tests/ui/match_ref_pats.rs @@ -1,5 +1,5 @@ #![warn(clippy::match_ref_pats)] -#![allow(clippy::equatable_if_let)] +#![allow(clippy::equatable_if_let, clippy::enum_variant_names)] fn ref_pats() { {