8733: Suggest str.lines when splitting at hard-coded newlines

Adds a new `splitting_strings_at_newlines` lint that suggests to use
`str.lines` instead of splitting a trimmed string at hard-coded
newlines.
This commit is contained in:
Florian Brucker 2023-12-17 18:46:49 +01:00
parent eca3932395
commit fe35e08e9f
7 changed files with 430 additions and 0 deletions

View file

@ -435,6 +435,7 @@ pub(crate) static LINTS: &[&crate::LintInfo] = &[
crate::methods::STABLE_SORT_PRIMITIVE_INFO,
crate::methods::STRING_EXTEND_CHARS_INFO,
crate::methods::STRING_LIT_CHARS_ANY_INFO,
crate::methods::STR_SPLIT_AT_NEWLINE_INFO,
crate::methods::SUSPICIOUS_COMMAND_ARG_SPACE_INFO,
crate::methods::SUSPICIOUS_MAP_INFO,
crate::methods::SUSPICIOUS_SPLITN_INFO,

View file

@ -94,6 +94,7 @@ mod single_char_pattern;
mod single_char_push_string;
mod skip_while_next;
mod stable_sort_primitive;
mod str_split;
mod str_splitn;
mod string_extend_chars;
mod string_lit_chars_any;
@ -3856,6 +3857,36 @@ declare_clippy_lint! {
"using `.map(f).unwrap_or_default()`, which is more succinctly expressed as `is_some_and(f)` or `is_ok_and(f)`"
}
declare_clippy_lint! {
/// ### What it does
///
/// Checks for usages of `str.trim().split("\n")` and `str.trim().split("\r\n")`.
///
/// ### Why is this bad?
///
/// Hard-coding the line endings makes the code less compatible. `str.lines` should be used instead.
///
/// ### Example
/// ```no_run
/// "some\ntext\nwith\nnewlines\n".trim().split('\n');
/// ```
/// Use instead:
/// ```no_run
/// "some\ntext\nwith\nnewlines\n".lines();
/// ```
///
/// ### Known Problems
///
/// This lint cannot detect if the split is intentionally restricted to a single type of newline (`"\n"` or
/// `"\r\n"`), for example during the parsing of a specific file format in which precisely one newline type is
/// valid.
/// ```
#[clippy::version = "1.76.0"]
pub STR_SPLIT_AT_NEWLINE,
pedantic,
"splitting a trimmed string at hard-coded newlines"
}
pub struct Methods {
avoid_breaking_exported_api: bool,
msrv: Msrv,
@ -4011,6 +4042,7 @@ impl_lint_pass!(Methods => [
ITER_FILTER_IS_SOME,
ITER_FILTER_IS_OK,
MANUAL_IS_VARIANT_AND,
STR_SPLIT_AT_NEWLINE,
]);
/// Extracts a method call name, args, and `Span` of the method name.
@ -4597,6 +4629,9 @@ impl Methods {
("sort_unstable_by", [arg]) => {
unnecessary_sort_by::check(cx, expr, recv, arg, true);
},
("split", [arg]) => {
str_split::check(cx, expr, recv, arg);
},
("splitn" | "rsplitn", [count_arg, pat_arg]) => {
if let Some(Constant::Int(count)) = constant(cx, cx.typeck_results(), count_arg) {
suspicious_splitn::check(cx, name, expr, recv, count);

View file

@ -0,0 +1,38 @@
use clippy_utils::diagnostics::span_lint_and_sugg;
use clippy_utils::source::snippet_with_context;
use clippy_utils::visitors::is_const_evaluatable;
use rustc_ast::ast::LitKind;
use rustc_errors::Applicability;
use rustc_hir::{Expr, ExprKind};
use rustc_lint::LateContext;
use super::STR_SPLIT_AT_NEWLINE;
pub(super) fn check<'a>(cx: &LateContext<'a>, expr: &'_ Expr<'_>, split_recv: &'a Expr<'_>, split_arg: &'_ Expr<'_>) {
// We're looking for `A.trim().split(B)`, where the adjusted type of `A` is `&str` (e.g. an
// expression returning `String`), and `B` is a `Pattern` that hard-codes a newline (either `"\n"`
// or `"\r\n"`). There are a lot of ways to specify a pattern, and this lint only checks the most
// basic ones: a `'\n'`, `"\n"`, and `"\r\n"`.
if let ExprKind::MethodCall(trim_method_name, trim_recv, [], _) = split_recv.kind
&& trim_method_name.ident.as_str() == "trim"
&& cx.typeck_results().expr_ty_adjusted(trim_recv).peel_refs().is_str()
&& !is_const_evaluatable(cx, trim_recv)
&& let ExprKind::Lit(split_lit) = split_arg.kind
&& (matches!(split_lit.node, LitKind::Char('\n'))
|| matches!(split_lit.node, LitKind::Str(sym, _) if (sym.as_str() == "\n" || sym.as_str() == "\r\n")))
{
let mut app = Applicability::MaybeIncorrect;
span_lint_and_sugg(
cx,
STR_SPLIT_AT_NEWLINE,
expr.span,
"using `str.trim().split()` with hard-coded newlines",
"use `str.lines()` instead",
format!(
"{}.lines()",
snippet_with_context(cx, trim_recv.span, expr.span.ctxt(), "..", &mut app).0
),
app,
);
}
}