New lint sliced_string_as_bytes (#14002)

resurrection of https://github.com/rust-lang/rust-clippy/pull/10984

fixes https://github.com/rust-lang/rust-clippy/issues/10981

changelog: [`sliced_string_as_bytes`]: add new lint
`sliced_string_as_bytes`
This commit is contained in:
Manish Goregaokar 2025-01-27 20:34:48 +00:00 committed by GitHub
commit 85bbba69b0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 158 additions and 3 deletions

View file

@ -468,6 +468,7 @@ pub static LINTS: &[&crate::LintInfo] = &[
crate::methods::SHOULD_IMPLEMENT_TRAIT_INFO,
crate::methods::SINGLE_CHAR_ADD_STR_INFO,
crate::methods::SKIP_WHILE_NEXT_INFO,
crate::methods::SLICED_STRING_AS_BYTES_INFO,
crate::methods::STABLE_SORT_PRIMITIVE_INFO,
crate::methods::STRING_EXTEND_CHARS_INFO,
crate::methods::STRING_LIT_CHARS_ANY_INFO,

View file

@ -102,6 +102,7 @@ mod single_char_add_str;
mod single_char_insert_string;
mod single_char_push_string;
mod skip_while_next;
mod sliced_string_as_bytes;
mod stable_sort_primitive;
mod str_split;
mod str_splitn;
@ -4363,6 +4364,34 @@ declare_clippy_lint! {
"detect `repeat().take()` that can be replaced with `repeat_n()`"
}
declare_clippy_lint! {
/// ### What it does
/// Checks for string slices immediantly followed by `as_bytes`.
///
/// ### Why is this bad?
/// It involves doing an unnecessary UTF-8 alignment check which is less efficient, and can cause a panic.
///
/// ### Known problems
/// In some cases, the UTF-8 validation and potential panic from string slicing may be required for
/// the code's correctness. If you need to ensure the slice boundaries fall on valid UTF-8 character
/// boundaries, the original form (`s[1..5].as_bytes()`) should be preferred.
///
/// ### Example
/// ```rust
/// let s = "Lorem ipsum";
/// s[1..5].as_bytes();
/// ```
/// Use instead:
/// ```rust
/// let s = "Lorem ipsum";
/// &s.as_bytes()[1..5];
/// ```
#[clippy::version = "1.86.0"]
pub SLICED_STRING_AS_BYTES,
perf,
"slicing a string and immediately calling as_bytes is less efficient and can lead to panics"
}
pub struct Methods {
avoid_breaking_exported_api: bool,
msrv: Msrv,
@ -4531,6 +4560,7 @@ impl_lint_pass!(Methods => [
DOUBLE_ENDED_ITERATOR_LAST,
USELESS_NONZERO_NEW_UNCHECKED,
MANUAL_REPEAT_N,
SLICED_STRING_AS_BYTES,
]);
/// Extracts a method call name, args, and `Span` of the method name.
@ -4798,6 +4828,7 @@ impl Methods {
if let Some(("as_str", recv, [], as_str_span, _)) = method_call(recv) {
redundant_as_str::check(cx, expr, recv, as_str_span, span);
}
sliced_string_as_bytes::check(cx, expr, recv);
},
("as_mut", []) => useless_asref::check(cx, expr, "as_mut", recv),
("as_ptr", []) => manual_c_str_literals::check_as_ptr(cx, expr, recv, &self.msrv),

View file

@ -0,0 +1,29 @@
use clippy_utils::diagnostics::span_lint_and_sugg;
use clippy_utils::source::snippet_with_applicability;
use clippy_utils::ty::is_type_lang_item;
use rustc_errors::Applicability;
use rustc_hir::{Expr, ExprKind, LangItem, is_range_literal};
use rustc_lint::LateContext;
use super::SLICED_STRING_AS_BYTES;
pub(super) fn check(cx: &LateContext<'_>, expr: &Expr<'_>, recv: &Expr<'_>) {
if let ExprKind::Index(indexed, index, _) = recv.kind
&& is_range_literal(index)
&& let ty = cx.typeck_results().expr_ty(indexed).peel_refs()
&& (ty.is_str() || is_type_lang_item(cx, ty, LangItem::String))
{
let mut applicability = Applicability::MaybeIncorrect;
let stringish = snippet_with_applicability(cx, indexed.span, "_", &mut applicability);
let range = snippet_with_applicability(cx, index.span, "_", &mut applicability);
span_lint_and_sugg(
cx,
SLICED_STRING_AS_BYTES,
expr.span,
"calling `as_bytes` after slicing a string",
"try",
format!("&{stringish}.as_bytes()[{range}]"),
applicability,
);
}
}