From 8964f6ed272d9dac4533fe1be97e5ff79de14532 Mon Sep 17 00:00:00 2001 From: Max Claus Nunes Date: Sat, 16 Nov 2024 13:05:47 -0300 Subject: [PATCH] Add lint for broken doc links Fix false positives on broken link detection Refactor variable names Fix doc comment about broken link lint Refactor, remove not used variable Improve broken link to catch more cases and span point to whole link Include reason why a link is considered broken Drop some checker because rustdoc already warn about them Refactor to use a single enum instead of multiple bool variables Fix lint warnings Rename function to collect broken links Warn directly instead of collecting all entries first Iterate directly rather than collecting Temporary change to confirm with code reviewer the next steps Handle broken links as part of the fake_broken_link_callback handler Simplify broken link detection without state machine usage Fix typos Add url check to reduce false positives Drop reason enum as there is only one reason Fix duplicated diagnostics Fix linter --- CHANGELOG.md | 1 + clippy_lints/src/declared_lints.rs | 1 + clippy_lints/src/doc/broken_link.rs | 83 +++++++++++++++++++++++++++++ clippy_lints/src/doc/mod.rs | 50 ++++++++++++++--- tests/ui/doc_broken_link.rs | 72 +++++++++++++++++++++++++ tests/ui/doc_broken_link.stderr | 29 ++++++++++ 6 files changed, 229 insertions(+), 7 deletions(-) create mode 100644 clippy_lints/src/doc/broken_link.rs create mode 100644 tests/ui/doc_broken_link.rs create mode 100644 tests/ui/doc_broken_link.stderr diff --git a/CHANGELOG.md b/CHANGELOG.md index 9209d11feb34..cbbbc8bef3cc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5571,6 +5571,7 @@ Released 2018-09-13 [`disallowed_type`]: https://rust-lang.github.io/rust-clippy/master/index.html#disallowed_type [`disallowed_types`]: https://rust-lang.github.io/rust-clippy/master/index.html#disallowed_types [`diverging_sub_expression`]: https://rust-lang.github.io/rust-clippy/master/index.html#diverging_sub_expression +[`doc_broken_link`]: https://rust-lang.github.io/rust-clippy/master/index.html#doc_broken_link [`doc_comment_double_space_linebreaks`]: https://rust-lang.github.io/rust-clippy/master/index.html#doc_comment_double_space_linebreaks [`doc_include_without_cfg`]: https://rust-lang.github.io/rust-clippy/master/index.html#doc_include_without_cfg [`doc_lazy_continuation`]: https://rust-lang.github.io/rust-clippy/master/index.html#doc_lazy_continuation diff --git a/clippy_lints/src/declared_lints.rs b/clippy_lints/src/declared_lints.rs index b9f9f4e4fe0b..540c4031140a 100644 --- a/clippy_lints/src/declared_lints.rs +++ b/clippy_lints/src/declared_lints.rs @@ -139,6 +139,7 @@ pub static LINTS: &[&crate::LintInfo] = &[ crate::disallowed_names::DISALLOWED_NAMES_INFO, crate::disallowed_script_idents::DISALLOWED_SCRIPT_IDENTS_INFO, crate::disallowed_types::DISALLOWED_TYPES_INFO, + crate::doc::DOC_BROKEN_LINK_INFO, crate::doc::DOC_COMMENT_DOUBLE_SPACE_LINEBREAKS_INFO, crate::doc::DOC_INCLUDE_WITHOUT_CFG_INFO, crate::doc::DOC_LAZY_CONTINUATION_INFO, diff --git a/clippy_lints/src/doc/broken_link.rs b/clippy_lints/src/doc/broken_link.rs new file mode 100644 index 000000000000..a97b807e605f --- /dev/null +++ b/clippy_lints/src/doc/broken_link.rs @@ -0,0 +1,83 @@ +use clippy_utils::diagnostics::span_lint; +use pulldown_cmark::BrokenLink as PullDownBrokenLink; +use rustc_lint::LateContext; +use rustc_resolve::rustdoc::{DocFragment, source_span_for_markdown_range}; +use rustc_span::{BytePos, Pos, Span}; + +use super::DOC_BROKEN_LINK; + +/// Scan and report broken link on documents. +/// It ignores false positives detected by `pulldown_cmark`, and only +/// warns users when the broken link is consider a URL. +// NOTE: We don't check these other cases because +// rustdoc itself will check and warn about it: +// - When a link url is broken across multiple lines in the URL path part +// - When a link tag is missing the close parenthesis character at the end. +// - When a link has whitespace within the url link. +pub fn check(cx: &LateContext<'_>, bl: &PullDownBrokenLink<'_>, doc: &str, fragments: &[DocFragment]) { + warn_if_broken_link(cx, bl, doc, fragments); +} + +fn warn_if_broken_link(cx: &LateContext<'_>, bl: &PullDownBrokenLink<'_>, doc: &str, fragments: &[DocFragment]) { + if let Some(span) = source_span_for_markdown_range(cx.tcx, doc, &bl.span, fragments) { + let mut len = 0; + + // grab raw link data + let (_, raw_link) = doc.split_at(bl.span.start); + + // strip off link text part + let raw_link = match raw_link.split_once(']') { + None => return, + Some((prefix, suffix)) => { + len += prefix.len() + 1; + suffix + }, + }; + + let raw_link = match raw_link.split_once('(') { + None => return, + Some((prefix, suffix)) => { + if !prefix.is_empty() { + // there is text between ']' and '(' chars, so it is not a valid link + return; + } + len += prefix.len() + 1; + suffix + }, + }; + + if raw_link.starts_with("(http") { + // reduce chances of false positive reports + // by limiting this checking only to http/https links. + return; + } + + for c in raw_link.chars() { + if c == ')' { + // it is a valid link + return; + } + + if c == '\n' { + report_broken_link(cx, span, len); + break; + } + + len += 1; + } + } +} + +fn report_broken_link(cx: &LateContext<'_>, frag_span: Span, offset: usize) { + let start = frag_span.lo(); + let end = start + BytePos::from_usize(offset); + + let span = Span::new(start, end, frag_span.ctxt(), frag_span.parent()); + + span_lint( + cx, + DOC_BROKEN_LINK, + span, + "possible broken doc link: broken across multiple lines", + ); +} diff --git a/clippy_lints/src/doc/mod.rs b/clippy_lints/src/doc/mod.rs index ab77edf1147c..5e2f64e8f095 100644 --- a/clippy_lints/src/doc/mod.rs +++ b/clippy_lints/src/doc/mod.rs @@ -25,6 +25,7 @@ use rustc_span::edition::Edition; use std::ops::Range; use url::Url; +mod broken_link; mod doc_comment_double_space_linebreaks; mod include_in_doc_without_cfg; mod lazy_continuation; @@ -292,6 +293,34 @@ declare_clippy_lint! { "possible typo for an intra-doc link" } +declare_clippy_lint! { + /// ### What it does + /// Checks the doc comments have unbroken links, mostly caused + /// by bad formatted links such as broken across multiple lines. + /// + /// ### Why is this bad? + /// Because documentation generated by rustdoc will be broken + /// since expected links won't be links and just text. + /// + /// ### Examples + /// This link is broken: + /// ```no_run + /// /// [example of a bad link](https:// + /// /// github.com/rust-lang/rust-clippy/) + /// pub fn do_something() {} + /// ``` + /// + /// It shouldn't be broken across multiple lines to work: + /// ```no_run + /// /// [example of a good link](https://github.com/rust-lang/rust-clippy/) + /// pub fn do_something() {} + /// ``` + #[clippy::version = "1.84.0"] + pub DOC_BROKEN_LINK, + pedantic, + "broken document link" +} + declare_clippy_lint! { /// ### What it does /// Checks for the doc comments of publicly visible @@ -625,6 +654,7 @@ impl Documentation { impl_lint_pass!(Documentation => [ DOC_LINK_CODE, DOC_LINK_WITH_QUOTES, + DOC_BROKEN_LINK, DOC_MARKDOWN, DOC_NESTED_REFDEFS, MISSING_SAFETY_DOC, @@ -751,9 +781,9 @@ struct DocHeaders { /// back in the various late lint pass methods if they need the final doc headers, like "Safety" or /// "Panics" sections. fn check_attrs(cx: &LateContext<'_>, valid_idents: &FxHashSet, attrs: &[Attribute]) -> Option { - /// We don't want the parser to choke on intra doc links. Since we don't - /// actually care about rendering them, just pretend that all broken links - /// point to a fake address. + // We don't want the parser to choke on intra doc links. Since we don't + // actually care about rendering them, just pretend that all broken links + // point to a fake address. #[expect(clippy::unnecessary_wraps)] // we're following a type signature fn fake_broken_link_callback<'a>(_: BrokenLink<'_>) -> Option<(CowStr<'a>, CowStr<'a>)> { Some(("fake".into(), "fake".into())) @@ -793,14 +823,12 @@ fn check_attrs(cx: &LateContext<'_>, valid_idents: &FxHashSet, attrs: &[ return Some(DocHeaders::default()); } - let mut cb = fake_broken_link_callback; - check_for_code_clusters( cx, pulldown_cmark::Parser::new_with_broken_link_callback( &doc, main_body_opts() - Options::ENABLE_SMART_PUNCTUATION, - Some(&mut cb), + Some(&mut fake_broken_link_callback), ) .into_offset_iter(), &doc, @@ -810,9 +838,17 @@ fn check_attrs(cx: &LateContext<'_>, valid_idents: &FxHashSet, attrs: &[ }, ); + // NOTE: check_doc uses it own cb function, + // to avoid causing duplicated diagnostics for the broken link checker. + let mut full_fake_broken_link_callback = |bl: BrokenLink<'_>| -> Option<(CowStr<'_>, CowStr<'_>)> { + broken_link::check(cx, &bl, &doc, &fragments); + Some(("fake".into(), "fake".into())) + }; + // disable smart punctuation to pick up ['link'] more easily let opts = main_body_opts() - Options::ENABLE_SMART_PUNCTUATION; - let parser = pulldown_cmark::Parser::new_with_broken_link_callback(&doc, opts, Some(&mut cb)); + let parser = + pulldown_cmark::Parser::new_with_broken_link_callback(&doc, opts, Some(&mut full_fake_broken_link_callback)); Some(check_doc( cx, diff --git a/tests/ui/doc_broken_link.rs b/tests/ui/doc_broken_link.rs new file mode 100644 index 000000000000..7d9c0ef13b3c --- /dev/null +++ b/tests/ui/doc_broken_link.rs @@ -0,0 +1,72 @@ +#![warn(clippy::doc_broken_link)] + +fn main() {} + +pub struct FakeType {} + +/// This might be considered a link false positive +/// and should be ignored by this lint rule: +/// Example of referencing some code with brackets [FakeType]. +pub fn doc_ignore_link_false_positive_1() {} + +/// This might be considered a link false positive +/// and should be ignored by this lint rule: +/// [`FakeType`]. Continue text after brackets, +/// then (something in +/// parenthesis). +pub fn doc_ignore_link_false_positive_2() {} + +/// Test valid link, whole link single line. +/// [doc valid link](https://test.fake/doc_valid_link) +pub fn doc_valid_link() {} + +/// Test valid link, whole link single line but it has special chars such as brackets and +/// parenthesis. [doc invalid link url invalid char](https://test.fake/doc_valid_link_url_invalid_char?foo[bar]=1&bar(foo)=2) +pub fn doc_valid_link_url_invalid_char() {} + +/// Test valid link, text tag broken across multiple lines. +/// [doc valid link broken +/// text](https://test.fake/doc_valid_link_broken_text) +pub fn doc_valid_link_broken_text() {} + +/// Test valid link, url tag broken across multiple lines, but +/// the whole url part in a single line. +/// [doc valid link broken url tag two lines first](https://test.fake/doc_valid_link_broken_url_tag_two_lines_first +/// ) +pub fn doc_valid_link_broken_url_tag_two_lines_first() {} + +/// Test valid link, url tag broken across multiple lines, but +/// the whole url part in a single line. +/// [doc valid link broken url tag two lines second]( +/// https://test.fake/doc_valid_link_broken_url_tag_two_lines_second) +pub fn doc_valid_link_broken_url_tag_two_lines_second() {} + +/// Test valid link, url tag broken across multiple lines, but +/// the whole url part in a single line, but the closing pharentesis +/// in a third line. +/// [doc valid link broken url tag three lines]( +/// https://test.fake/doc_valid_link_broken_url_tag_three_lines +/// ) +pub fn doc_valid_link_broken_url_tag_three_lines() {} + +/// Test invalid link, url part broken across multiple lines. +/// [doc invalid link broken url scheme part](https:// +/// test.fake/doc_invalid_link_broken_url_scheme_part) +//~^^ ERROR: possible broken doc link: broken across multiple lines +pub fn doc_invalid_link_broken_url_scheme_part() {} + +/// Test invalid link, url part broken across multiple lines. +/// [doc invalid link broken url host part](https://test +/// .fake/doc_invalid_link_broken_url_host_part) +//~^^ ERROR: possible broken doc link: broken across multiple lines +pub fn doc_invalid_link_broken_url_host_part() {} + +/// Test invalid link, for multiple urls in the same block of comment. +/// There is a [fist link - invalid](https://test +/// .fake) then it continues +//~^^ ERROR: possible broken doc link: broken across multiple lines +/// with a [second link - valid](https://test.fake/doc_valid_link) and another [third link - invalid](https://test +/// .fake). It ends with another +//~^^ ERROR: possible broken doc link: broken across multiple lines +/// line of comment. +pub fn doc_multiple_invalid_link_broken_url() {} diff --git a/tests/ui/doc_broken_link.stderr b/tests/ui/doc_broken_link.stderr new file mode 100644 index 000000000000..179ed97635ee --- /dev/null +++ b/tests/ui/doc_broken_link.stderr @@ -0,0 +1,29 @@ +error: possible broken doc link: broken across multiple lines + --> tests/ui/doc_broken_link.rs:53:5 + | +LL | /// [doc invalid link broken url scheme part](https:// + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | + = note: `-D clippy::doc-broken-link` implied by `-D warnings` + = help: to override `-D warnings` add `#[allow(clippy::doc_broken_link)]` + +error: possible broken doc link: broken across multiple lines + --> tests/ui/doc_broken_link.rs:59:5 + | +LL | /// [doc invalid link broken url host part](https://test + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +error: possible broken doc link: broken across multiple lines + --> tests/ui/doc_broken_link.rs:65:16 + | +LL | /// There is a [fist link - invalid](https://test + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +error: possible broken doc link: broken across multiple lines + --> tests/ui/doc_broken_link.rs:68:80 + | +LL | /// with a [second link - valid](https://test.fake/doc_valid_link) and another [third link - invalid](https://test + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +error: aborting due to 4 previous errors +