Add lint for broken doc links (#13696)

fixes https://github.com/rust-lang/rust-clippy/issues/2179 changelog: [`doc_broken_link`]: Add pedantic lint to catch broken doc links that won't produce a link tag by rustdoc.
2025-06-16 15:19:03 +00:00 · 2025-06-16 15:19:03 +00:00 · af9d56860e
commit af9d56860e
parent 4d67a1c64a 8964f6ed27
6 changed files with 229 additions and 7 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -5729,6 +5729,7 @@ Released 2018-09-13
 [`disallowed_type`]: https://rust-lang.github.io/rust-clippy/master/index.html#disallowed_type
 [`disallowed_types`]: https://rust-lang.github.io/rust-clippy/master/index.html#disallowed_types
 [`diverging_sub_expression`]: https://rust-lang.github.io/rust-clippy/master/index.html#diverging_sub_expression
+[`doc_broken_link`]: https://rust-lang.github.io/rust-clippy/master/index.html#doc_broken_link
 [`doc_comment_double_space_linebreaks`]: https://rust-lang.github.io/rust-clippy/master/index.html#doc_comment_double_space_linebreaks
 [`doc_include_without_cfg`]: https://rust-lang.github.io/rust-clippy/master/index.html#doc_include_without_cfg
 [`doc_lazy_continuation`]: https://rust-lang.github.io/rust-clippy/master/index.html#doc_lazy_continuation
--- a/clippy_lints/src/declared_lints.rs
+++ b/clippy_lints/src/declared_lints.rs
@ -112,6 +112,7 @@ pub static LINTS: &[&crate::LintInfo] = &[
    crate::disallowed_names::DISALLOWED_NAMES_INFO,
    crate::disallowed_script_idents::DISALLOWED_SCRIPT_IDENTS_INFO,
    crate::disallowed_types::DISALLOWED_TYPES_INFO,
+    crate::doc::DOC_BROKEN_LINK_INFO,
    crate::doc::DOC_COMMENT_DOUBLE_SPACE_LINEBREAKS_INFO,
    crate::doc::DOC_INCLUDE_WITHOUT_CFG_INFO,
    crate::doc::DOC_LAZY_CONTINUATION_INFO,
--- a/clippy_lints/src/doc/broken_link.rs
+++ b/clippy_lints/src/doc/broken_link.rs
@ -0,0 +1,83 @@
+use clippy_utils::diagnostics::span_lint;
+use pulldown_cmark::BrokenLink as PullDownBrokenLink;
+use rustc_lint::LateContext;
+use rustc_resolve::rustdoc::{DocFragment, source_span_for_markdown_range};
+use rustc_span::{BytePos, Pos, Span};
+
+use super::DOC_BROKEN_LINK;
+
+/// Scan and report broken links in doc comments.
+///
+/// `bl` is a link that `pulldown_cmark` reported as broken while parsing `doc`.
+/// Most of those reports are false positives for this lint and are ignored;
+/// it is intended to only warn users when the broken link is considered a URL.
+///
+/// `doc` is the markdown text that `bl.span` indexes into, and `fragments` are
+/// the doc fragments used to map a range of `doc` back to a source span.
+// NOTE: We don't check these other cases because
+// rustdoc itself will check and warn about it:
+// - When a link url is broken across multiple lines in the URL path part
+// - When a link tag is missing the close parenthesis character at the end.
+// - When a link has whitespace within the url link.
+pub fn check(cx: &LateContext<'_>, bl: &PullDownBrokenLink<'_>, doc: &str, fragments: &[DocFragment]) {
+    // All of the detection logic lives in `warn_if_broken_link`.
+    warn_if_broken_link(cx, bl, doc, fragments);
+}
+
+/// Lints an inline link (`[text](url)`) whose http(s) URL is interrupted by a
+/// line break before the closing parenthesis.
+///
+/// `bl.span` is the range of `doc` that `pulldown_cmark` reported as a broken
+/// link. Nothing is reported when the text at that position does not have the
+/// `[text](url` shape, when the URL is closed by `)` on the same line, or when
+/// the range cannot be mapped back to a source span.
+fn warn_if_broken_link(cx: &LateContext<'_>, bl: &PullDownBrokenLink<'_>, doc: &str, fragments: &[DocFragment]) {
+    // grab raw link data, starting where the reported link begins
+    let Some(raw_link) = doc.get(bl.span.start..) else {
+        return;
+    };
+
+    // strip off link text part
+    let Some((link_text, after_text)) = raw_link.split_once(']') else {
+        return;
+    };
+
+    // if there is text between the ']' and '(' chars it is not an inline link
+    let Some(url) = after_text.strip_prefix('(') else {
+        return;
+    };
+
+    // byte length of the link text plus the `]` and `(` delimiters
+    let url_offset = link_text.len() + 2;
+
+    // reduce chances of false positive reports
+    // by limiting this checking only to http/https links.
+    // The `(` has already been stripped above, so the scheme is looked up
+    // directly (after any leading whitespace).
+    if !url.trim_start().starts_with("http") {
+        return;
+    }
+
+    // find whichever comes first: the closing parenthesis or a line break
+    let Some(stop) = url.find([')', '\n']) else {
+        return;
+    };
+    if url[stop..].starts_with(')') {
+        // it is a valid link
+        return;
+    }
+
+    // Only resolve the source span once we know a diagnostic is needed; this
+    // callback runs for every broken link and almost all of them bail out above.
+    if let Some(span) = source_span_for_markdown_range(cx.tcx, doc, &bl.span, fragments) {
+        // `stop` is a byte index, so the offset stays correct as a `BytePos`
+        // even when the URL contains multi-byte characters.
+        report_broken_link(cx, span, url_offset + stop);
+    }
+}
+
+/// Emits `DOC_BROKEN_LINK` on the first `offset` bytes of `frag_span`.
+fn report_broken_link(cx: &LateContext<'_>, frag_span: Span, offset: usize) {
+    // Keep the start, syntax context and parent of `frag_span`; only move its end.
+    let link_end = frag_span.lo() + BytePos::from_usize(offset);
+    let link_span = frag_span.with_hi(link_end);
+
+    span_lint(cx, DOC_BROKEN_LINK, link_span, "possible broken doc link: broken across multiple lines");
+}
--- a/clippy_lints/src/doc/mod.rs
+++ b/clippy_lints/src/doc/mod.rs
@ -24,6 +24,7 @@ use rustc_span::edition::Edition;
 use std::ops::Range;
 use url::Url;

+mod broken_link;
 mod doc_comment_double_space_linebreaks;
 mod doc_suspicious_footnotes;
 mod include_in_doc_without_cfg;
@ -292,6 +293,34 @@ declare_clippy_lint! {
    "possible typo for an intra-doc link"
 }

+declare_clippy_lint! {
+    /// ### What it does
+    /// Checks that links in doc comments are not broken, which is mostly
+    /// caused by badly formatted links such as ones split across multiple lines.
+    ///
+    /// ### Why is this bad?
+    /// The documentation generated by rustdoc will be broken:
+    /// the expected links are rendered as plain text instead of links.
+    ///
+    /// ### Examples
+    /// This link is broken:
+    /// ```no_run
+    /// /// [example of a bad link](https://
+    /// /// github.com/rust-lang/rust-clippy/)
+    /// pub fn do_something() {}
+    /// ```
+    ///
+    /// The URL must not be split across multiple lines for the link to work:
+    /// ```no_run
+    /// /// [example of a good link](https://github.com/rust-lang/rust-clippy/)
+    /// pub fn do_something() {}
+    /// ```
+    #[clippy::version = "1.84.0"]
+    pub DOC_BROKEN_LINK,
+    pedantic,
+    "broken document link"
+}
+
 declare_clippy_lint! {
    /// ### What it does
    /// Checks for the doc comments of publicly visible
@ -656,6 +685,7 @@ impl Documentation {
 impl_lint_pass!(Documentation => [
    DOC_LINK_CODE,
    DOC_LINK_WITH_QUOTES,
+    DOC_BROKEN_LINK,
    DOC_MARKDOWN,
    DOC_NESTED_REFDEFS,
    MISSING_SAFETY_DOC,
@ -786,9 +816,9 @@ struct DocHeaders {
 /// back in the various late lint pass methods if they need the final doc headers, like "Safety" or
 /// "Panics" sections.
 fn check_attrs(cx: &LateContext<'_>, valid_idents: &FxHashSet<String>, attrs: &[Attribute]) -> Option<DocHeaders> {
-    /// We don't want the parser to choke on intra doc links. Since we don't
-    /// actually care about rendering them, just pretend that all broken links
-    /// point to a fake address.
+    // We don't want the parser to choke on intra doc links. Since we don't
+    // actually care about rendering them, just pretend that all broken links
+    // point to a fake address.
    #[expect(clippy::unnecessary_wraps)] // we're following a type signature
    fn fake_broken_link_callback<'a>(_: BrokenLink<'_>) -> Option<(CowStr<'a>, CowStr<'a>)> {
        Some(("fake".into(), "fake".into()))
@ -828,14 +858,12 @@ fn check_attrs(cx: &LateContext<'_>, valid_idents: &FxHashSet<String>, attrs: &[
        return Some(DocHeaders::default());
    }

-    let mut cb = fake_broken_link_callback;
-
    check_for_code_clusters(
        cx,
        pulldown_cmark::Parser::new_with_broken_link_callback(
            &doc,
            main_body_opts() - Options::ENABLE_SMART_PUNCTUATION,
-            Some(&mut cb),
+            Some(&mut fake_broken_link_callback),
        )
        .into_offset_iter(),
        &doc,
@ -845,9 +873,17 @@ fn check_attrs(cx: &LateContext<'_>, valid_idents: &FxHashSet<String>, attrs: &[
        },
    );

+    // NOTE: check_doc uses its own cb function,
+    // to avoid causing duplicated diagnostics for the broken link checker.
+    let mut full_fake_broken_link_callback = |bl: BrokenLink<'_>| -> Option<(CowStr<'_>, CowStr<'_>)> {
+        broken_link::check(cx, &bl, &doc, &fragments);
+        Some(("fake".into(), "fake".into()))
+    };
+
    // disable smart punctuation to pick up ['link'] more easily
    let opts = main_body_opts() - Options::ENABLE_SMART_PUNCTUATION;
-    let parser = pulldown_cmark::Parser::new_with_broken_link_callback(&doc, opts, Some(&mut cb));
+    let parser =
+        pulldown_cmark::Parser::new_with_broken_link_callback(&doc, opts, Some(&mut full_fake_broken_link_callback));

    Some(check_doc(
        cx,
--- a/tests/ui/doc_broken_link.rs
+++ b/tests/ui/doc_broken_link.rs
@ -0,0 +1,72 @@
+#![warn(clippy::doc_broken_link)]
+
+fn main() {}
+
+pub struct FakeType {}
+
+/// This might be considered a link false positive
+/// and should be ignored by this lint rule:
+/// Example of referencing some code with brackets [FakeType].
+pub fn doc_ignore_link_false_positive_1() {}
+
+/// This might be considered a link false positive
+/// and should be ignored by this lint rule:
+/// [`FakeType`]. Continue text after brackets,
+/// then (something in
+/// parenthesis).
+pub fn doc_ignore_link_false_positive_2() {}
+
+/// Test valid link, whole link single line.
+/// [doc valid link](https://test.fake/doc_valid_link)
+pub fn doc_valid_link() {}
+
+/// Test valid link, whole link single line but it has special chars such as brackets and
+/// parenthesis. [doc invalid link url invalid char](https://test.fake/doc_valid_link_url_invalid_char?foo[bar]=1&bar(foo)=2)
+pub fn doc_valid_link_url_invalid_char() {}
+
+/// Test valid link, text tag broken across multiple lines.
+/// [doc valid link broken
+/// text](https://test.fake/doc_valid_link_broken_text)
+pub fn doc_valid_link_broken_text() {}
+
+/// Test valid link, url tag broken across multiple lines, but
+/// the whole url part in a single line.
+/// [doc valid link broken url tag two lines first](https://test.fake/doc_valid_link_broken_url_tag_two_lines_first
+/// )
+pub fn doc_valid_link_broken_url_tag_two_lines_first() {}
+
+/// Test valid link, url tag broken across multiple lines, but
+/// the whole url part in a single line.
+/// [doc valid link broken url tag two lines second](
+/// https://test.fake/doc_valid_link_broken_url_tag_two_lines_second)
+pub fn doc_valid_link_broken_url_tag_two_lines_second() {}
+
+/// Test valid link, url tag broken across multiple lines, but
+/// the whole url part in a single line, but the closing parenthesis
+/// in a third line.
+/// [doc valid link broken url tag three lines](
+/// https://test.fake/doc_valid_link_broken_url_tag_three_lines
+/// )
+pub fn doc_valid_link_broken_url_tag_three_lines() {}
+
+/// Test invalid link, url part broken across multiple lines.
+/// [doc invalid link broken url scheme part](https://
+/// test.fake/doc_invalid_link_broken_url_scheme_part)
+//~^^ ERROR: possible broken doc link: broken across multiple lines
+pub fn doc_invalid_link_broken_url_scheme_part() {}
+
+/// Test invalid link, url part broken across multiple lines.
+/// [doc invalid link broken url host part](https://test
+/// .fake/doc_invalid_link_broken_url_host_part)
+//~^^ ERROR: possible broken doc link: broken across multiple lines
+pub fn doc_invalid_link_broken_url_host_part() {}
+
+/// Test invalid link, for multiple urls in the same block of comment.
+/// There is a [fist link - invalid](https://test
+/// .fake) then it continues
+//~^^ ERROR: possible broken doc link: broken across multiple lines
+/// with a [second link - valid](https://test.fake/doc_valid_link) and another [third link - invalid](https://test
+/// .fake). It ends with another
+//~^^ ERROR: possible broken doc link: broken across multiple lines
+/// line of comment.
+pub fn doc_multiple_invalid_link_broken_url() {}
--- a/tests/ui/doc_broken_link.stderr
+++ b/tests/ui/doc_broken_link.stderr
@ -0,0 +1,29 @@
+error: possible broken doc link: broken across multiple lines
+  --> tests/ui/doc_broken_link.rs:53:5
+   |
+LL | /// [doc invalid link broken url scheme part](https://
+   |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+   |
+   = note: `-D clippy::doc-broken-link` implied by `-D warnings`
+   = help: to override `-D warnings` add `#[allow(clippy::doc_broken_link)]`
+
+error: possible broken doc link: broken across multiple lines
+  --> tests/ui/doc_broken_link.rs:59:5
+   |
+LL | /// [doc invalid link broken url host part](https://test
+   |     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+error: possible broken doc link: broken across multiple lines
+  --> tests/ui/doc_broken_link.rs:65:16
+   |
+LL | /// There is a [fist link - invalid](https://test
+   |                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+error: possible broken doc link: broken across multiple lines
+  --> tests/ui/doc_broken_link.rs:68:80
+   |
+LL | /// with a [second link - valid](https://test.fake/doc_valid_link) and another [third link - invalid](https://test
+   |                                                                                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+error: aborting due to 4 previous errors
+