Add lint for broken doc links (#13696)

fixes https://github.com/rust-lang/rust-clippy/issues/2179

changelog: [`doc_broken_link`]: Add pedantic lint to catch broken doc
links that won't produce a link tag by rustdoc.
This commit is contained in:
Jason Newcomb 2025-06-16 15:19:03 +00:00 committed by GitHub
commit af9d56860e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 229 additions and 7 deletions

View file

@ -5729,6 +5729,7 @@ Released 2018-09-13
[`disallowed_type`]: https://rust-lang.github.io/rust-clippy/master/index.html#disallowed_type
[`disallowed_types`]: https://rust-lang.github.io/rust-clippy/master/index.html#disallowed_types
[`diverging_sub_expression`]: https://rust-lang.github.io/rust-clippy/master/index.html#diverging_sub_expression
[`doc_broken_link`]: https://rust-lang.github.io/rust-clippy/master/index.html#doc_broken_link
[`doc_comment_double_space_linebreaks`]: https://rust-lang.github.io/rust-clippy/master/index.html#doc_comment_double_space_linebreaks
[`doc_include_without_cfg`]: https://rust-lang.github.io/rust-clippy/master/index.html#doc_include_without_cfg
[`doc_lazy_continuation`]: https://rust-lang.github.io/rust-clippy/master/index.html#doc_lazy_continuation

View file

@ -112,6 +112,7 @@ pub static LINTS: &[&crate::LintInfo] = &[
crate::disallowed_names::DISALLOWED_NAMES_INFO,
crate::disallowed_script_idents::DISALLOWED_SCRIPT_IDENTS_INFO,
crate::disallowed_types::DISALLOWED_TYPES_INFO,
crate::doc::DOC_BROKEN_LINK_INFO,
crate::doc::DOC_COMMENT_DOUBLE_SPACE_LINEBREAKS_INFO,
crate::doc::DOC_INCLUDE_WITHOUT_CFG_INFO,
crate::doc::DOC_LAZY_CONTINUATION_INFO,

View file

@ -0,0 +1,83 @@
use clippy_utils::diagnostics::span_lint;
use pulldown_cmark::BrokenLink as PullDownBrokenLink;
use rustc_lint::LateContext;
use rustc_resolve::rustdoc::{DocFragment, source_span_for_markdown_range};
use rustc_span::{BytePos, Pos, Span};
use super::DOC_BROKEN_LINK;
/// Scans a broken link reported by `pulldown_cmark` and lints it when it looks like
/// an inline link (`[text](url)`) whose URL part is interrupted by a line break.
///
/// Other broken links reported by `pulldown_cmark`, such as a bare `[Type]`
/// reference or brackets followed by unrelated text, are ignored.
///
/// `doc` is the markdown text that `bl.span` indexes into, and `fragments` are
/// the doc fragments used to map that markdown range back to a source span.
// NOTE: We don't check these other cases because
// rustdoc itself will check and warn about them:
// - When a link url is broken across multiple lines in the URL path part.
// - When a link tag is missing the close parenthesis character at the end.
// - When a link has whitespace within the url link.
pub fn check(cx: &LateContext<'_>, bl: &PullDownBrokenLink<'_>, doc: &str, fragments: &[DocFragment]) {
// All of the work happens in the private helper.
warn_if_broken_link(cx, bl, doc, fragments);
}
/// Lints `bl` if it looks like an inline link (`[text](url)`) whose http(s) URL is
/// split by a line break before the closing parenthesis.
///
/// `doc` is the markdown text that `bl.span` indexes into; `fragments` are used to
/// map that markdown range back to a source span. Nothing is reported when no
/// source span can be computed for the link.
fn warn_if_broken_link(cx: &LateContext<'_>, bl: &PullDownBrokenLink<'_>, doc: &str, fragments: &[DocFragment]) {
    let Some(span) = source_span_for_markdown_range(cx.tcx, doc, &bl.span, fragments) else {
        return;
    };

    // Scan from the start of the reported link; the URL part is searched for
    // right after the first `]`.
    let raw_link = &doc[bl.span.start..];

    if let Some(offset) = broken_url_offset(raw_link) {
        report_broken_link(cx, span, offset);
    }
}

/// Returns the byte length of the leading `[text](url-start` part of `raw_link`,
/// measured up to (not including) the line break that splits the URL.
///
/// Returns `None` when `raw_link` does not start with an inline link to an
/// http/https URL, or when the URL is closed by `)` before any line break.
// NOTE(review): the offset is measured in markdown bytes. If the link *text* itself
// spans several lines, the source span presumably also contains the doc comment
// markers of those lines, which are not accounted for here - confirm.
fn broken_url_offset(raw_link: &str) -> Option<usize> {
    // Strip off the link text part.
    let (text, after_text) = raw_link.split_once(']')?;

    // Any text between `]` and `(` means this is not an inline link.
    let url = after_text.strip_prefix('(')?;

    // Reduce the chance of false positive reports by limiting this check to
    // http/https links. The `(` has already been consumed above, so the URL
    // itself (ignoring leading whitespace) is what has to be tested.
    if !url.trim_start().starts_with("http") {
        return None;
    }

    // Whichever of `)` and a line break comes first decides the outcome:
    // `)` first means the link is valid, a line break first means it is broken.
    // `find` yields a byte offset, which keeps the result valid for non-ASCII URLs.
    let stop = url.find([')', '\n'])?;
    if url.as_bytes()[stop] != b'\n' {
        return None;
    }

    // `+ 2` accounts for the `]` and `(` delimiters.
    Some(text.len() + 2 + stop)
}
/// Emits the `DOC_BROKEN_LINK` lint on the first `offset` bytes of `frag_span`.
///
/// The reported span keeps the start, syntax context and parent of `frag_span`;
/// only its end is moved to `frag_span.lo() + offset`.
fn report_broken_link(cx: &LateContext<'_>, frag_span: Span, offset: usize) {
    let link_end = frag_span.lo() + BytePos::from_usize(offset);
    let link_span = Span::new(frag_span.lo(), link_end, frag_span.ctxt(), frag_span.parent());

    span_lint(
        cx,
        DOC_BROKEN_LINK,
        link_span,
        "possible broken doc link: broken across multiple lines",
    );
}

View file

@ -24,6 +24,7 @@ use rustc_span::edition::Edition;
use std::ops::Range;
use url::Url;
mod broken_link;
mod doc_comment_double_space_linebreaks;
mod doc_suspicious_footnotes;
mod include_in_doc_without_cfg;
@ -292,6 +293,34 @@ declare_clippy_lint! {
"possible typo for an intra-doc link"
}
declare_clippy_lint! {
/// ### What it does
/// Checks that links in doc comments are not broken, which is mostly
/// caused by badly formatted links, such as a link split across multiple lines.
///
/// ### Why is this bad?
/// The documentation generated by rustdoc will be broken: the
/// intended links are rendered as plain text instead of links.
///
/// ### Examples
/// This link is broken:
/// ```no_run
/// /// [example of a bad link](https://
/// /// github.com/rust-lang/rust-clippy/)
/// pub fn do_something() {}
/// ```
///
/// The link must not be split across multiple lines to work:
/// ```no_run
/// /// [example of a good link](https://github.com/rust-lang/rust-clippy/)
/// pub fn do_something() {}
/// ```
#[clippy::version = "1.84.0"]
pub DOC_BROKEN_LINK,
pedantic,
"broken document link"
}
declare_clippy_lint! {
/// ### What it does
/// Checks for the doc comments of publicly visible
@ -656,6 +685,7 @@ impl Documentation {
impl_lint_pass!(Documentation => [
DOC_LINK_CODE,
DOC_LINK_WITH_QUOTES,
DOC_BROKEN_LINK,
DOC_MARKDOWN,
DOC_NESTED_REFDEFS,
MISSING_SAFETY_DOC,
@ -786,9 +816,9 @@ struct DocHeaders {
/// back in the various late lint pass methods if they need the final doc headers, like "Safety" or
/// "Panics" sections.
fn check_attrs(cx: &LateContext<'_>, valid_idents: &FxHashSet<String>, attrs: &[Attribute]) -> Option<DocHeaders> {
/// We don't want the parser to choke on intra doc links. Since we don't
/// actually care about rendering them, just pretend that all broken links
/// point to a fake address.
// We don't want the parser to choke on intra doc links. Since we don't
// actually care about rendering them, just pretend that all broken links
// point to a fake address.
#[expect(clippy::unnecessary_wraps)] // we're following a type signature
fn fake_broken_link_callback<'a>(_: BrokenLink<'_>) -> Option<(CowStr<'a>, CowStr<'a>)> {
Some(("fake".into(), "fake".into()))
@ -828,14 +858,12 @@ fn check_attrs(cx: &LateContext<'_>, valid_idents: &FxHashSet<String>, attrs: &[
return Some(DocHeaders::default());
}
let mut cb = fake_broken_link_callback;
check_for_code_clusters(
cx,
pulldown_cmark::Parser::new_with_broken_link_callback(
&doc,
main_body_opts() - Options::ENABLE_SMART_PUNCTUATION,
Some(&mut cb),
Some(&mut fake_broken_link_callback),
)
.into_offset_iter(),
&doc,
@ -845,9 +873,17 @@ fn check_attrs(cx: &LateContext<'_>, valid_idents: &FxHashSet<String>, attrs: &[
},
);
// NOTE: check_doc uses its own cb function,
// to avoid causing duplicated diagnostics for the broken link checker.
let mut full_fake_broken_link_callback = |bl: BrokenLink<'_>| -> Option<(CowStr<'_>, CowStr<'_>)> {
broken_link::check(cx, &bl, &doc, &fragments);
Some(("fake".into(), "fake".into()))
};
// disable smart punctuation to pick up ['link'] more easily
let opts = main_body_opts() - Options::ENABLE_SMART_PUNCTUATION;
let parser = pulldown_cmark::Parser::new_with_broken_link_callback(&doc, opts, Some(&mut cb));
let parser =
pulldown_cmark::Parser::new_with_broken_link_callback(&doc, opts, Some(&mut full_fake_broken_link_callback));
Some(check_doc(
cx,

View file

@ -0,0 +1,72 @@
#![warn(clippy::doc_broken_link)]
fn main() {}
pub struct FakeType {}
/// This might be considered a link false positive
/// and should be ignored by this lint rule:
/// Example of referencing some code with brackets [FakeType].
pub fn doc_ignore_link_false_positive_1() {}
/// This might be considered a link false positive
/// and should be ignored by this lint rule:
/// [`FakeType`]. Continue text after brackets,
/// then (something in
/// parenthesis).
pub fn doc_ignore_link_false_positive_2() {}
/// Test valid link, whole link single line.
/// [doc valid link](https://test.fake/doc_valid_link)
pub fn doc_valid_link() {}
/// Test valid link, whole link single line but it has special chars such as brackets and
/// parenthesis. [doc invalid link url invalid char](https://test.fake/doc_valid_link_url_invalid_char?foo[bar]=1&bar(foo)=2)
pub fn doc_valid_link_url_invalid_char() {}
/// Test valid link, text tag broken across multiple lines.
/// [doc valid link broken
/// text](https://test.fake/doc_valid_link_broken_text)
pub fn doc_valid_link_broken_text() {}
/// Test valid link, url tag broken across multiple lines, but
/// the whole url part in a single line.
/// [doc valid link broken url tag two lines first](https://test.fake/doc_valid_link_broken_url_tag_two_lines_first
/// )
pub fn doc_valid_link_broken_url_tag_two_lines_first() {}
/// Test valid link, url tag broken across multiple lines, but
/// the whole url part in a single line.
/// [doc valid link broken url tag two lines second](
/// https://test.fake/doc_valid_link_broken_url_tag_two_lines_second)
pub fn doc_valid_link_broken_url_tag_two_lines_second() {}
/// Test valid link, url tag broken across multiple lines, but
/// the whole url part in a single line, but the closing parenthesis
/// in a third line.
/// [doc valid link broken url tag three lines](
/// https://test.fake/doc_valid_link_broken_url_tag_three_lines
/// )
pub fn doc_valid_link_broken_url_tag_three_lines() {}
/// Test invalid link, url part broken across multiple lines.
/// [doc invalid link broken url scheme part](https://
/// test.fake/doc_invalid_link_broken_url_scheme_part)
//~^^ ERROR: possible broken doc link: broken across multiple lines
pub fn doc_invalid_link_broken_url_scheme_part() {}
/// Test invalid link, url part broken across multiple lines.
/// [doc invalid link broken url host part](https://test
/// .fake/doc_invalid_link_broken_url_host_part)
//~^^ ERROR: possible broken doc link: broken across multiple lines
pub fn doc_invalid_link_broken_url_host_part() {}
/// Test invalid link, for multiple urls in the same block of comment.
/// There is a [fist link - invalid](https://test
/// .fake) then it continues
//~^^ ERROR: possible broken doc link: broken across multiple lines
/// with a [second link - valid](https://test.fake/doc_valid_link) and another [third link - invalid](https://test
/// .fake). It ends with another
//~^^ ERROR: possible broken doc link: broken across multiple lines
/// line of comment.
pub fn doc_multiple_invalid_link_broken_url() {}

View file

@ -0,0 +1,29 @@
error: possible broken doc link: broken across multiple lines
--> tests/ui/doc_broken_link.rs:53:5
|
LL | /// [doc invalid link broken url scheme part](https://
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
= note: `-D clippy::doc-broken-link` implied by `-D warnings`
= help: to override `-D warnings` add `#[allow(clippy::doc_broken_link)]`
error: possible broken doc link: broken across multiple lines
--> tests/ui/doc_broken_link.rs:59:5
|
LL | /// [doc invalid link broken url host part](https://test
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
error: possible broken doc link: broken across multiple lines
--> tests/ui/doc_broken_link.rs:65:16
|
LL | /// There is a [fist link - invalid](https://test
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
error: possible broken doc link: broken across multiple lines
--> tests/ui/doc_broken_link.rs:68:80
|
LL | /// with a [second link - valid](https://test.fake/doc_valid_link) and another [third link - invalid](https://test
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
error: aborting due to 4 previous errors