Deny broken intra-doc links in linkchecker

Since rustdoc isn't warning about these links, check for them manually.
This commit is contained in:
Joshua Nelson 2020-10-15 09:25:30 -04:00
parent dd7fc54ebd
commit 65835d1059
15 changed files with 90 additions and 18 deletions

View file

@ -7,3 +7,7 @@ edition = "2018"
[[bin]]
name = "linkchecker"
path = "main.rs"
[dependencies]
regex = "1"
once_cell = "1"

View file

@ -21,6 +21,9 @@ use std::fs;
use std::path::{Component, Path, PathBuf};
use std::rc::Rc;
use once_cell::sync::Lazy;
use regex::Regex;
use crate::Redirect::*;
// Add linkcheck exceptions here
@ -50,6 +53,44 @@ const LINKCHECK_EXCEPTIONS: &[(&str, &[&str])] = &[
("alloc/collections/btree_set/struct.BTreeSet.html", &["#insert-and-complex-keys"]),
];
#[rustfmt::skip]
const INTRA_DOC_LINK_EXCEPTIONS: &[(&str, &[&str])] = &[
// This will never have links that are not in other pages.
// To avoid repeating the exceptions twice, an empty list means all broken links are allowed.
("reference/print.html", &[]),
// All the reference 'links' are actually ENBF highlighted as code
("reference/comments.html", &[
"/</code> <code>!",
"*</code> <code>!",
]),
("reference/identifiers.html", &[
"a</code>-<code>z</code> <code>A</code>-<code>Z",
"a</code>-<code>z</code> <code>A</code>-<code>Z</code> <code>0</code>-<code>9</code> <code>_",
"a</code>-<code>z</code> <code>A</code>-<code>Z</code>] [<code>a</code>-<code>z</code> <code>A</code>-<code>Z</code> <code>0</code>-<code>9</code> <code>_",
]),
("reference/tokens.html", &[
"0</code>-<code>1",
"0</code>-<code>7",
"0</code>-<code>9",
"0</code>-<code>9",
"0</code>-<code>9</code> <code>a</code>-<code>f</code> <code>A</code>-<code>F",
]),
("reference/notation.html", &[
"b</code> <code>B",
"a</code>-<code>z",
]),
// This is being used in the sense of 'inclusive range', not a markdown link
("core/ops/struct.RangeInclusive.html", &["begin</code>, <code>end"]),
("std/ops/struct.RangeInclusive.html", &["begin</code>, <code>end"]),
("core/slice/trait.SliceIndex.html", &["begin</code>, <code>end"]),
("alloc/slice/trait.SliceIndex.html", &["begin</code>, <code>end"]),
("std/slice/trait.SliceIndex.html", &["begin</code>, <code>end"]),
];
static BROKEN_INTRA_DOC_LINK: Lazy<Regex> =
Lazy::new(|| Regex::new(r#"\[<code>(.*)</code>\]"#).unwrap());
macro_rules! t {
($e:expr) => {
match $e {
@ -138,6 +179,14 @@ fn walk(cache: &mut Cache, root: &Path, dir: &Path, errors: &mut bool) {
}
}
fn is_intra_doc_exception(file: &Path, link: &str) -> bool {
if let Some(entry) = INTRA_DOC_LINK_EXCEPTIONS.iter().find(|&(f, _)| file.ends_with(f)) {
entry.1.is_empty() || entry.1.contains(&link)
} else {
false
}
}
fn is_exception(file: &Path, link: &str) -> bool {
if let Some(entry) = LINKCHECK_EXCEPTIONS.iter().find(|&(f, _)| file.ends_with(f)) {
entry.1.contains(&link)
@ -292,6 +341,19 @@ fn check(cache: &mut Cache, root: &Path, file: &Path, errors: &mut bool) -> Opti
}
}
});
// Search for intra-doc links that rustdoc didn't warn about
// FIXME(#77199, 77200) Rustdoc should just warn about these directly.
// NOTE: only looks at one line at a time; in practice this should find most links
for (i, line) in contents.lines().enumerate() {
for broken_link in BROKEN_INTRA_DOC_LINK.captures_iter(line) {
if !is_intra_doc_exception(file, &broken_link[1]) {
*errors = true;
print!("{}:{}: broken intra-doc link - ", pretty_file.display(), i + 1);
println!("{}", &broken_link[0]);
}
}
}
Some(pretty_file)
}