Rollup merge of #77971 - jyn514:broken-intra-doc-links, r=mark-simulacrum

Deny broken intra-doc links in linkchecker

Since rustdoc isn't warning about these links, check for them manually.

This also fixes the broken links that popped up from the lint.
This commit is contained in:
Yuki Okushi 2020-10-17 05:36:49 +09:00 committed by GitHub
commit 050eb4d7e4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 92 additions and 20 deletions

@ -1 +1 @@
Subproject commit 12db56cdedbc2c26a9aa18f994c0188cdcc67df5
Subproject commit 79b397d72c557eb6444a2ba0dc00a211a226a35a

View file

@ -7,3 +7,7 @@ edition = "2018"
[[bin]]
name = "linkchecker"
path = "main.rs"
[dependencies]
regex = "1"
once_cell = "1"

View file

@ -21,6 +21,9 @@ use std::fs;
use std::path::{Component, Path, PathBuf};
use std::rc::Rc;
use once_cell::sync::Lazy;
use regex::Regex;
use crate::Redirect::*;
// Add linkcheck exceptions here
@ -50,6 +53,44 @@ const LINKCHECK_EXCEPTIONS: &[(&str, &[&str])] = &[
("alloc/collections/btree_set/struct.BTreeSet.html", &["#insert-and-complex-keys"]),
];
/// Known false positives for the broken intra-doc link check.
///
/// Each entry pairs a path (compared against the end of the checked file's path by
/// `is_intra_doc_exception`) with the bracketed `<code>` contents that are tolerated in that
/// file. An empty list means every match in that file is allowed.
#[rustfmt::skip]
const INTRA_DOC_LINK_EXCEPTIONS: &[(&str, &[&str])] = &[
    // This will never have links that are not in other pages.
    // To avoid repeating the exceptions twice, an empty list means all broken links are allowed.
    ("reference/print.html", &[]),
    // All the reference 'links' are actually EBNF highlighted as code
    ("reference/comments.html", &[
        "/</code> <code>!",
        "*</code> <code>!",
    ]),
    ("reference/identifiers.html", &[
        "a</code>-<code>z</code> <code>A</code>-<code>Z",
        "a</code>-<code>z</code> <code>A</code>-<code>Z</code> <code>0</code>-<code>9</code> <code>_",
        "a</code>-<code>z</code> <code>A</code>-<code>Z</code>] [<code>a</code>-<code>z</code> <code>A</code>-<code>Z</code> <code>0</code>-<code>9</code> <code>_",
    ]),
    ("reference/tokens.html", &[
        "0</code>-<code>1",
        "0</code>-<code>7",
        "0</code>-<code>9",
        "0</code>-<code>9</code> <code>a</code>-<code>f</code> <code>A</code>-<code>F",
    ]),
    ("reference/notation.html", &[
        "b</code> <code>B",
        "a</code>-<code>z",
    ]),
    // This is being used in the sense of 'inclusive range', not a markdown link
    ("core/ops/struct.RangeInclusive.html", &["begin</code>, <code>end"]),
    ("std/ops/struct.RangeInclusive.html", &["begin</code>, <code>end"]),
    ("core/slice/trait.SliceIndex.html", &["begin</code>, <code>end"]),
    ("alloc/slice/trait.SliceIndex.html", &["begin</code>, <code>end"]),
    ("std/slice/trait.SliceIndex.html", &["begin</code>, <code>end"]),
];
/// Finds `[<code>…</code>]` in rendered HTML, i.e. a bracketed code span that was left as
/// literal text instead of being turned into a link. Capture group 1 is the text between the
/// outermost `<code>` and `</code>`.
///
/// The `.*` is greedy, so a line holding several bracketed code spans produces a single match
/// running from the first `[<code>` to the last `</code>]`. Some entries in
/// `INTRA_DOC_LINK_EXCEPTIONS` contain `</code>] [<code>` and depend on that behavior.
static BROKEN_INTRA_DOC_LINK: Lazy<Regex> = Lazy::new(|| {
    let pattern = r#"\[<code>(.*)</code>\]"#;
    Regex::new(pattern).unwrap()
});
macro_rules! t {
($e:expr) => {
match $e {
@ -138,6 +179,14 @@ fn walk(cache: &mut Cache, root: &Path, dir: &Path, errors: &mut bool) {
}
}
/// Reports whether `link` is a known false positive for `file`.
///
/// The first `INTRA_DOC_LINK_EXCEPTIONS` entry whose path `file` ends with decides the answer:
/// an empty allow-list accepts any `link`, otherwise `link` must appear in the list verbatim.
/// Files with no matching entry have no exceptions.
fn is_intra_doc_exception(file: &Path, link: &str) -> bool {
    let allowed = INTRA_DOC_LINK_EXCEPTIONS
        .iter()
        .find(|&(suffix, _)| file.ends_with(suffix))
        .map(|&(_, links)| links);

    match allowed {
        Some(links) => links.is_empty() || links.contains(&link),
        None => false,
    }
}
fn is_exception(file: &Path, link: &str) -> bool {
if let Some(entry) = LINKCHECK_EXCEPTIONS.iter().find(|&(f, _)| file.ends_with(f)) {
entry.1.contains(&link)
@ -292,6 +341,19 @@ fn check(cache: &mut Cache, root: &Path, file: &Path, errors: &mut bool) -> Opti
}
}
});
// Search for intra-doc links that rustdoc didn't warn about
// FIXME(#77199, 77200) Rustdoc should just warn about these directly.
// NOTE: only looks at one line at a time; in practice this should find most links
for (i, line) in contents.lines().enumerate() {
for broken_link in BROKEN_INTRA_DOC_LINK.captures_iter(line) {
if !is_intra_doc_exception(file, &broken_link[1]) {
*errors = true;
print!("{}:{}: broken intra-doc link - ", pretty_file.display(), i + 1);
println!("{}", &broken_link[0]);
}
}
}
Some(pretty_file)
}