diff --git a/src/doc/rustc-dev-guide/.gitignore b/src/doc/rustc-dev-guide/.gitignore index f03fcae753f4..f2e57fc68ffb 100644 --- a/src/doc/rustc-dev-guide/.gitignore +++ b/src/doc/rustc-dev-guide/.gitignore @@ -1,6 +1,7 @@ book ci/date-check/target/ +ci/sembr/target/ # Generated by check-in.sh pulls.json diff --git a/src/doc/rustc-dev-guide/ci/sembr/Cargo.lock b/src/doc/rustc-dev-guide/ci/sembr/Cargo.lock new file mode 100644 index 000000000000..1e690cc7f1e7 --- /dev/null +++ b/src/doc/rustc-dev-guide/ci/sembr/Cargo.lock @@ -0,0 +1,466 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "anstream" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2" +dependencies = [ + "windows-sys 0.60.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e0633414522a32ffaac8ac6cc8f748e090c5717661fddeea04219e2344f5f2a" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.60.2", +] + +[[package]] +name = "anyhow" +version = "1.0.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" + +[[package]] +name = "bstr" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4" +dependencies = [ + "memchr", + "serde", +] + +[[package]] +name = "clap" +version = "4.5.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c2cfd7bf8a6017ddaa4e32ffe7403d547790db06bd171c1c53926faab501623" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a4c05b9e80c5ccd3a7ef080ad7b6ba7d6fc00a985b8b157197075677c82c7a0" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.49" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" + +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "globset" +version = "0.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52dfc19153a48bde0cbd630453615c8151bce3a5adfac7a0aebfbf0a1e1f57e3" +dependencies = [ + "aho-corasick", + "bstr", + "log", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash", +] + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "ignore" +version = "0.4.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81776e6f9464432afcc28d03e52eb101c93b6f0566f52aef2427663e700f0403" +dependencies = [ + "crossbeam-deque", + "globset", + "log", + "memchr", + "regex-automata", + "same-file", + "walkdir", + "winapi-util", +] + +[[package]] +name = "imara-diff" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f01d462f766df78ab820dd06f5eb700233c51f0f4c2e846520eaf4ba6aa5c5c" +dependencies = [ + "hashbrown", + "memchr", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "log" +version = "0.4.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" + +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "proc-macro2" +version = "1.0.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "sembr" +version = "0.0.0" +dependencies = [ + "anyhow", + "clap", + "ignore", + "imara-diff", + "regex", +] + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.108" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da58917d35242480a05c2897064da0a80589a2a0476c9a3f2fdc83b53502e917" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "462eeb75aeb73aea900253ce739c8e18a67423fadf006037cd3ff27e82748a06" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" diff --git a/src/doc/rustc-dev-guide/ci/sembr/Cargo.toml b/src/doc/rustc-dev-guide/ci/sembr/Cargo.toml new file mode 100644 index 000000000000..00818e2b3c2d --- /dev/null +++ b/src/doc/rustc-dev-guide/ci/sembr/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "sembr" +edition = "2024" + +[dependencies] +anyhow = "1" +ignore = "0.4" +imara-diff = "0.2" + +[dependencies.regex] +version = "1" +features = ["pattern"] + +[dependencies.clap] +version = "4" +features = ["derive"] diff --git a/src/doc/rustc-dev-guide/ci/sembr/src/main.rs b/src/doc/rustc-dev-guide/ci/sembr/src/main.rs new file mode 100644 index 000000000000..6b3753d8b82f --- /dev/null +++ b/src/doc/rustc-dev-guide/ci/sembr/src/main.rs @@ -0,0 +1,254 @@ +use std::path::PathBuf; +use std::sync::LazyLock; +use std::{fs, process}; + +use anyhow::Result; +use clap::Parser; +use ignore::Walk; +use imara_diff::{Algorithm, BasicLineDiffPrinter, Diff, InternedInput, UnifiedDiffConfig}; +use regex::Regex; + +#[derive(Parser)] +struct Cli { + root_dir: PathBuf, + #[arg(long)] + overwrite: bool, + #[arg(long, default_value_t = 100)] + line_length_limit: usize, + #[arg(long)] + show_diff: bool, +} + +static REGEX_IGNORE: LazyLock = LazyLock::new(|| Regex::new(r"(\d\.|\-|\*)\s+").unwrap()); +static REGEX_IGNORE_END: LazyLock = LazyLock::new(|| Regex::new(r"(\.|\?|;|!)$").unwrap()); +static REGEX_SPLIT: LazyLock = LazyLock::new(|| Regex::new(r"(\.|\?|;|!)\s+").unwrap()); + +fn main() -> Result<()> { + let cli = Cli::parse(); + let mut compliant = Vec::new(); + let mut not_compliant = Vec::new(); + let mut made_compliant = Vec::new(); + for result in Walk::new(cli.root_dir) { + let entry = result?; + if entry.file_type().expect("no stdin").is_dir() { + continue; + } + let path = entry.into_path(); + if let Some(extension) = path.extension() { + if extension != "md" { + continue; + } + let old = fs::read_to_string(&path)?; + let new = lengthen_lines(&comply(&old), cli.line_length_limit); + if new == old { + compliant.push(path.clone()); + } else if cli.overwrite { + fs::write(&path, new)?; + made_compliant.push(path.clone()); + } else if cli.show_diff { + println!("{}:", path.display()); + show_diff(&old, &new); + println!("---"); + } else { + not_compliant.push(path.clone()); + } + } + } + if !compliant.is_empty() { + display("compliant", &compliant); + } + if !made_compliant.is_empty() { + display("made compliant", &made_compliant); + } + if !not_compliant.is_empty() { + display("not compliant", ¬_compliant); + process::exit(1); + } + Ok(()) +} + +fn show_diff(old: &str, new: &str) { + let input = InternedInput::new(old, new); + let mut diff = Diff::compute(Algorithm::Histogram, &input); + diff.postprocess_lines(&input); + let diff = diff + .unified_diff(&BasicLineDiffPrinter(&input.interner), UnifiedDiffConfig::default(), &input) + .to_string(); + print!("{diff}"); +} + +fn display(header: &str, paths: &[PathBuf]) { + println!("{header}:"); + for element in paths { + println!("- {}", element.display()); + } +} + +fn ignore(line: &str, in_code_block: bool) -> bool { + in_code_block + || line.contains("e.g.") + || line.contains("i.e.") + || line.contains('|') + || line.trim_start().starts_with('>') + || line.starts_with('#') + || line.trim().is_empty() + || REGEX_IGNORE.is_match(line) +} + +fn comply(content: &str) -> String { + let content: Vec<_> = content.lines().map(std::borrow::ToOwned::to_owned).collect(); + let mut new_content = content.clone(); + let mut new_n = 0; + let mut in_code_block = false; + for (n, line) in content.into_iter().enumerate() { + if n != 0 { + new_n += 1; + } + if line.trim_start().starts_with("```") { + in_code_block = !in_code_block; + continue; + } + if ignore(&line, in_code_block) { + continue; + } + if REGEX_SPLIT.is_match(&line) { + let indent = line.find(|ch: char| !ch.is_whitespace()).unwrap(); + let new_lines: Vec<_> = line + .split_inclusive(&*REGEX_SPLIT) + .map(|portion| format!("{:indent$}{}", "", portion.trim())) + .collect(); + new_content.splice(new_n..=new_n, new_lines.clone()); + new_n += new_lines.len() - 1; + } + } + new_content.join("\n") + "\n" +} + +fn lengthen_lines(content: &str, limit: usize) -> String { + let content: Vec<_> = content.lines().map(std::borrow::ToOwned::to_owned).collect(); + let mut new_content = content.clone(); + let mut new_n = 0; + let mut in_code_block = false; + let mut skip_next = false; + for (n, line) in content.iter().enumerate() { + if skip_next { + skip_next = false; + continue; + } + if n != 0 { + new_n += 1; + } + if line.trim_start().starts_with("```") { + in_code_block = !in_code_block; + continue; + } + if ignore(line, in_code_block) || REGEX_SPLIT.is_match(line) { + continue; + } + let Some(next_line) = content.get(n + 1) else { + continue; + }; + if ignore(next_line, in_code_block) || REGEX_IGNORE_END.is_match(line) { + continue; + } + if line.len() + next_line.len() < limit { + new_content[new_n] = format!("{line} {}", next_line.trim_start()); + new_content.remove(new_n + 1); + skip_next = true; + } + } + new_content.join("\n") + "\n" +} + +#[test] +fn test_sembr() { + let original = "\ +# some. heading +must! be; split? and. normalizes space +1. ignore numbered +ignore | tables +ignore e.g. and i.e. +- ignore. list +* ignore. list +``` +some code. block +``` +some more text. +"; + let expected = "\ +# some. heading +must! +be; +split? +and. +normalizes space +1. ignore numbered +ignore | tables +ignore e.g. and i.e. +- ignore. list +* ignore. list +``` +some code. block +``` +some more text. +"; + assert_eq!(expected, comply(original)); +} + +#[test] +fn test_prettify() { + let original = "\ +do not split +short sentences +"; + let expected = "\ +do not split short sentences +"; + assert_eq!(expected, lengthen_lines(original, 50)); +} + +#[test] +fn test_prettify_prefix_spaces() { + let original = "\ + do not split + short sentences +"; + let expected = "\ + do not split short sentences +"; + assert_eq!(expected, lengthen_lines(original, 50)); +} + +#[test] +fn test_sembr_then_prettify() { + let original = "\ +hi there. do +not split +short sentences. +hi again. +"; + let expected = "\ +hi there. +do +not split +short sentences. +hi again. +"; + let processed = comply(original); + assert_eq!(expected, processed); + let expected = "\ +hi there. +do not split +short sentences. +hi again. +"; + let processed = lengthen_lines(&processed, 50); + assert_eq!(expected, processed); + let expected = "\ +hi there. +do not split short sentences. +hi again. +"; + let processed = lengthen_lines(&processed, 50); + assert_eq!(expected, processed); +}