Auto merge of #78429 - casey:doctest-attribute-splitting, r=jyn514
[librustdoc] Only split lang string on `,`, ` `, and `\t`
Split markdown lang strings into tokens on `,`, ` `, and `\t` only.
The previous behavior was to split lang strings into tokens on any
character that wasn't a `_`, `-`, or alphanumeric.
This is a potentially breaking change, so please scrutinize! See discussion in #78344.
I noticed some test cases that made me wonder if there might have been some reason for the original behavior:
```
t("{.no_run .example}", false, true, Ignore::None, true, false, false, false, v(), None);
t("{.sh .should_panic}", true, false, Ignore::None, false, false, false, false, v(), None);
t("{.example .rust}", false, false, Ignore::None, true, false, false, false, v(), None);
t("{.test_harness .rust}", false, false, Ignore::None, true, true, false, false, v(), None);
```
It seemed pretty peculiar to specifically test lang strings in braces, with all the tokens prefixed by `.`.
I did some digging, and it looks like the test cases were added way back in [this commit from 2014](3fef7a74ca) by `@skade`.
It looks like they were added just to make sure that the splitting was permissive, and aren't testing that those strings in particular are accepted.
Closes https://github.com/rust-lang/rust/issues/78344.
This commit is contained in:
commit
d95d304861
6 changed files with 84 additions and 20 deletions
|
|
@ -780,6 +780,31 @@ impl LangString {
|
|||
Self::parse(string, allow_error_code_check, enable_per_target_ignores, None)
|
||||
}
|
||||
|
||||
/// Splits a markdown lang string into tokens, separating on `,`, ` `, and `\t`.
///
/// Pandoc, which Rust once used for generating documentation,
/// expects lang strings to be surrounded by `{}` and for each token
/// to be preceded by a `.`. Since some of these lang strings are still
/// loose in the wild, we strip a pair of surrounding `{}` from the lang
/// string and a leading `.` from each token.
fn tokens(string: &str) -> impl Iterator<Item = &str> {
    let string = string.trim();

    // Strip one pair of surrounding braces, if present. A lone `{` or `}`
    // cannot satisfy both conditions, so the slice below never panics.
    let string = if string.starts_with('{') && string.ends_with('}') {
        &string[1..string.len() - 1]
    } else {
        string
    };

    string
        .split(|c| c == ',' || c == ' ' || c == '\t')
        .map(str::trim)
        // Drop a leading `.` (Pandoc-style token prefix) when present.
        .map(|token| token.strip_prefix('.').unwrap_or(token))
        // Empty fragments arise from consecutive separators; discard them.
        .filter(|token| !token.is_empty())
}
|
||||
|
||||
fn parse(
|
||||
string: &str,
|
||||
allow_error_code_check: ErrorCodes,
|
||||
|
|
@ -793,11 +818,11 @@ impl LangString {
|
|||
let mut ignores = vec![];
|
||||
|
||||
data.original = string.to_owned();
|
||||
let tokens = string.split(|c: char| !(c == '_' || c == '-' || c.is_alphanumeric()));
|
||||
|
||||
let tokens = Self::tokens(string).collect::<Vec<&str>>();
|
||||
|
||||
for token in tokens {
|
||||
match token.trim() {
|
||||
"" => {}
|
||||
match token {
|
||||
"should_panic" => {
|
||||
data.should_panic = true;
|
||||
seen_rust_tags = !seen_other_tags;
|
||||
|
|
@ -894,6 +919,7 @@ impl LangString {
|
|||
_ => seen_other_tags = true,
|
||||
}
|
||||
}
|
||||
|
||||
// ignore-foo overrides ignore
|
||||
if !ignores.is_empty() {
|
||||
data.ignore = Ignore::Some(ignores);
|
||||
|
|
|
|||
|
|
@ -58,6 +58,9 @@ fn test_lang_string_parse() {
|
|||
|
||||
t(Default::default());
|
||||
t(LangString { original: "rust".into(), ..Default::default() });
|
||||
t(LangString { original: ".rust".into(), ..Default::default() });
|
||||
t(LangString { original: "{rust}".into(), ..Default::default() });
|
||||
t(LangString { original: "{.rust}".into(), ..Default::default() });
|
||||
t(LangString { original: "sh".into(), rust: false, ..Default::default() });
|
||||
t(LangString { original: "ignore".into(), ignore: Ignore::All, ..Default::default() });
|
||||
t(LangString {
|
||||
|
|
@ -75,16 +78,16 @@ fn test_lang_string_parse() {
|
|||
..Default::default()
|
||||
});
|
||||
t(LangString { original: "allow_fail".into(), allow_fail: true, ..Default::default() });
|
||||
t(LangString { original: "{.no_run .example}".into(), no_run: true, ..Default::default() });
|
||||
t(LangString { original: "no_run,example".into(), no_run: true, ..Default::default() });
|
||||
t(LangString {
|
||||
original: "{.sh .should_panic}".into(),
|
||||
original: "sh,should_panic".into(),
|
||||
should_panic: true,
|
||||
rust: false,
|
||||
..Default::default()
|
||||
});
|
||||
t(LangString { original: "{.example .rust}".into(), ..Default::default() });
|
||||
t(LangString { original: "example,rust".into(), ..Default::default() });
|
||||
t(LangString {
|
||||
original: "{.test_harness .rust}".into(),
|
||||
original: "test_harness,.rust".into(),
|
||||
test_harness: true,
|
||||
..Default::default()
|
||||
});
|
||||
|
|
@ -100,6 +103,18 @@ fn test_lang_string_parse() {
|
|||
rust: false,
|
||||
..Default::default()
|
||||
});
|
||||
t(LangString {
|
||||
original: "text,no_run, ".into(),
|
||||
no_run: true,
|
||||
rust: false,
|
||||
..Default::default()
|
||||
});
|
||||
t(LangString {
|
||||
original: "text,no_run,".into(),
|
||||
no_run: true,
|
||||
rust: false,
|
||||
..Default::default()
|
||||
});
|
||||
t(LangString {
|
||||
original: "edition2015".into(),
|
||||
edition: Some(Edition::Edition2015),
|
||||
|
|
@ -112,6 +127,29 @@ fn test_lang_string_parse() {
|
|||
});
|
||||
}
|
||||
|
||||
/// Checks that `LangString::tokens` splits lang strings as expected:
/// separators are `,`, ` `, and `\t`; surrounding `{}` and leading `.`
/// are stripped; empty fragments are discarded.
#[test]
fn test_lang_string_tokenizer() {
    // Table of (input lang string, expected token split) pairs.
    let cases: &[(&str, &[&str])] = &[
        ("", &[]),
        ("foo", &["foo"]),
        ("foo,bar", &["foo", "bar"]),
        (".foo,.bar", &["foo", "bar"]),
        ("{.foo,.bar}", &["foo", "bar"]),
        (" {.foo,.bar} ", &["foo", "bar"]),
        ("foo bar", &["foo", "bar"]),
        ("foo\tbar", &["foo", "bar"]),
        ("foo\t, bar", &["foo", "bar"]),
        (" foo , bar ", &["foo", "bar"]),
        (",,foo,,bar,,", &["foo", "bar"]),
        ("foo=bar", &["foo=bar"]),
        ("a-b-c", &["a-b-c"]),
        ("a_b_c", &["a_b_c"]),
    ];

    for &(lang_string, want) in cases {
        let have = LangString::tokens(lang_string).collect::<Vec<&str>>();
        assert_eq!(have, want, "Unexpected lang string split for `{}`", lang_string);
    }
}
|
||||
|
||||
#[test]
|
||||
fn test_header() {
|
||||
fn t(input: &str, expect: &str) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue