refactor rustdoc::invalid_html_tags tag parser
Previously, this lint did not distinguish between `<img` and `<img>`; since the latter must be accepted under HTML5, the former was accepted as well. The parser now also handles multi-line tags and multi-line attributes.
This commit is contained in:
parent
e50fed79a8
commit
15a8999aed
5 changed files with 498 additions and 188 deletions
|
|
@ -11,6 +11,7 @@
|
|||
#![feature(file_buffered)]
|
||||
#![feature(format_args_nl)]
|
||||
#![feature(if_let_guard)]
|
||||
#![feature(iter_advance_by)]
|
||||
#![feature(iter_intersperse)]
|
||||
#![feature(round_char_boundary)]
|
||||
#![feature(rustc_private)]
|
||||
|
|
|
|||
|
|
@ -1,9 +1,11 @@
|
|||
//! Detects invalid HTML (like an unclosed `<span>`) in doc comments.
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::iter::Peekable;
|
||||
use std::ops::Range;
|
||||
use std::str::CharIndices;
|
||||
|
||||
use itertools::Itertools as _;
|
||||
use pulldown_cmark::{BrokenLink, Event, LinkType, Parser, Tag, TagEnd};
|
||||
use rustc_hir::HirId;
|
||||
use rustc_resolve::rustdoc::source_span_for_markdown_range;
|
||||
|
|
@ -101,7 +103,7 @@ pub(crate) fn visit_item(cx: &DocContext<'_>, item: &Item, hir_id: HirId, dox: &
|
|||
});
|
||||
};
|
||||
|
||||
let mut tags = Vec::new();
|
||||
let mut tagp = TagParser::new();
|
||||
let mut is_in_comment = None;
|
||||
let mut in_code_block = false;
|
||||
|
||||
|
|
@ -126,70 +128,65 @@ pub(crate) fn visit_item(cx: &DocContext<'_>, item: &Item, hir_id: HirId, dox: &
|
|||
};
|
||||
|
||||
let p = Parser::new_with_broken_link_callback(dox, main_body_opts(), Some(&mut replacer))
|
||||
.into_offset_iter();
|
||||
.into_offset_iter()
|
||||
.coalesce(|a, b| {
|
||||
// for some reason, pulldown-cmark splits html blocks into separate events for each line.
|
||||
// we undo this, in order to handle multi-line tags.
|
||||
match (a, b) {
|
||||
((Event::Html(_), ra), (Event::Html(_), rb)) if ra.end == rb.start => {
|
||||
let merged = ra.start..rb.end;
|
||||
Ok((Event::Html(Cow::Borrowed(&dox[merged.clone()]).into()), merged))
|
||||
}
|
||||
x => Err(x),
|
||||
}
|
||||
});
|
||||
|
||||
for (event, range) in p {
|
||||
match event {
|
||||
Event::Start(Tag::CodeBlock(_)) => in_code_block = true,
|
||||
Event::Html(text) | Event::InlineHtml(text) if !in_code_block => {
|
||||
extract_tags(&mut tags, &text, range, &mut is_in_comment, &report_diag)
|
||||
tagp.extract_tags(&text, range, &mut is_in_comment, &report_diag)
|
||||
}
|
||||
Event::End(TagEnd::CodeBlock) => in_code_block = false,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
for (tag, range) in tags.iter().filter(|(t, _)| {
|
||||
let t = t.to_lowercase();
|
||||
!ALLOWED_UNCLOSED.contains(&t.as_str())
|
||||
}) {
|
||||
report_diag(format!("unclosed HTML tag `{tag}`"), range, true);
|
||||
}
|
||||
|
||||
if let Some(range) = is_in_comment {
|
||||
report_diag("Unclosed HTML comment".to_string(), &range, false);
|
||||
} else if let &Some(quote_pos) = &tagp.quote_pos {
|
||||
let qr = Range { start: quote_pos, end: quote_pos };
|
||||
report_diag(
|
||||
format!("unclosed quoted HTML attribute on tag `{}`", &tagp.tag_name),
|
||||
&qr,
|
||||
false,
|
||||
);
|
||||
} else {
|
||||
if !tagp.tag_name.is_empty() {
|
||||
report_diag(
|
||||
format!("incomplete HTML tag `{}`", &tagp.tag_name),
|
||||
&(tagp.tag_start_pos..dox.len()),
|
||||
false,
|
||||
);
|
||||
}
|
||||
for (tag, range) in tagp.tags.iter().filter(|(t, _)| {
|
||||
let t = t.to_lowercase();
|
||||
!is_implicitly_self_closing(&t)
|
||||
}) {
|
||||
report_diag(format!("unclosed HTML tag `{tag}`"), range, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// These tags are interpreted as self-closing if they lack an explicit closing tag.
|
||||
const ALLOWED_UNCLOSED: &[&str] = &[
|
||||
"area", "base", "br", "col", "embed", "hr", "img", "input", "keygen", "link", "meta", "param",
|
||||
"source", "track", "wbr",
|
||||
];
|
||||
|
||||
fn drop_tag(
|
||||
tags: &mut Vec<(String, Range<usize>)>,
|
||||
tag_name: String,
|
||||
range: Range<usize>,
|
||||
f: &impl Fn(String, &Range<usize>, bool),
|
||||
) {
|
||||
let tag_name_low = tag_name.to_lowercase();
|
||||
if let Some(pos) = tags.iter().rposition(|(t, _)| t.to_lowercase() == tag_name_low) {
|
||||
// If the tag is nested inside a "<script>" or a "<style>" tag, no warning should
|
||||
// be emitted.
|
||||
let should_not_warn = tags.iter().take(pos + 1).any(|(at, _)| {
|
||||
let at = at.to_lowercase();
|
||||
at == "script" || at == "style"
|
||||
});
|
||||
for (last_tag_name, last_tag_span) in tags.drain(pos + 1..) {
|
||||
if should_not_warn {
|
||||
continue;
|
||||
}
|
||||
let last_tag_name_low = last_tag_name.to_lowercase();
|
||||
if ALLOWED_UNCLOSED.contains(&last_tag_name_low.as_str()) {
|
||||
continue;
|
||||
}
|
||||
// `tags` is used as a queue, meaning that everything after `pos` is included inside it.
|
||||
// So `<h2><h3></h2>` will look like `["h2", "h3"]`. So when closing `h2`, we will still
|
||||
// have `h3`, meaning the tag wasn't closed as it should have.
|
||||
f(format!("unclosed HTML tag `{last_tag_name}`"), &last_tag_span, true);
|
||||
}
|
||||
// Remove the `tag_name` that was originally closed
|
||||
tags.pop();
|
||||
} else {
|
||||
// It can happen for example in this case: `<h2></script></h2>` (the `h2` tag isn't required
|
||||
// but it helps for the visualization).
|
||||
f(format!("unopened HTML tag `{tag_name}`"), &range, false);
|
||||
}
|
||||
/// Returns `true` if `tag_name` is one of the entries of `ALLOWED_UNCLOSED`.
///
/// The lookup is an exact string comparison; the callers visible in this file lowercase
/// the name before passing it in.
///
/// Allows constructs like `<img>`, but not `<img`.
|
||||
fn is_implicitly_self_closing(tag_name: &str) -> bool {
|
||||
ALLOWED_UNCLOSED.contains(&tag_name)
|
||||
}
|
||||
|
||||
fn extract_path_backwards(text: &str, end_pos: usize) -> Option<usize> {
|
||||
|
|
@ -252,151 +249,292 @@ fn is_valid_for_html_tag_name(c: char, is_empty: bool) -> bool {
|
|||
c.is_ascii_alphabetic() || !is_empty && (c == '-' || c.is_ascii_digit())
|
||||
}
|
||||
|
||||
fn extract_html_tag(
|
||||
tags: &mut Vec<(String, Range<usize>)>,
|
||||
text: &str,
|
||||
range: &Range<usize>,
|
||||
start_pos: usize,
|
||||
iter: &mut Peekable<CharIndices<'_>>,
|
||||
f: &impl Fn(String, &Range<usize>, bool),
|
||||
) {
|
||||
let mut tag_name = String::new();
|
||||
let mut is_closing = false;
|
||||
let mut prev_pos = start_pos;
|
||||
/// Parse html tags to ensure they are well-formed
///
/// The parser keeps its state in fields rather than locals so that a tag (or a quoted
/// attribute value) may continue across several calls to `extract_tags`.
|
||||
#[derive(Debug, Clone)]
|
||||
struct TagParser {
|
||||
/// Opening tags seen so far that have not been closed yet, as `(name, range)` pairs.
/// New tags are pushed at the end; closing a tag removes it and everything after it.
tags: Vec<(String, Range<usize>)>,
|
||||
/// Name of the tag that is being parsed, if we are within a tag.
|
||||
///
|
||||
/// Since the `<` and name of a tag must appear on the same line with no whitespace,
|
||||
/// if this is the empty string, we are not in a tag.
|
||||
tag_name: String,
|
||||
/// Position of the `<` that started the tag currently being parsed, computed as
/// `range.start + start_pos` of the chunk in which it was found.
tag_start_pos: usize,
|
||||
/// `true` if a `/` was seen before the tag name, i.e. this is a closing tag like `</a>`.
is_closing: bool,
|
||||
/// `true` if we are within a tag, but not within its name.
|
||||
in_attrs: bool,
|
||||
/// If we are in a quoted attribute, what quote char does it use?
|
||||
///
|
||||
/// This needs to be stored in the struct since HTML5 allows newlines in quoted attrs.
|
||||
quote: Option<char>,
|
||||
/// Position of the opening quote char; `Some` exactly when `quote` is `Some`.
// NOTE(review): this is stored as a position within the chunk being parsed, without
// `range.start` added — confirm that is what the diagnostic range expects.
quote_pos: Option<usize>,
|
||||
/// `true` after an `=` has been seen in the attribute list; reset when the quoted value
/// ends or when whitespace is met outside of a quote.
after_eq: bool,
|
||||
}
|
||||
|
||||
loop {
|
||||
let (pos, c) = match iter.peek() {
|
||||
Some((pos, c)) => (*pos, *c),
|
||||
// In case we reached the end of the doc comment, we want to check that it's an
|
||||
// unclosed HTML tag. For example "/// <h3".
|
||||
None => (prev_pos, '\0'),
|
||||
};
|
||||
prev_pos = pos;
|
||||
// Checking if this is a closing tag (like `</a>` for `<a>`).
|
||||
if c == '/' && tag_name.is_empty() {
|
||||
is_closing = true;
|
||||
} else if is_valid_for_html_tag_name(c, tag_name.is_empty()) {
|
||||
tag_name.push(c);
|
||||
} else {
|
||||
if !tag_name.is_empty() {
|
||||
let mut r = Range { start: range.start + start_pos, end: range.start + pos };
|
||||
if c == '>' {
|
||||
// In case we have a tag without attribute, we can consider the span to
|
||||
// refer to it fully.
|
||||
r.end += 1;
|
||||
impl TagParser {
|
||||
/// Creates a parser in its initial state: no open tags, not inside a tag,
/// not inside an attribute list and not inside a quoted attribute value.
fn new() -> Self {
|
||||
Self {
|
||||
tags: Vec::new(),
|
||||
// reserve a little room up front; the buffer is reused for every tag name.
tag_name: String::with_capacity(8),
|
||||
tag_start_pos: 0,
|
||||
is_closing: false,
|
||||
in_attrs: false,
|
||||
quote: None,
|
||||
quote_pos: None,
|
||||
after_eq: false,
|
||||
}
|
||||
}
|
||||
|
||||
fn drop_tag(&mut self, range: Range<usize>, f: &impl Fn(String, &Range<usize>, bool)) {
|
||||
let tag_name_low = self.tag_name.to_lowercase();
|
||||
if let Some(pos) = self.tags.iter().rposition(|(t, _)| t.to_lowercase() == tag_name_low) {
|
||||
// If the tag is nested inside a "<script>" or a "<style>" tag, no warning should
|
||||
// be emitted.
|
||||
let should_not_warn = self.tags.iter().take(pos + 1).any(|(at, _)| {
|
||||
let at = at.to_lowercase();
|
||||
at == "script" || at == "style"
|
||||
});
|
||||
for (last_tag_name, last_tag_span) in self.tags.drain(pos + 1..) {
|
||||
if should_not_warn {
|
||||
continue;
|
||||
}
|
||||
if is_closing {
|
||||
// In case we have "</div >" or even "</div >".
|
||||
if c != '>' {
|
||||
if !c.is_whitespace() {
|
||||
// It seems like it's not a valid HTML tag.
|
||||
break;
|
||||
}
|
||||
let mut found = false;
|
||||
for (new_pos, c) in text[pos..].char_indices() {
|
||||
let last_tag_name_low = last_tag_name.to_lowercase();
|
||||
if is_implicitly_self_closing(&last_tag_name_low) {
|
||||
continue;
|
||||
}
|
||||
// `tags` is used as a queue, meaning that everything after `pos` is included inside it.
|
||||
// So `<h2><h3></h2>` will look like `["h2", "h3"]`. So when closing `h2`, we will still
|
||||
// have `h3`, meaning the tag wasn't closed as it should have.
|
||||
f(format!("unclosed HTML tag `{last_tag_name}`"), &last_tag_span, true);
|
||||
}
|
||||
// Remove the `tag_name` that was originally closed
|
||||
self.tags.pop();
|
||||
} else {
|
||||
// It can happen for example in this case: `<h2></script></h2>` (the `h2` tag isn't required
|
||||
// but it helps for the visualization).
|
||||
f(format!("unopened HTML tag `{}`", &self.tag_name), &range, false);
|
||||
}
|
||||
}
|
||||
|
||||
/// Handle a `<` that appeared while parsing a tag.
///
/// `range` is the range of the chunk being parsed and `lt_pos` the position of the `<`
/// inside that chunk; the two are added to obtain a position comparable with
/// `tag_start_pos`.
///
/// If the `<` is the one that opened the current tag, nothing happens. Otherwise an
/// "incomplete HTML tag" diagnostic spanning from the start of the current tag up to
/// (but not including) the `<` is passed to `f`, and the per-tag state is reset.
|
||||
fn handle_lt_in_tag(
|
||||
&mut self,
|
||||
range: Range<usize>,
|
||||
lt_pos: usize,
|
||||
f: &impl Fn(String, &Range<usize>, bool),
|
||||
) {
|
||||
let global_pos = range.start + lt_pos;
|
||||
// is this check needed?
|
||||
if global_pos == self.tag_start_pos {
|
||||
// `<` is in the tag because it is the start.
|
||||
return;
|
||||
}
|
||||
// tried to start a new tag while in a tag
|
||||
f(
|
||||
format!("incomplete HTML tag `{}`", &self.tag_name),
|
||||
&(self.tag_start_pos..global_pos),
|
||||
false,
|
||||
);
|
||||
// the interrupted tag is abandoned: forget its name and flags.
self.tag_parsed();
|
||||
}
|
||||
|
||||
fn extract_html_tag(
|
||||
&mut self,
|
||||
text: &str,
|
||||
range: &Range<usize>,
|
||||
start_pos: usize,
|
||||
iter: &mut Peekable<CharIndices<'_>>,
|
||||
f: &impl Fn(String, &Range<usize>, bool),
|
||||
) {
|
||||
let mut prev_pos = start_pos;
|
||||
|
||||
'outer_loop: loop {
|
||||
let (pos, c) = match iter.peek() {
|
||||
Some((pos, c)) => (*pos, *c),
|
||||
// In case we reached the end of the doc comment, we want to check that it's an
|
||||
// unclosed HTML tag. For example "/// <h3".
|
||||
None if self.tag_name.is_empty() => (prev_pos, '\0'),
|
||||
None => break,
|
||||
};
|
||||
prev_pos = pos;
|
||||
if c == '/' && self.tag_name.is_empty() {
|
||||
// Checking if this is a closing tag (like `</a>` for `<a>`).
|
||||
self.is_closing = true;
|
||||
} else if !self.in_attrs && is_valid_for_html_tag_name(c, self.tag_name.is_empty()) {
|
||||
self.tag_name.push(c);
|
||||
} else {
|
||||
if !self.tag_name.is_empty() {
|
||||
self.in_attrs = true;
|
||||
let mut r = Range { start: range.start + start_pos, end: range.start + pos };
|
||||
if c == '>' {
|
||||
// In case we have a tag without attribute, we can consider the span to
|
||||
// refer to it fully.
|
||||
r.end += 1;
|
||||
}
|
||||
if self.is_closing {
|
||||
// In case we have "</div >" or even "</div >".
|
||||
if c != '>' {
|
||||
if !c.is_whitespace() {
|
||||
if c == '>' {
|
||||
r.end = range.start + new_pos + 1;
|
||||
found = true;
|
||||
}
|
||||
// It seems like it's not a valid HTML tag.
|
||||
break;
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
break;
|
||||
}
|
||||
}
|
||||
drop_tag(tags, tag_name, r, f);
|
||||
} else {
|
||||
let mut is_self_closing = false;
|
||||
let mut quote_pos = None;
|
||||
if c != '>' {
|
||||
let mut quote = None;
|
||||
let mut after_eq = false;
|
||||
for (i, c) in text[pos..].char_indices() {
|
||||
if !c.is_whitespace() {
|
||||
if let Some(q) = quote {
|
||||
if c == q {
|
||||
quote = None;
|
||||
quote_pos = None;
|
||||
after_eq = false;
|
||||
let mut found = false;
|
||||
for (new_pos, c) in text[pos..].char_indices() {
|
||||
if !c.is_whitespace() {
|
||||
if c == '>' {
|
||||
r.end = range.start + new_pos + 1;
|
||||
found = true;
|
||||
} else if c == '<' {
|
||||
self.handle_lt_in_tag(range.clone(), pos + new_pos, f);
|
||||
}
|
||||
} else if c == '>' {
|
||||
break;
|
||||
} else if c == '/' && !after_eq {
|
||||
is_self_closing = true;
|
||||
} else {
|
||||
if is_self_closing {
|
||||
is_self_closing = false;
|
||||
}
|
||||
if (c == '"' || c == '\'') && after_eq {
|
||||
quote = Some(c);
|
||||
quote_pos = Some(pos + i);
|
||||
} else if c == '=' {
|
||||
after_eq = true;
|
||||
}
|
||||
}
|
||||
} else if quote.is_none() {
|
||||
after_eq = false;
|
||||
}
|
||||
if !found {
|
||||
break 'outer_loop;
|
||||
}
|
||||
}
|
||||
}
|
||||
if let Some(quote_pos) = quote_pos {
|
||||
let qr = Range { start: quote_pos, end: quote_pos };
|
||||
f(
|
||||
format!("unclosed quoted HTML attribute on tag `{tag_name}`"),
|
||||
&qr,
|
||||
false,
|
||||
);
|
||||
}
|
||||
if is_self_closing {
|
||||
// https://html.spec.whatwg.org/#parse-error-non-void-html-element-start-tag-with-trailing-solidus
|
||||
let valid = ALLOWED_UNCLOSED.contains(&&tag_name[..])
|
||||
|| tags.iter().take(pos + 1).any(|(at, _)| {
|
||||
let at = at.to_lowercase();
|
||||
at == "svg" || at == "math"
|
||||
});
|
||||
if !valid {
|
||||
f(format!("invalid self-closing HTML tag `{tag_name}`"), &r, false);
|
||||
}
|
||||
self.drop_tag(r, f);
|
||||
self.tag_parsed();
|
||||
} else {
|
||||
tags.push((tag_name, r));
|
||||
self.extract_opening_tag(text, range, r, pos, c, iter, f)
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
iter.next();
|
||||
}
|
||||
iter.next();
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_tags(
|
||||
tags: &mut Vec<(String, Range<usize>)>,
|
||||
text: &str,
|
||||
range: Range<usize>,
|
||||
is_in_comment: &mut Option<Range<usize>>,
|
||||
f: &impl Fn(String, &Range<usize>, bool),
|
||||
) {
|
||||
let mut iter = text.char_indices().peekable();
|
||||
|
||||
while let Some((start_pos, c)) = iter.next() {
|
||||
if is_in_comment.is_some() {
|
||||
if text[start_pos..].starts_with("-->") {
|
||||
*is_in_comment = None;
|
||||
fn extract_opening_tag(
|
||||
&mut self,
|
||||
text: &str,
|
||||
range: &Range<usize>,
|
||||
r: Range<usize>,
|
||||
pos: usize,
|
||||
c: char,
|
||||
iter: &mut Peekable<CharIndices<'_>>,
|
||||
f: &impl Fn(String, &Range<usize>, bool),
|
||||
) {
|
||||
// we can store this as a local, since html5 does require the `/` and `>`
|
||||
// to not be separated by whitespace.
|
||||
let mut is_self_closing = false;
|
||||
if c != '>' {
|
||||
'parse_til_gt: {
|
||||
for (i, c) in text[pos..].char_indices() {
|
||||
if !c.is_whitespace() {
|
||||
debug_assert_eq!(self.quote_pos.is_some(), self.quote.is_some());
|
||||
if let Some(q) = self.quote {
|
||||
if c == q {
|
||||
self.quote = None;
|
||||
self.quote_pos = None;
|
||||
self.after_eq = false;
|
||||
}
|
||||
} else if c == '>' {
|
||||
break 'parse_til_gt;
|
||||
} else if c == '<' {
|
||||
self.handle_lt_in_tag(range.clone(), pos + i, f);
|
||||
} else if c == '/' && !self.after_eq {
|
||||
is_self_closing = true;
|
||||
} else {
|
||||
if is_self_closing {
|
||||
is_self_closing = false;
|
||||
}
|
||||
if (c == '"' || c == '\'') && self.after_eq {
|
||||
self.quote = Some(c);
|
||||
self.quote_pos = Some(pos + i);
|
||||
} else if c == '=' {
|
||||
self.after_eq = true;
|
||||
}
|
||||
}
|
||||
} else if self.quote.is_none() {
|
||||
self.after_eq = false;
|
||||
}
|
||||
if !is_self_closing && !self.tag_name.is_empty() {
|
||||
iter.next();
|
||||
}
|
||||
}
|
||||
// if we've run out of text but still haven't found a `>`,
|
||||
// return early without calling `tag_parsed` or emitting lints.
|
||||
// this allows us to either find the `>` in a later event
|
||||
// or emit a lint about it being missing.
|
||||
return;
|
||||
}
|
||||
} else if c == '<' {
|
||||
if text[start_pos..].starts_with("<!--") {
|
||||
// We skip the "!--" part. (Once `advance_by` is stable, might be nice to use it!)
|
||||
iter.next();
|
||||
iter.next();
|
||||
iter.next();
|
||||
*is_in_comment = Some(Range {
|
||||
start: range.start + start_pos,
|
||||
end: range.start + start_pos + 3,
|
||||
}
|
||||
if is_self_closing {
|
||||
// https://html.spec.whatwg.org/#parse-error-non-void-html-element-start-tag-with-trailing-solidus
|
||||
let valid = ALLOWED_UNCLOSED.contains(&&self.tag_name[..])
|
||||
|| self.tags.iter().take(pos + 1).any(|(at, _)| {
|
||||
let at = at.to_lowercase();
|
||||
at == "svg" || at == "math"
|
||||
});
|
||||
} else {
|
||||
extract_html_tag(tags, text, &range, start_pos, &mut iter, f);
|
||||
if !valid {
|
||||
f(format!("invalid self-closing HTML tag `{}`", self.tag_name), &r, false);
|
||||
}
|
||||
} else if !self.tag_name.is_empty() {
|
||||
self.tags.push((std::mem::take(&mut self.tag_name), r));
|
||||
}
|
||||
self.tag_parsed();
|
||||
}
|
||||
/// Finished parsing a tag, reset related data.
///
/// Clears `tag_name` (keeping its allocation) and resets `is_closing` and `in_attrs`.
/// The quote-tracking fields and the `tags` list are left untouched.
|
||||
fn tag_parsed(&mut self) {
|
||||
self.tag_name.clear();
|
||||
self.is_closing = false;
|
||||
self.in_attrs = false;
|
||||
}
|
||||
|
||||
/// Scans one chunk of HTML for tags and comments, updating the parser state.
///
/// * `text` - the chunk to scan.
/// * `range` - the range this chunk is taken to occupy; `range.start` is added to
///   positions inside `text` when ranges are built here.
/// * `is_in_comment` - `Some(range)` while inside a `<!--` comment that has not been
///   terminated yet. It is set and cleared here and owned by the caller, so it can
///   persist across calls.
/// * `f` - diagnostic callback, forwarded to `extract_html_tag`.
///
/// Tag state lives in `self`, so a tag or quoted attribute value that is not finished
/// at the end of `text` is continued by the next call.
fn extract_tags(
|
||||
&mut self,
|
||||
text: &str,
|
||||
range: Range<usize>,
|
||||
is_in_comment: &mut Option<Range<usize>>,
|
||||
f: &impl Fn(String, &Range<usize>, bool),
|
||||
) {
|
||||
let mut iter = text.char_indices().peekable();
|
||||
// position at which the previous "inside attributes" pass started.
let mut prev_pos = 0;
|
||||
loop {
|
||||
if self.quote.is_some() {
|
||||
debug_assert!(self.in_attrs && self.quote_pos.is_some());
|
||||
}
|
||||
// already inside the attribute part of a tag: let `extract_html_tag` continue
// from the next character without consuming it here.
if self.in_attrs
|
||||
&& let Some(&(start_pos, _)) = iter.peek()
|
||||
{
|
||||
self.extract_html_tag(text, &range, start_pos, &mut iter, f);
|
||||
// if no progress is being made, move forward forcefully.
|
||||
if prev_pos == start_pos {
|
||||
iter.next();
|
||||
}
|
||||
prev_pos = start_pos;
|
||||
continue;
|
||||
}
|
||||
let Some((start_pos, c)) = iter.next() else { break };
|
||||
if is_in_comment.is_some() {
|
||||
// inside a comment everything is ignored until the terminator shows up.
if text[start_pos..].starts_with("-->") {
|
||||
*is_in_comment = None;
|
||||
}
|
||||
} else if c == '<' {
|
||||
// "<!--" is a valid attribute name under html5, so don't treat it as a comment if we're in a tag.
|
||||
if self.tag_name.is_empty() && text[start_pos..].starts_with("<!--") {
|
||||
// We skip the "!--" part. (Once `advance_by` is stable, might be nice to use it!)
|
||||
iter.next();
|
||||
iter.next();
|
||||
iter.next();
|
||||
// remember where the comment was opened, for the caller's benefit.
*is_in_comment = Some(Range {
|
||||
start: range.start + start_pos,
|
||||
end: range.start + start_pos + 3,
|
||||
});
|
||||
} else {
|
||||
// only a `<` met outside of any tag marks the start of a new tag.
if self.tag_name.is_empty() {
|
||||
self.tag_start_pos = range.start + start_pos;
|
||||
}
|
||||
self.extract_html_tag(text, &range, start_pos, &mut iter, f);
|
||||
}
|
||||
} else if !self.tag_name.is_empty() {
|
||||
// partially inside html tag that spans across events
|
||||
self.extract_html_tag(text, &range, start_pos, &mut iter, f);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
|
|
|||
73
src/librustdoc/passes/lint/html_tags/tests.rs
Normal file
73
src/librustdoc/passes/lint/html_tags/tests.rs
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
use std::cell::RefCell;
|
||||
|
||||
use super::*;
|
||||
|
||||
/// An unterminated `<br` nested inside a `<div>` must produce exactly one diagnostic,
/// whose range `6..9` covers the `<br` text.
#[test]
|
||||
fn test_extract_tags_nested_unclosed() {
|
||||
let mut tagp = TagParser::new();
|
||||
// collects every `(message, range, flag)` triple handed to the callback.
let diags = RefCell::new(Vec::new());
|
||||
let dox = "<div>\n<br</div>";
|
||||
tagp.extract_tags(dox, 0..dox.len(), &mut None, &|s, r, b| {
|
||||
diags.borrow_mut().push((s, r.clone(), b));
|
||||
});
|
||||
assert_eq!(diags.borrow().len(), 1, "did not get expected diagnostics: {diags:?}");
|
||||
assert_eq!(diags.borrow()[0].1, 6..9)
|
||||
}
|
||||
|
||||
/// Text that looks like a tag inside a single-quoted attribute value must not produce
/// any diagnostic.
#[test]
|
||||
fn test_extract_tags_taglike_in_attr() {
|
||||
let mut tagp = TagParser::new();
|
||||
let diags = RefCell::new(Vec::new());
|
||||
let dox = "<img src='<div>'>";
|
||||
tagp.extract_tags(dox, 0..dox.len(), &mut None, &|s, r, b| {
|
||||
diags.borrow_mut().push((s, r.clone(), b));
|
||||
});
|
||||
assert_eq!(diags.borrow().len(), 0, "unexpected diagnostics: {diags:?}");
|
||||
}
|
||||
|
||||
/// Text that looks like a tag inside a double-quoted attribute value spanning several
/// lines must not produce any diagnostic.
#[test]
|
||||
fn test_extract_tags_taglike_in_multiline_attr() {
|
||||
let mut tagp = TagParser::new();
|
||||
let diags = RefCell::new(Vec::new());
|
||||
let dox = "<img src=\"\nasd\n<div>\n\">";
|
||||
tagp.extract_tags(dox, 0..dox.len(), &mut None, &|s, r, b| {
|
||||
diags.borrow_mut().push((s, r.clone(), b));
|
||||
});
|
||||
assert_eq!(diags.borrow().len(), 0, "unexpected diagnostics: {diags:?}");
|
||||
}
|
||||
|
||||
/// Same input as `test_extract_tags_taglike_in_attr`, but fed to the parser in two
/// separate calls, split right after the opening quote of the attribute value.
/// The quoted state must carry over, so no diagnostic is expected.
#[test]
|
||||
fn test_extract_tags_taglike_in_multievent_attr() {
|
||||
let mut tagp = TagParser::new();
|
||||
let diags = RefCell::new(Vec::new());
|
||||
let dox = "<img src='<div>'>";
|
||||
// byte offset just past "<img src='".
let split_point = 10;
|
||||
// parses the sub-slice `dox[range]`, reporting it as located at `range`.
let mut p = |range: Range<usize>| {
|
||||
tagp.extract_tags(&dox[range.clone()], range, &mut None, &|s, r, b| {
|
||||
diags.borrow_mut().push((s, r.clone(), b));
|
||||
})
|
||||
};
|
||||
p(0..split_point);
|
||||
p(split_point..dox.len());
|
||||
assert_eq!(diags.borrow().len(), 0, "unexpected diagnostics: {diags:?}");
|
||||
}
|
||||
|
||||
/// A multi-line quoted attribute value containing tag-like text, fed to the parser one
/// line per call. No diagnostic is expected, and exactly one tag must remain recorded
/// in `tagp.tags` afterwards.
#[test]
|
||||
fn test_extract_tags_taglike_in_multiline_multievent_attr() {
|
||||
let mut tagp = TagParser::new();
|
||||
let diags = RefCell::new(Vec::new());
|
||||
let dox = "<img src='\n foo:\n </div>\n <p/>\n <div>\n'>";
|
||||
// parses the sub-slice `dox[range]`, reporting it as located at `range`.
let mut p = |range: Range<usize>| {
|
||||
tagp.extract_tags(&dox[range.clone()], range, &mut None, &|s, r, b| {
|
||||
diags.borrow_mut().push((s, r.clone(), b));
|
||||
})
|
||||
};
|
||||
// walk the input line by line, keeping each line's trailing newline.
let mut offset = 0;
|
||||
for ln in dox.split_inclusive('\n') {
|
||||
let new_offset = offset + ln.len();
|
||||
p(offset..new_offset);
|
||||
offset = new_offset;
|
||||
}
|
||||
assert_eq!(diags.borrow().len(), 0, "unexpected diagnostics: {diags:?}");
|
||||
assert_eq!(tagp.tags.len(), 1);
|
||||
}
|
||||
|
|
@ -43,7 +43,7 @@ pub fn b() {}
|
|||
/// <h3>
|
||||
//~^ ERROR unclosed HTML tag `h3`
|
||||
/// <script
|
||||
//~^ ERROR unclosed HTML tag `script`
|
||||
//~^ ERROR incomplete HTML tag `script`
|
||||
pub fn c() {}
|
||||
|
||||
// Unclosed tags shouldn't warn if they are nested inside a <script> elem.
|
||||
|
|
@ -72,6 +72,7 @@ pub fn e() {}
|
|||
/// <div></div >
|
||||
/// <div></div
|
||||
//~^ ERROR unclosed HTML tag `div`
|
||||
//~| ERROR incomplete HTML tag `div`
|
||||
pub fn f() {}
|
||||
|
||||
/// <!---->
|
||||
|
|
@ -105,7 +106,7 @@ pub fn j() {}
|
|||
/// uiapp.run(&env::args().collect::<Vec<_>>());
|
||||
/// ```
|
||||
///
|
||||
/// <Vec<_> shouldn't warn!
|
||||
// <Vec<_> shouldn't warn!
|
||||
/// ``````
|
||||
pub fn k() {}
|
||||
|
||||
|
|
@ -141,14 +142,72 @@ pub fn no_error_2() {}
|
|||
/// </div>
|
||||
pub fn no_error_3() {}
|
||||
|
||||
/// > <div
|
||||
/// > class="foo">
|
||||
/// > </div>
|
||||
pub fn no_error_4() {}
|
||||
|
||||
/// unfinished ALLOWED_UNCLOSED
|
||||
///
|
||||
/// note: CommonMark doesn't allow an html block to start with a multiline tag,
|
||||
/// so we use `<br>` a bunch to force these to be parsed as html blocks.
|
||||
///
|
||||
/// <br>
|
||||
/// <img
|
||||
//~^ ERROR unclosed HTML tag `img`
|
||||
//~^ ERROR incomplete HTML tag `img`
|
||||
pub fn q() {}
|
||||
|
||||
/// nested unfinished ALLOWED_UNCLOSED
|
||||
/// <p><img</p>
|
||||
//~^ ERROR unclosed HTML tag `img`
|
||||
//~^ ERROR incomplete HTML tag `img`
|
||||
pub fn r() {}
|
||||
|
||||
/// > <br>
|
||||
/// > <img
|
||||
//~^ ERROR incomplete HTML tag `img`
|
||||
/// > href="#broken"
|
||||
pub fn s() {}
|
||||
|
||||
/// <br>
|
||||
/// <br<br>
|
||||
//~^ ERROR incomplete HTML tag `br`
|
||||
pub fn t() {}
|
||||
|
||||
/// <br>
|
||||
/// <br
|
||||
//~^ ERROR incomplete HTML tag `br`
|
||||
pub fn u() {}
|
||||
|
||||
/// <a href=">" alt="<">html5 allows this</a>
|
||||
pub fn no_error_5() {}
|
||||
|
||||
/// <br>
|
||||
/// <img title="
|
||||
/// html5
|
||||
/// allows
|
||||
/// multiline
|
||||
/// attr
|
||||
/// values
|
||||
/// these are just text, not tags:
|
||||
/// </div>
|
||||
/// <p/>
|
||||
/// <div>
|
||||
/// ">
|
||||
pub fn no_error_6() {}
|
||||
|
||||
/// <br>
|
||||
/// <a href="data:text/html,<!DOCTYPE>
|
||||
/// <html>
|
||||
/// <body><b>this is allowed for some reason</b></body>
|
||||
/// </html>
|
||||
/// ">what</a>
|
||||
pub fn no_error_7() {}
|
||||
|
||||
/// Technically this is allowed per the html5 spec,
|
||||
/// but there's basically no legitimate reason to do it,
|
||||
/// so we don't allow it.
|
||||
///
|
||||
/// <p <!-->foobar</p>
|
||||
//~^ ERROR Unclosed HTML comment
|
||||
//~| ERROR incomplete HTML tag `p`
|
||||
pub fn v() {}
|
||||
|
|
|
|||
|
|
@ -52,6 +52,12 @@ error: unclosed HTML tag `p`
|
|||
LL | /// <br/> <p>
|
||||
| ^^^
|
||||
|
||||
error: incomplete HTML tag `script`
|
||||
--> $DIR/invalid-html-tags.rs:45:5
|
||||
|
|
||||
LL | /// <script
|
||||
| ^^^^^^^
|
||||
|
||||
error: unclosed HTML tag `div`
|
||||
--> $DIR/invalid-html-tags.rs:41:5
|
||||
|
|
||||
|
|
@ -64,11 +70,11 @@ error: unclosed HTML tag `h3`
|
|||
LL | /// <h3>
|
||||
| ^^^^
|
||||
|
||||
error: unclosed HTML tag `script`
|
||||
--> $DIR/invalid-html-tags.rs:45:5
|
||||
error: incomplete HTML tag `div`
|
||||
--> $DIR/invalid-html-tags.rs:73:10
|
||||
|
|
||||
LL | /// <script
|
||||
| ^^^^^^
|
||||
LL | /// <div></div
|
||||
| ^^^^^
|
||||
|
||||
error: unclosed HTML tag `div`
|
||||
--> $DIR/invalid-html-tags.rs:73:5
|
||||
|
|
@ -77,40 +83,73 @@ LL | /// <div></div
|
|||
| ^^^^^
|
||||
|
||||
error: Unclosed HTML comment
|
||||
--> $DIR/invalid-html-tags.rs:87:5
|
||||
--> $DIR/invalid-html-tags.rs:88:5
|
||||
|
|
||||
LL | /// <!--
|
||||
| ^^^
|
||||
| ^^^^
|
||||
|
||||
error: unopened HTML tag `unopened-tag`
|
||||
--> $DIR/invalid-html-tags.rs:114:26
|
||||
--> $DIR/invalid-html-tags.rs:115:26
|
||||
|
|
||||
LL | /// Web Components style </unopened-tag>
|
||||
| ^^^^^^^^^^^^^^^
|
||||
|
||||
error: unclosed HTML tag `dashed-tags`
|
||||
--> $DIR/invalid-html-tags.rs:112:26
|
||||
--> $DIR/invalid-html-tags.rs:113:26
|
||||
|
|
||||
LL | /// Web Components style <dashed-tags>
|
||||
| ^^^^^^^^^^^^^
|
||||
|
||||
error: unclosed HTML tag `a`
|
||||
--> $DIR/invalid-html-tags.rs:121:19
|
||||
--> $DIR/invalid-html-tags.rs:122:19
|
||||
|
|
||||
LL | /// backslashed \<<a href="">
|
||||
| ^^
|
||||
|
||||
error: unclosed HTML tag `img`
|
||||
--> $DIR/invalid-html-tags.rs:147:5
|
||||
error: incomplete HTML tag `img`
|
||||
--> $DIR/invalid-html-tags.rs:156:5
|
||||
|
|
||||
LL | /// <img
|
||||
| ^^^
|
||||
| ^^^^
|
||||
|
||||
error: unclosed HTML tag `img`
|
||||
--> $DIR/invalid-html-tags.rs:152:8
|
||||
error: incomplete HTML tag `img`
|
||||
--> $DIR/invalid-html-tags.rs:161:8
|
||||
|
|
||||
LL | /// <p><img</p>
|
||||
| ^^^^
|
||||
|
||||
error: aborting due to 18 previous errors
|
||||
error: incomplete HTML tag `img`
|
||||
--> $DIR/invalid-html-tags.rs:166:7
|
||||
|
|
||||
LL | /// > <img
|
||||
| _______^
|
||||
LL | |
|
||||
LL | | /// > href="#broken"
|
||||
| |____________________^
|
||||
|
||||
error: incomplete HTML tag `br`
|
||||
--> $DIR/invalid-html-tags.rs:172:5
|
||||
|
|
||||
LL | /// <br<br>
|
||||
| ^^^
|
||||
|
||||
error: incomplete HTML tag `br`
|
||||
--> $DIR/invalid-html-tags.rs:177:5
|
||||
|
|
||||
LL | /// <br
|
||||
| ^^^
|
||||
|
||||
error: incomplete HTML tag `p`
|
||||
--> $DIR/invalid-html-tags.rs:210:5
|
||||
|
|
||||
LL | /// <p <!-->foobar</p>
|
||||
| ^^^
|
||||
|
||||
error: Unclosed HTML comment
|
||||
--> $DIR/invalid-html-tags.rs:210:8
|
||||
|
|
||||
LL | /// <p <!-->foobar</p>
|
||||
| ^^^^
|
||||
|
||||
error: aborting due to 24 previous errors
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue