Rollup merge of #146195 - nixxo:urlencoding-fix, r=ehuss
fix partial urlencoded link support Hello Rust community. This is my first contribution; I hope it is useful. While translating the Rust book into Italian (https://github.com/nixxo/rust-lang-book-it), I noticed that the linkchecker tool was failing and reporting broken links on some pages even though the links worked properly in the browser. Upon inspection, I noticed that mdbook urlencodes the links but does not urlencode the heading IDs, resulting in anchor/ID pairs that are not identical, which linkchecker reports as invalid. Looking at the source code of the linkchecker tool, I noticed that urlencoding was done by the `small_url_encode` function only partially, as the name suggests. Replacing this function with full urlencoding fixes the issue, and the links are now properly reported as valid. - added full urlencoding to properly check urlencoded anchor links against non-urlencoded heading IDs - added tests. Urlencoding is provided by https://crates.io/crates/urlencoding
This commit is contained in:
commit
3bd603b239
6 changed files with 23 additions and 12 deletions
|
|
@ -2167,6 +2167,7 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"html5ever",
|
||||
"regex",
|
||||
"urlencoding",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -5824,6 +5825,12 @@ dependencies = [
|
|||
"percent-encoding",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "urlencoding"
|
||||
version = "2.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da"
|
||||
|
||||
[[package]]
|
||||
name = "utf-8"
|
||||
version = "0.7.6"
|
||||
|
|
|
|||
|
|
@ -10,3 +10,4 @@ path = "main.rs"
|
|||
[dependencies]
|
||||
regex = "1"
|
||||
html5ever = "0.29.0"
|
||||
urlencoding = "2.1.3"
|
||||
|
|
|
|||
|
|
@ -232,18 +232,7 @@ enum FileEntry {
|
|||
type Cache = HashMap<String, FileEntry>;
|
||||
|
||||
fn small_url_encode(s: &str) -> String {
|
||||
s.replace('<', "%3C")
|
||||
.replace('>', "%3E")
|
||||
.replace(' ', "%20")
|
||||
.replace('?', "%3F")
|
||||
.replace('\'', "%27")
|
||||
.replace('&', "%26")
|
||||
.replace(',', "%2C")
|
||||
.replace(':', "%3A")
|
||||
.replace(';', "%3B")
|
||||
.replace('[', "%5B")
|
||||
.replace(']', "%5D")
|
||||
.replace('\"', "%22")
|
||||
urlencoding::encode(s).to_string()
|
||||
}
|
||||
|
||||
impl Checker {
|
||||
|
|
|
|||
|
|
@ -3,5 +3,8 @@
|
|||
|
||||
<h2 id="barfrag">Bar</h2>
|
||||
|
||||
<!-- testing urlencoded anchor links against non-urlencoded heading IDs -->
|
||||
<h2 id="barfrag-è">Bar</h2>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
|
|||
|
|
@ -8,7 +8,15 @@
|
|||
<a href="https://example.com/doesnotexist">external links not validated</a>
|
||||
<a href="redir.html#redirfrag">Redirect</a>
|
||||
|
||||
<!-- testing urlencoded anchor links against non-urlencoded heading IDs -->
|
||||
<a href="#localfrag-%C3%A8"></a>
|
||||
<a href="bar.html#barfrag-%C3%A8"></a>
|
||||
<a href="redir.html#redirfrag-%C3%A8"></a>
|
||||
|
||||
<h2 id="localfrag">Local</h2>
|
||||
|
||||
<!-- testing urlencoded anchor links against non-urlencoded heading IDs -->
|
||||
<h2 id="localfrag-è">Local</h2>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
|
|||
|
|
@ -1,5 +1,8 @@
|
|||
<html>
|
||||
<body>
|
||||
<h2 id="redirfrag">Redir</h2>
|
||||
|
||||
<!-- testing urlencoded anchor links against non-urlencoded heading IDs -->
|
||||
<h2 id="redirfrag-è">Redir</h2>
|
||||
</body>
|
||||
</html>
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue