Rollup merge of #146195 - nixxo:urlencoding-fix, r=ehuss

fix partial urlencoded link support

Hello Rust community.
This is my first contribution; I hope it is useful.

While translating the Rust book into Italian (https://github.com/nixxo/rust-lang-book-it), I noticed that the linkchecker tool was failing, reporting broken links on some pages even though the links worked properly in the browser. Upon inspection, I noticed that mdbook basically urlencoded the links but did not urlencode the heading IDs, resulting in a mismatched anchor/ID pairing that linkchecker reports as invalid.

Looking at the source code for the linkchecker tool, I noticed that urlencoding was done by the `small_url_encode` function in a partial way, as the name suggests. Replacing this function's implementation with full urlencoding fixes the issue, and the links are properly reported as valid.

- added full urlencoding to properly check urlencoded anchor links against non-urlencoded heading IDs
- added tests

urlencoding provided by https://crates.io/crates/urlencoding
This commit is contained in:
Stuart Cook 2025-09-09 14:35:03 +10:00 committed by GitHub
commit 3bd603b239
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 23 additions and 12 deletions

View file

@ -2167,6 +2167,7 @@ version = "0.1.0"
dependencies = [
"html5ever",
"regex",
"urlencoding",
]
[[package]]
@ -5824,6 +5825,12 @@ dependencies = [
"percent-encoding",
]
[[package]]
name = "urlencoding"
version = "2.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da"
[[package]]
name = "utf-8"
version = "0.7.6"

View file

@ -10,3 +10,4 @@ path = "main.rs"
[dependencies]
regex = "1"
html5ever = "0.29.0"
urlencoding = "2.1.3"

View file

@ -232,18 +232,7 @@ enum FileEntry {
type Cache = HashMap<String, FileEntry>;
fn small_url_encode(s: &str) -> String {
s.replace('<', "%3C")
.replace('>', "%3E")
.replace(' ', "%20")
.replace('?', "%3F")
.replace('\'', "%27")
.replace('&', "%26")
.replace(',', "%2C")
.replace(':', "%3A")
.replace(';', "%3B")
.replace('[', "%5B")
.replace(']', "%5D")
.replace('\"', "%22")
urlencoding::encode(s).to_string()
}
impl Checker {

View file

@ -3,5 +3,8 @@
<h2 id="barfrag">Bar</h2>
<!-- testing a urlencoded anchor link against a non-urlencoded heading ID -->
<h2 id="barfrag-è">Bar</h2>
</body>
</html>

View file

@ -8,7 +8,15 @@
<a href="https://example.com/doesnotexist">external links not validated</a>
<a href="redir.html#redirfrag">Redirect</a>
<!-- testing urlencoded anchor links against non-urlencoded heading IDs -->
<a href="#localfrag-%C3%A8"></a>
<a href="bar.html#barfrag-%C3%A8"></a>
<a href="redir.html#redirfrag-%C3%A8"></a>
<h2 id="localfrag">Local</h2>
<!-- testing a urlencoded anchor link against a non-urlencoded heading ID -->
<h2 id="localfrag-è">Local</h2>
</body>
</html>

View file

@ -1,5 +1,8 @@
<html>
<body>
<h2 id="redirfrag">Redir</h2>
<!-- testing a urlencoded anchor link against a non-urlencoded heading ID -->
<h2 id="redirfrag-è">Redir</h2>
</body>
</html>