fix partial urlencoded link support

- added full urlencoding to properly check urlencoded anchor links against non-urlencoded heading IDs
- added tests

urlencoding provided by https://crates.io/crates/urlencoding
This commit is contained in:
nixxo 2025-09-04 09:33:11 +02:00 committed by Eric Huss
parent 033c0a4742
commit 8b58777968
6 changed files with 23 additions and 12 deletions

View file

@ -2167,6 +2167,7 @@ version = "0.1.0"
dependencies = [
"html5ever",
"regex",
"urlencoding",
]
[[package]]
@ -5825,6 +5826,12 @@ dependencies = [
"percent-encoding",
]
[[package]]
name = "urlencoding"
version = "2.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da"
[[package]]
name = "utf-8"
version = "0.7.6"

View file

@ -10,3 +10,4 @@ path = "main.rs"
[dependencies]
regex = "1"
html5ever = "0.29.0"
urlencoding = "2.1.3"

View file

@ -232,18 +232,7 @@ enum FileEntry {
/// Lookup table holding one `FileEntry` per `String` key.
// NOTE(review): presumably the keys are file paths — confirm against the callers.
type Cache = HashMap<String, FileEntry>;
fn small_url_encode(s: &str) -> String {
s.replace('<', "%3C")
.replace('>', "%3E")
.replace(' ', "%20")
.replace('?', "%3F")
.replace('\'', "%27")
.replace('&', "%26")
.replace(',', "%2C")
.replace(':', "%3A")
.replace(';', "%3B")
.replace('[', "%5B")
.replace(']', "%5D")
.replace('\"', "%22")
urlencoding::encode(s).to_string()
}
impl Checker {

View file

@ -3,5 +3,8 @@
<h2 id="barfrag">Bar</h2>
<!-- testing a URL-encoded anchor link against a non-URL-encoded heading ID -->
<h2 id="barfrag-è">Bar</h2>
</body>
</html>

View file

@ -8,7 +8,15 @@
<a href="https://example.com/doesnotexist">external links not validated</a>
<a href="redir.html#redirfrag">Redirect</a>
<!-- testing URL-encoded anchor links against non-URL-encoded heading IDs -->
<a href="#localfrag-%C3%A8"></a>
<a href="bar.html#barfrag-%C3%A8"></a>
<a href="redir.html#redirfrag-%C3%A8"></a>
<h2 id="localfrag">Local</h2>
<!-- testing a URL-encoded anchor link against a non-URL-encoded heading ID -->
<h2 id="localfrag-è">Local</h2>
</body>
</html>

View file

@ -1,5 +1,8 @@
<html>
<body>
<h2 id="redirfrag">Redir</h2>
<!-- testing a URL-encoded anchor link against a non-URL-encoded heading ID -->
<h2 id="redirfrag-è">Redir</h2>
</body>
</html>