From 180b4cedec730d0c0127d0d41714abe4176d5365 Mon Sep 17 00:00:00 2001 From: Noritada Kobayashi Date: Mon, 7 Nov 2022 19:25:07 +0900 Subject: [PATCH 1/3] Fix the length displayed for byte string literals with escaped newlines The length of byte strings containing escaped newlines is displayed two bytes longer when the first escaped character is a newline. This is due to a small bug in handling the first escaped newline in string literals. Closes #13567 --- crates/syntax/src/ast/token_ext.rs | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/crates/syntax/src/ast/token_ext.rs b/crates/syntax/src/ast/token_ext.rs index ba72e64425b2..22ad6db9aef3 100644 --- a/crates/syntax/src/ast/token_ext.rs +++ b/crates/syntax/src/ast/token_ext.rs @@ -209,17 +209,19 @@ impl ast::String { let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()]; let mut buf = String::new(); - let mut text_iter = text.chars(); + let mut prev = 0; let mut has_error = false; unescape_literal(text, Mode::Str, &mut |char_range, unescaped_char| match ( unescaped_char, buf.capacity() == 0, ) { (Ok(c), false) => buf.push(c), - (Ok(c), true) if char_range.len() == 1 && Some(c) == text_iter.next() => (), + (Ok(_), true) if char_range.len() == 1 && char_range.start == prev => { + prev = char_range.end + } (Ok(c), true) => { buf.reserve_exact(text.len()); - buf.push_str(&text[..char_range.start]); + buf.push_str(&text[..prev]); buf.push(c); } (Err(_), _) => has_error = true, @@ -252,17 +254,19 @@ impl ast::ByteString { let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()]; let mut buf: Vec = Vec::new(); - let mut text_iter = text.chars(); + let mut prev = 0; let mut has_error = false; unescape_literal(text, Mode::ByteStr, &mut |char_range, unescaped_char| match ( unescaped_char, buf.capacity() == 0, ) { (Ok(c), false) => buf.push(c as u8), - (Ok(c), true) if char_range.len() == 1 && Some(c) == text_iter.next() => (), + (Ok(_), true) if char_range.len() == 1 && char_range.start == prev => { + prev = char_range.end + } (Ok(c), true) => { buf.reserve_exact(text.len()); - buf.extend_from_slice(text[..char_range.start].as_bytes()); + buf.extend_from_slice(text[..prev].as_bytes()); buf.push(c as u8); } (Err(_), _) => has_error = true, @@ -445,6 +449,12 @@ mod tests { check_string_value(r"\foobar", None); check_string_value(r"\nfoobar", "\nfoobar"); check_string_value(r"C:\\Windows\\System32\\", "C:\\Windows\\System32\\"); + check_string_value(r"\x61bcde", "a\x62cde"); + check_string_value( + r"a\ +bcde", "a\ +bcde", + ); } #[test] From bdf854701375c0692f9014db71a097520fddd41f Mon Sep 17 00:00:00 2001 From: Noritada Kobayashi Date: Mon, 7 Nov 2022 22:51:29 +0900 Subject: [PATCH 2/3] Clarify the intent Thanks to Lukas Wirth for a suggestion. --- crates/syntax/src/ast/token_ext.rs | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/crates/syntax/src/ast/token_ext.rs b/crates/syntax/src/ast/token_ext.rs index 22ad6db9aef3..32dd2db405c8 100644 --- a/crates/syntax/src/ast/token_ext.rs +++ b/crates/syntax/src/ast/token_ext.rs @@ -209,19 +209,19 @@ impl ast::String { let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()]; let mut buf = String::new(); - let mut prev = 0; + let mut prev_end = 0; let mut has_error = false; unescape_literal(text, Mode::Str, &mut |char_range, unescaped_char| match ( unescaped_char, buf.capacity() == 0, ) { (Ok(c), false) => buf.push(c), - (Ok(_), true) if char_range.len() == 1 && char_range.start == prev => { - prev = char_range.end + (Ok(_), true) if char_range.len() == 1 && char_range.start == prev_end => { + prev_end = char_range.end } (Ok(c), true) => { buf.reserve_exact(text.len()); - buf.push_str(&text[..prev]); + buf.push_str(&text[..prev_end]); buf.push(c); } (Err(_), _) => has_error = true, @@ -254,19 +254,19 @@ impl ast::ByteString { let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()]; let mut buf: Vec = Vec::new(); - let mut prev = 0; + let mut prev_end = 0; let mut has_error = false; unescape_literal(text, Mode::ByteStr, &mut |char_range, unescaped_char| match ( unescaped_char, buf.capacity() == 0, ) { (Ok(c), false) => buf.push(c as u8), - (Ok(_), true) if char_range.len() == 1 && char_range.start == prev => { - prev = char_range.end + (Ok(_), true) if char_range.len() == 1 && char_range.start == prev_end => { + prev_end = char_range.end } (Ok(c), true) => { buf.reserve_exact(text.len()); - buf.extend_from_slice(text[..prev].as_bytes()); + buf.extend_from_slice(text[..prev_end].as_bytes()); buf.push(c as u8); } (Err(_), _) => has_error = true, @@ -449,11 +449,10 @@ mod tests { check_string_value(r"\foobar", None); check_string_value(r"\nfoobar", "\nfoobar"); check_string_value(r"C:\\Windows\\System32\\", "C:\\Windows\\System32\\"); - check_string_value(r"\x61bcde", "a\x62cde"); + check_string_value(r"\x61bcde", "abcde"); check_string_value( r"a\ -bcde", "a\ -bcde", +bcde", "abcde", ); } From 2340d7059e3b89a5233b0c91cf3c36fa94adfe6e Mon Sep 17 00:00:00 2001 From: Noritada Kobayashi Date: Mon, 7 Nov 2022 23:39:02 +0900 Subject: [PATCH 3/3] Add test code for unescaping byte strings --- crates/syntax/src/ast/token_ext.rs | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/crates/syntax/src/ast/token_ext.rs b/crates/syntax/src/ast/token_ext.rs index 32dd2db405c8..8990f7a7d4e8 100644 --- a/crates/syntax/src/ast/token_ext.rs +++ b/crates/syntax/src/ast/token_ext.rs @@ -456,6 +456,31 @@ bcde", "abcde", ); } + fn check_byte_string_value<'a, const N: usize>( + lit: &str, + expected: impl Into>, + ) { + assert_eq!( + ast::ByteString { syntax: make::tokens::literal(&format!("b\"{}\"", lit)) } + .value() + .as_deref(), + expected.into().map(|value| &value[..]) + ); + } + + #[test] + fn test_byte_string_escape() { + check_byte_string_value(r"foobar", b"foobar"); + check_byte_string_value(r"\foobar", None::<&[u8; 0]>); + check_byte_string_value(r"\nfoobar", b"\nfoobar"); + check_byte_string_value(r"C:\\Windows\\System32\\", b"C:\\Windows\\System32\\"); + check_byte_string_value(r"\x61bcde", b"abcde"); + check_byte_string_value( + r"a\ +bcde", b"abcde", + ); + } + #[test] fn test_value_underscores() { check_float_value("3.141592653589793_f64", 3.141592653589793_f64);