Fix SourceFile::normalized_byte_pos

This method was broken by 258ace6, which changed `self.normalized_pos`
to use relative offsets; however, this method continued to compare
against an absolute offset.

Also adds a regression test for the issue that this method was
originally introduced to fix.
This commit is contained in:
Alan Egerton 2026-01-21 16:23:55 +00:00
parent 7e39015fb5
commit 01290cc9ac
No known key found for this signature in database
GPG key ID: 3D7EA7527916B438
6 changed files with 46 additions and 8 deletions

View file

@ -2426,14 +2426,12 @@ impl SourceFile {
/// normalized one. Hence we need to convert those offsets to the normalized
/// form when constructing spans.
pub fn normalized_byte_pos(&self, offset: u32) -> BytePos {
// Previous lookup (the side of this hunk removed by the commit): the search key is the
// absolute position `self.start_pos.0 + offset`. Per the commit message, `normalized_pos`
// has held relative offsets since 258ace6, so this compared relative entries against an
// absolute key.
let diff = match self
.normalized_pos
.binary_search_by(|np| (np.pos.0 + np.diff).cmp(&(self.start_pos.0 + offset)))
{
Ok(i) => self.normalized_pos[i].diff,
Err(0) => 0,
Err(i) => self.normalized_pos[i - 1].diff,
};
// Replacement lookup (the side of this hunk added by the commit): the search key is the
// file-relative `offset`, with no `start_pos` added, so both sides of the comparison are
// relative. NOTE(review): `np.pos.0 + np.diff` is presumably the entry's un-normalized
// offset; confirm against the `NormalizedPos` definition.
let diff =
match self.normalized_pos.binary_search_by(|np| (np.pos.0 + np.diff).cmp(&offset)) {
// Exact hit: use that entry's diff.
Ok(i) => self.normalized_pos[i].diff,
// Key sorts before every entry: no adjustment is applied.
Err(0) => 0,
// Otherwise use the diff of the closest entry that sorts before the key.
Err(i) => self.normalized_pos[i - 1].diff,
};
// Rebase onto `start_pos` to get an absolute position, minus the selected diff.
BytePos::from_u32(self.start_pos.0 + offset - diff)
}

View file

@ -150,6 +150,7 @@ fn check_unexpected_extension(check: &mut RunningCheck, file_path: &Path, ext: &
const EXTENSION_EXCEPTION_PATHS: &[&str] = &[
"tests/ui/asm/named-asm-labels.s", // loading an external asm file to test named labels lint
"tests/ui/asm/normalize-offsets-for-crlf.s", // loading an external asm file to test CRLF normalization
"tests/ui/codegen/mismatched-data-layout.json", // testing mismatched data layout w/ custom targets
"tests/ui/check-cfg/my-awesome-platform.json", // testing custom targets with cfgs
"tests/ui/argfile/commandline-argfile-badutf8.args", // passing args via a file

2
tests/ui/asm/.gitattributes vendored Normal file
View file

@ -0,0 +1,2 @@
# Disable EOL normalization, as it is deliberately denormalized
normalize-offsets-for-crlf.s -text

View file

@ -0,0 +1,14 @@
// Byte positions into inline assembly reported by codegen errors require normalization or else
// they may not identify the appropriate span. Worse still, an ICE can occur if the erroneous
// span begins or ends part-way through a multibyte character.
//
// Regression test for https://github.com/rust-lang/rust/issues/110885
// This test is tied to assembler syntax and errors, which can vary by backend and architecture.
//@only-x86_64
//@needs-backends: llvm
//@build-fail
//~? ERROR instruction mnemonic
std::arch::global_asm!(include_str!("normalize-offsets-for-crlf.s"));
fn main() {}

View file

@ -0,0 +1,13 @@
// This file contains (some) CRLF line endings. When codegen reports an error, the byte
// offsets into this file that it identifies require normalization or else they will not
// identify the appropriate span. Worse still, an ICE can result if the erroneous span
// begins or ends part-way through a multibyte character such as £.
non_existent_mnemonic
// Without normalization, the three CRLF line endings below cause the diagnostic on the
// `non_existent_mnemonic` above to be spanned three bytes backward, and thus begin
// part-way inside the multibyte character in the preceding comment.
//
// NOTE: The lines of this note DELIBERATELY end with CRLF - DO NOT strip/convert them!
// It may not be obvious if you accidentally do, eg `git diff` may appear to show
// that the lines have been updated to the exact same content.

View file

@ -0,0 +1,10 @@
error: invalid instruction mnemonic 'non_existent_mnemonic'
|
note: instantiated into assembly here
--> <inline asm>:6:1
|
LL | non_existent_mnemonic
| ^^^^^^^^^^^^^^^^^^^^^
error: aborting due to 1 previous error