From aa95fcd4610aaad851fe7d860f8e9f5c96b58eaf Mon Sep 17 00:00:00 2001 From: michal kostrubiec Date: Sat, 7 Jun 2025 23:51:55 +0200 Subject: [PATCH 1/2] Add support for automatically reducing found fuzz cases. --- build_system/src/fuzz.rs | 93 +++++++--- build_system/src/fuzz/reduce.rs | 307 ++++++++++++++++++++++++++++++++ 2 files changed, 379 insertions(+), 21 deletions(-) create mode 100644 build_system/src/fuzz/reduce.rs diff --git a/build_system/src/fuzz.rs b/build_system/src/fuzz.rs index 05a87412b361..1bc43f525ef6 100644 --- a/build_system/src/fuzz.rs +++ b/build_system/src/fuzz.rs @@ -1,13 +1,19 @@ use std::ffi::OsStr; use std::path::Path; +mod reduce; + use crate::utils::run_command_with_output; fn show_usage() { println!( r#" `fuzz` command help: - --help : Show this help"# + --reduce : Reduces a file generated by rustlantis + --help : Show this help + --start : Start of the fuzzed range + --count : The number of cases to fuzz + -j --jobs : The number of threads to use during fuzzing"# ); } @@ -20,6 +26,16 @@ pub fn run() -> Result<(), String> { std::thread::available_parallelism().map(|threads| threads.get()).unwrap_or(1); while let Some(arg) = args.next() { match arg.as_str() { + "--reduce" => { + let Some(path) = args.next() else { + return Err("--reduce must be provided with a path".into()); + }; + if !std::fs::exists(&path).unwrap_or(false) { + return Err("--reduce must be provided with a valid path".into()); + } + reduce::reduce(&path); + return Ok(()); + } "--help" => { show_usage(); return Ok(()); @@ -75,16 +91,17 @@ fn fuzz_range(start: u64, end: u64, threads: usize) { let start = Arc::new(AtomicU64::new(start)); // Count time during fuzzing let start_time = Instant::now(); + let mut workers = Vec::with_capacity(threads); // Spawn `threads`.. for _ in 0..threads { let start = start.clone(); // .. which each will .. - std::thread::spawn(move || { + workers.push(std::thread::spawn(move || { // ... grab the next fuzz seed ... while start.load(Ordering::Relaxed) < end { let next = start.fetch_add(1, Ordering::Relaxed); // .. test that seed . - match test(next) { + match test(next, false) { Err(err) => { // If the test failed at compile-time... println!("test({}) failed because {err:?}", next); @@ -99,21 +116,30 @@ fn fuzz_range(start: u64, end: u64, threads: usize) { Ok(Err(err)) => { // If the test failed at run-time... println!("The LLVM and GCC results don't match for {err:?}"); - // ... copy that file to the directory `target/fuzz/runtime_error`... + // ... generate a new file, which prints temporaries(instead of hashing them)... let mut out_path: std::path::PathBuf = "target/fuzz/runtime_error".into(); std::fs::create_dir_all(&out_path).unwrap(); - // .. into a file named `fuzz{seed}.rs`. + let Ok(Err(tmp_print_err)) = test(next, true) else { + // ... if that file does not reproduce the issue... + // ... save the original sample in a file named `fuzz{seed}.rs`... + out_path.push(&format!("fuzz{next}.rs")); + std::fs::copy(err, &out_path).unwrap(); + continue; + }; + // ... if that new file still produces the issue, copy it to `fuzz{seed}.rs`.. out_path.push(&format!("fuzz{next}.rs")); - std::fs::copy(err, out_path).unwrap(); + std::fs::copy(tmp_print_err, &out_path).unwrap(); + // ... and start reducing it, using some propierites of `rustlantis` to speed up the process. + reduce::reduce(&out_path); } // If the test passed, do nothing Ok(Ok(())) => (), } } - }); + })); } // The "manager" thread loop. - while start.load(Ordering::Relaxed) < end { + while start.load(Ordering::Relaxed) < end || !workers.iter().all(|t| t.is_finished()) { // Every 500 ms... let five_hundred_millis = Duration::from_millis(500); std::thread::sleep(five_hundred_millis); @@ -121,7 +147,7 @@ fn fuzz_range(start: u64, end: u64, threads: usize) { let remaining = end - start.load(Ordering::Relaxed); // ... fix the count(the start counter counts the cases that // begun fuzzing, and not only the ones that are done)... - let fuzzed = (total - remaining) - threads as u64; + let fuzzed = (total - remaining).saturating_sub(threads as u64); // ... and the fuzz speed ... let iter_per_sec = fuzzed as f64 / start_time.elapsed().as_secs_f64(); // .. and use them to display fuzzing stats. @@ -131,6 +157,7 @@ fn fuzz_range(start: u64, end: u64, threads: usize) { (remaining as f64) / iter_per_sec ) } + drop(workers); } /// Builds & runs a file with LLVM. @@ -200,35 +227,59 @@ fn release_gcc(path: &std::path::Path) -> Result, String> { } /// Generates a new rustlantis file, & compares the result of running it with GCC and LLVM. -fn test(seed: u64) -> Result, String> { +fn test(seed: u64, print_tmp_vars: bool) -> Result, String> { // Generate a Rust source... - let source_file = generate(seed)?; - // ... test it with debug LLVM ... - let llvm_res = debug_llvm(&source_file)?; + let source_file = generate(seed, print_tmp_vars)?; + test_file(&source_file, true) +} +/// Tests a file with a cached LLVM result. Used for reduction, when it is known +/// that a given transformation should not change the execution result. +fn test_cached( + source_file: &Path, + remove_tmps: bool, + cache: &mut Option>, +) -> Result, String> { + if let None = cache { + // Test `source_file` with debug LLVM ... + *cache = Some(debug_llvm(&source_file)?); + } + let llvm_res = cache.as_ref().unwrap(); // ... test it with release GCC ... let gcc_res = release_gcc(&source_file)?; // ... compare the results ... - if llvm_res != gcc_res { + if *llvm_res != gcc_res { // .. if they don't match, report an error. - Ok(Err(source_file)) + Ok(Err(source_file.to_path_buf())) } else { - std::fs::remove_file(source_file).map_err(|err| format!("{err:?}"))?; + if remove_tmps { + std::fs::remove_file(source_file).map_err(|err| format!("{err:?}"))?; + } Ok(Ok(())) } } +fn test_file( + source_file: &Path, + remove_tmps: bool, +) -> Result, String> { + let mut uncached = None; + test_cached(source_file, remove_tmps, &mut uncached) +} /// Generates a new rustlantis file for us to run tests on. -fn generate(seed: u64) -> Result { +fn generate(seed: u64, print_tmp_vars: bool) -> Result { use std::io::Write; let mut out_path = std::env::temp_dir(); out_path.push(&format!("fuzz{seed}.rs")); // We need to get the command output here. - let out = std::process::Command::new("cargo") + let mut generate = std::process::Command::new("cargo"); + generate .args(["run", "--release", "--bin", "generate"]) .arg(&format!("{seed}")) - .current_dir("clones/rustlantis") - .output() - .map_err(|err| format!("{err:?}"))?; + .current_dir("clones/rustlantis"); + if print_tmp_vars { + generate.arg("--debug"); + } + let out = generate.output().map_err(|err| format!("{err:?}"))?; // Stuff the rustlantis output in a source file. std::fs::File::create(&out_path) .map_err(|err| format!("{err:?}"))? diff --git a/build_system/src/fuzz/reduce.rs b/build_system/src/fuzz/reduce.rs new file mode 100644 index 000000000000..1e6dac8b7916 --- /dev/null +++ b/build_system/src/fuzz/reduce.rs @@ -0,0 +1,307 @@ +use std::io::Write; +use std::path::{Path, PathBuf}; +fn save_reduction(lines: &[String], path: &PathBuf, ext: &str) { + let mut path = path.clone(); + path.set_extension(&format!("rs.{ext}")); + let mut file = std::fs::File::create(&path).expect("Could not create the reduced example file"); + for line in lines { + file.write_all(line.as_bytes()).expect("Could not save the reduced example"); + } +} +/// Checks if a given reduction is valid. +fn test_reduction(lines: &[String], path: &PathBuf, cache: &mut Option>) -> bool { + let mut path = path.clone(); + path.set_extension("rs_reduced"); + let mut file = std::fs::File::create(&path).expect("Could not create the reduced example file"); + for line in lines { + file.write_all(line.as_bytes()).expect("Could not save the reduced example"); + } + let Ok(Err(_)) = super::test_cached(&path, false, cache) else { + return false; + }; + return true; +} +/// Removes duplicate assigements in bulk. +/// If a line A = B is followed directly by A = C, +/// then removing the second line ought to be fully sound, +/// and not change the behaviour of the program at all. Detect & remove such lines. +fn remove_dup_assign( + file: &mut Vec, + path: &PathBuf, + starts: usize, + ends: usize, + cache: &mut Option>, +) { + let mut curr = 0; + let mut file_copy = file.clone(); + let mut reduction_count = 0; + // Not worth it. + if ends - starts < 8 { + return; + } + for index in starts..ends { + let Some((prefix, _)) = file_copy[index].split_once('=') else { + continue; + }; + let Some((prefix2, _)) = file_copy[index + 1].split_once('=') else { + continue; + }; + let prefix = prefix.trim(); + let prefix2 = prefix2.trim(); + + if prefix == prefix2 { + file_copy[index] = "".into(); + reduction_count += 1; + } + } + if reduction_count == 0 { + return; + } + if test_reduction(&file_copy, &path, cache) { + eprintln!("Reduced {path:?} by {} lines `remove_dup_assign`", reduction_count); + *file = file_copy; + } else { + remove_dup_assign(file, path, starts, (starts + ends) / 2, cache); + remove_dup_assign(file, path, (starts + ends) / 2, ends, cache); + } + save_reduction(file, path, "remove_dup_assign"); +} +/// Removes all the unneeded calls to `dump_var`. This is not something tools like `cvise` can do, +/// but it greately speeds up MIR interpretation + native execution. +fn remove_dump_var(file: &mut Vec, path: &PathBuf) { + let mut curr = 0; + // ... try disabling `dump_vars` one by one, until only the neccesarry ones are left. + while curr < file.len() { + let Some(line) = file[curr..].iter().position(|line| line.contains("dump_var")) else { + // No more `dump_var`s to remove - exit early. + break; + }; + // Make the line absolute again. + let line = line + curr; + let mut file_copy = file.clone(); + // Try removing 3 consecutive lines(the call, block end and block beginning). This effectively removes a `dump_var`. + file_copy.remove(line); + file_copy.remove(line); + file_copy.remove(line); + // Not cached - the execution result can change. + let mut uncached = None; + // Check if this reduction is valid. + if test_reduction(&file_copy, &path, &mut uncached) { + eprintln!("Reduced {path:?} by 3 lines `remove_dump_var`"); + *file = file_copy; + curr = line; + } else { + curr = line + 1; + } + } + save_reduction(file, path, "remove_dump_var"); +} +/// Replaces matches with gotos where possible. +/// This exploits some properties of rustlantis(match arm order), +/// and is only soundly applicable to MIR generated by it. +/// Still, it is not something `cvise` can do, but it simplifies the code a ton. +fn match_to_goto(file: &mut Vec, path: &PathBuf) { + let mut cache = None; + let mut curr = 0; + while curr < file.len() { + let Some(match_starts) = file[curr..].iter().position(|line| line.contains("match")) else { + // No more `match`es to remove - exit early. + break; + }; + let match_starts = match_starts + curr; + // Find the end of the match + let Some(match_ends) = file[match_starts..].iter().position(|line| line.contains('}')) + else { + // Can't find match end - exit early. + break; + }; + let match_ends = match_ends + match_starts; + let match_body = &file[match_starts..match_ends]; + + // Find where this match should normally jump to. + // This *should* be the second-last arm of the match, as per the paper(the remaining blocks are decoys). + // If this ever changes, this reduction may not always be sound. + // This is not a problem, however: we NEED to use MIRI for reduction anwyway, + // and it will catch this issue. + let jumps_to = &match_body[match_body.len() - 2].trim(); + let Some((_, bb_ident)) = jumps_to.split_once("bb") else { + break; + }; + // We now have the number of the block we jump to at runtime. + let bb_ident = bb_ident.trim_matches(','); + // Try replacing this match with an unconditional jump. + let mut file_copy = file.clone(); + for _ in match_starts..(match_ends + 1) { + file_copy.remove(match_starts); + } + file_copy.insert(match_starts, format!("Goto(bb{bb_ident})\n")); + if test_reduction(&file_copy, &path, &mut cache) { + eprintln!("Reduced {path:?} by {} lines `match_to_goto`", match_ends - match_starts); + *file = file_copy; + curr = match_starts; + } else { + curr = match_ends; + } + } + save_reduction(file, path, "match_to_goto"); +} +/// At this point, we can try "killing" blocks, by replacing their bodies with calls to `abort`. +/// This is always sound(the program aborts, so no UB can occur after the block), +/// and allows us to safely remove *a lot* of unneeded blocks. +fn block_abort(file: &mut Vec, path: &PathBuf) { + let mut curr = 0; + let mut cache = None; + while curr < file.len() { + let Some(block_starts) = file[curr..] + .iter() + .position(|line| line.starts_with("bb") && line.trim_end().ends_with(" = {")) + else { + // No more `block`s to kill - exit early. + break; + }; + let block_starts = block_starts + curr; + // Find the beginning of the next block to find the end of this block. + let Some(block_ends) = file[(block_starts + 1)..] + .iter() + .position(|line| line.starts_with("bb") && line.trim_end().ends_with(" = {")) + else { + // No more `block`s to kill - exit early. + break; + }; + let block_ends = block_starts + block_ends; + let block_starts = block_starts + 1; + let mut file_copy = file.clone(); + // Remove the block body... + for _ in block_starts..(block_ends) { + file_copy.remove(block_starts); + } + // ..and insert an unconditional call to abort. + file_copy.insert( + block_starts, + format!("Call(tmp = core::intrinsics::abort(), ReturnTo(bb1), UnwindUnreachable())\n"), + ); + file_copy.insert(block_starts, format!("let tmp = ();\n")); + + if test_reduction(&file_copy, &path, &mut cache) { + eprintln!("Reduced {path:?} by {} lines `block_abort`", block_ends - block_starts - 2); + *file = file_copy; + curr = block_starts; + } else { + curr = block_ends; + } + } + save_reduction(file, path, "block_abort"); +} +/// Removes unreachable basic blocks +fn remove_block(file: &mut Vec, path: &PathBuf) { + let mut curr = 0; + let mut cache = None; + // Next, we try to outright remove blocks. + while curr < file.len() { + let Some(block_starts) = file[curr..] + .iter() + .position(|line| line.starts_with("bb") && line.trim_end().ends_with(" = {")) + else { + // No more `block`s to remove - exit early. + break; + }; + let block_starts = block_starts + curr; + // Find the beginning of the next block to find the end of this block. + let Some(block_ends) = file[(block_starts + 1)..] + .iter() + .position(|line| line.starts_with("bb") && line.trim_end().ends_with(" = {")) + else { + // No more `block`s to remove - exit early. + break; + }; + let block_ends = block_starts + block_ends + 1; + // Large blocks are likely to be neccsarry. + if block_ends - block_starts > 6 { + curr = block_starts + 1; + continue; + } + let mut file_copy = file.clone(); + file_copy.drain(block_starts..block_ends); + if test_reduction(&file_copy, &path, &mut cache) { + eprintln!("Reduced {path:?} by {} lines `remove_blocks`", block_ends - block_starts); + *file = file_copy; + curr = block_starts; + } else { + curr = block_starts + 1; + } + } + save_reduction(file, path, "remove_block"); +} +/// Merges blocks ending with unconditional jumps. +fn linearize_cf(file: &mut Vec, path: &PathBuf) { + let mut curr = 0; + let mut cache = None; + // Next, we try to linearize the control flow. What the does that mean? + // Given a sequence like this: + // Goto(bb22) + // } + // bb22 = { + // We remove those 3 lines, merging the blocks together. This is not something `cvise` can do, + // and it makes other transformations easier. + while curr < file.len() { + let Some(block_starts) = file[curr..] + .iter() + .position(|line| line.starts_with("bb") && line.trim_end().ends_with(" = {")) + else { + // No more `block`s to remove - exit early. + break; + }; + let block_starts = block_starts + curr; + // Extract the block id. + let Some((block, _)) = file[block_starts].split_once('=') else { + curr = block_starts + 1; + continue; + }; + let block = block.trim(); + if file[block_starts - 2].trim() != format!("Goto({block})") { + curr = block_starts + 1; + continue; + } + let mut file_copy = file.clone(); + // Try removing 3 consecutive lines(the goto, block end and block beginning). This effectively removes a `Goto(next)`. + file_copy.remove(block_starts - 2); + file_copy.remove(block_starts - 2); + file_copy.remove(block_starts - 2); + // Check if this reduction is valid. + if test_reduction(&file_copy, &path, &mut cache) { + eprintln!("Reduced {path:?} by 3 lines `linearize_cf`"); + *file = file_copy; + curr = block_starts; + } else { + curr = block_starts + 1; + } + } + save_reduction(file, path, "linearize_cf"); +} +pub(super) fn reduce(path: impl AsRef) { + let path = path.as_ref().to_owned(); + // ... read the file to a buffer .. + let file = std::fs::read_to_string(&path).expect("Could not open the file to reduce"); + let mut file: Vec<_> = file.split_inclusive('\n').map(|s| s.to_string()).collect(); + + // ... and run reduction passes. + eprintln!("running `remove_dump_var` on {path:?}."); + remove_dump_var(&mut file, &path); + let len = file.len(); + let mut cache = None; + eprintln!("running `remove_dup_assign` on {path:?}."); + remove_dup_assign(&mut file, &path, 0, len, &mut cache); + file.retain(|line| !line.is_empty()); + eprintln!("running `match_to_goto` on {path:?}."); + match_to_goto(&mut file, &path); + eprintln!("running `block_abort` on {path:?}."); + block_abort(&mut file, &path); + eprintln!("running `remove_block` on {path:?}."); + remove_block(&mut file, &path); + eprintln!("running `linearize_cf` on {path:?}."); + linearize_cf(&mut file, &path); + let mut out = std::fs::File::create(&path).expect("Could not save the reduction result."); + for line in file { + out.write_all(line.as_bytes()); + } +} From e3d4805a7b6072477e9e7f50031af9a250da74fc Mon Sep 17 00:00:00 2001 From: michal kostrubiec Date: Sun, 8 Jun 2025 18:52:32 +0200 Subject: [PATCH 2/2] Improved reduction by adding support for removign dead functions. Fixed typos, formating. --- build_system/src/fuzz.rs | 20 +-- build_system/src/fuzz/reduce.rs | 208 +++++++++++++++++++++++++------- 2 files changed, 177 insertions(+), 51 deletions(-) diff --git a/build_system/src/fuzz.rs b/build_system/src/fuzz.rs index 1bc43f525ef6..f170453bfe4c 100644 --- a/build_system/src/fuzz.rs +++ b/build_system/src/fuzz.rs @@ -129,7 +129,7 @@ fn fuzz_range(start: u64, end: u64, threads: usize) { // ... if that new file still produces the issue, copy it to `fuzz{seed}.rs`.. out_path.push(&format!("fuzz{next}.rs")); std::fs::copy(tmp_print_err, &out_path).unwrap(); - // ... and start reducing it, using some propierites of `rustlantis` to speed up the process. + // ... and start reducing it, using some properties of `rustlantis` to speed up the process. reduce::reduce(&out_path); } // If the test passed, do nothing @@ -225,7 +225,7 @@ fn release_gcc(path: &std::path::Path) -> Result, String> { res.extend(output.stderr); Ok(res) } - +type ResultCache = Option<(Vec, Vec)>; /// Generates a new rustlantis file, & compares the result of running it with GCC and LLVM. fn test(seed: u64, print_tmp_vars: bool) -> Result, String> { // Generate a Rust source... @@ -237,17 +237,17 @@ fn test(seed: u64, print_tmp_vars: bool) -> Result>, + cache: &mut ResultCache, ) -> Result, String> { - if let None = cache { - // Test `source_file` with debug LLVM ... - *cache = Some(debug_llvm(&source_file)?); - } - let llvm_res = cache.as_ref().unwrap(); - // ... test it with release GCC ... + // Test `source_file` with release GCC ... let gcc_res = release_gcc(&source_file)?; + if cache.is_none() { + // ...test `source_file` with debug LLVM ... + *cache = Some((debug_llvm(&source_file)?, gcc_res.clone())); + } + let (llvm_res, old_gcc) = cache.as_ref().unwrap(); // ... compare the results ... - if *llvm_res != gcc_res { + if *llvm_res != gcc_res && gcc_res == *old_gcc { // .. if they don't match, report an error. Ok(Err(source_file.to_path_buf())) } else { diff --git a/build_system/src/fuzz/reduce.rs b/build_system/src/fuzz/reduce.rs index 1e6dac8b7916..3c18c9555bd9 100644 --- a/build_system/src/fuzz/reduce.rs +++ b/build_system/src/fuzz/reduce.rs @@ -1,36 +1,43 @@ use std::io::Write; use std::path::{Path, PathBuf}; -fn save_reduction(lines: &[String], path: &PathBuf, ext: &str) { + +use super::ResultCache; + +/// Saves a reduced file for a given `stage` +fn save_reduction(lines: &[String], path: &PathBuf, stage: &str) { let mut path = path.clone(); - path.set_extension(&format!("rs.{ext}")); + path.set_extension(&format!("rs.{stage}")); let mut file = std::fs::File::create(&path).expect("Could not create the reduced example file"); for line in lines { file.write_all(line.as_bytes()).expect("Could not save the reduced example"); } } + /// Checks if a given reduction is valid. -fn test_reduction(lines: &[String], path: &PathBuf, cache: &mut Option>) -> bool { +fn test_reduction(lines: &[String], path: &PathBuf, cache: &mut ResultCache) -> bool { let mut path = path.clone(); path.set_extension("rs_reduced"); let mut file = std::fs::File::create(&path).expect("Could not create the reduced example file"); for line in lines { file.write_all(line.as_bytes()).expect("Could not save the reduced example"); } - let Ok(Err(_)) = super::test_cached(&path, false, cache) else { + let res = super::test_cached(&path, false, cache); + let Ok(Err(_)) = res else { return false; }; return true; } -/// Removes duplicate assigements in bulk. + +/// Removes duplicate assignments in bulk. /// If a line A = B is followed directly by A = C, -/// then removing the second line ought to be fully sound, +/// then removing the first line ought to be fully sound, /// and not change the behaviour of the program at all. Detect & remove such lines. fn remove_dup_assign( file: &mut Vec, path: &PathBuf, starts: usize, ends: usize, - cache: &mut Option>, + cache: &mut ResultCache, ) { let mut curr = 0; let mut file_copy = file.clone(); @@ -43,34 +50,52 @@ fn remove_dup_assign( let Some((prefix, _)) = file_copy[index].split_once('=') else { continue; }; - let Some((prefix2, _)) = file_copy[index + 1].split_once('=') else { + let Some((prefix2, postifx2)) = file_copy[index + 1].split_once('=') else { continue; }; let prefix = prefix.trim(); let prefix2 = prefix2.trim(); - - if prefix == prefix2 { + // FIXME: Right now, remove_dup_assign cares about assignments to the exact same place. + // However, given an assigemnt like this: + // ``` + // A.0 = 1_u32; + // A = (2_u32, 3.0); + // ``` + // The first assignment could be safely omitted. + // Additionally, we try to check if the second assignment could depend on the first one. + // In such cases, the result is likely to change, so we bail. + if prefix == prefix2 && !postifx2.contains(prefix) { file_copy[index] = "".into(); reduction_count += 1; } } + // We have removed no lines - no point in testing. if reduction_count == 0 { return; } + // Check if the removed lines affected the execution result in any way, shape or form. if test_reduction(&file_copy, &path, cache) { - eprintln!("Reduced {path:?} by {} lines `remove_dup_assign`", reduction_count); + println!("Reduced {path:?} by {} lines `remove_dup_assign`", reduction_count); *file = file_copy; } else { + // The execution result changed. + // This can occur if the second assignment depended on the first one. + // Eg. + // ``` + // a = b + c; + // a = a + d; + // ``` remove_dup_assign(file, path, starts, (starts + ends) / 2, cache); remove_dup_assign(file, path, (starts + ends) / 2, ends, cache); } save_reduction(file, path, "remove_dup_assign"); } + /// Removes all the unneeded calls to `dump_var`. This is not something tools like `cvise` can do, /// but it greately speeds up MIR interpretation + native execution. fn remove_dump_var(file: &mut Vec, path: &PathBuf) { let mut curr = 0; - // ... try disabling `dump_vars` one by one, until only the neccesarry ones are left. + // ... try disabling `dump_vars` one by one, until only the necessary ones are left. while curr < file.len() { let Some(line) = file[curr..].iter().position(|line| line.contains("dump_var")) else { // No more `dump_var`s to remove - exit early. @@ -87,7 +112,7 @@ fn remove_dump_var(file: &mut Vec, path: &PathBuf) { let mut uncached = None; // Check if this reduction is valid. if test_reduction(&file_copy, &path, &mut uncached) { - eprintln!("Reduced {path:?} by 3 lines `remove_dump_var`"); + println!("Reduced {path:?} by 3 lines `remove_dump_var`"); *file = file_copy; curr = line; } else { @@ -96,13 +121,14 @@ fn remove_dump_var(file: &mut Vec, path: &PathBuf) { } save_reduction(file, path, "remove_dump_var"); } + /// Replaces matches with gotos where possible. /// This exploits some properties of rustlantis(match arm order), /// and is only soundly applicable to MIR generated by it. /// Still, it is not something `cvise` can do, but it simplifies the code a ton. -fn match_to_goto(file: &mut Vec, path: &PathBuf) { - let mut cache = None; +fn match_to_goto(file: &mut Vec, path: &PathBuf, cache: &mut ResultCache) { let mut curr = 0; + while curr < file.len() { let Some(match_starts) = file[curr..].iter().position(|line| line.contains("match")) else { // No more `match`es to remove - exit early. @@ -135,8 +161,8 @@ fn match_to_goto(file: &mut Vec, path: &PathBuf) { file_copy.remove(match_starts); } file_copy.insert(match_starts, format!("Goto(bb{bb_ident})\n")); - if test_reduction(&file_copy, &path, &mut cache) { - eprintln!("Reduced {path:?} by {} lines `match_to_goto`", match_ends - match_starts); + if test_reduction(&file_copy, &path, cache) { + println!("Reduced {path:?} by {} lines `match_to_goto`", match_ends - match_starts); *file = file_copy; curr = match_starts; } else { @@ -145,12 +171,12 @@ fn match_to_goto(file: &mut Vec, path: &PathBuf) { } save_reduction(file, path, "match_to_goto"); } + /// At this point, we can try "killing" blocks, by replacing their bodies with calls to `abort`. /// This is always sound(the program aborts, so no UB can occur after the block), /// and allows us to safely remove *a lot* of unneeded blocks. -fn block_abort(file: &mut Vec, path: &PathBuf) { +fn block_abort(file: &mut Vec, path: &PathBuf, cache: &mut ResultCache) { let mut curr = 0; - let mut cache = None; while curr < file.len() { let Some(block_starts) = file[curr..] .iter() @@ -182,8 +208,8 @@ fn block_abort(file: &mut Vec, path: &PathBuf) { ); file_copy.insert(block_starts, format!("let tmp = ();\n")); - if test_reduction(&file_copy, &path, &mut cache) { - eprintln!("Reduced {path:?} by {} lines `block_abort`", block_ends - block_starts - 2); + if test_reduction(&file_copy, &path, cache) { + println!("Reduced {path:?} by {} lines `block_abort`", block_ends - block_starts - 2); *file = file_copy; curr = block_starts; } else { @@ -192,10 +218,11 @@ fn block_abort(file: &mut Vec, path: &PathBuf) { } save_reduction(file, path, "block_abort"); } + /// Removes unreachable basic blocks -fn remove_block(file: &mut Vec, path: &PathBuf) { +fn remove_block(file: &mut Vec, path: &PathBuf, cache: &mut ResultCache) { let mut curr = 0; - let mut cache = None; + // Next, we try to outright remove blocks. while curr < file.len() { let Some(block_starts) = file[curr..] @@ -215,15 +242,15 @@ fn remove_block(file: &mut Vec, path: &PathBuf) { break; }; let block_ends = block_starts + block_ends + 1; - // Large blocks are likely to be neccsarry. + // Large blocks are likely to be necessary. if block_ends - block_starts > 6 { curr = block_starts + 1; continue; } let mut file_copy = file.clone(); file_copy.drain(block_starts..block_ends); - if test_reduction(&file_copy, &path, &mut cache) { - eprintln!("Reduced {path:?} by {} lines `remove_blocks`", block_ends - block_starts); + if test_reduction(&file_copy, &path, cache) { + println!("Reduced {path:?} by {} lines `remove_blocks`", block_ends - block_starts); *file = file_copy; curr = block_starts; } else { @@ -232,10 +259,11 @@ fn remove_block(file: &mut Vec, path: &PathBuf) { } save_reduction(file, path, "remove_block"); } + /// Merges blocks ending with unconditional jumps. -fn linearize_cf(file: &mut Vec, path: &PathBuf) { +fn linearize_cf(file: &mut Vec, path: &PathBuf, cache: &mut ResultCache) { let mut curr = 0; - let mut cache = None; + // Next, we try to linearize the control flow. What the does that mean? // Given a sequence like this: // Goto(bb22) @@ -268,8 +296,8 @@ fn linearize_cf(file: &mut Vec, path: &PathBuf) { file_copy.remove(block_starts - 2); file_copy.remove(block_starts - 2); // Check if this reduction is valid. - if test_reduction(&file_copy, &path, &mut cache) { - eprintln!("Reduced {path:?} by 3 lines `linearize_cf`"); + if test_reduction(&file_copy, &path, cache) { + println!("Reduced {path:?} by 3 lines `linearize_cf`"); *file = file_copy; curr = block_starts; } else { @@ -278,6 +306,93 @@ fn linearize_cf(file: &mut Vec, path: &PathBuf) { } save_reduction(file, path, "linearize_cf"); } + +/// Replaces a call to a given function with a 0 assignment to the destination place, and a Goto. +/// This is always sound, because: +/// 1. All the functions arguments are always initialized +/// 2. and point to initialized memory(the operand of &raw must be an initialized place in rustlantis). +fn remove_fn_calls(file: &mut Vec, path: &PathBuf, cache: &mut ResultCache) { + let mut curr = 0; + + while curr < file.len() { + let Some(fn_call) = + file[curr..].iter().position(|line| line.contains("Call(") && line.contains(" = fn")) + else { + // No more calls to remove - exit early. + break; + }; + let fn_call = fn_call + curr; + let line = file[fn_call].trim(); + // Skip the Call( + let line = &line["Call(".len()..]; + // Extract the destination place + let Some((place, line)) = line.split_once('=') else { + curr = fn_call + 1; + continue; + }; + // Skip till the return block id. + let Some((_, line)) = line.split_once("ReturnTo(") else { + curr = fn_call + 1; + continue; + }; + // Extract the full return block + let Some((block, _)) = line.split_once(')') else { + curr = fn_call + 1; + continue; + }; + let mut file_copy = file.clone(); + // Remove the call. + file_copy.remove(fn_call); + file_copy.insert(fn_call, format!("Goto({block})\n")); + file_copy.insert(fn_call, format!("{place} = 0;\n")); + // Check if this reduction is valid. + if test_reduction(&file_copy, &path, cache) { + println!("Reduced {path:?} using `remove_fn_calls` {cache:?}"); + *file = file_copy; + curr = fn_call; + } else { + curr = fn_call + 1; + } + } + save_reduction(file, path, "remove_fn_calls"); +} + +/// Fully removes unreachable functions. +fn remove_fns(file: &mut Vec, path: &PathBuf, cache: &mut ResultCache) { + let mut curr = 0; + + while curr < file.len() { + // Find a function start + let Some(fn_start) = file[curr..].iter().position(|line| { + line.contains("#[custom_mir(dialect = \"runtime\", phase = \"initial\")]") + }) else { + // No more functions to remove - exit early. + break; + }; + // Find the next function(and use that to find the end of this one). + // FIXME: this check is flawed: it will never remove the very last function(the one before main). + // The other checks will turn that function into a single call to abort, but it is still annoying that it is kept. + let fn_start = fn_start + curr; + let Some(fn_end) = file[(fn_start + 3)..].iter().position(|line| line.contains("fn fn")) + else { + // No more functions to remove - exit early. + break; + }; + let fn_end = fn_start + 2 + fn_end; + let mut file_copy = file.clone(); + // Remove the function.\\ + file_copy.drain(fn_start..fn_end); + // Check if this reduction is valid. + if test_reduction(&file_copy, &path, cache) { + println!("Reduced {path:?} by {} lines `remove_fns`", fn_end - fn_start); + *file = file_copy; + } else { + curr = fn_start + 1; + } + } + save_reduction(file, path, "remove_fns"); +} + pub(super) fn reduce(path: impl AsRef) { let path = path.as_ref().to_owned(); // ... read the file to a buffer .. @@ -285,21 +400,32 @@ pub(super) fn reduce(path: impl AsRef) { let mut file: Vec<_> = file.split_inclusive('\n').map(|s| s.to_string()).collect(); // ... and run reduction passes. - eprintln!("running `remove_dump_var` on {path:?}."); + println!("running `remove_dump_var` on {path:?}."); remove_dump_var(&mut file, &path); - let len = file.len(); + // After `dump_var`, the execution results ought not to change. Cache them. let mut cache = None; - eprintln!("running `remove_dup_assign` on {path:?}."); + // Fill the cache + assert!( + test_reduction(&file, &path, &mut cache), + "Reduction error: check that the input file is a valid reproducer." + ); + println!("cache:{cache:?}"); + println!("running `remove_fn_calls` on {path:?}."); + remove_fn_calls(&mut file, &path, &mut cache); + println!("running `remove_fns` on {path:?}."); + remove_fns(&mut file, &path, &mut cache); + let len = file.len(); + println!("running `remove_dup_assign` on {path:?}."); remove_dup_assign(&mut file, &path, 0, len, &mut cache); file.retain(|line| !line.is_empty()); - eprintln!("running `match_to_goto` on {path:?}."); - match_to_goto(&mut file, &path); - eprintln!("running `block_abort` on {path:?}."); - block_abort(&mut file, &path); - eprintln!("running `remove_block` on {path:?}."); - remove_block(&mut file, &path); - eprintln!("running `linearize_cf` on {path:?}."); - linearize_cf(&mut file, &path); + println!("running `match_to_goto` on {path:?}."); + match_to_goto(&mut file, &path, &mut cache); + println!("running `block_abort` on {path:?}."); + block_abort(&mut file, &path, &mut cache); + println!("running `remove_block` on {path:?}."); + remove_block(&mut file, &path, &mut cache); + println!("running `linearize_cf` on {path:?}."); + linearize_cf(&mut file, &path, &mut cache); let mut out = std::fs::File::create(&path).expect("Could not save the reduction result."); for line in file { out.write_all(line.as_bytes());