diff --git a/src/shims/os_str.rs b/src/shims/os_str.rs index 73dc9119a820..7635047064f1 100644 --- a/src/shims/os_str.rs +++ b/src/shims/os_str.rs @@ -14,54 +14,41 @@ use rustc_target::abi::LayoutOf; use crate::*; /// Represent how path separator conversion should be done. -enum Pathconversion { +pub enum PathConversion { HostToTarget, TargetToHost, } -/// Perform path separator conversion if needed. -fn convert_path_separator<'a>( - os_str: Cow<'a, OsStr>, - target_os: &str, - direction: Pathconversion, -) -> Cow<'a, OsStr> { - #[cfg(windows)] - return if target_os == "windows" { - // Windows-on-Windows, all fine. - os_str - } else { - // Unix target, Windows host. - let (from, to) = match direction { - Pathconversion::HostToTarget => ('\\', '/'), - Pathconversion::TargetToHost => ('/', '\\'), - }; - let converted = os_str - .encode_wide() - .map(|wchar| if wchar == from as u16 { to as u16 } else { wchar }) - .collect::<Vec<_>>(); - Cow::Owned(OsString::from_wide(&converted)) - }; - #[cfg(unix)] - return if target_os == "windows" { - // Windows target, Unix host. - let (from, to) = match direction { - Pathconversion::HostToTarget => ('/', '\\'), - Pathconversion::TargetToHost => ('\\', '/'), - }; - let converted = os_str - .as_bytes() - .iter() - .map(|&wchar| if wchar == from as u8 { to as u8 } else { wchar }) - .collect::<Vec<_>>(); - Cow::Owned(OsString::from_vec(converted)) - } else { - // Unix-on-Unix, all is fine. - os_str - }; +#[cfg(unix)] +pub fn os_str_to_bytes<'a, 'tcx>(os_str: &'a OsStr) -> InterpResult<'tcx, &'a [u8]> { + Ok(os_str.as_bytes()) +} + +#[cfg(not(unix))] +pub fn os_str_to_bytes<'a, 'tcx>(os_str: &'a OsStr) -> InterpResult<'tcx, &'a [u8]> { + // On non-unix platforms the best we can do to transform bytes from/to OS strings is to do the + // intermediate transformation into strings. Which invalidates non-utf8 paths that are actually + // valid. 
+ os_str + .to_str() + .map(|s| s.as_bytes()) + .ok_or_else(|| err_unsup_format!("{:?} is not a valid utf-8 string", os_str).into()) +} + +#[cfg(unix)] +pub fn bytes_to_os_str<'a, 'tcx>(bytes: &'a [u8]) -> InterpResult<'tcx, &'a OsStr> { + Ok(OsStr::from_bytes(bytes)) +} +#[cfg(not(unix))] +pub fn bytes_to_os_str<'a, 'tcx>(bytes: &'a [u8]) -> InterpResult<'tcx, &'a OsStr> { + let s = std::str::from_utf8(bytes) + .map_err(|_| err_unsup_format!("{:?} is not a valid utf-8 string", bytes))?; + Ok(OsStr::new(s)) } impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriEvalContext<'mir, 'tcx> {} pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx> { + /// Helper function to read an OsString from a null-terminated sequence of bytes, which is what /// the Unix APIs usually handle. fn read_os_str_from_c_str<'a>(&'a self, scalar: Scalar) -> InterpResult<'tcx, &'a OsStr> @@ -69,17 +56,6 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx 'tcx: 'a, 'mir: 'a, { - #[cfg(unix)] - fn bytes_to_os_str<'tcx, 'a>(bytes: &'a [u8]) -> InterpResult<'tcx, &'a OsStr> { - Ok(OsStr::from_bytes(bytes)) - } - #[cfg(not(unix))] - fn bytes_to_os_str<'tcx, 'a>(bytes: &'a [u8]) -> InterpResult<'tcx, &'a OsStr> { - let s = std::str::from_utf8(bytes) - .map_err(|_| err_unsup_format!("{:?} is not a valid utf-8 string", bytes))?; - Ok(OsStr::new(s)) - } - let this = self.eval_context_ref(); let bytes = this.memory.read_c_str(scalar)?; bytes_to_os_str(bytes) @@ -118,20 +94,6 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx scalar: Scalar, size: u64, ) -> InterpResult<'tcx, (bool, u64)> { - #[cfg(unix)] - fn os_str_to_bytes<'tcx, 'a>(os_str: &'a OsStr) -> InterpResult<'tcx, &'a [u8]> { - Ok(os_str.as_bytes()) - } - #[cfg(not(unix))] - fn os_str_to_bytes<'tcx, 'a>(os_str: &'a OsStr) -> InterpResult<'tcx, &'a [u8]> { - // On non-unix platforms the best we can do to transform bytes from/to OS strings 
is to do the - // intermediate transformation into strings. Which invalidates non-utf8 paths that are actually - // valid. - os_str - .to_str() - .map(|s| s.as_bytes()) - .ok_or_else(|| err_unsup_format!("{:?} is not a valid utf-8 string", os_str).into()) - } let bytes = os_str_to_bytes(os_str)?; // If `size` is smaller or equal than `bytes.len()`, writing `bytes` plus the required null @@ -226,7 +188,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx let this = self.eval_context_ref(); let os_str = this.read_os_str_from_c_str(scalar)?; - Ok(match convert_path_separator(Cow::Borrowed(os_str), &this.tcx.sess.target.target.target_os, Pathconversion::TargetToHost) { + Ok(match this.convert_path_separator(Cow::Borrowed(os_str), PathConversion::TargetToHost) { Cow::Borrowed(x) => Cow::Borrowed(Path::new(x)), Cow::Owned(y) => Cow::Owned(PathBuf::from(y)), }) @@ -237,7 +199,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx let this = self.eval_context_ref(); let os_str = this.read_os_str_from_wide_str(scalar)?; - Ok(convert_path_separator(Cow::Owned(os_str), &this.tcx.sess.target.target.target_os, Pathconversion::TargetToHost).into_owned().into()) + Ok(this.convert_path_separator(Cow::Owned(os_str), PathConversion::TargetToHost).into_owned().into()) } /// Write a Path to the machine memory (as a null-terminated sequence of bytes), @@ -249,7 +211,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx size: u64, ) -> InterpResult<'tcx, (bool, u64)> { let this = self.eval_context_mut(); - let os_str = convert_path_separator(Cow::Borrowed(path.as_os_str()), &this.tcx.sess.target.target.target_os, Pathconversion::HostToTarget); + let os_str = this.convert_path_separator(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget); this.write_os_str_to_c_str(&os_str, scalar, size) } @@ -262,7 +224,50 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: 
crate::MiriEvalContextExt<'mir, 'tcx size: u64, ) -> InterpResult<'tcx, (bool, u64)> { let this = self.eval_context_mut(); - let os_str = convert_path_separator(Cow::Borrowed(path.as_os_str()), &this.tcx.sess.target.target.target_os, Pathconversion::HostToTarget); + let os_str = this.convert_path_separator(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget); this.write_os_str_to_wide_str(&os_str, scalar, size) } + + fn convert_path_separator<'a>( + &self, + os_str: Cow<'a, OsStr>, + direction: PathConversion, + ) -> Cow<'a, OsStr> { + let this = self.eval_context_ref(); + let target_os = &this.tcx.sess.target.target.target_os; + #[cfg(windows)] + return if target_os == "windows" { + // Windows-on-Windows, all fine. + os_str + } else { + // Unix target, Windows host. + let (from, to) = match direction { + PathConversion::HostToTarget => ('\\', '/'), + PathConversion::TargetToHost => ('/', '\\'), + }; + let converted = os_str + .encode_wide() + .map(|wchar| if wchar == from as u16 { to as u16 } else { wchar }) + .collect::<Vec<_>>(); + Cow::Owned(OsString::from_wide(&converted)) + }; + #[cfg(unix)] + return if target_os == "windows" { + // Windows target, Unix host. + let (from, to) = match direction { + PathConversion::HostToTarget => ('/', '\\'), + PathConversion::TargetToHost => ('\\', '/'), + }; + let converted = os_str + .as_bytes() + .iter() + .map(|&wchar| if wchar == from as u8 { to as u8 } else { wchar }) + .collect::<Vec<_>>(); + Cow::Owned(OsString::from_vec(converted)) + } else { + // Unix-on-Unix, all is fine. 
+ os_str + }; + } } + diff --git a/src/shims/posix/foreign_items.rs b/src/shims/posix/foreign_items.rs index 26c743b360e0..177678f03d74 100644 --- a/src/shims/posix/foreign_items.rs +++ b/src/shims/posix/foreign_items.rs @@ -123,6 +123,11 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx let result = this.fdatasync(fd)?; this.write_scalar(Scalar::from_i32(result), dest)?; } + "readlink" => { + let &[pathname, buf, bufsize] = check_arg_count(args)?; + let result = this.readlink(pathname, buf, bufsize)?; + this.write_scalar(Scalar::from_machine_isize(result, this), dest)?; + } // Allocation "posix_memalign" => { diff --git a/src/shims/posix/fs.rs b/src/shims/posix/fs.rs index 88597b4a3981..ebf7e16a1534 100644 --- a/src/shims/posix/fs.rs +++ b/src/shims/posix/fs.rs @@ -4,6 +4,7 @@ use std::fs::{read_dir, remove_dir, remove_file, rename, DirBuilder, File, FileT use std::io::{self, Read, Seek, SeekFrom, Write}; use std::path::Path; use std::time::SystemTime; +use std::borrow::Cow; use log::trace; @@ -1353,6 +1354,41 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx this.handle_not_found() } } + + fn readlink( + &mut self, + pathname_op: OpTy<'tcx, Tag>, + buf_op: OpTy<'tcx, Tag>, + bufsize_op: OpTy<'tcx, Tag> + ) -> InterpResult<'tcx, i64> { + let this = self.eval_context_mut(); + + this.check_no_isolation("readlink")?; + + let pathname = this.read_path_from_c_str(this.read_scalar(pathname_op)?.check_init()?)?; + let buf = this.read_scalar(buf_op)?.check_init()?; + let bufsize = this.read_scalar(bufsize_op)?.to_machine_usize(this)?; + + let result = std::fs::read_link(pathname); + match result { + Ok(resolved) => { + let resolved = this.convert_path_separator(Cow::Borrowed(resolved.as_ref()), crate::shims::os_str::PathConversion::HostToTarget); + let mut path_bytes = crate::shims::os_str::os_str_to_bytes(resolved.as_ref())?; + let bufsize: usize = bufsize.try_into().unwrap(); + if path_bytes.len() > 
bufsize { + path_bytes = &path_bytes[..bufsize] + } + // 'readlink' truncates the resolved path if + // the provided buffer is not large enough. + this.memory.write_bytes(buf, path_bytes.iter().copied())?; + Ok(path_bytes.len().try_into().unwrap()) + } + Err(e) => { + this.set_last_error_from_io_error(e)?; + Ok(-1) + } + } + } } /// Extracts the number of seconds and nanoseconds elapsed between `time` and the unix epoch when diff --git a/tests/run-pass/fs.rs b/tests/run-pass/fs.rs index caa9bffc2bc8..f74d1c9a36b1 100644 --- a/tests/run-pass/fs.rs +++ b/tests/run-pass/fs.rs @@ -1,12 +1,18 @@ // ignore-windows: File handling is not implemented yet // compile-flags: -Zmiri-disable-isolation +#![feature(rustc_private)] + use std::fs::{ File, create_dir, OpenOptions, read_dir, remove_dir, remove_dir_all, remove_file, rename, }; -use std::io::{Read, Write, ErrorKind, Result, Seek, SeekFrom}; +use std::ffi::CString; +use std::io::{Read, Write, Error, ErrorKind, Result, Seek, SeekFrom}; use std::path::{PathBuf, Path}; +extern crate libc; + + fn main() { test_file(); test_file_clone(); @@ -19,10 +25,23 @@ fn main() { test_errors(); test_rename(); test_directory(); + test_dup_stdout_stderr(); } fn tmp() -> PathBuf { - std::env::var("MIRI_TEMP").map(PathBuf::from).unwrap_or_else(|_| std::env::temp_dir()) + std::env::var("MIRI_TEMP") + .map(|tmp| { + // MIRI_TEMP is set outside of our emulated + // program, so it may have path separators that don't + // correspond to our target platform. We normalize them here + // before constructing a `PathBuf` + + #[cfg(windows)] + return PathBuf::from(tmp.replace("/", "\\")); + + #[cfg(not(windows))] + return PathBuf::from(tmp.replace("\\", "/")); + }).unwrap_or_else(|_| std::env::temp_dir()) } /// Prepare: compute filename and make sure the file does not exist. 
@@ -215,6 +234,43 @@ fn test_symlink() { let mut contents = Vec::new(); symlink_file.read_to_end(&mut contents).unwrap(); assert_eq!(bytes, contents.as_slice()); + + + #[cfg(unix)] + { + use std::os::unix::ffi::OsStrExt; + + let expected_path = path.as_os_str().as_bytes(); + + // Test that the expected string gets written to a buffer of proper + // length, and that a trailing null byte is not written. + let symlink_c_str = CString::new(symlink_path.as_os_str().as_bytes()).unwrap(); + let symlink_c_ptr = symlink_c_str.as_ptr(); + + // Make the buf one byte larger than it needs to be, + // and check that the last byte is not overwritten. + let mut large_buf = vec![0xFF; expected_path.len() + 1]; + let res = unsafe { libc::readlink(symlink_c_ptr, large_buf.as_mut_ptr().cast(), large_buf.len()) }; + // Check that the resolved path was properly written into the buf. + assert_eq!(&large_buf[..(large_buf.len() - 1)], expected_path); + assert_eq!(large_buf.last(), Some(&0xFF)); + assert_eq!(res, large_buf.len() as isize - 1); + + // Test that the resolved path is truncated if the provided buffer + // is too small. + let mut small_buf = [0u8; 2]; + let res = unsafe { libc::readlink(symlink_c_ptr, small_buf.as_mut_ptr().cast(), small_buf.len()) }; + assert_eq!(small_buf, &expected_path[..small_buf.len()]); + assert_eq!(res, small_buf.len() as isize); + + // Test that we report a proper error for a missing path. + let bad_path = CString::new("MIRI_MISSING_FILE_NAME").unwrap(); + let res = unsafe { libc::readlink(bad_path.as_ptr(), small_buf.as_mut_ptr().cast(), small_buf.len()) }; + assert_eq!(res, -1); + assert_eq!(Error::last_os_error().kind(), ErrorKind::NotFound); + } + + + // Test that metadata of a symbolic link is correct. check_metadata(bytes, &symlink_path).unwrap(); // Test that the metadata of a symbolic link is correct when not following it. 
@@ -292,3 +348,13 @@ fn test_directory() { // Reading the metadata of a non-existent directory should fail with a "not found" error. assert_eq!(ErrorKind::NotFound, check_metadata(&[], &dir_path).unwrap_err().kind()); } + +fn test_dup_stdout_stderr() { + let bytes = b"hello dup fd\n"; + unsafe { + let new_stdout = libc::fcntl(1, libc::F_DUPFD, 0); + let new_stderr = libc::fcntl(2, libc::F_DUPFD, 0); + libc::write(new_stdout, bytes.as_ptr() as *const libc::c_void, bytes.len()); + libc::write(new_stderr, bytes.as_ptr() as *const libc::c_void, bytes.len()); + } +} diff --git a/tests/run-pass/fs_libc.stderr b/tests/run-pass/fs.stderr similarity index 100% rename from tests/run-pass/fs_libc.stderr rename to tests/run-pass/fs.stderr diff --git a/tests/run-pass/fs_libc.stdout b/tests/run-pass/fs.stdout similarity index 100% rename from tests/run-pass/fs_libc.stdout rename to tests/run-pass/fs.stdout diff --git a/tests/run-pass/fs_libc.rs b/tests/run-pass/fs_libc.rs deleted file mode 100644 index e3deb7a5bcd8..000000000000 --- a/tests/run-pass/fs_libc.rs +++ /dev/null @@ -1,20 +0,0 @@ -// ignore-windows -// compile-flags: -Zmiri-disable-isolation - -#![feature(rustc_private)] - -extern crate libc; - -fn main() { - dup_stdout_stderr_test(); -} - -fn dup_stdout_stderr_test() { - let bytes = b"hello dup fd\n"; - unsafe { - let new_stdout = libc::fcntl(1, libc::F_DUPFD, 0); - let new_stderr = libc::fcntl(2, libc::F_DUPFD, 0); - libc::write(new_stdout, bytes.as_ptr() as *const libc::c_void, bytes.len()); - libc::write(new_stderr, bytes.as_ptr() as *const libc::c_void, bytes.len()); - } -}