Auto merge of #1564 - Aaron1011:readlink, r=RalfJung

Implement `readlink`. Due to the truncating behavior of `readlink`, I was not able to directly use any of the existing C-string helper functions.
2020-10-04 21:07:55 +00:00 · 2020-10-04 21:07:55 +00:00 · 60c1075820
commit 60c1075820
parent 9202f7ddc6 3aaab3dd98
7 changed files with 183 additions and 91 deletions
--- a/src/shims/os_str.rs
+++ b/src/shims/os_str.rs
@ -14,54 +14,41 @@ use rustc_target::abi::LayoutOf;
 use crate::*;

 /// Represent how path separator conversion should be done.
-enum Pathconversion {
+pub enum PathConversion {
    HostToTarget,
    TargetToHost,
 }

-/// Perform path separator conversion if needed.
-fn convert_path_separator<'a>(
-    os_str: Cow<'a, OsStr>,
-    target_os: &str,
-    direction: Pathconversion,
-) -> Cow<'a, OsStr> {
-    #[cfg(windows)]
-    return if target_os == "windows" {
-        // Windows-on-Windows, all fine.
-        os_str
-    } else {
-        // Unix target, Windows host.
-        let (from, to) = match direction {
-            Pathconversion::HostToTarget => ('\\', '/'),
-            Pathconversion::TargetToHost => ('/', '\\'),
-        };
-        let converted = os_str
-            .encode_wide()
-            .map(|wchar| if wchar == from as u16 { to as u16 } else { wchar })
-            .collect::<Vec<_>>();
-        Cow::Owned(OsString::from_wide(&converted))
-    };
-    #[cfg(unix)]
-    return if target_os == "windows" {
-        // Windows target, Unix host.
-        let (from, to) = match direction {
-            Pathconversion::HostToTarget => ('/', '\\'),
-            Pathconversion::TargetToHost => ('\\', '/'),
-        };
-        let converted = os_str
-            .as_bytes()
-            .iter()
-            .map(|&wchar| if wchar == from as u8 { to as u8 } else { wchar })
-            .collect::<Vec<_>>();
-        Cow::Owned(OsString::from_vec(converted))
-    } else {
-        // Unix-on-Unix, all is fine.
-        os_str
-    };
+#[cfg(unix)]
+pub fn os_str_to_bytes<'a, 'tcx>(os_str: &'a OsStr) -> InterpResult<'tcx, &'a [u8]> {
+    Ok(os_str.as_bytes())
+}
+
+#[cfg(not(unix))]
+pub fn os_str_to_bytes<'a, 'tcx>(os_str: &'a OsStr) -> InterpResult<'tcx, &'a [u8]> {
+    // On non-unix platforms the best we can do to transform bytes from/to OS strings is to go
+    // through an intermediate conversion to `str`, which means that paths which are valid but
+    // not UTF-8 are rejected with an error.
+    os_str
+        .to_str()
+        .map(|s| s.as_bytes())
+        .ok_or_else(|| err_unsup_format!("{:?} is not a valid utf-8 string", os_str).into())
+}
+
+#[cfg(unix)]
+pub fn bytes_to_os_str<'a, 'tcx>(bytes: &'a [u8]) -> InterpResult<'tcx, &'a OsStr> {
+    Ok(OsStr::from_bytes(bytes))
+}
+#[cfg(not(unix))]
+pub fn bytes_to_os_str<'a, 'tcx>(bytes: &'a [u8]) -> InterpResult<'tcx, &'a OsStr> {
+    let s = std::str::from_utf8(bytes)
+        .map_err(|_| err_unsup_format!("{:?} is not a valid utf-8 string", bytes))?;
+    Ok(OsStr::new(s))
 }

 impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriEvalContext<'mir, 'tcx> {}
 pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx> {
+
    /// Helper function to read an OsString from a null-terminated sequence of bytes, which is what
    /// the Unix APIs usually handle.
    fn read_os_str_from_c_str<'a>(&'a self, scalar: Scalar<Tag>) -> InterpResult<'tcx, &'a OsStr>
@ -69,17 +56,6 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
        'tcx: 'a,
        'mir: 'a,
    {
-        #[cfg(unix)]
-        fn bytes_to_os_str<'tcx, 'a>(bytes: &'a [u8]) -> InterpResult<'tcx, &'a OsStr> {
-            Ok(OsStr::from_bytes(bytes))
-        }
-        #[cfg(not(unix))]
-        fn bytes_to_os_str<'tcx, 'a>(bytes: &'a [u8]) -> InterpResult<'tcx, &'a OsStr> {
-            let s = std::str::from_utf8(bytes)
-                .map_err(|_| err_unsup_format!("{:?} is not a valid utf-8 string", bytes))?;
-            Ok(OsStr::new(s))
-        }
-
        let this = self.eval_context_ref();
        let bytes = this.memory.read_c_str(scalar)?;
        bytes_to_os_str(bytes)
@ -118,20 +94,6 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
        scalar: Scalar<Tag>,
        size: u64,
    ) -> InterpResult<'tcx, (bool, u64)> {
-        #[cfg(unix)]
-        fn os_str_to_bytes<'tcx, 'a>(os_str: &'a OsStr) -> InterpResult<'tcx, &'a [u8]> {
-            Ok(os_str.as_bytes())
-        }
-        #[cfg(not(unix))]
-        fn os_str_to_bytes<'tcx, 'a>(os_str: &'a OsStr) -> InterpResult<'tcx, &'a [u8]> {
-            // On non-unix platforms the best we can do to transform bytes from/to OS strings is to do the
-            // intermediate transformation into strings. Which invalidates non-utf8 paths that are actually
-            // valid.
-            os_str
-                .to_str()
-                .map(|s| s.as_bytes())
-                .ok_or_else(|| err_unsup_format!("{:?} is not a valid utf-8 string", os_str).into())
-        }

        let bytes = os_str_to_bytes(os_str)?;
        // If `size` is smaller or equal than `bytes.len()`, writing `bytes` plus the required null
@ -226,7 +188,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
        let this = self.eval_context_ref();
        let os_str = this.read_os_str_from_c_str(scalar)?;

-        Ok(match convert_path_separator(Cow::Borrowed(os_str), &this.tcx.sess.target.target.target_os, Pathconversion::TargetToHost) {
+        Ok(match this.convert_path_separator(Cow::Borrowed(os_str), PathConversion::TargetToHost) {
            Cow::Borrowed(x) => Cow::Borrowed(Path::new(x)),
            Cow::Owned(y) => Cow::Owned(PathBuf::from(y)),
        })
@ -237,7 +199,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
        let this = self.eval_context_ref();
        let os_str = this.read_os_str_from_wide_str(scalar)?;

-        Ok(convert_path_separator(Cow::Owned(os_str), &this.tcx.sess.target.target.target_os, Pathconversion::TargetToHost).into_owned().into())
+        Ok(this.convert_path_separator(Cow::Owned(os_str), PathConversion::TargetToHost).into_owned().into())
    }

    /// Write a Path to the machine memory (as a null-terminated sequence of bytes),
@ -249,7 +211,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
        size: u64,
    ) -> InterpResult<'tcx, (bool, u64)> {
        let this = self.eval_context_mut();
-        let os_str = convert_path_separator(Cow::Borrowed(path.as_os_str()), &this.tcx.sess.target.target.target_os, Pathconversion::HostToTarget);
+        let os_str = this.convert_path_separator(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget);
        this.write_os_str_to_c_str(&os_str, scalar, size)
    }

@ -262,7 +224,50 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
        size: u64,
    ) -> InterpResult<'tcx, (bool, u64)> {
        let this = self.eval_context_mut();
-        let os_str = convert_path_separator(Cow::Borrowed(path.as_os_str()), &this.tcx.sess.target.target.target_os, Pathconversion::HostToTarget);
+        let os_str = this.convert_path_separator(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget);
        this.write_os_str_to_wide_str(&os_str, scalar, size)
    }
+
+    fn convert_path_separator<'a>(
+        &self,
+        os_str: Cow<'a, OsStr>,
+        direction: PathConversion,
+    ) -> Cow<'a, OsStr> {
+        let this = self.eval_context_ref();
+        let target_os = &this.tcx.sess.target.target.target_os;
+        #[cfg(windows)]
+        return if target_os == "windows" {
+            // Windows-on-Windows, all fine.
+            os_str
+        } else {
+            // Unix target, Windows host.
+            let (from, to) = match direction {
+                PathConversion::HostToTarget => ('\\', '/'),
+                PathConversion::TargetToHost => ('/', '\\'),
+            };
+            let converted = os_str
+                .encode_wide()
+                .map(|wchar| if wchar == from as u16 { to as u16 } else { wchar })
+                .collect::<Vec<_>>();
+            Cow::Owned(OsString::from_wide(&converted))
+        };
+        #[cfg(unix)]
+        return if target_os == "windows" {
+            // Windows target, Unix host.
+            let (from, to) = match direction {
+                PathConversion::HostToTarget => ('/', '\\'),
+                PathConversion::TargetToHost => ('\\', '/'),
+            };
+            let converted = os_str
+                .as_bytes()
+                .iter()
+                .map(|&wchar| if wchar == from as u8 { to as u8 } else { wchar })
+                .collect::<Vec<_>>();
+            Cow::Owned(OsString::from_vec(converted))
+        } else {
+            // Unix-on-Unix, all is fine.
+            os_str
+        };
+    }
 }
+
--- a/src/shims/posix/foreign_items.rs
+++ b/src/shims/posix/foreign_items.rs
@ -123,6 +123,11 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
                let result = this.fdatasync(fd)?;
                this.write_scalar(Scalar::from_i32(result), dest)?;
            }
+            "readlink" => {
+                let &[pathname, buf, bufsize] = check_arg_count(args)?;
+                let result = this.readlink(pathname, buf, bufsize)?;
+                this.write_scalar(Scalar::from_machine_isize(result, this), dest)?;
+            }

            // Allocation
            "posix_memalign" => {
--- a/src/shims/posix/fs.rs
+++ b/src/shims/posix/fs.rs
@ -4,6 +4,7 @@ use std::fs::{read_dir, remove_dir, remove_file, rename, DirBuilder, File, FileT
 use std::io::{self, Read, Seek, SeekFrom, Write};
 use std::path::Path;
 use std::time::SystemTime;
+use std::borrow::Cow;

 use log::trace;

@ -1353,6 +1354,41 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
            this.handle_not_found()
        }
    }
+
+    fn readlink(
+        &mut self,
+        pathname_op: OpTy<'tcx, Tag>,
+        buf_op: OpTy<'tcx, Tag>,
+        bufsize_op: OpTy<'tcx, Tag>
+    ) -> InterpResult<'tcx, i64> {
+        let this = self.eval_context_mut();
+
+        this.check_no_isolation("readlink")?;
+
+        let pathname = this.read_path_from_c_str(this.read_scalar(pathname_op)?.check_init()?)?;
+        let buf = this.read_scalar(buf_op)?.check_init()?;
+        let bufsize = this.read_scalar(bufsize_op)?.to_machine_usize(this)?;
+
+        let result = std::fs::read_link(pathname);
+        match result {
+            Ok(resolved) => {
+                let resolved = this.convert_path_separator(Cow::Borrowed(resolved.as_ref()), crate::shims::os_str::PathConversion::HostToTarget);
+                let mut path_bytes = crate::shims::os_str::os_str_to_bytes(resolved.as_ref())?;
+                let bufsize: usize = bufsize.try_into().unwrap();
+                if path_bytes.len() > bufsize {
+                    path_bytes = &path_bytes[..bufsize]
+                }
+                // 'readlink' truncates the resolved path if
+                // the provided buffer is not large enough.
+                this.memory.write_bytes(buf, path_bytes.iter().copied())?;
+                Ok(path_bytes.len().try_into().unwrap())
+            }
+            Err(e) => {
+                this.set_last_error_from_io_error(e)?;
+                Ok(-1)
+            }
+        }
+    }
 }

 /// Extracts the number of seconds and nanoseconds elapsed between `time` and the unix epoch when
--- a/tests/run-pass/fs.rs
+++ b/tests/run-pass/fs.rs
@ -1,12 +1,18 @@
 // ignore-windows: File handling is not implemented yet
 // compile-flags: -Zmiri-disable-isolation

+#![feature(rustc_private)]
+
 use std::fs::{
    File, create_dir, OpenOptions, read_dir, remove_dir, remove_dir_all, remove_file, rename,
 };
-use std::io::{Read, Write, ErrorKind, Result, Seek, SeekFrom};
+use std::ffi::CString;
+use std::io::{Read, Write, Error, ErrorKind, Result, Seek, SeekFrom};
 use std::path::{PathBuf, Path};

+extern crate libc;
+
+
 fn main() {
    test_file();
    test_file_clone();
@ -19,10 +25,23 @@ fn main() {
    test_errors();
    test_rename();
    test_directory();
+    test_dup_stdout_stderr();
 }

 fn tmp() -> PathBuf {
-    std::env::var("MIRI_TEMP").map(PathBuf::from).unwrap_or_else(|_| std::env::temp_dir())
+    std::env::var("MIRI_TEMP")
+        .map(|tmp| {
+            // MIRI_TEMP is set outside of our emulated
+            // program, so it may have path separators that don't
+            // correspond to our target platform. We normalize them here
+            // before constructing a `PathBuf`
+
+            #[cfg(windows)]
+            return PathBuf::from(tmp.replace("/", "\\"));
+
+            #[cfg(not(windows))]
+            return PathBuf::from(tmp.replace("\\", "/"));
+        }).unwrap_or_else(|_| std::env::temp_dir())
 }

 /// Prepare: compute filename and make sure the file does not exist.
@ -215,6 +234,43 @@ fn test_symlink() {
    let mut contents = Vec::new();
    symlink_file.read_to_end(&mut contents).unwrap();
    assert_eq!(bytes, contents.as_slice());
+
+
+    #[cfg(unix)]
+    {
+        use std::os::unix::ffi::OsStrExt;
+
+        let expected_path = path.as_os_str().as_bytes();
+
+        // Test that the expected string gets written to a buffer of proper
+        // length, and that a trailing null byte is not written.
+        let symlink_c_str = CString::new(symlink_path.as_os_str().as_bytes()).unwrap();
+        let symlink_c_ptr = symlink_c_str.as_ptr();
+
+        // Make the buf one byte larger than it needs to be,
+        // and check that the last byte is not overwritten.
+        let mut large_buf = vec![0xFF; expected_path.len() + 1];
+        let res = unsafe { libc::readlink(symlink_c_ptr, large_buf.as_mut_ptr().cast(), large_buf.len()) };
+        // Check that the resolved path was properly written into the buf.
+        assert_eq!(&large_buf[..(large_buf.len() - 1)], expected_path);
+        assert_eq!(large_buf.last(), Some(&0xFF));
+        assert_eq!(res, large_buf.len() as isize - 1);
+
+        // Test that the resolved path is truncated if the provided buffer
+        // is too small.
+        let mut small_buf = [0u8; 2];
+        let res = unsafe { libc::readlink(symlink_c_ptr, small_buf.as_mut_ptr().cast(), small_buf.len()) };
+        assert_eq!(small_buf, &expected_path[..small_buf.len()]);
+        assert_eq!(res, small_buf.len() as isize);
+
+        // Test that we report a proper error for a missing path.
+        let bad_path = CString::new("MIRI_MISSING_FILE_NAME").unwrap();
+        let res = unsafe { libc::readlink(bad_path.as_ptr(), small_buf.as_mut_ptr().cast(), small_buf.len()) };
+        assert_eq!(res, -1);
+        assert_eq!(Error::last_os_error().kind(), ErrorKind::NotFound);
+    }
+
+
    // Test that metadata of a symbolic link is correct.
    check_metadata(bytes, &symlink_path).unwrap();
    // Test that the metadata of a symbolic link is correct when not following it.
@ -292,3 +348,13 @@ fn test_directory() {
    // Reading the metadata of a non-existent directory should fail with a "not found" error.
    assert_eq!(ErrorKind::NotFound, check_metadata(&[], &dir_path).unwrap_err().kind());
 }
+
+fn test_dup_stdout_stderr() {
+    let bytes = b"hello dup fd\n";
+    unsafe {
+        let new_stdout = libc::fcntl(1, libc::F_DUPFD, 0);
+        let new_stderr = libc::fcntl(2, libc::F_DUPFD, 0);
+        libc::write(new_stdout, bytes.as_ptr() as *const libc::c_void, bytes.len());
+        libc::write(new_stderr, bytes.as_ptr() as *const libc::c_void, bytes.len());
+    }
+}
--- a/tests/run-pass/fs_libc.stderr
+++ b/tests/run-pass/fs_libc.stderr
--- a/tests/run-pass/fs_libc.stdout
+++ b/tests/run-pass/fs_libc.stdout
--- a/tests/run-pass/fs_libc.rs
+++ b/tests/run-pass/fs_libc.rs
@ -1,20 +0,0 @@
-// ignore-windows
-// compile-flags: -Zmiri-disable-isolation
-
-#![feature(rustc_private)]
-
-extern crate libc;
-
-fn main() {
-    dup_stdout_stderr_test();
-}
-
-fn dup_stdout_stderr_test() {
-    let bytes = b"hello dup fd\n";
-    unsafe {
-        let new_stdout = libc::fcntl(1, libc::F_DUPFD, 0);
-        let new_stderr = libc::fcntl(2, libc::F_DUPFD, 0);
-        libc::write(new_stdout, bytes.as_ptr() as *const libc::c_void, bytes.len());
-        libc::write(new_stderr, bytes.as_ptr() as *const libc::c_void, bytes.len());
-    }
-}