remove ancient tex files

2022-07-04 15:26:02 -04:00 · 2022-07-04 15:26:02 -04:00 · 97dd53d5ef
commit 97dd53d5ef
parent 709e29a167
5 changed files with 0 additions and 1131 deletions
--- a/tex/final-presentation/latexmkrc
+++ b/tex/final-presentation/latexmkrc
@ -1,12 +0,0 @@
-# vim: ft=perl
-
-$pdf_mode = 1;
-$pdflatex = 'lualatex --shell-escape %O %S';
-$out_dir = 'out';
-
-# This improves latexmk's detection of source files and generated files.
-$recorder = 1;
-
-# Ignore always-regenerated *.pyg files from the minted package when considering
-# whether to run pdflatex again.
-$hash_calc_ignore_pattern{'pyg'} = '.*';
--- a/tex/final-presentation/rust-logo-512x512.png
+++ b/tex/final-presentation/rust-logo-512x512.png
--- a/tex/final-presentation/slides.tex
+++ b/tex/final-presentation/slides.tex
@ -1,444 +0,0 @@
-\documentclass{beamer}
-\usecolortheme{beaver}
-\beamertemplatenavigationsymbolsempty
-
-% Fonts
-\usepackage{fontspec}
-\setmainfont{Source Serif Pro}[Ligatures=TeX]
-\setsansfont{Source Sans Pro}[Ligatures=TeX]
-\setmonofont{Source Code Pro}[
-  BoldFont={* Medium},
-  BoldItalicFont={* Medium Italic},
-]
-
-\usepackage[outputdir=out]{minted}
-\usepackage{tikz}
-\usetikzlibrary{positioning, fit}
-
-\tikzset{
-  invisible/.style={opacity=0,text opacity=0},
-  highlight/.style={color=red},
-  intro/.code args={<#1>}{%
-    \only<#1>{\pgfkeysalso{highlight}}
-    \alt<#1->{}{\pgfkeysalso{invisible}}
-  },
-}
-
-\title{Miri}
-\subtitle{An interpreter for Rust's mid-level intermediate representation}
-\author{
-  Scott Olson
-  \texorpdfstring{\\ \scriptsize{Supervisor: Christopher Dutchyn}}{}
-}
-\institute{
-  CMPT 400 \\
-  University of Saskatchewan
-}
-\date{}
-\titlegraphic{
-  \includegraphics[width=64px,height=64px]{rust-logo-512x512.png} \\
-  \scriptsize{\url{https://www.rust-lang.org}}
-}
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-% Intro slides
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-\begin{document}
-
-\maketitle
-
-\begin{frame}[fragile]
-  \frametitle{What is Rust? \small{[review]}}
-
-  According to the website\dots
-
-  \begin{quote}
-    \textbf{Rust} is a systems programming language that runs blazingly fast,
-    prevents nearly all segfaults, and guarantees thread safety.
-  \end{quote}
-
-  It's a new programming language from Mozilla, and it looks like this:
-
-  \begin{minted}[
-    autogobble,
-    fontsize=\footnotesize,
-    mathescape,
-    xleftmargin=.3in,
-  ]{rust}
-    fn factorial(n: u64) -> u64 {
-        (1..n).fold(1, |a, b| a * b)
-    }
-
-    fn main() {
-        for x in 1..6 {
-            println!("{}", factorial(x));
-        }
-        // $\Rightarrow$ 1
-        // $\Rightarrow$ 1
-        // $\Rightarrow$ 2
-        // $\Rightarrow$ 6
-        // $\Rightarrow$ 24
-    }
-  \end{minted}
-\end{frame}
-
-\begin{frame}
-  \frametitle{How does Rust compile code? \onslide<-6>{\small{[review]}}}
-
-  \begin{center}
-    \begin{tikzpicture}[x=4cm, y=3.5cm, auto, rounded corners]
-      \tikzstyle{basic-stage}=[rectangle, draw, thick, align=center]
-      \tikzstyle{stage}=[basic-stage, font=\tiny]
-      \tikzstyle{pass}=[thick, -stealth]
-      \tikzstyle{pass-label}=[font=\footnotesize]
-
-      \node[basic-stage] (src) at (0,0) {Source\\Code};
-      \node[basic-stage] (mach) at (2,-1) {Machine\\Code};
-
-      \draw<1>[pass, out=0, in=180]
-        (src.east) to node[font=\Huge] {?} (mach.west);
-
-      \node[stage, intro=<2>] (ast) at (1,0)
-        {\normalsize{AST} \\ Abstract Syntax Tree};
-      \draw[pass, intro=<2>]
-        (src) -- node[pass-label] {Parse} (ast);
-
-      \node[stage, intro=<3>] (hir) at (2,0)
-        {\normalsize{HIR} \\ High-level Intermediate\\Representation};
-      \draw[pass, intro=<3>]
-        (ast) -- node[pass-label] {Simplify} (hir);
-
-      \node[stage, intro=<4>] (mir) at (0,-1)
-        {\normalsize{MIR} \\ Mid-level Intermediate\\Representation};
-      \path (hir.south) -- coordinate (middle) (mir.north);
-      \draw[pass, intro=<4>]
-        (hir.south) |- (middle) -| (mir.north);
-      \node[pass-label, above, intro=<4>] at (middle) {Lower};
-
-      \node[stage, intro=<5>] (llvm) at (1,-1)
-        {\normalsize{LLVM IR} \\ Low-level Intermediate\\Representation};
-      \draw[pass, intro=<5>]
-        (mir) -- node[pass-label] {Translate} (llvm);
-
-      \draw<6->[pass, intro=<6>]
-        (llvm) -- node[pass-label] {Magic} (mach);
-
-      \node[stage, intro=<7>] (exec) at (1,-1.75)
-        {\normalsize{Execution}};
-      \draw[pass, intro=<7>]
-        (mach) -- node[pass-label] {CPU} (exec);
-
-      \draw[pass, intro=<8>]
-        (mir) -- node[pass-label] {Miri} (exec);
-    \end{tikzpicture}
-  \end{center}
-\end{frame}
-
-\begin{frame}
-  \frametitle{Why build Miri?}
-  \begin{itemize}
-    \item For fun and learning.
-
-    \item I originally planned to use it for testing the compiler and execution
-      of unsafe code, but shifted my goals along the way. \pause
-
-    \item Now it serves as an experimental implementation of the upcoming
-      compile-time function evaluation feature in Rust. \pause
-
-      \begin{itemize}
-        \item Similar to C++14's \mintinline{cpp}{constexpr} feature.
-
-        \item You can do complicated calculations at compile time and compile
-          their \emph{results} into the executable. \pause
-
-        \item For example, you can compute a ``perfect hash function'' for a
-          statically-known map at compile-time and have guaranteed no-collision
-          lookup at runtime. \pause
-
-        \item Miri actually supports far more of Rust than C++14's
-          \mintinline{cpp}{constexpr} does of C++ --- even heap allocation and
-          unsafe code.
-      \end{itemize}
-  \end{itemize}
-\end{frame}
-
-\begin{frame}
-  \frametitle{How was it built?}
-
-  At first I wrote a naive version with a number of downsides:
-
-  \begin{itemize}
-    \item represented values in a traditional dynamic language format, where
-      every value was the same size.
-
-    \item didn't work well for aggregates (structs, enums, arrays, etc.).
-
-    \item made unsafe programming tricks that make assumptions about low-level
-      value layout essentially impossible.
-  \end{itemize}
-\end{frame}
-
-\begin{frame}
-  \frametitle{How was it built?}
-  \begin{itemize}
-    \item Later, a Rust compiler team member proposed a ``Rust abstract
-      machine'' with specialized value layout which solved my previous problems.
-      \pause
-
-    \item His proposal was intended for a compile-time function evaluator in the
-      Rust compiler, so I effectively implemented an experimental version of
-      that. \pause
-
-    \item After this point, making Miri work well was primarily a software
-      engineering problem.
-  \end{itemize}
-\end{frame}
-
-\begin{frame}
-  \frametitle{Data layout}
-  \begin{itemize}
-    \item Memory in Miri is literally a HashMap from ``allocation IDs'' to
-      ``abstract allocations''.
-
-    \item Allocations are represented by: \pause
-      \begin{enumerate}
-        \item An array of \textbf{raw bytes} with a size based on the type of
-          the value \pause
-        \item A set of \textbf{relocations} --- pointers into other abstract
-          allocations \pause
-        \item A mask determining which bytes are \textbf{undefined}
-      \end{enumerate}
-  \end{itemize}
-\end{frame}
-
-\begin{frame}[fragile]
-  \frametitle{\texttt{square} example}
-  \begin{center}
-    \begin{minted}[autogobble,fontsize=\scriptsize]{rust}
-      // Rust
-      fn square(n: u64) -> u64 {
-          n * n
-      }
-
-      // Generated MIR
-      fn square(arg0: u64) -> u64 {
-          let var0: u64; // n           // On function entry, Miri creates
-                                        // virtual allocations for all the
-                                        // arguments, variables, and
-                                        // temporaries.
-
-          bb0: {
-              var0 = arg0;              // Copy the argument into `n`.
-              return = Mul(var0, var0); // Multiply `n` with itself.
-              goto -> bb1;              // Jump to basic block `bb1`.
-          }
-
-          bb1: {
-              return;                   // Return from the current fn.
-          }
-      }
-    \end{minted}
-  \end{center}
-\end{frame}
-
-\begin{frame}[fragile]
-  \frametitle{\texttt{sum} example}
-  \begin{center}
-    \begin{minted}[autogobble,fontsize=\tiny]{rust}
-      // Rust
-      fn sum() -> u64 {
-          let mut sum = 0; let mut i = 0;
-          while i < 10 { sum += i; i += 1; }
-          sum
-      }
-
-      // Generated MIR
-      fn sum() -> u64 {
-          let mut var0: u64; // sum
-          let mut var1: u64; // i
-          let mut tmp0: bool;
-
-          bb0: {
-              // sum = 0; i = 0;
-              var0 = const 0u64; var1 = const 0u64; goto -> bb1;
-          }
-          bb1: {
-              // if i < 10 { goto bb2; } else { goto bb3; }
-              tmp0 = Lt(var1, const 10u64);
-              if(tmp0) -> [true: bb2, false: bb3];
-          }
-          bb2: {
-              var0 = Add(var0, var1);       // sum = sum + i;
-              var1 = Add(var1, const 1u64); // i = i + 1;
-              goto -> bb1;
-          }
-          bb3: {
-              return = var0; goto -> bb4;
-          }
-          bb4: { return; }
-      }
-    \end{minted}
-  \end{center}
-\end{frame}
-
-\begin{frame}[fragile]
-  \frametitle{Heap allocations!}
-  \begin{minted}[autogobble,fontsize=\scriptsize]{rust}
-    fn make_vec() -> Vec<u8> {
-        // Empty array with space for 4 bytes - allocated on the heap!
-        let mut vec = Vec::with_capacity(4);
-        // Initialize the first two slots.
-        vec.push(1);
-        vec.push(2);
-        vec
-    }
-
-    // For reference:
-    //   struct Vec<T> { capacity: usize, data: *mut T, length: usize }
-
-    // Resulting allocations (on 32-bit little-endian architectures):
-    //   Region A:
-    //     04 00 00 00  00 00 00 00  02 00 00 00
-    //                  └───(B)───┘
-    //
-    //   Region B:
-    //     01 02 __ __ (underscores denote undefined bytes)
-  \end{minted}
-
-  \footnotesize{Evaluating the above involves a number of compiler built-ins,
-  ``unsafe'' code blocks, and more inside the standard library,
-  but Miri handles it all.}
-\end{frame}
-
-\begin{frame}[fragile]
-  \frametitle{Unsafe code!}
-  \begin{minted}[autogobble,fontsize=\scriptsize]{rust}
-    fn out_of_bounds() -> u8 {
-        let mut vec = vec![1, 2]
-        unsafe { *vec.get_unchecked(5) }
-    }
-
-    // test.rs:3: error: pointer offset outside bounds of allocation
-    // test.rs:3:     unsafe { *vec.get_unchecked(5) }
-    //                         ^~~~~~~~~~~~~~~~~~~~~
-
-    fn undefined_bytes() -> u8 {
-        let mut vec = Vec::with_capacity(10);
-        unsafe { *vec.get_unchecked(5) }
-    }
-
-    // test.rs:3: error: attempted to read undefined bytes
-    // test.rs:3:     unsafe { *vec.get_unchecked(5) }
-    //                         ^~~~~~~~~~~~~~~~~~~~~
-  \end{minted}
-\end{frame}
-
-\begin{frame}
-  \frametitle{What can't Miri do?}
-  \begin{itemize}
-    \item Miri can't do all the stuff I didn't implement yet. :)
-      \begin{itemize}
-        \item non-trivial casts
-        \item function pointers
-        \item calling destructors and freeing memory
-        \item taking target architecture endianess and alignment information
-          into account when computing data layout
-        \item handling all constants properly (but, well, Miri might be
-          replacing the old constants system)
-      \end{itemize}
-      \pause
-
-    \item Miri can't do foreign function calls (e.g. calling functions defined
-      in C or C++), but there is a reasonable way it could be done with libffi.
-      \begin{itemize}
-        \item On the other hand, for constant evaluation in the compiler, you
-          want the evaluator to be deterministic and safe, so FFI calls might be
-          banned anyway.
-      \end{itemize}
-      \pause
-
-    \item Without quite some effort, Miri will probably never handle inline
-      assembly...
-  \end{itemize}
-\end{frame}
-
-\begin{frame}
-  \begin{center}
-    \LARGE{Questions?}
-  \end{center}
-\end{frame}
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-% Extra slides
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-\begin{frame}[fragile]
-  \frametitle{\texttt{varN} vs. \texttt{argN}}
-  \begin{center}
-    \begin{minted}[autogobble,fontsize=\scriptsize]{rust}
-      // Rust
-      type Pair = (u64, u64);
-      fn swap((a, b): Pair) -> Pair {
-          (b, a)
-      }
-
-      // Generated MIR
-      fn swap(arg0: (u64, u64)) -> (u64, u64) {
-          let var0: u64; // a
-          let var1: u64; // b
-
-          bb0: {
-              var0 = arg0.0;         // get the 1st part of the pair
-              var1 = arg0.1;         // get the 2nd part of the pair
-              return = (var1, var0); // build a new pair in the result
-              goto -> bb1;
-          }
-
-          bb1: {
-              return;
-          }
-      }
-    \end{minted}
-  \end{center}
-\end{frame}
-
-\begin{frame}[fragile]
-  \frametitle{\texttt{factorial} example}
-  \begin{center}
-    \begin{minted}[autogobble,fontsize=\tiny]{rust}
-      // Rust
-      fn factorial(n: u64) -> u64 {
-          (1..n).fold(1, |a, b| a * b)
-      }
-
-      // Generated MIR
-      fn factorial(arg0: u64) -> u64 {
-          let var0: u64; // n
-          let mut tmp0: Range<u64>; // Miri calculates sizes for generics like Range<u64>.
-          let mut tmp1: [closure];
-
-          bb0: {
-              var0 = arg0;
-
-              // tmp0 = 1..n
-              tmp0 = Range<u64> { start: const 1u64, end: var0 };
-
-              // tmp1 = |a, b| a * b
-              tmp1 = [closure];
-
-              // This loads the MIR for the `fold` fn from the standard library.
-              // In general, MIR for any function from any library can be loaded.
-              // return tmp0.fold(1, tmp1)
-              return = Range<u64>::fold(tmp0, const 1u64, tmp1) -> bb1;
-          }
-
-          bb1: {
-              return;
-          }
-      }
-    \end{minted}
-  \end{center}
-\end{frame}
-
-\end{document}
--- a/tex/report/latexmkrc
+++ b/tex/report/latexmkrc
@ -1,12 +0,0 @@
-# vim: ft=perl
-
-$pdf_mode = 1;
-$pdflatex = 'lualatex --shell-escape %O %S';
-$out_dir = 'out';
-
-# This improves latexmk's detection of source files and generated files.
-$recorder = 1;
-
-# Ignore always-regenerated *.pyg files from the minted package when considering
-# whether to run pdflatex again.
-$hash_calc_ignore_pattern{'pyg'} = '.*';
--- a/tex/report/miri-report.tex
+++ b/tex/report/miri-report.tex
@ -1,663 +0,0 @@
-% vim: tw=100
-
-\documentclass[twocolumn]{article}
-\usepackage{blindtext}
-\usepackage[hypcap]{caption}
-\usepackage{fontspec}
-\usepackage[colorlinks, urlcolor={blue!80!black}]{hyperref}
-\usepackage[outputdir=out]{minted}
-\usepackage{relsize}
-\usepackage{xcolor}
-
-\setmonofont{Source Code Pro}[
-  BoldFont={* Medium},
-  BoldItalicFont={* Medium Italic},
-  Scale=MatchLowercase,
-]
-
-\newcommand{\rust}[1]{\mintinline{rust}{#1}}
-
-\begin{document}
-
-\title{Miri: \\ \smaller{An interpreter for Rust's mid-level intermediate representation}}
-\author{Scott Olson\footnote{\href{mailto:scott@solson.me}{scott@solson.me}} \\
-  \smaller{Supervised by Christopher Dutchyn}}
-\date{April 12th, 2016}
-\maketitle
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-\section{Abstract}
-
-The increasing need for safe low-level code in contexts like operating systems and browsers is
-driving the development of Rust\footnote{\url{https://www.rust-lang.org}}, a programming language
-promising high performance without the risk of memory unsafety. To make programming more convenient,
-it's often desirable to be able to generate code or perform some computation at compile-time. The
-former is mostly covered by Rust's existing macro feature or build-time code generation, but the
-latter is currently restricted to a limited form of constant evaluation capable of little beyond
-simple math.
-
-The architecture of the compiler at the time the existing constant evaluator was built limited its
-potential for future extension. However, a new intermediate representation was recently
-added\footnote{\href{https://github.com/rust-lang/rfcs/blob/master/text/1211-mir.md}{Rust RFC \#1211: Mid-level IR (MIR)}}
-to the Rust compiler between the abstract syntax tree and the back-end LLVM IR, called mid-level
-intermediate representation, or MIR for short. This report will demonstrate that writing an
-interpreter for MIR is a surprisingly effective approach for supporting a large proportion of Rust's
-features in compile-time execution.
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-\section{Background}
-
-The Rust compiler generates an instance of \rust{Mir} for each function [\autoref{fig:mir}]. Each
-\rust{Mir} structure represents a control-flow graph for a given function, and contains a list of
-``basic blocks'' which in turn contain a list of statements followed by a single terminator. Each
-statement is of the form \rust{place = rvalue}. An \rust{Place} is used for referencing variables
-and calculating addresses such as when dereferencing pointers, accessing fields, or indexing arrays.
-An \rust{Rvalue} represents the core set of operations possible in MIR, including reading a value
-from an place, performing math operations, creating new pointers, structures, and arrays, and so
-on. Finally, a terminator decides where control will flow next, optionally based on the value of a
-boolean or integer.
-
-\begin{figure}[ht]
-  \begin{minted}[autogobble]{rust}
-    struct Mir {
-        basic_blocks: Vec<BasicBlockData>,
-        // ...
-    }
-
-    struct BasicBlockData {
-        statements: Vec<Statement>,
-        terminator: Terminator,
-        // ...
-    }
-
-    struct Statement {
-        place: Place,
-        rvalue: Rvalue
-    }
-
-    enum Terminator {
-        Goto { target: BasicBlock },
-        If {
-            cond: Operand,
-            targets: [BasicBlock; 2]
-        },
-        // ...
-    }
-  \end{minted}
-  \caption{MIR (simplified)}
-  \label{fig:mir}
-\end{figure}
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-\section{First implementation}
-
-\subsection{Basic operation}
-
-To investigate the possibility of executing Rust at compile-time I wrote an interpreter for MIR
-called Miri\footnote{\url{https://github.com/solson/miri}}. The structure of the interpreter closely
-mirrors the structure of MIR itself. It starts executing a function by iterating the statement list
-in the starting basic block, translating the place into a pointer and using the rvalue to decide
-what to write into that pointer. Evaluating the rvalue may involve reads (such as for the two sides
-of a binary operation) or construction of new values. When the terminator is reached, it is used to
-decide which basic block to jump to next. Finally, Miri repeats this entire process, reading
-statements from the new block.
-
-\subsection{Function calls}
-
-To handle function call terminators\footnote{Calls occur only as terminators, never as rvalues.},
-Miri is required to store some information in a virtual call stack so that it may pick up where it
-left off when the callee returns. Each stack frame stores a reference to the \rust{Mir} for the
-function being executed, its local variables, its return value location\footnote{Return value
-pointers are passed in by callers.}, and the basic block where execution should resume. When Miri
-encounters a \rust{Return} terminator in the MIR, it pops one frame off the stack and resumes the
-previous function. Miri's execution ends when the function it was initially invoked with returns,
-leaving the call stack empty.
-
-It should be noted that Miri does not itself recurse when a function is called; it merely pushes a
-virtual stack frame and jumps to the top of the interpreter loop. Consequently, Miri can interpret
-deeply recursive programs without overflowing its native call stack. This approach would allow Miri
-to set a virtual stack depth limit and report an error when a program exceeds it.
-
-\subsection{Flaws}
-
-This version of Miri supported quite a bit of the Rust language, including booleans, integers,
-if-conditions, while-loops, structures, enums, arrays, tuples, pointers, and function calls,
-requiring approximately 400 lines of Rust code. However, it had a particularly naive value
-representation with a number of downsides. It resembled the data layout of a dynamic language like
-Ruby or Python, where every value has the same size\footnote{An \rust{enum} is a discriminated union
-with a tag and space to fit the largest variant, regardless of which variant it contains.} in the
-interpreter:
-
-\begin{minted}[autogobble]{rust}
-  enum Value {
-      Uninitialized,
-      Bool(bool),
-      Int(i64),
-      Pointer(Pointer), // index into stack
-      Aggregate {
-        variant: usize,
-        data: Pointer,
-      },
-  }
-\end{minted}
-
-This representation did not work well for aggregate types\footnote{That is, structures, enums,
-arrays, tuples, and closures.} and required strange hacks to support them. Their contained values
-were allocated elsewhere on the stack and pointed to by the aggregate value, which made it more
-complicated to implement copying aggregate values from place to place.
-
-Moreover, while the aggregate issues could be worked around, this value representation made common
-unsafe programming tricks (which make assumptions about the low-level value layout) fundamentally
-impossible.
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-\section{Current implementation}
-
-Roughly halfway through my time working on Miri, Eduard
-Burtescu\footnote{\href{https://github.com/eddyb}{eddyb on GitHub}} from the Rust compiler
-team\footnote{\url{https://www.rust-lang.org/team.html\#Compiler}} made a post on Rust's internal
-forums about a ``Rust Abstract Machine''
-specification\footnote{\href{https://internals.rust-lang.org/t/mir-constant-evaluation/3143/31}{Burtescu's
-reply on ``MIR constant evaluation''}} which could be used to implement more powerful compile-time
-function execution, similar to what is supported by C++14's \mintinline{cpp}{constexpr} feature.
-After clarifying some of the details of the data layout with Burtescu via IRC, I started
-implementing it in Miri.
-
-\subsection{Raw value representation}
-
-The main difference in the new value representation was to represent values by ``abstract
-allocations'' containing arrays of raw bytes with different sizes depending on their types. This
-mimics how Rust values are represented when compiled for physical machines. In addition to the raw
-bytes, allocations carry information about pointers and undefined bytes.
-
-\begin{minted}[autogobble]{rust}
-  struct Memory {
-      map: HashMap<AllocId, Allocation>,
-      next_id: AllocId,
-  }
-
-  struct Allocation {
-      bytes: Vec<u8>,
-      relocations: BTreeMap<usize, AllocId>,
-      undef_mask: UndefMask,
-  }
-\end{minted}
-
-\subsubsection{Relocations}
-
-The abstract machine represents pointers through ``relocations'', which are analogous to relocations
-in linkers\footnote{\href{https://en.wikipedia.org/wiki/Relocation_(computing)}{Relocation
-(computing) - Wikipedia}}. Instead of storing a global memory address in the raw byte representation
-like on a physical machine, we store an offset from the start of the target allocation and add an
-entry to the relocation table which maps the index of the offset bytes to the target allocation.
-
-In \autoref{fig:reloc}, the relocation stored at offset 0 in \rust{y} points to offset 2 in \rust{x}
-(the 2nd 16-bit integer). Thus, the relocation table for \rust{y} is \texttt{\{0 =>
-x\}}, meaning the next $N$ bytes after offset 0 denote an offset into allocation \rust{x} where $N$
-is the size of a pointer (4 in this example). The example shows this as a labelled line beneath the
-offset bytes.
-
-In effect, the abstract machine represents pointers as \rust{(allocation_id, offset)} pairs. This
-makes it easy to detect when pointer accesses go out of bounds.
-
-\begin{figure}[hb]
-  \begin{minted}[autogobble]{rust}
-    let x: [i16; 3] = [0xAABB, 0xCCDD, 0xEEFF];
-    let y = &x[1];
-    // x: BB AA DD CC FF EE (6 bytes)
-    // y: 02 00 00 00 (4 bytes)
-    //    └───(x)───┘
-  \end{minted}
-  \caption{Example relocation on 32-bit little-endian}
-  \label{fig:reloc}
-\end{figure}
-
-\subsubsection{Undefined byte mask}
-
-The final piece of an abstract allocation is the undefined byte mask. Logically, we store a boolean
-for the definedness of every byte in the allocation, but there are multiple ways to make the storage
-more compact. I tried two implementations: one based on the endpoints of alternating ranges of
-defined and undefined bytes and the other based on a bitmask. The former is more compact but I found
-it surprisingly difficult to update cleanly. I currently use the much simpler bitmask system.
-
-See \autoref{fig:undef} for an example of an undefined byte in a value, represented by underscores.
-Note that there is a value for the second byte in the byte array, but it doesn't matter what it is.
-The bitmask would be $10_2$, i.e.\ \rust{[true, false]}.
-
-\begin{figure}[hb]
-  \begin{minted}[autogobble]{rust}
-    let x: [u8; 2] = unsafe {
-        [1, std::mem::uninitialized()]
-    };
-    // x: 01 __ (2 bytes)
-  \end{minted}
-  \caption{Example undefined byte}
-  \label{fig:undef}
-\end{figure}
-
-\subsection{Computing data layout}
-
-Currently, the Rust compiler's data layouts for types are hidden from Miri, so it does its own data
-layout computation which will not always match what the compiler does, since Miri doesn't take
-target type alignments into account. In the future, the Rust compiler may be modified so that Miri
-can use the exact same data layout.
-
-Miri's data layout calculation is a relatively simple transformation from Rust types to a structure
-with constant size values for primitives and sets of fields with offsets for aggregate types. These
-layouts are cached for performance.
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-\section{Deterministic execution}
-\label{sec:deterministic}
-
-In order to be effective as a compile-time evaluator, Miri must have \emph{deterministic execution},
-as explained by Burtescu in the ``Rust Abstract Machine'' post. That is, given a function and
-arguments to that function, Miri should always produce identical results. This is important for
-coherence in the type checker when constant evaluations are involved in types, such as for sizes of
-array types:
-
-\begin{minted}[autogobble,mathescape]{rust}
-  const fn get_size() -> usize { /* $\ldots$ */ }
-  let array: [i32; get_size()];
-\end{minted}
-
-Since Miri allows execution of unsafe code\footnote{In fact, the distinction between safe and unsafe
-doesn't exist at the MIR level.}, it is specifically designed to remain safe while interpreting
-potentially unsafe code. When Miri encounters an unrecoverable error, it reports it via the Rust
-compiler's usual error reporting mechanism, pointing to the part of the original code where the
-error occurred. Below is an example from Miri's
-repository.\footnote{\href{https://github.com/solson/miri/blob/master/test/errors.rs}{miri/test/errors.rs}}
-
-\begin{minted}[autogobble]{rust}
-  let b = Box::new(42);
-  let p: *const i32 = &*b;
-  drop(b);
-  unsafe { *p }
-  //       ~~ error: dangling pointer
-  //            was dereferenced
-\end{minted}
-\label{dangling-pointer}
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-\section{Language support}
-
-In its current state, Miri supports a large proportion of the Rust language, detailed below. The
-major exception is a lack of support for  FFI\footnote{Foreign Function Interface, e.g.\ calling
-functions defined in Assembly, C, or C++.}, which eliminates possibilities like reading and writing
-files, user input, graphics, and more. However, for compile-time evaluation in Rust, this limitation
-is desired.
-
-\subsection{Primitives}
-
-Miri supports booleans, integers of various sizes and signed-ness (i.e.\ \rust{i8}, \rust{i16},
-\rust{i32}, \rust{i64}, \rust{isize}, \rust{u8}, \rust{u16}, \rust{u32}, \rust{u64}, \rust{usize}),
-and unary and binary operations over these types. The \rust{isize} and \rust{usize} types will be
-sized according to the target machine's pointer size just like in compiled Rust. The \rust{char} and
-float types (\rust{f32}, \rust{f64}) are not supported yet, but there are no known barriers to doing
-so.
-
-When examining a boolean in an \rust{if} condition, Miri will report an error if its byte
-representation is not precisely 0 or 1, since having any other value for a boolean is undefined
-behaviour in Rust. The \rust{char} type will have similar restrictions once it is implemented.
-
-\subsection{Pointers}
-
-Both references and raw pointers are supported, with essentially no difference between them in Miri.
-It is also possible to do pointer comparisons and math. However, a few operations are considered
-errors and a few require special support.
-
-Firstly, pointers into the same allocations may be compared for ordering, but pointers into
-different allocations are considered unordered and Miri will complain if you attempt this. The
-reasoning is that different allocations may have different orderings in the global address space at
-runtime, making this non-deterministic. However, pointers into different allocations \emph{may} be
-compared for direct equality (they are always unequal).
-
-Secondly, pointers represented using relocations may be compared against pointers casted from
-integers (e.g.\ \rust{0 as *const i32}) for things like null pointer checks. To handle these cases,
-Miri has a concept of ``integer pointers'' which are always unequal to abstract pointers. Integer
-pointers can be compared and operated upon freely. However, note that it is impossible to go from an
-integer pointer to an abstract pointer backed by a relocation. It is not valid to dereference an
-integer pointer.
-
-\subsubsection{Slice pointers}
-
-Rust supports pointers to ``dynamically-sized types'' such as \rust{[T]} and \rust{str} which
-represent arrays of indeterminate size. Pointers to such types contain an address \emph{and} the
-length of the referenced array. Miri supports these fully.
-
-\subsubsection{Trait objects}
-
-Rust also supports pointers to ``trait objects'' which represent some type that implements a trait,
-with the specific type unknown at compile-time. These are implemented using virtual dispatch with a
-vtable, similar to virtual methods in C++. Miri does not currently support these at all.
-
-\subsection{Aggregates}
-
-Aggregates include types declared with \rust{struct} or \rust{enum} as well as tuples, arrays, and
-closures. Miri supports all common usage of all of these types. The main missing piece is to handle
-\texttt{\#[repr(..)]} annotations which adjust the layout of a \rust{struct} or \rust{enum}.
-
-\subsection{Place projections}
-
-This category includes field accesses, dereferencing, accessing data in an \rust{enum} variant, and
-indexing arrays. Miri supports all of these, including nested projections such as
-\rust{*foo.bar[2]}.
-
-\subsection{Control flow}
-
-All of Rust's standard control flow features, including \rust{loop}, \rust{while}, \rust{for},
-\rust{if}, \rust{if let}, \rust{while let}, \rust{match}, \rust{break}, \rust{continue}, and
-\rust{return} are supported. In fact, supporting these was quite easy since the Rust compiler
-reduces them all down to a small set of control-flow graph primitives in MIR.
-
-\subsection{Function calls}
-
-As previously described, Miri supports arbitrary function calls without growing the native stack
-(only its virtual call stack). It is somewhat limited by the fact that cross-crate\footnote{A crate
-is a single Rust library (or executable).} calls only work for functions whose MIR is stored in
-crate metadata. This is currently true for \rust{const}, generic, and inline functions.
-A branch of the compiler could be made that stores MIR for all functions. This would be a non-issue
-for a compile-time evaluator based on Miri, since it would only call \rust{const fn}s.
-
-\subsubsection{Method calls}
-
-Miri supports trait method calls, including invoking all the compiler-internal lookup needed to find
-the correct implementation of the method.
-
-\subsubsection{Closures}
-
-Calls to closures are also supported with the exception of one edge case\footnote{Calling a closure
-that takes a reference to its captures via a closure interface that passes the captures by value is
-not yet supported.}. The value part of a closure that holds the captured variables is handled as an
-aggregate and the function call part is mostly the same as a trait method call, but with the added
-complication that closures use a separate calling convention within the compiler.
-
-\subsubsection{Function pointers}
-
-Function pointers are not currently supported by Miri, but there is a relatively simple way they
-could be encoded using a relocation with a special reserved allocation identifier. The offset of the
-relocation would determine which function it points to in a special array of functions in the
-interpreter.
-
-\subsubsection{Intrinsics}
-
-To support unsafe code, and in particular to support Rust's standard library, it became clear that
-Miri would have to support calls to compiler
-intrinsics\footnote{\url{https://doc.rust-lang.org/stable/std/intrinsics/index.html}}. Intrinsics
-are function calls which cause the Rust compiler to produce special-purpose code instead of a
-regular function call. Miri simply recognizes intrinsic calls by their unique
-ABI\footnote{Application Binary Interface, which defines calling conventions. Includes ``C'',
-``Rust'', and ``rust-intrinsic''.} and name and runs special-purpose code to handle them.
-
-An example of an important intrinsic is \rust{size_of} which will cause Miri to write the size of
-the type in question to the return value location. The Rust standard library uses intrinsics heavily
-to implement various data structures, so this was a major step toward supporting them. Intrinsics
-have been implemented on a case-by-case basis as tests which required them were written, and not all
-intrinsics are supported yet.
-
-\subsubsection{Generic function calls}
-
-Miri needs special support for generic function calls since Rust is a \emph{monomorphizing}
-compiler, meaning it generates a special version of each function for each distinct set of type
-parameters it gets called with. Since functions in MIR are still polymorphic, Miri has to do the
-same thing and substitute function type parameters into all types it encounters to get fully
-concrete, monomorphized types. For example, in\ldots
-
-\begin{minted}[autogobble]{rust}
-  fn some<T>(t: T) -> Option<T> { Some(t) }
-\end{minted}
-
-\ldots{}Miri needs to know the size of \rust{T} to copy the right amount of bytes from the argument
-to the return value. If we call \rust{some(10i32)} Miri will execute \rust{some} knowing that
-\rust{T = i32} and generate a representation for \rust{Option<i32>}.
-
-Miri currently does this monomorphization lazily on-demand unlike the Rust back-end which does it
-all ahead of time.
-
-\subsection{Heap allocations}
-
-The next piece of the puzzle for supporting interesting programs (and the standard library) was heap
-allocations. There are two main interfaces for heap allocation in Rust: the built-in \rust{Box}
-rvalue in MIR and a set of C ABI foreign functions including \rust{__rust_allocate},
-\rust{__rust_reallocate}, and \rust{__rust_deallocate}. These correspond approximately to
-\mintinline{c}{malloc}, \mintinline{c}{realloc}, and \mintinline{c}{free} in C.
-
-The \rust{Box} rvalue allocates enough space for a single value of a given type. This was easy to
-support in Miri. It simply creates a new abstract allocation in the same manner as for
-stack-allocated values, since there's no major difference between them in Miri.
-
-The allocator functions, which are used to implement things like Rust's standard \rust{Vec<T>} type,
-were a bit trickier. Rust declares them as \rust{extern "C" fn} so that different allocator
-libraries can be linked in at the user's option. Since Miri doesn't actually support FFI and wants
-full control of allocations for safety, it ``cheats'' and recognizes these allocator functions in
-essentially the same way it recognizes compiler intrinsics. Then, a call to \rust{__rust_allocate}
-simply creates another abstract allocation with the requested size and \rust{__rust_reallocate}
-grows one.
-
-In the future, Miri should also track which allocations came from \rust{__rust_allocate} so it can
-reject reallocate or deallocate calls on stack allocations.
-
-\subsection{Destructors}
-
-When a value which ``owns'' some resource (like a heap allocation or file handle) goes out of scope,
-Rust inserts \emph{drop glue} that calls the user-defined destructor for the type if it has one, and
-then drops all of the subfields. Destructors for types like \rust{Box<T>} and \rust{Vec<T>}
-deallocate heap memory.
-
-Miri doesn't yet support calling user-defined destructors, but it has most of the machinery in place
-to do so already. There \emph{is} support for dropping \rust{Box<T>} types, including deallocating
-their associated allocations. This is enough to properly execute the dangling pointer example in
-\autoref{sec:deterministic}.
-
-\subsection{Constants}
-
-Only basic integer, boolean, string, and byte-string literals are currently supported. Evaluating
-more complicated constant expressions in their current form would be a somewhat pointless exercise
-for Miri. Instead, we should lower constant expressions to MIR so Miri can run them directly, which
-is precisely what would need be done to use Miri as the compiler's constant evaluator.
-
-\subsection{Static variables}
-
-Miri doesn't currently support statics, but they would need support similar to constants. Also note
-that while it would be invalid to write to static (i.e.\ global) variables in Miri executions, it
-would probably be fine to allow reads.
-
-\subsection{Standard library}
-
-Throughout the implementation of the above features, I often followed this process:
-
-\begin{enumerate}
-  \item Try using a feature from the standard library.
-  \item See where Miri runs into stuff it can't handle.
-  \item Fix the problem.
-  \item Go to 1.
-\end{enumerate}
-
-At present, Miri supports a number of major non-trivial features from the standard library along
-with tons of minor features. Smart pointer types such as \rust{Box}, \rust{Rc}\footnote{Reference
-counted shared pointer} and \rust{Arc}\footnote{Atomically reference-counted thread-safe shared
-pointer} all seem to work. I've also tested using the shared smart pointer types with \rust{Cell}
-and \rust{RefCell}\footnote{\href{https://doc.rust-lang.org/stable/std/cell/index.html}{Rust
-documentation for cell types}} for internal mutability, and that works as well, although
-\rust{RefCell} can't ever be borrowed twice until I implement destructor calls, since a destructor
-is what releases the borrow.
-
-But the standard library collection I spent the most time on was \rust{Vec}, the standard
-dynamically-growable array type, similar to C++'s \texttt{std::vector} or Java's
-\texttt{java.util.ArrayList}. In Rust, \rust{Vec} is an extremely pervasive collection, so
-supporting it is a big win for supporting a larger swath of Rust programs in Miri.
-
-See \autoref{fig:vec} for an example (working in Miri today) of initializing a \rust{Vec} with a
-small amount of space on the heap and then pushing enough elements to force it to reallocate its
-data array. This involves cross-crate generic function calls, unsafe code using raw pointers, heap
-allocation, handling of uninitialized memory, compiler intrinsics, and more.
-
-\begin{figure}[t]
-  \begin{minted}[autogobble]{rust}
-    struct Vec<T> {
-        data: *mut T,    // 4 byte pointer
-        capacity: usize, // 4 byte integer
-        length: usize,   // 4 byte integer
-    }
-
-    let mut v: Vec<u8> =
-        Vec::with_capacity(2);
-    // v: 00 00 00 00 02 00 00 00 00 00 00 00
-    //    └─(data)──┘
-    // data: __ __
-
-    v.push(1);
-    // v: 00 00 00 00 02 00 00 00 01 00 00 00
-    //    └─(data)──┘
-    // data: 01 __
-
-    v.push(2);
-    // v: 00 00 00 00 02 00 00 00 02 00 00 00
-    //    └─(data)──┘
-    // data: 01 02
-
-    v.push(3);
-    // v: 00 00 00 00 04 00 00 00 03 00 00 00
-    //    └─(data)──┘
-    // data: 01 02 03 __
-  \end{minted}
-  \caption{\rust{Vec} example on 32-bit little-endian}
-  \label{fig:vec}
-\end{figure}
-
-Miri supports unsafe operations on \rust{Vec} like \rust{v.set_len(10)} or
-\rust{v.get_unchecked(2)}, provided that such calls do no invoke undefined behaviour. If a call
-\emph{does} invoke undefined behaviour, Miri will abort with an appropriate error message (see
-\autoref{fig:vec-error}).
-
-\begin{figure}[t]
-  \begin{minted}[autogobble]{rust}
-    fn out_of_bounds() -> u8 {
-        let v = vec![1, 2];
-        let p = unsafe { v.get_unchecked(5) };
-        *p + 10
-    //  ~~ error: pointer offset outside
-    //       bounds of allocation
-    }
-
-    fn undefined_bytes() -> u8 {
-        let v = Vec::<u8>::with_capacity(10);
-        let p = unsafe { v.get_unchecked(5) };
-        *p + 10
-    //  ~~~~~~~ error: attempted to read
-    //            undefined bytes
-    }
-  \end{minted}
-  \caption{\rust{Vec} examples with undefined behaviour}
-  \label{fig:vec-error}
-\end{figure}
-
-\newpage
-
-Here is one final code sample Miri can execute that demonstrates many features at once, including
-vectors, heap allocation, iterators, closures, raw pointers, and math:
-
-\begin{minted}[autogobble]{rust}
-  let x: u8 = vec![1, 2, 3, 4]
-      .into_iter()
-      .map(|x| x * x)
-      .fold(0, |x, y| x + y);
-    // x: 1e (that is, the hex value
-    //        0x1e = 30 = 1 + 4 + 9 + 16)
-\end{minted}
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-\section{Future directions}
-
-\subsection{Finishing the implementation}
-
-There are a number of pressing items on my to-do list for Miri, including:
-
-\begin{itemize}
-  \item A much more comprehensive and automated test suite.
-  \item User-defined destructor calls.
-  \item Non-trivial casts between primitive types like integers and pointers.
-  \item Handling statics and global memory.
-  \item Reporting errors for all undefined behaviour.\footnote{\href{https://doc.rust-lang.org/reference.html\#behavior-considered-undefined}{The Rust reference on what is considered undefined behaviour}}
-  \item Function pointers.
-  \item Accounting for target machine primitive type alignment and endianness.
-  \item Optimizations (undefined byte masks, tail-calls).
-  \item Benchmarking Miri vs. unoptimized Rust.
-  \item Various \texttt{TODO}s and \texttt{FIXME}s left in the code.
-  \item Integrating into the compiler proper.
-\end{itemize}
-
-\subsection{Future projects}
-
-Other possible Miri-related projects include:
-
-\begin{itemize}
-  \item A read-eval-print-loop (REPL) for Rust, which may be easier to implement on top of Miri than
-    the usual LLVM back-end.
-  \item A graphical or text-mode debugger that steps through MIR execution one statement at a time,
-    for figuring out why some compile-time execution is raising an error or simply learning how Rust
-    works at a low level.
-  \item A less restricted version of Miri that is able to run foreign functions from C/C++ and
-    generally has full access to the operating system. Such an interpreter could be used to more
-    quickly prototype changes to the Rust language that would otherwise require changes to the LLVM
-    back-end.
-  \item Unit-testing the compiler by comparing the results of Miri's execution against the results
-    of LLVM-compiled machine code's execution. This would help to guarantee that compile-time
-    execution works the same as runtime execution.
-  \item Some kind of Miri-based symbolic evaluator that examines multiple possible code paths at
-    once to determine if undefined behaviour could be observed on any of them.
-\end{itemize}
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-\section{Final thoughts}
-
-Writing an interpreter which models values of varying sizes, stack and heap allocation, unsafe
-memory operations, and more requires some unconventional techniques compared to conventional
-interpreters targeting dynamically-typed languages. However, aside from the somewhat complicated
-abstract memory model, making Miri work was primarily a software engineering problem, and not a
-particularly tricky one. This is a testament to MIR's suitability as an intermediate representation
-for Rust---removing enough unnecessary abstraction to keep it simple. For example, Miri doesn't even
-need to know that there are different kinds of loops, or how to match patterns in a \rust{match}
-expression.
-
-Another advantage to targeting MIR is that any new features at the syntax-level or type-level
-generally require little to no change in Miri. For example, when the new ``question mark'' syntax
-for error handling\footnote{
-  \href{https://github.com/rust-lang/rfcs/blob/master/text/0243-trait-based-exception-handling.md}
-    {Question mark syntax RFC}}
-was added to rustc, Miri required no change to support it.
-When specialization\footnote{
-  \href{https://github.com/rust-lang/rfcs/blob/master/text/1210-impl-specialization.md}
-    {Specialization RFC}}
-was added, Miri supported it with just minor changes to trait method lookup.
-
-Of course, Miri also has limitations. The inability to execute FFI and inline assembly reduces the
-amount of Rust programs Miri could ever execute. The good news is that in the constant evaluator,
-FFI can be stubbed out in cases where it makes sense, like I did with \rust{__rust_allocate}. For a
-version of Miri not intended for constant evaluation, it may be possible to use libffi to call C
-functions from the interpreter.
-
-In conclusion, Miri is a surprisingly effective project, and a lot of fun to implement. Due to MIR's
-tendency to collapse multiple source-level features into one, I often ended up supporting features I
-hadn't explicitly intended to. I am excited to work with the compiler team going forward to try to
-make Miri useful for constant evaluation in Rust.
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-\section{Thanks}
-
-A big thanks goes to Eduard Burtescu for writing the abstract machine specification and answering my
-incessant questions on IRC, to Niko Matsakis for coming up with the idea for Miri and supporting my
-desire to work with the Rust compiler, and to my research supervisor Christopher Dutchyn. Thanks
-also to everyone else on the compiler team and on Mozilla IRC who helped me figure stuff out.
-Finally, thanks to Daniel Keep and everyone else who helped fix my numerous writing mistakes.
-
-\end{document}