From 0027253f188cb6986f4d1281a31bb01769178f7d Mon Sep 17 00:00:00 2001 From: Steve Klabnik Date: Tue, 7 Apr 2015 22:16:02 -0400 Subject: [PATCH] Import real content. --- src/doc/trpl/SUMMARY.md | 2 +- src/doc/trpl/arrays.md | 48 ++ src/doc/trpl/associated-types.md | 202 +++++++ src/doc/trpl/attributes.md | 3 + src/doc/trpl/benchmark-tests.md | 152 +++++ src/doc/trpl/box-syntax-and-patterns.md | 100 ++++ src/doc/trpl/casting-between-types.md | 3 + src/doc/trpl/closures.md | 475 ++++++++++++++++ src/doc/trpl/comments.md | 47 ++ src/doc/trpl/compiler-plugins.md | 242 ++++++++ src/doc/trpl/concurrency.md | 358 ++++++++++++ src/doc/trpl/conditional-compilation.md | 3 + src/doc/trpl/const.md | 3 + src/doc/trpl/crates-and-modules.md | 533 +++++++++++++++++ src/doc/trpl/debug-and-display.md | 3 + src/doc/trpl/deref-coercions.md | 3 + src/doc/trpl/documentation.md | 562 ++++++++++++++++++ src/doc/trpl/drop.md | 3 + src/doc/trpl/effective-rust.md | 1 + src/doc/trpl/enums.md | 149 +++++ src/doc/trpl/error-handling.md | 301 ++++++++++ src/doc/trpl/ffi.md | 530 +++++++++++++++++ src/doc/trpl/for-loops.md | 44 ++ src/doc/trpl/functions.md | 193 +++++++ src/doc/trpl/generics.md | 177 ++++++ src/doc/trpl/getting-started.md | 1 + src/doc/trpl/glossary.md | 39 ++ src/doc/trpl/hello-cargo.md | 168 ++++++ src/doc/trpl/hello-world.md | 164 ++++++ src/doc/trpl/if.md | 157 +++++ src/doc/trpl/inline-assembly.md | 141 +++++ src/doc/trpl/installing-rust.md | 93 +++ src/doc/trpl/intrinsics.md | 25 + src/doc/trpl/iterators.md | 349 ++++++++++++ src/doc/trpl/lang-items.md | 79 +++ src/doc/trpl/learn-rust.md | 1 + src/doc/trpl/lifetimes.md | 3 + src/doc/trpl/link-args.md | 25 + src/doc/trpl/lol.txt | 1 - src/doc/trpl/macros.md | 663 ++++++++++++++++++++++ src/doc/trpl/match.md | 156 +++++ src/doc/trpl/method-syntax.md | 232 ++++++++ src/doc/trpl/move-semantics.md | 3 + src/doc/trpl/mutability.md | 3 + src/doc/trpl/nightly-rust.md | 2 +- src/doc/trpl/no-stdlib.md | 168 ++++++ 
src/doc/trpl/operators-and-overloading.md | 3 + src/doc/trpl/ownership.md | 555 ++++++++++++++++++ src/doc/trpl/patterns.md | 200 +++++++ src/doc/trpl/plugins.md | 0 src/doc/trpl/primitive-types.md | 3 + src/doc/trpl/references-and-borrowing.md | 3 + src/doc/trpl/slices.md | 21 + src/doc/trpl/static.md | 3 + src/doc/trpl/strings.md | 61 ++ src/doc/trpl/structs.md | 49 ++ src/doc/trpl/syntax-and-semantics.md | 1 + src/doc/trpl/testing.md | 432 ++++++++++++++ src/doc/trpl/the-stack-and-the-heap.md | 3 + src/doc/trpl/trait-objects.md | 306 ++++++++++ src/doc/trpl/traits.md | 508 +++++++++++++++++ src/doc/trpl/tuple-structs.md | 56 ++ src/doc/trpl/tuples.md | 97 ++++ src/doc/trpl/type-aliases.md | 3 + src/doc/trpl/ufcs.md | 3 + src/doc/trpl/unsafe-code.md | 183 ++++++ src/doc/trpl/unsized-types.md | 3 + src/doc/trpl/variable-bindings.md | 161 ++++++ src/doc/trpl/vectors.md | 33 ++ src/doc/trpl/while-loops.md | 83 +++ 70 files changed, 9377 insertions(+), 3 deletions(-) create mode 100644 src/doc/trpl/compiler-plugins.md delete mode 100644 src/doc/trpl/lol.txt delete mode 100644 src/doc/trpl/plugins.md diff --git a/src/doc/trpl/SUMMARY.md b/src/doc/trpl/SUMMARY.md index c9ac638098bd..d894e1c47253 100644 --- a/src/doc/trpl/SUMMARY.md +++ b/src/doc/trpl/SUMMARY.md @@ -58,7 +58,7 @@ * [Macros](macros.md) * [`unsafe` Code](unsafe-code.md) * [Nightly Rust](nightly-rust.md) - * [Compiler Plugins](plugins.md) + * [Compiler Plugins](compiler-plugins.md) * [Inline Assembly](inline-assembly.md) * [No stdlib](no-stdlib.md) * [Intrinsics](intrinsics.md) diff --git a/src/doc/trpl/arrays.md b/src/doc/trpl/arrays.md index e69de29bb2d1..a6ecac962d60 100644 --- a/src/doc/trpl/arrays.md +++ b/src/doc/trpl/arrays.md @@ -0,0 +1,48 @@ +% Arrays + +Like many programming languages, Rust has list types to represent a sequence of +things. The most basic is the *array*, a fixed-size list of elements of the +same type. By default, arrays are immutable. 
+ +```{rust} +let a = [1, 2, 3]; // a: [i32; 3] +let mut m = [1, 2, 3]; // mut m: [i32; 3] +``` + +There's a shorthand for initializing each element of an array to the same +value. In this example, each element of `a` will be initialized to `0`: + +```{rust} +let a = [0; 20]; // a: [i32; 20] +``` + +Arrays have type `[T; N]`. We'll talk about this `T` notation later, when we +cover generics. + +You can get the number of elements in an array `a` with `a.len()`, and use +`a.iter()` to iterate over them with a for loop. This code will print each +number in order: + +```{rust} +let a = [1, 2, 3]; + +println!("a has {} elements", a.len()); +for e in a.iter() { + println!("{}", e); +} +``` + +You can access a particular element of an array with *subscript notation*: + +```{rust} +let names = ["Graydon", "Brian", "Niko"]; // names: [&str; 3] + +println!("The second name is: {}", names[1]); +``` + +Subscripts start at zero, like in most programming languages, so the first name +is `names[0]` and the second name is `names[1]`. The above example prints +`The second name is: Brian`. If you try to use a subscript that is not in the +array, you will get an error: array access is bounds-checked at run-time. Such +errant access is the source of many bugs in other systems programming +languages. diff --git a/src/doc/trpl/associated-types.md b/src/doc/trpl/associated-types.md index e69de29bb2d1..7161cd33f89b 100644 --- a/src/doc/trpl/associated-types.md +++ b/src/doc/trpl/associated-types.md @@ -0,0 +1,202 @@ +% Associated Types + +Associated types are a powerful part of Rust's type system. They're related to +the idea of a 'type family', in other words, grouping multiple types together. That +description is a bit abstract, so let's dive right into an example. If you want +to write a `Graph` trait, you have two types to be generic over: the node type +and the edge type. 
So you might write a trait, `Graph`, that looks like +this: + +```rust +trait Graph<N, E> { + fn has_edge(&self, &N, &N) -> bool; + fn edges(&self, &N) -> Vec<E>; + // etc +} +``` + +While this sort of works, it ends up being awkward. For example, any function +that wants to take a `Graph` as a parameter now _also_ needs to be generic over +the `N`ode and `E`dge types too: + +```rust,ignore +fn distance<N, E, G: Graph<N, E>>(graph: &G, start: &N, end: &N) -> u32 { ... } +``` + +Our distance calculation works regardless of our `Edge` type, so the `E` stuff in +this signature is just a distraction. + +What we really want to say is that a certain `E`dge and `N`ode type come together +to form each kind of `Graph`. We can do that with associated types: + +```rust +trait Graph { + type N; + type E; + + fn has_edge(&self, &Self::N, &Self::N) -> bool; + fn edges(&self, &Self::N) -> Vec<Self::E>; + // etc +} +``` + +Now, our clients can be abstract over a given `Graph`: + +```rust,ignore +fn distance<G: Graph>(graph: &G, start: &G::N, end: &G::N) -> u32 { ... } +``` + +No need to deal with the `E`dge type here! + +Let's go over all this in more detail. + +## Defining associated types + +Let's build that `Graph` trait. Here's the definition: + +```rust +trait Graph { + type N; + type E; + + fn has_edge(&self, &Self::N, &Self::N) -> bool; + fn edges(&self, &Self::N) -> Vec<Self::E>; +} +``` + +Simple enough. Associated types use the `type` keyword, and go inside the body +of the trait, with the functions. + +These `type` declarations can have all the same things that functions do. For example, +if we wanted our `N` type to implement `Display`, so we can print the nodes out, +we could do this: + +```rust +use std::fmt; + +trait Graph { + type N: fmt::Display; + type E; + + fn has_edge(&self, &Self::N, &Self::N) -> bool; + fn edges(&self, &Self::N) -> Vec<Self::E>; +} +``` + +## Implementing associated types + +Just like any trait, traits that use associated types use the `impl` keyword to +provide implementations.
Here's a simple implementation of Graph: + +```rust +# trait Graph { +# type N; +# type E; +# fn has_edge(&self, &Self::N, &Self::N) -> bool; +# fn edges(&self, &Self::N) -> Vec<Self::E>; +# } +struct Node; + +struct Edge; + +struct MyGraph; + +impl Graph for MyGraph { + type N = Node; + type E = Edge; + + fn has_edge(&self, n1: &Node, n2: &Node) -> bool { + true + } + + fn edges(&self, n: &Node) -> Vec<Edge> { + Vec::new() + } +} +``` + +This silly implementation always returns `true` and an empty `Vec`, but it +gives you an idea of how to implement this kind of thing. We first need three +`struct`s, one for the graph, one for the node, and one for the edge. If it made +more sense to use a different type, that would work as well; we're just going to +use `struct`s for all three here. + +Next is the `impl` line, which is just like implementing any other trait. + +From here, we use `=` to define our associated types. The name the trait uses +goes on the left of the `=`, and the concrete type we're `impl`ementing this +for goes on the right. Finally, we use the concrete types in our function +declarations. + +## Trait objects with associated types + +There’s one more bit of syntax we should talk about: trait objects.
If you +try to create a trait object from a trait with associated types, like this: + +```rust,ignore +# trait Graph { +# type N; +# type E; +# fn has_edge(&self, &Self::N, &Self::N) -> bool; +# fn edges(&self, &Self::N) -> Vec<Self::E>; +# } +# struct Node; +# struct Edge; +# struct MyGraph; +# impl Graph for MyGraph { +# type N = Node; +# type E = Edge; +# fn has_edge(&self, n1: &Node, n2: &Node) -> bool { +# true +# } +# fn edges(&self, n: &Node) -> Vec<Edge> { +# Vec::new() +# } +# } +let graph = MyGraph; +let obj = Box::new(graph) as Box<Graph>; +``` + +You’ll get two errors: + +```text +error: the value of the associated type `E` (from the trait `main::Graph`) must +be specified [E0191] +let obj = Box::new(graph) as Box<Graph>; + ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +24:44 error: the value of the associated type `N` (from the trait +`main::Graph`) must be specified [E0191] +let obj = Box::new(graph) as Box<Graph>; + ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +``` + +We can’t create a trait object like this, because we don’t know the associated +types. Instead, we can write this: + +```rust +# trait Graph { +# type N; +# type E; +# fn has_edge(&self, &Self::N, &Self::N) -> bool; +# fn edges(&self, &Self::N) -> Vec<Self::E>; +# } +# struct Node; +# struct Edge; +# struct MyGraph; +# impl Graph for MyGraph { +# type N = Node; +# type E = Edge; +# fn has_edge(&self, n1: &Node, n2: &Node) -> bool { +# true +# } +# fn edges(&self, n: &Node) -> Vec<Edge> { +# Vec::new() +# } +# } +let graph = MyGraph; +let obj = Box::new(graph) as Box<Graph<N=Node, E=Edge>>; +``` + +The `N=Node` syntax allows us to provide a concrete type, `Node`, for the `N` +type parameter. Same with `E=Edge`. If we didn’t provide this constraint, we +couldn’t be sure which `impl` to match this trait object to. diff --git a/src/doc/trpl/attributes.md b/src/doc/trpl/attributes.md index e69de29bb2d1..e699bd85f6ed 100644 --- a/src/doc/trpl/attributes.md +++ b/src/doc/trpl/attributes.md @@ -0,0 +1,3 @@ +% Attributes + +Coming Soon!
diff --git a/src/doc/trpl/benchmark-tests.md b/src/doc/trpl/benchmark-tests.md index e69de29bb2d1..890a2f8ae7de 100644 --- a/src/doc/trpl/benchmark-tests.md +++ b/src/doc/trpl/benchmark-tests.md @@ -0,0 +1,152 @@ +% Benchmark tests + +Rust supports benchmark tests, which can test the performance of your +code. Let's make our `src/lib.rs` look like this (comments elided): + +```{rust,ignore} +#![feature(test)] + +extern crate test; + +pub fn add_two(a: i32) -> i32 { + a + 2 +} + +#[cfg(test)] +mod test { + use super::*; + use test::Bencher; + + #[test] + fn it_works() { + assert_eq!(4, add_two(2)); + } + + #[bench] + fn bench_add_two(b: &mut Bencher) { + b.iter(|| add_two(2)); + } +} +``` + +Note the `test` feature gate, which enables this unstable feature. + +We've imported the `test` crate, which contains our benchmarking support. +We have a new function as well, with the `bench` attribute. Unlike regular +tests, which take no arguments, benchmark tests take a `&mut Bencher`. This +`Bencher` provides an `iter` method, which takes a closure. This closure +contains the code we'd like to benchmark. + +We can run benchmark tests with `cargo bench`: + +```bash +$ cargo bench + Compiling adder v0.0.1 (file:///home/steve/tmp/adder) + Running target/release/adder-91b3e234d4ed382a + +running 2 tests +test tests::it_works ... ignored +test tests::bench_add_two ... bench: 1 ns/iter (+/- 0) + +test result: ok. 0 passed; 0 failed; 1 ignored; 1 measured +``` + +Our non-benchmark test was ignored. You may have noticed that `cargo bench` +takes a bit longer than `cargo test`. This is because Rust runs our benchmark +a number of times, and then takes the average. Because we're doing so little +work in this example, we have a `1 ns/iter (+/- 0)`, but this would show +the variance if there was one. 
+ +Advice on writing benchmarks: + + +* Move setup code outside the `iter` loop; only put the part you want to measure inside +* Make the code do "the same thing" on each iteration; do not accumulate or change state +* Make the outer function idempotent too; the benchmark runner is likely to run + it many times +* Make the inner `iter` loop short and fast so benchmark runs are fast and the + calibrator can adjust the run-length at fine resolution +* Make the code in the `iter` loop do something simple, to assist in pinpointing + performance improvements (or regressions) + +## Gotcha: optimizations + +There's another tricky part to writing benchmarks: benchmarks compiled with +optimizations activated can be dramatically changed by the optimizer so that +the benchmark is no longer benchmarking what one expects. For example, the +compiler might recognize that some calculation has no external effects and +remove it entirely. + +```{rust,ignore} +#![feature(test)] + +extern crate test; +use test::Bencher; + +#[bench] +fn bench_xor_1000_ints(b: &mut Bencher) { + b.iter(|| { + (0..1000).fold(0, |old, new| old ^ new); + }); +} +``` + +gives the following results + +```text +running 1 test +test bench_xor_1000_ints ... bench: 0 ns/iter (+/- 0) + +test result: ok. 0 passed; 0 failed; 0 ignored; 1 measured +``` + +The benchmarking runner offers two ways to avoid this. Either, the closure that +the `iter` method receives can return an arbitrary value which forces the +optimizer to consider the result used and ensures it cannot remove the +computation entirely. This could be done for the example above by adjusting the +`b.iter` call to + +```rust +# struct X; +# impl X { fn iter(&self, _: F) where F: FnMut() -> T {} } let b = X; +b.iter(|| { + // note lack of `;` (could also use an explicit `return`). 
+ (0..1000).fold(0, |old, new| old ^ new) +}); +``` + +Or, the other option is to call the generic `test::black_box` function, which +is an opaque "black box" to the optimizer and so forces it to consider any +argument as used. + +```rust +#![feature(test)] + +extern crate test; + +# fn main() { +# struct X; +# impl X { fn iter(&self, _: F) where F: FnMut() -> T {} } let b = X; +b.iter(|| { + let n = test::black_box(1000); + + (0..n).fold(0, |a, b| a ^ b) +}) +# } +``` + +Neither of these read or modify the value, and are very cheap for small values. +Larger values can be passed indirectly to reduce overhead (e.g. +`black_box(&huge_struct)`). + +Performing either of the above changes gives the following benchmarking results + +```text +running 1 test +test bench_xor_1000_ints ... bench: 131 ns/iter (+/- 3) + +test result: ok. 0 passed; 0 failed; 0 ignored; 1 measured +``` + +However, the optimizer can still modify a testcase in an undesirable manner +even when using either of the above. diff --git a/src/doc/trpl/box-syntax-and-patterns.md b/src/doc/trpl/box-syntax-and-patterns.md index e69de29bb2d1..839f07d98432 100644 --- a/src/doc/trpl/box-syntax-and-patterns.md +++ b/src/doc/trpl/box-syntax-and-patterns.md @@ -0,0 +1,100 @@ +% Box Syntax and Patterns + +Currently the only stable way to create a `Box` is via the `Box::new` method. +Also it is not possible in stable Rust to destructure a `Box` in a match +pattern. The unstable `box` keyword can be used to both create and destructure +a `Box`. 
An example usage would be: + +``` +#![feature(box_syntax, box_patterns)] + +fn main() { + let b = Some(box 5); + match b { + Some(box n) if n < 0 => { + println!("Box contains negative number {}", n); + }, + Some(box n) if n >= 0 => { + println!("Box contains non-negative number {}", n); + }, + None => { + println!("No box"); + }, + _ => unreachable!() + } +} +``` + +Note that these features are currently hidden behind the `box_syntax` (box +creation) and `box_patterns` (destructuring and pattern matching) gates +because the syntax may still change in the future. + +# Returning Pointers + +In many languages with pointers, you'd return a pointer from a function +so as to avoid copying a large data structure. For example: + +```{rust} +struct BigStruct { + one: i32, + two: i32, + // etc + one_hundred: i32, +} + +fn foo(x: Box<BigStruct>) -> Box<BigStruct> { + Box::new(*x) +} + +fn main() { + let x = Box::new(BigStruct { + one: 1, + two: 2, + one_hundred: 100, + }); + + let y = foo(x); +} +``` + +The idea is that by passing around a box, you're only copying a pointer, rather +than the hundred `i32`s that make up the `BigStruct`. + +This is an antipattern in Rust. Instead, write this: + +```rust +#![feature(box_syntax)] + +struct BigStruct { + one: i32, + two: i32, + // etc + one_hundred: i32, +} + +fn foo(x: Box<BigStruct>) -> BigStruct { + *x +} + +fn main() { + let x = Box::new(BigStruct { + one: 1, + two: 2, + one_hundred: 100, + }); + + let y: Box<BigStruct> = box foo(x); +} +``` + +This gives you flexibility without sacrificing performance. + +You may think that this gives us terrible performance: return a value and then +immediately box it up?! Isn't this pattern the worst of both worlds? Rust is +smarter than that. There is no copy in this code. `main` allocates enough room +for the `box`, passes a pointer to that memory into `foo` as `x`, and then +`foo` writes the value straight into the `Box`.
+ +This is important enough that it bears repeating: pointers are not for +optimizing returning values from your code. Allow the caller to choose how they +want to use your output. diff --git a/src/doc/trpl/casting-between-types.md b/src/doc/trpl/casting-between-types.md index e69de29bb2d1..8bb0ec6db025 100644 --- a/src/doc/trpl/casting-between-types.md +++ b/src/doc/trpl/casting-between-types.md @@ -0,0 +1,3 @@ +% Casting Between Types + +Coming Soon diff --git a/src/doc/trpl/closures.md b/src/doc/trpl/closures.md index e69de29bb2d1..e63331e5206b 100644 --- a/src/doc/trpl/closures.md +++ b/src/doc/trpl/closures.md @@ -0,0 +1,475 @@ +% Closures + +Rust not only has named functions, but anonymous functions as well. Anonymous +functions that have an associated environment are called 'closures', because they +close over an environment. Rust has a really great implementation of them, as +we'll see. + +# Syntax + +Closures look like this: + +```rust +let plus_one = |x: i32| x + 1; + +assert_eq!(2, plus_one(1)); +``` + +We create a binding, `plus_one`, and assign it to a closure. The closure's +arguments go between the pipes (`|`), and the body is an expression, in this +case, `x + 1`. Remember that `{ }` is an expression, so we can have multi-line +closures too: + +```rust +let plus_two = |x| { + let mut result: i32 = x; + + result += 1; + result += 1; + + result +}; + +assert_eq!(4, plus_two(2)); +``` + +You'll notice a few things about closures that are a bit different than regular +functions defined with `fn`. The first of which is that we did not need to +annotate the types of arguments the closure takes or the values it returns. We +can: + +```rust +let plus_one = |x: i32| -> i32 { x + 1 }; + +assert_eq!(2, plus_one(1)); +``` + +But we don't have to. Why is this? Basically, it was chosen for ergonomic reasons. 
+While specifying the full type for named functions is helpful with things like +documentation and type inference, the types of closures are rarely documented +since they’re anonymous, and they don’t cause the kinds of error-at-a-distance +that inferring named function types can. + +The second is that the syntax is similar, but a bit different. I've added spaces +here to make them look a little closer: + +```rust +fn plus_one_v1 (x: i32 ) -> i32 { x + 1 } +let plus_one_v2 = |x: i32 | -> i32 { x + 1 }; +let plus_one_v3 = |x: i32 | x + 1 ; +``` + +Small differences, but they're similar in ways. + +# Closures and their environment + +Closures are called such because they 'close over their environment.' It +looks like this: + +```rust +let num = 5; +let plus_num = |x: i32| x + num; + +assert_eq!(10, plus_num(5)); +``` + +This closure, `plus_num`, refers to a `let` binding in its scope: `num`. More +specifically, it borrows the binding. If we do something that would conflict +with that binding, we get an error. Like this one: + +```rust,ignore +let mut num = 5; +let plus_num = |x: i32| x + num; + +let y = &mut num; +``` + +Which errors with: + +```text +error: cannot borrow `num` as mutable because it is also borrowed as immutable + let y = &mut num; + ^~~ +note: previous borrow of `num` occurs here due to use in closure; the immutable + borrow prevents subsequent moves or mutable borrows of `num` until the borrow + ends + let plus_num = |x| x + num; + ^~~~~~~~~~~ +note: previous borrow ends here +fn main() { + let mut num = 5; + let plus_num = |x| x + num; + + let y = &mut num; +} +^ +``` + +A verbose yet helpful error message! As it says, we can't take a mutable borrow +on `num` because the closure is already borrowing it. 
If we let the closure go +out of scope, we can: + +```rust +let mut num = 5; +{ + let plus_num = |x: i32| x + num; + +} // plus_num goes out of scope, borrow of num ends + +let y = &mut num; +``` + +If your closure requires it, however, Rust will take ownership and move +the environment instead: + +```rust,ignore +let nums = vec![1, 2, 3]; + +let takes_nums = || nums; + +println!("{:?}", nums); +``` + +This gives us: + +```text +note: `nums` moved into closure environment here because it has type + `[closure(()) -> collections::vec::Vec]`, which is non-copyable +let takes_nums = || nums; + ^~~~~~~ +``` + +`Vec` has ownership over its contents, and therefore, when we refer to it +in our closure, we have to take ownership of `nums`. It's the same as if we'd +passed `nums` to a function that took ownership of it. + +## `move` closures + +We can force our closure to take ownership of its environment with the `move` +keyword: + +```rust +let num = 5; + +let owns_num = move |x: i32| x + num; +``` + +Now, even though the keyword is `move`, the variables follow normal move semantics. +In this case, `5` implements `Copy`, and so `owns_num` takes ownership of a copy +of `num`. So what's the difference? + +```rust +let mut num = 5; + +{ + let mut add_num = |x: i32| num += x; + + add_num(5); +} + +assert_eq!(10, num); +``` + +So in this case, our closure took a mutable reference to `num`, and then when +we called `add_num`, it mutated the underlying value, as we'd expect. We also +needed to declare `add_num` as `mut` too, because we’re mutating its +environment. + +If we change to a `move` closure, it's different: + +```rust +let mut num = 5; + +{ + let mut add_num = move |x: i32| num += x; + + add_num(5); +} + +assert_eq!(5, num); +``` + +We only get `5`. Rather than taking a mutable borrow out on our `num`, we took +ownership of a copy. + +Another way to think about `move` closures: they give a closure its own stack +frame. 
Without `move`, a closure may be tied to the stack frame that created +it, while a `move` closure is self-contained. This means that you cannot +generally return a non-`move` closure from a function, for example. + +But before we talk about taking and returning closures, we should talk some more +about the way that closures are implemented. As a systems language, Rust gives +you tons of control over what your code does, and closures are no different. + +# Closure implementation + +Rust's implementation of closures is a bit different than other languages. They +are effectively syntax sugar for traits. You'll want to make sure to have read +the [traits chapter][traits] before this one, as well as the chapter on [static +and dynamic dispatch][dispatch], which talks about trait objects. + +[traits]: traits.html +[dispatch]: trait-objects.html + +Got all that? Good. + +The key to understanding how closures work under the hood is something a bit +strange: Using `()` to call a function, like `foo()`, is an overloadable +operator. From this, everything else clicks into place. In Rust, we use the +trait system to overload operators. Calling functions is no different. We have +three separate traits to overload with: + +```rust +# mod foo { +pub trait Fn<Args> : FnMut<Args> { + extern "rust-call" fn call(&self, args: Args) -> Self::Output; +} + +pub trait FnMut<Args> : FnOnce<Args> { + extern "rust-call" fn call_mut(&mut self, args: Args) -> Self::Output; +} + +pub trait FnOnce<Args> { + type Output; + + extern "rust-call" fn call_once(self, args: Args) -> Self::Output; +} +# } +``` + +You'll notice a few differences between these traits, but a big one is `self`: +`Fn` takes `&self`, `FnMut` takes `&mut self`, and `FnOnce` takes `self`. This +covers all three kinds of `self` via the usual method call syntax. But we've +split them up into three traits, rather than having a single one. This gives us +a large amount of control over what kind of closures we can take.
+The `|| {}` syntax for closures is sugar for these three traits. Rust will +generate a struct for the environment, `impl` the appropriate trait, and then +use it. + +# Taking closures as arguments + +Now that we know that closures are traits, we already know how to accept and +return closures: just like any other trait! + +This also means that we can choose static vs dynamic dispatch as well. First, +let's write a function which takes something callable, calls it, and returns +the result: + +```rust +fn call_with_one<F>(some_closure: F) -> i32 + where F : Fn(i32) -> i32 { + + some_closure(1) +} + +let answer = call_with_one(|x| x + 2); + +assert_eq!(3, answer); +``` + +We pass our closure, `|x| x + 2`, to `call_with_one`. It just does what it +suggests: it calls the closure, giving it `1` as an argument. + +Let's examine the signature of `call_with_one` in more depth: + +```rust +fn call_with_one<F>(some_closure: F) -> i32 +# where F : Fn(i32) -> i32 { +# some_closure(1) } +``` + +We take one parameter, and it has the type `F`. We also return an `i32`. This part +isn't interesting. The next part is: + +```rust +# fn call_with_one<F>(some_closure: F) -> i32 + where F : Fn(i32) -> i32 { +# some_closure(1) } +``` + +Because `Fn` is a trait, we can bound our generic with it. In this case, our closure +takes an `i32` as an argument and returns an `i32`, and so the generic bound we use +is `Fn(i32) -> i32`. + +There's one other key point here: because we're bounding a generic with a +trait, this will get monomorphized, and therefore, we'll be doing static +dispatch into the closure. That's pretty neat. In many languages, closures are +inherently heap allocated, and will always involve dynamic dispatch. In Rust, +we can stack allocate our closure environment, and statically dispatch the +call. This happens quite often with iterators and their adapters, which often +take closures as arguments. + +Of course, if we want dynamic dispatch, we can get that too.
A trait object +handles this case, as usual: + +```rust +fn call_with_one(some_closure: &Fn(i32) -> i32) -> i32 { + some_closure(1) +} + +let answer = call_with_one(&|x| x + 2); + +assert_eq!(3, answer); +``` + +Now we take a trait object, a `&Fn`. And we have to make a reference +to our closure when we pass it to `call_with_one`, so we use `&||`. + +# Returning closures + +It’s very common for functional-style code to return closures in various +situations. If you try to return a closure, you may run into an error. At +first, it may seem strange, but we'll figure it out. Here's how you'd probably +try to return a closure from a function: + +```rust,ignore +fn factory() -> (Fn(i32) -> Vec) { + let vec = vec![1, 2, 3]; + + |n| vec.push(n) +} + +let f = factory(); + +let answer = f(4); +assert_eq!(vec![1, 2, 3, 4], answer); +``` + +This gives us these long, related errors: + +```text +error: the trait `core::marker::Sized` is not implemented for the type +`core::ops::Fn(i32) -> collections::vec::Vec` [E0277] +f = factory(); +^ +note: `core::ops::Fn(i32) -> collections::vec::Vec` does not have a +constant size known at compile-time +f = factory(); +^ +error: the trait `core::marker::Sized` is not implemented for the type +`core::ops::Fn(i32) -> collections::vec::Vec` [E0277] +factory() -> (Fn(i32) -> Vec) { + ^~~~~~~~~~~~~~~~~~~~~ +note: `core::ops::Fn(i32) -> collections::vec::Vec` does not have a constant size known at compile-time +fa ctory() -> (Fn(i32) -> Vec) { + ^~~~~~~~~~~~~~~~~~~~~ + +``` + +In order to return something from a function, Rust needs to know what +size the return type is. But since `Fn` is a trait, it could be various +things of various sizes: many different types can implement `Fn`. An easy +way to give something a size is to take a reference to it, as references +have a known size. 
So we'd write this: + +```rust,ignore +fn factory() -> &(Fn(i32) -> Vec<i32>) { + let vec = vec![1, 2, 3]; + + |n| vec.push(n) +} + +let f = factory(); + +let answer = f(4); +assert_eq!(vec![1, 2, 3, 4], answer); +``` + +But we get another error: + +```text +error: missing lifetime specifier [E0106] +fn factory() -> &(Fn(i32) -> i32) { + ^~~~~~~~~~~~~~~~~ +``` + +Right. Because we have a reference, we need to give it a lifetime. But +our `factory()` function takes no arguments, so elision doesn't kick in +here. What lifetime can we choose? `'static`: + +```rust,ignore +fn factory() -> &'static (Fn(i32) -> i32) { + let num = 5; + + |x| x + num +} + +let f = factory(); + +let answer = f(1); +assert_eq!(6, answer); +``` + +But we get another error: + +```text +error: mismatched types: + expected `&'static core::ops::Fn(i32) -> i32`, + found `[closure <anon>:7:9: 7:20]` +(expected &-ptr, + found closure) [E0308] + |x| x + num + ^~~~~~~~~~~ + +``` + +This error is letting us know that we don't have a `&'static Fn(i32) -> i32`, +we have a `[closure <anon>:7:9: 7:20]`. Wait, what? + +Because each closure generates its own environment `struct` and implementation +of `Fn` and friends, these types are anonymous. They exist solely for +this closure. So Rust shows them as `closure <anon>`, rather than some +autogenerated name. + +But why doesn't our closure implement `&'static Fn`? Well, as we discussed before, +closures borrow their environment. And in this case, our environment is based +on a stack-allocated `5`, the `num` variable binding. So the borrow has a lifetime +of the stack frame. So if we returned this closure, the function call would be +over, the stack frame would go away, and our closure would be capturing an environment +of garbage memory! + +So what to do?
This _almost_ works: + +```rust,ignore +fn factory() -> Box<Fn(i32) -> i32> { + let num = 5; + + Box::new(|x| x + num) +} +# fn main() { +let f = factory(); + +let answer = f(1); +assert_eq!(6, answer); +# } +``` + +We use a trait object, by `Box`ing up the `Fn`. There's just one last problem: + +```text +error: `num` does not live long enough +Box::new(|x| x + num) + ^~~~~~~~~~~ +``` + +We still have a reference to the parent stack frame. With one last fix, we can +make this work: + +```rust +fn factory() -> Box<Fn(i32) -> i32> { + let num = 5; + + Box::new(move |x| x + num) +} +# fn main() { +let f = factory(); + +let answer = f(1); +assert_eq!(6, answer); +# } +``` + +By making the inner closure a `move Fn`, we create a new stack frame for our +closure. By `Box`ing it up, we've given it a known size, and allowed it to +escape our stack frame. diff --git a/src/doc/trpl/comments.md b/src/doc/trpl/comments.md index e69de29bb2d1..441496e6a755 100644 --- a/src/doc/trpl/comments.md +++ b/src/doc/trpl/comments.md @@ -0,0 +1,47 @@ +% Comments + +Now that we have some functions, it's a good idea to learn about comments. +Comments are notes that you leave to other programmers to help explain things +about your code. The compiler mostly ignores them. + +Rust has two kinds of comments that you should care about: *line comments* +and *doc comments*. + +```{rust} +// Line comments are anything after '//' and extend to the end of the line. + +let x = 5; // this is also a line comment. + +// If you have a long explanation for something, you can put line comments next +// to each other. Put a space between the // and your comment so that it's +// more readable. +``` + +The other kind of comment is a doc comment. Doc comments use `///` instead of +`//`, and support Markdown notation inside: + +```{rust} +/// `hello` is a function that prints a greeting that is personalized based on +/// the name given. +/// +/// # Arguments +/// +/// * `name` - The name of the person you'd like to greet.
+/// +/// # Examples +/// +/// ```rust +/// let name = "Steve"; +/// hello(name); // prints "Hello, Steve!" +/// ``` +fn hello(name: &str) { + println!("Hello, {}!", name); +} +``` + +When writing doc comments, adding sections for any arguments, return values, +and providing some examples of usage is very, very helpful. Don't worry about +the `&str`, we'll get to it soon. + +You can use the [`rustdoc`](documentation.html) tool to generate HTML documentation +from these doc comments. diff --git a/src/doc/trpl/compiler-plugins.md b/src/doc/trpl/compiler-plugins.md new file mode 100644 index 000000000000..9eb22a7f6985 --- /dev/null +++ b/src/doc/trpl/compiler-plugins.md @@ -0,0 +1,242 @@ +% Compiler Plugins + +# Introduction + +`rustc` can load compiler plugins, which are user-provided libraries that +extend the compiler's behavior with new syntax extensions, lint checks, etc. + +A plugin is a dynamic library crate with a designated *registrar* function that +registers extensions with `rustc`. Other crates can load these extensions using +the crate attribute `#![plugin(...)]`. See the +[`rustc::plugin`](../rustc/plugin/index.html) documentation for more about the +mechanics of defining and loading a plugin. + +If present, arguments passed as `#![plugin(foo(... args ...))]` are not +interpreted by rustc itself. They are provided to the plugin through the +`Registry`'s [`args` method](../rustc/plugin/registry/struct.Registry.html#method.args). + +In the vast majority of cases, a plugin should *only* be used through +`#![plugin]` and not through an `extern crate` item. Linking a plugin would +pull in all of libsyntax and librustc as dependencies of your crate. This is +generally unwanted unless you are building another plugin. The +`plugin_as_library` lint checks these guidelines. + +The usual practice is to put compiler plugins in their own crate, separate from +any `macro_rules!` macros or ordinary Rust code meant to be used by consumers +of a library. 
+ +# Syntax extensions + +Plugins can extend Rust's syntax in various ways. One kind of syntax extension +is the procedural macro. These are invoked the same way as [ordinary +macros](macros.html), but the expansion is performed by arbitrary Rust +code that manipulates [syntax trees](../syntax/ast/index.html) at +compile time. + +Let's write a plugin +[`roman_numerals.rs`](https://github.com/rust-lang/rust/tree/master/src/test/auxiliary/roman_numerals.rs) +that implements Roman numeral integer literals. + +```ignore +#![crate_type="dylib"] +#![feature(plugin_registrar, rustc_private)] + +extern crate syntax; +extern crate rustc; + +use syntax::codemap::Span; +use syntax::parse::token; +use syntax::ast::{TokenTree, TtToken}; +use syntax::ext::base::{ExtCtxt, MacResult, DummyResult, MacEager}; +use syntax::ext::build::AstBuilder; // trait for expr_u32 +use rustc::plugin::Registry; + +fn expand_rn(cx: &mut ExtCtxt, sp: Span, args: &[TokenTree]) + -> Box<MacResult + 'static> { + + static NUMERALS: &'static [(&'static str, u32)] = &[ + ("M", 1000), ("CM", 900), ("D", 500), ("CD", 400), + ("C", 100), ("XC", 90), ("L", 50), ("XL", 40), + ("X", 10), ("IX", 9), ("V", 5), ("IV", 4), + ("I", 1)]; + + let text = match args { + [TtToken(_, token::Ident(s, _))] => token::get_ident(s).to_string(), + _ => { + cx.span_err(sp, "argument should be a single identifier"); + return DummyResult::any(sp); + } + }; + + let mut text = &*text; + let mut total = 0; + while !text.is_empty() { + match NUMERALS.iter().find(|&&(rn, _)| text.starts_with(rn)) { + Some(&(rn, val)) => { + total += val; + text = &text[rn.len()..]; + } + None => { + cx.span_err(sp, "invalid Roman numeral"); + return DummyResult::any(sp); + } + } + } + + MacEager::expr(cx.expr_u32(sp, total)) +} + +#[plugin_registrar] +pub fn plugin_registrar(reg: &mut Registry) { + reg.register_macro("rn", expand_rn); +} +``` + +Then we can use `rn!()` like any other macro: + +```ignore +#![feature(plugin)] +#![plugin(roman_numerals)] + +fn main() { +
assert_eq!(rn!(MMXV), 2015); +} +``` + +The advantages over a simple `fn(&str) -> u32` are: + +* The (arbitrarily complex) conversion is done at compile time. +* Input validation is also performed at compile time. +* It can be extended to allow use in patterns, which effectively gives + a way to define new literal syntax for any data type. + +In addition to procedural macros, you can define new +[`derive`](../reference.html#derive)-like attributes and other kinds of +extensions. See +[`Registry::register_syntax_extension`](../rustc/plugin/registry/struct.Registry.html#method.register_syntax_extension) +and the [`SyntaxExtension` +enum](http://doc.rust-lang.org/syntax/ext/base/enum.SyntaxExtension.html). For +a more involved macro example, see +[`regex_macros`](https://github.com/rust-lang/regex/blob/master/regex_macros/src/lib.rs). + + +## Tips and tricks + +Some of the [macro debugging tips](macros.html#debugging-macro-code) are applicable. + +You can use [`syntax::parse`](../syntax/parse/index.html) to turn token trees into +higher-level syntax elements like expressions: + +```ignore +fn expand_foo(cx: &mut ExtCtxt, sp: Span, args: &[TokenTree]) + -> Box<MacResult + 'static> { + + let mut parser = cx.new_parser_from_tts(args); + + let expr: P<Expr> = parser.parse_expr(); +``` + +Looking through [`libsyntax` parser +code](https://github.com/rust-lang/rust/blob/master/src/libsyntax/parse/parser.rs) +will give you a feel for how the parsing infrastructure works. + +Keep the [`Span`s](../syntax/codemap/struct.Span.html) of +everything you parse, for better error reporting. You can wrap +[`Spanned`](../syntax/codemap/struct.Spanned.html) around +your custom data structures. + +Calling +[`ExtCtxt::span_fatal`](../syntax/ext/base/struct.ExtCtxt.html#method.span_fatal) +will immediately abort compilation.
It's better to instead call +[`ExtCtxt::span_err`](../syntax/ext/base/struct.ExtCtxt.html#method.span_err) +and return +[`DummyResult`](../syntax/ext/base/struct.DummyResult.html), +so that the compiler can continue and find further errors. + +To print syntax fragments for debugging, you can use +[`span_note`](../syntax/ext/base/struct.ExtCtxt.html#method.span_note) together +with +[`syntax::print::pprust::*_to_string`](http://doc.rust-lang.org/syntax/print/pprust/index.html#functions). + +The example above produced an integer literal using +[`AstBuilder::expr_u32`](../syntax/ext/build/trait.AstBuilder.html#tymethod.expr_u32). +As an alternative to the `AstBuilder` trait, `libsyntax` provides a set of +[quasiquote macros](../syntax/ext/quote/index.html). They are undocumented and +very rough around the edges. However, the implementation may be a good +starting point for an improved quasiquote as an ordinary plugin library. + + +# Lint plugins + +Plugins can extend [Rust's lint +infrastructure](../reference.html#lint-check-attributes) with additional checks for +code style, safety, etc.
You can see +[`src/test/auxiliary/lint_plugin_test.rs`](https://github.com/rust-lang/rust/blob/master/src/test/auxiliary/lint_plugin_test.rs) +for a full example, the core of which is reproduced here: + +```ignore +declare_lint!(TEST_LINT, Warn, + "Warn about items named 'lintme'") + +struct Pass; + +impl LintPass for Pass { + fn get_lints(&self) -> LintArray { + lint_array!(TEST_LINT) + } + + fn check_item(&mut self, cx: &Context, it: &ast::Item) { + let name = token::get_ident(it.ident); + if name.get() == "lintme" { + cx.span_lint(TEST_LINT, it.span, "item is named 'lintme'"); + } + } +} + +#[plugin_registrar] +pub fn plugin_registrar(reg: &mut Registry) { + reg.register_lint_pass(box Pass as LintPassObject); +} +``` + +Then code like + +```ignore +#![plugin(lint_plugin_test)] + +fn lintme() { } +``` + +will produce a compiler warning: + +```txt +foo.rs:4:1: 4:16 warning: item is named 'lintme', #[warn(test_lint)] on by default +foo.rs:4 fn lintme() { } + ^~~~~~~~~~~~~~~ +``` + +The components of a lint plugin are: + +* one or more `declare_lint!` invocations, which define static + [`Lint`](../rustc/lint/struct.Lint.html) structs; + +* a struct holding any state needed by the lint pass (here, none); + +* a [`LintPass`](../rustc/lint/trait.LintPass.html) + implementation defining how to check each syntax element. A single + `LintPass` may call `span_lint` for several different `Lint`s, but should + register them all through the `get_lints` method. + +Lint passes are syntax traversals, but they run at a late stage of compilation +where type information is available. `rustc`'s [built-in +lints](https://github.com/rust-lang/rust/blob/master/src/librustc/lint/builtin.rs) +mostly use the same infrastructure as lint plugins, and provide examples of how +to access type information. + +Lints defined by plugins are controlled by the usual [attributes and compiler +flags](../reference.html#lint-check-attributes), e.g. `#[allow(test_lint)]` or +`-A test-lint`. 
These identifiers are derived from the first argument to +`declare_lint!`, with appropriate case and punctuation conversion. + +You can run `rustc -W help foo.rs` to see a list of lints known to `rustc`, +including those provided by plugins loaded by `foo.rs`. diff --git a/src/doc/trpl/concurrency.md b/src/doc/trpl/concurrency.md index e69de29bb2d1..f9358f28b019 100644 --- a/src/doc/trpl/concurrency.md +++ b/src/doc/trpl/concurrency.md @@ -0,0 +1,358 @@ +% Concurrency + +Concurrency and parallelism are incredibly important topics in computer +science, and are also a hot topic in industry today. Computers are gaining more +and more cores, yet many programmers aren't prepared to fully utilize them. + +Rust's memory safety features also apply to its concurrency story. Even +concurrent Rust programs must be memory safe, having no data races. Rust's type +system is up to the task, and gives you powerful ways to reason about +concurrent code at compile time. + +Before we talk about the concurrency features that come with Rust, it's important +to understand something: Rust is low-level enough that all of this is provided +by the standard library, not by the language. This means that if you don't like +some aspect of the way Rust handles concurrency, you can implement an alternative +way of doing things. [mio](https://github.com/carllerche/mio) is a real-world +example of this principle in action. + +## Background: `Send` and `Sync` + +Concurrency is difficult to reason about. In Rust, we have a strong, static +type system to help us reason about our code. As such, Rust gives us two traits +to help us make sense of code that can possibly be concurrent. + +### `Send` + +The first trait we're going to talk about is +[`Send`](../std/marker/trait.Send.html). When a type `T` implements `Send`, it indicates +to the compiler that something of this type is able to have ownership transferred +safely between threads. + +This is important to enforce certain restrictions.
For example, if we have a +channel connecting two threads, we would want to be able to send some data +down the channel and to the other thread. Therefore, we'd ensure that `Send` was +implemented for that type. + +In the opposite way, if we were wrapping a library with FFI that isn't +threadsafe, we wouldn't want to implement `Send`, and so the compiler will help +us enforce that it can't leave the current thread. + +### `Sync` + +The second of these traits is called [`Sync`](../std/marker/trait.Sync.html). +When a type `T` implements `Sync`, it indicates to the compiler that something +of this type has no possibility of introducing memory unsafety when used from +multiple threads concurrently. + +For example, sharing immutable data with an atomic reference count is +threadsafe. Rust provides a type like this, `Arc`, and it implements `Sync`, +so it is safe to share between threads. + +These two traits allow you to use the type system to make strong guarantees +about the properties of your code under concurrency. Before we demonstrate +why, we need to learn how to create a concurrent Rust program in the first +place! + +## Threads + +Rust's standard library provides a library for 'threads', which allow you to +run Rust code in parallel. Here's a basic example of using `std::thread`: + +``` +use std::thread; + +fn main() { + thread::scoped(|| { + println!("Hello from a thread!"); + }); +} +``` + +The `thread::scoped()` method accepts a closure, which is executed in a new +thread. It's called `scoped` because this thread returns a join guard: + +``` +use std::thread; + +fn main() { + let guard = thread::scoped(|| { + println!("Hello from a thread!"); + }); + + // guard goes out of scope here +} +``` + +When `guard` goes out of scope, it will block execution until the thread is +finished. 
If we didn't want this behaviour, we could use `thread::spawn()`: + +``` +use std::thread; + +fn main() { + thread::spawn(|| { + println!("Hello from a thread!"); + }); + + thread::sleep_ms(50); +} +``` + +We need to `sleep` here because when `main()` ends, it kills all of the +running threads. + +[`scoped`](../std/thread/struct.Builder.html#method.scoped) has an interesting +type signature: + +```text +fn scoped<'a, T, F>(self, f: F) -> JoinGuard<'a, T> + where T: Send + 'a, + F: FnOnce() -> T, + F: Send + 'a +``` + +Specifically, look at `F`, the closure that we pass to execute in the new thread. It +has two restrictions: It must be a `FnOnce` from `()` to `T`. Using `FnOnce` +allows the closure to take ownership of any data it mentions from the parent +thread. The other restriction is that `F` must be `Send`. We aren't allowed to +transfer this ownership unless the type thinks that's okay. + +Many languages have the ability to execute threads, but it's wildly unsafe. +There are entire books about how to prevent errors that occur from shared +mutable state. Rust helps out with its type system here as well, by preventing +data races at compile time. Let's talk about how you actually share things +between threads. + +## Safe Shared Mutable State + +Due to Rust's type system, we have a concept that sounds like a lie: "safe +shared mutable state." Many programmers agree that shared mutable state is +very, very bad. + +Someone once said this: + +> Shared mutable state is the root of all evil. Most languages attempt to deal +> with this problem through the 'mutable' part, but Rust deals with it by +> solving the 'shared' part. + +The same [ownership system](ownership.html) that helps prevent using pointers +incorrectly also helps rule out data races, one of the worst kinds of +concurrency bugs. + +As an example, here is a Rust program that would have a data race in many +languages.
It will not compile: + +```ignore +use std::thread; + +fn main() { + let mut data = vec![1u32, 2, 3]; + + for i in 0..2 { + thread::spawn(move || { + data[i] += 1; + }); + } + + thread::sleep_ms(50); +} +``` + +This gives us an error: + +```text +8:17 error: capture of moved value: `data` + data[i] += 1; + ^~~~ +``` + +In this case, we know that our code _should_ be safe, but Rust isn't sure. And +it's actually not safe: if we had a reference to `data` in each thread, and the +thread takes ownership of the reference, we have three owners! That's bad. We +can fix this by using the `Arc` type, which is an atomic reference counted +pointer. The 'atomic' part means that it's safe to share across threads. + +`Arc` assumes one more property about its contents to ensure that it is safe +to share across threads: it assumes its contents are `Sync`. But in our +case, we want to be able to mutate the value. We need a type that can ensure +only one person at a time can mutate what's inside. For that, we can use the +`Mutex` type. Here's the second version of our code. 
It still doesn't work, +but for a different reason: + +```ignore +use std::thread; +use std::sync::Mutex; + +fn main() { + let mut data = Mutex::new(vec![1u32, 2, 3]); + + for i in 0..2 { + let data = data.lock().unwrap(); + thread::spawn(move || { + data[i] += 1; + }); + } + + thread::sleep_ms(50); +} +``` + +Here's the error: + +```text +<anon>:9:9: 9:22 error: the trait `core::marker::Send` is not implemented for the type `std::sync::mutex::MutexGuard<'_, collections::vec::Vec<u32>>` [E0277] +<anon>:11 thread::spawn(move || { + ^~~~~~~~~~~~~ +<anon>:9:9: 9:22 note: `std::sync::mutex::MutexGuard<'_, collections::vec::Vec<u32>>` cannot be sent between threads safely +<anon>:11 thread::spawn(move || { + ^~~~~~~~~~~~~ +``` + +You see, [`Mutex`](../std/sync/struct.Mutex.html) has a +[`lock`](http://doc.rust-lang.org/nightly/std/sync/struct.Mutex.html#method.lock) +method which has this signature: + +```ignore +fn lock(&self) -> LockResult<MutexGuard<T>> +``` + +Because `Send` is not implemented for `MutexGuard`, we can't transfer the +guard across thread boundaries, which gives us our error. + +We can use `Arc` to fix this. Here's the working version: + +``` +use std::sync::{Arc, Mutex}; +use std::thread; + +fn main() { + let data = Arc::new(Mutex::new(vec![1u32, 2, 3])); + + for i in 0..2 { + let data = data.clone(); + thread::spawn(move || { + let mut data = data.lock().unwrap(); + data[i] += 1; + }); + } + + thread::sleep_ms(50); +} +``` + +We now call `clone()` on our `Arc`, which increases the internal count. This +handle is then moved into the new thread. Let's examine the body of the +thread more closely: + +```rust +# use std::sync::{Arc, Mutex}; +# use std::thread; +# fn main() { +# let data = Arc::new(Mutex::new(vec![1u32, 2, 3])); +# for i in 0..2 { +# let data = data.clone(); +thread::spawn(move || { + let mut data = data.lock().unwrap(); + data[i] += 1; +}); +# } +# thread::sleep_ms(50); +# } +``` + +First, we call `lock()`, which acquires the mutex's lock.
Because this may fail, +it returns a `Result`, and because this is just an example, we `unwrap()` +it to get a reference to the data. Real code would have more robust error handling +here. We're then free to mutate it, since we have the lock. + +Lastly, while the threads are running, we wait on a short timer. But +this is not ideal: we may have picked a reasonable amount of time to +wait but it's more likely we'll either be waiting longer than +necessary or not long enough, depending on just how much time the +threads actually take to finish computing when the program runs. + +A more precise alternative to the timer would be to use one of the +mechanisms provided by the Rust standard library for synchronizing +threads with each other. Let's talk about one of them: channels. + +## Channels + +Here's a version of our code that uses channels for synchronization, rather +than waiting for a specific time: + +``` +use std::sync::{Arc, Mutex}; +use std::thread; +use std::sync::mpsc; + +fn main() { + let data = Arc::new(Mutex::new(0u32)); + + let (tx, rx) = mpsc::channel(); + + for _ in 0..10 { + let (data, tx) = (data.clone(), tx.clone()); + + thread::spawn(move || { + let mut data = data.lock().unwrap(); + *data += 1; + + tx.send(()); + }); + } + + for _ in 0..10 { + rx.recv(); + } +} +``` + +We use the `mpsc::channel()` function to construct a new channel. We just `send` +a simple `()` down the channel, and then wait for ten of them to come back. + +While this channel is just sending a generic signal, we can send any data that +is `Send` over the channel! + +``` +use std::thread; +use std::sync::mpsc; + +fn main() { + let (tx, rx) = mpsc::channel(); + + for _ in 0..10 { + let tx = tx.clone(); + + thread::spawn(move || { + let answer = 42u32; + + tx.send(answer); + }); + } + + rx.recv().ok().expect("Could not receive answer"); +} +``` + +A `u32` is `Send` because we can make a copy.
So we create a thread, ask it to calculate +the answer, and then it `send()`s us the answer over the channel. + + +## Panics + +A `panic!` will crash the currently executing thread. You can use Rust's +threads as a simple isolation mechanism: + +``` +use std::thread; + +let result = thread::spawn(move || { + panic!("oops!"); +}).join(); + +assert!(result.is_err()); +``` + +Calling `join()` gives us a `Result` back, which allows us to check if the thread +has panicked or not. diff --git a/src/doc/trpl/conditional-compilation.md b/src/doc/trpl/conditional-compilation.md index e69de29bb2d1..40367fa844d2 100644 --- a/src/doc/trpl/conditional-compilation.md +++ b/src/doc/trpl/conditional-compilation.md @@ -0,0 +1,3 @@ +% Conditional Compilation + +Coming Soon! diff --git a/src/doc/trpl/const.md b/src/doc/trpl/const.md index e69de29bb2d1..9234c4fc2f9e 100644 --- a/src/doc/trpl/const.md +++ b/src/doc/trpl/const.md @@ -0,0 +1,3 @@ +% `const` + +Coming soon! diff --git a/src/doc/trpl/crates-and-modules.md b/src/doc/trpl/crates-and-modules.md index e69de29bb2d1..83e8cc629fd5 100644 --- a/src/doc/trpl/crates-and-modules.md +++ b/src/doc/trpl/crates-and-modules.md @@ -0,0 +1,533 @@ +% Crates and Modules + +When a project starts getting large, it's considered good software +engineering practice to split it up into a bunch of smaller pieces, and then +fit them together. It's also important to have a well-defined interface, so +that some of your functionality is private, and some is public. To facilitate +these kinds of things, Rust has a module system. + +# Basic terminology: Crates and Modules + +Rust has two distinct terms that relate to the module system: *crate* and +*module*. A crate is synonymous with a *library* or *package* in other +languages. Hence "Cargo" as the name of Rust's package management tool: you +ship your crates to others with Cargo. Crates can produce an executable or a +library, depending on the project.
+ +Each crate has an implicit *root module* that contains the code for that crate. +You can then define a tree of sub-modules under that root module. Modules allow +you to partition your code within the crate itself. + +As an example, let's make a *phrases* crate, which will give us various phrases +in different languages. To keep things simple, we'll stick to "greetings" and +"farewells" as two kinds of phrases, and use English and Japanese (日本語) as +two languages for those phrases to be in. We'll use this module layout: + +```text + +-----------+ + +---| greetings | + | +-----------+ + +---------+ | + +---| english |---+ + | +---------+ | +-----------+ + | +---| farewells | ++---------+ | +-----------+ +| phrases |---+ ++---------+ | +-----------+ + | +---| greetings | + | +----------+ | +-----------+ + +---| japanese |--+ + +----------+ | + | +-----------+ + +---| farewells | + +-----------+ +``` + +In this example, `phrases` is the name of our crate. All of the rest are +modules. You can see that they form a tree, branching out from the crate +*root*, which is the root of the tree: `phrases` itself. + +Now that we have a plan, let's define these modules in code. To start, +generate a new crate with Cargo: + +```bash +$ cargo new phrases +$ cd phrases +``` + +If you remember, this generates a simple project for us: + +```bash +$ tree . +. +├── Cargo.toml +└── src + └── lib.rs + +1 directory, 2 files +``` + +`src/lib.rs` is our crate root, corresponding to the `phrases` in our diagram +above. + +# Defining Modules + +To define each of our modules, we use the `mod` keyword. Let's make our +`src/lib.rs` look like this: + +``` +mod english { + mod greetings { + } + + mod farewells { + } +} + +mod japanese { + mod greetings { + } + + mod farewells { + } +} +``` + +After the `mod` keyword, you give the name of the module. Module names follow +the conventions for other Rust identifiers: `lower_snake_case`. The contents of +each module are within curly braces (`{}`). 
+ +Within a given `mod`, you can declare sub-`mod`s. We can refer to sub-modules +with double-colon (`::`) notation: our four nested modules are +`english::greetings`, `english::farewells`, `japanese::greetings`, and +`japanese::farewells`. Because these sub-modules are namespaced under their +parent module, the names don't conflict: `english::greetings` and +`japanese::greetings` are distinct, even though their names are both +`greetings`. + +Because this crate does not have a `main()` function, and is called `lib.rs`, +Cargo will build this crate as a library: + +```bash +$ cargo build + Compiling phrases v0.0.1 (file:///home/you/projects/phrases) +$ ls target/debug +build deps examples libphrases-a7448e02a0468eaa.rlib native +``` + +`libphrases-hash.rlib` is the compiled crate. Before we see how to use this +crate from another crate, let's break it up into multiple files. + +# Multiple file crates + +If each crate were just one file, these files would get very large. It's often +easier to split up crates into multiple files, and Rust supports this in two +ways. + +Instead of declaring a module like this: + +```{rust,ignore} +mod english { + // contents of our module go here +} +``` + +We can instead declare our module like this: + +```{rust,ignore} +mod english; +``` + +If we do that, Rust will expect to find either an `english.rs` file, or an +`english/mod.rs` file with the contents of our module. + +Note that in these files, you don't need to re-declare the module: that's +already been done with the initial `mod` declaration. + +Using these two techniques, we can break up our crate into two directories and +seven files: + +```bash +$ tree . +.
+├── Cargo.lock +├── Cargo.toml +├── src +│   ├── english +│   │   ├── farewells.rs +│   │   ├── greetings.rs +│   │   └── mod.rs +│   ├── japanese +│   │   ├── farewells.rs +│   │   ├── greetings.rs +│   │   └── mod.rs +│   └── lib.rs +└── target + └── debug + ├── build + ├── deps + ├── examples + ├── libphrases-a7448e02a0468eaa.rlib + └── native +``` + +`src/lib.rs` is our crate root, and looks like this: + +```{rust,ignore} +mod english; +mod japanese; +``` + +These two declarations tell Rust to look for either `src/english.rs` and +`src/japanese.rs`, or `src/english/mod.rs` and `src/japanese/mod.rs`, depending +on our preference. In this case, because our modules have sub-modules, we've +chosen the second. Both `src/english/mod.rs` and `src/japanese/mod.rs` look +like this: + +```{rust,ignore} +mod greetings; +mod farewells; +``` + +Again, these declarations tell Rust to look for either +`src/english/greetings.rs` and `src/japanese/greetings.rs` or +`src/english/farewells/mod.rs` and `src/japanese/farewells/mod.rs`. Because +these sub-modules don't have their own sub-modules, we've chosen to make them +`src/english/greetings.rs` and `src/japanese/farewells.rs`. Whew! + +The contents of `src/english/greetings.rs` and `src/japanese/farewells.rs` are +both empty at the moment. Let's add some functions. + +Put this in `src/english/greetings.rs`: + +```rust +fn hello() -> String { + "Hello!".to_string() +} +``` + +Put this in `src/english/farewells.rs`: + +```rust +fn goodbye() -> String { + "Goodbye.".to_string() +} +``` + +Put this in `src/japanese/greetings.rs`: + +```rust +fn hello() -> String { + "こんにちは".to_string() +} +``` + +Of course, you can copy and paste this from this web page, or just type +something else. It's not important that you actually put "konnichiwa" to learn +about the module system. + +Put this in `src/japanese/farewells.rs`: + +```rust +fn goodbye() -> String { + "さようなら".to_string() +} +``` + +(This is "Sayōnara", if you're curious.) 
+ +Now that we have some functionality in our crate, let's try to use it from +another crate. + +# Importing External Crates + +We have a library crate. Let's make an executable crate that imports and uses +our library. + +Make a `src/main.rs` and put this in it (it won't quite compile yet): + +```rust,ignore +extern crate phrases; + +fn main() { + println!("Hello in English: {}", phrases::english::greetings::hello()); + println!("Goodbye in English: {}", phrases::english::farewells::goodbye()); + + println!("Hello in Japanese: {}", phrases::japanese::greetings::hello()); + println!("Goodbye in Japanese: {}", phrases::japanese::farewells::goodbye()); +} +``` + +The `extern crate` declaration tells Rust that we need to compile and link to +the `phrases` crate. We can then use `phrases`' modules in this one. As we +mentioned earlier, you can use double colons to refer to sub-modules and the +functions inside of them. + +Also, Cargo assumes that `src/main.rs` is the crate root of a binary crate, +rather than a library crate. Our package now has two crates: `src/lib.rs` and +`src/main.rs`. This pattern is quite common for executable crates: most +functionality is in a library crate, and the executable crate uses that +library. This way, other programs can also use the library crate, and it's also +a nice separation of concerns. + +This doesn't quite work yet, though. We get four errors that look similar to +this: + +```bash +$ cargo build + Compiling phrases v0.0.1 (file:///home/you/projects/phrases) +src/main.rs:4:38: 4:72 error: function `hello` is private +src/main.rs:4 println!("Hello in English: {}", phrases::english::greetings::hello()); + ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +note: in expansion of format_args! +:2:25: 2:58 note: expansion site +:1:1: 2:62 note: in expansion of print! +:3:1: 3:54 note: expansion site +:1:1: 3:58 note: in expansion of println! +phrases/src/main.rs:4:5: 4:76 note: expansion site +``` + +By default, everything is private in Rust. 
Let's talk about this in some more +depth. + +# Exporting a Public Interface + +Rust allows you to precisely control which aspects of your interface are +public, and so private is the default. To make things public, you use the `pub` +keyword. Let's focus on the `english` module first, so let's reduce our `src/main.rs` +to just this: + +```{rust,ignore} +extern crate phrases; + +fn main() { + println!("Hello in English: {}", phrases::english::greetings::hello()); + println!("Goodbye in English: {}", phrases::english::farewells::goodbye()); +} +``` + +In our `src/lib.rs`, let's add `pub` to the `english` module declaration: + +```{rust,ignore} +pub mod english; +mod japanese; +``` + +And in our `src/english/mod.rs`, let's make both `pub`: + +```{rust,ignore} +pub mod greetings; +pub mod farewells; +``` + +In our `src/english/greetings.rs`, let's add `pub` to our `fn` declaration: + +```{rust,ignore} +pub fn hello() -> String { + "Hello!".to_string() +} +``` + +And also in `src/english/farewells.rs`: + +```{rust,ignore} +pub fn goodbye() -> String { + "Goodbye.".to_string() +} +``` + +Now, our crate compiles, albeit with warnings about not using the `japanese` +functions: + +```bash +$ cargo run + Compiling phrases v0.0.1 (file:///home/you/projects/phrases) +src/japanese/greetings.rs:1:1: 3:2 warning: function is never used: `hello`, #[warn(dead_code)] on by default +src/japanese/greetings.rs:1 fn hello() -> String { +src/japanese/greetings.rs:2 "こんにちは".to_string() +src/japanese/greetings.rs:3 } +src/japanese/farewells.rs:1:1: 3:2 warning: function is never used: `goodbye`, #[warn(dead_code)] on by default +src/japanese/farewells.rs:1 fn goodbye() -> String { +src/japanese/farewells.rs:2 "さようなら".to_string() +src/japanese/farewells.rs:3 } + Running `target/debug/phrases` +Hello in English: Hello! +Goodbye in English: Goodbye. +``` + +Now that our functions are public, we can use them. Great! 
However, typing out +`phrases::english::greetings::hello()` is very long and repetitive. Rust has +another keyword for importing names into the current scope, so that you can +refer to them with shorter names. Let's talk about `use`. + +# Importing Modules with `use` + +Rust has a `use` keyword, which allows us to import names into our local scope. +Let's change our `src/main.rs` to look like this: + +```{rust,ignore} +extern crate phrases; + +use phrases::english::greetings; +use phrases::english::farewells; + +fn main() { + println!("Hello in English: {}", greetings::hello()); + println!("Goodbye in English: {}", farewells::goodbye()); +} +``` + +The two `use` lines import each module into the local scope, so we can refer to +the functions by a much shorter name. By convention, when importing functions, it's +considered best practice to import the module, rather than the function directly. In +other words, you _can_ do this: + +```{rust,ignore} +extern crate phrases; + +use phrases::english::greetings::hello; +use phrases::english::farewells::goodbye; + +fn main() { + println!("Hello in English: {}", hello()); + println!("Goodbye in English: {}", goodbye()); +} +``` + +But it is not idiomatic. This is significantly more likely to introduce a +naming conflict. In our short program, it's not a big deal, but as it grows, it +becomes a problem. If we have conflicting names, Rust will give a compilation +error. 
For example, if we made the `japanese` functions public, and tried to do +this: + +```{rust,ignore} +extern crate phrases; + +use phrases::english::greetings::hello; +use phrases::japanese::greetings::hello; + +fn main() { + println!("Hello in English: {}", hello()); + println!("Hello in Japanese: {}", hello()); +} +``` + +Rust will give us a compile-time error: + +```text + Compiling phrases v0.0.1 (file:///home/you/projects/phrases) +src/main.rs:4:5: 4:40 error: a value named `hello` has already been imported in this module [E0252] +src/main.rs:4 use phrases::japanese::greetings::hello; + ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +error: aborting due to previous error +Could not compile `phrases`. +``` + +If we're importing multiple names from the same module, we don't have to type it out +twice. Instead of this: + +```{rust,ignore} +use phrases::english::greetings; +use phrases::english::farewells; +``` + +We can use this shortcut: + +```{rust,ignore} +use phrases::english::{greetings, farewells}; +``` + +## Re-exporting with `pub use` + +You don't just use `use` to shorten identifiers. You can also use it inside of your crate +to re-export a function inside another module. This allows you to present an external +interface that may not directly map to your internal code organization. + +Let's look at an example. 
Modify your `src/main.rs` to read like this: + +```{rust,ignore} +extern crate phrases; + +use phrases::english::{greetings,farewells}; +use phrases::japanese; + +fn main() { + println!("Hello in English: {}", greetings::hello()); + println!("Goodbye in English: {}", farewells::goodbye()); + + println!("Hello in Japanese: {}", japanese::hello()); + println!("Goodbye in Japanese: {}", japanese::goodbye()); +} +``` + +Then, modify your `src/lib.rs` to make the `japanese` mod public: + +```{rust,ignore} +pub mod english; +pub mod japanese; +``` + +Next, make the two functions public, first in `src/japanese/greetings.rs`: + +```{rust,ignore} +pub fn hello() -> String { + "こんにちは".to_string() +} +``` + +And then in `src/japanese/farewells.rs`: + +```{rust,ignore} +pub fn goodbye() -> String { + "さようなら".to_string() +} +``` + +Finally, modify your `src/japanese/mod.rs` to read like this: + +```{rust,ignore} +pub use self::greetings::hello; +pub use self::farewells::goodbye; + +mod greetings; +mod farewells; +``` + +The `pub use` declaration brings the function into scope at this part of our +module hierarchy. Because we've `pub use`d this inside of our `japanese` +module, we now have a `phrases::japanese::hello()` function and a +`phrases::japanese::goodbye()` function, even though the code for them lives in +`phrases::japanese::greetings::hello()` and +`phrases::japanese::farewells::goodbye()`. Our internal organization doesn't +define our external interface. + +Here we have a `pub use` for each function we want to bring into the +`japanese` scope. We could alternatively use the wildcard syntax to include +everything from `greetings` into the current scope: `pub use self::greetings::*`. + +What about the `self`? Well, by default, `use` declarations are absolute paths, +starting from your crate root. `self` makes that path relative to your current +place in the hierarchy instead. 
There's one more special form of `use`: you can +`use super::` to reach one level up the tree from your current location. Some +people like to think of `self` as `.` and `super` as `..`, from many shells' +display for the current directory and the parent directory. + +Outside of `use`, paths are relative: `foo::bar()` refers to a function inside +of `foo` relative to where we are. If that's prefixed with `::`, as in +`::foo::bar()`, it refers to a different `foo`, an absolute path from your +crate root. + +Also, note that we `pub use`d before we declared our `mod`s. Rust requires that +`use` declarations go first. + +This will build and run: + +```bash +$ cargo run + Compiling phrases v0.0.1 (file:///home/you/projects/phrases) + Running `target/debug/phrases` +Hello in English: Hello! +Goodbye in English: Goodbye. +Hello in Japanese: こんにちは +Goodbye in Japanese: さようなら +``` diff --git a/src/doc/trpl/debug-and-display.md b/src/doc/trpl/debug-and-display.md index e69de29bb2d1..6c8d788b5ae3 100644 --- a/src/doc/trpl/debug-and-display.md +++ b/src/doc/trpl/debug-and-display.md @@ -0,0 +1,3 @@ +% `Debug` and `Display` + +Coming soon! diff --git a/src/doc/trpl/deref-coercions.md b/src/doc/trpl/deref-coercions.md index e69de29bb2d1..afacd3040552 100644 --- a/src/doc/trpl/deref-coercions.md +++ b/src/doc/trpl/deref-coercions.md @@ -0,0 +1,3 @@ +% `Deref` coercions + +Coming soon! diff --git a/src/doc/trpl/documentation.md b/src/doc/trpl/documentation.md index e69de29bb2d1..06071a8f15fa 100644 --- a/src/doc/trpl/documentation.md +++ b/src/doc/trpl/documentation.md @@ -0,0 +1,562 @@ +% Documentation + +Documentation is an important part of any software project, and it's +first-class in Rust. Let's talk about the tooling Rust gives you to +document your project. + +## About `rustdoc` + +The Rust distribution includes a tool, `rustdoc`, that generates documentation. +`rustdoc` is also used by Cargo through `cargo doc`. 
+ +Documentation can be generated in two ways: from source code, and from +standalone Markdown files. + +## Documenting source code + +The primary way of documenting a Rust project is through annotating the source +code. You can use documentation comments for this purpose: + +```rust,ignore +/// Constructs a new `Rc<T>`. +/// +/// # Examples +/// +/// ``` +/// use std::rc::Rc; +/// +/// let five = Rc::new(5); +/// ``` +pub fn new(value: T) -> Rc<T> { + // implementation goes here +} +``` + +This code generates documentation that looks [like this][rc-new]. I've left the +implementation out, with a regular comment in its place. That's the first thing +to notice about this annotation: it uses `///`, instead of `//`. The triple slash +indicates a documentation comment. + +Documentation comments are written in Markdown. + +Rust keeps track of these comments, and uses them when generating +documentation. This is important when documenting things like enums: + +``` +/// The `Option` type. See [the module level documentation](../) for more. +enum Option<T> { + /// No value + None, + /// Some value `T` + Some(T), +} +``` + +The above works, but this does not: + +```rust,ignore +/// The `Option` type. See [the module level documentation](../) for more. +enum Option<T> { + None, /// No value + Some(T), /// Some value `T` +} +``` + +You'll get an error: + +```text +hello.rs:4:1: 4:2 error: expected ident, found `}` +hello.rs:4 } + ^ +``` + +This [unfortunate error](https://github.com/rust-lang/rust/issues/22547) is +correct: documentation comments apply to the thing after them, and there's no +thing after that last comment. + +[rc-new]: http://doc.rust-lang.org/nightly/std/rc/struct.Rc.html#method.new + +### Writing documentation comments + +Anyway, let's cover each part of this comment in detail: + +``` +/// Constructs a new `Rc<T>`. +# fn foo() {} +``` + +The first line of a documentation comment should be a short summary of its +functionality. One sentence. Just the basics. High level.
+ +``` +/// +/// Other details about constructing `Rc`s, maybe describing complicated +/// semantics, maybe additional options, all kinds of stuff. +/// +# fn foo() {} +``` + +Our original example had just a summary line, but if we had more things to say, +we could have added more explanation in a new paragraph. + +#### Special sections + +``` +/// # Examples +# fn foo() {} +``` + +Next are special sections. These are indicated with a header, `#`. There +are four kinds of headers that are commonly used. They aren't special syntax, +just convention, for now. + +``` +/// # Panics +# fn foo() {} +``` + +Unrecoverable misuses of a function (i.e. programming errors) in Rust are +usually indicated by panics, which kill the whole current thread at the very +least. If your function has a non-trivial contract like this, that is +detected/enforced by panics, documenting it is very important. + +``` +/// # Failures +# fn foo() {} +``` + +If your function or method returns a `Result<T, E>`, then describing the +conditions under which it returns `Err(E)` is a nice thing to do. This is +slightly less important than `Panics`, because failure is encoded into the type +system, but it's still a good thing to do. + +``` +/// # Safety +# fn foo() {} +``` + +If your function is `unsafe`, you should explain which invariants the caller is +responsible for upholding. + +``` +/// # Examples +/// +/// ``` +/// use std::rc::Rc; +/// +/// let five = Rc::new(5); +/// ``` +# fn foo() {} +``` + +Fourth, `Examples`. Include one or more examples of using your function or +method, and your users will love you for it.
These examples go inside of +code block annotations, which we'll talk about in a moment, and can have +more than one section: + +``` +/// # Examples +/// +/// Simple `&str` patterns: +/// +/// ``` +/// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect(); +/// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]); +/// ``` +/// +/// More complex patterns with a lambda: +/// +/// ``` +/// let v: Vec<&str> = "abc1def2ghi".split(|c: char| c.is_numeric()).collect(); +/// assert_eq!(v, vec!["abc", "def", "ghi"]); +/// ``` +# fn foo() {} +``` + +Let's discuss the details of these code blocks. + +#### Code block annotations + +To write some Rust code in a comment, use the triple graves: + +``` +/// ``` +/// println!("Hello, world"); +/// ``` +# fn foo() {} +``` + +If you want something that's not Rust code, you can add an annotation: + +``` +/// ```c +/// printf("Hello, world\n"); +/// ``` +# fn foo() {} +``` + +This will highlight according to whatever language you're showing off. +If you're just showing plain text, choose `text`. + +It's important to choose the correct annotation here, because `rustdoc` uses it +in an interesting way: It can be used to actually test your examples, so that +they don't get out of date. If you have some C code but `rustdoc` thinks it's +Rust because you left off the annotation, `rustdoc` will complain when trying to +generate the documentation. + +## Documentation as tests + +Let's discuss our sample example documentation: + +``` +/// ``` +/// println!("Hello, world"); +/// ``` +# fn foo() {} +``` + +You'll notice that you don't need a `fn main()` or anything here. `rustdoc` will +automatically add a main() wrapper around your code, and in the right place. 
+For example: + +``` +/// ``` +/// use std::rc::Rc; +/// +/// let five = Rc::new(5); +/// ``` +# fn foo() {} +``` + +This will end up testing: + +``` +fn main() { + use std::rc::Rc; + let five = Rc::new(5); +} +``` + +Here's the full algorithm rustdoc uses to postprocess examples: + +1. Any leading `#![foo]` attributes are left intact as crate attributes. +2. Some common `allow` attributes are inserted, including + `unused_variables`, `unused_assignments`, `unused_mut`, + `unused_attributes`, and `dead_code`. Small examples often trigger + these lints. +3. If the example does not contain `extern crate`, then `extern crate + <mycrate>;` is inserted. +4. Finally, if the example does not contain `fn main`, the remainder of the + text is wrapped in `fn main() { your_code }`. + +Sometimes, this isn't enough, though. For example, all of these code samples +with `///` we've been talking about? The raw text: + +```text +/// Some documentation. +# fn foo() {} +``` + +looks different than the output: + +``` +/// Some documentation. +# fn foo() {} +``` + +Yes, that's right: you can add lines that start with `# `, and they will +be hidden from the output, but will be used when compiling your code. You +can use this to your advantage. In this case, documentation comments need +to apply to some kind of function, so if I want to show you just a +documentation comment, I need to add a little function definition below +it. At the same time, it's just there to satisfy the compiler, so hiding +it makes the example more clear. You can use this technique to explain +longer examples in detail, while still preserving the testability of your +documentation.
For example, this code: + +``` +let x = 5; +let y = 6; +println!("{}", x + y); +``` + +Here's an explanation, rendered: + +First, we set `x` to five: + +``` +let x = 5; +# let y = 6; +# println!("{}", x + y); +``` + +Next, we set `y` to six: + +``` +# let x = 5; +let y = 6; +# println!("{}", x + y); +``` + +Finally, we print the sum of `x` and `y`: + +``` +# let x = 5; +# let y = 6; +println!("{}", x + y); +``` + +Here's the same explanation, in raw text: + +> First, we set `x` to five: +> +> ```text +> let x = 5; +> # let y = 6; +> # println!("{}", x + y); +> ``` +> +> Next, we set `y` to six: +> +> ```text +> # let x = 5; +> let y = 6; +> # println!("{}", x + y); +> ``` +> +> Finally, we print the sum of `x` and `y`: +> +> ```text +> # let x = 5; +> # let y = 6; +> println!("{}", x + y); +> ``` + +By repeating all parts of the example, you can ensure that your example still +compiles, while only showing the parts that are relevant to that part of your +explanation. + +### Documenting macros + +Here’s an example of documenting a macro: + +``` +/// Panic with a given message unless an expression evaluates to true. +/// +/// # Examples +/// +/// ``` +/// # #[macro_use] extern crate foo; +/// # fn main() { +/// panic_unless!(1 + 1 == 2, “Math is broken.”); +/// # } +/// ``` +/// +/// ```should_panic +/// # #[macro_use] extern crate foo; +/// # fn main() { +/// panic_unless!(true == false, “I’m broken.”); +/// # } +/// ``` +#[macro_export] +macro_rules! panic_unless { + ($condition:expr, $($rest:expr),+) => ({ if ! $condition { panic!($($rest),+); } }); +} +# fn main() {} +``` + +You’ll note three things: we need to add our own `extern crate` line, so that +we can add the `#[macro_use]` attribute. Second, we’ll need to add our own +`main()` as well. Finally, a judicious use of `#` to comment out those two +things, so they don’t show up in the output. 
+ +### Running documentation tests + +To run the tests, either + +```bash +$ rustdoc --test path/to/my/crate/root.rs +# or +$ cargo test +``` + +That's right, `cargo test` tests embedded documentation too. + +There are a few more annotations that are useful to help `rustdoc` do the right +thing when testing your code: + +``` +/// ```ignore +/// fn foo() { +/// ``` +# fn foo() {} +``` + +The `ignore` directive tells Rust to ignore your code. This is almost never +what you want, as it's the most generic. Instead, consider annotating it +with `text` if it's not code, or using `#`s to get a working example that +only shows the part you care about. + +``` +/// ```should_panic +/// assert!(false); +/// ``` +# fn foo() {} +``` + +`should_panic` tells `rustdoc` that the code should compile correctly, but +not actually pass as a test. + +``` +/// ```no_run +/// loop { +/// println!("Hello, world"); +/// } +/// ``` +# fn foo() {} +``` + +The `no_run` attribute will compile your code, but not run it. This is +important for examples such as "Here's how to start up a network service," +which you would want to make sure compile, but might run in an infinite loop! + +### Documenting modules + +Rust has another kind of doc comment, `//!`. This comment doesn't document the next item, but the enclosing item. In other words: + +``` +mod foo { + //! This is documentation for the `foo` module. + //! + //! # Examples + + // ... +} +``` + +This is where you'll see `//!` used most often: for module documentation. If +you have a module in `foo.rs`, you'll often open its code and see this: + +``` +//! A module for using `foo`s. +//! +//! The `foo` module contains a lot of useful functionality blah blah blah +``` + +### Documentation comment style + +Check out [RFC 505][rfc505] for full conventions around the style and format of +documentation. 
+ +[rfc505]: https://github.com/rust-lang/rfcs/blob/master/text/0505-api-comment-conventions.md + +## Other documentation + +All of this behavior works in non-Rust source files too. Because comments +are written in Markdown, they're often `.md` files. + +When you write documentation in Markdown files, you don't need to prefix +the documentation with comments. For example: + +``` +/// # Examples +/// +/// ``` +/// use std::rc::Rc; +/// +/// let five = Rc::new(5); +/// ``` +# fn foo() {} +``` + +is just + +~~~markdown +# Examples + +``` +use std::rc::Rc; + +let five = Rc::new(5); +``` +~~~ + +when it's in a Markdown file. There is one wrinkle though: Markdown files need +to have a title like this: + +```markdown +% The title + +This is the example documentation. +``` + +This `%` line needs to be the very first line of the file. + +## `doc` attributes + +At a deeper level, documentation comments are sugar for documentation attributes: + +``` +/// this +# fn foo() {} + +#[doc="this"] +# fn bar() {} +``` + +are the same, as are these: + +``` +//! this + +#![doc="/// this"] +``` + +You won't often see this attribute used for writing documentation, but it +can be useful when changing some options, or when writing a macro. + +### Re-exports + +`rustdoc` will show the documentation for a public re-export in both places: + +```ignore +extern crate foo; + +pub use foo::bar; +``` + +This will create documentation for bar both inside the documentation for the +crate `foo`, as well as the documentation for your crate. It will use the same +documentation in both places. 
+ +This behavior can be suppressed with `no_inline`: + +```ignore +extern crate foo; + +#[doc(no_inline)] +pub use foo::bar; +``` + +### Controlling HTML + +You can control a few aspects of the HTML that `rustdoc` generates through the +`#![doc]` version of the attribute: + +``` +#![doc(html_logo_url = "http://www.rust-lang.org/logos/rust-logo-128x128-blk-v2.png", + html_favicon_url = "http://www.rust-lang.org/favicon.ico", + html_root_url = "http://doc.rust-lang.org/")]; +``` + +This sets a few different options, with a logo, favicon, and a root URL. + +## Generation options + +`rustdoc` also contains a few other options on the command line, for further customization: + +- `--html-in-header FILE`: includes the contents of FILE at the end of the + `<head>...</head>` section. +- `--html-before-content FILE`: includes the contents of FILE directly after + `<body>`, before the rendered content (including the search bar). +- `--html-after-content FILE`: includes the contents of FILE after all the rendered content. diff --git a/src/doc/trpl/drop.md b/src/doc/trpl/drop.md index e69de29bb2d1..af58e23561c3 100644 --- a/src/doc/trpl/drop.md +++ b/src/doc/trpl/drop.md @@ -0,0 +1,3 @@ +% `Drop` + +Coming soon! diff --git a/src/doc/trpl/effective-rust.md b/src/doc/trpl/effective-rust.md index e69de29bb2d1..6ea0759e99d7 100644 --- a/src/doc/trpl/effective-rust.md +++ b/src/doc/trpl/effective-rust.md @@ -0,0 +1 @@ +% Effective Rust diff --git a/src/doc/trpl/enums.md b/src/doc/trpl/enums.md index e69de29bb2d1..cbb74d97c355 100644 --- a/src/doc/trpl/enums.md +++ b/src/doc/trpl/enums.md @@ -0,0 +1,149 @@ +% Enums + +Finally, Rust has a "sum type", an *enum*. Enums are an incredibly useful +feature of Rust, and are used throughout the standard library. An `enum` is +a type which relates a set of alternates to a specific name. For example, below +we define `Character` to be either a `Digit` or something else. These +can be used via their fully scoped names: `Character::Other` (more about `::` +below).
+ +```rust +enum Character { + Digit(i32), + Other, +} +``` + +Most normal types are allowed as the variant components of an `enum`. Here are +some examples: + +```rust +struct Empty; +struct Color(i32, i32, i32); +struct Length(i32); +struct Status { Health: i32, Mana: i32, Attack: i32, Defense: i32 } +struct HeightDatabase(Vec<i32>); +``` + +You see that, depending on its type, an `enum` variant may or may not hold data. +In `Character`, for instance, `Digit` gives a meaningful name for an `i32` +value, where `Other` is only a name. However, the fact that they represent +distinct categories of `Character` is a very useful property. + +As with structures, the variants of an enum by default are not comparable with +equality operators (`==`, `!=`), have no ordering (`<`, `>=`, etc.), and do not +support other binary operations such as `*` and `+`. As such, the following code +is invalid for the example `Character` type: + +```{rust,ignore} +// These assignments both succeed +let ten = Character::Digit(10); +let four = Character::Digit(4); + +// Error: `*` is not implemented for type `Character` +let forty = ten * four; + +// Error: `<=` is not implemented for type `Character` +let four_is_smaller = four <= ten; + +// Error: `==` is not implemented for type `Character` +let four_equals_ten = four == ten; +``` + +This may seem rather limiting, but it's a limitation which we can overcome. +There are two ways: by implementing equality ourselves, or by pattern matching +variants with [`match`][match] expressions, which you'll learn in the next +chapter. We don't know enough about Rust to implement equality yet, but we can +use the `Ordering` enum from the standard library, which does: + +``` +enum Ordering { + Less, + Equal, + Greater, +} +``` + +Because `Ordering` has already been defined for us, we will import it with the +`use` keyword.
Here's an example of how it is used: + +```{rust} +use std::cmp::Ordering; + +fn cmp(a: i32, b: i32) -> Ordering { + if a < b { Ordering::Less } + else if a > b { Ordering::Greater } + else { Ordering::Equal } +} + +fn main() { + let x = 5; + let y = 10; + + let ordering = cmp(x, y); // ordering: Ordering + + if ordering == Ordering::Less { + println!("less"); + } else if ordering == Ordering::Greater { + println!("greater"); + } else if ordering == Ordering::Equal { + println!("equal"); + } +} +``` + +The `::` symbol is used to indicate a namespace. In this case, `Ordering` lives +in the `cmp` submodule of the `std` module. We'll talk more about modules later +in the guide. For now, all you need to know is that you can `use` things from +the standard library if you need them. + +Okay, let's talk about the actual code in the example. `cmp` is a function that +compares two things, and returns an `Ordering`. We return either +`Ordering::Less`, `Ordering::Greater`, or `Ordering::Equal`, depending on +whether the first value is less than, greater than, or equal to the second. Note +that each variant of the `enum` is namespaced under the `enum` itself: it's +`Ordering::Greater`, not `Greater`. + +The `ordering` variable has the type `Ordering`, and so contains one of the +three values. We then do a bunch of `if`/`else` comparisons to check which +one it is. + +This `Ordering::Greater` notation is too long. Let's use another form of `use` +to import the `enum` variants instead. 
This will avoid full scoping: + +```{rust} +use std::cmp::Ordering::{self, Equal, Less, Greater}; + +fn cmp(a: i32, b: i32) -> Ordering { + if a < b { Less } + else if a > b { Greater } + else { Equal } +} + +fn main() { + let x = 5; + let y = 10; + + let ordering = cmp(x, y); // ordering: Ordering + + if ordering == Less { println!("less"); } + else if ordering == Greater { println!("greater"); } + else if ordering == Equal { println!("equal"); } +} +``` + +Importing variants is convenient and compact, but can also cause name conflicts, +so do this with caution. For this reason, it's normally considered better style +to `use` an enum rather than its variants directly. + +As you can see, `enum`s are quite a powerful tool for data representation, and +are even more useful when they're [generic][generics] across types. Before we +get to generics, though, let's talk about how to use enums with pattern +matching, a tool that will let us deconstruct sum types (the type theory term +for enums) like `Ordering` in a very elegant way that avoids all these messy +and brittle `if`/`else`s. + + +[arity]: ./glossary.html#arity +[match]: ./match.html +[generics]: ./generics.html diff --git a/src/doc/trpl/error-handling.md b/src/doc/trpl/error-handling.md index e69de29bb2d1..b9e7bd78c5b2 100644 --- a/src/doc/trpl/error-handling.md +++ b/src/doc/trpl/error-handling.md @@ -0,0 +1,301 @@ +% Error Handling + +> The best-laid plans of mice and men +> Often go awry +> +> "Tae a Moose", Robert Burns + +Sometimes, things just go wrong. It's important to have a plan for when the +inevitable happens. Rust has rich support for handling errors that may (let's +be honest: will) occur in your programs. + +There are two main kinds of errors that can occur in your programs: failures, +and panics. Let's talk about the difference between the two, and then discuss +how to handle each. Then, we'll discuss upgrading failures to panics. + +# Failure vs. 
Panic + +Rust uses two terms to differentiate between two forms of error: failure, and +panic. A *failure* is an error that can be recovered from in some way. A +*panic* is an error that cannot be recovered from. + +What do we mean by "recover"? Well, in most cases, the possibility of an error +is expected. For example, consider the `from_str` function: + +```{rust,ignore} +from_str("5"); +``` + +This function takes a string argument and converts it into another type. But +because it's a string, you can't be sure that the conversion actually works. +For example, what should this convert to? + +```{rust,ignore} +from_str("hello5world"); +``` + +This won't work. So we know that this function will only work properly for some +inputs. It's expected behavior. We call this kind of error a *failure*. + +On the other hand, sometimes, there are errors that are unexpected, or which +we cannot recover from. A classic example is an `assert!`: + +```{rust,ignore} +assert!(x == 5); +``` + +We use `assert!` to declare that something is true. If it's not true, something +is very wrong. Wrong enough that we can't continue with things in the current +state. Another example is using the `unreachable!()` macro: + +```{rust,ignore} +enum Event { + NewRelease, +} + +fn probability(_: &Event) -> f64 { + // real implementation would be more complex, of course + 0.95 +} + +fn descriptive_probability(event: Event) -> &'static str { + match probability(&event) { + 1.00 => "certain", + 0.00 => "impossible", + 0.00 ... 0.25 => "very unlikely", + 0.25 ... 0.50 => "unlikely", + 0.50 ... 0.75 => "likely", + 0.75 ... 1.00 => "very likely", + } +} + +fn main() { + std::io::println(descriptive_probability(NewRelease)); +} +``` + +This will give us an error: + +```text +error: non-exhaustive patterns: `_` not covered [E0004] +``` + +While we know that we've covered all possible cases, Rust can't tell. It +doesn't know that probability is between 0.0 and 1.0. 
So we add another case: + +```rust +use Event::NewRelease; + +enum Event { + NewRelease, +} + +fn probability(_: &Event) -> f64 { + // real implementation would be more complex, of course + 0.95 +} + +fn descriptive_probability(event: Event) -> &'static str { + match probability(&event) { + 1.00 => "certain", + 0.00 => "impossible", + 0.00 ... 0.25 => "very unlikely", + 0.25 ... 0.50 => "unlikely", + 0.50 ... 0.75 => "likely", + 0.75 ... 1.00 => "very likely", + _ => unreachable!() + } +} + +fn main() { + println!("{}", descriptive_probability(NewRelease)); +} +``` + +We shouldn't ever hit the `_` case, so we use the `unreachable!()` macro to +indicate this. `unreachable!()` gives a different kind of error than `Result`. +Rust calls these sorts of errors *panics*. + +# Handling errors with `Option` and `Result` + +The simplest way to indicate that a function may fail is to use the `Option<T>` +type. Remember our `from_str()` example? Here's its type signature: + +```{rust,ignore} +pub fn from_str<A: FromStr>(s: &str) -> Option<A> +``` + +`from_str()` returns an `Option<A>`. If the conversion succeeds, it will return +`Some(value)`, and if it fails, it will return `None`. + +This is appropriate for the simplest of cases, but doesn't give us a lot of +information in the failure case. What if we wanted to know _why_ the conversion +failed? For this, we can use the `Result<T, E>` type. It looks like this: + +```rust +enum Result<T, E> { + Ok(T), + Err(E) +} +``` + +This enum is provided by Rust itself, so you don't need to define it to use it +in your code. The `Ok(T)` variant represents a success, and the `Err(E)` variant +represents a failure. Returning a `Result` instead of an `Option` is recommended +for all but the most trivial of situations.
+ +Here's an example of using `Result`: + +```rust +#[derive(Debug)] +enum Version { Version1, Version2 } + +#[derive(Debug)] +enum ParseError { InvalidHeaderLength, InvalidVersion } + +fn parse_version(header: &[u8]) -> Result<Version, ParseError> { + if header.len() < 1 { + return Err(ParseError::InvalidHeaderLength); + } + match header[0] { + 1 => Ok(Version::Version1), + 2 => Ok(Version::Version2), + _ => Err(ParseError::InvalidVersion) + } +} + +let version = parse_version(&[1, 2, 3, 4]); +match version { + Ok(v) => { + println!("working with version: {:?}", v); + } + Err(e) => { + println!("error parsing header: {:?}", e); + } +} +``` + +This function makes use of an enum, `ParseError`, to enumerate the various +errors that can occur. + +# Non-recoverable errors with `panic!` + +In the case of an error that is unexpected and not recoverable, the `panic!` +macro will induce a panic. This will crash the current thread, and give an error: + +```{rust,ignore} +panic!("boom"); +``` + +gives + +```text +thread '<main>' panicked at 'boom', hello.rs:2 +``` + +when you run it. + +Because these kinds of situations are relatively rare, use panics sparingly. + +# Upgrading failures to panics + +In certain circumstances, even though a function may fail, we may want to treat +it as a panic instead. For example, `io::stdin().read_line(&mut buffer)` returns +a `Result`, which is an `Err` when there is an error reading the line. This allows us to +handle and possibly recover from the error. + +If we don't want to handle this error, and would rather just abort the program, +we can use the `unwrap()` method: + +```{rust,ignore} +io::stdin().read_line(&mut buffer).unwrap(); +``` + +`unwrap()` will `panic!` if the `Result` is `Err`. This basically says "Give +me the value, and if something goes wrong, just crash." This is less reliable +than matching the error and attempting to recover, but is also significantly +shorter. Sometimes, just crashing is appropriate. + +There's another way of doing this that's a bit nicer than `unwrap()`: + +```{rust,ignore} +let mut buffer = String::new(); +let input = io::stdin().read_line(&mut buffer) + .ok() + .expect("Failed to read line"); +``` + +`ok()` converts the `Result` into an `Option`, and `expect()` does the same +thing as `unwrap()`, but takes a message. This message is passed along to the +underlying `panic!`, providing a better error message if the code errors. + +# Using `try!` + +When writing code that calls many functions that return the `Result` type, the +error handling can be tedious. The `try!` macro hides some of the boilerplate +of propagating errors up the call stack.
+ +It replaces this: + +```rust +use std::fs::File; +use std::io; +use std::io::prelude::*; + +struct Info { + name: String, + age: i32, + rating: i32, +} + +fn write_info(info: &Info) -> io::Result<()> { + let mut file = File::open("my_best_friends.txt").unwrap(); + + if let Err(e) = writeln!(&mut file, "name: {}", info.name) { + return Err(e) + } + if let Err(e) = writeln!(&mut file, "age: {}", info.age) { + return Err(e) + } + if let Err(e) = writeln!(&mut file, "rating: {}", info.rating) { + return Err(e) + } + + return Ok(()); +} +``` + +With this: + +```rust +use std::fs::File; +use std::io; +use std::io::prelude::*; + +struct Info { + name: String, + age: i32, + rating: i32, +} + +fn write_info(info: &Info) -> io::Result<()> { + let mut file = try!(File::open("my_best_friends.txt")); + + try!(writeln!(&mut file, "name: {}", info.name)); + try!(writeln!(&mut file, "age: {}", info.age)); + try!(writeln!(&mut file, "rating: {}", info.rating)); + + return Ok(()); +} +``` + +Wrapping an expression in `try!` will result in the unwrapped success (`Ok`) +value, unless the result is `Err`, in which case `Err` is returned early from +the enclosing function. + +It's worth noting that you can only use `try!` from a function that returns a +`Result`, which means that you cannot use `try!` inside of `main()`, because +`main()` doesn't return anything. + +`try!` makes use of [`FromError`](../std/error/#the-fromerror-trait) to determine +what to return in the error case. diff --git a/src/doc/trpl/ffi.md b/src/doc/trpl/ffi.md index e69de29bb2d1..23f6e17b860b 100644 --- a/src/doc/trpl/ffi.md +++ b/src/doc/trpl/ffi.md @@ -0,0 +1,530 @@ +% Foreign Function Interface + +# Introduction + +This guide will use the [snappy](https://github.com/google/snappy) +compression/decompression library as an introduction to writing bindings for +foreign code. 
Rust is currently unable to call directly into a C++ library, but +snappy includes a C interface (documented in +[`snappy-c.h`](https://github.com/google/snappy/blob/master/snappy-c.h)). + +The following is a minimal example of calling a foreign function which will +compile if snappy is installed: + +```no_run +# #![feature(libc)] +extern crate libc; +use libc::size_t; + +#[link(name = "snappy")] +extern { + fn snappy_max_compressed_length(source_length: size_t) -> size_t; +} + +fn main() { + let x = unsafe { snappy_max_compressed_length(100) }; + println!("max compressed length of a 100 byte buffer: {}", x); +} +``` + +The `extern` block is a list of function signatures in a foreign library, in +this case with the platform's C ABI. The `#[link(...)]` attribute is used to +instruct the linker to link against the snappy library so the symbols are +resolved. + +Foreign functions are assumed to be unsafe so calls to them need to be wrapped +with `unsafe {}` as a promise to the compiler that everything contained within +truly is safe. C libraries often expose interfaces that aren't thread-safe, and +almost any function that takes a pointer argument isn't valid for all possible +inputs since the pointer could be dangling, and raw pointers fall outside of +Rust's safe memory model. + +When declaring the argument types to a foreign function, the Rust compiler can +not check if the declaration is correct, so specifying it correctly is part of +keeping the binding correct at runtime. 
+ +The `extern` block can be extended to cover the entire snappy API: + +```no_run +# #![feature(libc)] +extern crate libc; +use libc::{c_int, size_t}; + +#[link(name = "snappy")] +extern { + fn snappy_compress(input: *const u8, + input_length: size_t, + compressed: *mut u8, + compressed_length: *mut size_t) -> c_int; + fn snappy_uncompress(compressed: *const u8, + compressed_length: size_t, + uncompressed: *mut u8, + uncompressed_length: *mut size_t) -> c_int; + fn snappy_max_compressed_length(source_length: size_t) -> size_t; + fn snappy_uncompressed_length(compressed: *const u8, + compressed_length: size_t, + result: *mut size_t) -> c_int; + fn snappy_validate_compressed_buffer(compressed: *const u8, + compressed_length: size_t) -> c_int; +} +# fn main() {} +``` + +# Creating a safe interface + +The raw C API needs to be wrapped to provide memory safety and make use of higher-level concepts +like vectors. A library can choose to expose only the safe, high-level interface and hide the unsafe +internal details. + +Wrapping the functions which expect buffers involves using the `slice::raw` module to manipulate Rust +vectors as pointers to memory. Rust's vectors are guaranteed to be a contiguous block of memory. The +length is number of elements currently contained, and the capacity is the total size in elements of +the allocated memory. The length is less than or equal to the capacity. + +``` +# #![feature(libc)] +# extern crate libc; +# use libc::{c_int, size_t}; +# unsafe fn snappy_validate_compressed_buffer(_: *const u8, _: size_t) -> c_int { 0 } +# fn main() {} +pub fn validate_compressed_buffer(src: &[u8]) -> bool { + unsafe { + snappy_validate_compressed_buffer(src.as_ptr(), src.len() as size_t) == 0 + } +} +``` + +The `validate_compressed_buffer` wrapper above makes use of an `unsafe` block, but it makes the +guarantee that calling it is safe for all inputs by leaving off `unsafe` from the function +signature. 
+ +The `snappy_compress` and `snappy_uncompress` functions are more complex, since a buffer has to be +allocated to hold the output too. + +The `snappy_max_compressed_length` function can be used to allocate a vector with the maximum +required capacity to hold the compressed output. The vector can then be passed to the +`snappy_compress` function as an output parameter. An output parameter is also passed to retrieve +the true length after compression for setting the length. + +``` +# #![feature(libc)] +# extern crate libc; +# use libc::{size_t, c_int}; +# unsafe fn snappy_compress(a: *const u8, b: size_t, c: *mut u8, +# d: *mut size_t) -> c_int { 0 } +# unsafe fn snappy_max_compressed_length(a: size_t) -> size_t { a } +# fn main() {} +pub fn compress(src: &[u8]) -> Vec<u8> { + unsafe { + let srclen = src.len() as size_t; + let psrc = src.as_ptr(); + + let mut dstlen = snappy_max_compressed_length(srclen); + let mut dst = Vec::with_capacity(dstlen as usize); + let pdst = dst.as_mut_ptr(); + + snappy_compress(psrc, srclen, pdst, &mut dstlen); + dst.set_len(dstlen as usize); + dst + } +} +``` + +Decompression is similar, because snappy stores the uncompressed size as part of the compression +format and `snappy_uncompressed_length` will retrieve the exact buffer size required. 
+ +``` +# #![feature(libc)] +# extern crate libc; +# use libc::{size_t, c_int}; +# unsafe fn snappy_uncompress(compressed: *const u8, +# compressed_length: size_t, +# uncompressed: *mut u8, +# uncompressed_length: *mut size_t) -> c_int { 0 } +# unsafe fn snappy_uncompressed_length(compressed: *const u8, +# compressed_length: size_t, +# result: *mut size_t) -> c_int { 0 } +# fn main() {} +pub fn uncompress(src: &[u8]) -> Option<Vec<u8>> { + unsafe { + let srclen = src.len() as size_t; + let psrc = src.as_ptr(); + + let mut dstlen: size_t = 0; + snappy_uncompressed_length(psrc, srclen, &mut dstlen); + + let mut dst = Vec::with_capacity(dstlen as usize); + let pdst = dst.as_mut_ptr(); + + if snappy_uncompress(psrc, srclen, pdst, &mut dstlen) == 0 { + dst.set_len(dstlen as usize); + Some(dst) + } else { + None // SNAPPY_INVALID_INPUT + } + } +} +``` + +For reference, the examples used here are also available as a [library on +GitHub](https://github.com/thestinger/rust-snappy). + +# Destructors + +Foreign libraries often hand off ownership of resources to the calling code. +When this occurs, we must use Rust's destructors to provide safety and guarantee +the release of these resources (especially in the case of panic). + +For more about destructors, see the [Drop trait](../std/ops/trait.Drop.html). + +# Callbacks from C code to Rust functions + +Some external libraries require the usage of callbacks to report back their +current state or intermediate data to the caller. +It is possible to pass functions defined in Rust to an external library. +The requirement for this is that the callback function is marked as `extern` +with the correct calling convention to make it callable from C code. + +The callback function can then be sent through a registration call +to the C library and afterwards be invoked from there. 
+ +A basic example is: + +Rust code: + +```no_run +extern fn callback(a: i32) { + println!("I'm called from C with value {0}", a); +} + +#[link(name = "extlib")] +extern { + fn register_callback(cb: extern fn(i32)) -> i32; + fn trigger_callback(); +} + +fn main() { + unsafe { + register_callback(callback); + trigger_callback(); // Triggers the callback + } +} +``` + +C code: + +```c +typedef void (*rust_callback)(int32_t); +rust_callback cb; + +int32_t register_callback(rust_callback callback) { + cb = callback; + return 1; +} + +void trigger_callback() { + cb(7); // Will call callback(7) in Rust +} +``` + +In this example Rust's `main()` will call `trigger_callback()` in C, +which would, in turn, call back to `callback()` in Rust. + + +## Targeting callbacks to Rust objects + +The former example showed how a global function can be called from C code. +However it is often desired that the callback is targeted to a special +Rust object. This could be the object that represents the wrapper for the +respective C object. + +This can be achieved by passing an unsafe pointer to the object down to the +C library. The C library can then include the pointer to the Rust object in +the notification. This will allow the callback to unsafely access the +referenced Rust object. 
+ +Rust code: + +```no_run +#[repr(C)] +struct RustObject { + a: i32, + // other members +} + +extern "C" fn callback(target: *mut RustObject, a: i32) { + println!("I'm called from C with value {0}", a); + unsafe { + // Update the value in RustObject with the value received from the callback + (*target).a = a; + } +} + +#[link(name = "extlib")] +extern { + fn register_callback(target: *mut RustObject, + cb: extern fn(*mut RustObject, i32)) -> i32; + fn trigger_callback(); +} + +fn main() { + // Create the object that will be referenced in the callback + let mut rust_object = Box::new(RustObject { a: 5 }); + + unsafe { + register_callback(&mut *rust_object, callback); + trigger_callback(); + } +} +``` + +C code: + +```c +typedef void (*rust_callback)(void*, int32_t); +void* cb_target; +rust_callback cb; + +int32_t register_callback(void* callback_target, rust_callback callback) { + cb_target = callback_target; + cb = callback; + return 1; +} + +void trigger_callback() { + cb(cb_target, 7); // Will call callback(&rustObject, 7) in Rust +} +``` + +## Asynchronous callbacks + +In the previously given examples the callbacks are invoked as a direct reaction +to a function call to the external C library. +The control over the current thread is switched from Rust to C to Rust for the +execution of the callback, but in the end the callback is executed on the +same thread that called the function which triggered the callback. + +Things get more complicated when the external library spawns its own threads +and invokes callbacks from there. +In these cases access to Rust data structures inside the callbacks is +especially unsafe and proper synchronization mechanisms must be used. +Besides classical synchronization mechanisms like mutexes, one possibility in +Rust is to use channels (in `std::comm`) to forward data from the C thread +that invoked the callback into a Rust thread. 
+ +If an asynchronous callback targets a special object in the Rust address space +it is also absolutely necessary that no more callbacks are performed by the +C library after the respective Rust object gets destroyed. +This can be achieved by unregistering the callback in the object's +destructor and designing the library in a way that guarantees that no +callback will be performed after deregistration. + +# Linking + +The `link` attribute on `extern` blocks provides the basic building block for +instructing rustc how it will link to native libraries. There are two accepted +forms of the link attribute today: + +* `#[link(name = "foo")]` +* `#[link(name = "foo", kind = "bar")]` + +In both of these cases, `foo` is the name of the native library that we're +linking to, and in the second case `bar` is the type of native library that the +compiler is linking to. There are currently three known types of native +libraries: + +* Dynamic - `#[link(name = "readline")]` +* Static - `#[link(name = "my_build_dependency", kind = "static")]` +* Frameworks - `#[link(name = "CoreFoundation", kind = "framework")]` + +Note that frameworks are only available on OSX targets. + +The different `kind` values are meant to differentiate how the native library +participates in linkage. From a linkage perspective, the rust compiler creates +two flavors of artifacts: partial (rlib/staticlib) and final (dylib/binary). +Native dynamic libraries and frameworks are propagated to the final artifact +boundary, while static libraries are not propagated at all. + +A few examples of how this model can be used are: + +* A native build dependency. Sometimes some C/C++ glue is needed when writing + some rust code, but distribution of the C/C++ code in a library format is just + a burden. In this case, the code will be archived into `libfoo.a` and then the + rust crate would declare a dependency via `#[link(name = "foo", kind = + "static")]`. 
+ + Regardless of the flavor of output for the crate, the native static library + will be included in the output, meaning that distribution of the native static + library is not necessary. + +* A normal dynamic dependency. Common system libraries (like `readline`) are + available on a large number of systems, and often a static copy of these + libraries cannot be found. When this dependency is included in a rust crate, + partial targets (like rlibs) will not link to the library, but when the rlib + is included in a final target (like a binary), the native library will be + linked in. + +On OSX, frameworks behave with the same semantics as a dynamic library. + +# Unsafe blocks + +Some operations, like dereferencing unsafe pointers or calling functions that have been marked +unsafe are only allowed inside unsafe blocks. Unsafe blocks isolate unsafety and are a promise to +the compiler that the unsafety does not leak out of the block. + +Unsafe functions, on the other hand, advertise it to the world. An unsafe function is written like +this: + +``` +unsafe fn kaboom(ptr: *const i32) -> i32 { *ptr } +``` + +This function can only be called from an `unsafe` block or another `unsafe` function. + +# Accessing foreign globals + +Foreign APIs often export a global variable which could do something like track +global state. In order to access these variables, you declare them in `extern` +blocks with the `static` keyword: + +```no_run +# #![feature(libc)] +extern crate libc; + +#[link(name = "readline")] +extern { + static rl_readline_version: libc::c_int; +} + +fn main() { + println!("You have readline version {} installed.", + rl_readline_version as i32); +} +``` + +Alternatively, you may need to alter global state provided by a foreign +interface. To do this, statics can be declared with `mut` so we can mutate +them. 
+ +```no_run +# #![feature(libc)] +extern crate libc; + +use std::ffi::CString; +use std::ptr; + +#[link(name = "readline")] +extern { + static mut rl_prompt: *const libc::c_char; +} + +fn main() { + let prompt = CString::new("[my-awesome-shell] $").unwrap(); + unsafe { + rl_prompt = prompt.as_ptr(); + + println!("{:?}", rl_prompt); + + rl_prompt = ptr::null(); + } +} +``` + +Note that all interaction with a `static mut` is unsafe, both reading and +writing. Dealing with global mutable state requires a great deal of care. + +# Foreign calling conventions + +Most foreign code exposes a C ABI, and Rust uses the platform's C calling convention by default when +calling foreign functions. Some foreign functions, most notably the Windows API, use other calling +conventions. Rust provides a way to tell the compiler which convention to use: + +``` +# #![feature(libc)] +extern crate libc; + +#[cfg(all(target_os = "win32", target_arch = "x86"))] +#[link(name = "kernel32")] +#[allow(non_snake_case)] +extern "stdcall" { + fn SetEnvironmentVariableA(n: *const u8, v: *const u8) -> libc::c_int; +} +# fn main() { } +``` + +This applies to the entire `extern` block. The list of supported ABI constraints +are: + +* `stdcall` +* `aapcs` +* `cdecl` +* `fastcall` +* `Rust` +* `rust-intrinsic` +* `system` +* `C` +* `win64` + +Most of the abis in this list are self-explanatory, but the `system` abi may +seem a little odd. This constraint selects whatever the appropriate ABI is for +interoperating with the target's libraries. For example, on win32 with a x86 +architecture, this means that the abi used would be `stdcall`. On x86_64, +however, windows uses the `C` calling convention, so `C` would be used. This +means that in our previous example, we could have used `extern "system" { ... }` +to define a block for all windows systems, not just x86 ones. 
+ +# Interoperability with foreign code + +Rust guarantees that the layout of a `struct` is compatible with the platform's +representation in C only if the `#[repr(C)]` attribute is applied to it. +`#[repr(C, packed)]` can be used to lay out struct members without padding. +`#[repr(C)]` can also be applied to an enum. + +Rust's owned boxes (`Box<T>`) use non-nullable pointers as handles which point +to the contained object. However, they should not be manually created because +they are managed by internal allocators. References can safely be assumed to be +non-nullable pointers directly to the type. However, breaking the borrow +checking or mutability rules is not guaranteed to be safe, so prefer using raw +pointers (`*`) if that's needed because the compiler can't make as many +assumptions about them. + +Vectors and strings share the same basic memory layout, and utilities are +available in the `vec` and `str` modules for working with C APIs. However, +strings are not terminated with `\0`. If you need a NUL-terminated string for +interoperability with C, you should use the `CString` type in the `std::ffi` +module. + +The standard library includes type aliases and function definitions for the C +standard library in the `libc` module, and Rust links against `libc` and `libm` +by default. + +# The "nullable pointer optimization" + +Certain types are defined to not be `null`. This includes references (`&T`, +`&mut T`), boxes (`Box<T>`), and function pointers (`extern "abi" fn()`). +When interfacing with C, pointers that might be null are often used. +As a special case, a generic `enum` that contains exactly two variants, one of +which contains no data and the other containing a single field, is eligible +for the "nullable pointer optimization". When such an enum is instantiated +with one of the non-nullable types, it is represented as a single pointer, +and the non-data variant is represented as the null pointer. 
So +`Option<extern "C" fn(c_int) -> c_int>` is how one represents a nullable +function pointer using the C ABI. + +# Calling Rust code from C + +You may wish to compile Rust code in a way so that it can be called from C. This is +fairly easy, but requires a few things: + +``` +#[no_mangle] +pub extern fn hello_rust() -> *const u8 { + "Hello, world!\0".as_ptr() +} +# fn main() {} +``` + +The `extern` makes this function adhere to the C calling convention, as +discussed above in "[Foreign Calling +Conventions](ffi.html#foreign-calling-conventions)". The `no_mangle` +attribute turns off Rust's name mangling, so that it is easier to link to. diff --git a/src/doc/trpl/for-loops.md b/src/doc/trpl/for-loops.md index e69de29bb2d1..45ae5a2e2dd9 100644 --- a/src/doc/trpl/for-loops.md +++ b/src/doc/trpl/for-loops.md @@ -0,0 +1,44 @@ +% `for` Loops + +The `for` loop is used to loop a particular number of times. Rust's `for` loops +work a bit differently than in other systems languages, however. Rust's `for` +loop doesn't look like this "C-style" `for` loop: + +```{c} +for (x = 0; x < 10; x++) { + printf( "%d\n", x ); +} +``` + +Instead, it looks like this: + +```{rust} +for x in 0..10 { + println!("{}", x); // x: i32 +} +``` + +In slightly more abstract terms, + +```{ignore} +for var in expression { + code +} +``` + +The expression is an iterator, which we will discuss in more depth later in the +guide. The iterator gives back a series of elements. Each element is one +iteration of the loop. That value is then bound to the name `var`, which is +valid for the loop body. Once the body is over, the next value is fetched from +the iterator, and we loop another time. When there are no more values, the +`for` loop is over. + +In our example, `0..10` is an expression that takes a start and an end position, +and gives an iterator over those values. The upper bound is exclusive, though, +so our loop will print `0` through `9`, not `10`. + +Rust does not have the "C-style" `for` loop on purpose. 
Manually controlling +each element of the loop is complicated and error prone, even for experienced C +developers. + +We'll talk more about `for` when we cover *iterators*, later in the Guide. diff --git a/src/doc/trpl/functions.md b/src/doc/trpl/functions.md index e69de29bb2d1..8e8ee8d63d62 100644 --- a/src/doc/trpl/functions.md +++ b/src/doc/trpl/functions.md @@ -0,0 +1,193 @@ +% Functions + +You've already seen one function so far, the `main` function: + +```rust +fn main() { +} +``` + +This is the simplest possible function declaration. As we mentioned before, +`fn` says "this is a function," followed by the name, some parentheses because +this function takes no arguments, and then some curly braces to indicate the +body. Here's a function named `foo`: + +```rust +fn foo() { +} +``` + +So, what about taking arguments? Here's a function that prints a number: + +```rust +fn print_number(x: i32) { + println!("x is: {}", x); +} +``` + +Here's a complete program that uses `print_number`: + +```rust +fn main() { + print_number(5); +} + +fn print_number(x: i32) { + println!("x is: {}", x); +} +``` + +As you can see, function arguments work very similar to `let` declarations: +you add a type to the argument name, after a colon. + +Here's a complete program that adds two numbers together and prints them: + +```rust +fn main() { + print_sum(5, 6); +} + +fn print_sum(x: i32, y: i32) { + println!("sum is: {}", x + y); +} +``` + +You separate arguments with a comma, both when you call the function, as well +as when you declare it. + +Unlike `let`, you _must_ declare the types of function arguments. This does +not work: + +```{rust,ignore} +fn print_sum(x, y) { + println!("sum is: {}", x + y); +} +``` + +You get this error: + +```text +hello.rs:5:18: 5:19 expected one of `!`, `:`, or `@`, found `)` +hello.rs:5 fn print_number(x, y) { +``` + +This is a deliberate design decision. 
While full-program inference is possible, +languages which have it, like Haskell, often suggest that documenting your +types explicitly is a best-practice. We agree that forcing functions to declare +types while allowing for inference inside of function bodies is a wonderful +sweet spot between full inference and no inference. + +What about returning a value? Here's a function that adds one to an integer: + +```rust +fn add_one(x: i32) -> i32 { + x + 1 +} +``` + +Rust functions return exactly one value, and you declare the type after an +"arrow," which is a dash (`-`) followed by a greater-than sign (`>`). + +You'll note the lack of a semicolon here. If we added it in: + +```{rust,ignore} +fn add_one(x: i32) -> i32 { + x + 1; +} +``` + +We would get an error: + +```text +error: not all control paths return a value +fn add_one(x: i32) -> i32 { + x + 1; +} + +help: consider removing this semicolon: + x + 1; + ^ +``` + +Remember our earlier discussions about semicolons and `()`? Our function claims +to return an `i32`, but with a semicolon, it would return `()` instead. Rust +realizes this probably isn't what we want, and suggests removing the semicolon. + +This is very much like our `if` statement before: the result of the block +(`{}`) is the value of the expression. Other expression-oriented languages, +such as Ruby, work like this, but it's a bit unusual in the systems programming +world. When people first learn about this, they usually assume that it +introduces bugs. But because Rust's type system is so strong, and because unit +is its own unique type, we have never seen an issue where adding or removing a +semicolon in a return position would cause a bug. + +But what about early returns? 
Rust does have a keyword for that, `return`: + +```rust +fn foo(x: i32) -> i32 { + if x < 5 { return x; } + + x + 1 +} +``` + +Using a `return` as the last line of a function works, but is considered poor +style: + +```rust +fn foo(x: i32) -> i32 { + if x < 5 { return x; } + + return x + 1; +} +``` + +The previous definition without `return` may look a bit strange if you haven't +worked in an expression-based language before, but it becomes intuitive over +time. If this were production code, we wouldn't write it in that way anyway, +we'd write this: + +```rust +fn foo(x: i32) -> i32 { + if x < 5 { + x + } else { + x + 1 + } +} +``` + +Because `if` is an expression, and it's the only expression in this function, +the value will be the result of the `if`. + +## Diverging functions + +Rust has some special syntax for 'diverging functions', which are functions that +do not return: + +``` +fn diverges() -> ! { + panic!("This function never returns!"); +} +``` + +`panic!` is a macro, similar to `println!()` that we've already seen. Unlike +`println!()`, `panic!()` causes the current thread of execution to crash with +the given message. + +Because this function will cause a crash, it will never return, and so it has +the type '`!`', which is read "diverges." A diverging function can be used +as any type: + +```should_panic +# fn diverges() -> ! { +# panic!("This function never returns!"); +# } + +let x: i32 = diverges(); +let x: String = diverges(); +``` + +We don't have a good use for diverging functions yet, because they're used in +conjunction with other Rust features. But when you see `-> !` later, you'll +know what it's called. diff --git a/src/doc/trpl/generics.md b/src/doc/trpl/generics.md index e69de29bb2d1..3e4e0a66eae0 100644 --- a/src/doc/trpl/generics.md +++ b/src/doc/trpl/generics.md @@ -0,0 +1,177 @@ +% Generics + +Sometimes, when writing a function or data type, we may want it to work for +multiple types of arguments. 
For example, remember our `OptionalInt` type? + +```{rust} +enum OptionalInt { + Value(i32), + Missing, +} +``` + +If we wanted to also have an `OptionalFloat64`, we would need a new enum: + +```{rust} +enum OptionalFloat64 { + Valuef64(f64), + Missingf64, +} +``` + +This is really unfortunate. Luckily, Rust has a feature that gives us a better +way: generics. Generics are called *parametric polymorphism* in type theory, +which means that they are types or functions that have multiple forms (*poly* +is multiple, *morph* is form) over a given parameter (*parametric*). + +Anyway, enough with type theory declarations, let's check out the generic form +of `OptionalInt`. It is actually provided by Rust itself, and looks like this: + +```rust +enum Option<T> { + Some(T), + None, +} +``` + +The `<T>` part, which you've seen a few times before, indicates that this is +a generic data type. Inside the declaration of our enum, wherever we see a `T`, +we substitute that type for the same type used in the generic. Here's an +example of using `Option<T>`, with some extra type annotations: + +```{rust} +let x: Option<i32> = Some(5); +``` + +In the type declaration, we say `Option<i32>`. Note how similar this looks to +`Option<T>`. So, in this particular `Option`, `T` has the value of `i32`. On +the right-hand side of the binding, we do make a `Some(T)`, where `T` is `5`. +Since that's an `i32`, the two sides match, and Rust is happy. If they didn't +match, we'd get an error: + +```{rust,ignore} +let x: Option<f64> = Some(5); +// error: mismatched types: expected `core::option::Option<f64>`, +// found `core::option::Option<_>` (expected f64 but found integral variable) +``` + +That doesn't mean we can't make `Option<T>`s that hold an `f64`! They just have to +match up: + +```{rust} +let x: Option<i32> = Some(5); +let y: Option<f64> = Some(5.0f64); +``` + +This is just fine. One definition, multiple uses. + +Generics don't have to only be generic over one type. 
Consider Rust's built-in +`Result<T, E>` type: + +```{rust} +enum Result<T, E> { + Ok(T), + Err(E), +} +``` + +This type is generic over _two_ types: `T` and `E`. By the way, the capital letters +can be any letter you'd like. We could define `Result<T, E>` as: + +```{rust} +enum Result<A, Z> { + Ok(A), + Err(Z), +} +``` + +if we wanted to. Convention says that the first generic parameter should be +`T`, for 'type,' and that we use `E` for 'error.' Rust doesn't care, however. + +The `Result<T, E>` type is intended to be used to return the result of a +computation, and to have the ability to return an error if it didn't work out. +Here's an example: + +```{rust} +let x: Result<f64, String> = Ok(2.3f64); +let y: Result<f64, String> = Err("There was an error.".to_string()); +``` + +This particular Result will return an `f64` if there's a success, and a +`String` if there's a failure. Let's write a function that uses `Result<T, E>`: + +```{rust} +fn inverse(x: f64) -> Result<f64, String> { + if x == 0.0f64 { return Err("x cannot be zero!".to_string()); } + + Ok(1.0f64 / x) +} +``` + +We don't want to take the inverse of zero, so we check to make sure that we +weren't passed zero. If we were, then we return an `Err`, with a message. If +it's okay, we return an `Ok`, with the answer. + +Why does this matter? Well, remember how `match` does exhaustive matches? +Here's how this function gets used: + +```{rust} +# fn inverse(x: f64) -> Result<f64, String> { +# if x == 0.0f64 { return Err("x cannot be zero!".to_string()); } +# Ok(1.0f64 / x) +# } +let x = inverse(25.0f64); + +match x { + Ok(x) => println!("The inverse of 25 is {}", x), + Err(msg) => println!("Error: {}", msg), +} +``` + +The `match` enforces that we handle the `Err` case. 
In addition, because the +answer is wrapped up in an `Ok`, we can't just use the result without doing +the match: + +```{rust,ignore} +let x = inverse(25.0f64); +println!("{}", x + 2.0f64); // error: binary operation `+` cannot be applied + // to type `core::result::Result<f64,collections::string::String>` +``` + +This function is great, but there's one other problem: it only works for 64 bit +floating point values. What if we wanted to handle 32 bit floating point as +well? We'd have to write this: + +```{rust} +fn inverse32(x: f32) -> Result<f32, String> { + if x == 0.0f32 { return Err("x cannot be zero!".to_string()); } + + Ok(1.0f32 / x) +} +``` + +Bummer. What we need is a *generic function*. Luckily, we can write one! +However, it won't _quite_ work yet. Before we get into that, let's talk syntax. +A generic version of `inverse` would look something like this: + +```{rust,ignore} +fn inverse<T>(x: T) -> Result<T, String> { + if x == 0.0 { return Err("x cannot be zero!".to_string()); } + + Ok(1.0 / x) +} +``` + +Just like how we had `Option<T>`, we use a similar syntax for `inverse<T>`. +We can then use `T` inside the rest of the signature: `x` has type `T`, and half +of the `Result` has type `T`. However, if we try to compile that example, we'll get +an error: + +```text +error: binary operation `==` cannot be applied to type `T` +``` + +Because `T` can be _any_ type, it may be a type that doesn't implement `==`, +and therefore, the first line would be wrong. What do we do? + +To fix this example, we need to learn about another Rust feature: traits. 
diff --git a/src/doc/trpl/getting-started.md b/src/doc/trpl/getting-started.md index e69de29bb2d1..a164def516b9 100644 --- a/src/doc/trpl/getting-started.md +++ b/src/doc/trpl/getting-started.md @@ -0,0 +1 @@ +% Getting Started diff --git a/src/doc/trpl/glossary.md b/src/doc/trpl/glossary.md index e69de29bb2d1..97898324847e 100644 --- a/src/doc/trpl/glossary.md +++ b/src/doc/trpl/glossary.md @@ -0,0 +1,39 @@ +% Glossary + +Not every Rustacean has a background in systems programming, nor in computer +science, so we've added explanations of terms that might be unfamiliar. + +### Arity + +Arity refers to the number of arguments a function or operation takes. + +```rust +let x = (2, 3); +let y = (4, 6); +let z = (8, 2, 6); +``` + +In the example above `x` and `y` have arity 2. `z` has arity 3. + +### Abstract Syntax Tree + +When a compiler is compiling your program, it does a number of different +things. One of the things that it does is turn the text of your program into an +'abstract syntax tree,' or 'AST.' This tree is a representation of the +structure of your program. For example, `2 + 3` can be turned into a tree: + +```text + + + / \ +2 3 +``` + +And `2 + (3 * 4)` would look like this: + +```text + + + / \ +2 * + / \ + 3 4 +``` diff --git a/src/doc/trpl/hello-cargo.md b/src/doc/trpl/hello-cargo.md index e69de29bb2d1..ae2a79bafecd 100644 --- a/src/doc/trpl/hello-cargo.md +++ b/src/doc/trpl/hello-cargo.md @@ -0,0 +1,168 @@ +% Hello, Cargo! + +[Cargo](http://crates.io) is a tool that Rustaceans use to help manage their +Rust projects. Cargo is currently in a pre-1.0 state, just like Rust, and so it +is still a work in progress. However, it is already good enough to use for many +Rust projects, and so it is assumed that Rust projects will use Cargo from the +beginning. + +Cargo manages three things: building your code, downloading the dependencies +your code needs, and building those dependencies. 
At first, your +program doesn't have any dependencies, so we'll only be using the first part of +its functionality. Eventually, we'll add more. Since we started off by using +Cargo, it'll be easy to add later. + +If you installed Rust via the official installers you will also have +Cargo. If you installed Rust some other way, you may want to [check +the Cargo +README](https://github.com/rust-lang/cargo#installing-cargo-from-nightlies) +for specific instructions about installing it. + +## Converting to Cargo + +Let's convert Hello World to Cargo. + +To Cargo-ify our project, we need to do two things: Make a `Cargo.toml` +configuration file, and put our source file in the right place. Let's +do that part first: + +```bash +$ mkdir src +$ mv main.rs src/main.rs +``` + +Cargo expects your source files to live inside a `src` directory. That leaves +the top level for other things, like READMEs, license information, and anything +not related to your code. Cargo helps us keep our projects nice and tidy. A +place for everything, and everything in its place. + +Next, our configuration file: + +```bash +$ editor Cargo.toml +``` + +Make sure to get this name right: you need the capital `C`! + +Put this inside: + +```toml +[package] + +name = "hello_world" +version = "0.0.1" +authors = [ "Your name " ] + +[[bin]] + +name = "hello_world" +``` + +This file is in the [TOML](https://github.com/toml-lang/toml) format. Let's let +it explain itself to you: + +> TOML aims to be a minimal configuration file format that's easy to read due +> to obvious semantics. TOML is designed to map unambiguously to a hash table. +> TOML should be easy to parse into data structures in a wide variety of +> languages. + +TOML is very similar to INI, but with some extra goodies. + +Anyway, there are two *tables* in this file: `package` and `bin`. The first +tells Cargo metadata about your package. 
The second tells Cargo that we're +interested in building a binary, not a library (though we could do both!), as +well as what it is named. + +Once you have this file in place, we should be ready to build! Try this: + +```bash +$ cargo build + Compiling hello_world v0.0.1 (file:///home/yourname/projects/hello_world) +$ ./target/debug/hello_world +Hello, world! +``` + +Bam! We build our project with `cargo build`, and run it with +`./target/debug/hello_world`. This hasn't bought us a whole lot over our simple use +of `rustc`, but think about the future: when our project has more than one +file, we would need to call `rustc` more than once and pass it a bunch of options to +tell it to build everything together. With Cargo, as our project grows, we can +just `cargo build`, and it'll work the right way. When your project is finally +ready for release, you can use `cargo build --release` to compile your crates with +optimizations. + +You'll also notice that Cargo has created a new file: `Cargo.lock`. + +```toml +[root] +name = "hello_world" +version = "0.0.1" +``` + +This file is used by Cargo to keep track of dependencies in your application. +Right now, we don't have any, so it's a bit sparse. You won't ever need +to touch this file yourself, just let Cargo handle it. + +That's it! We've successfully built `hello_world` with Cargo. Even though our +program is simple, it's using much of the real tooling that you'll use for the +rest of your Rust career. + +## A New Project + +You don't have to go through this whole process every time you want to start a new +project! Cargo has the ability to make a bare-bones project directory in which you +can start developing right away. + +To start a new project with Cargo, use `cargo new`: + +```bash +$ cargo new hello_world --bin +``` + +We're passing `--bin` because we're making a binary program: if we +were making a library, we'd leave it off. 
+ +Let's check out what Cargo has generated for us: + +```bash +$ cd hello_world +$ tree . +. +├── Cargo.toml +└── src + └── main.rs + +1 directory, 2 files +``` + +If you don't have the `tree` command, you can probably get it from your distro's package +manager. It's not necessary, but it's certainly useful. + +This is all we need to get started. First, let's check out `Cargo.toml`: + +```toml +[package] + +name = "hello_world" +version = "0.0.1" +authors = ["Your Name <you@example.com>"] +``` + +Cargo has populated this file with reasonable defaults based off the arguments you gave +it and your `git` global configuration. You may notice that Cargo has also initialized +the `hello_world` directory as a `git` repository. + +Here's what's in `src/main.rs`: + +```rust +fn main() { + println!("Hello, world!"); +} +``` + +Cargo has generated a "Hello World!" for us, and you're ready to start coding! A +much more in-depth guide to Cargo can be found [here](http://doc.crates.io/guide.html). + +Now that you've got the tools down, let's actually learn more about the Rust +language itself. These are the basics that will serve you well through the rest +of your time with Rust. diff --git a/src/doc/trpl/hello-world.md b/src/doc/trpl/hello-world.md index e69de29bb2d1..f726f8627c92 100644 --- a/src/doc/trpl/hello-world.md +++ b/src/doc/trpl/hello-world.md @@ -0,0 +1,164 @@ +% Hello, world! + +Now that you have Rust installed, let's write your first Rust program. It's +traditional to make your first program in any new language one that prints the +text "Hello, world!" to the screen. The nice thing about starting with such a +simple program is that you can verify that your compiler isn't just installed, +but also working properly. And printing information to the screen is a pretty +common thing to do. + +The first thing that we need to do is make a file to put our code in. I like +to make a `projects` directory in my home directory, and keep all my projects +there.
Rust does not care where your code lives. + +This actually leads to one other concern we should address: this guide will +assume that you have basic familiarity with the command line. Rust does not +require that you know a whole ton about the command line, but until the +language is in a more finished state, IDE support is spotty. Rust makes no +specific demands on your editing tooling, or where your code lives. + +With that said, let's make a directory in our projects directory. + +```{bash} +$ mkdir ~/projects +$ cd ~/projects +$ mkdir hello_world +$ cd hello_world +``` + +If you're on Windows and not using PowerShell, the `~` may not work. Consult +the documentation for your shell for more details. + +Let's make a new source file next. I'm going to use the syntax `editor +filename` to represent editing a file in these examples, but you should use +whatever method you want. We'll call our file `main.rs`: + +```{bash} +$ editor main.rs +``` + +Rust files always end in a `.rs` extension. If you're using more than one word +in your filename, use an underscore. `hello_world.rs` rather than +`helloworld.rs`. + +Now that you've got your file open, type this in: + +```{rust} +fn main() { + println!("Hello, world!"); +} +``` + +Save the file, and then type this into your terminal window: + +```{bash} +$ rustc main.rs +$ ./main # or main.exe on Windows +Hello, world! +``` + +You can also run these examples on [play.rust-lang.org](http://play.rust-lang.org/) by clicking on the arrow that appears in the upper right of the example when you mouse over the code. + +Success! Let's go over what just happened in detail. + +```{rust} +fn main() { + +} +``` + +These lines define a *function* in Rust. The `main` function is special: +it's the beginning of every Rust program. The first line says "I'm declaring a +function named `main`, which takes no arguments and returns nothing." 
If there +were arguments, they would go inside the parentheses (`(` and `)`), and because +we aren't returning anything from this function, we can omit the return type +entirely. We'll get to it later. + +You'll also note that the function is wrapped in curly braces (`{` and `}`). +Rust requires these around all function bodies. It is also considered good +style to put the opening curly brace on the same line as the function +declaration, with one space in between. + +Next up is this line: + +```{rust} + println!("Hello, world!"); +``` + +This line does all of the work in our little program. There are a number of +details that are important here. The first is that it's indented with four +spaces, not tabs. Please configure your editor of choice to insert four spaces +with the tab key. We provide some [sample configurations for various +editors](https://github.com/rust-lang/rust/tree/master/src/etc/CONFIGS.md). + +The second point is the `println!()` part. This is calling a Rust *macro*, +which is how metaprogramming is done in Rust. If it were a function instead, it +would look like this: `println()`. For our purposes, we don't need to worry +about this difference. Just know that sometimes, you'll see a `!`, and that +means that you're calling a macro instead of a normal function. Rust implements +`println!` as a macro rather than a function for good reasons, but that's a +very advanced topic. You'll learn more when we talk about macros later. One +last thing to mention: Rust's macros are significantly different from C macros, +if you've used those. Don't be scared of using macros. We'll get to the details +eventually, you'll just have to trust us for now. + +Next, `"Hello, world!"` is a *string*. Strings are a surprisingly complicated +topic in a systems programming language, and this is a *statically allocated* +string. We will talk more about different kinds of allocation later. 
We pass +this string as an argument to `println!`, which prints the string to the +screen. Easy enough! + +Finally, the line ends with a semicolon (`;`). Rust is an *expression +oriented* language, which means that most things are expressions. The `;` is +used to indicate that this expression is over, and the next one is ready to +begin. Most lines of Rust code end with a `;`. We will cover this in-depth +later in the guide. + +Finally, actually *compiling* and *running* our program. We can compile +with our compiler, `rustc`, by passing it the name of our source file: + +```{bash} +$ rustc main.rs +``` + +This is similar to `gcc` or `clang`, if you come from a C or C++ background. Rust +will output a binary executable. You can see it with `ls`: + +```{bash} +$ ls +main main.rs +``` + +Or on Windows: + +```{bash} +$ dir +main.exe main.rs +``` + +There are now two files: our source code, with the `.rs` extension, and the +executable (`main.exe` on Windows, `main` everywhere else) + +```{bash} +$ ./main # or main.exe on Windows +``` + +This prints out our `Hello, world!` text to our terminal. + +If you come from a dynamically typed language like Ruby, Python, or JavaScript, +you may not be used to these two steps being separate. Rust is an +*ahead-of-time compiled language*, which means that you can compile a +program, give it to someone else, and they don't need to have Rust installed. +If you give someone a `.rb` or `.py` or `.js` file, they need to have a +Ruby/Python/JavaScript implementation installed, but you just need one command +to both compile and run your program. Everything is a tradeoff in language design, +and Rust has made its choice. + +Congratulations! You have officially written a Rust program. That makes you a +Rust programmer! Welcome. + +Next, I'd like to introduce you to another tool, Cargo, which is used to write +real-world Rust programs. 
Just using `rustc` is nice for simple things, but as +your project grows, you'll want something to help you manage all of the options +that it has, and to make it easy to share your code with other people and +projects. diff --git a/src/doc/trpl/if.md b/src/doc/trpl/if.md index e69de29bb2d1..92f95341f814 100644 --- a/src/doc/trpl/if.md +++ b/src/doc/trpl/if.md @@ -0,0 +1,157 @@ +% `if` + +Rust's take on `if` is not particularly complex, but it's much more like the +`if` you'll find in a dynamically typed language than in a more traditional +systems language. So let's talk about it, to make sure you grasp the nuances. + +`if` is a specific form of a more general concept, the *branch*. The name comes +from a branch in a tree: a decision point, where depending on a choice, +multiple paths can be taken. + +In the case of `if`, there is one choice that leads down two paths: + +```rust +let x = 5; + +if x == 5 { + println!("x is five!"); +} +``` + +If we changed the value of `x` to something else, this line would not print. +More specifically, if the expression after the `if` evaluates to `true`, then +the block is executed. If it's `false`, then it is not. + +If you want something to happen in the `false` case, use an `else`: + +```{rust} +let x = 5; + +if x == 5 { + println!("x is five!"); +} else { + println!("x is not five :("); +} +``` + +If there is more than one case, use an `else if`: + +```rust +let x = 5; + +if x == 5 { + println!("x is five!"); +} else if x == 6 { + println!("x is six!"); +} else { + println!("x is not five or six :("); +} +``` + +This is all pretty standard. 
However, you can also do this: + + +```{rust} +let x = 5; + +let y = if x == 5 { + 10 +} else { + 15 +}; // y: i32 +``` + +Which we can (and probably should) write like this: + +```{rust} +let x = 5; + +let y = if x == 5 { 10 } else { 15 }; // y: i32 +``` + +This reveals two interesting things about Rust: it is an expression-based +language, and semicolons are different from semicolons in other 'curly brace +and semicolon'-based languages. These two things are related. + +## Expressions vs. Statements + +Rust is primarily an expression based language. There are only two kinds of +statements, and everything else is an expression. + +So what's the difference? Expressions return a value, and statements do not. +In many languages, `if` is a statement, and therefore, `let x = if ...` would +make no sense. But in Rust, `if` is an expression, which means that it returns +a value. We can then use this value to initialize the binding. + +Speaking of which, bindings are the first of Rust's two kinds of statements. +The proper name is a *declaration statement*. So far, `let` is the only kind +of declaration statement we've seen. Let's talk about that some more. + +In some languages, variable bindings can be written as expressions, not just +statements. Like Ruby: + +```{ruby} +x = y = 5 +``` + +In Rust, however, using `let` to introduce a binding is _not_ an expression. The +following will produce a compile-time error: + +```{ignore} +let x = (let y = 5); // expected identifier, found keyword `let` +``` + +The compiler is telling us here that it was expecting to see the beginning of +an expression, and a `let` can only begin a statement, not an expression. + +Note that assigning to an already-bound variable (e.g. `y = 5`) is still an +expression, although its value is not particularly useful. Unlike C, where an +assignment evaluates to the assigned value (e.g. `5` in the previous example), +in Rust the value of an assignment is the unit type `()` (which we'll cover later).
+ +The second kind of statement in Rust is the *expression statement*. Its +purpose is to turn any expression into a statement. In practical terms, Rust's +grammar expects statements to follow other statements. This means that you use +semicolons to separate expressions from each other. This means that Rust +looks a lot like most other languages that require you to use semicolons +at the end of every line, and you will see semicolons at the end of almost +every line of Rust code you see. + +What is this exception that makes us say "almost"? You saw it already, in this +code: + +```{rust} +let x = 5; + +let y: i32 = if x == 5 { 10 } else { 15 }; +``` + +Note that I've added the type annotation to `y`, to specify explicitly that I +want `y` to be an integer. + +This is not the same as this, which won't compile: + +```{ignore} +let x = 5; + +let y: i32 = if x == 5 { 10; } else { 15; }; +``` + +Note the semicolons after the 10 and 15. Rust will give us the following error: + +```text +error: mismatched types: expected `i32`, found `()` (expected i32, found ()) +``` + +We expected an integer, but we got `()`. `()` is pronounced *unit*, and is a +special type in Rust's type system. In Rust, `()` is _not_ a valid value for a +variable of type `i32`. It's only a valid value for variables of the type `()`, +which aren't very useful. Remember how we said statements don't return a value? +Well, that's the purpose of unit in this case. The semicolon turns any +expression into a statement by throwing away its value and returning unit +instead. + +There's one more time in which you won't see a semicolon at the end of a line +of Rust code. For that, we'll need our next concept: functions. 
+ +TODO: `if let` diff --git a/src/doc/trpl/inline-assembly.md b/src/doc/trpl/inline-assembly.md index e69de29bb2d1..1a4592f980fa 100644 --- a/src/doc/trpl/inline-assembly.md +++ b/src/doc/trpl/inline-assembly.md @@ -0,0 +1,141 @@ +% Inline Assembly + +For extremely low-level manipulations and performance reasons, one +might wish to control the CPU directly. Rust supports using inline +assembly to do this via the `asm!` macro. The syntax roughly matches +that of GCC & Clang: + +```ignore +asm!(assembly template + : output operands + : input operands + : clobbers + : options + ); +``` + +Any use of `asm` is feature gated (requires `#![feature(asm)]` on the +crate to allow) and of course requires an `unsafe` block. + +> **Note**: the examples here are given in x86/x86-64 assembly, but +> all platforms are supported. + +## Assembly template + +The `assembly template` is the only required parameter and must be a +literal string (i.e. `""`) + +``` +#![feature(asm)] + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +fn foo() { + unsafe { + asm!("NOP"); + } +} + +// other platforms +#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] +fn foo() { /* ... */ } + +fn main() { + // ... + foo(); + // ... +} +``` + +(The `feature(asm)` and `#[cfg]`s are omitted from now on.) + +Output operands, input operands, clobbers and options are all optional +but you must add the right number of `:` if you skip them: + +``` +# #![feature(asm)] +# #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +# fn main() { unsafe { +asm!("xor %eax, %eax" + : + : + : "eax" + ); +# } } +``` + +Whitespace also doesn't matter: + +``` +# #![feature(asm)] +# #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +# fn main() { unsafe { +asm!("xor %eax, %eax" ::: "eax"); +# } } +``` + +## Operands + +Input and output operands follow the same format: `: +"constraints1"(expr1), "constraints2"(expr2), ..."`. 
Output operand +expressions must be mutable lvalues: + +``` +# #![feature(asm)] +# #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +fn add(a: i32, b: i32) -> i32 { + let mut c = 0; + unsafe { + asm!("add $2, $0" + : "=r"(c) + : "0"(a), "r"(b) + ); + } + c +} +# #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] +# fn add(a: i32, b: i32) -> i32 { a + b } + +fn main() { + assert_eq!(add(3, 14159), 14162) +} +``` + +## Clobbers + +Some instructions modify registers which might otherwise have held +different values so we use the clobbers list to indicate to the +compiler not to assume any values loaded into those registers will +stay valid. + +``` +# #![feature(asm)] +# #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +# fn main() { unsafe { +// Put the value 0x200 in eax +asm!("mov $$0x200, %eax" : /* no outputs */ : /* no inputs */ : "eax"); +# } } +``` + +Input and output registers need not be listed since that information +is already communicated by the given constraints. Otherwise, any other +registers used either implicitly or explicitly should be listed. + +If the assembly changes the condition code register `cc` should be +specified as one of the clobbers. Similarly, if the assembly modifies +memory, `memory` should also be specified. + +## Options + +The last section, `options` is specific to Rust. The format is comma +separated literal strings (i.e. `:"foo", "bar", "baz"`). It's used to +specify some extra info about the inline assembly: + +Current valid options are: + +1. *volatile* - specifying this is analogous to + `__asm__ __volatile__ (...)` in gcc/clang. +2. *alignstack* - certain instructions expect the stack to be + aligned a certain way (i.e. SSE) and specifying this indicates to + the compiler to insert its usual stack alignment code +3. *intel* - use intel syntax instead of the default AT&T. 
+ diff --git a/src/doc/trpl/installing-rust.md b/src/doc/trpl/installing-rust.md index e69de29bb2d1..c839688047aa 100644 --- a/src/doc/trpl/installing-rust.md +++ b/src/doc/trpl/installing-rust.md @@ -0,0 +1,93 @@ +% Installing Rust + +The first step to using Rust is to install it! There are a number of ways to +install Rust, but the easiest is to use the `rustup` script. If you're on +Linux or a Mac, all you need to do is this (note that you don't need to type +in the `$`s, they just indicate the start of each command): + +```bash +$ curl -sf -L https://static.rust-lang.org/rustup.sh | sudo sh +``` + +If you're concerned about the [potential insecurity](http://curlpipesh.tumblr.com/) of using `curl | sudo sh`, +please keep reading and see our disclaimer below. And feel free to use a two-step version of the installation and examine our installation script: + +```bash +$ curl -f -L https://static.rust-lang.org/rustup.sh -O +$ sudo sh rustup.sh +``` + +If you're on Windows, please download either the [32-bit +installer](https://static.rust-lang.org/dist/rust-1.0.0-beta-i686-pc-windows-gnu.exe) +or the [64-bit +installer](https://static.rust-lang.org/dist/rust-1.0.0-beta-x86_64-pc-windows-gnu.exe) +and run it. + +If you decide you don't want Rust anymore, we'll be a bit sad, but that's okay. +Not every programming language is great for everyone. Just run the uninstall +script: + +```bash +$ sudo /usr/local/lib/rustlib/uninstall.sh +``` + +If you used the Windows installer, just re-run the `.exe` and it will give you +an uninstall option. + +You can re-run the `rustup` script any time you want to update Rust. Which, at this +point, is often. Rust is still pre-1.0, and so people assume that you're using +a very recent Rust. + +This brings me to one other point: some people, and somewhat rightfully so, get +very upset when we tell you to `curl | sudo sh`. And they should be!
Basically, +when you do this, you are trusting that the good people who maintain Rust +aren't going to hack your computer and do bad things. That's a good instinct! +If you're one of those people, please check out the documentation on [building +Rust from Source](https://github.com/rust-lang/rust#building-from-source), or +[the official binary downloads](http://www.rust-lang.org/install.html). And we +promise that this method will not be the way to install Rust forever: it's just +the easiest way to keep people updated while Rust is in its alpha state. + +Oh, we should also mention the officially supported platforms: + +* Windows (7, 8, Server 2008 R2) +* Linux (2.6.18 or later, various distributions), x86 and x86-64 +* OSX 10.7 (Lion) or greater, x86 and x86-64 + +We extensively test Rust on these platforms, and a few others, too, like +Android. But these are the ones most likely to work, as they have the most +testing. + +Finally, a comment about Windows. Rust considers Windows to be a first-class +platform upon release, but if we're honest, the Windows experience isn't as +integrated as the Linux/OS X experience is. We're working on it! If anything +does not work, it is a bug. Please let us know if that happens. Each and every +commit is tested against Windows just like any other platform. + +If you've got Rust installed, you can open up a shell, and type this: + +```bash +$ rustc --version +``` + +You should see the version number, commit hash, commit date and build date: + +```bash +rustc 1.0.0-nightly (f11f3e7ba 2015-01-04) (built 2015-01-06) +``` + +If you did, Rust has been installed successfully! Congrats! + +This installer also installs a copy of the documentation locally, so you can +read it offline. On UNIX systems, `/usr/local/share/doc/rust` is the location. +On Windows, it's in a `share/doc` directory, inside wherever you installed Rust +to. + +If not, there are a number of places where you can get help. 
The easiest is +[the #rust IRC channel on irc.mozilla.org](irc://irc.mozilla.org/#rust), which +you can access through +[Mibbit](http://chat.mibbit.com/?server=irc.mozilla.org&channel=%23rust). Click +that link, and you'll be chatting with other Rustaceans (a silly nickname we +call ourselves), and we can help you out. Other great resources include [the +/r/rust subreddit](http://www.reddit.com/r/rust), and [Stack +Overflow](http://stackoverflow.com/questions/tagged/rust). diff --git a/src/doc/trpl/intrinsics.md b/src/doc/trpl/intrinsics.md index e69de29bb2d1..25f7c5449318 100644 --- a/src/doc/trpl/intrinsics.md +++ b/src/doc/trpl/intrinsics.md @@ -0,0 +1,25 @@ +% Intrinsics + +> **Note**: intrinsics will forever have an unstable interface, it is +> recommended to use the stable interfaces of libcore rather than intrinsics +> directly. + +These are imported as if they were FFI functions, with the special +`rust-intrinsic` ABI. For example, if one was in a freestanding +context, but wished to be able to `transmute` between types, and +perform efficient pointer arithmetic, one would import those functions +via a declaration like + +``` +# #![feature(intrinsics)] +# fn main() {} + +extern "rust-intrinsic" { + fn transmute<T, U>(x: T) -> U; + + fn offset<T>(dst: *const T, offset: isize) -> *const T; +} +``` + +As with any other FFI functions, these are always `unsafe` to call. + diff --git a/src/doc/trpl/iterators.md b/src/doc/trpl/iterators.md index e69de29bb2d1..eea575658b92 100644 --- a/src/doc/trpl/iterators.md +++ b/src/doc/trpl/iterators.md @@ -0,0 +1,349 @@ +% Iterators + +Let's talk about loops. + +Remember Rust's `for` loop? Here's an example: + +```rust +for x in 0..10 { + println!("{}", x); +} +``` + +Now that you know more Rust, we can talk in detail about how this works. +Ranges (the `0..10`) are 'iterators'. An iterator is something that we can +call the `.next()` method on repeatedly, and it gives us a sequence of things.
+ +Like this: + +```rust +let mut range = 0..10; + +loop { + match range.next() { + Some(x) => { + println!("{}", x); + }, + None => { break } + } +} +``` + +We make a mutable binding to the range, which is our iterator. We then `loop`, +with an inner `match`. This `match` is used on the result of `range.next()`, +which gives us a reference to the next value of the iterator. `next` returns an +`Option<i32>`, in this case, which will be `Some(i32)` when we have a value and +`None` once we run out. If we get `Some(i32)`, we print it out, and if we get +`None`, we `break` out of the loop. + +This code sample is basically the same as our `for` loop version. The `for` +loop is just a handy way to write this `loop`/`match`/`break` construct. + +`for` loops aren't the only thing that uses iterators, however. Writing your +own iterator involves implementing the `Iterator` trait. While doing that is +outside of the scope of this guide, Rust provides a number of useful iterators +to accomplish various tasks. Before we talk about those, we should talk about a +Rust anti-pattern. And that's using ranges like this. + +Yes, we just talked about how ranges are cool. But ranges are also very +primitive. For example, if you needed to iterate over the contents of a vector, +you may be tempted to write this: + +```rust +let nums = vec![1, 2, 3]; + +for i in 0..nums.len() { + println!("{}", nums[i]); +} +``` + +This is strictly worse than using an actual iterator. You can iterate over vectors +directly, so write this: + +```rust +let nums = vec![1, 2, 3]; + +for num in &nums { + println!("{}", num); +} +``` + +There are two reasons for this. First, this more directly expresses what we +mean. We iterate through the entire vector, rather than iterating through +indexes, and then indexing the vector. Second, this version is more efficient: +the first version will have extra bounds checking because it used indexing, +`nums[i]`.
But since we yield a reference to each element of the vector in turn +with the iterator, there's no bounds checking in the second example. This is +very common with iterators: we can ignore unnecessary bounds checks, but still +know that we're safe. + +There's another detail here that's not 100% clear because of how `println!` +works. `num` is actually of type `&i32`. That is, it's a reference to an `i32`, +not an `i32` itself. `println!` handles the dereferencing for us, so we don't +see it. This code works fine too: + +```rust +let nums = vec![1, 2, 3]; + +for num in &nums { + println!("{}", *num); +} +``` + +Now we're explicitly dereferencing `num`. Why does `&nums` give us +references? Firstly, because we explicitly asked it to with +`&`. Secondly, if it gave us the data itself, we would have to be its +owner, which would involve making a copy of the data and giving us the +copy. With references, we're just borrowing a reference to the data, +and so it's just passing a reference, without needing to do the move. + +So, now that we've established that ranges are often not what you want, let's +talk about what you do want instead. + +There are three broad classes of things that are relevant here: iterators, +*iterator adapters*, and *consumers*. Here's some definitions: + +* *iterators* give you a sequence of values. +* *iterator adapters* operate on an iterator, producing a new iterator with a + different output sequence. +* *consumers* operate on an iterator, producing some final set of values. + +Let's talk about consumers first, since you've already seen an iterator, ranges. + +## Consumers + +A *consumer* operates on an iterator, returning some kind of value or values. +The most common consumer is `collect()`. This code doesn't quite compile, +but it shows the intention: + +```{rust,ignore} +let one_to_one_hundred = (1..101).collect(); +``` + +As you can see, we call `collect()` on our iterator. 
`collect()` takes +as many values as the iterator will give it, and returns a collection +of the results. So why won't this compile? Rust can't determine what +type of things you want to collect, and so you need to let it know. +Here's the version that does compile: + +```rust +let one_to_one_hundred = (1..101).collect::<Vec<i32>>(); +``` + +If you remember, the `::<>` syntax allows us to give a type hint, +and so we tell it that we want a vector of integers. You don't always +need to use the whole type, though. Using a `_` will let you provide +a partial hint: + +```rust +let one_to_one_hundred = (1..101).collect::<Vec<_>>(); +``` + +This says "Collect into a `Vec<T>`, please, but infer what the `T` is for me." +`_` is sometimes called a "type placeholder" for this reason. + +`collect()` is the most common consumer, but there are others too. `find()` +is one: + +```rust +let greater_than_forty_two = (0..100) + .find(|x| *x > 42); + +match greater_than_forty_two { + Some(_) => println!("We got some numbers!"), + None => println!("No numbers found :("), +} +``` + +`find` takes a closure, and works on a reference to each element of an +iterator. This closure returns `true` if the element is the element we're +looking for, and `false` otherwise. Because we might not find a matching +element, `find` returns an `Option` rather than the element itself. + +Another important consumer is `fold`. Here's what it looks like: + +```rust +let sum = (1..4).fold(0, |sum, x| sum + x); +``` + +`fold()` is a consumer that looks like this: +`fold(base, |accumulator, element| ...)`. It takes two arguments: the first +is an element called the *base*. The second is a closure that itself takes two +arguments: the first is called the *accumulator*, and the second is an +*element*. Upon each iteration, the closure is called, and the result is the +value of the accumulator on the next iteration. On the first iteration, the +base is the value of the accumulator. + +Okay, that's a bit confusing.
Let's examine the values of all of these things +in this iterator: + +| base | accumulator | element | closure result | +|------|-------------|---------|----------------| +| 0 | 0 | 1 | 1 | +| 0 | 1 | 2 | 3 | +| 0 | 3 | 3 | 6 | + +We called `fold()` with these arguments: + +```rust +# (1..4) +.fold(0, |sum, x| sum + x); +``` + +So, `0` is our base, `sum` is our accumulator, and `x` is our element. On the +first iteration, we set `sum` to `0`, and `x` is the first element of `nums`, +`1`. We then add `sum` and `x`, which gives us `0 + 1 = 1`. On the second +iteration, that value becomes our accumulator, `sum`, and the element is +the second element of the array, `2`. `1 + 2 = 3`, and so that becomes +the value of the accumulator for the last iteration. On that iteration, +`x` is the last element, `3`, and `3 + 3 = 6`, which is our final +result for our sum. `1 + 2 + 3 = 6`, and that's the result we got. + +Whew. `fold` can be a bit strange the first few times you see it, but once it +clicks, you can use it all over the place. Any time you have a list of things, +and you want a single result, `fold` is appropriate. + +Consumers are important due to one additional property of iterators we haven't +talked about yet: laziness. Let's talk some more about iterators, and you'll +see why consumers matter. + +## Iterators + +As we've said before, an iterator is something that we can call the +`.next()` method on repeatedly, and it gives us a sequence of things. +Because you need to call the method, this means that iterators +are *lazy* and don't need to generate all of the values upfront. +This code, for example, does not actually generate the numbers +`1-100`, and just creates a value that represents the sequence: + +```rust +let nums = 1..100; +``` + +Since we didn't do anything with the range, it didn't generate the sequence. 
+Let's add the consumer: + +```rust +let nums = (1..100).collect::<Vec<i32>>(); +``` + +Now, `collect()` will require that the range gives it some numbers, and so +it will do the work of generating the sequence. + +Ranges are one of two basic iterators that you'll see. The other is `iter()`. +`iter()` can turn a vector into a simple iterator that gives you each element +in turn: + +```rust +let nums = [1, 2, 3]; + +for num in nums.iter() { + println!("{}", num); +} +``` + +These two basic iterators should serve you well. There are some more +advanced iterators, including ones that are infinite. Like using range syntax +and `step_by`: + +```rust +# #![feature(step_by)] +(1..).step_by(5); +``` + +This iterator counts up from one, adding five each time. It will give +you a new integer every time, forever (well, technically, until it reaches the +maximum number representable by an `i32`). But since iterators are lazy, +that's okay! You probably don't want to use `collect()` on it, though... + +That's enough about iterators. Iterator adapters are the last concept +we need to talk about with regards to iterators. Let's get to it! + +## Iterator adapters + +*Iterator adapters* take an iterator and modify it somehow, producing +a new iterator. The simplest one is called `map`: + +```{rust,ignore} +(1..100).map(|x| x + 1); +``` + +`map` is called upon another iterator, and produces a new iterator where each +element reference has the closure it's been given as an argument called on it. +So this would give us the numbers from `2-100`. Well, almost! If you +compile the example, you'll get a warning: + +```text +warning: unused result which must be used: iterator adaptors are lazy and + do nothing unless consumed, #[warn(unused_must_use)] on by default +(1..100).map(|x| x + 1); + ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +``` + +Laziness strikes again! That closure will never execute. 
This example +doesn't print any numbers: + +```{rust,ignore} +(1..100).map(|x| println!("{}", x)); +``` + +If you are trying to execute a closure on an iterator for its side effects, +just use `for` instead. + +There are tons of interesting iterator adapters. `take(n)` will return an +iterator over the next `n` elements of the original iterator. Note that this +has no side effect on the original iterator. Let's try it out with our infinite +iterator from before: + +```rust +# #![feature(step_by)] +for i in (1..).step_by(5).take(5) { + println!("{}", i); +} +``` + +This will print + +```text +1 +6 +11 +16 +21 +``` + +`filter()` is an adapter that takes a closure as an argument. This closure +returns `true` or `false`. The new iterator `filter()` produces +only the elements that that closure returns `true` for: + +```rust +for i in (1..100).filter(|&x| x % 2 == 0) { + println!("{}", i); +} +``` + +This will print all of the even numbers between one and a hundred. +(Note that because `filter` doesn't consume the elements that are +being iterated over, it is passed a reference to each element, and +thus the filter predicate uses the `&x` pattern to extract the integer +itself.) + +You can chain all three things together: start with an iterator, adapt it +a few times, and then consume the result. Check it out: + +```rust +(1..1000) + .filter(|&x| x % 2 == 0) + .filter(|&x| x % 3 == 0) + .take(5) + .collect::<Vec<i32>>(); +``` + +This will give you a vector containing `6`, `12`, `18`, `24`, and `30`. + +This is just a small taste of what iterators, iterator adapters, and consumers +can help you with. There are a number of really useful iterators, and you can +write your own as well. Iterators provide a safe, efficient way to manipulate +all kinds of lists. They're a little unusual at first, but if you play with +them, you'll get hooked. For a full list of the different iterators and +consumers, check out the [iterator module documentation](../std/iter/index.html). 
diff --git a/src/doc/trpl/lang-items.md b/src/doc/trpl/lang-items.md index e69de29bb2d1..5c27c03e8e0b 100644 --- a/src/doc/trpl/lang-items.md +++ b/src/doc/trpl/lang-items.md @@ -0,0 +1,79 @@ +% Lang items + +> **Note**: lang items are often provided by crates in the Rust distribution, +> and lang items themselves have an unstable interface. It is recommended to use +> officially distributed crates instead of defining your own lang items. + +The `rustc` compiler has certain pluggable operations, that is, +functionality that isn't hard-coded into the language, but is +implemented in libraries, with a special marker to tell the compiler +it exists. The marker is the attribute `#[lang="..."]` and there are +various different values of `...`, i.e. various different 'lang +items'. + +For example, `Box` pointers require two lang items, one for allocation +and one for deallocation. A freestanding program that uses the `Box` +sugar for dynamic allocations via `malloc` and `free`: + +``` +#![feature(lang_items, box_syntax, start, no_std, libc)] +#![no_std] + +extern crate libc; + +extern { + fn abort() -> !; +} + +#[lang = "owned_box"] +pub struct Box<T>(*mut T); + +#[lang="exchange_malloc"] +unsafe fn allocate(size: usize, _align: usize) -> *mut u8 { + let p = libc::malloc(size as libc::size_t) as *mut u8; + + // malloc failed + if p as usize == 0 { + abort(); + } + + p +} +#[lang="exchange_free"] +unsafe fn deallocate(ptr: *mut u8, _size: usize, _align: usize) { + libc::free(ptr as *mut libc::c_void) +} + +#[start] +fn main(argc: isize, argv: *const *const u8) -> isize { + let x = box 1; + + 0 +} + +#[lang = "stack_exhausted"] extern fn stack_exhausted() {} +#[lang = "eh_personality"] extern fn eh_personality() {} +#[lang = "panic_fmt"] fn panic_fmt() -> ! { loop {} } +``` + +Note the use of `abort`: the `exchange_malloc` lang item is assumed to +return a valid pointer, and so needs to do the check internally. 
+ +Other features provided by lang items include: + +- overloadable operators via traits: the traits corresponding to the + `==`, `<`, dereferencing (`*`) and `+` (etc.) operators are all + marked with lang items; those specific four are `eq`, `ord`, + `deref`, and `add` respectively. +- stack unwinding and general failure; the `eh_personality`, `fail` + and `fail_bounds_checks` lang items. +- the traits in `std::marker` used to indicate types of + various kinds; lang items `send`, `sync` and `copy`. +- the marker types and variance indicators found in + `std::marker`; lang items `covariant_type`, + `contravariant_lifetime`, etc. + +Lang items are loaded lazily by the compiler; e.g. if one never uses +`Box` then there is no need to define functions for `exchange_malloc` +and `exchange_free`. `rustc` will emit an error when an item is needed +but not found in the current crate or any that it depends on. diff --git a/src/doc/trpl/learn-rust.md b/src/doc/trpl/learn-rust.md index e69de29bb2d1..e5482d3fb968 100644 --- a/src/doc/trpl/learn-rust.md +++ b/src/doc/trpl/learn-rust.md @@ -0,0 +1 @@ +% Learn Rust diff --git a/src/doc/trpl/lifetimes.md b/src/doc/trpl/lifetimes.md index e69de29bb2d1..c6eee97dc6a6 100644 --- a/src/doc/trpl/lifetimes.md +++ b/src/doc/trpl/lifetimes.md @@ -0,0 +1,3 @@ +% Lifetimes + +Coming soon! diff --git a/src/doc/trpl/link-args.md b/src/doc/trpl/link-args.md index e69de29bb2d1..ee5159afb8e6 100644 --- a/src/doc/trpl/link-args.md +++ b/src/doc/trpl/link-args.md @@ -0,0 +1,25 @@ +% Link args + +There is one other way to tell rustc how to customize linking, and that is via +the `link_args` attribute. This attribute is applied to `extern` blocks and +specifies raw flags which need to get passed to the linker when producing an +artifact. 
An example usage would be: + +``` no_run +#![feature(link_args)] + +#[link_args = "-foo -bar -baz"] +extern {} +# fn main() {} +``` + +Note that this feature is currently hidden behind the `feature(link_args)` gate +because this is not a sanctioned way of performing linking. Right now rustc +shells out to the system linker, so it makes sense to provide extra command line +arguments, but this will not always be the case. In the future rustc may use +LLVM directly to link native libraries in which case `link_args` will have no +meaning. + +It is highly recommended to *not* use this attribute, and rather use the more +formal `#[link(...)]` attribute on `extern` blocks instead. + diff --git a/src/doc/trpl/lol.txt b/src/doc/trpl/lol.txt deleted file mode 100644 index ea7600113641..000000000000 --- a/src/doc/trpl/lol.txt +++ /dev/null @@ -1 +0,0 @@ -getting-started.md installing-rust.md hello-world.md hello-cargo.md learn-rust.md effective-rust.md the-stack-and-the-heap.md debug-and-display.md testing.md documentation.md iterators.md concurrency.md error-handling.md ffi.md deref-coercions.md syntax-and-semantics.md variable-bindings.md primitive-types.md functions.md comments.md structs.md mutability.md method-syntax.md enums.md if.md match.md patterns.md for-loops.md while-loops.md ownership.md references-and-borrowing.md lifetimes.md move-semantics.md drop.md vectors.md arrays.md slices.md strings.md traits.md operators-and-overloading.md generics.md trait-objects.md closures.md ufcs.md crates-and-modules.md static.md const.md tuples.md tuple-structs.md attributes.md conditional-compilation.md type-aliases.md casting-between-types.md associated-types.md unsized-types.md macros.md unsafe-code.md nightly-rust.md plugins.md inline-assembly.md no-stdlib.md intrinsics.md lang-items.md link-args.md benchmark-tests.md box-syntax-and-patterns.md glossary.md diff --git a/src/doc/trpl/macros.md b/src/doc/trpl/macros.md index e69de29bb2d1..6d21cb59383c 100644 --- 
a/src/doc/trpl/macros.md +++ b/src/doc/trpl/macros.md @@ -0,0 +1,663 @@ +% Macros + +By now you've learned about many of the tools Rust provides for abstracting and +reusing code. These units of code reuse have a rich semantic structure. For +example, functions have a type signature, type parameters have trait bounds, +and overloaded functions must belong to a particular trait. + +This structure means that Rust's core abstractions have powerful compile-time +correctness checking. But this comes at the price of reduced flexibility. If +you visually identify a pattern of repeated code, you may find it's difficult +or cumbersome to express that pattern as a generic function, a trait, or +anything else within Rust's semantics. + +Macros allow us to abstract at a *syntactic* level. A macro invocation is +shorthand for an "expanded" syntactic form. This expansion happens early in +compilation, before any static checking. As a result, macros can capture many +patterns of code reuse that Rust's core abstractions cannot. + +The drawback is that macro-based code can be harder to understand, because +fewer of the built-in rules apply. Like an ordinary function, a well-behaved +macro can be used without understanding its implementation. However, it can be +difficult to design a well-behaved macro! Additionally, compiler errors in +macro code are harder to interpret, because they describe problems in the +expanded code, not the source-level form that developers use. + +These drawbacks make macros something of a "feature of last resort". That's not +to say that macros are bad; they are part of Rust because sometimes they're +needed for truly concise, well-abstracted code. Just keep this tradeoff in +mind. + +# Defining a macro + +You may have seen the `vec!` macro, used to initialize a [vector][] with any +number of elements. 
+ +[vector]: arrays-vectors-and-slices.html + +```rust +let x: Vec<u32> = vec![1, 2, 3]; +# assert_eq!(x, [1, 2, 3]); +``` + +This can't be an ordinary function, because it takes any number of arguments. +But we can imagine it as syntactic shorthand for + +```rust +let x: Vec<u32> = { + let mut temp_vec = Vec::new(); + temp_vec.push(1); + temp_vec.push(2); + temp_vec.push(3); + temp_vec +}; +# assert_eq!(x, [1, 2, 3]); +``` + +We can implement this shorthand, using a macro: [^actual] + +[^actual]: The actual definition of `vec!` in libcollections differs from the + one presented here, for reasons of efficiency and reusability. Some + of these are mentioned in the [advanced macros chapter][]. + +```rust +macro_rules! vec { + ( $( $x:expr ),* ) => { + { + let mut temp_vec = Vec::new(); + $( + temp_vec.push($x); + )* + temp_vec + } + }; +} +# fn main() { +# assert_eq!(vec![1,2,3], [1, 2, 3]); +# } +``` + +Whoa, that's a lot of new syntax! Let's break it down. + +```ignore +macro_rules! vec { ... } +``` + +This says we're defining a macro named `vec`, much as `fn vec` would define a +function named `vec`. In prose, we informally write a macro's name with an +exclamation point, e.g. `vec!`. The exclamation point is part of the invocation +syntax and serves to distinguish a macro from an ordinary function. + +## Matching + +The macro is defined through a series of *rules*, which are pattern-matching +cases. Above, we had + +```ignore +( $( $x:expr ),* ) => { ... }; +``` + +This is like a `match` expression arm, but the matching happens on Rust syntax +trees, at compile time. The semicolon is optional on the last (here, only) +case. The "pattern" on the left-hand side of `=>` is known as a *matcher*. +These have [their own little grammar] within the language. + +[their own little grammar]: ../reference.html#macros + +The matcher `$x:expr` will match any Rust expression, binding that syntax tree +to the *metavariable* `$x`. 
The identifier `expr` is a *fragment specifier*; +the full possibilities are enumerated in the [advanced macros chapter][]. +Surrounding the matcher with `$(...),*` will match zero or more expressions, +separated by commas. + +Aside from the special matcher syntax, any Rust tokens that appear in a matcher +must match exactly. For example, + +```rust +macro_rules! foo { + (x => $e:expr) => (println!("mode X: {}", $e)); + (y => $e:expr) => (println!("mode Y: {}", $e)); +} + +fn main() { + foo!(y => 3); +} +``` + +will print + +```text +mode Y: 3 +``` + +With + +```rust,ignore +foo!(z => 3); +``` + +we get the compiler error + +```text +error: no rules expected the token `z` +``` + +## Expansion + +The right-hand side of a macro rule is ordinary Rust syntax, for the most part. +But we can splice in bits of syntax captured by the matcher. From the original +example: + +```ignore +$( + temp_vec.push($x); +)* +``` + +Each matched expression `$x` will produce a single `push` statement in the +macro expansion. The repetition in the expansion proceeds in "lockstep" with +repetition in the matcher (more on this in a moment). + +Because `$x` was already declared as matching an expression, we don't repeat +`:expr` on the right-hand side. Also, we don't include a separating comma as +part of the repetition operator. Instead, we have a terminating semicolon +within the repeated block. + +Another detail: the `vec!` macro has *two* pairs of braces on the right-hand +side. They are often combined like so: + +```ignore +macro_rules! foo { + () => {{ + ... + }} +} +``` + +The outer braces are part of the syntax of `macro_rules!`. In fact, you can use +`()` or `[]` instead. They simply delimit the right-hand side as a whole. + +The inner braces are part of the expanded syntax. Remember, the `vec!` macro is +used in an expression context. To write an expression with multiple statements, +including `let`-bindings, we use a block. 
If your macro expands to a single +expression, you don't need this extra layer of braces. + +Note that we never *declared* that the macro produces an expression. In fact, +this is not determined until we use the macro as an expression. With care, you +can write a macro whose expansion works in several contexts. For example, +shorthand for a data type could be valid as either an expression or a pattern. + +## Repetition + +The repetition operator follows two principal rules: + +1. `$(...)*` walks through one "layer" of repetitions, for all of the `$name`s + it contains, in lockstep, and +2. each `$name` must be under at least as many `$(...)*`s as it was matched + against. If it is under more, it'll be duplicated, as appropriate. + +This baroque macro illustrates the duplication of variables from outer +repetition levels. + +```rust +macro_rules! o_O { + ( + $( + $x:expr; [ $( $y:expr ),* ] + );* + ) => { + &[ $($( $x + $y ),*),* ] + } +} + +fn main() { + let a: &[i32] + = o_O!(10; [1, 2, 3]; + 20; [4, 5, 6]); + + assert_eq!(a, [11, 12, 13, 24, 25, 26]); +} +``` + +That's most of the matcher syntax. These examples use `$(...)*`, which is a +"zero or more" match. Alternatively you can write `$(...)+` for a "one or +more" match. Both forms optionally include a separator, which can be any token +except `+` or `*`. + +This system is based on +"[Macro-by-Example](http://www.cs.indiana.edu/ftp/techreports/TR206.pdf)" +(PDF link). + +# Hygiene + +Some languages implement macros using simple text substitution, which leads to +various problems. For example, this C program prints `13` instead of the +expected `25`. + +```text +#define FIVE_TIMES(x) 5 * x + +int main() { + printf("%d\n", FIVE_TIMES(2 + 3)); + return 0; +} +``` + +After expansion we have `5 * 2 + 3`, and multiplication has greater precedence +than addition. If you've used C macros a lot, you probably know the standard +idioms for avoiding this problem, as well as five or six others. 
In Rust, we +don't have to worry about it. + +```rust +macro_rules! five_times { + ($x:expr) => (5 * $x); +} + +fn main() { + assert_eq!(25, five_times!(2 + 3)); +} +``` + +The metavariable `$x` is parsed as a single expression node, and keeps its +place in the syntax tree even after substitution. + +Another common problem in macro systems is *variable capture*. Here's a C +macro, using [a GNU C extension] to emulate Rust's expression blocks. + +[a GNU C extension]: https://gcc.gnu.org/onlinedocs/gcc/Statement-Exprs.html + +```text +#define LOG(msg) ({ \ + int state = get_log_state(); \ + if (state > 0) { \ + printf("log(%d): %s\n", state, msg); \ + } \ +}) +``` + +Here's a simple use case that goes terribly wrong: + +```text +const char *state = "reticulating splines"; +LOG(state) +``` + +This expands to + +```text +const char *state = "reticulating splines"; +int state = get_log_state(); +if (state > 0) { + printf("log(%d): %s\n", state, state); +} +``` + +The second variable named `state` shadows the first one. This is a problem +because the print statement should refer to both of them. + +The equivalent Rust macro has the desired behavior. + +```rust +# fn get_log_state() -> i32 { 3 } +macro_rules! log { + ($msg:expr) => {{ + let state: i32 = get_log_state(); + if state > 0 { + println!("log({}): {}", state, $msg); + } + }}; +} + +fn main() { + let state: &str = "reticulating splines"; + log!(state); +} +``` + +This works because Rust has a [hygienic macro system][]. Each macro expansion +happens in a distinct *syntax context*, and each variable is tagged with the +syntax context where it was introduced. It's as though the variable `state` +inside `main` is painted a different "color" from the variable `state` inside +the macro, and therefore they don't conflict. + +[hygienic macro system]: http://en.wikipedia.org/wiki/Hygienic_macro + +This also restricts the ability of macros to introduce new bindings at the +invocation site. 
Code such as the following will not work: + +```rust,ignore +macro_rules! foo { + () => (let x = 3); +} + +fn main() { + foo!(); + println!("{}", x); +} +``` + +Instead you need to pass the variable name into the invocation, so it's tagged +with the right syntax context. + +```rust +macro_rules! foo { + ($v:ident) => (let $v = 3); +} + +fn main() { + foo!(x); + println!("{}", x); +} +``` + +This holds for `let` bindings and loop labels, but not for [items][]. +So the following code does compile: + +```rust +macro_rules! foo { + () => (fn x() { }); +} + +fn main() { + foo!(); + x(); +} +``` + +[items]: ../reference.html#items + +# Recursive macros + +A macro's expansion can include more macro invocations, including invocations +of the very same macro being expanded. These recursive macros are useful for +processing tree-structured input, as illustrated by this (simplistic) HTML +shorthand: + +```rust +# #![allow(unused_must_use)] +macro_rules! write_html { + ($w:expr, ) => (()); + + ($w:expr, $e:tt) => (write!($w, "{}", $e)); + + ($w:expr, $tag:ident [ $($inner:tt)* ] $($rest:tt)*) => {{ + write!($w, "<{}>", stringify!($tag)); + write_html!($w, $($inner)*); + write!($w, "</{}>", stringify!($tag)); + write_html!($w, $($rest)*); + }}; +} + +fn main() { +# // FIXME(#21826) + use std::fmt::Write; + let mut out = String::new(); + + write_html!(&mut out, + html[ + head[title["Macros guide"]] + body[h1["Macros are the best!"]] + ]); + + assert_eq!(out, + "<html><head><title>Macros guide</title></head>\ + <body><h1>Macros are the best!</h1></body></html>"); +} +``` + +# Debugging macro code + +To see the results of expanding macros, run `rustc --pretty expanded`. The +output represents a whole crate, so you can also feed it back in to `rustc`, +which will sometimes produce better error messages than the original +compilation. 
Note that the `--pretty expanded` output may have a different +meaning if multiple variables of the same name (but different syntax contexts) +are in play in the same scope. In this case `--pretty expanded,hygiene` will +tell you about the syntax contexts. + +`rustc` provides two syntax extensions that help with macro debugging. For now, +they are unstable and require feature gates. + +* `log_syntax!(...)` will print its arguments to standard output, at compile + time, and "expand" to nothing. + +* `trace_macros!(true)` will enable a compiler message every time a macro is + expanded. Use `trace_macros!(false)` later in expansion to turn it off. + +# Syntactic requirements + +Even when Rust code contains un-expanded macros, it can be parsed as a full +[syntax tree][ast]. This property can be very useful for editors and other +tools that process code. It also has a few consequences for the design of +Rust's macro system. + +[ast]: glossary.html#abstract-syntax-tree + +One consequence is that Rust must determine, when it parses a macro invocation, +whether the macro stands in for + +* zero or more items, +* zero or more methods, +* an expression, +* a statement, or +* a pattern. + +A macro invocation within a block could stand for some items, or for an +expression / statement. Rust uses a simple rule to resolve this ambiguity. A +macro invocation that stands for items must be either + +* delimited by curly braces, e.g. `foo! { ... }`, or +* terminated by a semicolon, e.g. `foo!(...);` + +Another consequence of pre-expansion parsing is that the macro invocation must +consist of valid Rust tokens. Furthermore, parentheses, brackets, and braces +must be balanced within a macro invocation. 
For example, `foo!([)` is +forbidden. This allows Rust to know where the macro invocation ends. + +More formally, the macro invocation body must be a sequence of *token trees*. +A token tree is defined recursively as either + +* a sequence of token trees surrounded by matching `()`, `[]`, or `{}`, or +* any other single token. + +Within a matcher, each metavariable has a *fragment specifier*, identifying +which syntactic form it matches. + +* `ident`: an identifier. Examples: `x`; `foo`. +* `path`: a qualified name. Example: `T::SpecialA`. +* `expr`: an expression. Examples: `2 + 2`; `if true { 1 } else { 2 }`; `f(42)`. +* `ty`: a type. Examples: `i32`; `Vec<(char, String)>`; `&T`. +* `pat`: a pattern. Examples: `Some(t)`; `(17, 'a')`; `_`. +* `stmt`: a single statement. Example: `let x = 3`. +* `block`: a brace-delimited sequence of statements. Example: + `{ log(error, "hi"); return 12; }`. +* `item`: an [item][]. Examples: `fn foo() { }`; `struct Bar;`. +* `meta`: a "meta item", as found in attributes. Example: `cfg(target_os = "windows")`. +* `tt`: a single token tree. + +There are additional rules regarding the next token after a metavariable: + +* `expr` variables must be followed by one of: `=> , ;` +* `ty` and `path` variables must be followed by one of: `=> , : = > as` +* `pat` variables must be followed by one of: `=> , =` +* Other variables may be followed by any token. + +These rules provide some flexibility for Rust's syntax to evolve without +breaking existing macros. + +The macro system does not deal with parse ambiguity at all. For example, the +grammar `$($t:ty)* $e:expr` will always fail to parse, because the parser would +be forced to choose between parsing `$t` and parsing `$e`. Changing the +invocation syntax to put a distinctive token in front can solve the problem. In +this case, you can write `$(T $t:ty)* E $e:expr`. 
+ +[item]: ../reference.html#items + +# Scoping and macro import/export + +Macros are expanded at an early stage in compilation, before name resolution. +One downside is that scoping works differently for macros, compared to other +constructs in the language. + +Definition and expansion of macros both happen in a single depth-first, +lexical-order traversal of a crate's source. So a macro defined at module scope +is visible to any subsequent code in the same module, which includes the body +of any subsequent child `mod` items. + +A macro defined within the body of a single `fn`, or anywhere else not at +module scope, is visible only within that item. + +If a module has the `macro_use` attribute, its macros are also visible in its +parent module after the child's `mod` item. If the parent also has `macro_use` +then the macros will be visible in the grandparent after the parent's `mod` +item, and so forth. + +The `macro_use` attribute can also appear on `extern crate`. In this context +it controls which macros are loaded from the external crate, e.g. + +```rust,ignore +#[macro_use(foo, bar)] +extern crate baz; +``` + +If the attribute is given simply as `#[macro_use]`, all macros are loaded. If +there is no `#[macro_use]` attribute then no macros are loaded. Only macros +defined with the `#[macro_export]` attribute may be loaded. + +To load a crate's macros *without* linking it into the output, use `#[no_link]` +as well. + +An example: + +```rust +macro_rules! m1 { () => (()) } + +// visible here: m1 + +mod foo { + // visible here: m1 + + #[macro_export] + macro_rules! m2 { () => (()) } + + // visible here: m1, m2 +} + +// visible here: m1 + +macro_rules! m3 { () => (()) } + +// visible here: m1, m3 + +#[macro_use] +mod bar { + // visible here: m1, m3 + + macro_rules! m4 { () => (()) } + + // visible here: m1, m3, m4 +} + +// visible here: m1, m3, m4 +# fn main() { } +``` + +When this library is loaded with `#[macro_use] extern crate`, only `m2` will +be imported. 
+ +The Rust Reference has a [listing of macro-related +attributes](../reference.html#macro--and-plugin-related-attributes). + +# The variable `$crate` + +A further difficulty occurs when a macro is used in multiple crates. Say that +`mylib` defines + +```rust +pub fn increment(x: u32) -> u32 { + x + 1 +} + +#[macro_export] +macro_rules! inc_a { + ($x:expr) => ( ::increment($x) ) +} + +#[macro_export] +macro_rules! inc_b { + ($x:expr) => ( ::mylib::increment($x) ) +} +# fn main() { } +``` + +`inc_a` only works within `mylib`, while `inc_b` only works outside the +library. Furthermore, `inc_b` will break if the user imports `mylib` under +another name. + +Rust does not (yet) have a hygiene system for crate references, but it does +provide a simple workaround for this problem. Within a macro imported from a +crate named `foo`, the special macro variable `$crate` will expand to `::foo`. +By contrast, when a macro is defined and then used in the same crate, `$crate` +will expand to nothing. This means we can write + +```rust +#[macro_export] +macro_rules! inc { + ($x:expr) => ( $crate::increment($x) ) +} +# fn main() { } +``` + +to define a single macro that works both inside and outside our library. The +function name will expand to either `::increment` or `::mylib::increment`. + +To keep this system simple and correct, `#[macro_use] extern crate ...` may +only appear at the root of your crate, not inside `mod`. This ensures that +`$crate` is a single identifier. + +# The deep end + +The introductory chapter mentioned recursive macros, but it did not give the +full story. Recursive macros are useful for another reason: Each recursive +invocation gives you another opportunity to pattern-match the macro's +arguments. + +As an extreme example, it is possible, though hardly advisable, to implement +the [Bitwise Cyclic Tag](http://esolangs.org/wiki/Bitwise_Cyclic_Tag) automaton +within Rust's macro system. + +```rust +macro_rules! bct { + // cmd 0: d ... => ... 
+ (0, $($ps:tt),* ; $_d:tt) + => (bct!($($ps),*, 0 ; )); + (0, $($ps:tt),* ; $_d:tt, $($ds:tt),*) + => (bct!($($ps),*, 0 ; $($ds),*)); + + // cmd 1p: 1 ... => 1 ... p + (1, $p:tt, $($ps:tt),* ; 1) + => (bct!($($ps),*, 1, $p ; 1, $p)); + (1, $p:tt, $($ps:tt),* ; 1, $($ds:tt),*) + => (bct!($($ps),*, 1, $p ; 1, $($ds),*, $p)); + + // cmd 1p: 0 ... => 0 ... + (1, $p:tt, $($ps:tt),* ; $($ds:tt),*) + => (bct!($($ps),*, 1, $p ; $($ds),*)); + + // halt on empty data string + ( $($ps:tt),* ; ) + => (()); +} +``` + +Exercise: use macros to reduce duplication in the above definition of the +`bct!` macro. + +# Procedural macros + +If Rust's macro system can't do what you need, you may want to write a +[compiler plugin](compiler-plugins.html) instead. Compared to `macro_rules!` +macros, this is significantly more work, the interfaces are much less stable, +and bugs can be much harder to track down. In exchange you get the +flexibility of running arbitrary Rust code within the compiler. Syntax +extension plugins are sometimes called *procedural macros* for this reason. diff --git a/src/doc/trpl/match.md b/src/doc/trpl/match.md index e69de29bb2d1..73bc775a1b29 100644 --- a/src/doc/trpl/match.md +++ b/src/doc/trpl/match.md @@ -0,0 +1,156 @@ +% Match + +Often, a simple `if`/`else` isn't enough, because you have more than two +possible options. Also, `else` conditions can get incredibly complicated, so +what's the solution? + +Rust has a keyword, `match`, that allows you to replace complicated `if`/`else` +groupings with something more powerful. Check it out: + +```{rust} +let x = 5; + +match x { + 1 => println!("one"), + 2 => println!("two"), + 3 => println!("three"), + 4 => println!("four"), + 5 => println!("five"), + _ => println!("something else"), +} +``` + +`match` takes an expression and then branches based on its value. Each *arm* of +the branch is of the form `val => expression`. When the value matches, that arm's +expression will be evaluated. 
It's called `match` because of the term 'pattern +matching', which `match` is an implementation of. + +So what's the big advantage here? Well, there are a few. First of all, `match` +enforces *exhaustiveness checking*. Do you see that last arm, the one with the +underscore (`_`)? If we remove that arm, Rust will give us an error: + +```text +error: non-exhaustive patterns: `_` not covered +``` + +In other words, Rust is trying to tell us we forgot a value. Because `x` is an +integer, Rust knows that it can have a number of different values – for example, +`6`. Without the `_`, however, there is no arm that could match, and so Rust refuses +to compile. `_` acts like a *catch-all arm*. If none of the other arms match, +the arm with `_` will, and since we have this catch-all arm, we now have an arm +for every possible value of `x`, and so our program will compile successfully. + +`match` expressions can also destructure enums. Remember this code from the +section on enums? + +```{rust} +use std::cmp::Ordering; + +fn cmp(a: i32, b: i32) -> Ordering { + if a < b { Ordering::Less } + else if a > b { Ordering::Greater } + else { Ordering::Equal } +} + +fn main() { + let x = 5; + let y = 10; + + let ordering = cmp(x, y); + + if ordering == Ordering::Less { + println!("less"); + } else if ordering == Ordering::Greater { + println!("greater"); + } else if ordering == Ordering::Equal { + println!("equal"); + } +} +``` + +We can re-write this as a `match`: + +```{rust} +use std::cmp::Ordering; + +fn cmp(a: i32, b: i32) -> Ordering { + if a < b { Ordering::Less } + else if a > b { Ordering::Greater } + else { Ordering::Equal } +} + +fn main() { + let x = 5; + let y = 10; + + match cmp(x, y) { + Ordering::Less => println!("less"), + Ordering::Greater => println!("greater"), + Ordering::Equal => println!("equal"), + } +} +``` + +This version has way less noise, and it also checks exhaustively to make sure +that we have covered all possible variants of `Ordering`. 
With our `if`/`else` +version, if we had forgotten the `Greater` case, for example, our program would +have happily compiled. If we forget in the `match`, it will not. Rust helps us +make sure to cover all of our bases. + +`match` expressions also allow us to get the values contained in an `enum` +(also known as destructuring) as follows: + +```{rust} +enum OptionalInt { + Value(i32), + Missing, +} + +fn main() { + let x = OptionalInt::Value(5); + let y = OptionalInt::Missing; + + match x { + OptionalInt::Value(n) => println!("x is {}", n), + OptionalInt::Missing => println!("x is missing!"), + } + + match y { + OptionalInt::Value(n) => println!("y is {}", n), + OptionalInt::Missing => println!("y is missing!"), + } +} +``` + +That is how you can get and use the values contained in `enum`s. +It can also allow us to handle errors or unexpected computations; for example, a +function that is not guaranteed to be able to compute a result (an `i32` here) +could return an `OptionalInt`, and we would handle that value with a `match`. +As you can see, `enum` and `match` used together are quite useful! + +`match` is also an expression, which means we can use it on the right-hand +side of a `let` binding or directly where an expression is used. We could +also implement the previous example like this: + +```{rust} +use std::cmp::Ordering; + +fn cmp(a: i32, b: i32) -> Ordering { + if a < b { Ordering::Less } + else if a > b { Ordering::Greater } + else { Ordering::Equal } +} + +fn main() { + let x = 5; + let y = 10; + + println!("{}", match cmp(x, y) { + Ordering::Less => "less", + Ordering::Greater => "greater", + Ordering::Equal => "equal", + }); +} +``` + +Sometimes, it's a nice pattern. 
diff --git a/src/doc/trpl/method-syntax.md b/src/doc/trpl/method-syntax.md index e69de29bb2d1..f6eacd0a8428 100644 --- a/src/doc/trpl/method-syntax.md +++ b/src/doc/trpl/method-syntax.md @@ -0,0 +1,232 @@ +% Method Syntax + +Functions are great, but if you want to call a bunch of them on some data, it +can be awkward. Consider this code: + +```{rust,ignore} +baz(bar(foo(x))); +``` + +We would read this left-to-right, and so we see "baz bar foo." But this isn't the +order that the functions would get called in; that's inside-out: "foo bar baz." +Wouldn't it be nice if we could do this instead? + +```{rust,ignore} +x.foo().bar().baz(); +``` + +Luckily, as you may have guessed with the leading question, you can! Rust provides +the ability to use this *method call syntax* via the `impl` keyword. + +## Method calls + +Here's how it works: + +```{rust} +# #![feature(core)] +struct Circle { + x: f64, + y: f64, + radius: f64, +} + +impl Circle { + fn area(&self) -> f64 { + std::f64::consts::PI * (self.radius * self.radius) + } +} + +fn main() { + let c = Circle { x: 0.0, y: 0.0, radius: 2.0 }; + println!("{}", c.area()); +} +``` + +This will print `12.566371`. + +We've made a struct that represents a circle. We then write an `impl` block, +and inside it, define a method, `area`. Methods take a special first +parameter, of which there are three variants: `self`, `&self`, and `&mut self`. +You can think of this first parameter as being the `x` in `x.foo()`. The three +variants correspond to the three kinds of thing `x` could be: `self` if it's +just a value on the stack, `&self` if it's a reference, and `&mut self` if it's +a mutable reference. We should default to using `&self`, as you should prefer +borrowing over taking ownership, as well as taking immutable references +over mutable ones.
Here's an example of all three variants: + +```rust +struct Circle { + x: f64, + y: f64, + radius: f64, +} + +impl Circle { + fn reference(&self) { + println!("taking self by reference!"); + } + + fn mutable_reference(&mut self) { + println!("taking self by mutable reference!"); + } + + fn takes_ownership(self) { + println!("taking ownership of self!"); + } +} +``` + +Finally, as you may remember, the value of the area of a circle is `π*r²`. +Because we took the `&self` parameter to `area`, we can use it just like any +other parameter. Because we know it's a `Circle`, we can access the `radius` +just like we would with any other struct. An import of π and some +multiplications later, and we have our area. + +## Chaining method calls + +So, now we know how to call a method, such as `foo.bar()`. But what about our +original example, `foo.bar().baz()`? This is called 'method chaining', and we +can do it by returning `self`. + +``` +# #![feature(core)] +struct Circle { + x: f64, + y: f64, + radius: f64, +} + +impl Circle { + fn area(&self) -> f64 { + std::f64::consts::PI * (self.radius * self.radius) + } + + fn grow(&self, increment: f64) -> Circle { + Circle { x: self.x, y: self.y, radius: self.radius + increment } + } +} + +fn main() { + let c = Circle { x: 0.0, y: 0.0, radius: 2.0 }; + println!("{}", c.area()); + + let d = c.grow(2.0).area(); + println!("{}", d); +} +``` + +Check the return type: + +``` +# struct Circle; +# impl Circle { +fn grow(&self) -> Circle { +# Circle } } +``` + +We just say we're returning a `Circle`. With this method, we can grow a new +circle to any arbitrary size. + +## Static methods + +You can also define methods that do not take a `self` parameter. 
Here's a +pattern that's very common in Rust code: + +``` +struct Circle { + x: f64, + y: f64, + radius: f64, +} + +impl Circle { + fn new(x: f64, y: f64, radius: f64) -> Circle { + Circle { + x: x, + y: y, + radius: radius, + } + } +} + +fn main() { + let c = Circle::new(0.0, 0.0, 2.0); +} +``` + +This *static method* builds a new `Circle` for us. Note that static methods +are called with the `Struct::method()` syntax, rather than the `ref.method()` +syntax. + +## Builder Pattern + +Let's say that we want our users to be able to create Circles, but we will +allow them to only set the properties they care about. Otherwise, the `x` +and `y` attributes will be `0.0`, and the `radius` will be `1.0`. Rust doesn't +have method overloading, named arguments, or variable arguments. We employ +the builder pattern instead. It looks like this: + +``` +# #![feature(core)] +struct Circle { + x: f64, + y: f64, + radius: f64, +} + +impl Circle { + fn area(&self) -> f64 { + std::f64::consts::PI * (self.radius * self.radius) + } +} + +struct CircleBuilder { + x: f64, + y: f64, + radius: f64, +} + +impl CircleBuilder { + fn new() -> CircleBuilder { + CircleBuilder { x: 0.0, y: 0.0, radius: 1.0, } + } + + fn x(&mut self, coordinate: f64) -> &mut CircleBuilder { + self.x = coordinate; + self + } + + fn y(&mut self, coordinate: f64) -> &mut CircleBuilder { + self.y = coordinate; + self + } + + fn radius(&mut self, radius: f64) -> &mut CircleBuilder { + self.radius = radius; + self + } + + fn finalize(&self) -> Circle { + Circle { x: self.x, y: self.y, radius: self.radius } + } +} + +fn main() { + let c = CircleBuilder::new() + .x(1.0) + .y(2.0) + .radius(2.0) + .finalize(); + + println!("area: {}", c.area()); + println!("x: {}", c.x); + println!("y: {}", c.y); +} +``` + +What we've done here is make another struct, `CircleBuilder`. We've defined our +builder methods on it. We've also defined our `area()` method on `Circle`.
We +also made one more method on `CircleBuilder`: `finalize()`. This method creates +our final `Circle` from the builder. Now, we've used the type system to enforce +our concerns: we can use the methods on `CircleBuilder` to constrain making +`Circle`s in any way we choose. diff --git a/src/doc/trpl/move-semantics.md b/src/doc/trpl/move-semantics.md index e69de29bb2d1..6917d7f8b8e0 100644 --- a/src/doc/trpl/move-semantics.md +++ b/src/doc/trpl/move-semantics.md @@ -0,0 +1,3 @@ +% Move Semantics + +Coming Soon diff --git a/src/doc/trpl/mutability.md b/src/doc/trpl/mutability.md index e69de29bb2d1..ccb03c7f85f6 100644 --- a/src/doc/trpl/mutability.md +++ b/src/doc/trpl/mutability.md @@ -0,0 +1,3 @@ +% Mutability + +Coming Soon diff --git a/src/doc/trpl/nightly-rust.md b/src/doc/trpl/nightly-rust.md index d69831c23783..1b58b73994dc 100644 --- a/src/doc/trpl/nightly-rust.md +++ b/src/doc/trpl/nightly-rust.md @@ -1,4 +1,4 @@ -% Unstable Rust +% Nightly Rust Rust provides three distribution channels for Rust: nightly, beta, and stable. Unstable features are only available on nightly Rust. For more details on this diff --git a/src/doc/trpl/no-stdlib.md b/src/doc/trpl/no-stdlib.md index e69de29bb2d1..094c82a08cc9 100644 --- a/src/doc/trpl/no-stdlib.md +++ b/src/doc/trpl/no-stdlib.md @@ -0,0 +1,168 @@ +% No stdlib + +By default, `std` is linked to every Rust crate. In some contexts, +this is undesirable, and can be avoided with the `#![no_std]` +attribute attached to the crate. + +```ignore +// a minimal library +#![crate_type="lib"] +#![feature(no_std)] +#![no_std] +# // fn main() {} tricked you, rustdoc! +``` + +Obviously there's more to life than just libraries: one can use +`#![no_std]` with an executable; controlling the entry point is +possible in two ways: the `#[start]` attribute, or overriding the +default shim for the C `main` function with your own.
+ +The function marked `#[start]` is passed the command line parameters +in the same format as C: + +``` +#![feature(lang_items, start, no_std, libc)] +#![no_std] + +// Pull in the system libc library for what crt0.o likely requires +extern crate libc; + +// Entry point for this program +#[start] +fn start(_argc: isize, _argv: *const *const u8) -> isize { + 0 +} + +// These functions and traits are used by the compiler, but not +// for a bare-bones hello world. These are normally +// provided by libstd. +#[lang = "stack_exhausted"] extern fn stack_exhausted() {} +#[lang = "eh_personality"] extern fn eh_personality() {} +#[lang = "panic_fmt"] fn panic_fmt() -> ! { loop {} } +# // fn main() {} tricked you, rustdoc! +``` + +To override the compiler-inserted `main` shim, one has to disable it +with `#![no_main]` and then create the appropriate symbol with the +correct ABI and the correct name, which requires overriding the +compiler's name mangling too: + +```ignore +#![feature(no_std)] +#![no_std] +#![no_main] +#![feature(lang_items, start)] + +extern crate libc; + +#[no_mangle] // ensure that this symbol is called `main` in the output +pub extern fn main(argc: i32, argv: *const *const u8) -> i32 { + 0 +} + +#[lang = "stack_exhausted"] extern fn stack_exhausted() {} +#[lang = "eh_personality"] extern fn eh_personality() {} +#[lang = "panic_fmt"] fn panic_fmt() -> ! { loop {} } +# // fn main() {} tricked you, rustdoc! +``` + + +The compiler currently makes a few assumptions about symbols which are available +in the executable to call. Normally these functions are provided by the standard +library, but without it you must define your own. + +The first of these three functions, `stack_exhausted`, is invoked whenever stack +overflow is detected. 
This function has a number of restrictions about how it +can be called and what it must do, but if the stack limit register is not being +maintained then a thread always has an "infinite stack" and this function +shouldn't get triggered. + +The second of these three functions, `eh_personality`, is used by the +failure mechanisms of the compiler. This is often mapped to GCC's +personality function (see the +[libstd implementation](../std/rt/unwind/index.html) for more +information), but crates which do not trigger a panic can be assured +that this function is never called. The final function, `panic_fmt`, is +also used by the failure mechanisms of the compiler. + +## Using libcore + +> **Note**: the core library's structure is unstable, and it is recommended to +> use the standard library instead wherever possible. + +With the above techniques, we've got a bare-metal executable running some Rust +code. There is a good deal of functionality provided by the standard library, +however, that is necessary to be productive in Rust. If the standard library is +not sufficient, then [libcore](../core/index.html) is designed to be used +instead. + +The core library has very few dependencies and is much more portable than the +standard library itself. Additionally, the core library has most of the +necessary functionality for writing idiomatic and effective Rust code. + +As an example, here is a program that will calculate the dot product of two +vectors provided from C, using idiomatic Rust practices. + +``` +#![feature(lang_items, start, no_std, core, libc)] +#![no_std] + +# extern crate libc; +extern crate core; + +use core::prelude::*; + +use core::mem; + +#[no_mangle] +pub extern fn dot_product(a: *const u32, a_len: u32, + b: *const u32, b_len: u32) -> u32 { + use core::raw::Slice; + + // Convert the provided arrays into Rust slices. + // The core::raw module guarantees that the Slice + // structure has the same memory layout as a &[T] + // slice. 
+ // + // This is an unsafe operation because the compiler + // cannot tell the pointers are valid. + let (a_slice, b_slice): (&[u32], &[u32]) = unsafe { + mem::transmute(( + Slice { data: a, len: a_len as usize }, + Slice { data: b, len: b_len as usize }, + )) + }; + + // Iterate over the slices, collecting the result + let mut ret = 0; + for (i, j) in a_slice.iter().zip(b_slice.iter()) { + ret += (*i) * (*j); + } + return ret; +} + +#[lang = "panic_fmt"] +extern fn panic_fmt(args: &core::fmt::Arguments, + file: &str, + line: u32) -> ! { + loop {} +} + +#[lang = "stack_exhausted"] extern fn stack_exhausted() {} +#[lang = "eh_personality"] extern fn eh_personality() {} +# #[start] fn start(argc: isize, argv: *const *const u8) -> isize { 0 } +# fn main() {} +``` + +Note that there is one extra lang item here which differs from the examples +above, `panic_fmt`. This must be defined by consumers of libcore because the +core library declares panics, but it does not define it. The `panic_fmt` +lang item is this crate's definition of panic, and it must be guaranteed to +never return. + +As can be seen in this example, the core library is intended to provide the +power of Rust in all circumstances, regardless of platform requirements. Further +libraries, such as liballoc, add functionality to libcore which make other +platform-specific assumptions, but continue to be more portable than the +standard library itself. + diff --git a/src/doc/trpl/operators-and-overloading.md b/src/doc/trpl/operators-and-overloading.md index e69de29bb2d1..f6f9d5cae192 100644 --- a/src/doc/trpl/operators-and-overloading.md +++ b/src/doc/trpl/operators-and-overloading.md @@ -0,0 +1,3 @@ +% Operators and Overloading + +Coming soon! diff --git a/src/doc/trpl/ownership.md b/src/doc/trpl/ownership.md index e69de29bb2d1..223085cc40b8 100644 --- a/src/doc/trpl/ownership.md +++ b/src/doc/trpl/ownership.md @@ -0,0 +1,555 @@ +% Ownership + +This guide presents Rust's ownership system. 
This is one of Rust's most unique +and compelling features, with which Rust developers should become quite +acquainted. Ownership is how Rust achieves its largest goal, memory safety. +The ownership system has a few distinct concepts: *ownership*, *borrowing*, +and *lifetimes*. We'll talk about each one in turn. + +# Meta + +Before we get to the details, two important notes about the ownership system. + +Rust has a focus on safety and speed. It accomplishes these goals through many +*zero-cost abstractions*, which means that in Rust, abstractions cost as little +as possible in order to make them work. The ownership system is a prime example +of a zero cost abstraction. All of the analysis we'll talk about in this guide +is _done at compile time_. You do not pay any run-time cost for any of these +features. + +However, this system does have a certain cost: learning curve. Many new users +to Rust experience something we like to call "fighting with the borrow +checker," where the Rust compiler refuses to compile a program that the author +thinks is valid. This often happens because the programmer's mental model of +how ownership should work doesn't match the actual rules that Rust implements. +You probably will experience similar things at first. There is good news, +however: more experienced Rust developers report that once they work with the +rules of the ownership system for a period of time, they fight the borrow +checker less and less. + +With that in mind, let's learn about ownership. + +# Ownership + +At its core, ownership is about *resources*. For the purposes of the vast +majority of this guide, we will talk about a specific resource: memory. The +concept generalizes to any kind of resource, like a file handle, but to make it +more concrete, we'll focus on memory. + +When your program allocates some memory, it needs some way to deallocate that +memory. Imagine a function `foo` that allocates four bytes of memory, and then +never deallocates that memory. 
We call this problem *leaking* memory, because +each time we call `foo`, we're allocating another four bytes. Eventually, with +enough calls to `foo`, we will run our system out of memory. That's no good. So +we need some way for `foo` to deallocate those four bytes. It's also important +that we don't deallocate too many times, either. Without getting into the +details, attempting to deallocate memory multiple times can lead to problems. +In other words, any time some memory is allocated, we need to make sure that we +deallocate that memory once and only once. Too many times is bad, not enough +times is bad. The counts must match. + +There's one other important detail with regards to allocating memory. Whenever +we request some amount of memory, what we are given is a handle to that memory. +This handle (often called a *pointer*, when we're referring to memory) is how +we interact with the allocated memory. As long as we have that handle, we can +do something with the memory. Once we're done with the handle, we're also done +with the memory, as we can't do anything useful without a handle to it. + +Historically, systems programming languages require you to track these +allocations, deallocations, and handles yourself. For example, if we want some +memory from the heap in a language like C, we do this: + +```c +{ + int *x = malloc(sizeof(int)); + + // we can now do stuff with our handle x + *x = 5; + + free(x); +} +``` + +The call to `malloc` allocates some memory. The call to `free` deallocates the +memory. There's also bookkeeping about allocating the correct amount of memory. + +Rust combines these two aspects of allocating memory (and other resources) into +a concept called *ownership*. Whenever we request some memory, that handle we +receive is called the *owning handle*. Whenever that handle goes out of scope, +Rust knows that you cannot do anything with the memory anymore, and so +therefore deallocates the memory for you. 
Here's the equivalent example in +Rust: + +```rust +{ + let x = Box::new(5); +} +``` + +The `Box::new` function creates a `Box<T>` (specifically `Box<i32>` in this +case) by allocating a small segment of memory on the heap with enough space to +fit an `i32`. But where in the code is the box deallocated? We said before that +we must have a deallocation for each allocation. Rust handles this for you. It +knows that our handle, `x`, is the owning reference to our box. Rust knows that +`x` will go out of scope at the end of the block, and so it inserts a call to +deallocate the memory at the end of the scope. Because the compiler does this +for us, it's impossible to forget. We always have exactly one deallocation + paired with each of our allocations. + +This is pretty straightforward, but what happens when we want to pass our box +to a function? Let's look at some code: + +```rust +fn main() { + let x = Box::new(5); + + add_one(x); +} + +fn add_one(mut num: Box<i32>) { + *num += 1; +} +``` + +This code works, but it's not ideal. For example, let's add one more line of +code, where we print out the value of `x`: + +```{rust,ignore} +fn main() { + let x = Box::new(5); + + add_one(x); + + println!("{}", x); +} + +fn add_one(mut num: Box<i32>) { + *num += 1; +} +``` + +This does not compile, and gives us an error: + +```text +error: use of moved value: `x` + println!("{}", x); + ^ +``` + +Remember, we need one deallocation for every allocation. When we try to pass +our box to `add_one`, we would have two handles to the memory: `x` in `main`, +and `num` in `add_one`. If we deallocated the memory when each handle went out +of scope, we would have two deallocations and one allocation, and that's wrong. +So when we call `add_one`, Rust defines `num` as the owner of the handle. And +so, now that we've given ownership to `num`, `x` is invalid. `x`'s value has +"moved" from `x` to `num`. Hence the error: use of moved value `x`.
+ +To fix this, we can have `add_one` give ownership back when it's done with the +box: + +```rust +fn main() { + let x = Box::new(5); + + let y = add_one(x); + + println!("{}", y); +} + +fn add_one(mut num: Box<i32>) -> Box<i32> { + *num += 1; + + num +} +``` + +This code will compile and run just fine. Now, we return a `Box`, and so the +ownership is transferred back to `y` in `main`. We only have ownership for the +duration of our function before giving it back. This pattern is very common, +and so Rust introduces a concept to describe a handle which temporarily refers +to something another handle owns. It's called *borrowing*, and it's done with +*references*, designated by the `&` symbol. + +# Borrowing + +Here's the current state of our `add_one` function: + +```rust +fn add_one(mut num: Box<i32>) -> Box<i32> { + *num += 1; + + num +} +``` + +This function takes ownership, because it takes a `Box`, which owns its +contents. But then we give ownership right back. + +In the physical world, you can give one of your possessions to someone for a +short period of time. You still own your possession, you're just letting someone +else use it for a while. We call that *lending* something to someone, and that +person is said to be *borrowing* that something from you. + +Rust's ownership system also allows an owner to lend out a handle for a limited +period. This is also called *borrowing*. Here's a version of `add_one` which +borrows its argument rather than taking ownership: + +```rust +fn add_one(num: &mut i32) { + *num += 1; +} +``` + +This function borrows an `i32` from its caller, and then increments it. When +the function is over, and `num` goes out of scope, the borrow is over. + +We have to change our `main` a bit too: + +```rust +fn main() { + let mut x = 5; + + add_one(&mut x); + + println!("{}", x); +} + +fn add_one(num: &mut i32) { + *num += 1; +} +``` + +We don't need to assign the result of `add_one()` anymore, because it doesn't +return anything anymore.
This is because we're not passing ownership back, +since we just borrow, not take ownership. + +# Lifetimes + +Lending out a reference to a resource that someone else owns can be +complicated, however. For example, imagine this set of operations: + +1. I acquire a handle to some kind of resource. +2. I lend you a reference to the resource. +3. I decide I'm done with the resource, and deallocate it, while you still have + your reference. +4. You decide to use the resource. + +Uh oh! Your reference is pointing to an invalid resource. This is called a +*dangling pointer* or "use after free," when the resource is memory. + +To fix this, we have to make sure that step four never happens after step +three. The ownership system in Rust does this through a concept called +*lifetimes*, which describe the scope that a reference is valid for. + +Remember the function that borrowed an `i32`? Let's look at it again. + +```rust +fn add_one(num: &mut i32) { + *num += 1; +} +``` + +Rust has a feature called *lifetime elision*, which allows you to not write +lifetime annotations in certain circumstances. This is one of them. We will +cover the others later. Without eliding the lifetimes, `add_one` looks like +this: + +```rust +fn add_one<'a>(num: &'a mut i32) { + *num += 1; +} +``` + +The `'a` is called a *lifetime*. Most lifetimes are used in places where +short names like `'a`, `'b` and `'c` are clearest, but it's often useful to +have more descriptive names. Let's dig into the syntax in a bit more detail: + +```{rust,ignore} +fn add_one<'a>(...) +``` + +This part _declares_ our lifetimes. This says that `add_one` has one lifetime, +`'a`. If we had two, it would look like this: + +```{rust,ignore} +fn add_two<'a, 'b>(...) 
+``` + +Then in our parameter list, we use the lifetimes we've named: + +```{rust,ignore} +...(num: &'a mut i32) +``` + +If you compare `&mut i32` to `&'a mut i32`, they're the same, it's just that the +lifetime `'a` has snuck in between the `&` and the `mut i32`. We read `&mut i32` as "a +mutable reference to an i32" and `&'a mut i32` as "a mutable reference to an i32 with the lifetime 'a.'" + +Why do lifetimes matter? Well, for example, here's some code: + +```rust +struct Foo<'a> { + x: &'a i32, +} + +fn main() { + let y = &5; // this is the same as `let _y = 5; let y = &_y;` + let f = Foo { x: y }; + + println!("{}", f.x); +} +``` + +As you can see, `struct`s can also have lifetimes. In a similar way to functions, + +```{rust} +struct Foo<'a> { +# x: &'a i32, +# } +``` + +declares a lifetime, and + +```rust +# struct Foo<'a> { +x: &'a i32, +# } +``` + +uses it. So why do we need a lifetime here? We need to ensure that any reference +to a `Foo` cannot outlive the reference to an `i32` it contains. + +## Thinking in scopes + +A way to think about lifetimes is to visualize the scope that a reference is +valid for. For example: + +```rust +fn main() { + let y = &5; // -+ y goes into scope + // | + // stuff // | + // | +} // -+ y goes out of scope +``` + +Adding in our `Foo`: + +```rust +struct Foo<'a> { + x: &'a i32, +} + +fn main() { + let y = &5; // -+ y goes into scope + let f = Foo { x: y }; // -+ f goes into scope + // stuff // | + // | +} // -+ f and y go out of scope +``` + +Our `f` lives within the scope of `y`, so everything works. What if it didn't? +This code won't work: + +```{rust,ignore} +struct Foo<'a> { + x: &'a i32, +} + +fn main() { + let x; // -+ x goes into scope + // | + { // | + let y = &5; // ---+ y goes into scope + let f = Foo { x: y }; // ---+ f goes into scope + x = &f.x; // | | error here + } // ---+ f and y go out of scope + // | + println!("{}", x); // | +} // -+ x goes out of scope +``` + +Whew! 
As you can see here, the scopes of `f` and `y` are smaller than the scope +of `x`. But when we do `x = &f.x`, we make `x` a reference to something that's +about to go out of scope. + +Named lifetimes are a way of giving these scopes a name. Giving something a +name is the first step towards being able to talk about it. + +## 'static + +The lifetime named *static* is a special lifetime. It signals that something +has the lifetime of the entire program. Most Rust programmers first come across +`'static` when dealing with strings: + +```rust +let x: &'static str = "Hello, world."; +``` + +String literals have the type `&'static str` because the reference is always +alive: they are baked into the data segment of the final binary. Another +example are globals: + +```rust +static FOO: i32 = 5; +let x: &'static i32 = &FOO; +``` + +This adds an `i32` to the data segment of the binary, and `x` is a reference +to it. + +# Shared Ownership + +In all the examples we've considered so far, we've assumed that each handle has +a singular owner. But sometimes, this doesn't work. Consider a car. Cars have +four wheels. We would want a wheel to know which car it was attached to. But +this won't work: + +```{rust,ignore} +struct Car { + name: String, +} + +struct Wheel { + size: i32, + owner: Car, +} + +fn main() { + let car = Car { name: "DeLorean".to_string() }; + + for _ in 0..4 { + Wheel { size: 360, owner: car }; + } +} +``` + +We try to make four `Wheel`s, each with a `Car` that it's attached to. But the +compiler knows that on the second iteration of the loop, there's a problem: + +```text +error: use of moved value: `car` + Wheel { size: 360, owner: car }; + ^~~ +note: `car` moved here because it has type `Car`, which is non-copyable + Wheel { size: 360, owner: car }; + ^~~ +``` + +We need our `Car` to be pointed to by multiple `Wheel`s. We can't do that with +`Box`, because it has a single owner. 
We can do it with `Rc<T>` instead: + +```rust +use std::rc::Rc; + +struct Car { + name: String, +} + +struct Wheel { + size: i32, + owner: Rc<Car>, +} + +fn main() { + let car = Car { name: "DeLorean".to_string() }; + + let car_owner = Rc::new(car); + + for _ in 0..4 { + Wheel { size: 360, owner: car_owner.clone() }; + } +} +``` + +We wrap our `Car` in an `Rc<T>`, getting an `Rc<Car>`, and then use the +`clone()` method to make new references. We've also changed our `Wheel` to have +an `Rc<Car>` rather than just a `Car`. + +This is the simplest kind of multiple ownership possible. For example, there's +also `Arc<T>`, which uses more expensive atomic instructions to be the +thread-safe counterpart of `Rc<T>`. + +## Lifetime Elision + +Rust supports powerful local type inference in function bodies, but it’s +forbidden in item signatures to allow reasoning about the types just based on +the item signature alone. However, for ergonomic reasons a very restricted +secondary inference algorithm called “lifetime elision” applies in function +signatures. It infers only based on the signature components themselves and not +based on the body of the function, only infers lifetime parameters, and does +this with only three easily memorizable and unambiguous rules. This makes +lifetime elision a shorthand for writing an item signature, while not hiding +away the actual types involved as full local inference would if applied to it. + +When talking about lifetime elision, we use the terms *input lifetime* and +*output lifetime*. An *input lifetime* is a lifetime associated with a parameter +of a function, and an *output lifetime* is a lifetime associated with the return +value of a function.
For example, this function has an input lifetime: + +```{rust,ignore} +fn foo<'a>(bar: &'a str) +``` + +This one has an output lifetime: + +```{rust,ignore} +fn foo<'a>() -> &'a str +``` + +This one has a lifetime in both positions: + +```{rust,ignore} +fn foo<'a>(bar: &'a str) -> &'a str +``` + +Here are the three rules: + +* Each elided lifetime in a function's arguments becomes a distinct lifetime + parameter. + +* If there is exactly one input lifetime, elided or not, that lifetime is + assigned to all elided lifetimes in the return values of that function. + +* If there are multiple input lifetimes, but one of them is `&self` or `&mut + self`, the lifetime of `self` is assigned to all elided output lifetimes. + +Otherwise, it is an error to elide an output lifetime. + +### Examples + +Here are some examples of functions with elided lifetimes. We've paired each +example of an elided lifetime with its expanded form. + +```{rust,ignore} +fn print(s: &str); // elided +fn print<'a>(s: &'a str); // expanded + +fn debug(lvl: u32, s: &str); // elided +fn debug<'a>(lvl: u32, s: &'a str); // expanded + +// In the preceding example, `lvl` doesn't need a lifetime because it's not a +// reference (`&`). Only things relating to references (such as a `struct` +// which contains a reference) need lifetimes. 
+ +fn substr(s: &str, until: u32) -> &str; // elided +fn substr<'a>(s: &'a str, until: u32) -> &'a str; // expanded + +fn get_str() -> &str; // ILLEGAL, no inputs + +fn frob(s: &str, t: &str) -> &str; // ILLEGAL, two inputs +fn frob<'a, 'b>(s: &'a str, t: &'b str) -> &str; // Expanded: Output lifetime is unclear + +fn get_mut(&mut self) -> &mut T; // elided +fn get_mut<'a>(&'a mut self) -> &'a mut T; // expanded + +fn args<T:ToCStr>(&mut self, args: &[T]) -> &mut Command // elided +fn args<'a, 'b, T:ToCStr>(&'a mut self, args: &'b [T]) -> &'a mut Command // expanded + +fn new(buf: &mut [u8]) -> BufWriter; // elided +fn new<'a>(buf: &'a mut [u8]) -> BufWriter<'a> // expanded +``` + +# Related Resources + +Coming Soon. diff --git a/src/doc/trpl/patterns.md b/src/doc/trpl/patterns.md index e69de29bb2d1..4ebf696aa57a 100644 --- a/src/doc/trpl/patterns.md +++ b/src/doc/trpl/patterns.md @@ -0,0 +1,200 @@ +% Patterns + +We've made use of patterns a few times in the guide: first with `let` bindings, +then with `match` statements. Let's go on a whirlwind tour of all of the things +patterns can do! + +A quick refresher: you can match against literals directly, and `_` acts as an +*any* case: + +```{rust} +let x = 1; + +match x { + 1 => println!("one"), + 2 => println!("two"), + 3 => println!("three"), + _ => println!("anything"), +} +``` + +You can match multiple patterns with `|`: + +```{rust} +let x = 1; + +match x { + 1 | 2 => println!("one or two"), + 3 => println!("three"), + _ => println!("anything"), +} +``` + +You can match a range of values with `...`: + +```{rust} +let x = 1; + +match x { + 1 ... 5 => println!("one through five"), + _ => println!("anything"), +} +``` + +Ranges are mostly used with integers and single characters. + +If you're matching multiple things, via a `|` or a `...`, you can bind +the value to a name with `@`: + +```{rust} +let x = 1; + +match x { + e @ 1 ...
5 => println!("got a range element {}", e), + _ => println!("anything"), +} +``` + +If you're matching on an enum which has variants, you can use `..` to +ignore the value and type in the variant: + +```{rust} +enum OptionalInt { + Value(i32), + Missing, +} + +let x = OptionalInt::Value(5); + +match x { + OptionalInt::Value(..) => println!("Got an int!"), + OptionalInt::Missing => println!("No such luck."), +} +``` + +You can introduce *match guards* with `if`: + +```{rust} +enum OptionalInt { + Value(i32), + Missing, +} + +let x = OptionalInt::Value(5); + +match x { + OptionalInt::Value(i) if i > 5 => println!("Got an int bigger than five!"), + OptionalInt::Value(..) => println!("Got an int!"), + OptionalInt::Missing => println!("No such luck."), +} +``` + +If you're matching on a pointer, you can use the same syntax as you declared it +with. First, `&`: + +```{rust} +let x = &5; + +match x { + &val => println!("Got a value: {}", val), +} +``` + +Here, the `val` inside the `match` has type `i32`. In other words, the left-hand +side of the pattern destructures the value. If we have `&5`, then in `&val`, `val` +would be `5`. + +If you want to get a reference, use the `ref` keyword: + +```{rust} +let x = 5; + +match x { + ref r => println!("Got a reference to {}", r), +} +``` + +Here, the `r` inside the `match` has the type `&i32`. In other words, the `ref` +keyword _creates_ a reference, for use in the pattern. 
If you need a mutable +reference, `ref mut` will work in the same way: + +```{rust} +let mut x = 5; + +match x { + ref mut mr => println!("Got a mutable reference to {}", mr), +} +``` + +If you have a struct, you can destructure it inside of a pattern: + +```{rust} +# #![allow(non_shorthand_field_patterns)] +struct Point { + x: i32, + y: i32, +} + +let origin = Point { x: 0, y: 0 }; + +match origin { + Point { x: x, y: y } => println!("({},{})", x, y), +} +``` + +If we only care about some of the values, we don't have to give them all names: + +```{rust} +# #![allow(non_shorthand_field_patterns)] +struct Point { + x: i32, + y: i32, +} + +let origin = Point { x: 0, y: 0 }; + +match origin { + Point { x: x, .. } => println!("x is {}", x), +} +``` + +You can do this kind of match on any member, not just the first: + +```{rust} +# #![allow(non_shorthand_field_patterns)] +struct Point { + x: i32, + y: i32, +} + +let origin = Point { x: 0, y: 0 }; + +match origin { + Point { y: y, .. } => println!("y is {}", y), +} +``` + +If you want to match against a slice or array, you can use `&`: + +```{rust} +# #![feature(slice_patterns)] +fn main() { + let v = vec!["match_this", "1"]; + + match &v[..] { + ["match_this", second] => println!("The second element is {}", second), + _ => {}, + } +} +``` + +Whew! That's a lot of different ways to match things, and they can all be +mixed and matched, depending on what you're doing: + +```{rust,ignore} +match x { + Foo { x: Some(ref name), y: None } => ... +} +``` + +Patterns are very powerful. Make good use of them. diff --git a/src/doc/trpl/plugins.md b/src/doc/trpl/plugins.md deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/src/doc/trpl/primitive-types.md b/src/doc/trpl/primitive-types.md index e69de29bb2d1..2878e7ce4754 100644 --- a/src/doc/trpl/primitive-types.md +++ b/src/doc/trpl/primitive-types.md @@ -0,0 +1,3 @@ +% Primitive Types + +Coming Soon! 
diff --git a/src/doc/trpl/references-and-borrowing.md b/src/doc/trpl/references-and-borrowing.md index e69de29bb2d1..6acb326958d3 100644 --- a/src/doc/trpl/references-and-borrowing.md +++ b/src/doc/trpl/references-and-borrowing.md @@ -0,0 +1,3 @@ +% References and Borrowing + +Coming Soon! diff --git a/src/doc/trpl/slices.md b/src/doc/trpl/slices.md index e69de29bb2d1..a31c0ac3c4e6 100644 --- a/src/doc/trpl/slices.md +++ b/src/doc/trpl/slices.md @@ -0,0 +1,21 @@ +% Slices + +A *slice* is a reference to (or "view" into) an array. They are useful for +allowing safe, efficient access to a portion of an array without copying. For +example, you might want to reference just one line of a file read into memory. +By nature, a slice is not created directly, but from an existing variable. +Slices have a length, can be mutable or not, and in many ways behave like +arrays: + +```{rust} +let a = [0, 1, 2, 3, 4]; +let middle = &a[1..4]; // A slice of a: just the elements 1, 2, and 3 + +for e in middle.iter() { + println!("{}", e); // Prints 1, 2, 3 +} +``` + +You can also take a slice of a vector, `String`, or `&str`, because they are +backed by arrays. Slices have type `&[T]`, which we'll talk about when we cover +generics. diff --git a/src/doc/trpl/static.md b/src/doc/trpl/static.md index e69de29bb2d1..b29c4952c94e 100644 --- a/src/doc/trpl/static.md +++ b/src/doc/trpl/static.md @@ -0,0 +1,3 @@ +% `static` + +Coming soon! diff --git a/src/doc/trpl/strings.md b/src/doc/trpl/strings.md index e69de29bb2d1..2c2e6a8c7c5a 100644 --- a/src/doc/trpl/strings.md +++ b/src/doc/trpl/strings.md @@ -0,0 +1,61 @@ +% Strings + +Strings are an important concept for any programmer to master. Rust's string +handling system is a bit different from other languages, due to its systems +focus. Any time you have a data structure of variable size, things can get +tricky, and strings are a re-sizable data structure. 
That being said, Rust's +strings also work differently than in some other systems languages, such as C. + +Let's dig into the details. A *string* is a sequence of Unicode scalar values +encoded as a stream of UTF-8 bytes. All strings are guaranteed to be +validly encoded UTF-8 sequences. Additionally, strings are not null-terminated +and can contain null bytes. + +Rust has two main types of strings: `&str` and `String`. + +The first kind is a `&str`. These are called *string slices*. String literals +are of the type `&str`: + +```{rust} +let string = "Hello there."; // string: &str +``` + +This string is statically allocated, meaning that it's saved inside our +compiled program, and exists for the entire duration it runs. The `string` +binding is a reference to this statically allocated string. String slices +have a fixed size, and cannot be mutated. + +A `String`, on the other hand, is a heap-allocated string. This string +is growable, and is also guaranteed to be UTF-8. `String`s are +commonly created by converting from a string slice using the +`to_string` method. + +```{rust} +let mut s = "Hello".to_string(); // mut s: String +println!("{}", s); + +s.push_str(", world."); +println!("{}", s); +``` + +`String`s will coerce into `&str` with an `&`: + +``` +fn takes_slice(slice: &str) { + println!("Got: {}", slice); +} + +fn main() { + let s = "Hello".to_string(); + takes_slice(&s); +} +``` + +Viewing a `String` as a `&str` is cheap, but converting the `&str` to a +`String` involves allocating memory. No reason to do that unless you have to! + +That's the basics of strings in Rust! They're probably a bit more complicated +than you are used to, if you come from a scripting language, but when the +low-level details matter, they really matter. Just remember that `String`s +allocate memory and control their data, while `&str`s are a reference to +another string, and you'll be all set. 
diff --git a/src/doc/trpl/structs.md b/src/doc/trpl/structs.md index e69de29bb2d1..eff1a47761d6 100644 --- a/src/doc/trpl/structs.md +++ b/src/doc/trpl/structs.md @@ -0,0 +1,49 @@ +% Structs + +A struct is another form of a *record type*, just like a tuple. There's a +difference: structs give each element that they contain a name, called a +*field* or a *member*. Check it out: + +```rust +struct Point { + x: i32, + y: i32, +} + +fn main() { + let origin = Point { x: 0, y: 0 }; // origin: Point + + println!("The origin is at ({}, {})", origin.x, origin.y); +} +``` + +There's a lot going on here, so let's break it down. We declare a struct with +the `struct` keyword, and then with a name. By convention, structs begin with a +capital letter and are also camel cased: `PointInSpace`, not `Point_In_Space`. + +We can create an instance of our struct via `let`, as usual, but we use a `key: +value` style syntax to set each field. The order doesn't need to be the same as +in the original declaration. + +Finally, because fields have names, we can access the field through dot +notation: `origin.x`. + +The values in structs are immutable by default, like other bindings in Rust. +Use `mut` to make them mutable: + +```{rust} +struct Point { + x: i32, + y: i32, +} + +fn main() { + let mut point = Point { x: 0, y: 0 }; + + point.x = 5; + + println!("The point is at ({}, {})", point.x, point.y); +} +``` + +This will print `The point is at (5, 0)`. 
diff --git a/src/doc/trpl/syntax-and-semantics.md b/src/doc/trpl/syntax-and-semantics.md index e69de29bb2d1..6f992cf68873 100644 --- a/src/doc/trpl/syntax-and-semantics.md +++ b/src/doc/trpl/syntax-and-semantics.md @@ -0,0 +1 @@ +% Syntax and Semantics diff --git a/src/doc/trpl/testing.md b/src/doc/trpl/testing.md index e69de29bb2d1..8cf126cad95f 100644 --- a/src/doc/trpl/testing.md +++ b/src/doc/trpl/testing.md @@ -0,0 +1,432 @@ +% Testing + +> Program testing can be a very effective way to show the presence of bugs, but +> it is hopelessly inadequate for showing their absence. +> +> Edsger W. Dijkstra, "The Humble Programmer" (1972) + +Let's talk about how to test Rust code. What we will not be talking about is +the right way to test Rust code. There are many schools of thought regarding +the right and wrong way to write tests. All of these approaches use the same +basic tools, and so we'll show you the syntax for using them. + +# The `test` attribute + +At its simplest, a test in Rust is a function that's annotated with the `test` +attribute. Let's make a new project with Cargo called `adder`: + +```bash +$ cargo new adder +$ cd adder +``` + +Cargo will automatically generate a simple test when you make a new project. +Here's the contents of `src/lib.rs`: + +```rust +#[test] +fn it_works() { +} +``` + +Note the `#[test]`. This attribute indicates that this is a test function. It +currently has no body. That's good enough to pass! We can run the tests with +`cargo test`: + +```bash +$ cargo test + Compiling adder v0.0.1 (file:///home/you/projects/adder) + Running target/adder-91b3e234d4ed382a + +running 1 test +test it_works ... ok + +test result: ok. 1 passed; 0 failed; 0 ignored; 0 measured + + Doc-tests adder + +running 0 tests + +test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured +``` + +Cargo compiled and ran our tests. There are two sets of output here: one +for the test we wrote, and another for documentation tests. We'll talk about +those later. 
For now, see this line: + +```text +test it_works ... ok +``` + +Note the `it_works`. This comes from the name of our function: + +```rust +fn it_works() { +# } +``` + +We also get a summary line: + +```text +test result: ok. 1 passed; 0 failed; 0 ignored; 0 measured +``` + +So why does our do-nothing test pass? Any test which doesn't `panic!` passes, +and any test that does `panic!` fails. Let's make our test fail: + +```rust +#[test] +fn it_works() { + assert!(false); +} +``` + +`assert!` is a macro provided by Rust which takes one argument: if the argument +is `true`, nothing happens. If the argument is false, it `panic!`s. Let's run +our tests again: + +```bash +$ cargo test + Compiling adder v0.0.1 (file:///home/you/projects/adder) + Running target/adder-91b3e234d4ed382a + +running 1 test +test it_works ... FAILED + +failures: + +---- it_works stdout ---- + thread 'it_works' panicked at 'assertion failed: false', /home/steve/tmp/adder/src/lib.rs:3 + + + +failures: + it_works + +test result: FAILED. 0 passed; 1 failed; 0 ignored; 0 measured + +thread '
' panicked at 'Some tests failed', /home/steve/src/rust/src/libtest/lib.rs:247 +``` + +Rust indicates that our test failed: + +```text +test it_works ... FAILED +``` + +And that's reflected in the summary line: + +```text +test result: FAILED. 0 passed; 1 failed; 0 ignored; 0 measured +``` + +We also get a non-zero status code: + +```bash +$ echo $? +101 +``` + +This is useful if you want to integrate `cargo test` into other tooling. + +We can invert our test's failure with another attribute: `should_panic`: + +```rust +#[test] +#[should_panic] +fn it_works() { + assert!(false); +} +``` + +This test will now succeed if we `panic!` and fail if we complete. Let's try it: + +```bash +$ cargo test + Compiling adder v0.0.1 (file:///home/you/projects/adder) + Running target/adder-91b3e234d4ed382a + +running 1 test +test it_works ... ok + +test result: ok. 1 passed; 0 failed; 0 ignored; 0 measured + + Doc-tests adder + +running 0 tests + +test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured +``` + +Rust provides another macro, `assert_eq!`, that compares two arguments for +equality: + +```rust +#[test] +#[should_panic] +fn it_works() { + assert_eq!("Hello", "world"); +} +``` + +Does this test pass or fail? Because of the `should_panic` attribute, it +passes: + +```bash +$ cargo test + Compiling adder v0.0.1 (file:///home/you/projects/adder) + Running target/adder-91b3e234d4ed382a + +running 1 test +test it_works ... ok + +test result: ok. 1 passed; 0 failed; 0 ignored; 0 measured + + Doc-tests adder + +running 0 tests + +test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured +``` + +`should_panic` tests can be fragile, as it's hard to guarantee that the test +didn't fail for an unexpected reason. To help with this, an optional `expected` +parameter can be added to the `should_panic` attribute. The test harness will +make sure that the failure message contains the provided text. 
A safer version +of the example above would be: + +``` +#[test] +#[should_panic(expected = "assertion failed")] +fn it_works() { + assert_eq!("Hello", "world"); +} +``` + +That's all there is to the basics! Let's write one 'real' test: + +```{rust,ignore} +pub fn add_two(a: i32) -> i32 { + a + 2 +} + +#[test] +fn it_works() { + assert_eq!(4, add_two(2)); +} +``` + +This is a very common use of `assert_eq!`: call some function with +some known arguments and compare it to the expected output. + +# The `test` module + +There is one way in which our existing example is not idiomatic: it's +missing the test module. The idiomatic way of writing our example +looks like this: + +```{rust,ignore} +pub fn add_two(a: i32) -> i32 { + a + 2 +} + +#[cfg(test)] +mod test { + use super::add_two; + + #[test] + fn it_works() { + assert_eq!(4, add_two(2)); + } +} +``` + +There's a few changes here. The first is the introduction of a `mod test` with +a `cfg` attribute. The module allows us to group all of our tests together, and +to also define helper functions if needed, that don't become a part of the rest +of our crate. The `cfg` attribute only compiles our test code if we're +currently trying to run the tests. This can save compile time, and also ensures +that our tests are entirely left out of a normal build. + +The second change is the `use` declaration. Because we're in an inner module, +we need to bring our test function into scope. This can be annoying if you have +a large module, and so this is a common use of the `glob` feature. Let's change +our `src/lib.rs` to make use of it: + +```{rust,ignore} + +pub fn add_two(a: i32) -> i32 { + a + 2 +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn it_works() { + assert_eq!(4, add_two(2)); + } +} +``` + +Note the different `use` line. 
Now we run our tests: + +```bash +$ cargo test + Updating registry `https://github.com/rust-lang/crates.io-index` + Compiling adder v0.0.1 (file:///home/you/projects/adder) + Running target/adder-91b3e234d4ed382a + +running 1 test +test test::it_works ... ok + +test result: ok. 1 passed; 0 failed; 0 ignored; 0 measured + + Doc-tests adder + +running 0 tests + +test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured +``` + +It works! + +The current convention is to use the `test` module to hold your "unit-style" +tests. Anything that just tests one small bit of functionality makes sense to +go here. But what about "integration-style" tests instead? For that, we have +the `tests` directory. + +# The `tests` directory + +To write an integration test, let's make a `tests` directory, and +put a `tests/lib.rs` file inside, with this as its contents: + +```{rust,ignore} +extern crate adder; + +#[test] +fn it_works() { + assert_eq!(4, adder::add_two(2)); +} +``` + +This looks similar to our previous tests, but slightly different. We now have +an `extern crate adder` at the top. This is because the tests in the `tests` +directory are an entirely separate crate, and so we need to import our library. +This is also why `tests` is a suitable place to write integration-style tests: +they use the library like any other consumer of it would. + +Let's run them: + +```bash +$ cargo test + Compiling adder v0.0.1 (file:///home/you/projects/adder) + Running target/adder-91b3e234d4ed382a + +running 1 test +test test::it_works ... ok + +test result: ok. 1 passed; 0 failed; 0 ignored; 0 measured + + Running target/lib-c18e7d3494509e74 + +running 1 test +test it_works ... ok + +test result: ok. 1 passed; 0 failed; 0 ignored; 0 measured + + Doc-tests adder + +running 0 tests + +test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured +``` + +Now we have three sections: our previous test is also run, as well as our new +one. + +That's all there is to the `tests` directory. 
The `test` module isn't needed +here, since the whole thing is focused on tests. + +Let's finally check out that third section: documentation tests. + +# Documentation tests + +Nothing is better than documentation with examples. Nothing is worse than +examples that don't actually work, because the code has changed since the +documentation has been written. To this end, Rust supports automatically +running examples in your documentation. Here's a fleshed-out `src/lib.rs` +with examples: + +```{rust,ignore} +//! The `adder` crate provides functions that add numbers to other numbers. +//! +//! # Examples +//! +//! ``` +//! assert_eq!(4, adder::add_two(2)); +//! ``` + +/// This function adds two to its argument. +/// +/// # Examples +/// +/// ``` +/// use adder::add_two; +/// +/// assert_eq!(4, add_two(2)); +/// ``` +pub fn add_two(a: i32) -> i32 { + a + 2 +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn it_works() { + assert_eq!(4, add_two(2)); + } +} +``` + +Note the module-level documentation with `//!` and the function-level +documentation with `///`. Rust's documentation supports Markdown in comments, +and so triple graves mark code blocks. It is conventional to include the +`# Examples` section, exactly like that, with examples following. + +Let's run the tests again: + +```bash +$ cargo test + Compiling adder v0.0.1 (file:///home/steve/tmp/adder) + Running target/adder-91b3e234d4ed382a + +running 1 test +test test::it_works ... ok + +test result: ok. 1 passed; 0 failed; 0 ignored; 0 measured + + Running target/lib-c18e7d3494509e74 + +running 1 test +test it_works ... ok + +test result: ok. 1 passed; 0 failed; 0 ignored; 0 measured + + Doc-tests adder + +running 2 tests +test add_two_0 ... ok +test _0 ... ok + +test result: ok. 2 passed; 0 failed; 0 ignored; 0 measured +``` + +Now we have all three kinds of tests running! Note the names of the +documentation tests: the `_0` is generated for the module test, and `add_two_0` +for the function test. 
These will auto increment with names like `add_two_1` as +you add more examples. + diff --git a/src/doc/trpl/the-stack-and-the-heap.md b/src/doc/trpl/the-stack-and-the-heap.md index e69de29bb2d1..cc0941bc025a 100644 --- a/src/doc/trpl/the-stack-and-the-heap.md +++ b/src/doc/trpl/the-stack-and-the-heap.md @@ -0,0 +1,3 @@ +% The Stack and the Heap + +Coming Soon diff --git a/src/doc/trpl/trait-objects.md b/src/doc/trpl/trait-objects.md index e69de29bb2d1..d008d30597fe 100644 --- a/src/doc/trpl/trait-objects.md +++ b/src/doc/trpl/trait-objects.md @@ -0,0 +1,306 @@ +% Trait Objects + +When code involves polymorphism, there needs to be a mechanism to determine +which specific version is actually run. This is called 'dispatch.' There are +two major forms of dispatch: static dispatch and dynamic dispatch. While Rust +favors static dispatch, it also supports dynamic dispatch through a mechanism +called 'trait objects.' + +## Background + +For the rest of this chapter, we'll need a trait and some implementations. +Let's make a simple one, `Foo`. It has one method that is expected to return a +`String`. + +```rust +trait Foo { + fn method(&self) -> String; +} +``` + +We'll also implement this trait for `u8` and `String`: + +```rust +# trait Foo { fn method(&self) -> String; } +impl Foo for u8 { + fn method(&self) -> String { format!("u8: {}", *self) } +} + +impl Foo for String { + fn method(&self) -> String { format!("string: {}", *self) } +} +``` + + +## Static dispatch + +We can use this trait to perform static dispatch with trait bounds: + +```rust +# trait Foo { fn method(&self) -> String; } +# impl Foo for u8 { fn method(&self) -> String { format!("u8: {}", *self) } } +# impl Foo for String { fn method(&self) -> String { format!("string: {}", *self) } } +fn do_something<T: Foo>(x: T) { + x.method(); +} + +fn main() { + let x = 5u8; + let y = "Hello".to_string(); + + do_something(x); + do_something(y); +} +``` + +Rust uses 'monomorphization' to perform static dispatch here. 
This means that +Rust will create a special version of `do_something()` for both `u8` and +`String`, and then replace the call sites with calls to these specialized +functions. In other words, Rust generates something like this: + +```rust +# trait Foo { fn method(&self) -> String; } +# impl Foo for u8 { fn method(&self) -> String { format!("u8: {}", *self) } } +# impl Foo for String { fn method(&self) -> String { format!("string: {}", *self) } } +fn do_something_u8(x: u8) { + x.method(); +} + +fn do_something_string(x: String) { + x.method(); +} + +fn main() { + let x = 5u8; + let y = "Hello".to_string(); + + do_something_u8(x); + do_something_string(y); +} +``` + +This has a great upside: static dispatch allows function calls to be +inlined because the callee is known at compile time, and inlining is +the key to good optimization. Static dispatch is fast, but it comes at +a tradeoff: 'code bloat', due to many copies of the same function +existing in the binary, one for each type. + +Furthermore, compilers aren’t perfect and may “optimize” code to become slower. +For example, functions inlined too eagerly will bloat the instruction cache +(cache rules everything around us). This is part of the reason that `#[inline]` +and `#[inline(always)]` should be used carefully, and one reason why using a +dynamic dispatch is sometimes more efficient. + +However, the common case is that it is more efficient to use static dispatch, +and one can always have a thin statically-dispatched wrapper function that does +a dynamic dispatch, but not vice versa, meaning static calls are more flexible. +The standard library tries to be statically dispatched where possible for this +reason. + +## Dynamic dispatch + +Rust provides dynamic dispatch through a feature called 'trait objects.' Trait +objects, like `&Foo` or `Box<Foo>`, are normal values that store a value of +*any* type that implements the given trait, where the precise type can only be +known at runtime. 
+ +A trait object can be obtained from a pointer to a concrete type that +implements the trait by *casting* it (e.g. `&x as &Foo`) or *coercing* it +(e.g. using `&x` as an argument to a function that takes `&Foo`). + +These trait object coercions and casts also work for pointers like `&mut T` to +`&mut Foo` and `Box<T>` to `Box<Foo>`, but that's all at the moment. Coercions +and casts are identical. + +This operation can be seen as "erasing" the compiler's knowledge about the +specific type of the pointer, and hence trait objects are sometimes referred to +as "type erasure". + +Coming back to the example above, we can use the same trait to perform dynamic +dispatch with trait objects by casting: + +```rust +# trait Foo { fn method(&self) -> String; } +# impl Foo for u8 { fn method(&self) -> String { format!("u8: {}", *self) } } +# impl Foo for String { fn method(&self) -> String { format!("string: {}", *self) } } + +fn do_something(x: &Foo) { + x.method(); +} + +fn main() { + let x = 5u8; + do_something(&x as &Foo); +} +``` + +or by coercing: + +```rust +# trait Foo { fn method(&self) -> String; } +# impl Foo for u8 { fn method(&self) -> String { format!("u8: {}", *self) } } +# impl Foo for String { fn method(&self) -> String { format!("string: {}", *self) } } + +fn do_something(x: &Foo) { + x.method(); +} + +fn main() { + let x = "Hello".to_string(); + do_something(&x); +} +``` + +A function that takes a trait object is not specialized to each of the types +that implements `Foo`: only one copy is generated, often (but not always) +resulting in less code bloat. However, this comes at the cost of requiring +slower virtual function calls, and effectively inhibiting any chance of +inlining and related optimisations from occurring. + +### Why pointers? + +Rust does not put things behind a pointer by default, unlike many managed +languages, so types can have different sizes. 
Knowing the size of the value at +compile time is important for things like passing it as an argument to a +function, moving it about on the stack and allocating (and deallocating) space +on the heap to store it. + +For `Foo`, we would need to have a value that could be at least either a +`String` (24 bytes) or a `u8` (1 byte), as well as any other type for which +dependent crates may implement `Foo` (any number of bytes at all). There's no +way to guarantee that this last point can work if the values are stored without +a pointer, because those other types can be arbitrarily large. + +Putting the value behind a pointer means the size of the value is not relevant +when we are tossing a trait object around, only the size of the pointer itself. + +### Representation + +The methods of the trait can be called on a trait object via a special record +of function pointers traditionally called a 'vtable' (created and managed by +the compiler). + +Trait objects are both simple and complicated: their core representation and +layout is quite straight-forward, but there are some curly error messages and +surprising behaviors to discover. + +Let's start simple, with the runtime representation of a trait object. The +`std::raw` module contains structs with layouts that are the same as the +complicated built-in types, [including trait objects][stdraw]: + +```rust +# mod foo { +pub struct TraitObject { + pub data: *mut (), + pub vtable: *mut (), +} +# } +``` + +[stdraw]: ../std/raw/struct.TraitObject.html + +That is, a trait object like `&Foo` consists of a "data" pointer and a "vtable" +pointer. + +The data pointer addresses the data (of some unknown type `T`) that the trait +object is storing, and the vtable pointer points to the vtable ("virtual method +table") corresponding to the implementation of `Foo` for `T`. + + +A vtable is essentially a struct of function pointers, pointing to the concrete +piece of machine code for each method in the implementation. 
A method call like +`trait_object.method()` will retrieve the correct pointer out of the vtable and +then do a dynamic call of it. For example: + +```{rust,ignore} +struct FooVtable { + destructor: fn(*mut ()), + size: usize, + align: usize, + method: fn(*const ()) -> String, +} + +// u8: + +fn call_method_on_u8(x: *const ()) -> String { + // the compiler guarantees that this function is only called + // with `x` pointing to a u8 + let byte: &u8 = unsafe { &*(x as *const u8) }; + + byte.method() +} + +static Foo_for_u8_vtable: FooVtable = FooVtable { + destructor: /* compiler magic */, + size: 1, + align: 1, + + // cast to a function pointer + method: call_method_on_u8 as fn(*const ()) -> String, +}; + + +// String: + +fn call_method_on_String(x: *const ()) -> String { + // the compiler guarantees that this function is only called + // with `x` pointing to a String + let string: &String = unsafe { &*(x as *const String) }; + + string.method() +} + +static Foo_for_String_vtable: FooVtable = FooVtable { + destructor: /* compiler magic */, + // values for a 64-bit computer, halve them for 32-bit ones + size: 24, + align: 8, + + method: call_method_on_String as fn(*const ()) -> String, +}; +``` + +The `destructor` field in each vtable points to a function that will clean up +any resources of the vtable's type, for `u8` it is trivial, but for `String` it +will free the memory. This is necessary for owning trait objects like +`Box<Foo>`, which need to clean-up both the `Box` allocation as well as the +internal type when they go out of scope. The `size` and `align` fields store +the size of the erased type, and its alignment requirements; these are +essentially unused at the moment since the information is embedded in the +destructor, but will be used in the future, as trait objects are progressively +made more flexible. 
+ +Suppose we've got some values that implement `Foo`, then the explicit form of +construction and use of `Foo` trait objects might look a bit like (ignoring the +type mismatches: they're all just pointers anyway): + +```{rust,ignore} +let a: String = "foo".to_string(); +let x: u8 = 1; + +// let b: &Foo = &a; +let b = TraitObject { + // store the data + data: &a, + // store the methods + vtable: &Foo_for_String_vtable +}; + +// let y: &Foo = &x; +let y = TraitObject { + // store the data + data: &x, + // store the methods + vtable: &Foo_for_u8_vtable +}; + +// b.method(); +(b.vtable.method)(b.data); + +// y.method(); +(y.vtable.method)(y.data); +``` + +If `b` or `y` were owning trait objects (`Box<Foo>`), there would be a +`(b.vtable.destructor)(b.data)` (respectively `y`) call when they went out of +scope. diff --git a/src/doc/trpl/traits.md b/src/doc/trpl/traits.md index e69de29bb2d1..2986de4179b8 100644 --- a/src/doc/trpl/traits.md +++ b/src/doc/trpl/traits.md @@ -0,0 +1,508 @@ +% Traits + +Do you remember the `impl` keyword, used to call a function with method +syntax? + +```{rust} +# #![feature(core)] +struct Circle { + x: f64, + y: f64, + radius: f64, +} + +impl Circle { + fn area(&self) -> f64 { + std::f64::consts::PI * (self.radius * self.radius) + } +} +``` + +Traits are similar, except that we define a trait with just the method +signature, then implement the trait for that struct. Like this: + +```{rust} +# #![feature(core)] +struct Circle { + x: f64, + y: f64, + radius: f64, +} + +trait HasArea { + fn area(&self) -> f64; +} + +impl HasArea for Circle { + fn area(&self) -> f64 { + std::f64::consts::PI * (self.radius * self.radius) + } +} +``` + +As you can see, the `trait` block looks very similar to the `impl` block, +but we don't define a body, just a type signature. When we `impl` a trait, +we use `impl Trait for Item`, rather than just `impl Item`. + +So what's the big deal? Remember the error we were getting with our generic +`inverse` function? 
+ +```text +error: binary operation `==` cannot be applied to type `T` +``` + +We can use traits to constrain our generics. Consider this function, which +does not compile, and gives us a similar error: + +```{rust,ignore} +fn print_area<T>(shape: T) { + println!("This shape has an area of {}", shape.area()); +} +``` + +Rust complains: + +```text +error: type `T` does not implement any method in scope named `area` +``` + +Because `T` can be any type, we can't be sure that it implements the `area` +method. But we can add a *trait constraint* to our generic `T`, ensuring +that it does: + +```{rust} +# trait HasArea { +# fn area(&self) -> f64; +# } +fn print_area<T: HasArea>(shape: T) { + println!("This shape has an area of {}", shape.area()); +} +``` + +The syntax `<T: HasArea>` means `any type that implements the HasArea trait`. +Because traits define function type signatures, we can be sure that any type +which implements `HasArea` will have an `.area()` method. + +Here's an extended example of how this works: + +```{rust} +# #![feature(core)] +trait HasArea { + fn area(&self) -> f64; +} + +struct Circle { + x: f64, + y: f64, + radius: f64, +} + +impl HasArea for Circle { + fn area(&self) -> f64 { + std::f64::consts::PI * (self.radius * self.radius) + } +} + +struct Square { + x: f64, + y: f64, + side: f64, +} + +impl HasArea for Square { + fn area(&self) -> f64 { + self.side * self.side + } +} + +fn print_area<T: HasArea>(shape: T) { + println!("This shape has an area of {}", shape.area()); +} + +fn main() { + let c = Circle { + x: 0.0f64, + y: 0.0f64, + radius: 1.0f64, + }; + + let s = Square { + x: 0.0f64, + y: 0.0f64, + side: 1.0f64, + }; + + print_area(c); + print_area(s); +} +``` + +This program outputs: + +```text +This shape has an area of 3.141593 +This shape has an area of 1 +``` + +As you can see, `print_area` is now generic, but also ensures that we +have passed in the correct types. 
If we pass in an incorrect type: + +```{rust,ignore} +print_area(5); +``` + +We get a compile-time error: + +```text +error: failed to find an implementation of trait main::HasArea for int +``` + +So far, we've only added trait implementations to structs, but you can +implement a trait for any type. So technically, we _could_ implement +`HasArea` for `i32`: + +```{rust} +trait HasArea { + fn area(&self) -> f64; +} + +impl HasArea for i32 { + fn area(&self) -> f64 { + println!("this is silly"); + + *self as f64 + } +} + +5.area(); +``` + +It is considered poor style to implement methods on such primitive types, even +though it is possible. + +This may seem like the Wild West, but there are two other restrictions around +implementing traits that prevent this from getting out of hand. First, traits +must be `use`d in any scope where you wish to use the trait's method. So for +example, this does not work: + +```{rust,ignore} +mod shapes { + use std::f64::consts; + + trait HasArea { + fn area(&self) -> f64; + } + + struct Circle { + x: f64, + y: f64, + radius: f64, + } + + impl HasArea for Circle { + fn area(&self) -> f64 { + consts::PI * (self.radius * self.radius) + } + } +} + +fn main() { + let c = shapes::Circle { + x: 0.0f64, + y: 0.0f64, + radius: 1.0f64, + }; + + println!("{}", c.area()); +} +``` + +Now that we've moved the structs and traits into their own module, we get an +error: + +```text +error: type `shapes::Circle` does not implement any method in scope named `area` +``` + +If we add a `use` line right above `main` and make the right things public, +everything is fine: + +```{rust} +# #![feature(core)] +mod shapes { + use std::f64::consts; + + pub trait HasArea { + fn area(&self) -> f64; + } + + pub struct Circle { + pub x: f64, + pub y: f64, + pub radius: f64, + } + + impl HasArea for Circle { + fn area(&self) -> f64 { + consts::PI * (self.radius * self.radius) + } + } +} + +use shapes::HasArea; + +fn main() { + let c = shapes::Circle { + x: 0.0f64, + y: 
0.0f64, + radius: 1.0f64, + }; + + println!("{}", c.area()); +} +``` + +This means that even if someone does something bad like add methods to `i32`, +it won't affect you, unless you `use` that trait. + +There's one more restriction on implementing traits. Either the trait or the +type you're writing the `impl` for must be inside your crate. So, we could +implement the `HasArea` trait for `i32`, because `HasArea` is in our crate. But +if we tried to implement `Float`, a trait provided by Rust, for `i32`, we could +not, because neither the trait nor the type is in our crate. + +One last thing about traits: generic functions with a trait bound use +*monomorphization* (*mono*: one, *morph*: form), so they are statically +dispatched. What does that mean? Check out the chapter on [static and dynamic +dispatch](static-and-dynamic-dispatch.html) for more. + +## Multiple trait bounds + +You’ve seen that you can bound a generic type parameter with a trait: + +```rust +fn foo<T: Clone>(x: T) { + x.clone(); +} +``` + +If you need more than one bound, you can use `+`: + +```rust +use std::fmt::Debug; + +fn foo<T: Clone + Debug>(x: T) { + x.clone(); + println!("{:?}", x); +} +``` + +`T` now needs to be both `Clone` and `Debug`. + +## Where clause + +Writing functions with only a few generic types and a small number of trait +bounds isn't too bad, but as the number increases, the syntax gets increasingly +awkward: + +``` +use std::fmt::Debug; + +fn foo<T: Clone, K: Clone + Debug>(x: T, y: K) { + x.clone(); + y.clone(); + println!("{:?}", y); +} +``` + +The name of the function is on the far left, and the parameter list is on the +far right. The bounds are getting in the way.
+ +Rust has a solution, and it's called a '`where` clause': + +``` +use std::fmt::Debug; + +fn foo<T: Clone, K: Clone + Debug>(x: T, y: K) { + x.clone(); + y.clone(); + println!("{:?}", y); +} + +fn bar<T, K>(x: T, y: K) where T: Clone, K: Clone + Debug { + x.clone(); + y.clone(); + println!("{:?}", y); +} + +fn main() { + foo("Hello", "world"); + bar("Hello", "world"); +} +``` + +`foo()` uses the syntax we showed earlier, and `bar()` uses a `where` clause. +All you need to do is leave off the bounds when defining your type parameters, +and then add `where` after the parameter list. For longer lists, whitespace can +be added: + +``` +use std::fmt::Debug; + +fn bar<T, K>(x: T, y: K) + where T: Clone, + K: Clone + Debug { + + x.clone(); + y.clone(); + println!("{:?}", y); +} +``` + +This flexibility can add clarity in complex situations. + +`where` is also more powerful than the simpler syntax. For example: + +``` +trait ConvertTo<Output> { + fn convert(&self) -> Output; +} + +impl ConvertTo<i64> for i32 { + fn convert(&self) -> i64 { *self as i64 } +} + +// can be called with T == i32 +fn normal<T: ConvertTo<i64>>(x: &T) -> i64 { + x.convert() +} + +// can be called with T == i64 +fn inverse<T>() -> T + // this is using ConvertTo as if it were "ConvertFrom" + where i32: ConvertTo<T> { + 1i32.convert() +} +``` + +This shows off the additional feature of `where` clauses: they allow bounds +where the left-hand side is an arbitrary type (`i32` in this case), not just a +plain type parameter (like `T`). + +## Our `inverse` Example + +Back in [Generics](generics.html), we were trying to write code like this: + +```{rust,ignore} +fn inverse<T>(x: T) -> Result<T, String> { + if x == 0.0 { return Err("x cannot be zero!".to_string()); } + + Ok(1.0 / x) +} +``` + +If we try to compile it, we get this error: + +```text +error: binary operation `==` cannot be applied to type `T` +``` + +This is because `T` is too generic: we don't know if a random `T` can be +compared. For that, we can use trait bounds.
It doesn't quite work, but try +this: + +```{rust,ignore} +fn inverse<T: PartialEq>(x: T) -> Result<T, String> { + if x == 0.0 { return Err("x cannot be zero!".to_string()); } + + Ok(1.0 / x) +} +``` + +You should get this error: + +```text +error: mismatched types: + expected `T`, + found `_` +(expected type parameter, + found floating-point variable) +``` + +So this won't work. While our `T` is `PartialEq`, we expected to have another `T`, +but instead, we found a floating-point variable. We need a different bound. `Float` +to the rescue: + +``` +# #![feature(std_misc)] +use std::num::Float; + +fn inverse<T: Float>(x: T) -> Result<T, String> { + if x == Float::zero() { return Err("x cannot be zero!".to_string()) } + + let one: T = Float::one(); + Ok(one / x) +} +``` + +We've had to replace our generic `0.0` and `1.0` with the appropriate methods +from the `Float` trait. Both `f32` and `f64` implement `Float`, so our function +works just fine: + +``` +# #![feature(std_misc)] +# use std::num::Float; +# fn inverse<T: Float>(x: T) -> Result<T, String> { +# if x == Float::zero() { return Err("x cannot be zero!".to_string()) } +# let one: T = Float::one(); +# Ok(one / x) +# } +println!("the inverse of {} is {:?}", 2.0f32, inverse(2.0f32)); +println!("the inverse of {} is {:?}", 2.0f64, inverse(2.0f64)); + +println!("the inverse of {} is {:?}", 0.0f32, inverse(0.0f32)); +println!("the inverse of {} is {:?}", 0.0f64, inverse(0.0f64)); +``` + +## Default methods + +There's one last feature of traits we should cover: default methods. It's +easiest just to show an example: + +```rust +trait Foo { + fn bar(&self); + + fn baz(&self) { println!("We called baz."); } +} +``` + +Implementors of the `Foo` trait need to implement `bar()`, but they don't +need to implement `baz()`. They'll get this default behavior.
They can +override the default if they so choose: + +```rust +# trait Foo { +# fn bar(&self); +# fn baz(&self) { println!("We called baz."); } +# } +struct UseDefault; + +impl Foo for UseDefault { + fn bar(&self) { println!("We called bar."); } +} + +struct OverrideDefault; + +impl Foo for OverrideDefault { + fn bar(&self) { println!("We called bar."); } + + fn baz(&self) { println!("Override baz!"); } +} + +let default = UseDefault; +default.baz(); // prints "We called baz." + +let over = OverrideDefault; +over.baz(); // prints "Override baz!" +``` diff --git a/src/doc/trpl/tuple-structs.md b/src/doc/trpl/tuple-structs.md index e69de29bb2d1..8fba658fba2c 100644 --- a/src/doc/trpl/tuple-structs.md +++ b/src/doc/trpl/tuple-structs.md @@ -0,0 +1,56 @@ +% Tuple Structs + +Rust has another data type that's like a hybrid between a tuple and a struct, +called a *tuple struct*. Tuple structs do have a name, but their fields don't: + +```{rust} +struct Color(i32, i32, i32); +struct Point(i32, i32, i32); +``` + +These two will not be equal, even if they have the same values: + +```{rust} +# struct Color(i32, i32, i32); +# struct Point(i32, i32, i32); +let black = Color(0, 0, 0); +let origin = Point(0, 0, 0); +``` + +It is almost always better to use a struct than a tuple struct. We would write +`Color` and `Point` like this instead: + +```{rust} +struct Color { + red: i32, + blue: i32, + green: i32, +} + +struct Point { + x: i32, + y: i32, + z: i32, +} +``` + +Now, we have actual names, rather than positions. Good names are important, +and with a struct, we have actual names. + +There _is_ one case when a tuple struct is very useful, though, and that's a +tuple struct with only one element. 
We call this the *newtype* pattern, because +it allows you to create a new type, distinct from that of its contained value +and expressing its own semantic meaning: + +```{rust} +struct Inches(i32); + +let length = Inches(10); + +let Inches(integer_length) = length; +println!("length is {} inches", integer_length); +``` + +As you can see here, you can extract the inner integer type through a +destructuring `let`, as we discussed previously in 'tuples.' In this case, the +`let Inches(integer_length)` assigns `10` to `integer_length`. diff --git a/src/doc/trpl/tuples.md b/src/doc/trpl/tuples.md index e69de29bb2d1..dd526d05b671 100644 --- a/src/doc/trpl/tuples.md +++ b/src/doc/trpl/tuples.md @@ -0,0 +1,97 @@ +% Tuples + +The first compound data type we're going to talk about is called the *tuple*. +A tuple is an ordered list of fixed size. Like this: + +```rust +let x = (1, "hello"); +``` + +The parentheses and commas form this two-length tuple. Here's the same code, but +with the type annotated: + +```rust +let x: (i32, &str) = (1, "hello"); +``` + +As you can see, the type of a tuple looks just like the tuple, but with each +position having a type name rather than the value. Careful readers will also +note that tuples are heterogeneous: we have an `i32` and a `&str` in this tuple. +You have briefly seen `&str` used as a type before, and we'll discuss the +details of strings later. In systems programming languages, strings are a bit +more complex than in other languages. For now, just read `&str` as a *string +slice*, and we'll learn more soon. + +You can access the fields in a tuple through a *destructuring let*. Here's +an example: + +```rust +let (x, y, z) = (1, 2, 3); + +println!("x is {}", x); +``` + +Remember before when I said the left-hand side of a `let` statement was more +powerful than just assigning a binding? Here we are. 
We can put a pattern on +the left-hand side of the `let`, and if it matches up to the right-hand side, +we can assign multiple bindings at once. In this case, `let` "destructures," +or "breaks up," the tuple, and assigns the bits to three bindings. + +This pattern is very powerful, and we'll see it repeated more later. + +There are also a few things you can do with a tuple as a whole, without +destructuring. You can assign one tuple into another, if they have the same +contained types and [arity]. Tuples have the same arity when they have the same +length. + +```rust +let mut x = (1, 2); // x: (i32, i32) +let y = (2, 3); // y: (i32, i32) + +x = y; +``` + +You can also check for equality with `==`. Again, this will only compile if the +tuples have the same type. + +```rust +let x = (1, 2, 3); +let y = (2, 2, 4); + +if x == y { + println!("yes"); +} else { + println!("no"); +} +``` + +This will print `no`, because some of the values aren't equal. + +Note that the order of the values is considered when checking for equality, +so the following example will also print `no`. + +```rust +let x = (1, 2, 3); +let y = (2, 1, 3); + +if x == y { + println!("yes"); +} else { + println!("no"); +} +``` + +One other use of tuples is to return multiple values from a function: + +```rust +fn next_two(x: i32) -> (i32, i32) { (x + 1, x + 2) } + +fn main() { + let (x, y) = next_two(5); + println!("x, y = {}, {}", x, y); +} +``` + +Even though Rust functions can only return one value, a tuple *is* one value, +that happens to be made up of more than one value. You can also see in this +example how you can destructure a pattern returned by a function, as well. 
diff --git a/src/doc/trpl/type-aliases.md b/src/doc/trpl/type-aliases.md index e69de29bb2d1..fffa0ae1383c 100644 --- a/src/doc/trpl/type-aliases.md +++ b/src/doc/trpl/type-aliases.md @@ -0,0 +1,3 @@ +% `type` Aliases + +Coming soon diff --git a/src/doc/trpl/ufcs.md b/src/doc/trpl/ufcs.md index e69de29bb2d1..6b9a417c4394 100644 --- a/src/doc/trpl/ufcs.md +++ b/src/doc/trpl/ufcs.md @@ -0,0 +1,3 @@ +% Universal Function Call Syntax + +Coming soon diff --git a/src/doc/trpl/unsafe-code.md b/src/doc/trpl/unsafe-code.md index e69de29bb2d1..b641f2b104a7 100644 --- a/src/doc/trpl/unsafe-code.md +++ b/src/doc/trpl/unsafe-code.md @@ -0,0 +1,183 @@ +% Unsafe Code + +# Introduction + +Rust aims to provide safe abstractions over the low-level details of +the CPU and operating system, but sometimes one needs to drop down and +write code at that level. This guide aims to provide an overview of +the dangers and power one gets with Rust's unsafe subset. + +Rust provides an escape hatch in the form of the `unsafe { ... }` +block which allows the programmer to dodge some of the compiler's +checks and do a wide range of operations, such as: + +- dereferencing [raw pointers](#raw-pointers) +- calling a function via FFI ([covered by the FFI guide](ffi.html)) +- casting between types bitwise (`transmute`, aka "reinterpret cast") +- [inline assembly](#inline-assembly) + +Note that an `unsafe` block does not relax the rules about lifetimes +of `&` and the freezing of borrowed data. + +Any use of `unsafe` is the programmer saying "I know more than you" to +the compiler, and, as such, the programmer should be very sure that +they actually do know more about why that piece of code is valid. In +general, one should try to minimize the amount of unsafe code in a +code base; preferably by using the bare minimum `unsafe` blocks to +build safe interfaces. 
+ +> **Note**: the low-level details of the Rust language are still in +> flux, and there is no guarantee of stability or backwards +> compatibility. In particular, there may be changes that do not cause +> compilation errors, but do cause semantic changes (such as invoking +> undefined behaviour). As such, extreme care is required. + +# Pointers + +## References + +One of Rust's biggest features is memory safety. This is achieved in +part via [the ownership system](ownership.html), which is how the +compiler can guarantee that every `&` reference is always valid, and, +for example, never pointing to freed memory. + +These restrictions on `&` have huge advantages. However, they also +constrain how we can use them. For example, `&` doesn't behave +identically to C's pointers, and so cannot be used for pointers in +foreign function interfaces (FFI). Additionally, both immutable (`&`) +and mutable (`&mut`) references have some aliasing and freezing +guarantees, required for memory safety. + +In particular, if you have an `&T` reference, then the `T` must not be +modified through that reference or any other reference. There are some +standard library types, e.g. `Cell` and `RefCell`, that provide inner +mutability by replacing compile time guarantees with dynamic checks at +runtime. + +An `&mut` reference has a different constraint: when an object has an +`&mut T` pointing into it, then that `&mut` reference must be the only +such usable path to that object in the whole program. That is, an +`&mut` cannot alias with any other references. + +Using `unsafe` code to incorrectly circumvent and violate these +restrictions is undefined behaviour. For example, the following +creates two aliasing `&mut` pointers, and is invalid. 
+ +``` +use std::mem; +let mut x: u8 = 1; + +let ref_1: &mut u8 = &mut x; +let ref_2: &mut u8 = unsafe { mem::transmute(&mut *ref_1) }; + +// oops, ref_1 and ref_2 point to the same piece of data (x) and are +// both usable +*ref_1 = 10; +*ref_2 = 20; +``` + +## Raw pointers + +Rust offers two additional pointer types (*raw pointers*), written as +`*const T` and `*mut T`. They're an approximation of C's `const T*` and `T*` +respectively; indeed, one of their most common uses is for FFI, +interfacing with external C libraries. + +Raw pointers have much fewer guarantees than other pointer types +offered by the Rust language and libraries. For example, they + +- are not guaranteed to point to valid memory and are not even + guaranteed to be non-null (unlike both `Box` and `&`); +- do not have any automatic clean-up, unlike `Box`, and so require + manual resource management; +- are plain-old-data, that is, they don't move ownership, again unlike + `Box`, hence the Rust compiler cannot protect against bugs like + use-after-free; +- lack any form of lifetimes, unlike `&`, and so the compiler cannot + reason about dangling pointers; and +- have no guarantees about aliasing or mutability other than mutation + not being allowed directly through a `*const T`. + +Fortunately, they come with a redeeming feature: the weaker guarantees +mean weaker restrictions. The missing restrictions make raw pointers +appropriate as a building block for implementing things like smart +pointers and vectors inside libraries. For example, `*` pointers are +allowed to alias, allowing them to be used to write shared-ownership +types like reference counted and garbage collected pointers, and even +thread-safe shared memory types (`Rc` and the `Arc` types are both +implemented entirely in Rust). + +There are two things that you are required to be careful about +(i.e. require an `unsafe { ... 
}` block) with raw pointers: + +- dereferencing: they can have any value: so possible results include + a crash, a read of uninitialised memory, a use-after-free, or + reading data as normal. +- pointer arithmetic via the `offset` [intrinsic](#intrinsics) (or + `.offset` method): this intrinsic uses so-called "in-bounds" + arithmetic, that is, it is only defined behaviour if the result is + inside (or one-byte-past-the-end) of the object from which the + original pointer came. + +The latter assumption allows the compiler to optimize more +effectively. As can be seen, actually *creating* a raw pointer is not +unsafe, and neither is converting to an integer. + +### References and raw pointers + +At runtime, a raw pointer `*` and a reference pointing to the same +piece of data have an identical representation. In fact, an `&T` +reference will implicitly coerce to an `*const T` raw pointer in safe code +and similarly for the `mut` variants (both coercions can be performed +explicitly with, respectively, `value as *const T` and `value as *mut T`). + +Going the opposite direction, from `*const` to a reference `&`, is not +safe. A `&T` is always valid, and so, at a minimum, the raw pointer +`*const T` has to point to a valid instance of type `T`. Furthermore, +the resulting pointer must satisfy the aliasing and mutability laws of +references. The compiler assumes these properties are true for any +references, no matter how they are created, and so any conversion from +raw pointers is asserting that they hold. The programmer *must* +guarantee this. + +The recommended method for the conversion is + +``` +let i: u32 = 1; +// explicit cast +let p_imm: *const u32 = &i as *const u32; +let mut m: u32 = 2; +// implicit coercion +let p_mut: *mut u32 = &mut m; + +unsafe { + let ref_imm: &u32 = &*p_imm; + let ref_mut: &mut u32 = &mut *p_mut; +} +``` + +The `&*x` dereferencing style is preferred to using a `transmute`. 
+The latter is far more powerful than necessary, and the more +restricted operation is harder to use incorrectly; for example, it +requires that `x` is a pointer (unlike `transmute`). + + + +## Making the unsafe safe(r) + +There are various ways to expose a safe interface around some unsafe +code: + +- store pointers privately (i.e. not in public fields of public + structs), so that you can see and control all reads and writes to + the pointer in one place. +- use `assert!()` a lot: since you can't rely on the protection of the + compiler & type-system to ensure that your `unsafe` code is correct + at compile-time, use `assert!()` to verify that it is doing the + right thing at run-time. +- implement the `Drop` for resource clean-up via a destructor, and use + RAII (Resource Acquisition Is Initialization). This reduces the need + for any manual memory management by users, and automatically ensures + that clean-up is always run, even when the thread panics. +- ensure that any data stored behind a raw pointer is destroyed at the + appropriate time. diff --git a/src/doc/trpl/unsized-types.md b/src/doc/trpl/unsized-types.md index e69de29bb2d1..f307f23f0116 100644 --- a/src/doc/trpl/unsized-types.md +++ b/src/doc/trpl/unsized-types.md @@ -0,0 +1,3 @@ +% Unsized Types + +Coming Soon! diff --git a/src/doc/trpl/variable-bindings.md b/src/doc/trpl/variable-bindings.md index e69de29bb2d1..88babd8659c2 100644 --- a/src/doc/trpl/variable-bindings.md +++ b/src/doc/trpl/variable-bindings.md @@ -0,0 +1,161 @@ +% Variable Bindings + +The first thing we'll learn about are *variable bindings*. They look like this: + +```{rust} +fn main() { + let x = 5; +} +``` + +Putting `fn main() {` in each example is a bit tedious, so we'll leave that out +in the future. If you're following along, make sure to edit your `main()` +function, rather than leaving it off. Otherwise, you'll get an error. + +In many languages, this is called a *variable*. 
But Rust's variable bindings +have a few tricks up their sleeves. Rust has a very powerful feature called +*pattern matching* that we'll get into detail with later, but the left +hand side of a `let` expression is a full pattern, not just a variable name. +This means we can do things like: + +```{rust} +let (x, y) = (1, 2); +``` + +After this expression is evaluated, `x` will be one, and `y` will be two. +Patterns are really powerful, but this is about all we can do with them so far. +So let's just keep this in the back of our minds as we go forward. + +Rust is a statically typed language, which means that we specify our types up +front. So why does our first example compile? Well, Rust has this thing called +*type inference*. If it can figure out what the type of something is, Rust +doesn't require you to actually type it out. + +We can add the type if we want to, though. Types come after a colon (`:`): + +```{rust} +let x: i32 = 5; +``` + +If I asked you to read this out loud to the rest of the class, you'd say "`x` +is a binding with the type `i32` and the value `five`." + +In this case we chose to represent `x` as a 32-bit signed integer. Rust has +many different primitive integer types. They begin with `i` for signed integers +and `u` for unsigned integers. The possible integer sizes are 8, 16, 32, and 64 +bits. + +In future examples, we may annotate the type in a comment. The examples will +look like this: + +```{rust} +fn main() { + let x = 5; // x: i32 +} +``` + +Note the similarities between this annotation and the syntax you use with `let`. +Including these kinds of comments is not idiomatic Rust, but we'll occasionally +include them to help you understand what the types that Rust infers are. + +By default, bindings are *immutable*. 
This code will not compile: + +```{ignore} +let x = 5; +x = 10; +``` + +It will give you this error: + +```text +error: re-assignment of immutable variable `x` + x = 10; + ^~~~~~~ +``` + +If you want a binding to be mutable, you can use `mut`: + +```{rust} +let mut x = 5; // mut x: i32 +x = 10; +``` + +There is no single reason that bindings are immutable by default, but we can +think about it through one of Rust's primary focuses: safety. If you forget to +say `mut`, the compiler will catch it, and let you know that you have mutated +something you may not have intended to mutate. If bindings were mutable by +default, the compiler would not be able to tell you this. If you _did_ intend +mutation, then the solution is quite easy: add `mut`. + +There are other good reasons to avoid mutable state when possible, but they're +out of the scope of this guide. In general, you can often avoid explicit +mutation, and doing so is preferable in Rust. That said, sometimes, mutation is +what you need, so it's not verboten. + +Let's get back to bindings. Rust variable bindings have one more aspect that +differs from other languages: bindings are required to be initialized with a +value before you're allowed to use them. + +Let's try it out. Change your `src/main.rs` file to look like this: + +```{rust} +fn main() { + let x: i32; + + println!("Hello world!"); +} +``` + +You can use `cargo build` on the command line to build it. You'll get a warning, +but the program will still build, and running it prints "Hello world!": + +```text + Compiling hello_world v0.0.1 (file:///home/you/projects/hello_world) +src/main.rs:2:9: 2:10 warning: unused variable: `x`, #[warn(unused_variable)] on by default +src/main.rs:2 let x: i32; + ^ +``` + +Rust warns us that we never use the variable binding, but since we never use it, +no harm, no foul. Things change if we try to actually use this `x`, however. Let's +do that.
Change your program to look like this: + +```{rust,ignore} +fn main() { + let x: i32; + + println!("The value of x is: {}", x); +} +``` + +And try to build it. You'll get an error: + +```{bash} +$ cargo build + Compiling hello_world v0.0.1 (file:///home/you/projects/hello_world) +src/main.rs:4:39: 4:40 error: use of possibly uninitialized variable: `x` +src/main.rs:4 println!("The value of x is: {}", x); + ^ +note: in expansion of format_args! +<std macros>:2:23: 2:77 note: expansion site +<std macros>:1:1: 3:2 note: in expansion of println! +src/main.rs:4:5: 4:42 note: expansion site +error: aborting due to previous error +Could not compile `hello_world`. +``` + +Rust will not let us use a value that has not been initialized. Next, let's +talk about this stuff we've added to `println!`. + +If you include two curly braces (`{}`, some call them moustaches...) in your +string to print, Rust will interpret this as a request to interpolate some sort +of value. *String interpolation* is a computer science term that means "stick +in the middle of a string." We add a comma, and then `x`, to indicate that we +want `x` to be the value we're interpolating. The comma is used to separate +arguments we pass to functions and macros, if you're passing more than one. + +When you just use the curly braces, Rust will attempt to display the +value in a meaningful way by checking out its type. If you want to specify the +format in a more detailed manner, there are a [wide number of options +available](../std/fmt/index.html). For now, we'll just stick to the default: +integers aren't very complicated to print. diff --git a/src/doc/trpl/vectors.md b/src/doc/trpl/vectors.md index e69de29bb2d1..cba435233fab 100644 --- a/src/doc/trpl/vectors.md +++ b/src/doc/trpl/vectors.md @@ -0,0 +1,33 @@ +% Vectors + +A *vector* is a dynamic or "growable" array, implemented as the standard +library type [`Vec<T>`](../std/vec/) (we'll talk about what the `<T>` means +later). Vectors always allocate their data on the heap.
Vectors are to slices +what `String` is to `&str`. You can create them with the `vec!` macro: + +```{rust} +let v = vec![1, 2, 3]; // v: Vec<i32> +``` + +(Notice that unlike the `println!` macro we've used in the past, we use square +brackets `[]` with `vec!`. Rust allows you to use either in either situation; +this is just convention.) + +There's an alternate form of `vec!` for repeating an initial value: + +``` +let v = vec![0; 10]; // ten zeroes +``` + +You can get the length of, iterate over, and subscript vectors just like +arrays. In addition, (mutable) vectors can grow automatically: + +```{rust} +let mut nums = vec![1, 2, 3]; // mut nums: Vec<i32> + +nums.push(4); + +println!("The length of nums is now {}", nums.len()); // Prints 4 +``` + +Vectors have many more useful methods. diff --git a/src/doc/trpl/while-loops.md b/src/doc/trpl/while-loops.md index e69de29bb2d1..508c4ee117a5 100644 --- a/src/doc/trpl/while-loops.md +++ b/src/doc/trpl/while-loops.md @@ -0,0 +1,83 @@ +% `while` loops + +The other kind of looping construct in Rust is the `while` loop. It looks like +this: + +```{rust} +let mut x = 5; // mut x: u32 +let mut done = false; // mut done: bool + +while !done { + x += x - 3; + println!("{}", x); + if x % 5 == 0 { done = true; } +} +``` + +`while` loops are the correct choice when you're not sure how many times +you need to loop. + +If you need an infinite loop, you may be tempted to write this: + +```{rust,ignore} +while true { +``` + +However, Rust has a dedicated keyword, `loop`, to handle this case: + +```{rust,ignore} +loop { +``` + +Rust's control-flow analysis treats this construct differently than a +`while true`, since we know that it will always loop. The details of what +that _means_ aren't super important to understand at this stage, but in +general, the more information we can give to the compiler, the better it +can do with safety and code generation, so you should always prefer +`loop` when you plan to loop infinitely.
+ +## Ending iteration early + +Let's take a look at that `while` loop we had earlier: + +```{rust} +let mut x = 5; +let mut done = false; + +while !done { + x += x - 3; + println!("{}", x); + if x % 5 == 0 { done = true; } +} +``` + +We had to keep a dedicated `mut` boolean variable binding, `done`, to know +when we should exit out of the loop. Rust has two keywords to help us with +modifying iteration: `break` and `continue`. + +In this case, we can write the loop in a better way with `break`: + +```{rust} +let mut x = 5; + +loop { + x += x - 3; + println!("{}", x); + if x % 5 == 0 { break; } +} +``` + +We now loop forever with `loop` and use `break` to break out early. + +`continue` is similar, but instead of ending the loop, goes to the next +iteration. This will only print the odd numbers: + +```{rust} +for x in 0..10 { + if x % 2 == 0 { continue; } + + println!("{}", x); +} +``` + +Both `continue` and `break` are valid in both kinds of loops.