diff --git a/src/librustc/middle/typeck/infer/error_reporting.rs b/src/librustc/middle/typeck/infer/error_reporting.rs index 8c55872d9621..6012fa43eb55 100644 --- a/src/librustc/middle/typeck/infer/error_reporting.rs +++ b/src/librustc/middle/typeck/infer/error_reporting.rs @@ -1460,8 +1460,8 @@ impl<'a, 'tcx> ErrorReportingHelpers for InferCtxt<'a, 'tcx> { format!(" for {}in function call", bound_region_to_string(self.tcx, "lifetime parameter ", true, br)) } - infer::LateBoundRegion(_, br, infer::FnType) => { - format!(" for {}in function type", + infer::LateBoundRegion(_, br, infer::HigherRankedType) => { + format!(" for {}in generic type", bound_region_to_string(self.tcx, "lifetime parameter ", true, br)) } infer::EarlyBoundRegion(_, name) => { diff --git a/src/librustc/middle/typeck/infer/glb.rs b/src/librustc/middle/typeck/infer/glb.rs index 4ff4857ea06b..bb8c59f02597 100644 --- a/src/librustc/middle/typeck/infer/glb.rs +++ b/src/librustc/middle/typeck/infer/glb.rs @@ -10,23 +10,18 @@ use middle::ty::{BuiltinBounds}; -use middle::ty::RegionVid; use middle::ty; use middle::typeck::infer::combine::*; use middle::typeck::infer::lattice::*; use middle::typeck::infer::equate::Equate; -use middle::typeck::infer::fold_regions_in_sig; -use middle::typeck::infer::LateBoundRegionConversionTime::FnType; +use middle::typeck::infer::higher_ranked::HigherRankedRelations; use middle::typeck::infer::lub::Lub; -use middle::typeck::infer::region_inference::RegionMark; use middle::typeck::infer::sub::Sub; use middle::typeck::infer::{cres, InferCtxt}; use middle::typeck::infer::{TypeTrace, Subtype}; use syntax::ast::{Many, Once, MutImmutable, MutMutable}; -use syntax::ast::{NormalFn, UnsafeFn, NodeId}; +use syntax::ast::{NormalFn, UnsafeFn}; use syntax::ast::{Onceness, FnStyle}; -use util::common::{indenter}; -use util::nodemap::FnvHashMap; use util::ppaux::mt_to_string; use util::ppaux::Repr; @@ -128,139 +123,6 @@ impl<'f, 'tcx> Combine<'tcx> for Glb<'f, 'tcx> { } fn fn_sigs(&self, a: &ty::FnSig, b: &ty::FnSig) -> cres { - // Note: this is a subtle algorithm. For a full explanation, - // please see the large comment in `region_inference.rs`. - - debug!("{}.fn_sigs({}, {})", - self.tag(), a.repr(self.fields.infcx.tcx), b.repr(self.fields.infcx.tcx)); - let _indenter = indenter(); - - // Make a mark so we can examine "all bindings that were - // created as part of this type comparison". - let mark = self.fields.infcx.region_vars.mark(); - - // Instantiate each bound region with a fresh region variable. - let (a_with_fresh, a_map) = - self.fields.infcx.replace_late_bound_regions_with_fresh_var( - a.binder_id, self.trace().span(), FnType, a); - let a_vars = var_ids(self, &a_map); - let (b_with_fresh, b_map) = - self.fields.infcx.replace_late_bound_regions_with_fresh_var( - b.binder_id, self.trace().span(), FnType, b); - let b_vars = var_ids(self, &b_map); - - // Collect constraints. - let sig0 = try!(super_fn_sigs(self, &a_with_fresh, &b_with_fresh)); - debug!("sig0 = {}", sig0.repr(self.fields.infcx.tcx)); - - // Generalize the regions appearing in fn_ty0 if possible - let new_vars = - self.fields.infcx.region_vars.vars_created_since_mark(mark); - let sig1 = - fold_regions_in_sig( - self.fields.infcx.tcx, - &sig0, - |r| { - generalize_region(self, - mark, - new_vars.as_slice(), - sig0.binder_id, - &a_map, - a_vars.as_slice(), - b_vars.as_slice(), - r) - }); - debug!("sig1 = {}", sig1.repr(self.fields.infcx.tcx)); - return Ok(sig1); - - fn generalize_region(this: &Glb, - mark: RegionMark, - new_vars: &[RegionVid], - new_binder_id: NodeId, - a_map: &FnvHashMap, - a_vars: &[RegionVid], - b_vars: &[RegionVid], - r0: ty::Region) -> ty::Region { - if !is_var_in_set(new_vars, r0) { - assert!(!r0.is_bound()); - return r0; - } - - let tainted = this.fields.infcx.region_vars.tainted(mark, r0); - - let mut a_r = None; - let mut b_r = None; - let mut only_new_vars = true; - for r in tainted.iter() { - if is_var_in_set(a_vars, *r) { - if a_r.is_some() { - return fresh_bound_variable(this, new_binder_id); - } else { - a_r = Some(*r); - } - } else if is_var_in_set(b_vars, *r) { - if b_r.is_some() { - return fresh_bound_variable(this, new_binder_id); - } else { - b_r = Some(*r); - } - } else if !is_var_in_set(new_vars, *r) { - only_new_vars = false; - } - } - - // NB---I do not believe this algorithm computes - // (necessarily) the GLB. As written it can - // spuriously fail. In particular, if there is a case - // like: |fn(&a)| and fn(fn(&b)), where a and b are - // free, it will return fn(&c) where c = GLB(a,b). If - // however this GLB is not defined, then the result is - // an error, even though something like - // "fn(fn(&X))" where X is bound would be a - // subtype of both of those. - // - // The problem is that if we were to return a bound - // variable, we'd be computing a lower-bound, but not - // necessarily the *greatest* lower-bound. - // - // Unfortunately, this problem is non-trivial to solve, - // because we do not know at the time of computing the GLB - // whether a GLB(a,b) exists or not, because we haven't - // run region inference (or indeed, even fully computed - // the region hierarchy!). The current algorithm seems to - // works ok in practice. - - if a_r.is_some() && b_r.is_some() && only_new_vars { - // Related to exactly one bound variable from each fn: - return rev_lookup(this, a_map, new_binder_id, a_r.unwrap()); - } else if a_r.is_none() && b_r.is_none() { - // Not related to bound variables from either fn: - assert!(!r0.is_bound()); - return r0; - } else { - // Other: - return fresh_bound_variable(this, new_binder_id); - } - } - - fn rev_lookup(this: &Glb, - a_map: &FnvHashMap, - new_binder_id: NodeId, - r: ty::Region) -> ty::Region - { - for (a_br, a_r) in a_map.iter() { - if *a_r == r { - return ty::ReLateBound(new_binder_id, *a_br); - } - } - this.fields.infcx.tcx.sess.span_bug( - this.fields.trace.origin.span(), - format!("could not find original bound region for {}", - r).as_slice()) - } - - fn fresh_bound_variable(this: &Glb, binder_id: NodeId) -> ty::Region { - this.fields.infcx.region_vars.new_bound(binder_id) - } + self.higher_ranked_glb(a, b) } } diff --git a/src/librustc/middle/typeck/infer/higher_ranked/doc.rs b/src/librustc/middle/typeck/infer/higher_ranked/doc.rs new file mode 100644 index 000000000000..4c4452ac8923 --- /dev/null +++ b/src/librustc/middle/typeck/infer/higher_ranked/doc.rs @@ -0,0 +1,415 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +/*! + +# Skolemization and functions + +One of the trickiest and most subtle aspects of regions is dealing +with higher-ranked things which include bound region variables, such +as function types. I strongly suggest that if you want to understand +the situation, you read this paper (which is, admittedly, very long, +but you don't have to read the whole thing): + +http://research.microsoft.com/en-us/um/people/simonpj/papers/higher-rank/ + +Although my explanation will never compete with SPJ's (for one thing, +his is approximately 100 pages), I will attempt to explain the basic +problem and also how we solve it. Note that the paper only discusses +subtyping, not the computation of LUB/GLB. + +The problem we are addressing is that there is a kind of subtyping +between functions with bound region parameters. Consider, for +example, whether the following relation holds: + + for<'a> fn(&'a int) <: for<'b> fn(&'b int)? (Yes, a => b) + +The answer is that of course it does. These two types are basically +the same, except that in one we used the name `a` and one we used +the name `b`. + +In the examples that follow, it becomes very important to know whether +a lifetime is bound in a function type (that is, is a lifetime +parameter) or appears free (is defined in some outer scope). +Therefore, from now on I will always write the bindings explicitly, +using the Rust syntax `for<'a> fn(&'a int)` to indicate that `a` is a +lifetime parameter. + +Now let's consider two more function types. Here, we assume that the +`'b` lifetime is defined somewhere outside and hence is not a lifetime +parameter bound by the function type (it "appears free"): + + for<'a> fn(&'a int) <: fn(&'b int)? (Yes, a => b) + +This subtyping relation does in fact hold. To see why, you have to +consider what subtyping means. One way to look at `T1 <: T2` is to +say that it means that it is always ok to treat an instance of `T1` as +if it had the type `T2`. So, with our functions, it is always ok to +treat a function that can take pointers with any lifetime as if it +were a function that can only take a pointer with the specific +lifetime `'b`. After all, `'b` is a lifetime, after all, and +the function can take values of any lifetime. + +You can also look at subtyping as the *is a* relationship. This amounts +to the same thing: a function that accepts pointers with any lifetime +*is a* function that accepts pointers with some specific lifetime. + +So, what if we reverse the order of the two function types, like this: + + fn(&'b int) <: for<'a> fn(&'a int)? (No) + +Does the subtyping relationship still hold? The answer of course is +no. In this case, the function accepts *only the lifetime `'b`*, +so it is not reasonable to treat it as if it were a function that +accepted any lifetime. + +What about these two examples: + + for<'a,'b> fn(&'a int, &'b int) <: for<'a> fn(&'a int, &'a int)? (Yes) + for<'a> fn(&'a int, &'a int) <: for<'a,'b> fn(&'a int, &'b int)? (No) + +Here, it is true that functions which take two pointers with any two +lifetimes can be treated as if they only accepted two pointers with +the same lifetime, but not the reverse. + +## The algorithm + +Here is the algorithm we use to perform the subtyping check: + +1. Replace all bound regions in the subtype with new variables +2. Replace all bound regions in the supertype with skolemized + equivalents. A "skolemized" region is just a new fresh region + name. +3. Check that the parameter and return types match as normal +4. Ensure that no skolemized regions 'leak' into region variables + visible from "the outside" + +Let's walk through some examples and see how this algorithm plays out. + +#### First example + +We'll start with the first example, which was: + + 1. for<'a> fn(&'a T) <: for<'b> fn(&'b T)? Yes: a -> b + +After steps 1 and 2 of the algorithm we will have replaced the types +like so: + + 1. fn(&'A T) <: fn(&'x T)? + +Here the upper case `&A` indicates a *region variable*, that is, a +region whose value is being inferred by the system. I also replaced +`&b` with `&x`---I'll use letters late in the alphabet (`x`, `y`, `z`) +to indicate skolemized region names. We can assume they don't appear +elsewhere. Note that neither the sub- nor the supertype bind any +region names anymore (as indicated by the absence of `<` and `>`). + +The next step is to check that the parameter types match. Because +parameters are contravariant, this means that we check whether: + + &'x T <: &'A T + +Region pointers are contravariant so this implies that + + &A <= &x + +must hold, where `<=` is the subregion relationship. Processing +*this* constrain simply adds a constraint into our graph that `&A <= +&x` and is considered successful (it can, for example, be satisfied by +choosing the value `&x` for `&A`). + +So far we have encountered no error, so the subtype check succeeds. + +#### The third example + +Now let's look first at the third example, which was: + + 3. fn(&'a T) <: for<'b> fn(&'b T)? No! + +After steps 1 and 2 of the algorithm we will have replaced the types +like so: + + 3. fn(&'a T) <: fn(&'x T)? + +This looks pretty much the same as before, except that on the LHS +`'a` was not bound, and hence was left as-is and not replaced with +a variable. The next step is again to check that the parameter types +match. This will ultimately require (as before) that `'a` <= `&x` +must hold: but this does not hold. `self` and `x` are both distinct +free regions. So the subtype check fails. + +#### Checking for skolemization leaks + +You may be wondering about that mysterious last step in the algorithm. +So far it has not been relevant. The purpose of that last step is to +catch something like *this*: + + for<'a> fn() -> fn(&'a T) <: fn() -> for<'b> fn(&'b T)? No. + +Here the function types are the same but for where the binding occurs. +The subtype returns a function that expects a value in precisely one +region. The supertype returns a function that expects a value in any +region. If we allow an instance of the subtype to be used where the +supertype is expected, then, someone could call the fn and think that +the return value has type `fn(&'b T)` when it really has type +`fn(&'a T)` (this is case #3, above). Bad. + +So let's step through what happens when we perform this subtype check. +We first replace the bound regions in the subtype (the supertype has +no bound regions). This gives us: + + fn() -> fn(&'A T) <: fn() -> for<'b> fn(&'b T)? + +Now we compare the return types, which are covariant, and hence we have: + + fn(&'A T) <: for<'b> fn(&'b T)? + +Here we skolemize the bound region in the supertype to yield: + + fn(&'A T) <: fn(&'x T)? + +And then proceed to compare the argument types: + + &'x T <: &'A T + 'A <= 'x + +Finally, this is where it gets interesting! This is where an error +*should* be reported. But in fact this will not happen. The reason why +is that `A` is a variable: we will infer that its value is the fresh +region `x` and think that everything is happy. In fact, this behavior +is *necessary*, it was key to the first example we walked through. + +The difference between this example and the first one is that the variable +`A` already existed at the point where the skolemization occurred. In +the first example, you had two functions: + + for<'a> fn(&'a T) <: for<'b> fn(&'b T) + +and hence `&A` and `&x` were created "together". In general, the +intention of the skolemized names is that they are supposed to be +fresh names that could never be equal to anything from the outside. +But when inference comes into play, we might not be respecting this +rule. + +So the way we solve this is to add a fourth step that examines the +constraints that refer to skolemized names. Basically, consider a +non-directed verison of the constraint graph. Let `Tainted(x)` be the +set of all things reachable from a skolemized variable `x`. +`Tainted(x)` should not contain any regions that existed before the +step at which the skolemization was performed. So this case here +would fail because `&x` was created alone, but is relatable to `&A`. + +## Computing the LUB and GLB + +The paper I pointed you at is written for Haskell. It does not +therefore considering subtyping and in particular does not consider +LUB or GLB computation. We have to consider this. Here is the +algorithm I implemented. + +First though, let's discuss what we are trying to compute in more +detail. The LUB is basically the "common supertype" and the GLB is +"common subtype"; one catch is that the LUB should be the +*most-specific* common supertype and the GLB should be *most general* +common subtype (as opposed to any common supertype or any common +subtype). + +Anyway, to help clarify, here is a table containing some function +pairs and their LUB/GLB (for conciseness, in this table, I'm just +including the lifetimes here, not the rest of the types, and I'm +writing `fn<>` instead of `for<> fn`): + +``` +Type 1 Type 2 LUB GLB +fn<'a>('a) fn('X) fn('X) fn<'a>('a) +fn('a) fn('X) -- fn<'a>('a) +fn<'a,'b>('a, 'b) fn<'x>('x, 'x) fn<'a>('a, 'a) fn<'a,'b>('a, 'b) +fn<'a,'b>('a, 'b, 'a) fn<'x,'y>('x, 'y, 'y) fn<'a>('a, 'a, 'a) fn<'a,'b,'c>('a,'b,'c) +``` + +### Conventions + +I use lower-case letters (e.g., `&a`) for bound regions and upper-case +letters for free regions (`&A`). Region variables written with a +dollar-sign (e.g., `$a`). I will try to remember to enumerate the +bound-regions on the fn type as well (e.g., `for<'a> fn(&a)`). + +### High-level summary + +Both the LUB and the GLB algorithms work in a similar fashion. They +begin by replacing all bound regions (on both sides) with fresh region +inference variables. Therefore, both functions are converted to types +that contain only free regions. We can then compute the LUB/GLB in a +straightforward way, as described in `combine.rs`. This results in an +interim type T. The algorithms then examine the regions that appear +in T and try to, in some cases, replace them with bound regions to +yield the final result. + +To decide whether to replace a region `R` that appears in `T` with a +bound region, the algorithms make use of two bits of information. +First is a set `V` that contains all region variables created as part +of the LUB/GLB computation. `V` will contain the region variables +created to replace the bound regions in the input types, but it also +contains 'intermediate' variables created to represent the LUB/GLB of +individual regions. Basically, when asked to compute the LUB/GLB of a +region variable with another region, the inferencer cannot oblige +immediately since the values of that variables are not known. +Therefore, it creates a new variable that is related to the two +regions. For example, the LUB of two variables `$x` and `$y` is a +fresh variable `$z` that is constrained such that `$x <= $z` and `$y +<= $z`. So `V` will contain these intermediate variables as well. + +The other important factor in deciding how to replace a region in T is +the function `Tainted($r)` which, for a region variable, identifies +all regions that the region variable is related to in some way +(`Tainted()` made an appearance in the subtype computation as well). + +### LUB + +The LUB algorithm proceeds in three steps: + +1. Replace all bound regions (on both sides) with fresh region + inference variables. +2. Compute the LUB "as normal", meaning compute the GLB of each + pair of argument types and the LUB of the return types and + so forth. Combine those to a new function type `F`. +3. Replace each region `R` that appears in `F` as follows: + - Let `V` be the set of variables created during the LUB + computational steps 1 and 2, as described in the previous section. + - If `R` is not in `V`, replace `R` with itself. + - If `Tainted(R)` contains a region that is not in `V`, + replace `R` with itself. + - Otherwise, select the earliest variable in `Tainted(R)` that originates + from the left-hand side and replace `R` with the bound region that + this variable was a replacement for. + +So, let's work through the simplest example: `fn(&A)` and `for<'a> fn(&a)`. +In this case, `&a` will be replaced with `$a` and the interim LUB type +`fn($b)` will be computed, where `$b=GLB(&A,$a)`. Therefore, `V = +{$a, $b}` and `Tainted($b) = { $b, $a, &A }`. When we go to replace +`$b`, we find that since `&A \in Tainted($b)` is not a member of `V`, +we leave `$b` as is. When region inference happens, `$b` will be +resolved to `&A`, as we wanted. + +Let's look at a more complex one: `fn(&a, &b)` and `fn(&x, &x)`. In +this case, we'll end up with a (pre-replacement) LUB type of `fn(&g, +&h)` and a graph that looks like: + +``` + $a $b *--$x + \ \ / / + \ $h-* / + $g-----------* +``` + +Here `$g` and `$h` are fresh variables that are created to represent +the LUB/GLB of things requiring inference. This means that `V` and +`Tainted` will look like: + +``` +V = {$a, $b, $g, $h, $x} +Tainted($g) = Tainted($h) = { $a, $b, $h, $g, $x } +``` + +Therefore we replace both `$g` and `$h` with `$a`, and end up +with the type `fn(&a, &a)`. + +### GLB + +The procedure for computing the GLB is similar. The difference lies +in computing the replacements for the various variables. For each +region `R` that appears in the type `F`, we again compute `Tainted(R)` +and examine the results: + +1. If `R` is not in `V`, it is not replaced. +2. Else, if `Tainted(R)` contains only variables in `V`, and it + contains exactly one variable from the LHS and one variable from + the RHS, then `R` can be mapped to the bound version of the + variable from the LHS. +3. Else, if `Tainted(R)` contains no variable from the LHS and no + variable from the RHS, then `R` can be mapped to itself. +4. Else, `R` is mapped to a fresh bound variable. + +These rules are pretty complex. Let's look at some examples to see +how they play out. + +Out first example was `fn(&a)` and `fn(&X)`. In this case, `&a` will +be replaced with `$a` and we will ultimately compute a +(pre-replacement) GLB type of `fn($g)` where `$g=LUB($a,&X)`. +Therefore, `V={$a,$g}` and `Tainted($g)={$g,$a,&X}. To find the +replacement for `$g` we consult the rules above: +- Rule (1) does not apply because `$g \in V` +- Rule (2) does not apply because `&X \in Tainted($g)` +- Rule (3) does not apply because `$a \in Tainted($g)` +- Hence, by rule (4), we replace `$g` with a fresh bound variable `&z`. +So our final result is `fn(&z)`, which is correct. + +The next example is `fn(&A)` and `fn(&Z)`. In this case, we will again +have a (pre-replacement) GLB of `fn(&g)`, where `$g = LUB(&A,&Z)`. +Therefore, `V={$g}` and `Tainted($g) = {$g, &A, &Z}`. In this case, +by rule (3), `$g` is mapped to itself, and hence the result is +`fn($g)`. This result is correct (in this case, at least), but it is +indicative of a case that *can* lead us into concluding that there is +no GLB when in fact a GLB does exist. See the section "Questionable +Results" below for more details. + +The next example is `fn(&a, &b)` and `fn(&c, &c)`. In this case, as +before, we'll end up with `F=fn($g, $h)` where `Tainted($g) = +Tainted($h) = {$g, $h, $a, $b, $c}`. Only rule (4) applies and hence +we'll select fresh bound variables `y` and `z` and wind up with +`fn(&y, &z)`. + +For the last example, let's consider what may seem trivial, but is +not: `fn(&a, &a)` and `fn(&b, &b)`. In this case, we'll get `F=fn($g, +$h)` where `Tainted($g) = {$g, $a, $x}` and `Tainted($h) = {$h, $a, +$x}`. Both of these sets contain exactly one bound variable from each +side, so we'll map them both to `&a`, resulting in `fn(&a, &a)`, which +is the desired result. + +### Shortcomings and correctness + +You may be wondering whether this algorithm is correct. The answer is +"sort of". There are definitely cases where they fail to compute a +result even though a correct result exists. I believe, though, that +if they succeed, then the result is valid, and I will attempt to +convince you. The basic argument is that the "pre-replacement" step +computes a set of constraints. The replacements, then, attempt to +satisfy those constraints, using bound identifiers where needed. + +For now I will briefly go over the cases for LUB/GLB and identify +their intent: + +- LUB: + - The region variables that are substituted in place of bound regions + are intended to collect constraints on those bound regions. + - If Tainted(R) contains only values in V, then this region is unconstrained + and can therefore be generalized, otherwise it cannot. +- GLB: + - The region variables that are substituted in place of bound regions + are intended to collect constraints on those bound regions. + - If Tainted(R) contains exactly one variable from each side, and + only variables in V, that indicates that those two bound regions + must be equated. + - Otherwise, if Tainted(R) references any variables from left or right + side, then it is trying to combine a bound region with a free one or + multiple bound regions, so we need to select fresh bound regions. + +Sorry this is more of a shorthand to myself. I will try to write up something +more convincing in the future. + +#### Where are the algorithms wrong? + +- The pre-replacement computation can fail even though using a + bound-region would have succeeded. +- We will compute GLB(fn(fn($a)), fn(fn($b))) as fn($c) where $c is the + GLB of $a and $b. But if inference finds that $a and $b must be mapped + to regions without a GLB, then this is effectively a failure to compute + the GLB. However, the result `fn<$c>(fn($c))` is a valid GLB. + + */ diff --git a/src/librustc/middle/typeck/infer/higher_ranked/mod.rs b/src/librustc/middle/typeck/infer/higher_ranked/mod.rs new file mode 100644 index 000000000000..09f0bbb2254a --- /dev/null +++ b/src/librustc/middle/typeck/infer/higher_ranked/mod.rs @@ -0,0 +1,445 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +/*! + * Helper routines for higher-ranked things. See the `doc` module at + * the end of the file for details. + */ + +use middle::ty; +use middle::ty::replace_late_bound_regions; +use middle::typeck::infer::{mod, combine, cres, InferCtxt}; +use middle::typeck::infer::combine::Combine; +use middle::typeck::infer::region_inference::{RegionMark}; +use middle::ty_fold::{mod, HigherRankedFoldable, TypeFoldable}; +use syntax::codemap::Span; +use util::nodemap::FnvHashMap; +use util::ppaux::{bound_region_to_string, Repr}; + +pub trait HigherRankedCombineable : HigherRankedFoldable + TypeFoldable + Repr { + fn super_combine<'tcx,C:Combine<'tcx>>(combiner: &C, a: &Self, b: &Self) -> cres; +} + +pub trait HigherRankedRelations { + fn higher_ranked_sub(&self, a: &T, b: &T) -> cres + where T : HigherRankedCombineable; + + fn higher_ranked_lub(&self, a: &T, b: &T) -> cres + where T : HigherRankedCombineable; + + fn higher_ranked_glb(&self, a: &T, b: &T) -> cres + where T : HigherRankedCombineable; +} + +impl<'tcx,C> HigherRankedRelations for C + where C : Combine<'tcx> +{ + fn higher_ranked_sub(&self, a: &T, b: &T) -> cres + where T : HigherRankedCombineable + { + debug!("higher_ranked_sub(a={}, b={})", + a.repr(self.tcx()), b.repr(self.tcx())); + + // Rather than checking the subtype relationship between `a` and `b` + // as-is, we need to do some extra work here in order to make sure + // that function subtyping works correctly with respect to regions + // + // Note: this is a subtle algorithm. For a full explanation, + // please see the large comment at the end of the file in the (inlined) module + // `doc`. + + // Make a mark so we can examine "all bindings that were + // created as part of this type comparison". + let mark = self.infcx().region_vars.mark(); + + // First, we instantiate each bound region in the subtype with a fresh + // region variable. + let (a_prime, _) = + self.infcx().replace_late_bound_regions_with_fresh_var( + self.trace().origin.span(), + infer::HigherRankedType, + a); + + // Second, we instantiate each bound region in the supertype with a + // fresh concrete region. + let (b_prime, skol_map) = { + replace_late_bound_regions(self.tcx(), b, |br, _| { + let skol = self.infcx().region_vars.new_skolemized(br); + debug!("Bound region {} skolemized to {}", + bound_region_to_string(self.tcx(), "", false, br), + skol); + skol + }) + }; + + debug!("a_prime={}", a_prime.repr(self.tcx())); + debug!("b_prime={}", b_prime.repr(self.tcx())); + + // Compare types now that bound regions have been replaced. + let result = try!(HigherRankedCombineable::super_combine(self, &a_prime, &b_prime)); + + // Presuming type comparison succeeds, we need to check + // that the skolemized regions do not "leak". + let new_vars = + self.infcx().region_vars.vars_created_since_mark(mark); + for (&skol_br, &skol) in skol_map.iter() { + let tainted = self.infcx().region_vars.tainted(mark, skol); + for tainted_region in tainted.iter() { + // Each skolemized should only be relatable to itself + // or new variables: + match *tainted_region { + ty::ReInfer(ty::ReVar(ref vid)) => { + if new_vars.iter().any(|x| x == vid) { continue; } + } + _ => { + if *tainted_region == skol { continue; } + } + }; + + // A is not as polymorphic as B: + if self.a_is_expected() { + debug!("Not as polymorphic!"); + return Err(ty::terr_regions_insufficiently_polymorphic( + skol_br, *tainted_region)); + } else { + debug!("Overly polymorphic!"); + return Err(ty::terr_regions_overly_polymorphic( + skol_br, *tainted_region)); + } + } + } + + debug!("higher_ranked_sub: OK result={}", + result.repr(self.tcx())); + + return Ok(result); + } + + fn higher_ranked_lub(&self, a: &T, b: &T) -> cres + where T : HigherRankedCombineable + { + // Make a mark so we can examine "all bindings that were + // created as part of this type comparison". + let mark = self.infcx().region_vars.mark(); + + // Instantiate each bound region with a fresh region variable. + let span = self.trace().origin.span(); + let (a_with_fresh, a_map) = + self.infcx().replace_late_bound_regions_with_fresh_var( + span, infer::HigherRankedType, a); + let (b_with_fresh, _) = + self.infcx().replace_late_bound_regions_with_fresh_var( + span, infer::HigherRankedType, b); + + // Collect constraints. + let result0 = + try!(HigherRankedCombineable::super_combine(self, &a_with_fresh, &b_with_fresh)); + debug!("lub result0 = {}", result0.repr(self.tcx())); + + // Generalize the regions appearing in result0 if possible + let new_vars = self.infcx().region_vars.vars_created_since_mark(mark); + let span = self.trace().origin.span(); + let result1 = + fold_regions_in( + self.tcx(), + &result0, + |r, debruijn| generalize_region(self.infcx(), span, mark, debruijn, + new_vars.as_slice(), &a_map, r)); + + debug!("lub({},{}) = {}", + a.repr(self.tcx()), + b.repr(self.tcx()), + result1.repr(self.tcx())); + + return Ok(result1); + + fn generalize_region(infcx: &InferCtxt, + span: Span, + mark: RegionMark, + debruijn: ty::DebruijnIndex, + new_vars: &[ty::RegionVid], + a_map: &FnvHashMap, + r0: ty::Region) + -> ty::Region { + // Regions that pre-dated the LUB computation stay as they are. + if !is_var_in_set(new_vars, r0) { + assert!(!r0.is_bound()); + debug!("generalize_region(r0={}): not new variable", r0); + return r0; + } + + let tainted = infcx.region_vars.tainted(mark, r0); + + // Variables created during LUB computation which are + // *related* to regions that pre-date the LUB computation + // stay as they are. + if !tainted.iter().all(|r| is_var_in_set(new_vars, *r)) { + debug!("generalize_region(r0={}): \ + non-new-variables found in {}", + r0, tainted); + assert!(!r0.is_bound()); + return r0; + } + + // Otherwise, the variable must be associated with at + // least one of the variables representing bound regions + // in both A and B. Replace the variable with the "first" + // bound region from A that we find it to be associated + // with. + for (a_br, a_r) in a_map.iter() { + if tainted.iter().any(|x| x == a_r) { + debug!("generalize_region(r0={}): \ + replacing with {}, tainted={}", + r0, *a_br, tainted); + return ty::ReLateBound(debruijn, *a_br); + } + } + + infcx.tcx.sess.span_bug( + span, + format!("region {} is not associated with \ + any bound region from A!", + r0).as_slice()) + } + } + + fn higher_ranked_glb(&self, a: &T, b: &T) -> cres + where T : HigherRankedCombineable + { + debug!("{}.higher_ranked_glb({}, {})", + self.tag(), a.repr(self.tcx()), b.repr(self.tcx())); + + // Make a mark so we can examine "all bindings that were + // created as part of this type comparison". + let mark = self.infcx().region_vars.mark(); + + // Instantiate each bound region with a fresh region variable. + let (a_with_fresh, a_map) = + self.infcx().replace_late_bound_regions_with_fresh_var( + self.trace().origin.span(), infer::HigherRankedType, a); + let (b_with_fresh, b_map) = + self.infcx().replace_late_bound_regions_with_fresh_var( + self.trace().origin.span(), infer::HigherRankedType, b); + let a_vars = var_ids(self, &a_map); + let b_vars = var_ids(self, &b_map); + + // Collect constraints. + let result0 = + try!(HigherRankedCombineable::super_combine(self, &a_with_fresh, &b_with_fresh)); + debug!("glb result0 = {}", result0.repr(self.tcx())); + + // Generalize the regions appearing in fn_ty0 if possible + let new_vars = self.infcx().region_vars.vars_created_since_mark(mark); + let span = self.trace().origin.span(); + let result1 = + fold_regions_in( + self.tcx(), + &result0, + |r, debruijn| generalize_region(self.infcx(), span, mark, debruijn, + new_vars.as_slice(), + &a_map, a_vars.as_slice(), b_vars.as_slice(), + r)); + + debug!("glb({},{}) = {}", + a.repr(self.tcx()), + b.repr(self.tcx()), + result1.repr(self.tcx())); + + return Ok(result1); + + fn generalize_region(infcx: &InferCtxt, + span: Span, + mark: RegionMark, + debruijn: ty::DebruijnIndex, + new_vars: &[ty::RegionVid], + a_map: &FnvHashMap, + a_vars: &[ty::RegionVid], + b_vars: &[ty::RegionVid], + r0: ty::Region) -> ty::Region { + if !is_var_in_set(new_vars, r0) { + assert!(!r0.is_bound()); + return r0; + } + + let tainted = infcx.region_vars.tainted(mark, r0); + + let mut a_r = None; + let mut b_r = None; + let mut only_new_vars = true; + for r in tainted.iter() { + if is_var_in_set(a_vars, *r) { + if a_r.is_some() { + return fresh_bound_variable(infcx, debruijn); + } else { + a_r = Some(*r); + } + } else if is_var_in_set(b_vars, *r) { + if b_r.is_some() { + return fresh_bound_variable(infcx, debruijn); + } else { + b_r = Some(*r); + } + } else if !is_var_in_set(new_vars, *r) { + only_new_vars = false; + } + } + + // NB---I do not believe this algorithm computes + // (necessarily) the GLB. As written it can + // spuriously fail. In particular, if there is a case + // like: |fn(&a)| and fn(fn(&b)), where a and b are + // free, it will return fn(&c) where c = GLB(a,b). If + // however this GLB is not defined, then the result is + // an error, even though something like + // "fn(fn(&X))" where X is bound would be a + // subtype of both of those. + // + // The problem is that if we were to return a bound + // variable, we'd be computing a lower-bound, but not + // necessarily the *greatest* lower-bound. + // + // Unfortunately, this problem is non-trivial to solve, + // because we do not know at the time of computing the GLB + // whether a GLB(a,b) exists or not, because we haven't + // run region inference (or indeed, even fully computed + // the region hierarchy!). The current algorithm seems to + // works ok in practice. + + if a_r.is_some() && b_r.is_some() && only_new_vars { + // Related to exactly one bound variable from each fn: + return rev_lookup(infcx, span, a_map, a_r.unwrap()); + } else if a_r.is_none() && b_r.is_none() { + // Not related to bound variables from either fn: + assert!(!r0.is_bound()); + return r0; + } else { + // Other: + return fresh_bound_variable(infcx, debruijn); + } + } + + fn rev_lookup(infcx: &InferCtxt, + span: Span, + a_map: &FnvHashMap, + r: ty::Region) -> ty::Region + { + for (a_br, a_r) in a_map.iter() { + if *a_r == r { + return ty::ReLateBound(ty::DebruijnIndex::new(1), *a_br); + } + } + infcx.tcx.sess.span_bug( + span, + format!("could not find original bound region for {}", r)[]); + } + + fn fresh_bound_variable(infcx: &InferCtxt, debruijn: ty::DebruijnIndex) -> ty::Region { + infcx.region_vars.new_bound(debruijn) + } + } +} + +impl HigherRankedCombineable for ty::FnSig { + fn super_combine<'tcx,C:Combine<'tcx>>(combiner: &C, a: &ty::FnSig, b: &ty::FnSig) + -> cres + { + if a.variadic != b.variadic { + return Err(ty::terr_variadic_mismatch( + combine::expected_found(combiner, a.variadic, b.variadic))); + } + + let inputs = try!(argvecs(combiner, + a.inputs.as_slice(), + b.inputs.as_slice())); + + let output = try!(match (a.output, b.output) { + (ty::FnConverging(a_ty), ty::FnConverging(b_ty)) => + Ok(ty::FnConverging(try!(combiner.tys(a_ty, b_ty)))), + (ty::FnDiverging, ty::FnDiverging) => + Ok(ty::FnDiverging), + (a, b) => + Err(ty::terr_convergence_mismatch( + combine::expected_found(combiner, a != ty::FnDiverging, b != ty::FnDiverging))), + }); + + return Ok(ty::FnSig {inputs: inputs, + output: output, + variadic: a.variadic}); + + + fn argvecs<'tcx, C: Combine<'tcx>>(combiner: &C, + a_args: &[ty::t], + b_args: &[ty::t]) + -> cres> + { + if a_args.len() == b_args.len() { + a_args.iter().zip(b_args.iter()) + .map(|(a, b)| combiner.args(*a, *b)).collect() + } else { + Err(ty::terr_arg_count) + } + } + } +} + +impl HigherRankedCombineable for ty::TraitRef { + fn super_combine<'tcx,C:Combine<'tcx>>(combiner: &C, a: &ty::TraitRef, b: &ty::TraitRef) + -> cres + { + // Different traits cannot be related + if a.def_id != b.def_id { + Err(ty::terr_traits( + combine::expected_found(combiner, a.def_id, b.def_id))) + } else { + let substs = try!(combiner.substs(a.def_id, &a.substs, &b.substs)); + Ok(ty::TraitRef { def_id: a.def_id, + substs: substs }) + } + } +} + +fn var_ids<'tcx, T: Combine<'tcx>>(combiner: &T, + map: &FnvHashMap) + -> Vec { + map.iter().map(|(_, r)| match *r { + ty::ReInfer(ty::ReVar(r)) => { r } + r => { + combiner.infcx().tcx.sess.span_bug( + combiner.trace().origin.span(), + format!("found non-region-vid: {}", r).as_slice()); + } + }).collect() +} + +fn is_var_in_set(new_vars: &[ty::RegionVid], r: ty::Region) -> bool { + match r { + ty::ReInfer(ty::ReVar(ref v)) => new_vars.iter().any(|x| x == v), + _ => false + } +} + +fn fold_regions_in(tcx: &ty::ctxt, + value: &T, + fldr: |ty::Region, ty::DebruijnIndex| -> ty::Region) + -> T +{ + value.fold_contents(&mut ty_fold::RegionFolder::new(tcx, |region, current_depth| { + // we should only be encountering "escaping" late-bound regions here, + // because the ones at the current level should have been replaced + // with fresh variables + assert!(match region { + ty::ReLateBound(..) => false, + _ => true + }); + + fldr(region, ty::DebruijnIndex::new(current_depth)) + })) +} + diff --git a/src/librustc/middle/typeck/infer/lub.rs b/src/librustc/middle/typeck/infer/lub.rs index aaad3d573be5..492ac5e92d3c 100644 --- a/src/librustc/middle/typeck/infer/lub.rs +++ b/src/librustc/middle/typeck/infer/lub.rs @@ -9,23 +9,19 @@ // except according to those terms. use middle::ty::{BuiltinBounds}; -use middle::ty::RegionVid; use middle::ty; use middle::typeck::infer::combine::*; use middle::typeck::infer::equate::Equate; use middle::typeck::infer::glb::Glb; +use middle::typeck::infer::higher_ranked::HigherRankedRelations; use middle::typeck::infer::lattice::*; -use middle::typeck::infer::LateBoundRegionConversionTime::FnType; use middle::typeck::infer::sub::Sub; use middle::typeck::infer::{cres, InferCtxt}; -use middle::typeck::infer::fold_regions_in_sig; use middle::typeck::infer::{TypeTrace, Subtype}; -use middle::typeck::infer::region_inference::RegionMark; -use syntax::ast::{Many, Once, NodeId}; +use syntax::ast::{Many, Once}; use syntax::ast::{NormalFn, UnsafeFn}; use syntax::ast::{Onceness, FnStyle}; use syntax::ast::{MutMutable, MutImmutable}; -use util::nodemap::FnvHashMap; use util::ppaux::mt_to_string; use util::ppaux::Repr; @@ -118,83 +114,7 @@ impl<'f, 'tcx> Combine<'tcx> for Lub<'f, 'tcx> { } fn fn_sigs(&self, a: &ty::FnSig, b: &ty::FnSig) -> cres { - // Note: this is a subtle algorithm. For a full explanation, - // please see the large comment in `region_inference.rs`. - - // Make a mark so we can examine "all bindings that were - // created as part of this type comparison". - let mark = self.fields.infcx.region_vars.mark(); - - // Instantiate each bound region with a fresh region variable. - let (a_with_fresh, a_map) = - self.fields.infcx.replace_late_bound_regions_with_fresh_var( - a.binder_id, self.trace().span(), FnType, a); - let (b_with_fresh, _) = - self.fields.infcx.replace_late_bound_regions_with_fresh_var( - b.binder_id, self.trace().span(), FnType, b); - - // Collect constraints. - let sig0 = try!(super_fn_sigs(self, &a_with_fresh, &b_with_fresh)); - debug!("sig0 = {}", sig0.repr(self.fields.infcx.tcx)); - - // Generalize the regions appearing in sig0 if possible - let new_vars = - self.fields.infcx.region_vars.vars_created_since_mark(mark); - let sig1 = - fold_regions_in_sig( - self.fields.infcx.tcx, - &sig0, - |r| generalize_region(self, mark, new_vars.as_slice(), - sig0.binder_id, &a_map, r)); - return Ok(sig1); - - fn generalize_region(this: &Lub, - mark: RegionMark, - new_vars: &[RegionVid], - new_scope: NodeId, - a_map: &FnvHashMap, - r0: ty::Region) - -> ty::Region { - // Regions that pre-dated the LUB computation stay as they are. - if !is_var_in_set(new_vars, r0) { - assert!(!r0.is_bound()); - debug!("generalize_region(r0={}): not new variable", r0); - return r0; - } - - let tainted = this.fields.infcx.region_vars.tainted(mark, r0); - - // Variables created during LUB computation which are - // *related* to regions that pre-date the LUB computation - // stay as they are. - if !tainted.iter().all(|r| is_var_in_set(new_vars, *r)) { - debug!("generalize_region(r0={}): \ - non-new-variables found in {}", - r0, tainted); - assert!(!r0.is_bound()); - return r0; - } - - // Otherwise, the variable must be associated with at - // least one of the variables representing bound regions - // in both A and B. Replace the variable with the "first" - // bound region from A that we find it to be associated - // with. - for (a_br, a_r) in a_map.iter() { - if tainted.iter().any(|x| x == a_r) { - debug!("generalize_region(r0={}): \ - replacing with {}, tainted={}", - r0, *a_br, tainted); - return ty::ReLateBound(new_scope, *a_br); - } - } - - this.fields.infcx.tcx.sess.span_bug( - this.fields.trace.origin.span(), - format!("region {} is not associated with \ - any bound region from A!", - r0).as_slice()) - } + self.higher_ranked_lub(a, b) } fn tys(&self, a: ty::t, b: ty::t) -> cres { diff --git a/src/librustc/middle/typeck/infer/mod.rs b/src/librustc/middle/typeck/infer/mod.rs index a75e3a78e4b2..12807f9050e4 100644 --- a/src/librustc/middle/typeck/infer/mod.rs +++ b/src/librustc/middle/typeck/infer/mod.rs @@ -61,6 +61,7 @@ pub mod doc; pub mod equate; pub mod error_reporting; pub mod glb; +pub mod higher_ranked; pub mod lattice; pub mod lub; pub mod region_inference; @@ -233,8 +234,8 @@ pub enum LateBoundRegionConversionTime { /// when a fn is called FnCall, - /// when two fn types are compared - FnType, + /// when two higher-ranked types are compared + HigherRankedType, } /// Reasons to create a region inference variable diff --git a/src/librustc/middle/typeck/infer/region_inference/doc.rs b/src/librustc/middle/typeck/infer/region_inference/doc.rs index a64f7896cfbf..40b41deeb2b6 100644 --- a/src/librustc/middle/typeck/infer/region_inference/doc.rs +++ b/src/librustc/middle/typeck/infer/region_inference/doc.rs @@ -370,404 +370,9 @@ address this problem somehow and make region inference somewhat more efficient. Note that this is solely a matter of performance, not expressiveness. -# Skolemization and functions +### Skolemization -One of the trickiest and most subtle aspects of regions is dealing -with the fact that region variables are bound in function types. I -strongly suggest that if you want to understand the situation, you -read this paper (which is, admittedly, very long, but you don't have -to read the whole thing): - -http://research.microsoft.com/en-us/um/people/simonpj/papers/higher-rank/ - -Although my explanation will never compete with SPJ's (for one thing, -his is approximately 100 pages), I will attempt to explain the basic -problem and also how we solve it. Note that the paper only discusses -subtyping, not the computation of LUB/GLB. - -The problem we are addressing is that there is a kind of subtyping -between functions with bound region parameters. Consider, for -example, whether the following relation holds: - - fn(&'a int) <: |&'b int|? (Yes, a => b) - -The answer is that of course it does. These two types are basically -the same, except that in one we used the name `a` and one we used -the name `b`. - -In the examples that follow, it becomes very important to know whether -a lifetime is bound in a function type (that is, is a lifetime -parameter) or appears free (is defined in some outer scope). -Therefore, from now on I will write the bindings explicitly, using a -notation like `fn(&'a int)` to indicate that `a` is a lifetime -parameter. - -Now let's consider two more function types. Here, we assume that the -`self` lifetime is defined somewhere outside and hence is not a -lifetime parameter bound by the function type (it "appears free"): - - fn(&'a int) <: |&'a int|? (Yes, a => self) - -This subtyping relation does in fact hold. To see why, you have to -consider what subtyping means. One way to look at `T1 <: T2` is to -say that it means that it is always ok to treat an instance of `T1` as -if it had the type `T2`. So, with our functions, it is always ok to -treat a function that can take pointers with any lifetime as if it -were a function that can only take a pointer with the specific -lifetime `&self`. After all, `&self` is a lifetime, after all, and -the function can take values of any lifetime. - -You can also look at subtyping as the *is a* relationship. This amounts -to the same thing: a function that accepts pointers with any lifetime -*is a* function that accepts pointers with some specific lifetime. - -So, what if we reverse the order of the two function types, like this: - - fn(&'a int) <: |&'a int|? (No) - -Does the subtyping relationship still hold? The answer of course is -no. In this case, the function accepts *only the lifetime `&self`*, -so it is not reasonable to treat it as if it were a function that -accepted any lifetime. - -What about these two examples: - - fn(&'a int, &'b int) <: |&'a int, &'a int|? (Yes) - fn(&'a int, &'a int) <: |&'a int, &'b int|? (No) - -Here, it is true that functions which take two pointers with any two -lifetimes can be treated as if they only accepted two pointers with -the same lifetime, but not the reverse. - -## The algorithm - -Here is the algorithm we use to perform the subtyping check: - -1. Replace all bound regions in the subtype with new variables -2. Replace all bound regions in the supertype with skolemized - equivalents. A "skolemized" region is just a new fresh region - name. -3. Check that the parameter and return types match as normal -4. Ensure that no skolemized regions 'leak' into region variables - visible from "the outside" - -Let's walk through some examples and see how this algorithm plays out. - -#### First example - -We'll start with the first example, which was: - - 1. fn(&'a T) <: |&'b T|? Yes: a -> b - -After steps 1 and 2 of the algorithm we will have replaced the types -like so: - - 1. fn(&'A T) <: |&'x T|? - -Here the upper case `&A` indicates a *region variable*, that is, a -region whose value is being inferred by the system. I also replaced -`&b` with `&x`---I'll use letters late in the alphabet (`x`, `y`, `z`) -to indicate skolemized region names. We can assume they don't appear -elsewhere. Note that neither the sub- nor the supertype bind any -region names anymore (as indicated by the absence of `<` and `>`). - -The next step is to check that the parameter types match. Because -parameters are contravariant, this means that we check whether: - - &'x T <: &'A T - -Region pointers are contravariant so this implies that - - &A <= &x - -must hold, where `<=` is the subregion relationship. Processing -*this* constrain simply adds a constraint into our graph that `&A <= -&x` and is considered successful (it can, for example, be satisfied by -choosing the value `&x` for `&A`). - -So far we have encountered no error, so the subtype check succeeds. - -#### The third example - -Now let's look first at the third example, which was: - - 3. fn(&'a T) <: |&'b T|? No! - -After steps 1 and 2 of the algorithm we will have replaced the types -like so: - - 3. fn(&'a T) <: |&'x T|? - -This looks pretty much the same as before, except that on the LHS -`&self` was not bound, and hence was left as-is and not replaced with -a variable. The next step is again to check that the parameter types -match. This will ultimately require (as before) that `&self` <= `&x` -must hold: but this does not hold. `self` and `x` are both distinct -free regions. So the subtype check fails. - -#### Checking for skolemization leaks - -You may be wondering about that mysterious last step in the algorithm. -So far it has not been relevant. The purpose of that last step is to -catch something like *this*: - - fn() -> fn(&'a T) <: || -> fn(&'b T)? No. - -Here the function types are the same but for where the binding occurs. -The subtype returns a function that expects a value in precisely one -region. The supertype returns a function that expects a value in any -region. If we allow an instance of the subtype to be used where the -supertype is expected, then, someone could call the fn and think that -the return value has type `fn(&'b T)` when it really has type -`fn(&'a T)` (this is case #3, above). Bad. - -So let's step through what happens when we perform this subtype check. -We first replace the bound regions in the subtype (the supertype has -no bound regions). This gives us: - - fn() -> fn(&'A T) <: || -> fn(&'b T)? - -Now we compare the return types, which are covariant, and hence we have: - - fn(&'A T) <: |&'b T|? - -Here we skolemize the bound region in the supertype to yield: - - fn(&'A T) <: |&'x T|? - -And then proceed to compare the argument types: - - &'x T <: &'A T - &A <= &x - -Finally, this is where it gets interesting! This is where an error -*should* be reported. But in fact this will not happen. The reason why -is that `A` is a variable: we will infer that its value is the fresh -region `x` and think that everything is happy. In fact, this behavior -is *necessary*, it was key to the first example we walked through. - -The difference between this example and the first one is that the variable -`A` already existed at the point where the skolemization occurred. In -the first example, you had two functions: - - fn(&'a T) <: |&'b T| - -and hence `&A` and `&x` were created "together". In general, the -intention of the skolemized names is that they are supposed to be -fresh names that could never be equal to anything from the outside. -But when inference comes into play, we might not be respecting this -rule. - -So the way we solve this is to add a fourth step that examines the -constraints that refer to skolemized names. Basically, consider a -non-directed verison of the constraint graph. Let `Tainted(x)` be the -set of all things reachable from a skolemized variable `x`. -`Tainted(x)` should not contain any regions that existed before the -step at which the skolemization was performed. So this case here -would fail because `&x` was created alone, but is relatable to `&A`. - -## Computing the LUB and GLB - -The paper I pointed you at is written for Haskell. It does not -therefore considering subtyping and in particular does not consider -LUB or GLB computation. We have to consider this. Here is the -algorithm I implemented. - -First though, let's discuss what we are trying to compute in more -detail. The LUB is basically the "common supertype" and the GLB is -"common subtype"; one catch is that the LUB should be the -*most-specific* common supertype and the GLB should be *most general* -common subtype (as opposed to any common supertype or any common -subtype). - -Anyway, to help clarify, here is a table containing some -function pairs and their LUB/GLB: - -``` -Type 1 Type 2 LUB GLB -fn(&a) fn(&X) fn(&X) fn(&a) -fn(&A) fn(&X) -- fn(&a) -fn(&a, &b) fn(&x, &x) fn(&a, &a) fn(&a, &b) -fn(&a, &b, &a) fn(&x, &y, &y) fn(&a, &a, &a) fn(&a,&b,&c) -``` - -### Conventions - -I use lower-case letters (e.g., `&a`) for bound regions and upper-case -letters for free regions (`&A`). Region variables written with a -dollar-sign (e.g., `$a`). I will try to remember to enumerate the -bound-regions on the fn type as well (e.g., `fn(&a)`). - -### High-level summary - -Both the LUB and the GLB algorithms work in a similar fashion. They -begin by replacing all bound regions (on both sides) with fresh region -inference variables. Therefore, both functions are converted to types -that contain only free regions. We can then compute the LUB/GLB in a -straightforward way, as described in `combine.rs`. This results in an -interim type T. The algorithms then examine the regions that appear -in T and try to, in some cases, replace them with bound regions to -yield the final result. - -To decide whether to replace a region `R` that appears in `T` with a -bound region, the algorithms make use of two bits of information. -First is a set `V` that contains all region variables created as part -of the LUB/GLB computation. `V` will contain the region variables -created to replace the bound regions in the input types, but it also -contains 'intermediate' variables created to represent the LUB/GLB of -individual regions. Basically, when asked to compute the LUB/GLB of a -region variable with another region, the inferencer cannot oblige -immediately since the values of that variables are not known. -Therefore, it creates a new variable that is related to the two -regions. For example, the LUB of two variables `$x` and `$y` is a -fresh variable `$z` that is constrained such that `$x <= $z` and `$y -<= $z`. So `V` will contain these intermediate variables as well. - -The other important factor in deciding how to replace a region in T is -the function `Tainted($r)` which, for a region variable, identifies -all regions that the region variable is related to in some way -(`Tainted()` made an appearance in the subtype computation as well). - -### LUB - -The LUB algorithm proceeds in three steps: - -1. Replace all bound regions (on both sides) with fresh region - inference variables. -2. Compute the LUB "as normal", meaning compute the GLB of each - pair of argument types and the LUB of the return types and - so forth. Combine those to a new function type `F`. -3. Replace each region `R` that appears in `F` as follows: - - Let `V` be the set of variables created during the LUB - computational steps 1 and 2, as described in the previous section. - - If `R` is not in `V`, replace `R` with itself. - - If `Tainted(R)` contains a region that is not in `V`, - replace `R` with itself. - - Otherwise, select the earliest variable in `Tainted(R)` that originates - from the left-hand side and replace `R` with the bound region that - this variable was a replacement for. - -So, let's work through the simplest example: `fn(&A)` and `fn(&a)`. -In this case, `&a` will be replaced with `$a` and the interim LUB type -`fn($b)` will be computed, where `$b=GLB(&A,$a)`. Therefore, `V = -{$a, $b}` and `Tainted($b) = { $b, $a, &A }`. When we go to replace -`$b`, we find that since `&A \in Tainted($b)` is not a member of `V`, -we leave `$b` as is. When region inference happens, `$b` will be -resolved to `&A`, as we wanted. - -Let's look at a more complex one: `fn(&a, &b)` and `fn(&x, &x)`. In -this case, we'll end up with a (pre-replacement) LUB type of `fn(&g, -&h)` and a graph that looks like: - -``` - $a $b *--$x - \ \ / / - \ $h-* / - $g-----------* -``` - -Here `$g` and `$h` are fresh variables that are created to represent -the LUB/GLB of things requiring inference. This means that `V` and -`Tainted` will look like: - -``` -V = {$a, $b, $g, $h, $x} -Tainted($g) = Tainted($h) = { $a, $b, $h, $g, $x } -``` - -Therefore we replace both `$g` and `$h` with `$a`, and end up -with the type `fn(&a, &a)`. - -### GLB - -The procedure for computing the GLB is similar. The difference lies -in computing the replacements for the various variables. For each -region `R` that appears in the type `F`, we again compute `Tainted(R)` -and examine the results: - -1. If `R` is not in `V`, it is not replaced. -2. Else, if `Tainted(R)` contains only variables in `V`, and it - contains exactly one variable from the LHS and one variable from - the RHS, then `R` can be mapped to the bound version of the - variable from the LHS. -3. Else, if `Tainted(R)` contains no variable from the LHS and no - variable from the RHS, then `R` can be mapped to itself. -4. Else, `R` is mapped to a fresh bound variable. - -These rules are pretty complex. Let's look at some examples to see -how they play out. - -Out first example was `fn(&a)` and `fn(&X)`. In this case, `&a` will -be replaced with `$a` and we will ultimately compute a -(pre-replacement) GLB type of `fn($g)` where `$g=LUB($a,&X)`. -Therefore, `V={$a,$g}` and `Tainted($g)={$g,$a,&X}. To find the -replacement for `$g` we consult the rules above: -- Rule (1) does not apply because `$g \in V` -- Rule (2) does not apply because `&X \in Tainted($g)` -- Rule (3) does not apply because `$a \in Tainted($g)` -- Hence, by rule (4), we replace `$g` with a fresh bound variable `&z`. -So our final result is `fn(&z)`, which is correct. - -The next example is `fn(&A)` and `fn(&Z)`. In this case, we will again -have a (pre-replacement) GLB of `fn(&g)`, where `$g = LUB(&A,&Z)`. -Therefore, `V={$g}` and `Tainted($g) = {$g, &A, &Z}`. In this case, -by rule (3), `$g` is mapped to itself, and hence the result is -`fn($g)`. This result is correct (in this case, at least), but it is -indicative of a case that *can* lead us into concluding that there is -no GLB when in fact a GLB does exist. See the section "Questionable -Results" below for more details. - -The next example is `fn(&a, &b)` and `fn(&c, &c)`. In this case, as -before, we'll end up with `F=fn($g, $h)` where `Tainted($g) = -Tainted($h) = {$g, $h, $a, $b, $c}`. Only rule (4) applies and hence -we'll select fresh bound variables `y` and `z` and wind up with -`fn(&y, &z)`. - -For the last example, let's consider what may seem trivial, but is -not: `fn(&a, &a)` and `fn(&b, &b)`. In this case, we'll get `F=fn($g, -$h)` where `Tainted($g) = {$g, $a, $x}` and `Tainted($h) = {$h, $a, -$x}`. Both of these sets contain exactly one bound variable from each -side, so we'll map them both to `&a`, resulting in `fn(&a, &a)`, which -is the desired result. - -### Shortcomings and correctness - -You may be wondering whether this algorithm is correct. The answer is -"sort of". There are definitely cases where they fail to compute a -result even though a correct result exists. I believe, though, that -if they succeed, then the result is valid, and I will attempt to -convince you. The basic argument is that the "pre-replacement" step -computes a set of constraints. The replacements, then, attempt to -satisfy those constraints, using bound identifiers where needed. - -For now I will briefly go over the cases for LUB/GLB and identify -their intent: - -- LUB: - - The region variables that are substituted in place of bound regions - are intended to collect constraints on those bound regions. - - If Tainted(R) contains only values in V, then this region is unconstrained - and can therefore be generalized, otherwise it cannot. -- GLB: - - The region variables that are substituted in place of bound regions - are intended to collect constraints on those bound regions. - - If Tainted(R) contains exactly one variable from each side, and - only variables in V, that indicates that those two bound regions - must be equated. - - Otherwise, if Tainted(R) references any variables from left or right - side, then it is trying to combine a bound region with a free one or - multiple bound regions, so we need to select fresh bound regions. - -Sorry this is more of a shorthand to myself. I will try to write up something -more convincing in the future. - -#### Where are the algorithms wrong? - -- The pre-replacement computation can fail even though using a - bound-region would have succeeded. -- We will compute GLB(fn(fn($a)), fn(fn($b))) as fn($c) where $c is the - GLB of $a and $b. But if inference finds that $a and $b must be mapped - to regions without a GLB, then this is effectively a failure to compute - the GLB. However, the result `fn<$c>(fn($c))` is a valid GLB. +For a discussion on skolemization and higher-ranked subtyping, please +see the module `middle::typeck::infer::higher_ranked::doc`. */ diff --git a/src/librustc/middle/typeck/infer/sub.rs b/src/librustc/middle/typeck/infer/sub.rs index 2763132024b3..f3df33f7b0e6 100644 --- a/src/librustc/middle/typeck/infer/sub.rs +++ b/src/librustc/middle/typeck/infer/sub.rs @@ -12,18 +12,16 @@ use middle::ty::{BuiltinBounds}; use middle::ty; use middle::ty::TyVar; -use middle::typeck::check::regionmanip::replace_late_bound_regions; use middle::typeck::infer::combine::*; use middle::typeck::infer::{cres, CresCompare}; use middle::typeck::infer::equate::Equate; -use middle::typeck::infer::LateBoundRegionConversionTime::FnType; use middle::typeck::infer::glb::Glb; +use middle::typeck::infer::higher_ranked::HigherRankedRelations; use middle::typeck::infer::InferCtxt; use middle::typeck::infer::lub::Lub; use middle::typeck::infer::{TypeTrace, Subtype}; use middle::typeck::infer::type_variable::{SubtypeOf, SupertypeOf}; -use util::common::{indenter}; -use util::ppaux::{bound_region_to_string, Repr}; +use util::ppaux::{Repr}; use syntax::ast::{Onceness, FnStyle, MutImmutable, MutMutable}; @@ -158,77 +156,7 @@ impl<'f, 'tcx> Combine<'tcx> for Sub<'f, 'tcx> { } fn fn_sigs(&self, a: &ty::FnSig, b: &ty::FnSig) -> cres { - debug!("fn_sigs(a={}, b={})", - a.repr(self.fields.infcx.tcx), b.repr(self.fields.infcx.tcx)); - let _indenter = indenter(); - - // Rather than checking the subtype relationship between `a` and `b` - // as-is, we need to do some extra work here in order to make sure - // that function subtyping works correctly with respect to regions - // - // Note: this is a subtle algorithm. For a full explanation, - // please see the large comment in `region_inference.rs`. - - // Make a mark so we can examine "all bindings that were - // created as part of this type comparison". - let mark = self.fields.infcx.region_vars.mark(); - - // First, we instantiate each bound region in the subtype with a fresh - // region variable. - let (a_sig, _) = - self.fields.infcx.replace_late_bound_regions_with_fresh_var( - a.binder_id, self.trace().span(), FnType, a); - - // Second, we instantiate each bound region in the supertype with a - // fresh concrete region. - let (skol_map, b_sig) = { - replace_late_bound_regions(self.fields.infcx.tcx, b.binder_id, b, |br| { - let skol = self.fields.infcx.region_vars.new_skolemized(br); - debug!("Bound region {} skolemized to {}", - bound_region_to_string(self.fields.infcx.tcx, "", false, br), - skol); - skol - }) - }; - - debug!("a_sig={}", a_sig.repr(self.fields.infcx.tcx)); - debug!("b_sig={}", b_sig.repr(self.fields.infcx.tcx)); - - // Compare types now that bound regions have been replaced. - let sig = try!(super_fn_sigs(self, &a_sig, &b_sig)); - - // Presuming type comparison succeeds, we need to check - // that the skolemized regions do not "leak". - let new_vars = - self.fields.infcx.region_vars.vars_created_since_mark(mark); - for (&skol_br, &skol) in skol_map.iter() { - let tainted = self.fields.infcx.region_vars.tainted(mark, skol); - for tainted_region in tainted.iter() { - // Each skolemized should only be relatable to itself - // or new variables: - match *tainted_region { - ty::ReInfer(ty::ReVar(ref vid)) => { - if new_vars.iter().any(|x| x == vid) { continue; } - } - _ => { - if *tainted_region == skol { continue; } - } - }; - - // A is not as polymorphic as B: - if self.a_is_expected() { - debug!("Not as polymorphic!"); - return Err(ty::terr_regions_insufficiently_polymorphic( - skol_br, *tainted_region)); - } else { - debug!("Overly polymorphic!"); - return Err(ty::terr_regions_overly_polymorphic( - skol_br, *tainted_region)); - } - } - } - - return Ok(sig); + self.higher_ranked_sub(a, b) } }