added gensym_copy mechanism to ensure sharing of pointers in the interner

this makes comparisons constant-time, and enables spelling-comparison
of identifiers, crucial in many parts of resolve.
This commit is contained in:
John Clements 2013-06-26 10:11:19 -07:00
parent 9d33001a90
commit 58e7598c2e
2 changed files with 88 additions and 48 deletions

View file

@ -15,12 +15,12 @@ use parse::token;
use util::interner::StrInterner;
use util::interner;
use std::cast;
use std::char;
use std::cmp::Equiv;
use std::local_data;
use std::rand;
use std::rand::RngUtil;
use std::ptr::to_unsafe_ptr;
#[deriving(Clone, Encodable, Decodable, Eq, IterBytes)]
pub enum binop {
@ -382,30 +382,8 @@ pub fn token_to_binop(tok: &Token) -> Option<ast::BinOp> {
}
}
pub struct ident_interner {
priv interner: StrInterner,
}
impl ident_interner {
pub fn intern(&self, val: &str) -> Name {
self.interner.intern(val)
}
pub fn gensym(&self, val: &str) -> Name {
self.interner.gensym(val)
}
pub fn get(&self, idx: Name) -> @str {
self.interner.get(idx)
}
// is this really something that should be exposed?
pub fn len(&self) -> uint {
self.interner.len()
}
pub fn find_equiv<Q:Hash + IterBytes + Equiv<@str>>(&self, val: &Q)
-> Option<Name> {
self.interner.find_equiv(val)
}
}
// looks like we can get rid of this completely...
pub type ident_interner = StrInterner;
// return a fresh interner, preloaded with special identifiers.
fn mk_fresh_ident_interner() -> @ident_interner {
@ -486,9 +464,7 @@ fn mk_fresh_ident_interner() -> @ident_interner {
"typeof", // 67
];
@ident_interner {
interner: interner::StrInterner::prefill(init_vec)
}
@interner::StrInterner::prefill(init_vec)
}
// if an interner exists in TLS, return it. Otherwise, prepare a
@ -509,7 +485,7 @@ pub fn get_ident_interner() -> @ident_interner {
/* for when we don't care about the contents; doesn't interact with TLD or
serialization */
pub fn mk_fake_ident_interner() -> @ident_interner {
@ident_interner { interner: interner::StrInterner::new() }
@interner::StrInterner::new()
}
// maps a string to its interned representation
@ -545,10 +521,11 @@ pub fn gensym_ident(str : &str) -> ast::Ident {
}
// create a fresh name that maps to the same string as the old one.
// note that this guarantees that ptr_eq(ident_to_str(src),interner_get(fresh_name(src)));
// note that this guarantees that str_ptr_eq(ident_to_str(src),interner_get(fresh_name(src)));
// that is, that the new name and the old one are connected to ptr_eq strings.
pub fn fresh_name(src : &ast::Ident) -> Name {
gensym(ident_to_str(src))
let interner = get_ident_interner();
interner.gensym_copy(src.name)
// following: debug version. Could work in final except that it's incompatible with
// good error messages and uses of struct names in ambiguous could-be-binding
// locations. Also definitely destroys the guarantee given above about ptr_eq.
@ -557,18 +534,26 @@ pub fn fresh_name(src : &ast::Ident) -> Name {
}
// it looks like there oughta be a str_ptr_eq fn, but no one bothered to implement it?
pub fn str_ptr_eq<T>(a: @str, b: @str) -> bool {
// doesn't compile! ...because of rebase mangling. this should be fixed
// in the commit that follows this.
let (a_ptr, b_ptr): (*uint, *uint) = (to_unsafe_ptr(a), to_unsafe_ptr(b));
a_ptr == b_ptr
// determine whether two @str values are pointer-equal
pub fn str_ptr_eq(a : @str, b : @str) -> bool {
unsafe {
let p : uint = cast::transmute(a);
let q : uint = cast::transmute(b);
let result = p == q;
// got to transmute them back, to make sure the ref count is correct:
let junk1 : @str = cast::transmute(p);
let junk2 : @str = cast::transmute(q);
result
}
}
// return true when two identifiers refer (through the intern table) to the same ptr_eq
// string. This is used to compare identifiers in places where hygienic comparison is
// not wanted (i.e. not lexical vars).
pub fn ident_spelling_eq(a : &ast::Ident, b : &ast::Ident) -> bool {
str_ptr_eq(interner_get(a.name),interner_get(b.name))
}
// create a fresh mark.
pub fn fresh_mark() -> Mrk {
@ -721,13 +706,21 @@ mod test {
use ast_util;
#[test] fn t1() {
#[test] fn str_ptr_eq_tests(){
let a = @"abc";
let b = @"abc";
let c = a;
assert!(str_ptr_eq(a,c));
assert!(!str_ptr_eq(a,b));
}
#[test] fn fresh_name_pointer_sharing() {
let ghi = str_to_ident("ghi");
assert_eq!(ident_to_str(&ghi),@"ghi");
assert!(str_ptr_eq(ident_to_str(&ghi),ident_to_str(&ghi)))
let fresh = ast::Ident::new(fresh_name(&ghi));
assert_eq!(ident_to_str(&fresh),@"ghi");
assert!(str_ptr_eq(ident_to_str(&ghi),ident_to_str(&fresh)));
assert_eq!(3,4);
}
}

View file

@ -117,6 +117,23 @@ impl StrInterner {
new_idx
}
// I want these gensyms to share name pointers
// with existing entries. This would be automatic,
// except that the existing gensym creates its
// own managed ptr using to_managed. I think that
// adding this utility function is the most
// lightweight way to get what I want, though not
// necessarily the cleanest.
// create a gensym with the same name as an existing
// entry.
pub fn gensym_copy(&self, idx : uint) -> uint {
let new_idx = self.len();
// leave out of map to avoid colliding
self.vect.push(self.vect[idx]);
new_idx
}
// this isn't "pure" in the traditional sense, because it can go from
// failing to returning a value as items are interned. But for typestate,
// where we first check a pred and then rely on it, ceasing to fail is ok.
@ -144,23 +161,23 @@ mod tests {
}
#[test]
fn i2 () {
fn interner_tests () {
let i : Interner<@str> = Interner::new();
// first one is zero:
assert_eq!(i.intern (@"dog"), 0);
assert_eq!(i.intern(@"dog"), 0);
// re-use gets the same entry:
assert_eq!(i.intern (@"dog"), 0);
assert_eq!(i.intern(@"dog"), 0);
// different string gets a different #:
assert_eq!(i.intern (@"cat"), 1);
assert_eq!(i.intern (@"cat"), 1);
assert_eq!(i.intern(@"cat"), 1);
assert_eq!(i.intern(@"cat"), 1);
// dog is still at zero
assert_eq!(i.intern (@"dog"), 0);
assert_eq!(i.intern(@"dog"), 0);
// gensym gets 3
assert_eq!(i.gensym (@"zebra" ), 2);
assert_eq!(i.gensym(@"zebra" ), 2);
// gensym of same string gets new number :
assert_eq!(i.gensym (@"zebra" ), 3);
// gensym of *existing* string gets new number:
assert_eq!(i.gensym (@"dog"), 4);
assert_eq!(i.gensym(@"dog"), 4);
assert_eq!(i.get(0), @"dog");
assert_eq!(i.get(1), @"cat");
assert_eq!(i.get(2), @"zebra");
@ -176,4 +193,34 @@ mod tests {
assert_eq!(i.get(2), @"Carol");
assert_eq!(i.intern(@"Bob"), 1);
}
#[test]
fn string_interner_tests() {
let i : StrInterner = StrInterner::new();
// first one is zero:
assert_eq!(i.intern("dog"), 0);
// re-use gets the same entry:
assert_eq!(i.intern ("dog"), 0);
// different string gets a different #:
assert_eq!(i.intern("cat"), 1);
assert_eq!(i.intern("cat"), 1);
// dog is still at zero
assert_eq!(i.intern("dog"), 0);
// gensym gets 3
assert_eq!(i.gensym("zebra"), 2);
// gensym of same string gets new number :
assert_eq!(i.gensym("zebra"), 3);
// gensym of *existing* string gets new number:
assert_eq!(i.gensym("dog"), 4);
// gensym tests again with gensym_copy:
assert_eq!(i.gensym_copy(2), 5);
assert_eq!(i.get(5), @"zebra");
assert_eq!(i.gensym_copy(2), 6);
assert_eq!(i.get(6), @"zebra");
assert_eq!(i.get(0), @"dog");
assert_eq!(i.get(1), @"cat");
assert_eq!(i.get(2), @"zebra");
assert_eq!(i.get(3), @"zebra");
assert_eq!(i.get(4), @"dog");
}
}