hygiene infrastructure.

- added a hash table to memoize rename and mark operations.
- added rename, mark, and resolve fold fns
This commit is contained in:
John Clements 2013-05-16 17:42:08 -07:00
parent b621820dc4
commit fc4f304ef9
5 changed files with 194 additions and 110 deletions

View file

@ -19,6 +19,7 @@ use core::option::{None, Option, Some};
use core::to_bytes;
use core::to_bytes::IterBytes;
use core::to_str::ToStr;
use core::hashmap::HashMap;
use std::serialize::{Encodable, Decodable, Encoder, Decoder};
@ -38,14 +39,20 @@ pub struct ident { repr: Name, ctxt: SyntaxContext }
// that's causing unreleased memory to cause core dumps
// and also perhaps to save some work in destructor checks.
// the special uint '0' will be used to indicate an empty
// syntax context
// syntax context.
// this uint is a reference to a table stored in thread-local
// storage.
pub type SyntaxContext = uint;
pub type SCTable = ~[SyntaxContext_];
// The syntax-context table together with the memo tables used by
// the mark and rename operations.
pub struct SCTable {
// the context nodes themselves; a SyntaxContext (a uint) is an
// index into this vector
table : ~[SyntaxContext_],
// memo for marks: maps a (context, mark) key to the index of the
// context node that was already created for it
mark_memo : HashMap<(SyntaxContext,Mrk),SyntaxContext>,
// memo for renames: maps a (context, ident, name) key to the index
// of the context node that was already created for it
rename_memo : HashMap<(SyntaxContext,ident,Name),SyntaxContext>
}
// NB: these must be placed in any SCTable...
pub static empty_ctxt : uint = 0;
pub static illegal_ctxt : uint = 1;
#[deriving(Eq, Encodable, Decodable)]
pub enum SyntaxContext_ {
@ -59,7 +66,8 @@ pub enum SyntaxContext_ {
// "to" slot must have the same name and context
// in the "from" slot. In essence, they're all
// pointers to a single "rename" event node.
Rename (ident,Name,SyntaxContext)
Rename (ident,Name,SyntaxContext),
IllegalCtxt()
}
// a name represents an identifier

View file

@ -15,6 +15,7 @@ use codemap::{span, spanned};
use parse::token;
use visit;
use opt_vec;
use core::hashmap::HashMap;
use core::to_bytes;
@ -577,22 +578,61 @@ pub enum Privacy {
// HYGIENE FUNCTIONS
/// Construct an identifier with the given repr and an empty context:
pub fn mk_ident(repr: uint) -> ident { ident {repr: repr, ctxt: 0}}
pub fn new_ident(repr: uint) -> ident { ident {repr: repr, ctxt: 0}}
/// Extend a syntax context with a given mark
pub fn mk_mark (m:Mrk,ctxt:SyntaxContext,table:&mut SCTable)
pub fn new_mark (m:Mrk, tail:SyntaxContext,table:&mut SCTable)
-> SyntaxContext {
idx_push(table,Mark(m,ctxt))
let key = (tail,m);
// FIXME #5074: we can't use the more natural style (a single `find`, as in
// the commented-out line below) because we're missing flow-sensitivity.
// This results in two lookups on a hash table hit. The same applies to
// new_rename, below.
// let try_lookup = table.mark_memo.find(&key);
match table.mark_memo.contains_key(&key) {
false => {
let new_idx = idx_push(&mut table.table,Mark(m,tail));
table.mark_memo.insert(key,new_idx);
new_idx
}
true => {
match table.mark_memo.find(&key) {
None => fail!(~"internal error: key disappeared 2013042901"),
Some(idxptr) => {*idxptr}
}
}
}
}
/// Extend a syntax context with a given rename
pub fn mk_rename (id:ident, to:Name, tail:SyntaxContext, table: &mut SCTable)
pub fn new_rename (id:ident, to:Name, tail:SyntaxContext, table: &mut SCTable)
-> SyntaxContext {
idx_push(table,Rename(id,to,tail))
let key = (tail,id,to);
// FIXME #5074
//let try_lookup = table.rename_memo.find(&key);
match table.rename_memo.contains_key(&key) {
false => {
let new_idx = idx_push(&mut table.table,Rename(id,to,tail));
table.rename_memo.insert(key,new_idx);
new_idx
}
true => {
match table.rename_memo.find(&key) {
None => fail!(~"internal error: key disappeared 2013042902"),
Some(idxptr) => {*idxptr}
}
}
}
}
/// Make a fresh syntax context table with EmptyCtxt in slot zero
pub fn mk_sctable() -> SCTable { ~[EmptyCtxt] }
/// and IllegalCtxt in slot one.
pub fn new_sctable() -> SCTable {
SCTable {
// index 0 (empty_ctxt) holds EmptyCtxt, index 1 (illegal_ctxt)
// holds IllegalCtxt; newly created contexts are pushed after these
table: ~[EmptyCtxt,IllegalCtxt],
// both memo tables start out empty
mark_memo: HashMap::new(),
rename_memo: HashMap::new()
}
}
/// Add a value to the end of a vec, return its index
fn idx_push<T>(vec: &mut ~[T], val: T) -> uint {
@ -601,8 +641,8 @@ fn idx_push<T>(vec: &mut ~[T], val: T) -> uint {
}
/// Resolve a syntax object to a name, per MTWT.
pub fn resolve (id : ident, table : &SCTable) -> Name {
match table[id.ctxt] {
pub fn resolve (id : ident, table : &mut SCTable) -> Name {
match table.table[id.ctxt] {
EmptyCtxt => id.repr,
// ignore marks here:
Mark(_,subctxt) => resolve (ident{repr:id.repr, ctxt: subctxt},table),
@ -619,6 +659,7 @@ pub fn resolve (id : ident, table : &SCTable) -> Name {
resolvedthis
}
}
IllegalCtxt() => fail!(~"expected resolvable context, got IllegalCtxt")
}
}
@ -629,7 +670,7 @@ pub fn marksof(ctxt: SyntaxContext, stopname: Name, table: &SCTable) -> ~[Mrk] {
let mut result = ~[];
let mut loopvar = ctxt;
loop {
match table[loopvar] {
match table.table[loopvar] {
EmptyCtxt => {return result;},
Mark(mark,tl) => {
xorPush(&mut result,mark);
@ -644,6 +685,7 @@ pub fn marksof(ctxt: SyntaxContext, stopname: Name, table: &SCTable) -> ~[Mrk] {
loopvar = tl;
}
}
IllegalCtxt => fail!(~"expected resolvable context, got IllegalCtxt")
}
}
}
@ -713,15 +755,15 @@ mod test {
-> SyntaxContext {
tscs.foldr(tail, |tsc : &TestSC,tail : SyntaxContext|
{match *tsc {
M(mrk) => mk_mark(mrk,tail,table),
R(ident,name) => mk_rename(ident,name,tail,table)}})
M(mrk) => new_mark(mrk,tail,table),
R(ident,name) => new_rename(ident,name,tail,table)}})
}
// gather a SyntaxContext back into a vector of TestSCs
fn refold_test_sc(mut sc: SyntaxContext, table : &SCTable) -> ~[TestSC] {
let mut result = ~[];
loop {
match table[sc] {
match table.table[sc] {
EmptyCtxt => {return result;},
Mark(mrk,tail) => {
result.push(M(mrk));
@ -733,40 +775,41 @@ mod test {
sc = tail;
loop;
}
IllegalCtxt => fail!("expected resolvable context, got IllegalCtxt")
}
}
}
#[test] fn test_unfold_refold(){
let mut t = mk_sctable();
let mut t = new_sctable();
let test_sc = ~[M(3),R(id(101,0),14),M(9)];
assert_eq!(unfold_test_sc(copy test_sc,empty_ctxt,&mut t),3);
assert_eq!(t[1],Mark(9,0));
assert_eq!(t[2],Rename(id(101,0),14,1));
assert_eq!(t[3],Mark(3,2));
assert_eq!(refold_test_sc(3,&t),test_sc);
assert_eq!(unfold_test_sc(copy test_sc,empty_ctxt,&mut t),4);
assert_eq!(t.table[2],Mark(9,0));
assert_eq!(t.table[3],Rename(id(101,0),14,2));
assert_eq!(t.table[4],Mark(3,3));
assert_eq!(refold_test_sc(4,&t),test_sc);
}
// extend a syntax context with a sequence of marks given
// in a vector. v[0] will be the outermost mark.
fn unfold_marks(mrks:~[Mrk],tail:SyntaxContext,table: &mut SCTable) -> SyntaxContext {
mrks.foldr(tail, |mrk:&Mrk,tail:SyntaxContext|
{mk_mark(*mrk,tail,table)})
{new_mark(*mrk,tail,table)})
}
#[test] fn unfold_marks_test() {
let mut t = ~[EmptyCtxt];
let mut t = new_sctable();
assert_eq!(unfold_marks(~[3,7],empty_ctxt,&mut t),2);
assert_eq!(t[1],Mark(7,0));
assert_eq!(t[2],Mark(3,1));
assert_eq!(unfold_marks(~[3,7],empty_ctxt,&mut t),3);
assert_eq!(t.table[2],Mark(7,0));
assert_eq!(t.table[3],Mark(3,2));
}
#[test] fn test_marksof () {
let stopname = 242;
let name1 = 243;
let mut t = mk_sctable();
let mut t = new_sctable();
assert_eq!(marksof (empty_ctxt,stopname,&t),~[]);
// FIXME #5074: ANF'd to dodge nested calls
{ let ans = unfold_marks(~[4,98],empty_ctxt,&mut t);
@ -780,13 +823,13 @@ mod test {
// rename where stop doesn't match:
{ let chain = ~[M(9),
R(id(name1,
mk_mark (4, empty_ctxt,&mut t)),
new_mark (4, empty_ctxt,&mut t)),
100101102),
M(14)];
let ans = unfold_test_sc(chain,empty_ctxt,&mut t);
assert_eq! (marksof (ans, stopname, &t), ~[9,14]);}
// rename where stop does match
{ let name1sc = mk_mark(4, empty_ctxt, &mut t);
{ let name1sc = new_mark(4, empty_ctxt, &mut t);
let chain = ~[M(9),
R(id(name1, name1sc),
stopname),
@ -798,30 +841,30 @@ mod test {
#[test] fn resolve_tests () {
let a = 40;
let mut t = mk_sctable();
let mut t = new_sctable();
// - ctxt is MT
assert_eq!(resolve(id(a,empty_ctxt),&t),a);
assert_eq!(resolve(id(a,empty_ctxt),&mut t),a);
// - simple ignored marks
{ let sc = unfold_marks(~[1,2,3],empty_ctxt,&mut t);
assert_eq!(resolve(id(a,sc),&t),a);}
assert_eq!(resolve(id(a,sc),&mut t),a);}
// - orthogonal rename where names don't match
{ let sc = unfold_test_sc(~[R(id(50,empty_ctxt),51),M(12)],empty_ctxt,&mut t);
assert_eq!(resolve(id(a,sc),&t),a);}
assert_eq!(resolve(id(a,sc),&mut t),a);}
// - rename where names do match, but marks don't
{ let sc1 = mk_mark(1,empty_ctxt,&mut t);
{ let sc1 = new_mark(1,empty_ctxt,&mut t);
let sc = unfold_test_sc(~[R(id(a,sc1),50),
M(1),
M(2)],
empty_ctxt,&mut t);
assert_eq!(resolve(id(a,sc),&t), a);}
assert_eq!(resolve(id(a,sc),&mut t), a);}
// - rename where names and marks match
{ let sc1 = unfold_test_sc(~[M(1),M(2)],empty_ctxt,&mut t);
let sc = unfold_test_sc(~[R(id(a,sc1),50),M(1),M(2)],empty_ctxt,&mut t);
assert_eq!(resolve(id(a,sc),&t), 50); }
assert_eq!(resolve(id(a,sc),&mut t), 50); }
// - rename where names and marks match by literal sharing
{ let sc1 = unfold_test_sc(~[M(1),M(2)],empty_ctxt,&mut t);
let sc = unfold_test_sc(~[R(id(a,sc1),50)],sc1,&mut t);
assert_eq!(resolve(id(a,sc),&t), 50); }
assert_eq!(resolve(id(a,sc),&mut t), 50); }
// - two renames of the same var.. can only happen if you use
// local-expand to prevent the inner binding from being renamed
// during the rename-pass caused by the first:
@ -829,20 +872,29 @@ mod test {
{ let sc = unfold_test_sc(~[R(id(a,empty_ctxt),50),
R(id(a,empty_ctxt),51)],
empty_ctxt,&mut t);
assert_eq!(resolve(id(a,sc),&t), 51); }
assert_eq!(resolve(id(a,sc),&mut t), 51); }
// the simplest double-rename:
{ let a_to_a50 = mk_rename(id(a,empty_ctxt),50,empty_ctxt,&mut t);
let a50_to_a51 = mk_rename(id(a,a_to_a50),51,a_to_a50,&mut t);
assert_eq!(resolve(id(a,a50_to_a51),&t),51);
{ let a_to_a50 = new_rename(id(a,empty_ctxt),50,empty_ctxt,&mut t);
let a50_to_a51 = new_rename(id(a,a_to_a50),51,a_to_a50,&mut t);
assert_eq!(resolve(id(a,a50_to_a51),&mut t),51);
// mark on the outside doesn't stop rename:
let sc = mk_mark(9,a50_to_a51,&mut t);
assert_eq!(resolve(id(a,sc),&t),51);
let sc = new_mark(9,a50_to_a51,&mut t);
assert_eq!(resolve(id(a,sc),&mut t),51);
// but mark on the inside does:
let a50_to_a51_b = unfold_test_sc(~[R(id(a,a_to_a50),51),
M(9)],
a_to_a50,
&mut t);
assert_eq!(resolve(id(a,a50_to_a51_b),&t),50);}
assert_eq!(resolve(id(a,a50_to_a51_b),&mut t),50);}
}
// check that new_mark is memoized: asking again for the same
// (mark, context) pair yields the index that was handed out before.
#[test] fn hashing_tests () {
let mut t = new_sctable();
// a fresh table already occupies indices 0 and 1, so the first
// new mark is expected at index 2 and the next one at index 3
assert_eq!(new_mark(12,empty_ctxt,&mut t),2);
assert_eq!(new_mark(13,empty_ctxt,&mut t),3);
// using the same one again should result in the same index:
assert_eq!(new_mark(12,empty_ctxt,&mut t),2);
// NOTE: the rename memo table is not exercised here; it is
// assumed (not verified) to behave the same way.
}
}

View file

@ -11,7 +11,9 @@
use ast::{blk_, attribute_, attr_outer, meta_word};
use ast::{crate, expr_, expr_mac, mac_invoc_tt};
use ast::{item_mac, stmt_, stmt_mac, stmt_expr, stmt_semi};
use ast::{SCTable, illegal_ctxt};
use ast;
use ast_util::{new_rename, new_mark, resolve};
use attr;
use codemap;
use codemap::{span, CallInfo, ExpandedFrom, NameAndSpan, spanned};
@ -635,62 +637,65 @@ pub fn expand_crate(parse_sess: @mut parse::ParseSess,
@f.fold_crate(&*c)
}
// given a function from paths to paths, produce
// given a function from idents to idents, produce
// an ast_fold that applies that function:
fn fun_to_path_folder(f: @fn(&ast::Path)->ast::Path) -> @ast_fold{
pub fn fun_to_ident_folder(f: @fn(ast::ident)->ast::ident) -> @ast_fold{
let afp = default_ast_fold();
let f_pre = @AstFoldFns{
fold_path : |p, _| f(p),
fold_ident : |id, _| f(id),
.. *afp
};
make_fold(f_pre)
}
/* going to have to figure out whether the table is passed in or
extracted from TLS...
// update the ctxts in a path to get a rename node
fn ctxt_update_rename(from: ast::Name,
fromctx: ast::SyntaxContext, to: ast::Name) ->
@fn(&ast::Path,@ast_fold)->ast::Path {
return |p:&ast::Path,_|
ast::Path {span: p.span,
global: p.global,
idents: p.idents.map(|id|
ast::ident{
repr: id.repr,
// this needs to be cached....
ctxt: Some(@ast::Rename(from,fromctx,
to,id.ctxt))
}),
rp: p.rp,
types: p.types};
// build an ident-to-ident function that keeps each ident's repr and
// replaces its context with the one returned by
// new_rename(from,to,<old context>,table).
pub fn new_ident_renamer(from: ast::ident,
to: ast::Name,
table: @mut SCTable) ->
@fn(ast::ident)->ast::ident {
|id : ast::ident|
ast::ident{
repr: id.repr,
// the ident's current context becomes the tail of the new rename
ctxt: new_rename(from,to,id.ctxt,table)
}
}
// update the ctxts in a path to get a mark node
fn ctxt_update_mark(mark: uint) ->
@fn(&ast::Path,@ast_fold)->ast::Path {
return |p:&ast::Path,_|
ast::Path {span: p.span,
global: p.global,
idents: p.idents.map(|id|
ast::ident{
repr: id.repr,
// this needs to be cached....
ctxt: Some(@ast::Mark(mark,id.ctxt))
}),
rp: p.rp,
types: p.types};
// build an ident-to-ident function that keeps each ident's repr and
// replaces its context with the one returned by
// new_mark(mark,<old context>,table).
pub fn new_ident_marker(mark: uint,
table: @mut SCTable) ->
@fn(ast::ident)->ast::ident {
|id : ast::ident|
ast::ident{
repr: id.repr,
// the ident's current context becomes the tail of the new mark
ctxt: new_mark(mark,id.ctxt,table)
}
}
*/
// perform resolution (in the MTWT sense) on all of the
// idents in the tree. This is the final step in expansion.
// The returned function maps an ident to a new ident whose repr is
// the name produced by resolve(id,table) and whose ctxt is illegal_ctxt.
pub fn new_ident_resolver(table: @mut SCTable) ->
@fn(ast::ident)->ast::ident {
|id : ast::ident|
ast::ident {
repr : resolve(id,table),
// resolve fails on IllegalCtxt, so resolving the result a second
// time would fail (presumably intentional -- confirm)
ctxt : illegal_ctxt
}
}
#[cfg(test)]
mod test {
use super::*;
use ast;
use ast::{attribute_, attr_outer, meta_word};
use ast::{attribute_, attr_outer, meta_word, empty_ctxt};
use ast_util::{new_sctable};
use codemap;
use codemap::spanned;
use parse;
use core::io;
use core::option::{None, Some};
use util::parser_testing::{string_to_item_and_sess};
// make sure that fail! is present
#[test] fn fail_exists_test () {
@ -792,4 +797,22 @@ mod test {
}
}
// smoke test for the rename and resolve folds: parse an item, apply a
// renamer and then a resolver to it, and print the result.
// NOTE: there are no assertions; the output is only printed.
#[test]
fn renaming () {
let (maybe_item_ast,sess) = string_to_item_and_sess(@~"fn a() -> int { let b = 13; b} ");
let item_ast = match maybe_item_ast {
Some(x) => x,
None => fail!("test case fail")
};
let table = @mut new_sctable();
let a_name = 100; // enforced by testing_interner
// a fresh name to rename `a` to
let a2_name = sess.interner.gensym(@~"a2").repr;
// the renamer extends every folded ident's context with a rename
// of (a, empty context) to a2
let renamer = new_ident_renamer(ast::ident{repr:a_name,ctxt:empty_ctxt},
a2_name,table);
let renamed_ast = fun_to_ident_folder(renamer).fold_item(item_ast).get();
// the resolver replaces each ident's repr with its resolved name
let resolver = new_ident_resolver(table);
let resolved_ast = fun_to_ident_folder(resolver).fold_item(renamed_ast).get();
io::print(fmt!("ast: %?\n",resolved_ast))
}
}

View file

@ -346,21 +346,16 @@ mod test {
use std::serialize::Encodable;
use std;
use core::io;
use core::option::Option;
use core::option::Some;
use core::option::None;
use core::int;
use core::num::NumCast;
use codemap::{CodeMap, span, BytePos, spanned};
use codemap::{span, BytePos, spanned};
use opt_vec;
use ast;
use abi;
use ast_util::mk_ident;
use ast_util::new_ident;
use parse::parser::Parser;
use parse::token::{ident_interner, mk_ident_interner, mk_fresh_ident_interner};
use diagnostic::{span_handler, mk_span_handler, mk_handler, Emitter};
use util::parser_testing::{string_to_tts_and_sess,string_to_parser};
use util::parser_testing::{string_to_crate, string_to_expr, string_to_item};
use util::parser_testing::{string_to_expr, string_to_item};
use util::parser_testing::{string_to_stmt};
// map a string to tts, return the tt without its parsesess
@ -384,7 +379,7 @@ mod test {
// convert a vector of uints to a vector of ast::idents
fn ints_to_idents(ids: ~[uint]) -> ~[ast::ident] {
ids.map(|u| mk_ident(*u))
ids.map(|u| new_ident(*u))
}
#[test] fn path_exprs_1 () {
@ -393,7 +388,7 @@ mod test {
callee_id:2,
node:ast::expr_path(@ast::Path {span:sp(0,1),
global:false,
idents:~[mk_ident(100)],
idents:~[new_ident(100)],
rp:None,
types:~[]}),
span:sp(0,1)})
@ -456,7 +451,7 @@ mod test {
node:ast::expr_path(
@ast::Path{span:sp(7,8),
global:false,
idents:~[mk_ident(103)],
idents:~[new_ident(103)],
rp:None,
types:~[]
}),
@ -474,7 +469,7 @@ mod test {
@ast::Path{
span:sp(0,1),
global:false,
idents:~[mk_ident(101)],
idents:~[new_ident(101)],
rp:None,
types: ~[]}),
span: sp(0,1)},
@ -495,7 +490,7 @@ mod test {
@ast::Path{
span:sp(0,1),
global:false,
idents:~[mk_ident(101)],
idents:~[new_ident(101)],
rp: None,
types: ~[]},
None // no idea
@ -514,7 +509,7 @@ mod test {
span:sp(4,4), // this is bizarre...
// check this in the original parser?
global:false,
idents:~[mk_ident(105)],
idents:~[new_ident(105)],
rp: None,
types: ~[]},
2),
@ -524,7 +519,7 @@ mod test {
@ast::Path{
span:sp(0,1),
global:false,
idents:~[mk_ident(101)],
idents:~[new_ident(101)],
rp: None,
types: ~[]},
None // no idea
@ -540,7 +535,7 @@ mod test {
// assignment order of the node_ids.
assert_eq!(string_to_item(@~"fn a (b : int) { b; }"),
Some(
@ast::item{ident:mk_ident(100),
@ast::item{ident:new_ident(100),
attrs:~[],
id: 10, // fixme
node: ast::item_fn(ast::fn_decl{
@ -550,7 +545,7 @@ mod test {
node: ast::ty_path(@ast::Path{
span:sp(10,13),
global:false,
idents:~[mk_ident(106)],
idents:~[new_ident(106)],
rp: None,
types: ~[]},
2),
@ -561,7 +556,7 @@ mod test {
@ast::Path{
span:sp(6,7),
global:false,
idents:~[mk_ident(101)],
idents:~[new_ident(101)],
rp: None,
types: ~[]},
None // no idea
@ -592,7 +587,7 @@ mod test {
@ast::Path{
span:sp(17,18),
global:false,
idents:~[mk_ident(101)],
idents:~[new_ident(101)],
rp:None,
types: ~[]}),
span: sp(17,18)},

View file

@ -8,22 +8,16 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use std::serialize::Encodable;
use std;
use core::io;
use core::option::{Option,None};
use core::int;
use core::num::NumCast;
use codemap::{dummy_sp, CodeMap, BytePos, spanned};
use opt_vec;
use codemap::CodeMap;
use ast;
use abi;
use ast_util::mk_ident;
use parse::parser::Parser;
use parse::token::{ident_interner, mk_ident_interner, mk_fresh_ident_interner};
use diagnostic::{span_handler, mk_span_handler, mk_handler, Emitter};
use parse::token::{ident_interner, mk_fresh_ident_interner};
use diagnostic::{mk_handler, mk_span_handler};
use syntax::parse::{ParseSess,new_parse_sess,string_to_filemap,filemap_to_tts};
use syntax::parse::{ParseSess,string_to_filemap,filemap_to_tts};
use syntax::parse::{new_parser_from_source_str};
// add known names to interner for testing
@ -71,10 +65,15 @@ pub fn string_to_tts_and_sess (source_str : @~str) -> (~[ast::token_tree],@mut P
(filemap_to_tts(ps,string_to_filemap(ps,source_str,~"bogofile")),ps)
}
// map string to parser, returning the parser together with the
// testing ParseSess it was created from
pub fn string_to_parser_and_sess(source_str: @~str) -> (Parser,@mut ParseSess) {
let ps = mk_testing_parse_sess();
// ~"bogofile" is presumably a placeholder name for the in-memory
// source -- confirm against new_parser_from_source_str
(new_parser_from_source_str(ps,~[],~"bogofile",source_str),ps)
}
// map string to parser (via tts)
pub fn string_to_parser(source_str: @~str) -> Parser {
let ps = mk_testing_parse_sess();
new_parser_from_source_str(ps,~[],~"bogofile",source_str)
let (p,_) = string_to_parser_and_sess(source_str);
p
}
pub fn string_to_crate (source_str : @~str) -> @ast::crate {
@ -86,10 +85,17 @@ pub fn string_to_expr (source_str : @~str) -> @ast::expr {
string_to_parser(source_str).parse_expr()
}
// parse a string, return an item
// (the result is whatever parse_item returns for the string; the
// ParseSess used for parsing is not returned)
pub fn string_to_item (source_str : @~str) -> Option<@ast::item> {
string_to_parser(source_str).parse_item(~[])
}
// parse a string, return an item and the ParseSess
// (the ParseSess is the one the parser was created from, so callers
// can reach session state such as the interner)
pub fn string_to_item_and_sess (source_str : @~str) -> (Option<@ast::item>,@mut ParseSess) {
let (p,ps) = string_to_parser_and_sess(source_str);
(p.parse_item(~[]),ps)
}
// parse a string, return a stmt
pub fn string_to_stmt (source_str : @~str) -> @ast::stmt {
string_to_parser(source_str).parse_stmt(~[])
}