rust/src/boot/fe/parser.ml

375 lines
9.2 KiB
OCaml

open Common;;
open Token;;
(* Fundamental parser types and actions *)
type get_mod_fn = (Ast.meta_pat
-> node_id
-> (node_id ref)
-> (opaque_id ref)
-> (filename * Ast.mod_items))
;;
type pstate =
{ mutable pstate_peek : token;
mutable pstate_ctxt : (string * pos) list;
mutable pstate_rstr : bool;
mutable pstate_depth: int;
pstate_lexbuf : Lexing.lexbuf;
pstate_file : filename;
pstate_sess : Session.sess;
pstate_temp_id : temp_id ref;
pstate_node_id : node_id ref;
pstate_opaque_id : opaque_id ref;
pstate_get_mod : get_mod_fn;
pstate_get_cenv_tok : pstate -> Ast.ident -> token;
pstate_infer_lib_name : (Ast.ident -> filename);
pstate_required : (node_id, (required_lib * nabi_conv)) Hashtbl.t;
pstate_required_syms : (node_id, string) Hashtbl.t; }
;;
let log (ps:pstate) = Session.log "parse"
ps.pstate_sess.Session.sess_log_parse
ps.pstate_sess.Session.sess_log_out
;;
let iflog ps thunk =
if ps.pstate_sess.Session.sess_log_parse
then thunk ()
else ()
;;
let make_parser
(tref:temp_id ref)
(nref:node_id ref)
(oref:opaque_id ref)
(sess:Session.sess)
(get_mod:get_mod_fn)
(get_cenv_tok:pstate -> Ast.ident -> token)
(infer_lib_name:Ast.ident -> filename)
(required:(node_id, (required_lib * nabi_conv)) Hashtbl.t)
(required_syms:(node_id, string) Hashtbl.t)
(fname:string)
: pstate =
let lexbuf = Lexing.from_channel (open_in fname) in
let spos = { lexbuf.Lexing.lex_start_p with Lexing.pos_fname = fname } in
let cpos = { lexbuf.Lexing.lex_curr_p with Lexing.pos_fname = fname } in
lexbuf.Lexing.lex_start_p <- spos;
lexbuf.Lexing.lex_curr_p <- cpos;
let first = Lexer.token lexbuf in
let ps =
{ pstate_peek = first;
pstate_ctxt = [];
pstate_rstr = false;
pstate_depth = 0;
pstate_lexbuf = lexbuf;
pstate_file = fname;
pstate_sess = sess;
pstate_temp_id = tref;
pstate_node_id = nref;
pstate_opaque_id = oref;
pstate_get_mod = get_mod;
pstate_get_cenv_tok = get_cenv_tok;
pstate_infer_lib_name = infer_lib_name;
pstate_required = required;
pstate_required_syms = required_syms; }
in
iflog ps (fun _ -> log ps "made parser for: %s\n%!" fname);
ps
;;
exception Parse_err of (pstate * string)
;;
let lexpos (ps:pstate) : pos =
let p = ps.pstate_lexbuf.Lexing.lex_start_p in
(p.Lexing.pos_fname,
p.Lexing.pos_lnum ,
(p.Lexing.pos_cnum) - (p.Lexing.pos_bol))
;;
let next_node_id (ps:pstate) : node_id =
let id = !(ps.pstate_node_id) in
ps.pstate_node_id := Node ((int_of_node id)+1);
id
;;
let next_opaque_id (ps:pstate) : opaque_id =
let id = !(ps.pstate_opaque_id) in
ps.pstate_opaque_id := Opaque ((int_of_opaque id)+1);
id
;;
let span
(ps:pstate)
(apos:pos)
(bpos:pos)
(x:'a)
: 'a identified =
let span = { lo = apos; hi = bpos } in
let id = next_node_id ps in
iflog ps (fun _ -> log ps "span for node #%d: %s"
(int_of_node id) (Session.string_of_span span));
htab_put ps.pstate_sess.Session.sess_spans id span;
{ node = x; id = id }
;;
let decl p i =
{ Ast.decl_params = p;
Ast.decl_item = i }
;;
let spans
(ps:pstate)
(things:('a identified) array)
(apos:pos)
(thing:'a)
: ('a identified) array =
Array.append things [| (span ps apos (lexpos ps) thing) |]
;;
(*
* The point of this is to make a new node_id entry for a node that is a
* "copy" of an lval returned from somewhere else. For example if you create
* a temp, the lval it returns can only be used in *one* place, for the
* node_id denotes the place that lval is first used; subsequent uses of
* 'the same' reference must clone_lval it into a new node_id. Otherwise
* there is trouble.
*)
let clone_span
(ps:pstate)
(oldnode:'a identified)
(newthing:'b)
: 'b identified =
let s = Hashtbl.find ps.pstate_sess.Session.sess_spans oldnode.id in
span ps s.lo s.hi newthing
;;
let rec clone_lval (ps:pstate) (lval:Ast.lval) : Ast.lval =
match lval with
Ast.LVAL_base nb ->
let nnb = clone_span ps nb nb.node in
Ast.LVAL_base nnb
| Ast.LVAL_ext (base, ext) ->
Ast.LVAL_ext ((clone_lval ps base), ext)
;;
let clone_atom (ps:pstate) (atom:Ast.atom) : Ast.atom =
match atom with
Ast.ATOM_literal _ -> atom
| Ast.ATOM_lval lv -> Ast.ATOM_lval (clone_lval ps lv)
;;
let ctxt (n:string) (f:pstate -> 'a) (ps:pstate) : 'a =
(ps.pstate_ctxt <- (n, lexpos ps) :: ps.pstate_ctxt;
let res = f ps in
ps.pstate_ctxt <- List.tl ps.pstate_ctxt;
res)
;;
let rstr (r:bool) (f:pstate -> 'a) (ps:pstate) : 'a =
let prev = ps.pstate_rstr in
(ps.pstate_rstr <- r;
let res = f ps in
ps.pstate_rstr <- prev;
res)
;;
let err (str:string) (ps:pstate) =
(Parse_err (ps, (str)))
;;
let (slot_nil:Ast.slot) =
{ Ast.slot_mode = Ast.MODE_local;
Ast.slot_ty = Some Ast.TY_nil }
;;
let (slot_auto:Ast.slot) =
{ Ast.slot_mode = Ast.MODE_local;
Ast.slot_ty = None }
;;
let build_tmp
(ps:pstate)
(slot:Ast.slot)
(apos:pos)
(bpos:pos)
: (temp_id * Ast.lval * Ast.stmt) =
let nonce = !(ps.pstate_temp_id) in
ps.pstate_temp_id := Temp ((int_of_temp nonce)+1);
iflog ps
(fun _ -> log ps "building temporary %d" (int_of_temp nonce));
let decl = Ast.DECL_slot (Ast.KEY_temp nonce, (span ps apos bpos slot)) in
let declstmt = span ps apos bpos (Ast.STMT_decl decl) in
let tmp = Ast.LVAL_base (span ps apos bpos (Ast.BASE_temp nonce)) in
(nonce, tmp, declstmt)
;;
(* Simple helpers *)
(* FIXME (issue #71): please rename these, they make eyes bleed. *)
let arr (ls:'a list) : 'a array = Array.of_list ls ;;
let arl (ls:'a list) : 'a array = Array.of_list (List.rev ls) ;;
let arj (ar:('a array array)) = Array.concat (Array.to_list ar) ;;
let arj1st (pairs:(('a array) * 'b) array) : (('a array) * 'b array) =
let (az, bz) = List.split (Array.to_list pairs) in
(Array.concat az, Array.of_list bz)
(* Bottom-most parser actions. *)
let peek (ps:pstate) : token =
iflog ps
begin
fun _ ->
log ps "peeking at: %s // %s"
(string_of_tok ps.pstate_peek)
(match ps.pstate_ctxt with
(s, _) :: _ -> s
| _ -> "<empty>")
end;
ps.pstate_peek
;;
let bump (ps:pstate) : unit =
begin
iflog ps (fun _ -> log ps "bumping past: %s"
(string_of_tok ps.pstate_peek));
ps.pstate_peek <- Lexer.token ps.pstate_lexbuf
end
;;
let bump_bracequote (ps:pstate) : unit =
begin
assert (ps.pstate_peek = LBRACE);
iflog ps (fun _ -> log ps "bumping past: %s"
(string_of_tok ps.pstate_peek));
let buf = Buffer.create 32 in
ps.pstate_peek <- Lexer.bracequote buf 1 ps.pstate_lexbuf
end
;;
let expect (ps:pstate) (t:token) : unit =
let p = peek ps in
if p == t
then bump ps
else
let msg = ("Expected '" ^ (string_of_tok t) ^
"', found '" ^ (string_of_tok p ) ^ "'") in
raise (Parse_err (ps, msg))
;;
let unexpected (ps:pstate) =
err ("Unexpected token '" ^ (string_of_tok (peek ps)) ^ "'") ps
;;
(* Parser combinators. *)
let one_or_more
(sep:token)
(prule:pstate -> 'a)
(ps:pstate)
: 'a array =
let accum = ref [prule ps] in
while peek ps == sep
do
bump ps;
accum := (prule ps) :: !accum
done;
arl !accum
;;
let bracketed_seq
(mandatory:int)
(bra:token)
(ket:token)
(sepOpt:token option)
(prule:pstate -> 'a)
(ps:pstate)
: 'a array =
expect ps bra;
let accum = ref [] in
let dosep _ =
(match sepOpt with
None -> ()
| Some tok ->
if (!accum = [])
then ()
else expect ps tok)
in
while mandatory > List.length (!accum) do
dosep ();
accum := (prule ps) :: (!accum)
done;
while (not (peek ps = ket))
do
dosep ();
accum := (prule ps) :: !accum
done;
expect ps ket;
arl !accum
;;
let bracketed_zero_or_more
(bra:token)
(ket:token)
(sepOpt:token option)
(prule:pstate -> 'a)
(ps:pstate)
: 'a array =
bracketed_seq 0 bra ket sepOpt (ctxt "bracketed_seq" prule) ps
;;
let paren_comma_list
(prule:pstate -> 'a)
(ps:pstate)
: 'a array =
bracketed_zero_or_more LPAREN RPAREN (Some COMMA) prule ps
;;
let bracketed_one_or_more
(bra:token)
(ket:token)
(sepOpt:token option)
(prule:pstate -> 'a)
(ps:pstate)
: 'a array =
bracketed_seq 1 bra ket sepOpt (ctxt "bracketed_seq" prule) ps
;;
let bracketed_two_or_more
(bra:token)
(ket:token)
(sepOpt:token option)
(prule:pstate -> 'a)
(ps:pstate)
: 'a array =
bracketed_seq 2 bra ket sepOpt (ctxt "bracketed_seq" prule) ps
;;
let bracketed (bra:token) (ket:token) (prule:pstate -> 'a) (ps:pstate) : 'a =
expect ps bra;
let res = ctxt "bracketed" prule ps in
expect ps ket;
res
;;
(*
* Local Variables:
* fill-column: 78;
* indent-tabs-mode: nil
* buffer-file-coding-system: utf-8-unix
* compile-command: "make -k -C ../.. 2>&1 | sed -e 's/\\/x\\//x:\\//g'";
* End:
*)