Remove ANTLR grammar.

It is unused and not maintained.
This commit is contained in:
Mark Simulacrum 2017-05-02 08:34:09 -06:00
parent 12e76e24cc
commit c3781e6200
13 changed files with 0 additions and 4221 deletions

View file

@ -1,4 +0,0 @@
verify
*.class
*.java
*.tokens

View file

@ -1,33 +0,0 @@
# Reference grammar.
Uses [antlr4](http://www.antlr.org/) and a custom Rust tool to compare
ASTs/token streams generated. You can use the `make check-lexer` target to
run all of the available tests.
The build of the rust part is included with `make tidy` and can be run with `make check-build-lexer-verifier`.
# Manual build
To use manually, assuming antlr4 is installed at `/usr/share/java/antlr-complete.jar`:
```
antlr4 RustLexer.g4
javac -classpath /usr/share/java/antlr-complete.jar *.java
rustc -O verify.rs
for file in ../*/**.rs; do
echo $file;
grun RustLexer tokens -tokens < "$file" | ./verify "$file" RustLexer.tokens || break
done
```
Note that the `../*/**.rs` glob will match every `*.rs` file in the above
directory and all of its recursive children. This is a Zsh extension.
## Cleanup
To cleanup you can use a command like this:
```bash
rm -f verify *.class *.java *.tokens
```

View file

@ -1,197 +0,0 @@
// ANTLR4 reference lexer grammar for Rust.  The token stream it produces
// is compared against rustc's own lexer by the verify tool.
lexer grammar RustLexer;
@lexer::members {
// True when the lexer is exactly `pos` characters into the input;
// used to restrict SHEBANG and UTF8_BOM to the start of the file.
public boolean is_at(int pos) {
return _input.index() == pos;
}
}
// Token types emitted without a dedicated rule of their own (several are
// produced via type(...) actions or referenced from composite rules below).
tokens {
EQ, LT, LE, EQEQ, NE, GE, GT, ANDAND, OROR, NOT, TILDE, PLUS,
MINUS, STAR, SLASH, PERCENT, CARET, AND, OR, SHL, SHR, BINOP,
BINOPEQ, LARROW, AT, DOT, DOTDOT, DOTDOTDOT, COMMA, SEMI, COLON,
MOD_SEP, RARROW, FAT_ARROW, LPAREN, RPAREN, LBRACKET, RBRACKET,
LBRACE, RBRACE, POUND, DOLLAR, UNDERSCORE, LIT_CHAR, LIT_BYTE,
LIT_INTEGER, LIT_FLOAT, LIT_STR, LIT_STR_RAW, LIT_BYTE_STR,
LIT_BYTE_STR_RAW, QUESTION, IDENT, LIFETIME, WHITESPACE, DOC_COMMENT,
COMMENT, SHEBANG, UTF8_BOM
}
// XID_Start / XID_Continue character classes come from companion grammars.
import xidstart , xidcontinue;
/* Expression-operator symbols */
EQ : '=' ;
LT : '<' ;
LE : '<=' ;
EQEQ : '==' ;
NE : '!=' ;
GE : '>=' ;
GT : '>' ;
ANDAND : '&&' ;
OROR : '||' ;
NOT : '!' ;
TILDE : '~' ;
PLUS : '+' ;
MINUS : '-' ;
STAR : '*' ;
SLASH : '/' ;
PERCENT : '%' ;
CARET : '^' ;
AND : '&' ;
OR : '|' ;
SHL : '<<' ;
SHR : '>>' ;
LARROW : '<-' ;
BINOP
: PLUS
| SLASH
| MINUS
| STAR
| PERCENT
| CARET
| AND
| OR
| SHL
| SHR
| LARROW
;
BINOPEQ : BINOP EQ ;
/* "Structural symbols" */
AT : '@' ;
DOT : '.' ;
DOTDOT : '..' ;
DOTDOTDOT : '...' ;
COMMA : ',' ;
SEMI : ';' ;
COLON : ':' ;
MOD_SEP : '::' ;
RARROW : '->' ;
FAT_ARROW : '=>' ;
LPAREN : '(' ;
RPAREN : ')' ;
LBRACKET : '[' ;
RBRACKET : ']' ;
LBRACE : '{' ;
RBRACE : '}' ;
POUND : '#';
DOLLAR : '$' ;
UNDERSCORE : '_' ;
// Literals
fragment HEXIT
: [0-9a-fA-F]
;
fragment CHAR_ESCAPE
: [nrt\\'"0]
| [xX] HEXIT HEXIT
| 'u' HEXIT HEXIT HEXIT HEXIT
| 'U' HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT
| 'u{' HEXIT '}'
| 'u{' HEXIT HEXIT '}'
| 'u{' HEXIT HEXIT HEXIT '}'
| 'u{' HEXIT HEXIT HEXIT HEXIT '}'
| 'u{' HEXIT HEXIT HEXIT HEXIT HEXIT '}'
| 'u{' HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT '}'
;
fragment SUFFIX
: IDENT
;
// A suffix must not start with e/E so that `1e5` lexes as a float
// rather than an integer followed by a suffix.
fragment INTEGER_SUFFIX
: { _input.LA(1) != 'e' && _input.LA(1) != 'E' }? SUFFIX
;
LIT_CHAR
: '\'' ( '\\' CHAR_ESCAPE
| ~[\\'\n\t\r]
| '\ud800' .. '\udbff' '\udc00' .. '\udfff'
)
'\'' SUFFIX?
;
LIT_BYTE
: 'b\'' ( '\\' ( [xX] HEXIT HEXIT
| [nrt\\'"0] )
| ~[\\'\n\t\r] '\udc00'..'\udfff'?
)
'\'' SUFFIX?
;
LIT_INTEGER
: [0-9][0-9_]* INTEGER_SUFFIX?
| '0b' [01_]+ INTEGER_SUFFIX?
| '0o' [0-7_]+ INTEGER_SUFFIX?
| '0x' [0-9a-fA-F_]+ INTEGER_SUFFIX?
;
LIT_FLOAT
: [0-9][0-9_]* ('.' {
/* dot followed by another dot is a range, not a float */
_input.LA(1) != '.' &&
/* dot followed by an identifier is an integer with a function call, not a float */
_input.LA(1) != '_' &&
!(_input.LA(1) >= 'a' && _input.LA(1) <= 'z') &&
!(_input.LA(1) >= 'A' && _input.LA(1) <= 'Z')
}? | ('.' [0-9][0-9_]*)? ([eE] [-+]? [0-9][0-9_]*)? SUFFIX?)
;
LIT_STR
: '"' ('\\\n' | '\\\r\n' | '\\' CHAR_ESCAPE | .)*? '"' SUFFIX?
;
LIT_BYTE_STR : 'b' LIT_STR ;
LIT_BYTE_STR_RAW : 'b' LIT_STR_RAW ;
/* this is a bit messy */
fragment LIT_STR_RAW_INNER
: '"' .*? '"'
| LIT_STR_RAW_INNER2
;
fragment LIT_STR_RAW_INNER2
: POUND LIT_STR_RAW_INNER POUND
;
LIT_STR_RAW
: 'r' LIT_STR_RAW_INNER SUFFIX?
;
QUESTION : '?';
IDENT : XID_Start XID_Continue* ;
// NOTE(review): QUESTION_IDENTIFIER is not referenced by any other rule
// in this grammar — confirm whether it is still needed.
fragment QUESTION_IDENTIFIER : QUESTION? IDENT;
LIFETIME : '\'' IDENT ;
WHITESPACE : [ \r\n\t]+ ;
// Four or more slashes is an ordinary comment, not a doc comment.
UNDOC_COMMENT : '////' ~[\n]* -> type(COMMENT) ;
YESDOC_COMMENT : '///' ~[\r\n]* -> type(DOC_COMMENT) ;
OUTER_DOC_COMMENT : '//!' ~[\r\n]* -> type(DOC_COMMENT) ;
LINE_COMMENT : '//' ( ~[/\n] ~[\n]* )? -> type(COMMENT) ;
DOC_BLOCK_COMMENT
: ('/**' ~[*] | '/*!') (DOC_BLOCK_COMMENT | .)*? '*/' -> type(DOC_COMMENT)
;
BLOCK_COMMENT : '/*' (BLOCK_COMMENT | .)*? '*/' -> type(COMMENT) ;
/* these appear at the beginning of a file */
SHEBANG : '#!' { is_at(2) && _input.LA(1) != '[' }? ~[\r\n]* -> type(SHEBANG) ;
UTF8_BOM : '\ufeff' { is_at(1) }? -> skip ;

View file

@ -1,52 +0,0 @@
#!/bin/sh
# ignore-license
# Run the reference lexer against libsyntax and compare the tokens and spans.
# If "// ignore-lexer-test" is present in the file, it will be ignored.
# Argument $1 is the file to check, $2 is the classpath to use, $3 is the path
# to the grun binary, $4 is the path to the verify binary, $5 is the path to
# RustLexer.tokens

# Bug fix: `==` inside `[ ]` is a bash extension; POSIX sh requires `=`.
if [ "${VERBOSE}" = "1" ]; then
    set -x
fi

passed=0
failed=0
skipped=0

# check FILE CLASSPATH GRUN VERIFY TOKENS
# Lex FILE with the reference (ANTLR) lexer and feed the token stream to
# the verify binary, which compares it against rustc's own lexer.
check() {
    # Skip files that explicitly opt out of lexer testing.
    if grep -q "// ignore-lexer-test" "$1"; then
        echo "skip: $1"
        skipped=$((skipped + 1))
    else
        cd "$2" # This `cd` is so java will pick up RustLexer.class. I could not
                # figure out how to wrangle the CLASSPATH, just adding build/grammar
                # did not seem to have any effect.
        if "$3" RustLexer tokens -tokens < "$1" | "$4" "$1" "$5"; then
            echo "pass: $1"
            passed=$((passed + 1))
        else
            echo "fail: $1"
            failed=$((failed + 1))
        fi
    fi
}

# NOTE: word-splitting the find output breaks on paths containing spaces;
# kept as-is since the tree this runs over has none.
for file in $(find "$1" -iname '*.rs' ! -path '*/test/compile-fail*'); do
    check "$file" "$2" "$3" "$4" "$5"
done

printf "\ntest result: "
if [ "$failed" -eq 0 ]; then
    printf "ok. $passed passed; $failed failed; $skipped skipped\n\n"
else
    printf "failed. $passed passed; $failed failed; $skipped skipped\n\n"
    exit 1
fi

View file

@ -1,343 +0,0 @@
%{
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// Reference flex lexer for Rust; token codes come from the companion
// token enum header (SHL, LIT_STR, ... start at 257).
#include <stdio.h>
#include <ctype.h>
// Raw-(byte-)string bookkeeping: how many '#'s opened the current
// literal, how many consecutive closing '#'s have been seen so far,
// and whether any non-'#' character has appeared after the opening.
static int num_hashes;
static int end_hashes;
static int saw_non_hash;
%}
%option stack
%option yylineno
/* Exclusive start conditions for the various literal and comment forms. */
%x str
%x rawstr
%x rawstr_esc_begin
%x rawstr_esc_body
%x rawstr_esc_end
%x byte
%x bytestr
%x rawbytestr
%x rawbytestr_nohash
%x pound
%x shebang_or_attr
%x ltorchar
%x linecomment
%x doc_line
%x blockcomment
%x doc_block
%x suffix
/* ASCII identifier; bytes >= 0x80 are allowed so UTF-8 idents pass through. */
/* NOTE(review): the octal rule below uses [0-8_], but octal digits are 0-7
   (the ANTLR grammar uses [0-7_]) — confirm which is intended. */
ident [a-zA-Z\x80-\xff_][a-zA-Z0-9\x80-\xff_]*
%%
<suffix>{ident} { BEGIN(INITIAL); }
<suffix>(.|\n) { yyless(0); BEGIN(INITIAL); }
[ \n\t\r] { }
\xef\xbb\xbf {
// UTF-8 byte order mark (BOM), ignore if in line 1, error otherwise
if (yyget_lineno() != 1) {
return -1;
}
}
\/\/(\/|\!) { BEGIN(doc_line); yymore(); }
<doc_line>\n { BEGIN(INITIAL);
yyleng--;
yytext[yyleng] = 0;
return ((yytext[2] == '!') ? INNER_DOC_COMMENT : OUTER_DOC_COMMENT);
}
<doc_line>[^\n]* { yymore(); }
\/\/|\/\/\/\/ { BEGIN(linecomment); }
<linecomment>\n { BEGIN(INITIAL); }
<linecomment>[^\n]* { }
\/\*(\*|\!)[^*] { yy_push_state(INITIAL); yy_push_state(doc_block); yymore(); }
<doc_block>\/\* { yy_push_state(doc_block); yymore(); }
<doc_block>\*\/ {
yy_pop_state();
if (yy_top_state() == doc_block) {
yymore();
} else {
return ((yytext[2] == '!') ? INNER_DOC_COMMENT : OUTER_DOC_COMMENT);
}
}
<doc_block>(.|\n) { yymore(); }
\/\* { yy_push_state(blockcomment); }
<blockcomment>\/\* { yy_push_state(blockcomment); }
<blockcomment>\*\/ { yy_pop_state(); }
<blockcomment>(.|\n) { }
_ { return UNDERSCORE; }
as { return AS; }
box { return BOX; }
break { return BREAK; }
const { return CONST; }
continue { return CONTINUE; }
crate { return CRATE; }
else { return ELSE; }
enum { return ENUM; }
extern { return EXTERN; }
false { return FALSE; }
fn { return FN; }
for { return FOR; }
if { return IF; }
impl { return IMPL; }
in { return IN; }
let { return LET; }
loop { return LOOP; }
match { return MATCH; }
mod { return MOD; }
move { return MOVE; }
mut { return MUT; }
priv { return PRIV; }
proc { return PROC; }
pub { return PUB; }
ref { return REF; }
return { return RETURN; }
self { return SELF; }
static { return STATIC; }
struct { return STRUCT; }
trait { return TRAIT; }
true { return TRUE; }
type { return TYPE; }
typeof { return TYPEOF; }
unsafe { return UNSAFE; }
use { return USE; }
where { return WHERE; }
while { return WHILE; }
{ident} { return IDENT; }
0x[0-9a-fA-F_]+ { BEGIN(suffix); return LIT_INTEGER; }
0o[0-8_]+ { BEGIN(suffix); return LIT_INTEGER; }
0b[01_]+ { BEGIN(suffix); return LIT_INTEGER; }
[0-9][0-9_]* { BEGIN(suffix); return LIT_INTEGER; }
[0-9][0-9_]*\.(\.|[a-zA-Z]) { yyless(yyleng - 2); BEGIN(suffix); return LIT_INTEGER; }
[0-9][0-9_]*\.[0-9_]*([eE][-\+]?[0-9_]+)? { BEGIN(suffix); return LIT_FLOAT; }
[0-9][0-9_]*(\.[0-9_]*)?[eE][-\+]?[0-9_]+ { BEGIN(suffix); return LIT_FLOAT; }
; { return ';'; }
, { return ','; }
\.\.\. { return DOTDOTDOT; }
\.\. { return DOTDOT; }
\. { return '.'; }
\( { return '('; }
\) { return ')'; }
\{ { return '{'; }
\} { return '}'; }
\[ { return '['; }
\] { return ']'; }
@ { return '@'; }
# { BEGIN(pound); yymore(); }
<pound>\! { BEGIN(shebang_or_attr); yymore(); }
<shebang_or_attr>\[ {
BEGIN(INITIAL);
yyless(2);
return SHEBANG;
}
<shebang_or_attr>[^\[\n]*\n {
// Since the \n was eaten as part of the token, yylineno will have
// been incremented to the value 2 if the shebang was on the first
// line. This yyless undoes that, setting yylineno back to 1.
yyless(yyleng - 1);
if (yyget_lineno() == 1) {
BEGIN(INITIAL);
return SHEBANG_LINE;
} else {
BEGIN(INITIAL);
yyless(2);
return SHEBANG;
}
}
<pound>. { BEGIN(INITIAL); yyless(1); return '#'; }
\~ { return '~'; }
:: { return MOD_SEP; }
: { return ':'; }
\$ { return '$'; }
\? { return '?'; }
== { return EQEQ; }
=> { return FAT_ARROW; }
= { return '='; }
\!= { return NE; }
\! { return '!'; }
\<= { return LE; }
\<\< { return SHL; }
\<\<= { return SHLEQ; }
\< { return '<'; }
\>= { return GE; }
\>\> { return SHR; }
\>\>= { return SHREQ; }
\> { return '>'; }
\x27 { BEGIN(ltorchar); yymore(); }
<ltorchar>static { BEGIN(INITIAL); return STATIC_LIFETIME; }
<ltorchar>{ident} { BEGIN(INITIAL); return LIFETIME; }
<ltorchar>\\[nrt\\\x27\x220]\x27 { BEGIN(suffix); return LIT_CHAR; }
<ltorchar>\\x[0-9a-fA-F]{2}\x27 { BEGIN(suffix); return LIT_CHAR; }
<ltorchar>\\u\{[0-9a-fA-F]?{6}\}\x27 { BEGIN(suffix); return LIT_CHAR; }
<ltorchar>.\x27 { BEGIN(suffix); return LIT_CHAR; }
<ltorchar>[\x80-\xff]{2,4}\x27 { BEGIN(suffix); return LIT_CHAR; }
<ltorchar><<EOF>> { BEGIN(INITIAL); return -1; }
b\x22 { BEGIN(bytestr); yymore(); }
<bytestr>\x22 { BEGIN(suffix); return LIT_BYTE_STR; }
<bytestr><<EOF>> { return -1; }
<bytestr>\\[n\nrt\\\x27\x220] { yymore(); }
<bytestr>\\x[0-9a-fA-F]{2} { yymore(); }
<bytestr>\\u\{[0-9a-fA-F]?{6}\} { yymore(); }
<bytestr>\\[^n\nrt\\\x27\x220] { return -1; }
<bytestr>(.|\n) { yymore(); }
br\x22 { BEGIN(rawbytestr_nohash); yymore(); }
<rawbytestr_nohash>\x22 { BEGIN(suffix); return LIT_BYTE_STR_RAW; }
<rawbytestr_nohash>(.|\n) { yymore(); }
<rawbytestr_nohash><<EOF>> { return -1; }
br/# {
BEGIN(rawbytestr);
yymore();
num_hashes = 0;
saw_non_hash = 0;
end_hashes = 0;
}
<rawbytestr># {
if (!saw_non_hash) {
num_hashes++;
} else if (end_hashes != 0) {
end_hashes++;
if (end_hashes == num_hashes) {
BEGIN(INITIAL);
return LIT_BYTE_STR_RAW;
}
}
yymore();
}
<rawbytestr>\x22# {
end_hashes = 1;
if (end_hashes == num_hashes) {
BEGIN(INITIAL);
return LIT_BYTE_STR_RAW;
}
yymore();
}
<rawbytestr>(.|\n) {
if (!saw_non_hash) {
saw_non_hash = 1;
}
if (end_hashes != 0) {
end_hashes = 0;
}
yymore();
}
<rawbytestr><<EOF>> { return -1; }
b\x27 { BEGIN(byte); yymore(); }
<byte>\\[nrt\\\x27\x220]\x27 { BEGIN(INITIAL); return LIT_BYTE; }
<byte>\\x[0-9a-fA-F]{2}\x27 { BEGIN(INITIAL); return LIT_BYTE; }
<byte>\\u[0-9a-fA-F]{4}\x27 { BEGIN(INITIAL); return LIT_BYTE; }
<byte>\\U[0-9a-fA-F]{8}\x27 { BEGIN(INITIAL); return LIT_BYTE; }
<byte>.\x27 { BEGIN(INITIAL); return LIT_BYTE; }
<byte><<EOF>> { BEGIN(INITIAL); return -1; }
r\x22 { BEGIN(rawstr); yymore(); }
<rawstr>\x22 { BEGIN(suffix); return LIT_STR_RAW; }
<rawstr>(.|\n) { yymore(); }
<rawstr><<EOF>> { return -1; }
r/# {
BEGIN(rawstr_esc_begin);
yymore();
num_hashes = 0;
saw_non_hash = 0;
end_hashes = 0;
}
<rawstr_esc_begin># {
num_hashes++;
yymore();
}
<rawstr_esc_begin>\x22 {
BEGIN(rawstr_esc_body);
yymore();
}
<rawstr_esc_begin>(.|\n) { return -1; }
<rawstr_esc_body>\x22/# {
BEGIN(rawstr_esc_end);
yymore();
}
<rawstr_esc_body>(.|\n) {
yymore();
}
<rawstr_esc_end># {
end_hashes++;
if (end_hashes == num_hashes) {
BEGIN(INITIAL);
return LIT_STR_RAW;
}
yymore();
}
<rawstr_esc_end>[^#] {
end_hashes = 0;
BEGIN(rawstr_esc_body);
yymore();
}
<rawstr_esc_begin,rawstr_esc_body,rawstr_esc_end><<EOF>> { return -1; }
\x22 { BEGIN(str); yymore(); }
<str>\x22 { BEGIN(suffix); return LIT_STR; }
<str><<EOF>> { return -1; }
<str>\\[n\nr\rt\\\x27\x220] { yymore(); }
<str>\\x[0-9a-fA-F]{2} { yymore(); }
<str>\\u\{[0-9a-fA-F]?{6}\} { yymore(); }
<str>\\[^n\nrt\\\x27\x220] { return -1; }
<str>(.|\n) { yymore(); }
\<- { return LARROW; }
-\> { return RARROW; }
- { return '-'; }
-= { return MINUSEQ; }
&& { return ANDAND; }
& { return '&'; }
&= { return ANDEQ; }
\|\| { return OROR; }
\| { return '|'; }
\|= { return OREQ; }
\+ { return '+'; }
\+= { return PLUSEQ; }
\* { return '*'; }
\*= { return STAREQ; }
\/ { return '/'; }
\/= { return SLASHEQ; }
\^ { return '^'; }
\^= { return CARETEQ; }
% { return '%'; }
%= { return PERCENTEQ; }
<<EOF>> { return 0; }
%%

View file

@ -1,203 +0,0 @@
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
extern int yylex();
extern int rsparse();
#define PUSHBACK_LEN 4
static char pushback[PUSHBACK_LEN];
static int verbose;
// printf-style output that is emitted only when verbose mode is on
// (enabled by passing -v to main).
void print(const char* format, ...) {
    if (verbose) {
        va_list ap;
        va_start(ap, format);
        vprintf(format, ap);
        va_end(ap);
    }
}
// Fetch the next token for the parser.  If the pushback queue holds a
// character token, deliver it and shift the queue forward; otherwise
// delegate to the flex-generated yylex().
int rslex() {
    if (pushback[0] != '\0') {
        char head = pushback[0];
        memmove(pushback, pushback + 1, PUSHBACK_LEN - 1);
        pushback[PUSHBACK_LEN - 1] = '\0';
        return head;
    }
    return yylex();
}
// Queue single-character token `c` for rslex() to return later.  If the
// queue is already full the character is silently dropped; as long as no
// action pushes more than PUSHBACK_LEN characters, that never happens.
void push_back(char c) {
    int slot = 0;
    while (slot < PUSHBACK_LEN && pushback[slot] != '\0') {
        ++slot;
    }
    if (slot < PUSHBACK_LEN) {
        pushback[slot] = c;
    }
}
extern int rsdebug;
// A parse-tree node.  Every node is also chained into the global
// doubly-linked `nodes` list so all allocations can be freed (and the
// most recent tree printed) in one sweep from main().
struct node {
struct node *next;
struct node *prev;
// Nonzero when `name` was strdup'd (see mk_atom) and must be freed.
int own_string;
char const *name;
int n_elems;
// Flexible array member: n_elems child pointers.
struct node *elems[];
};
// Head of the list of all live nodes (most recently created first).
struct node *nodes = NULL;
int n_nodes;
// Allocate an n-ary tree node named `name`, reading n child pointers from
// the varargs, and push it onto the global `nodes` list.  `name` is NOT
// copied (own_string = 0); use mk_atom when the caller's string is
// temporary.
struct node *mk_node(char const *name, int n, ...) {
va_list ap;
int i = 0;
unsigned sz = sizeof(struct node) + (n * sizeof(struct node *));
struct node *nn, *nd = (struct node *)malloc(sz);
print("# New %d-ary node: %s = %p\n", n, name, nd);
nd->own_string = 0;
nd->prev = NULL;
nd->next = nodes;
if (nodes) {
nodes->prev = nd;
}
nodes = nd;
nd->name = name;
nd->n_elems = n;
va_start(ap, n);
while (i < n) {
nn = va_arg(ap, struct node *);
print("# arg[%d]: %p\n", i, nn);
print("# (%s ...)\n", nn->name);
nd->elems[i++] = nn;
}
va_end(ap);
n_nodes++;
return nd;
}
// Leaf-node constructor: stores a private copy of `name` and marks the
// node as owning it, so the cleanup sweep in main() frees the copy.
struct node *mk_atom(char *name) {
    struct node *leaf = mk_node((char const *)strdup(name), 0);
    leaf->own_string = 1;
    return leaf;
}
// Placeholder leaf used where the grammar has nothing to record.
struct node *mk_none() {
return mk_atom("<none>");
}
// Grow `nd` by `n` extra child nodes taken from the varargs.  The node
// may be moved by realloc, so the (possibly new) pointer is returned and
// the node is re-linked at the head of the global `nodes` list.
struct node *ext_node(struct node *nd, int n, ...) {
    va_list ap;
    int i = 0, c = nd->n_elems + n;
    unsigned sz = sizeof(struct node) + (c * sizeof(struct node *));
    struct node *nn;
    print("# Extending %d-ary node by %d nodes: %s = %p",
          nd->n_elems, c, nd->name, nd);
    // Fully unlink nd before realloc can move it.
    if (nd->next) {
        nd->next->prev = nd->prev;
    }
    if (nd->prev) {
        nd->prev->next = nd->next;
    }
    if (nodes == nd) {
        // Bug fix: when nd was the list head, `nodes` previously kept
        // pointing at the stale pre-realloc address (use-after-free).
        nodes = nd->next;
    }
    nd = realloc(nd, sz);
    nd->prev = NULL;
    nd->next = nodes;
    if (nodes) {
        // Bug fix: the list may now be empty (nd was the only node);
        // the old code dereferenced `nodes` unconditionally.
        nodes->prev = nd;
    }
    nodes = nd;
    print(" ==> %p\n", nd);
    va_start(ap, n);
    while (i < n) {
        nn = va_arg(ap, struct node *);
        print("# arg[%d]: %p\n", i, nn);
        print("# (%s ...)\n", nn->name);
        // Append after the existing children.
        nd->elems[nd->n_elems++] = nn;
        ++i;
    }
    va_end(ap);
    return nd;
}
int const indent_step = 4;

// Print `depth` columns of indentation, drawing a '|' guide at every
// column whose (countdown) position is a multiple of indent_step.
void print_indent(int depth) {
    int col;
    for (col = depth; col > 0; col--) {
        print(col % indent_step == 0 ? "|" : " ");
    }
}
// Recursively dump the tree rooted at `n` as an indented s-expression
// (leaves print bare; interior nodes print "(name ... )").
void print_node(struct node *n, int depth) {
    print_indent(depth);
    if (n->n_elems == 0) {
        print("%s\n", n->name);
        return;
    }
    print("(%s\n", n->name);
    for (int i = 0; i < n->n_elems; ++i) {
        print_node(n->elems[i], depth + indent_step);
    }
    print_indent(depth);
    print(")\n");
}
// Entry point: parse stdin via the generated parser (rsparse), dump the
// resulting node tree when -v was passed, then free every node.
int main(int argc, char **argv) {
if (argc == 2 && strcmp(argv[1], "-v") == 0) {
verbose = 1;
} else {
verbose = 0;
}
int ret = 0;
struct node *tmp;
memset(pushback, '\0', PUSHBACK_LEN);
ret = rsparse();
print("--- PARSE COMPLETE: ret:%d, n_nodes:%d ---\n", ret, n_nodes);
if (nodes) {
print_node(nodes, 0);
}
// Sweep the global list, freeing each node and any strdup'd name.
while (nodes) {
tmp = nodes;
nodes = tmp->next;
if (tmp->own_string) {
free((void*)tmp->name);
}
free(tmp);
}
return ret;
}
// Error callback invoked by the generated parser (rsparse).
void rserror(char const *s) {
    fputs(s, stderr);
    fputc('\n', stderr);
}

File diff suppressed because it is too large Load diff

View file

@ -1,64 +0,0 @@
Rust's lexical grammar is not context-free. Raw string literals are the source
of the problem. Informally, a raw string literal is an `r`, followed by `N`
hashes (where N can be zero), a quote, any characters, then a quote followed
by `N` hashes. Critically, once inside the first pair of quotes,
another quote cannot be followed by `N` consecutive hashes. e.g.
`r###""###"###` is invalid.
The following grammar describes this as well as possible:
R -> 'r' S
S -> '"' B '"'
S -> '#' S '#'
B -> . B
B -> ε
Where `.` represents any character, and `ε` the empty string. Consider the
string `r#""#"#`. This string is not a valid raw string literal, but can be
accepted as one by the above grammar, using the derivation:
R : #""#"#
S : ""#"
S : "#
B : #
B : ε
(Where `T : U` means the rule `T` is applied, and `U` is the remainder of the
string.) The difficulty arises from the fact that it is fundamentally
context-sensitive. In particular, the context needed is the number of hashes.
To prove that Rust's string literals are not context-free, we will use
the fact that context-free languages are closed under intersection with
regular languages, and the
[pumping lemma for context-free languages](https://en.wikipedia.org/wiki/Pumping_lemma_for_context-free_languages).
Consider the regular language `R = r#+""#*"#+`. If Rust's raw string literals are
context-free, then their intersection with `R`, `R'`, should also be context-free.
Therefore, to prove that raw string literals are not context-free,
it is sufficient to prove that `R'` is not context-free.
The language `R'` is `{r#^n""#^m"#^n | m < n}`.
Assume `R'` *is* context-free. Then `R'` has some pumping length `p > 0` for which
the pumping lemma applies. Consider the following string `s` in `R'`:
`r#^p""#^{p-1}"#^p`
e.g. for `p = 2`: `s = r##""#"##`
Then `s = uvwxy` for some choice of `uvwxy` such that `vx` is non-empty,
`|vwx| < p+1`, and `uv^iwx^iy` is in `R'` for all `i >= 0`.
Neither `v` nor `x` can contain a `"` or `r`, as the number of these characters
in any string in `R'` is fixed. So `v` and `x` contain only hashes.
Consequently, of the three sequences of hashes, `v` and `x` combined
can only pump two of them.
If we ever choose the central sequence of hashes, then one of the outer sequences
will not grow when we pump, leading to an imbalance between the outer sequences.
Therefore, we must pump both outer sequences of hashes. However,
there are `p+2` characters between these two sequences of hashes, and `|vwx|` must
be less than `p+1`. Therefore we have a contradiction, and `R'` must not be
context-free.
Since `R'` is not context-free, it follows that Rust's raw string literals
are not context-free.

View file

@ -1,76 +0,0 @@
#!/usr/bin/env python
#
# Copyright 2015 The Rust Project Developers. See the COPYRIGHT
# file at the top-level directory of this distribution and at
# http://rust-lang.org/COPYRIGHT.
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.

# ignore-tidy-linelength

# Run one or more external parsers over every .rs file under SOURCE_DIR and
# tally pass/fail counts, treating files whose path contains "parse-fail" as
# expected failures.
import sys
import os
import subprocess
import argparse

# usage: testparser.py [-h] [-p PARSER [PARSER ...]] -s SOURCE_DIR
#
# Parsers should read from stdin and return exit status 0 for a
# successful parse, and nonzero for an unsuccessful parse

parser = argparse.ArgumentParser()
parser.add_argument('-p', '--parser', nargs='+')
parser.add_argument('-s', '--source-dir', nargs=1, required=True)
args = parser.parse_args(sys.argv[1:])

total = 0
ok = {}       # parser -> number of files with the expected outcome
bad = {}      # parser -> files with the wrong outcome
for parser in args.parser:
    ok[parser] = 0
    bad[parser] = []

devnull = open(os.devnull, 'w')
print("\n")
for base, dirs, files in os.walk(args.source_dir[0]):
    for f in filter(lambda p: p.endswith('.rs'), files):
        p = os.path.join(base, f)
        parse_fail = 'parse-fail' in p
        # Read the file first so we can honour its ignore markers.
        # (Fix: the handles were previously never closed.)
        if sys.version_info.major == 3:
            src = open(p, encoding='utf-8')
        else:
            src = open(p)
        with src:
            lines = src.readlines()
        if any('ignore-test' in line or 'ignore-lexer-test' in line for line in lines):
            continue
        total += 1
        for parser in args.parser:
            with open(p) as parser_stdin:
                status = subprocess.call(parser,
                                         stdin=parser_stdin,
                                         stderr=subprocess.STDOUT,
                                         stdout=devnull)
            if status == 0:
                if parse_fail:
                    bad[parser].append(p)
                else:
                    ok[parser] += 1
            else:
                if parse_fail:
                    ok[parser] += 1
                else:
                    bad[parser].append(p)
        parser_stats = ', '.join(['{}: {}'.format(parser, ok[parser]) for parser in args.parser])
        # Bug fix: parser_stats is a summary string, not a path, so calling
        # os.path.relpath() on it was bogus (it mangled the progress line).
        sys.stdout.write("\033[K\r total: {}, {}, scanned {}"
                         .format(total, parser_stats, os.path.relpath(p)))
devnull.close()
print("\n")

for parser in args.parser:
    filename = os.path.basename(parser) + '.bad'
    print("writing {} files that did not yield the correct result with {} to {}".format(len(bad[parser]), parser, filename))
    with open(filename, "w") as f:
        for p in bad[parser]:
            f.write(p)
            f.write("\n")

View file

@ -1,91 +0,0 @@
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// Multi-character and keyword token codes for the reference lexer.
// Values are assigned sequentially starting at 257, so the ORDER of the
// members is part of the interface — do not reorder.
enum Token {
SHL = 257, // Parser generators reserve 0-256 for char literals
SHR,
LE,
EQEQ,
NE,
GE,
ANDAND,
OROR,
SHLEQ,
SHREQ,
MINUSEQ,
ANDEQ,
OREQ,
PLUSEQ,
STAREQ,
SLASHEQ,
CARETEQ,
PERCENTEQ,
DOTDOT,
DOTDOTDOT,
MOD_SEP,
RARROW,
FAT_ARROW,
// literals
LIT_BYTE,
LIT_CHAR,
LIT_INTEGER,
LIT_FLOAT,
LIT_STR,
LIT_STR_RAW,
LIT_BYTE_STR,
LIT_BYTE_STR_RAW,
IDENT,
UNDERSCORE,
LIFETIME,
// keywords
SELF,
STATIC,
AS,
BREAK,
CRATE,
ELSE,
ENUM,
EXTERN,
FALSE,
FN,
FOR,
IF,
IMPL,
IN,
LET,
LOOP,
MATCH,
MOD,
MOVE,
MUT,
PRIV,
PUB,
REF,
RETURN,
STRUCT,
TRUE,
TRAIT,
TYPE,
UNSAFE,
USE,
WHILE,
CONTINUE,
PROC,
BOX,
CONST,
WHERE,
TYPEOF,
// comments and file-start tokens
INNER_DOC_COMMENT,
OUTER_DOC_COMMENT,
SHEBANG,
SHEBANG_LINE,
STATIC_LIFETIME
};

View file

@ -1,361 +0,0 @@
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
#![feature(plugin, rustc_private)]
extern crate syntax;
extern crate syntax_pos;
extern crate rustc;
#[macro_use]
extern crate log;
use std::collections::HashMap;
use std::env;
use std::fs::File;
use std::io::{BufRead, Read};
use std::path::Path;
use syntax::parse::lexer;
use rustc::dep_graph::DepGraph;
use rustc::session::{self, config};
use rustc::middle::cstore::DummyCrateStore;
use std::rc::Rc;
use syntax::ast;
use syntax::codemap;
use syntax::parse::token::{self, BinOpToken, DelimToken, Lit, Token};
use syntax::parse::lexer::TokenAndSpan;
use syntax_pos::Pos;
use syntax::symbol::{Symbol, keywords};
// Build a map from ANTLR token number (as written in RustLexer.tokens,
// one "NAME=NUM" pair per line) to a prototype rustc token.  Tokens that
// carry a payload get dummy contents here; parse_antlr_token substitutes
// the real content later.
fn parse_token_list(file: &str) -> HashMap<String, token::Token> {
// Dummy identifier token, reused for IDENT and the suffix pseudo-tokens.
fn id() -> token::Token {
Token::Ident(ast::Ident::with_empty_ctxt(keywords::Invalid.name()))
}
let mut res = HashMap::new();
// ANTLR reports EOF as token number -1.
res.insert("-1".to_string(), Token::Eof);
for line in file.split('\n') {
// Split "NAME=NUM" on the last '='; lines without one are skipped.
// NOTE(review): the index comes from line.trim() but is used to slice
// the untrimmed `line` — only safe while lines have no leading
// whitespace; confirm against the generated .tokens file.
let eq = match line.trim().rfind('=') {
Some(val) => val,
None => continue
};
let val = &line[..eq];
let num = &line[eq + 1..];
let tok = match val {
"SHR" => Token::BinOp(BinOpToken::Shr),
"DOLLAR" => Token::Dollar,
"LT" => Token::Lt,
"STAR" => Token::BinOp(BinOpToken::Star),
"FLOAT_SUFFIX" => id(),
"INT_SUFFIX" => id(),
"SHL" => Token::BinOp(BinOpToken::Shl),
"LBRACE" => Token::OpenDelim(DelimToken::Brace),
"RARROW" => Token::RArrow,
"LIT_STR" => Token::Literal(Lit::Str_(keywords::Invalid.name()), None),
"DOTDOT" => Token::DotDot,
"MOD_SEP" => Token::ModSep,
"DOTDOTDOT" => Token::DotDotDot,
"NOT" => Token::Not,
"AND" => Token::BinOp(BinOpToken::And),
"LPAREN" => Token::OpenDelim(DelimToken::Paren),
"ANDAND" => Token::AndAnd,
"AT" => Token::At,
"LBRACKET" => Token::OpenDelim(DelimToken::Bracket),
"LIT_STR_RAW" => Token::Literal(Lit::StrRaw(keywords::Invalid.name(), 0), None),
"RPAREN" => Token::CloseDelim(DelimToken::Paren),
"SLASH" => Token::BinOp(BinOpToken::Slash),
"COMMA" => Token::Comma,
"LIFETIME" => Token::Lifetime(
ast::Ident::with_empty_ctxt(keywords::Invalid.name())),
"CARET" => Token::BinOp(BinOpToken::Caret),
"TILDE" => Token::Tilde,
"IDENT" => id(),
"PLUS" => Token::BinOp(BinOpToken::Plus),
"LIT_CHAR" => Token::Literal(Lit::Char(keywords::Invalid.name()), None),
"LIT_BYTE" => Token::Literal(Lit::Byte(keywords::Invalid.name()), None),
"EQ" => Token::Eq,
"RBRACKET" => Token::CloseDelim(DelimToken::Bracket),
"COMMENT" => Token::Comment,
"DOC_COMMENT" => Token::DocComment(keywords::Invalid.name()),
"DOT" => Token::Dot,
"EQEQ" => Token::EqEq,
"NE" => Token::Ne,
"GE" => Token::Ge,
"PERCENT" => Token::BinOp(BinOpToken::Percent),
"RBRACE" => Token::CloseDelim(DelimToken::Brace),
"BINOP" => Token::BinOp(BinOpToken::Plus),
"POUND" => Token::Pound,
"OROR" => Token::OrOr,
"LIT_INTEGER" => Token::Literal(Lit::Integer(keywords::Invalid.name()), None),
"BINOPEQ" => Token::BinOpEq(BinOpToken::Plus),
"LIT_FLOAT" => Token::Literal(Lit::Float(keywords::Invalid.name()), None),
"WHITESPACE" => Token::Whitespace,
"UNDERSCORE" => Token::Underscore,
"MINUS" => Token::BinOp(BinOpToken::Minus),
"SEMI" => Token::Semi,
"COLON" => Token::Colon,
"FAT_ARROW" => Token::FatArrow,
"OR" => Token::BinOp(BinOpToken::Or),
"GT" => Token::Gt,
"LE" => Token::Le,
"LIT_BINARY" => Token::Literal(Lit::ByteStr(keywords::Invalid.name()), None),
"LIT_BINARY_RAW" => Token::Literal(
Lit::ByteStrRaw(keywords::Invalid.name(), 0), None),
"QUESTION" => Token::Question,
"SHEBANG" => Token::Shebang(keywords::Invalid.name()),
// Any name not listed above is deliberately ignored.
_ => continue,
};
res.insert(num.to_string(), tok);
}
debug!("Token map: {:?}", res);
res
}
/// Map the textual form of a binary operator to its `BinOpToken`.
/// Panics on anything that is not one of the ten binary operators.
fn str_to_binop(s: &str) -> token::BinOpToken {
    match s {
        "+" => BinOpToken::Plus,
        "-" => BinOpToken::Minus,
        "*" => BinOpToken::Star,
        "/" => BinOpToken::Slash,
        "%" => BinOpToken::Percent,
        "^" => BinOpToken::Caret,
        "&" => BinOpToken::And,
        "|" => BinOpToken::Or,
        "<<" => BinOpToken::Shl,
        ">>" => BinOpToken::Shr,
        other => panic!("Bad binop str `{}`", other),
    }
}
/// Assuming a string/byte string literal, strip out the leading/trailing
/// hashes and surrounding quotes/raw/byte prefix.
fn fix(mut lit: &str) -> ast::Name {
let prefix: Vec<char> = lit.chars().take(2).collect();
// NOTE(review): this handles an "rb" prefix, but Rust raw byte strings
// are written `br"…"` — confirm which spelling the ANTLR lexer emits
// for LIT_BINARY_RAW before relying on this branch.
if prefix[0] == 'r' {
if prefix[1] == 'b' {
lit = &lit[2..]
} else {
lit = &lit[1..];
}
} else if prefix[0] == 'b' {
lit = &lit[1..];
}
let leading_hashes = count(lit);
// +1/-1 skip the opening and closing double quote.
Symbol::intern(&lit[leading_hashes + 1..lit.len() - leading_hashes - 1])
}
/// Assuming a char/byte literal, strip the 'b' prefix and the single quotes.
fn fixchar(mut lit: &str) -> ast::Name {
    if lit.chars().next().unwrap() == 'b' {
        // Drop the byte-literal prefix.
        lit = &lit[1..];
    }
    // Drop the opening and closing single quote.
    Symbol::intern(&lit[1..lit.len() - 1])
}
/// Number of consecutive `#` characters at the start of `lit`.
fn count(lit: &str) -> usize {
    let mut hashes = 0;
    for c in lit.chars() {
        if c != '#' {
            break;
        }
        hashes += 1;
    }
    hashes
}
// Parse one line of ANTLR's token dump (shape given by the old regex
// below: [@seq,start:end='content',<toknum>,line:col]), look the token
// number up in `tokens`, and rebuild a rustc TokenAndSpan carrying the
// real content, with the span corrected for a BOM and surrogate pairs.
fn parse_antlr_token(s: &str, tokens: &HashMap<String, token::Token>, surrogate_pairs_pos: &[usize],
has_bom: bool)
-> TokenAndSpan {
// old regex:
// \[@(?P<seq>\d+),(?P<start>\d+):(?P<end>\d+)='(?P<content>.+?)',<(?P<toknum>-?\d+)>,\d+:\d+]
let start = s.find("[@").unwrap();
let comma = start + s[start..].find(",").unwrap();
let colon = comma + s[comma..].find(":").unwrap();
let content_start = colon + s[colon..].find("='").unwrap();
// Use rfind instead of find, because we don't want to stop at the content
let content_end = content_start + s[content_start..].rfind("',<").unwrap();
let toknum_end = content_end + s[content_end..].find(">,").unwrap();
let start = &s[comma + 1 .. colon];
let end = &s[colon + 1 .. content_start];
let content = &s[content_start + 2 .. content_end];
let toknum = &s[content_end + 3 .. toknum_end];
let not_found = format!("didn't find token {:?} in the map", toknum);
let proto_tok = tokens.get(toknum).expect(&not_found);
let nm = Symbol::intern(content);
debug!("What we got: content (`{}`), proto: {:?}", content, proto_tok);
// Swap the prototype's dummy payload for the real content.
let real_tok = match *proto_tok {
Token::BinOp(..) => Token::BinOp(str_to_binop(content)),
Token::BinOpEq(..) => Token::BinOpEq(str_to_binop(&content[..content.len() - 1])),
Token::Literal(Lit::Str_(..), n) => Token::Literal(Lit::Str_(fix(content)), n),
Token::Literal(Lit::StrRaw(..), n) => Token::Literal(Lit::StrRaw(fix(content),
count(content)), n),
Token::Literal(Lit::Char(..), n) => Token::Literal(Lit::Char(fixchar(content)), n),
Token::Literal(Lit::Byte(..), n) => Token::Literal(Lit::Byte(fixchar(content)), n),
Token::DocComment(..) => Token::DocComment(nm),
Token::Literal(Lit::Integer(..), n) => Token::Literal(Lit::Integer(nm), n),
Token::Literal(Lit::Float(..), n) => Token::Literal(Lit::Float(nm), n),
Token::Literal(Lit::ByteStr(..), n) => Token::Literal(Lit::ByteStr(nm), n),
Token::Literal(Lit::ByteStrRaw(..), n) => Token::Literal(Lit::ByteStrRaw(fix(content),
count(content)), n),
Token::Ident(..) => Token::Ident(ast::Ident::with_empty_ctxt(nm)),
Token::Lifetime(..) => Token::Lifetime(ast::Ident::with_empty_ctxt(nm)),
ref t => t.clone()
};
// NOTE(review): Eof's start is pulled back by one position — presumably
// because ANTLR reports EOF one past the end; confirm.
let start_offset = if real_tok == Token::Eof {
1
} else {
0
};
// A BOM is counted by ANTLR but invisible to rustc's lexer.
let offset = if has_bom { 1 } else { 0 };
let mut lo = start.parse::<u32>().unwrap() - start_offset - offset;
let mut hi = end.parse::<u32>().unwrap() + 1 - offset;
// Adjust the span: For each surrogate pair already encountered, subtract one position.
lo -= surrogate_pairs_pos.binary_search(&(lo as usize)).unwrap_or_else(|x| x) as u32;
hi -= surrogate_pairs_pos.binary_search(&(hi as usize)).unwrap_or_else(|x| x) as u32;
let sp = syntax_pos::Span {
lo: syntax_pos::BytePos(lo),
hi: syntax_pos::BytePos(hi),
expn_id: syntax_pos::NO_EXPANSION
};
TokenAndSpan {
tok: real_tok,
sp: sp
}
}
/// Token equality used by the verifier: `Ident` tokens are compared by
/// their identifier value; every other token kind falls back to `Token`'s
/// own equality.
fn tok_cmp(a: &token::Token, b: &token::Token) -> bool {
    if let &Token::Ident(lhs) = a {
        // `a` is an identifier: it only matches another identifier with the
        // same name.
        if let &Token::Ident(rhs) = b {
            lhs == rhs
        } else {
            false
        }
    } else {
        a == b
    }
}
/// Returns true when the ANTLR-derived span and the rustc span agree, after
/// converting rustc's byte positions into file character positions via the
/// codemap. Checks expansion id first, then the low and high bounds.
fn span_cmp(antlr_sp: codemap::Span, rust_sp: codemap::Span, cm: &codemap::CodeMap) -> bool {
    if antlr_sp.expn_id != rust_sp.expn_id {
        return false;
    }
    if antlr_sp.lo.to_usize() != cm.bytepos_to_file_charpos(rust_sp.lo).to_usize() {
        return false;
    }
    antlr_sp.hi.to_usize() == cm.bytepos_to_file_charpos(rust_sp.hi).to_usize()
}
/// Compares rustc's token stream for a source file against an ANTLR-produced
/// token stream read from stdin.
///
/// CLI arguments: the first is the Rust source file to lex; the second is the
/// ANTLR `.tokens` file mapping token numbers to names. Files whose path
/// contains "parse-fail" are skipped entirely.
///
/// Panics (via `assert!`/`panic!`) when the two streams disagree on spans or
/// token values; name-only mismatches for selected token kinds are only
/// logged with `warn!` (see the FIXME below).
fn main() {
    // Pull the next token out of rustc's lexer.
    fn next(r: &mut lexer::StringReader) -> TokenAndSpan {
        use syntax::parse::lexer::Reader;
        r.next_token()
    }
    let mut args = env::args().skip(1);
    let filename = args.next().unwrap();
    // parse-fail tests are intentionally malformed; don't try to lex them.
    if filename.find("parse-fail").is_some() {
        return;
    }
    // Rust's lexer
    let mut code = String::new();
    File::open(&Path::new(&filename)).unwrap().read_to_string(&mut code).unwrap();
    // Char positions of code points above the BMP: ANTLR (Java) counts each
    // of these as two UTF-16 code units, so spans get adjusted for them in
    // parse_antlr_token.
    let surrogate_pairs_pos: Vec<usize> = code.chars().enumerate()
                                              .filter(|&(_, c)| c as usize > 0xFFFF)
                                              .map(|(n, _)| n)
                                              .enumerate()
                                              .map(|(x, n)| x + n)
                                              .collect();
    let has_bom = code.starts_with("\u{feff}");
    debug!("Pairs: {:?}", surrogate_pairs_pos);
    // Build a minimal rustc session solely to drive the string lexer.
    let options = config::basic_options();
    let session = session::build_session(options, &DepGraph::new(false), None,
                                         syntax::errors::registry::Registry::new(&[]),
                                         Rc::new(DummyCrateStore));
    let filemap = session.parse_sess.codemap()
                         .new_filemap("<n/a>".to_string(), code);
    let mut lexer = lexer::StringReader::new(session.diagnostic(), filemap);
    let cm = session.codemap();
    // ANTLR
    let mut token_file = File::open(&Path::new(&args.next().unwrap())).unwrap();
    let mut token_list = String::new();
    token_file.read_to_string(&mut token_list).unwrap();
    let token_map = parse_token_list(&token_list);
    let stdin = std::io::stdin();
    let lock = stdin.lock();
    let lines = lock.lines();
    // One ANTLR token per line of stdin.
    let antlr_tokens = lines.map(|l| parse_antlr_token(l.unwrap().trim(),
                                                       &token_map,
                                                       &surrogate_pairs_pos,
                                                       has_bom));
    for antlr_tok in antlr_tokens {
        let rustc_tok = next(&mut lexer);
        // Both streams ended: nothing left to compare for this token.
        if rustc_tok.tok == Token::Eof && antlr_tok.tok == Token::Eof {
            continue
        }
        assert!(span_cmp(antlr_tok.sp, rustc_tok.sp, cm), "{:?} and {:?} have different spans",
                rustc_tok,
                antlr_tok);
        // For the token kinds listed in the `matches!` call below, compare
        // via tok_cmp and only warn on name differences; every other kind
        // must be strictly equal or we panic.
        macro_rules! matches {
            ( $($x:pat),+ ) => (
                match rustc_tok.tok {
                    $($x => match antlr_tok.tok {
                        $x => {
                            if !tok_cmp(&rustc_tok.tok, &antlr_tok.tok) {
                                // FIXME #15677: needs more robust escaping in
                                // antlr
                                warn!("Different names for {:?} and {:?}", rustc_tok, antlr_tok);
                            }
                        }
                        _ => panic!("{:?} is not {:?}", antlr_tok, rustc_tok)
                    },)*
                    ref c => assert!(c == &antlr_tok.tok, "{:?} is not {:?}", antlr_tok, rustc_tok)
                }
            )
        }
        matches!(
            Token::Literal(Lit::Byte(..), _),
            Token::Literal(Lit::Char(..), _),
            Token::Literal(Lit::Integer(..), _),
            Token::Literal(Lit::Float(..), _),
            Token::Literal(Lit::Str_(..), _),
            Token::Literal(Lit::StrRaw(..), _),
            Token::Literal(Lit::ByteStr(..), _),
            Token::Literal(Lit::ByteStrRaw(..), _),
            Token::Ident(..),
            Token::Lifetime(..),
            Token::Interpolated(..),
            Token::DocComment(..),
            Token::Shebang(..)
        );
    }
}

View file

@ -1,473 +0,0 @@
// Machine-generated Unicode range table: characters valid as the *continue*
// part of an identifier (XID_Continue), expressed as an ANTLR lexer fragment.
// Entries using two quoted values in sequence (e.g. '\ud800' '\udc00'..) are
// UTF-16 surrogate pairs covering astral-plane code points.
// NOTE(review): the exact Unicode version these ranges were generated from is
// not recorded here — verify against UAX #31 before reusing this table.
// Do not hand-edit individual ranges; regenerate the whole table instead.
lexer grammar Xidcontinue;

fragment XID_Continue:
    '\u0030' .. '\u0039'
    | '\u0041' .. '\u005a'
    | '\u005f'
    | '\u0061' .. '\u007a'
    | '\u00aa'
    | '\u00b5'
    | '\u00b7'
    | '\u00ba'
    | '\u00c0' .. '\u00d6'
    | '\u00d8' .. '\u00f6'
    | '\u00f8' .. '\u0236'
    | '\u0250' .. '\u02c1'
    | '\u02c6' .. '\u02d1'
    | '\u02e0' .. '\u02e4'
    | '\u02ee'
    | '\u0300' .. '\u0357'
    | '\u035d' .. '\u036f'
    | '\u0386'
    | '\u0388' .. '\u038a'
    | '\u038c'
    | '\u038e' .. '\u03a1'
    | '\u03a3' .. '\u03ce'
    | '\u03d0' .. '\u03f5'
    | '\u03f7' .. '\u03fb'
    | '\u0400' .. '\u0481'
    | '\u0483' .. '\u0486'
    | '\u048a' .. '\u04ce'
    | '\u04d0' .. '\u04f5'
    | '\u04f8' .. '\u04f9'
    | '\u0500' .. '\u050f'
    | '\u0531' .. '\u0556'
    | '\u0559'
    | '\u0561' .. '\u0587'
    | '\u0591' .. '\u05a1'
    | '\u05a3' .. '\u05b9'
    | '\u05bb' .. '\u05bd'
    | '\u05bf'
    | '\u05c1' .. '\u05c2'
    | '\u05c4'
    | '\u05d0' .. '\u05ea'
    | '\u05f0' .. '\u05f2'
    | '\u0610' .. '\u0615'
    | '\u0621' .. '\u063a'
    | '\u0640' .. '\u0658'
    | '\u0660' .. '\u0669'
    | '\u066e' .. '\u06d3'
    | '\u06d5' .. '\u06dc'
    | '\u06df' .. '\u06e8'
    | '\u06ea' .. '\u06fc'
    | '\u06ff'
    | '\u0710' .. '\u074a'
    | '\u074d' .. '\u074f'
    | '\u0780' .. '\u07b1'
    | '\u0901' .. '\u0939'
    | '\u093c' .. '\u094d'
    | '\u0950' .. '\u0954'
    | '\u0958' .. '\u0963'
    | '\u0966' .. '\u096f'
    | '\u0981' .. '\u0983'
    | '\u0985' .. '\u098c'
    | '\u098f' .. '\u0990'
    | '\u0993' .. '\u09a8'
    | '\u09aa' .. '\u09b0'
    | '\u09b2'
    | '\u09b6' .. '\u09b9'
    | '\u09bc' .. '\u09c4'
    | '\u09c7' .. '\u09c8'
    | '\u09cb' .. '\u09cd'
    | '\u09d7'
    | '\u09dc' .. '\u09dd'
    | '\u09df' .. '\u09e3'
    | '\u09e6' .. '\u09f1'
    | '\u0a01' .. '\u0a03'
    | '\u0a05' .. '\u0a0a'
    | '\u0a0f' .. '\u0a10'
    | '\u0a13' .. '\u0a28'
    | '\u0a2a' .. '\u0a30'
    | '\u0a32' .. '\u0a33'
    | '\u0a35' .. '\u0a36'
    | '\u0a38' .. '\u0a39'
    | '\u0a3c'
    | '\u0a3e' .. '\u0a42'
    | '\u0a47' .. '\u0a48'
    | '\u0a4b' .. '\u0a4d'
    | '\u0a59' .. '\u0a5c'
    | '\u0a5e'
    | '\u0a66' .. '\u0a74'
    | '\u0a81' .. '\u0a83'
    | '\u0a85' .. '\u0a8d'
    | '\u0a8f' .. '\u0a91'
    | '\u0a93' .. '\u0aa8'
    | '\u0aaa' .. '\u0ab0'
    | '\u0ab2' .. '\u0ab3'
    | '\u0ab5' .. '\u0ab9'
    | '\u0abc' .. '\u0ac5'
    | '\u0ac7' .. '\u0ac9'
    | '\u0acb' .. '\u0acd'
    | '\u0ad0'
    | '\u0ae0' .. '\u0ae3'
    | '\u0ae6' .. '\u0aef'
    | '\u0b01' .. '\u0b03'
    | '\u0b05' .. '\u0b0c'
    | '\u0b0f' .. '\u0b10'
    | '\u0b13' .. '\u0b28'
    | '\u0b2a' .. '\u0b30'
    | '\u0b32' .. '\u0b33'
    | '\u0b35' .. '\u0b39'
    | '\u0b3c' .. '\u0b43'
    | '\u0b47' .. '\u0b48'
    | '\u0b4b' .. '\u0b4d'
    | '\u0b56' .. '\u0b57'
    | '\u0b5c' .. '\u0b5d'
    | '\u0b5f' .. '\u0b61'
    | '\u0b66' .. '\u0b6f'
    | '\u0b71'
    | '\u0b82' .. '\u0b83'
    | '\u0b85' .. '\u0b8a'
    | '\u0b8e' .. '\u0b90'
    | '\u0b92' .. '\u0b95'
    | '\u0b99' .. '\u0b9a'
    | '\u0b9c'
    | '\u0b9e' .. '\u0b9f'
    | '\u0ba3' .. '\u0ba4'
    | '\u0ba8' .. '\u0baa'
    | '\u0bae' .. '\u0bb5'
    | '\u0bb7' .. '\u0bb9'
    | '\u0bbe' .. '\u0bc2'
    | '\u0bc6' .. '\u0bc8'
    | '\u0bca' .. '\u0bcd'
    | '\u0bd7'
    | '\u0be7' .. '\u0bef'
    | '\u0c01' .. '\u0c03'
    | '\u0c05' .. '\u0c0c'
    | '\u0c0e' .. '\u0c10'
    | '\u0c12' .. '\u0c28'
    | '\u0c2a' .. '\u0c33'
    | '\u0c35' .. '\u0c39'
    | '\u0c3e' .. '\u0c44'
    | '\u0c46' .. '\u0c48'
    | '\u0c4a' .. '\u0c4d'
    | '\u0c55' .. '\u0c56'
    | '\u0c60' .. '\u0c61'
    | '\u0c66' .. '\u0c6f'
    | '\u0c82' .. '\u0c83'
    | '\u0c85' .. '\u0c8c'
    | '\u0c8e' .. '\u0c90'
    | '\u0c92' .. '\u0ca8'
    | '\u0caa' .. '\u0cb3'
    | '\u0cb5' .. '\u0cb9'
    | '\u0cbc' .. '\u0cc4'
    | '\u0cc6' .. '\u0cc8'
    | '\u0cca' .. '\u0ccd'
    | '\u0cd5' .. '\u0cd6'
    | '\u0cde'
    | '\u0ce0' .. '\u0ce1'
    | '\u0ce6' .. '\u0cef'
    | '\u0d02' .. '\u0d03'
    | '\u0d05' .. '\u0d0c'
    | '\u0d0e' .. '\u0d10'
    | '\u0d12' .. '\u0d28'
    | '\u0d2a' .. '\u0d39'
    | '\u0d3e' .. '\u0d43'
    | '\u0d46' .. '\u0d48'
    | '\u0d4a' .. '\u0d4d'
    | '\u0d57'
    | '\u0d60' .. '\u0d61'
    | '\u0d66' .. '\u0d6f'
    | '\u0d82' .. '\u0d83'
    | '\u0d85' .. '\u0d96'
    | '\u0d9a' .. '\u0db1'
    | '\u0db3' .. '\u0dbb'
    | '\u0dbd'
    | '\u0dc0' .. '\u0dc6'
    | '\u0dca'
    | '\u0dcf' .. '\u0dd4'
    | '\u0dd6'
    | '\u0dd8' .. '\u0ddf'
    | '\u0df2' .. '\u0df3'
    | '\u0e01' .. '\u0e3a'
    | '\u0e40' .. '\u0e4e'
    | '\u0e50' .. '\u0e59'
    | '\u0e81' .. '\u0e82'
    | '\u0e84'
    | '\u0e87' .. '\u0e88'
    | '\u0e8a'
    | '\u0e8d'
    | '\u0e94' .. '\u0e97'
    | '\u0e99' .. '\u0e9f'
    | '\u0ea1' .. '\u0ea3'
    | '\u0ea5'
    | '\u0ea7'
    | '\u0eaa' .. '\u0eab'
    | '\u0ead' .. '\u0eb9'
    | '\u0ebb' .. '\u0ebd'
    | '\u0ec0' .. '\u0ec4'
    | '\u0ec6'
    | '\u0ec8' .. '\u0ecd'
    | '\u0ed0' .. '\u0ed9'
    | '\u0edc' .. '\u0edd'
    | '\u0f00'
    | '\u0f18' .. '\u0f19'
    | '\u0f20' .. '\u0f29'
    | '\u0f35'
    | '\u0f37'
    | '\u0f39'
    | '\u0f3e' .. '\u0f47'
    | '\u0f49' .. '\u0f6a'
    | '\u0f71' .. '\u0f84'
    | '\u0f86' .. '\u0f8b'
    | '\u0f90' .. '\u0f97'
    | '\u0f99' .. '\u0fbc'
    | '\u0fc6'
    | '\u1000' .. '\u1021'
    | '\u1023' .. '\u1027'
    | '\u1029' .. '\u102a'
    | '\u102c' .. '\u1032'
    | '\u1036' .. '\u1039'
    | '\u1040' .. '\u1049'
    | '\u1050' .. '\u1059'
    | '\u10a0' .. '\u10c5'
    | '\u10d0' .. '\u10f8'
    | '\u1100' .. '\u1159'
    | '\u115f' .. '\u11a2'
    | '\u11a8' .. '\u11f9'
    | '\u1200' .. '\u1206'
    | '\u1208' .. '\u1246'
    | '\u1248'
    | '\u124a' .. '\u124d'
    | '\u1250' .. '\u1256'
    | '\u1258'
    | '\u125a' .. '\u125d'
    | '\u1260' .. '\u1286'
    | '\u1288'
    | '\u128a' .. '\u128d'
    | '\u1290' .. '\u12ae'
    | '\u12b0'
    | '\u12b2' .. '\u12b5'
    | '\u12b8' .. '\u12be'
    | '\u12c0'
    | '\u12c2' .. '\u12c5'
    | '\u12c8' .. '\u12ce'
    | '\u12d0' .. '\u12d6'
    | '\u12d8' .. '\u12ee'
    | '\u12f0' .. '\u130e'
    | '\u1310'
    | '\u1312' .. '\u1315'
    | '\u1318' .. '\u131e'
    | '\u1320' .. '\u1346'
    | '\u1348' .. '\u135a'
    | '\u1369' .. '\u1371'
    | '\u13a0' .. '\u13f4'
    | '\u1401' .. '\u166c'
    | '\u166f' .. '\u1676'
    | '\u1681' .. '\u169a'
    | '\u16a0' .. '\u16ea'
    | '\u16ee' .. '\u16f0'
    | '\u1700' .. '\u170c'
    | '\u170e' .. '\u1714'
    | '\u1720' .. '\u1734'
    | '\u1740' .. '\u1753'
    | '\u1760' .. '\u176c'
    | '\u176e' .. '\u1770'
    | '\u1772' .. '\u1773'
    | '\u1780' .. '\u17b3'
    | '\u17b6' .. '\u17d3'
    | '\u17d7'
    | '\u17dc' .. '\u17dd'
    | '\u17e0' .. '\u17e9'
    | '\u180b' .. '\u180d'
    | '\u1810' .. '\u1819'
    | '\u1820' .. '\u1877'
    | '\u1880' .. '\u18a9'
    | '\u1900' .. '\u191c'
    | '\u1920' .. '\u192b'
    | '\u1930' .. '\u193b'
    | '\u1946' .. '\u196d'
    | '\u1970' .. '\u1974'
    | '\u1d00' .. '\u1d6b'
    | '\u1e00' .. '\u1e9b'
    | '\u1ea0' .. '\u1ef9'
    | '\u1f00' .. '\u1f15'
    | '\u1f18' .. '\u1f1d'
    | '\u1f20' .. '\u1f45'
    | '\u1f48' .. '\u1f4d'
    | '\u1f50' .. '\u1f57'
    | '\u1f59'
    | '\u1f5b'
    | '\u1f5d'
    | '\u1f5f' .. '\u1f7d'
    | '\u1f80' .. '\u1fb4'
    | '\u1fb6' .. '\u1fbc'
    | '\u1fbe'
    | '\u1fc2' .. '\u1fc4'
    | '\u1fc6' .. '\u1fcc'
    | '\u1fd0' .. '\u1fd3'
    | '\u1fd6' .. '\u1fdb'
    | '\u1fe0' .. '\u1fec'
    | '\u1ff2' .. '\u1ff4'
    | '\u1ff6' .. '\u1ffc'
    | '\u203f' .. '\u2040'
    | '\u2054'
    | '\u2071'
    | '\u207f'
    | '\u20d0' .. '\u20dc'
    | '\u20e1'
    | '\u20e5' .. '\u20ea'
    | '\u2102'
    | '\u2107'
    | '\u210a' .. '\u2113'
    | '\u2115'
    | '\u2118' .. '\u211d'
    | '\u2124'
    | '\u2126'
    | '\u2128'
    | '\u212a' .. '\u2131'
    | '\u2133' .. '\u2139'
    | '\u213d' .. '\u213f'
    | '\u2145' .. '\u2149'
    | '\u2160' .. '\u2183'
    | '\u3005' .. '\u3007'
    | '\u3021' .. '\u302f'
    | '\u3031' .. '\u3035'
    | '\u3038' .. '\u303c'
    | '\u3041' .. '\u3096'
    | '\u3099' .. '\u309a'
    | '\u309d' .. '\u309f'
    | '\u30a1' .. '\u30ff'
    | '\u3105' .. '\u312c'
    | '\u3131' .. '\u318e'
    | '\u31a0' .. '\u31b7'
    | '\u31f0' .. '\u31ff'
    | '\u3400' .. '\u4db5'
    | '\u4e00' .. '\u9fa5'
    | '\ua000' .. '\ua48c'
    | '\uac00' .. '\ud7a3'
    | '\uf900' .. '\ufa2d'
    | '\ufa30' .. '\ufa6a'
    | '\ufb00' .. '\ufb06'
    | '\ufb13' .. '\ufb17'
    | '\ufb1d' .. '\ufb28'
    | '\ufb2a' .. '\ufb36'
    | '\ufb38' .. '\ufb3c'
    | '\ufb3e'
    | '\ufb40' .. '\ufb41'
    | '\ufb43' .. '\ufb44'
    | '\ufb46' .. '\ufbb1'
    | '\ufbd3' .. '\ufc5d'
    | '\ufc64' .. '\ufd3d'
    | '\ufd50' .. '\ufd8f'
    | '\ufd92' .. '\ufdc7'
    | '\ufdf0' .. '\ufdf9'
    | '\ufe00' .. '\ufe0f'
    | '\ufe20' .. '\ufe23'
    | '\ufe33' .. '\ufe34'
    | '\ufe4d' .. '\ufe4f'
    | '\ufe71'
    | '\ufe73'
    | '\ufe77'
    | '\ufe79'
    | '\ufe7b'
    | '\ufe7d'
    | '\ufe7f' .. '\ufefc'
    | '\uff10' .. '\uff19'
    | '\uff21' .. '\uff3a'
    | '\uff3f'
    | '\uff41' .. '\uff5a'
    | '\uff65' .. '\uffbe'
    | '\uffc2' .. '\uffc7'
    | '\uffca' .. '\uffcf'
    | '\uffd2' .. '\uffd7'
    | '\uffda' .. '\uffdc'
    // Surrogate-pair alternatives: astral-plane code points.
    | '\ud800' '\udc00' .. '\udc0a'
    | '\ud800' '\udc0d' .. '\udc25'
    | '\ud800' '\udc28' .. '\udc39'
    | '\ud800' '\udc3c' .. '\udc3c'
    | '\ud800' '\udc3f' .. '\udc4c'
    | '\ud800' '\udc50' .. '\udc5c'
    | '\ud800' '\udc80' .. '\udcf9'
    | '\ud800' '\udf00' .. '\udf1d'
    | '\ud800' '\udf30' .. '\udf49'
    | '\ud800' '\udf80' .. '\udf9c'
    | '\ud801' '\ue000' .. '\ue09c'
    | '\ud801' '\ue0a0' .. '\ue0a8'
    | '\ud802' '\ue400' .. '\ue404'
    | '\ud802' '\u0808'
    | '\ud802' '\ue40a' .. '\ue434'
    | '\ud802' '\ue437' .. '\ue437'
    | '\ud802' '\u083c'
    | '\ud802' '\u083f'
    | '\ud834' '\uad65' .. '\uad68'
    | '\ud834' '\uad6d' .. '\uad71'
    | '\ud834' '\uad7b' .. '\uad81'
    | '\ud834' '\uad85' .. '\uad8a'
    | '\ud834' '\uadaa' .. '\uadac'
    | '\ud835' '\ub000' .. '\ub053'
    | '\ud835' '\ub056' .. '\ub09b'
    | '\ud835' '\ub09e' .. '\ub09e'
    | '\ud835' '\ud4a2'
    | '\ud835' '\ub0a5' .. '\ub0a5'
    | '\ud835' '\ub0a9' .. '\ub0ab'
    | '\ud835' '\ub0ae' .. '\ub0b8'
    | '\ud835' '\ud4bb'
    | '\ud835' '\ub0bd' .. '\ub0c2'
    | '\ud835' '\ub0c5' .. '\ub104'
    | '\ud835' '\ub107' .. '\ub109'
    | '\ud835' '\ub10d' .. '\ub113'
    | '\ud835' '\ub116' .. '\ub11b'
    | '\ud835' '\ub11e' .. '\ub138'
    | '\ud835' '\ub13b' .. '\ub13d'
    | '\ud835' '\ub140' .. '\ub143'
    | '\ud835' '\ud546'
    | '\ud835' '\ub14a' .. '\ub14f'
    | '\ud835' '\ub152' .. '\ub2a2'
    | '\ud835' '\ub2a8' .. '\ub2bf'
    | '\ud835' '\ub2c2' .. '\ub2d9'
    | '\ud835' '\ub2dc' .. '\ub2f9'
    | '\ud835' '\ub2fc' .. '\ub313'
    | '\ud835' '\ub316' .. '\ub333'
    | '\ud835' '\ub336' .. '\ub34d'
    | '\ud835' '\ub350' .. '\ub36d'
    | '\ud835' '\ub370' .. '\ub387'
    | '\ud835' '\ub38a' .. '\ub3a7'
    | '\ud835' '\ub3aa' .. '\ub3c1'
    | '\ud835' '\ub3c4' .. '\ub3c8'
    | '\ud835' '\ub3ce' .. '\ub3fe'
    | '\ud840' '\udc00' .. '\udffe'
    | '\ud841' '\ue000' .. '\ue3fe'
    | '\ud842' '\ue400' .. '\ue7fe'
    | '\ud843' '\ue800' .. '\uebfe'
    | '\ud844' '\uec00' .. '\ueffe'
    | '\ud845' '\uf000' .. '\uf3fe'
    | '\ud846' '\uf400' .. '\uf7fe'
    | '\ud847' '\uf800' .. '\ufbfe'
    | '\ud848' '\ufc00' .. '\ufffe'
    | '\ud849' '\u0000' .. '\u03fe'
    | '\ud84a' '\u0400' .. '\u07fe'
    | '\ud84b' '\u0800' .. '\u0bfe'
    | '\ud84c' '\u0c00' .. '\u0ffe'
    | '\ud84d' '\u1000' .. '\u13fe'
    | '\ud84e' '\u1400' .. '\u17fe'
    | '\ud84f' '\u1800' .. '\u1bfe'
    | '\ud850' '\u1c00' .. '\u1ffe'
    | '\ud851' '\u2000' .. '\u23fe'
    | '\ud852' '\u2400' .. '\u27fe'
    | '\ud853' '\u2800' .. '\u2bfe'
    | '\ud854' '\u2c00' .. '\u2ffe'
    | '\ud855' '\u3000' .. '\u33fe'
    | '\ud856' '\u3400' .. '\u37fe'
    | '\ud857' '\u3800' .. '\u3bfe'
    | '\ud858' '\u3c00' .. '\u3ffe'
    | '\ud859' '\u4000' .. '\u43fe'
    | '\ud85a' '\u4400' .. '\u47fe'
    | '\ud85b' '\u4800' .. '\u4bfe'
    | '\ud85c' '\u4c00' .. '\u4ffe'
    | '\ud85d' '\u5000' .. '\u53fe'
    | '\ud85e' '\u5400' .. '\u57fe'
    | '\ud85f' '\u5800' .. '\u5bfe'
    | '\ud860' '\u5c00' .. '\u5ffe'
    | '\ud861' '\u6000' .. '\u63fe'
    | '\ud862' '\u6400' .. '\u67fe'
    | '\ud863' '\u6800' .. '\u6bfe'
    | '\ud864' '\u6c00' .. '\u6ffe'
    | '\ud865' '\u7000' .. '\u73fe'
    | '\ud866' '\u7400' .. '\u77fe'
    | '\ud867' '\u7800' .. '\u7bfe'
    | '\ud868' '\u7c00' .. '\u7ffe'
    | '\ud869' '\u8000' .. '\u82d5'
    | '\ud87e' '\ud400' .. '\ud61c'
    | '\udb40' '\udd00' .. '\uddee'
    ;

View file

@ -1,379 +0,0 @@
// Machine-generated Unicode range table: characters valid as the *first*
// character of an identifier (XID_Start, plus '_'), expressed as an ANTLR
// lexer fragment. Entries using two quoted values in sequence
// (e.g. '\ud800' '\udc00'..) are UTF-16 surrogate pairs covering
// astral-plane code points.
// NOTE(review): the exact Unicode version these ranges were generated from is
// not recorded here — verify against UAX #31 before reusing this table.
// Do not hand-edit individual ranges; regenerate the whole table instead.
lexer grammar Xidstart;

fragment XID_Start :
    '\u0041' .. '\u005a'
    | '_'
    | '\u0061' .. '\u007a'
    | '\u00aa'
    | '\u00b5'
    | '\u00ba'
    | '\u00c0' .. '\u00d6'
    | '\u00d8' .. '\u00f6'
    | '\u00f8' .. '\u0236'
    | '\u0250' .. '\u02c1'
    | '\u02c6' .. '\u02d1'
    | '\u02e0' .. '\u02e4'
    | '\u02ee'
    | '\u0386'
    | '\u0388' .. '\u038a'
    | '\u038c'
    | '\u038e' .. '\u03a1'
    | '\u03a3' .. '\u03ce'
    | '\u03d0' .. '\u03f5'
    | '\u03f7' .. '\u03fb'
    | '\u0400' .. '\u0481'
    | '\u048a' .. '\u04ce'
    | '\u04d0' .. '\u04f5'
    | '\u04f8' .. '\u04f9'
    | '\u0500' .. '\u050f'
    | '\u0531' .. '\u0556'
    | '\u0559'
    | '\u0561' .. '\u0587'
    | '\u05d0' .. '\u05ea'
    | '\u05f0' .. '\u05f2'
    | '\u0621' .. '\u063a'
    | '\u0640' .. '\u064a'
    | '\u066e' .. '\u066f'
    | '\u0671' .. '\u06d3'
    | '\u06d5'
    | '\u06e5' .. '\u06e6'
    | '\u06ee' .. '\u06ef'
    | '\u06fa' .. '\u06fc'
    | '\u06ff'
    | '\u0710'
    | '\u0712' .. '\u072f'
    | '\u074d' .. '\u074f'
    | '\u0780' .. '\u07a5'
    | '\u07b1'
    | '\u0904' .. '\u0939'
    | '\u093d'
    | '\u0950'
    | '\u0958' .. '\u0961'
    | '\u0985' .. '\u098c'
    | '\u098f' .. '\u0990'
    | '\u0993' .. '\u09a8'
    | '\u09aa' .. '\u09b0'
    | '\u09b2'
    | '\u09b6' .. '\u09b9'
    | '\u09bd'
    | '\u09dc' .. '\u09dd'
    | '\u09df' .. '\u09e1'
    | '\u09f0' .. '\u09f1'
    | '\u0a05' .. '\u0a0a'
    | '\u0a0f' .. '\u0a10'
    | '\u0a13' .. '\u0a28'
    | '\u0a2a' .. '\u0a30'
    | '\u0a32' .. '\u0a33'
    | '\u0a35' .. '\u0a36'
    | '\u0a38' .. '\u0a39'
    | '\u0a59' .. '\u0a5c'
    | '\u0a5e'
    | '\u0a72' .. '\u0a74'
    | '\u0a85' .. '\u0a8d'
    | '\u0a8f' .. '\u0a91'
    | '\u0a93' .. '\u0aa8'
    | '\u0aaa' .. '\u0ab0'
    | '\u0ab2' .. '\u0ab3'
    | '\u0ab5' .. '\u0ab9'
    | '\u0abd'
    | '\u0ad0'
    | '\u0ae0' .. '\u0ae1'
    | '\u0b05' .. '\u0b0c'
    | '\u0b0f' .. '\u0b10'
    | '\u0b13' .. '\u0b28'
    | '\u0b2a' .. '\u0b30'
    | '\u0b32' .. '\u0b33'
    | '\u0b35' .. '\u0b39'
    | '\u0b3d'
    | '\u0b5c' .. '\u0b5d'
    | '\u0b5f' .. '\u0b61'
    | '\u0b71'
    | '\u0b83'
    | '\u0b85' .. '\u0b8a'
    | '\u0b8e' .. '\u0b90'
    | '\u0b92' .. '\u0b95'
    | '\u0b99' .. '\u0b9a'
    | '\u0b9c'
    | '\u0b9e' .. '\u0b9f'
    | '\u0ba3' .. '\u0ba4'
    | '\u0ba8' .. '\u0baa'
    | '\u0bae' .. '\u0bb5'
    | '\u0bb7' .. '\u0bb9'
    | '\u0c05' .. '\u0c0c'
    | '\u0c0e' .. '\u0c10'
    | '\u0c12' .. '\u0c28'
    | '\u0c2a' .. '\u0c33'
    | '\u0c35' .. '\u0c39'
    | '\u0c60' .. '\u0c61'
    | '\u0c85' .. '\u0c8c'
    | '\u0c8e' .. '\u0c90'
    | '\u0c92' .. '\u0ca8'
    | '\u0caa' .. '\u0cb3'
    | '\u0cb5' .. '\u0cb9'
    | '\u0cbd'
    | '\u0cde'
    | '\u0ce0' .. '\u0ce1'
    | '\u0d05' .. '\u0d0c'
    | '\u0d0e' .. '\u0d10'
    | '\u0d12' .. '\u0d28'
    | '\u0d2a' .. '\u0d39'
    | '\u0d60' .. '\u0d61'
    | '\u0d85' .. '\u0d96'
    | '\u0d9a' .. '\u0db1'
    | '\u0db3' .. '\u0dbb'
    | '\u0dbd'
    | '\u0dc0' .. '\u0dc6'
    | '\u0e01' .. '\u0e30'
    | '\u0e32'
    | '\u0e40' .. '\u0e46'
    | '\u0e81' .. '\u0e82'
    | '\u0e84'
    | '\u0e87' .. '\u0e88'
    | '\u0e8a'
    | '\u0e8d'
    | '\u0e94' .. '\u0e97'
    | '\u0e99' .. '\u0e9f'
    | '\u0ea1' .. '\u0ea3'
    | '\u0ea5'
    | '\u0ea7'
    | '\u0eaa' .. '\u0eab'
    | '\u0ead' .. '\u0eb0'
    | '\u0eb2'
    | '\u0ebd'
    | '\u0ec0' .. '\u0ec4'
    | '\u0ec6'
    | '\u0edc' .. '\u0edd'
    | '\u0f00'
    | '\u0f40' .. '\u0f47'
    | '\u0f49' .. '\u0f6a'
    | '\u0f88' .. '\u0f8b'
    | '\u1000' .. '\u1021'
    | '\u1023' .. '\u1027'
    | '\u1029' .. '\u102a'
    | '\u1050' .. '\u1055'
    | '\u10a0' .. '\u10c5'
    | '\u10d0' .. '\u10f8'
    | '\u1100' .. '\u1159'
    | '\u115f' .. '\u11a2'
    | '\u11a8' .. '\u11f9'
    | '\u1200' .. '\u1206'
    | '\u1208' .. '\u1246'
    | '\u1248'
    | '\u124a' .. '\u124d'
    | '\u1250' .. '\u1256'
    | '\u1258'
    | '\u125a' .. '\u125d'
    | '\u1260' .. '\u1286'
    | '\u1288'
    | '\u128a' .. '\u128d'
    | '\u1290' .. '\u12ae'
    | '\u12b0'
    | '\u12b2' .. '\u12b5'
    | '\u12b8' .. '\u12be'
    | '\u12c0'
    | '\u12c2' .. '\u12c5'
    | '\u12c8' .. '\u12ce'
    | '\u12d0' .. '\u12d6'
    | '\u12d8' .. '\u12ee'
    | '\u12f0' .. '\u130e'
    | '\u1310'
    | '\u1312' .. '\u1315'
    | '\u1318' .. '\u131e'
    | '\u1320' .. '\u1346'
    | '\u1348' .. '\u135a'
    | '\u13a0' .. '\u13f4'
    | '\u1401' .. '\u166c'
    | '\u166f' .. '\u1676'
    | '\u1681' .. '\u169a'
    | '\u16a0' .. '\u16ea'
    | '\u16ee' .. '\u16f0'
    | '\u1700' .. '\u170c'
    | '\u170e' .. '\u1711'
    | '\u1720' .. '\u1731'
    | '\u1740' .. '\u1751'
    | '\u1760' .. '\u176c'
    | '\u176e' .. '\u1770'
    | '\u1780' .. '\u17b3'
    | '\u17d7'
    | '\u17dc'
    | '\u1820' .. '\u1877'
    | '\u1880' .. '\u18a8'
    | '\u1900' .. '\u191c'
    | '\u1950' .. '\u196d'
    | '\u1970' .. '\u1974'
    | '\u1d00' .. '\u1d6b'
    | '\u1e00' .. '\u1e9b'
    | '\u1ea0' .. '\u1ef9'
    | '\u1f00' .. '\u1f15'
    | '\u1f18' .. '\u1f1d'
    | '\u1f20' .. '\u1f45'
    | '\u1f48' .. '\u1f4d'
    | '\u1f50' .. '\u1f57'
    | '\u1f59'
    | '\u1f5b'
    | '\u1f5d'
    | '\u1f5f' .. '\u1f7d'
    | '\u1f80' .. '\u1fb4'
    | '\u1fb6' .. '\u1fbc'
    | '\u1fbe'
    | '\u1fc2' .. '\u1fc4'
    | '\u1fc6' .. '\u1fcc'
    | '\u1fd0' .. '\u1fd3'
    | '\u1fd6' .. '\u1fdb'
    | '\u1fe0' .. '\u1fec'
    | '\u1ff2' .. '\u1ff4'
    | '\u1ff6' .. '\u1ffc'
    | '\u2071'
    | '\u207f'
    | '\u2102'
    | '\u2107'
    | '\u210a' .. '\u2113'
    | '\u2115'
    | '\u2118' .. '\u211d'
    | '\u2124'
    | '\u2126'
    | '\u2128'
    | '\u212a' .. '\u2131'
    | '\u2133' .. '\u2139'
    | '\u213d' .. '\u213f'
    | '\u2145' .. '\u2149'
    | '\u2160' .. '\u2183'
    | '\u3005' .. '\u3007'
    | '\u3021' .. '\u3029'
    | '\u3031' .. '\u3035'
    | '\u3038' .. '\u303c'
    | '\u3041' .. '\u3096'
    | '\u309d' .. '\u309f'
    | '\u30a1' .. '\u30fa'
    | '\u30fc' .. '\u30ff'
    | '\u3105' .. '\u312c'
    | '\u3131' .. '\u318e'
    | '\u31a0' .. '\u31b7'
    | '\u31f0' .. '\u31ff'
    | '\u3400' .. '\u4db5'
    | '\u4e00' .. '\u9fa5'
    | '\ua000' .. '\ua48c'
    | '\uac00' .. '\ud7a3'
    | '\uf900' .. '\ufa2d'
    | '\ufa30' .. '\ufa6a'
    | '\ufb00' .. '\ufb06'
    | '\ufb13' .. '\ufb17'
    | '\ufb1d'
    | '\ufb1f' .. '\ufb28'
    | '\ufb2a' .. '\ufb36'
    | '\ufb38' .. '\ufb3c'
    | '\ufb3e'
    | '\ufb40' .. '\ufb41'
    | '\ufb43' .. '\ufb44'
    | '\ufb46' .. '\ufbb1'
    | '\ufbd3' .. '\ufc5d'
    | '\ufc64' .. '\ufd3d'
    | '\ufd50' .. '\ufd8f'
    | '\ufd92' .. '\ufdc7'
    | '\ufdf0' .. '\ufdf9'
    | '\ufe71'
    | '\ufe73'
    | '\ufe77'
    | '\ufe79'
    | '\ufe7b'
    | '\ufe7d'
    | '\ufe7f' .. '\ufefc'
    | '\uff21' .. '\uff3a'
    | '\uff41' .. '\uff5a'
    | '\uff66' .. '\uff9d'
    | '\uffa0' .. '\uffbe'
    | '\uffc2' .. '\uffc7'
    | '\uffca' .. '\uffcf'
    | '\uffd2' .. '\uffd7'
    | '\uffda' .. '\uffdc'
    // Surrogate-pair alternatives: astral-plane code points.
    | '\ud800' '\udc00' .. '\udc0a'
    | '\ud800' '\udc0d' .. '\udc25'
    | '\ud800' '\udc28' .. '\udc39'
    | '\ud800' '\udc3c' .. '\udc3c'
    | '\ud800' '\udc3f' .. '\udc4c'
    | '\ud800' '\udc50' .. '\udc5c'
    | '\ud800' '\udc80' .. '\udcf9'
    | '\ud800' '\udf00' .. '\udf1d'
    | '\ud800' '\udf30' .. '\udf49'
    | '\ud800' '\udf80' .. '\udf9c'
    | '\ud801' '\ue000' .. '\ue09c'
    | '\ud802' '\ue400' .. '\ue404'
    | '\ud802' '\u0808'
    | '\ud802' '\ue40a' .. '\ue434'
    | '\ud802' '\ue437' .. '\ue437'
    | '\ud802' '\u083c'
    | '\ud802' '\u083f'
    | '\ud835' '\ub000' .. '\ub053'
    | '\ud835' '\ub056' .. '\ub09b'
    | '\ud835' '\ub09e' .. '\ub09e'
    | '\ud835' '\ud4a2'
    | '\ud835' '\ub0a5' .. '\ub0a5'
    | '\ud835' '\ub0a9' .. '\ub0ab'
    | '\ud835' '\ub0ae' .. '\ub0b8'
    | '\ud835' '\ud4bb'
    | '\ud835' '\ub0bd' .. '\ub0c2'
    | '\ud835' '\ub0c5' .. '\ub104'
    | '\ud835' '\ub107' .. '\ub109'
    | '\ud835' '\ub10d' .. '\ub113'
    | '\ud835' '\ub116' .. '\ub11b'
    | '\ud835' '\ub11e' .. '\ub138'
    | '\ud835' '\ub13b' .. '\ub13d'
    | '\ud835' '\ub140' .. '\ub143'
    | '\ud835' '\ud546'
    | '\ud835' '\ub14a' .. '\ub14f'
    | '\ud835' '\ub152' .. '\ub2a2'
    | '\ud835' '\ub2a8' .. '\ub2bf'
    | '\ud835' '\ub2c2' .. '\ub2d9'
    | '\ud835' '\ub2dc' .. '\ub2f9'
    | '\ud835' '\ub2fc' .. '\ub313'
    | '\ud835' '\ub316' .. '\ub333'
    | '\ud835' '\ub336' .. '\ub34d'
    | '\ud835' '\ub350' .. '\ub36d'
    | '\ud835' '\ub370' .. '\ub387'
    | '\ud835' '\ub38a' .. '\ub3a7'
    | '\ud835' '\ub3aa' .. '\ub3c1'
    | '\ud835' '\ub3c4' .. '\ub3c8'
    | '\ud840' '\udc00' .. '\udffe'
    | '\ud841' '\ue000' .. '\ue3fe'
    | '\ud842' '\ue400' .. '\ue7fe'
    | '\ud843' '\ue800' .. '\uebfe'
    | '\ud844' '\uec00' .. '\ueffe'
    | '\ud845' '\uf000' .. '\uf3fe'
    | '\ud846' '\uf400' .. '\uf7fe'
    | '\ud847' '\uf800' .. '\ufbfe'
    | '\ud848' '\ufc00' .. '\ufffe'
    | '\ud849' '\u0000' .. '\u03fe'
    | '\ud84a' '\u0400' .. '\u07fe'
    | '\ud84b' '\u0800' .. '\u0bfe'
    | '\ud84c' '\u0c00' .. '\u0ffe'
    | '\ud84d' '\u1000' .. '\u13fe'
    | '\ud84e' '\u1400' .. '\u17fe'
    | '\ud84f' '\u1800' .. '\u1bfe'
    | '\ud850' '\u1c00' .. '\u1ffe'
    | '\ud851' '\u2000' .. '\u23fe'
    | '\ud852' '\u2400' .. '\u27fe'
    | '\ud853' '\u2800' .. '\u2bfe'
    | '\ud854' '\u2c00' .. '\u2ffe'
    | '\ud855' '\u3000' .. '\u33fe'
    | '\ud856' '\u3400' .. '\u37fe'
    | '\ud857' '\u3800' .. '\u3bfe'
    | '\ud858' '\u3c00' .. '\u3ffe'
    | '\ud859' '\u4000' .. '\u43fe'
    | '\ud85a' '\u4400' .. '\u47fe'
    | '\ud85b' '\u4800' .. '\u4bfe'
    | '\ud85c' '\u4c00' .. '\u4ffe'
    | '\ud85d' '\u5000' .. '\u53fe'
    | '\ud85e' '\u5400' .. '\u57fe'
    | '\ud85f' '\u5800' .. '\u5bfe'
    | '\ud860' '\u5c00' .. '\u5ffe'
    | '\ud861' '\u6000' .. '\u63fe'
    | '\ud862' '\u6400' .. '\u67fe'
    | '\ud863' '\u6800' .. '\u6bfe'
    | '\ud864' '\u6c00' .. '\u6ffe'
    | '\ud865' '\u7000' .. '\u73fe'
    | '\ud866' '\u7400' .. '\u77fe'
    | '\ud867' '\u7800' .. '\u7bfe'
    | '\ud868' '\u7c00' .. '\u7ffe'
    | '\ud869' '\u8000' .. '\u82d5'
    | '\ud87e' '\ud400' .. '\ud61c'
    ;