Remove ANTLR grammar.
It is unused and not maintained.
This commit is contained in:
parent
12e76e24cc
commit
c3781e6200
13 changed files with 0 additions and 4221 deletions
4
src/grammar/.gitignore
vendored
4
src/grammar/.gitignore
vendored
|
|
@ -1,4 +0,0 @@
|
|||
verify
|
||||
*.class
|
||||
*.java
|
||||
*.tokens
|
||||
|
|
@ -1,33 +0,0 @@
|
|||
# Reference grammar.
|
||||
|
||||
Uses [antlr4](http://www.antlr.org/) and a custom Rust tool to compare
|
||||
ASTs/token streams generated. You can use the `make check-lexer` target to
|
||||
run all of the available tests.
|
||||
|
||||
The build of the rust part is included with `make tidy` and can be run with `make check-build-lexer-verifier`.
|
||||
|
||||
# Manual build
|
||||
|
||||
To use manually, assuming antlr4 is installed at `/usr/share/java/antlr-complete.jar`:
|
||||
|
||||
```
|
||||
antlr4 RustLexer.g4
|
||||
javac -classpath /usr/share/java/antlr-complete.jar *.java
|
||||
rustc -O verify.rs
|
||||
for file in ../*/**.rs; do
|
||||
echo $file;
|
||||
grun RustLexer tokens -tokens < "$file" | ./verify "$file" RustLexer.tokens || break
|
||||
done
|
||||
```
|
||||
|
||||
Note that the `../*/**.rs` glob will match every `*.rs` file in the parent
|
||||
directory and all of its recursive children. This is a Zsh extension.
|
||||
|
||||
|
||||
## Cleanup
|
||||
|
||||
To clean up, you can use a command like this:
|
||||
|
||||
```bash
|
||||
rm -f verify *.class *.java *.tokens
|
||||
```
|
||||
|
|
@ -1,197 +0,0 @@
|
|||
lexer grammar RustLexer;
|
||||
|
||||
@lexer::members {
|
||||
public boolean is_at(int pos) {
|
||||
return _input.index() == pos;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
tokens {
|
||||
EQ, LT, LE, EQEQ, NE, GE, GT, ANDAND, OROR, NOT, TILDE, PLUS,
|
||||
MINUS, STAR, SLASH, PERCENT, CARET, AND, OR, SHL, SHR, BINOP,
|
||||
BINOPEQ, LARROW, AT, DOT, DOTDOT, DOTDOTDOT, COMMA, SEMI, COLON,
|
||||
MOD_SEP, RARROW, FAT_ARROW, LPAREN, RPAREN, LBRACKET, RBRACKET,
|
||||
LBRACE, RBRACE, POUND, DOLLAR, UNDERSCORE, LIT_CHAR, LIT_BYTE,
|
||||
LIT_INTEGER, LIT_FLOAT, LIT_STR, LIT_STR_RAW, LIT_BYTE_STR,
|
||||
LIT_BYTE_STR_RAW, QUESTION, IDENT, LIFETIME, WHITESPACE, DOC_COMMENT,
|
||||
COMMENT, SHEBANG, UTF8_BOM
|
||||
}
|
||||
|
||||
import xidstart , xidcontinue;
|
||||
|
||||
|
||||
/* Expression-operator symbols */
|
||||
|
||||
EQ : '=' ;
|
||||
LT : '<' ;
|
||||
LE : '<=' ;
|
||||
EQEQ : '==' ;
|
||||
NE : '!=' ;
|
||||
GE : '>=' ;
|
||||
GT : '>' ;
|
||||
ANDAND : '&&' ;
|
||||
OROR : '||' ;
|
||||
NOT : '!' ;
|
||||
TILDE : '~' ;
|
||||
PLUS : '+' ;
|
||||
MINUS : '-' ;
|
||||
STAR : '*' ;
|
||||
SLASH : '/' ;
|
||||
PERCENT : '%' ;
|
||||
CARET : '^' ;
|
||||
AND : '&' ;
|
||||
OR : '|' ;
|
||||
SHL : '<<' ;
|
||||
SHR : '>>' ;
|
||||
LARROW : '<-' ;
|
||||
|
||||
BINOP
|
||||
: PLUS
|
||||
| SLASH
|
||||
| MINUS
|
||||
| STAR
|
||||
| PERCENT
|
||||
| CARET
|
||||
| AND
|
||||
| OR
|
||||
| SHL
|
||||
| SHR
|
||||
| LARROW
|
||||
;
|
||||
|
||||
BINOPEQ : BINOP EQ ;
|
||||
|
||||
/* "Structural symbols" */
|
||||
|
||||
AT : '@' ;
|
||||
DOT : '.' ;
|
||||
DOTDOT : '..' ;
|
||||
DOTDOTDOT : '...' ;
|
||||
COMMA : ',' ;
|
||||
SEMI : ';' ;
|
||||
COLON : ':' ;
|
||||
MOD_SEP : '::' ;
|
||||
RARROW : '->' ;
|
||||
FAT_ARROW : '=>' ;
|
||||
LPAREN : '(' ;
|
||||
RPAREN : ')' ;
|
||||
LBRACKET : '[' ;
|
||||
RBRACKET : ']' ;
|
||||
LBRACE : '{' ;
|
||||
RBRACE : '}' ;
|
||||
POUND : '#';
|
||||
DOLLAR : '$' ;
|
||||
UNDERSCORE : '_' ;
|
||||
|
||||
// Literals
|
||||
|
||||
fragment HEXIT
|
||||
: [0-9a-fA-F]
|
||||
;
|
||||
|
||||
fragment CHAR_ESCAPE
|
||||
: [nrt\\'"0]
|
||||
| [xX] HEXIT HEXIT
|
||||
| 'u' HEXIT HEXIT HEXIT HEXIT
|
||||
| 'U' HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT
|
||||
| 'u{' HEXIT '}'
|
||||
| 'u{' HEXIT HEXIT '}'
|
||||
| 'u{' HEXIT HEXIT HEXIT '}'
|
||||
| 'u{' HEXIT HEXIT HEXIT HEXIT '}'
|
||||
| 'u{' HEXIT HEXIT HEXIT HEXIT HEXIT '}'
|
||||
| 'u{' HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT '}'
|
||||
;
|
||||
|
||||
fragment SUFFIX
|
||||
: IDENT
|
||||
;
|
||||
|
||||
fragment INTEGER_SUFFIX
|
||||
: { _input.LA(1) != 'e' && _input.LA(1) != 'E' }? SUFFIX
|
||||
;
|
||||
|
||||
LIT_CHAR
|
||||
: '\'' ( '\\' CHAR_ESCAPE
|
||||
| ~[\\'\n\t\r]
|
||||
| '\ud800' .. '\udbff' '\udc00' .. '\udfff'
|
||||
)
|
||||
'\'' SUFFIX?
|
||||
;
|
||||
|
||||
LIT_BYTE
|
||||
: 'b\'' ( '\\' ( [xX] HEXIT HEXIT
|
||||
| [nrt\\'"0] )
|
||||
| ~[\\'\n\t\r] '\udc00'..'\udfff'?
|
||||
)
|
||||
'\'' SUFFIX?
|
||||
;
|
||||
|
||||
LIT_INTEGER
|
||||
|
||||
: [0-9][0-9_]* INTEGER_SUFFIX?
|
||||
| '0b' [01_]+ INTEGER_SUFFIX?
|
||||
| '0o' [0-7_]+ INTEGER_SUFFIX?
|
||||
| '0x' [0-9a-fA-F_]+ INTEGER_SUFFIX?
|
||||
;
|
||||
|
||||
LIT_FLOAT
|
||||
: [0-9][0-9_]* ('.' {
|
||||
/* dot followed by another dot is a range, not a float */
|
||||
_input.LA(1) != '.' &&
|
||||
/* dot followed by an identifier is an integer with a function call, not a float */
|
||||
_input.LA(1) != '_' &&
|
||||
!(_input.LA(1) >= 'a' && _input.LA(1) <= 'z') &&
|
||||
!(_input.LA(1) >= 'A' && _input.LA(1) <= 'Z')
|
||||
}? | ('.' [0-9][0-9_]*)? ([eE] [-+]? [0-9][0-9_]*)? SUFFIX?)
|
||||
;
|
||||
|
||||
LIT_STR
|
||||
: '"' ('\\\n' | '\\\r\n' | '\\' CHAR_ESCAPE | .)*? '"' SUFFIX?
|
||||
;
|
||||
|
||||
LIT_BYTE_STR : 'b' LIT_STR ;
|
||||
LIT_BYTE_STR_RAW : 'b' LIT_STR_RAW ;
|
||||
|
||||
/* this is a bit messy */
|
||||
|
||||
fragment LIT_STR_RAW_INNER
|
||||
: '"' .*? '"'
|
||||
| LIT_STR_RAW_INNER2
|
||||
;
|
||||
|
||||
fragment LIT_STR_RAW_INNER2
|
||||
: POUND LIT_STR_RAW_INNER POUND
|
||||
;
|
||||
|
||||
LIT_STR_RAW
|
||||
: 'r' LIT_STR_RAW_INNER SUFFIX?
|
||||
;
|
||||
|
||||
|
||||
QUESTION : '?';
|
||||
|
||||
IDENT : XID_Start XID_Continue* ;
|
||||
|
||||
fragment QUESTION_IDENTIFIER : QUESTION? IDENT;
|
||||
|
||||
LIFETIME : '\'' IDENT ;
|
||||
|
||||
WHITESPACE : [ \r\n\t]+ ;
|
||||
|
||||
UNDOC_COMMENT : '////' ~[\n]* -> type(COMMENT) ;
|
||||
YESDOC_COMMENT : '///' ~[\r\n]* -> type(DOC_COMMENT) ;
|
||||
OUTER_DOC_COMMENT : '//!' ~[\r\n]* -> type(DOC_COMMENT) ;
|
||||
LINE_COMMENT : '//' ( ~[/\n] ~[\n]* )? -> type(COMMENT) ;
|
||||
|
||||
DOC_BLOCK_COMMENT
|
||||
: ('/**' ~[*] | '/*!') (DOC_BLOCK_COMMENT | .)*? '*/' -> type(DOC_COMMENT)
|
||||
;
|
||||
|
||||
BLOCK_COMMENT : '/*' (BLOCK_COMMENT | .)*? '*/' -> type(COMMENT) ;
|
||||
|
||||
/* these appear at the beginning of a file */
|
||||
|
||||
SHEBANG : '#!' { is_at(2) && _input.LA(1) != '[' }? ~[\r\n]* -> type(SHEBANG) ;
|
||||
|
||||
UTF8_BOM : '\ufeff' { is_at(1) }? -> skip ;
|
||||
|
|
@ -1,52 +0,0 @@
|
|||
#!/bin/sh
|
||||
|
||||
# ignore-license
|
||||
|
||||
# Run the reference lexer against libsyntax and compare the tokens and spans.
|
||||
# If "// ignore-lexer-test" is present in the file, it will be ignored.
|
||||
|
||||
|
||||
# Argument $1 is the file to check, $2 is the classpath to use, $3 is the path
|
||||
# to the grun binary, $4 is the path to the verify binary, $5 is the path to
|
||||
# RustLexer.tokens
|
||||
# Trace every command when the caller sets VERBOSE=1.
# `=` is the string-equality operator defined for `[` by POSIX; `==` is a
# bash extension and is rejected by stricter /bin/sh implementations.
if [ "${VERBOSE}" = "1" ]; then
    set -x
fi
|
||||
|
||||
passed=0
|
||||
failed=0
|
||||
skipped=0
|
||||
|
||||
# Check a single source file and update the passed/failed/skipped counters.
# Arguments: $1 file to check, $2 directory to cd into, $3 grun command,
# $4 verify command, $5 path to RustLexer.tokens.
check() {
    grep --silent "// ignore-lexer-test" "$1";

    case $? in
        1)
            # grep exit status 1: the ignore marker is *not* in the file.
            cd $2 # This `cd` is so java will pick up RustLexer.class. I could not
                  # figure out how to wrangle the CLASSPATH, just adding build/grammar
                  # did not seem to have any effect.
            if $3 RustLexer tokens -tokens < $1 | $4 $1 $5; then
                echo "pass: $1"
                passed=$(( passed + 1 ))
            else
                echo "fail: $1"
                failed=$(( failed + 1 ))
            fi
            ;;
        *)
            # Marker found (or grep itself failed): do not lex this file.
            echo "skip: $1"
            skipped=$(( skipped + 1 ))
            ;;
    esac
}
|
||||
|
||||
for file in $(find $1 -iname '*.rs' ! -path '*/test/compile-fail*'); do
|
||||
check "$file" $2 $3 $4 $5
|
||||
done
|
||||
|
||||
printf "\ntest result: "
|
||||
|
||||
if [ $failed -eq 0 ]; then
|
||||
printf "ok. $passed passed; $failed failed; $skipped skipped\n\n"
|
||||
else
|
||||
printf "failed. $passed passed; $failed failed; $skipped skipped\n\n"
|
||||
exit 1
|
||||
fi
|
||||
|
|
@ -1,343 +0,0 @@
|
|||
%{
|
||||
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
|
||||
static int num_hashes;
|
||||
static int end_hashes;
|
||||
static int saw_non_hash;
|
||||
|
||||
%}
|
||||
|
||||
%option stack
|
||||
%option yylineno
|
||||
|
||||
%x str
|
||||
%x rawstr
|
||||
%x rawstr_esc_begin
|
||||
%x rawstr_esc_body
|
||||
%x rawstr_esc_end
|
||||
%x byte
|
||||
%x bytestr
|
||||
%x rawbytestr
|
||||
%x rawbytestr_nohash
|
||||
%x pound
|
||||
%x shebang_or_attr
|
||||
%x ltorchar
|
||||
%x linecomment
|
||||
%x doc_line
|
||||
%x blockcomment
|
||||
%x doc_block
|
||||
%x suffix
|
||||
|
||||
ident [a-zA-Z\x80-\xff_][a-zA-Z0-9\x80-\xff_]*
|
||||
|
||||
%%
|
||||
|
||||
<suffix>{ident} { BEGIN(INITIAL); }
|
||||
<suffix>(.|\n) { yyless(0); BEGIN(INITIAL); }
|
||||
|
||||
[ \n\t\r] { }
|
||||
|
||||
\xef\xbb\xbf {
|
||||
// UTF-8 byte order mark (BOM), ignore if in line 1, error otherwise
|
||||
if (yyget_lineno() != 1) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
\/\/(\/|\!) { BEGIN(doc_line); yymore(); }
|
||||
<doc_line>\n { BEGIN(INITIAL);
|
||||
yyleng--;
|
||||
yytext[yyleng] = 0;
|
||||
return ((yytext[2] == '!') ? INNER_DOC_COMMENT : OUTER_DOC_COMMENT);
|
||||
}
|
||||
<doc_line>[^\n]* { yymore(); }
|
||||
|
||||
\/\/|\/\/\/\/ { BEGIN(linecomment); }
|
||||
<linecomment>\n { BEGIN(INITIAL); }
|
||||
<linecomment>[^\n]* { }
|
||||
|
||||
\/\*(\*|\!)[^*] { yy_push_state(INITIAL); yy_push_state(doc_block); yymore(); }
|
||||
<doc_block>\/\* { yy_push_state(doc_block); yymore(); }
|
||||
<doc_block>\*\/ {
|
||||
yy_pop_state();
|
||||
if (yy_top_state() == doc_block) {
|
||||
yymore();
|
||||
} else {
|
||||
return ((yytext[2] == '!') ? INNER_DOC_COMMENT : OUTER_DOC_COMMENT);
|
||||
}
|
||||
}
|
||||
<doc_block>(.|\n) { yymore(); }
|
||||
|
||||
\/\* { yy_push_state(blockcomment); }
|
||||
<blockcomment>\/\* { yy_push_state(blockcomment); }
|
||||
<blockcomment>\*\/ { yy_pop_state(); }
|
||||
<blockcomment>(.|\n) { }
|
||||
|
||||
_ { return UNDERSCORE; }
|
||||
as { return AS; }
|
||||
box { return BOX; }
|
||||
break { return BREAK; }
|
||||
const { return CONST; }
|
||||
continue { return CONTINUE; }
|
||||
crate { return CRATE; }
|
||||
else { return ELSE; }
|
||||
enum { return ENUM; }
|
||||
extern { return EXTERN; }
|
||||
false { return FALSE; }
|
||||
fn { return FN; }
|
||||
for { return FOR; }
|
||||
if { return IF; }
|
||||
impl { return IMPL; }
|
||||
in { return IN; }
|
||||
let { return LET; }
|
||||
loop { return LOOP; }
|
||||
match { return MATCH; }
|
||||
mod { return MOD; }
|
||||
move { return MOVE; }
|
||||
mut { return MUT; }
|
||||
priv { return PRIV; }
|
||||
proc { return PROC; }
|
||||
pub { return PUB; }
|
||||
ref { return REF; }
|
||||
return { return RETURN; }
|
||||
self { return SELF; }
|
||||
static { return STATIC; }
|
||||
struct { return STRUCT; }
|
||||
trait { return TRAIT; }
|
||||
true { return TRUE; }
|
||||
type { return TYPE; }
|
||||
typeof { return TYPEOF; }
|
||||
unsafe { return UNSAFE; }
|
||||
use { return USE; }
|
||||
where { return WHERE; }
|
||||
while { return WHILE; }
|
||||
|
||||
{ident} { return IDENT; }
|
||||
|
||||
0x[0-9a-fA-F_]+ { BEGIN(suffix); return LIT_INTEGER; }
|
||||
0o[0-7_]+                         { /* octal literal: digits are 0-7 (and '_' separators) */ BEGIN(suffix); return LIT_INTEGER; }
|
||||
0b[01_]+ { BEGIN(suffix); return LIT_INTEGER; }
|
||||
[0-9][0-9_]* { BEGIN(suffix); return LIT_INTEGER; }
|
||||
[0-9][0-9_]*\.(\.|[a-zA-Z]) { yyless(yyleng - 2); BEGIN(suffix); return LIT_INTEGER; }
|
||||
|
||||
[0-9][0-9_]*\.[0-9_]*([eE][-\+]?[0-9_]+)? { BEGIN(suffix); return LIT_FLOAT; }
|
||||
[0-9][0-9_]*(\.[0-9_]*)?[eE][-\+]?[0-9_]+ { BEGIN(suffix); return LIT_FLOAT; }
|
||||
|
||||
; { return ';'; }
|
||||
, { return ','; }
|
||||
\.\.\. { return DOTDOTDOT; }
|
||||
\.\. { return DOTDOT; }
|
||||
\. { return '.'; }
|
||||
\( { return '('; }
|
||||
\) { return ')'; }
|
||||
\{ { return '{'; }
|
||||
\} { return '}'; }
|
||||
\[ { return '['; }
|
||||
\] { return ']'; }
|
||||
@ { return '@'; }
|
||||
# { BEGIN(pound); yymore(); }
|
||||
<pound>\! { BEGIN(shebang_or_attr); yymore(); }
|
||||
<shebang_or_attr>\[ {
|
||||
BEGIN(INITIAL);
|
||||
yyless(2);
|
||||
return SHEBANG;
|
||||
}
|
||||
<shebang_or_attr>[^\[\n]*\n {
|
||||
// Since the \n was eaten as part of the token, yylineno will have
|
||||
// been incremented to the value 2 if the shebang was on the first
|
||||
// line. This yyless undoes that, setting yylineno back to 1.
|
||||
yyless(yyleng - 1);
|
||||
if (yyget_lineno() == 1) {
|
||||
BEGIN(INITIAL);
|
||||
return SHEBANG_LINE;
|
||||
} else {
|
||||
BEGIN(INITIAL);
|
||||
yyless(2);
|
||||
return SHEBANG;
|
||||
}
|
||||
}
|
||||
<pound>. { BEGIN(INITIAL); yyless(1); return '#'; }
|
||||
|
||||
\~ { return '~'; }
|
||||
:: { return MOD_SEP; }
|
||||
: { return ':'; }
|
||||
\$ { return '$'; }
|
||||
\? { return '?'; }
|
||||
|
||||
== { return EQEQ; }
|
||||
=> { return FAT_ARROW; }
|
||||
= { return '='; }
|
||||
\!= { return NE; }
|
||||
\! { return '!'; }
|
||||
\<= { return LE; }
|
||||
\<\< { return SHL; }
|
||||
\<\<= { return SHLEQ; }
|
||||
\< { return '<'; }
|
||||
\>= { return GE; }
|
||||
\>\> { return SHR; }
|
||||
\>\>= { return SHREQ; }
|
||||
\> { return '>'; }
|
||||
|
||||
\x27 { BEGIN(ltorchar); yymore(); }
|
||||
<ltorchar>static { BEGIN(INITIAL); return STATIC_LIFETIME; }
|
||||
<ltorchar>{ident} { BEGIN(INITIAL); return LIFETIME; }
|
||||
<ltorchar>\\[nrt\\\x27\x220]\x27 { BEGIN(suffix); return LIT_CHAR; }
|
||||
<ltorchar>\\x[0-9a-fA-F]{2}\x27 { BEGIN(suffix); return LIT_CHAR; }
|
||||
<ltorchar>\\u\{[0-9a-fA-F]?{6}\}\x27 { BEGIN(suffix); return LIT_CHAR; }
|
||||
<ltorchar>.\x27 { BEGIN(suffix); return LIT_CHAR; }
|
||||
<ltorchar>[\x80-\xff]{2,4}\x27 { BEGIN(suffix); return LIT_CHAR; }
|
||||
<ltorchar><<EOF>> { BEGIN(INITIAL); return -1; }
|
||||
|
||||
b\x22 { BEGIN(bytestr); yymore(); }
|
||||
<bytestr>\x22 { BEGIN(suffix); return LIT_BYTE_STR; }
|
||||
|
||||
<bytestr><<EOF>> { return -1; }
|
||||
<bytestr>\\[n\nrt\\\x27\x220] { yymore(); }
|
||||
<bytestr>\\x[0-9a-fA-F]{2} { yymore(); }
|
||||
<bytestr>\\u\{[0-9a-fA-F]?{6}\} { yymore(); }
|
||||
<bytestr>\\[^n\nrt\\\x27\x220] { return -1; }
|
||||
<bytestr>(.|\n) { yymore(); }
|
||||
|
||||
br\x22 { BEGIN(rawbytestr_nohash); yymore(); }
|
||||
<rawbytestr_nohash>\x22 { BEGIN(suffix); return LIT_BYTE_STR_RAW; }
|
||||
<rawbytestr_nohash>(.|\n) { yymore(); }
|
||||
<rawbytestr_nohash><<EOF>> { return -1; }
|
||||
|
||||
br/# {
|
||||
BEGIN(rawbytestr);
|
||||
yymore();
|
||||
num_hashes = 0;
|
||||
saw_non_hash = 0;
|
||||
end_hashes = 0;
|
||||
}
|
||||
<rawbytestr># {
|
||||
if (!saw_non_hash) {
|
||||
num_hashes++;
|
||||
} else if (end_hashes != 0) {
|
||||
end_hashes++;
|
||||
if (end_hashes == num_hashes) {
|
||||
BEGIN(INITIAL);
|
||||
return LIT_BYTE_STR_RAW;
|
||||
}
|
||||
}
|
||||
yymore();
|
||||
}
|
||||
<rawbytestr>\x22# {
|
||||
end_hashes = 1;
|
||||
if (end_hashes == num_hashes) {
|
||||
BEGIN(INITIAL);
|
||||
return LIT_BYTE_STR_RAW;
|
||||
}
|
||||
yymore();
|
||||
}
|
||||
<rawbytestr>(.|\n) {
|
||||
if (!saw_non_hash) {
|
||||
saw_non_hash = 1;
|
||||
}
|
||||
if (end_hashes != 0) {
|
||||
end_hashes = 0;
|
||||
}
|
||||
yymore();
|
||||
}
|
||||
<rawbytestr><<EOF>> { return -1; }
|
||||
|
||||
b\x27 { BEGIN(byte); yymore(); }
|
||||
<byte>\\[nrt\\\x27\x220]\x27 { BEGIN(INITIAL); return LIT_BYTE; }
|
||||
<byte>\\x[0-9a-fA-F]{2}\x27 { BEGIN(INITIAL); return LIT_BYTE; }
|
||||
<byte>\\u[0-9a-fA-F]{4}\x27 { BEGIN(INITIAL); return LIT_BYTE; }
|
||||
<byte>\\U[0-9a-fA-F]{8}\x27 { BEGIN(INITIAL); return LIT_BYTE; }
|
||||
<byte>.\x27 { BEGIN(INITIAL); return LIT_BYTE; }
|
||||
<byte><<EOF>> { BEGIN(INITIAL); return -1; }
|
||||
|
||||
r\x22 { BEGIN(rawstr); yymore(); }
|
||||
<rawstr>\x22 { BEGIN(suffix); return LIT_STR_RAW; }
|
||||
<rawstr>(.|\n) { yymore(); }
|
||||
<rawstr><<EOF>> { return -1; }
|
||||
|
||||
r/# {
|
||||
BEGIN(rawstr_esc_begin);
|
||||
yymore();
|
||||
num_hashes = 0;
|
||||
saw_non_hash = 0;
|
||||
end_hashes = 0;
|
||||
}
|
||||
|
||||
<rawstr_esc_begin># {
|
||||
num_hashes++;
|
||||
yymore();
|
||||
}
|
||||
<rawstr_esc_begin>\x22 {
|
||||
BEGIN(rawstr_esc_body);
|
||||
yymore();
|
||||
}
|
||||
<rawstr_esc_begin>(.|\n) { return -1; }
|
||||
|
||||
<rawstr_esc_body>\x22/# {
|
||||
BEGIN(rawstr_esc_end);
|
||||
yymore();
|
||||
}
|
||||
<rawstr_esc_body>(.|\n) {
|
||||
yymore();
|
||||
}
|
||||
|
||||
<rawstr_esc_end># {
|
||||
end_hashes++;
|
||||
if (end_hashes == num_hashes) {
|
||||
BEGIN(INITIAL);
|
||||
return LIT_STR_RAW;
|
||||
}
|
||||
yymore();
|
||||
}
|
||||
<rawstr_esc_end>[^#] {
|
||||
end_hashes = 0;
|
||||
BEGIN(rawstr_esc_body);
|
||||
yymore();
|
||||
}
|
||||
|
||||
<rawstr_esc_begin,rawstr_esc_body,rawstr_esc_end><<EOF>> { return -1; }
|
||||
|
||||
\x22 { BEGIN(str); yymore(); }
|
||||
<str>\x22 { BEGIN(suffix); return LIT_STR; }
|
||||
|
||||
<str><<EOF>> { return -1; }
|
||||
<str>\\[n\nr\rt\\\x27\x220] { yymore(); }
|
||||
<str>\\x[0-9a-fA-F]{2} { yymore(); }
|
||||
<str>\\u\{[0-9a-fA-F]?{6}\} { yymore(); }
|
||||
<str>\\[^n\nrt\\\x27\x220] { return -1; }
|
||||
<str>(.|\n) { yymore(); }
|
||||
|
||||
\<- { return LARROW; }
|
||||
-\> { return RARROW; }
|
||||
- { return '-'; }
|
||||
-= { return MINUSEQ; }
|
||||
&& { return ANDAND; }
|
||||
& { return '&'; }
|
||||
&= { return ANDEQ; }
|
||||
\|\| { return OROR; }
|
||||
\| { return '|'; }
|
||||
\|= { return OREQ; }
|
||||
\+ { return '+'; }
|
||||
\+= { return PLUSEQ; }
|
||||
\* { return '*'; }
|
||||
\*= { return STAREQ; }
|
||||
\/ { return '/'; }
|
||||
\/= { return SLASHEQ; }
|
||||
\^ { return '^'; }
|
||||
\^= { return CARETEQ; }
|
||||
% { return '%'; }
|
||||
%= { return PERCENTEQ; }
|
||||
|
||||
<<EOF>> { return 0; }
|
||||
|
||||
%%
|
||||
|
|
@ -1,203 +0,0 @@
|
|||
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
extern int yylex();
|
||||
extern int rsparse();
|
||||
|
||||
#define PUSHBACK_LEN 4
|
||||
|
||||
static char pushback[PUSHBACK_LEN];
|
||||
static int verbose;
|
||||
|
||||
void print(const char* format, ...) {
|
||||
va_list args;
|
||||
va_start(args, format);
|
||||
if (verbose) {
|
||||
vprintf(format, args);
|
||||
}
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
// If there is a non-null char at the head of the pushback queue,
|
||||
// dequeue it and shift the rest of the queue forwards. Otherwise,
|
||||
// return the token from calling yylex.
|
||||
int rslex() {
|
||||
if (pushback[0] == '\0') {
|
||||
return yylex();
|
||||
} else {
|
||||
char c = pushback[0];
|
||||
memmove(pushback, pushback + 1, PUSHBACK_LEN - 1);
|
||||
pushback[PUSHBACK_LEN - 1] = '\0';
|
||||
return c;
|
||||
}
|
||||
}
|
||||
|
||||
// Note: this does nothing if the pushback queue is full. As long as
|
||||
// there aren't more than PUSHBACK_LEN consecutive calls to push_back
|
||||
// in an action, this shouldn't be a problem.
|
||||
void push_back(char c) {
|
||||
for (int i = 0; i < PUSHBACK_LEN; ++i) {
|
||||
if (pushback[i] == '\0') {
|
||||
pushback[i] = c;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extern int rsdebug;
|
||||
|
||||
struct node {
|
||||
struct node *next;
|
||||
struct node *prev;
|
||||
int own_string;
|
||||
char const *name;
|
||||
int n_elems;
|
||||
struct node *elems[];
|
||||
};
|
||||
|
||||
struct node *nodes = NULL;
|
||||
int n_nodes;
|
||||
|
||||
struct node *mk_node(char const *name, int n, ...) {
|
||||
va_list ap;
|
||||
int i = 0;
|
||||
unsigned sz = sizeof(struct node) + (n * sizeof(struct node *));
|
||||
struct node *nn, *nd = (struct node *)malloc(sz);
|
||||
|
||||
print("# New %d-ary node: %s = %p\n", n, name, nd);
|
||||
|
||||
nd->own_string = 0;
|
||||
nd->prev = NULL;
|
||||
nd->next = nodes;
|
||||
if (nodes) {
|
||||
nodes->prev = nd;
|
||||
}
|
||||
nodes = nd;
|
||||
|
||||
nd->name = name;
|
||||
nd->n_elems = n;
|
||||
|
||||
va_start(ap, n);
|
||||
while (i < n) {
|
||||
nn = va_arg(ap, struct node *);
|
||||
print("# arg[%d]: %p\n", i, nn);
|
||||
print("# (%s ...)\n", nn->name);
|
||||
nd->elems[i++] = nn;
|
||||
}
|
||||
va_end(ap);
|
||||
n_nodes++;
|
||||
return nd;
|
||||
}
|
||||
|
||||
struct node *mk_atom(char *name) {
|
||||
struct node *nd = mk_node((char const *)strdup(name), 0);
|
||||
nd->own_string = 1;
|
||||
return nd;
|
||||
}
|
||||
|
||||
/* Creates an atom node named "<none>". */
struct node *mk_none() {
    struct node *none = mk_atom("<none>");

    return none;
}
|
||||
|
||||
struct node *ext_node(struct node *nd, int n, ...) {
|
||||
va_list ap;
|
||||
int i = 0, c = nd->n_elems + n;
|
||||
unsigned sz = sizeof(struct node) + (c * sizeof(struct node *));
|
||||
struct node *nn;
|
||||
|
||||
print("# Extending %d-ary node by %d nodes: %s = %p",
|
||||
nd->n_elems, c, nd->name, nd);
|
||||
|
||||
if (nd->next) {
|
||||
nd->next->prev = nd->prev;
|
||||
}
|
||||
if (nd->prev) {
|
||||
nd->prev->next = nd->next;
|
||||
}
|
||||
nd = realloc(nd, sz);
|
||||
nd->prev = NULL;
|
||||
nd->next = nodes;
|
||||
nodes->prev = nd;
|
||||
nodes = nd;
|
||||
|
||||
print(" ==> %p\n", nd);
|
||||
|
||||
va_start(ap, n);
|
||||
while (i < n) {
|
||||
nn = va_arg(ap, struct node *);
|
||||
print("# arg[%d]: %p\n", i, nn);
|
||||
print("# (%s ...)\n", nn->name);
|
||||
nd->elems[nd->n_elems++] = nn;
|
||||
++i;
|
||||
}
|
||||
va_end(ap);
|
||||
return nd;
|
||||
}
|
||||
|
||||
int const indent_step = 4;
|
||||
|
||||
void print_indent(int depth) {
|
||||
while (depth) {
|
||||
if (depth-- % indent_step == 0) {
|
||||
print("|");
|
||||
} else {
|
||||
print(" ");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void print_node(struct node *n, int depth) {
|
||||
int i = 0;
|
||||
print_indent(depth);
|
||||
if (n->n_elems == 0) {
|
||||
print("%s\n", n->name);
|
||||
} else {
|
||||
print("(%s\n", n->name);
|
||||
for (i = 0; i < n->n_elems; ++i) {
|
||||
print_node(n->elems[i], depth + indent_step);
|
||||
}
|
||||
print_indent(depth);
|
||||
print(")\n");
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
if (argc == 2 && strcmp(argv[1], "-v") == 0) {
|
||||
verbose = 1;
|
||||
} else {
|
||||
verbose = 0;
|
||||
}
|
||||
int ret = 0;
|
||||
struct node *tmp;
|
||||
memset(pushback, '\0', PUSHBACK_LEN);
|
||||
ret = rsparse();
|
||||
print("--- PARSE COMPLETE: ret:%d, n_nodes:%d ---\n", ret, n_nodes);
|
||||
if (nodes) {
|
||||
print_node(nodes, 0);
|
||||
}
|
||||
while (nodes) {
|
||||
tmp = nodes;
|
||||
nodes = tmp->next;
|
||||
if (tmp->own_string) {
|
||||
free((void*)tmp->name);
|
||||
}
|
||||
free(tmp);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Writes the message `s` followed by a newline to stderr. */
void rserror(char const *s) {
    fputs(s, stderr);
    fputc('\n', stderr);
}
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,64 +0,0 @@
|
|||
Rust's lexical grammar is not context-free. Raw string literals are the source
|
||||
of the problem. Informally, a raw string literal is an `r`, followed by `N`
|
||||
hashes (where N can be zero), a quote, any characters, then a quote followed
|
||||
by `N` hashes. Critically, once inside the first pair of quotes,
|
||||
another quote cannot be followed by `N` consecutive hashes. e.g.
|
||||
`r###""###"###` is invalid.
|
||||
|
||||
The following grammar describes this as well as possible:
|
||||
|
||||
R -> 'r' S
|
||||
S -> '"' B '"'
|
||||
S -> '#' S '#'
|
||||
B -> . B
|
||||
B -> ε
|
||||
|
||||
Where `.` represents any character, and `ε` the empty string. Consider the
|
||||
string `r#""#"#`. This string is not a valid raw string literal, but can be
|
||||
accepted as one by the above grammar, using the derivation:
|
||||
|
||||
R : #""#"#
|
||||
S : ""#"
|
||||
S : "#
|
||||
B : #
|
||||
B : ε
|
||||
|
||||
(Where `T : U` means the rule `T` is applied, and `U` is the remainder of the
|
||||
string.) The difficulty arises from the fact that it is fundamentally
|
||||
context-sensitive. In particular, the context needed is the number of hashes.
|
||||
|
||||
To prove that Rust's string literals are not context-free, we will use
|
||||
the fact that context-free languages are closed under intersection with
|
||||
regular languages, and the
|
||||
[pumping lemma for context-free languages](https://en.wikipedia.org/wiki/Pumping_lemma_for_context-free_languages).
|
||||
|
||||
Consider the regular language `R = r#+""#*"#+`. If Rust's raw string literals are
|
||||
context-free, then their intersection with `R`, `R'`, should also be context-free.
|
||||
Therefore, to prove that raw string literals are not context-free,
|
||||
it is sufficient to prove that `R'` is not context-free.
|
||||
|
||||
The language `R'` is `{r#^n""#^m"#^n | m < n}`.
|
||||
|
||||
Assume `R'` *is* context-free. Then `R'` has some pumping length `p > 0` for which
|
||||
the pumping lemma applies. Consider the following string `s` in `R'`:
|
||||
|
||||
`r#^p""#^{p-1}"#^p`
|
||||
|
||||
e.g. for `p = 2`: `s = r##""#"##`
|
||||
|
||||
Then `s = uvwxy` for some choice of `uvwxy` such that `vx` is non-empty,
|
||||
`|vwx| < p+1`, and `uv^iwx^iy` is in `R'` for all `i >= 0`.
|
||||
|
||||
Neither `v` nor `x` can contain a `"` or `r`, as the number of these characters
|
||||
in any string in `R'` is fixed. So `v` and `x` contain only hashes.
|
||||
Consequently, of the three sequences of hashes, `v` and `x` combined
|
||||
can only pump two of them.
|
||||
If we ever choose the central sequence of hashes, then one of the outer sequences
|
||||
will not grow when we pump, leading to an imbalance between the outer sequences.
|
||||
Therefore, we must pump both outer sequences of hashes. However,
|
||||
there are `p+2` characters between these two sequences of hashes, and `|vwx|` must
|
||||
be less than `p+1`. Therefore we have a contradiction, and `R'` must not be
|
||||
context-free.
|
||||
|
||||
Since `R'` is not context-free, it follows that Rust's raw string literals
|
||||
must not be context-free.
|
||||
|
|
@ -1,76 +0,0 @@
|
|||
#!/usr/bin/env python
|
||||
#
|
||||
# Copyright 2015 The Rust Project Developers. See the COPYRIGHT
|
||||
# file at the top-level directory of this distribution and at
|
||||
# http://rust-lang.org/COPYRIGHT.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
# option. This file may not be copied, modified, or distributed
|
||||
# except according to those terms.
|
||||
|
||||
# ignore-tidy-linelength
|
||||
|
||||
import sys
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
import argparse
|
||||
|
||||
# usage: testparser.py [-h] [-p PARSER [PARSER ...]] -s SOURCE_DIR
|
||||
|
||||
# Parsers should read from stdin and return exit status 0 for a
|
||||
# successful parse, and nonzero for an unsuccessful parse
|
||||
|
||||
# Command line: zero or more parser commands (-p) and one source directory (-s).
# default=[] makes a missing -p an empty list; without it argparse leaves the
# attribute as None and the loops over args.parser raise TypeError.
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument('-p', '--parser', nargs='+', default=[])
arg_parser.add_argument('-s', '--source-dir', nargs=1, required=True)
args = arg_parser.parse_args(sys.argv[1:])

total = 0
ok = {}   # parser command -> number of files with the expected outcome
bad = {}  # parser command -> list of files with the wrong outcome
for parser in args.parser:
    ok[parser] = 0
    bad[parser] = []
devnull = open(os.devnull, 'w')
print("\n")

for base, dirs, files in os.walk(args.source_dir[0]):
    for f in filter(lambda p: p.endswith('.rs'), files):
        p = os.path.join(base, f)
        # Files under a path containing 'parse-fail' are expected to be rejected.
        parse_fail = 'parse-fail' in p
        if sys.version_info.major == 3:
            source = open(p, encoding='utf-8')
        else:
            source = open(p)
        # Close the handle explicitly; the walk may visit thousands of files.
        try:
            lines = source.readlines()
        finally:
            source.close()
        if any('ignore-test' in line or 'ignore-lexer-test' in line for line in lines):
            continue
        total += 1
        for parser in args.parser:
            with open(p) as parser_stdin:
                status = subprocess.call(parser, stdin=parser_stdin,
                                         stderr=subprocess.STDOUT, stdout=devnull)
            # A result is "ok" when acceptance/rejection matches the expectation.
            accepted = (status == 0)
            if accepted != parse_fail:
                ok[parser] += 1
            else:
                bad[parser].append(p)
        # Shorten each parser path individually. relpath is only meaningful on
        # a path, and it raises ValueError on the empty string that results
        # when no parser was given.
        parser_stats = ', '.join(['{}: {}'.format(os.path.relpath(parser), ok[parser])
                                  for parser in args.parser])
        sys.stdout.write("\033[K\r total: {}, {}, scanned {}"
                         .format(total, parser_stats, os.path.relpath(p)))

devnull.close()

print("\n")

# One "<parser basename>.bad" file per parser, listing the mismatching files.
for parser in args.parser:
    filename = os.path.basename(parser) + '.bad'
    print("writing {} files that did not yield the correct result with {} to {}".format(len(bad[parser]), parser, filename))
    with open(filename, "w") as f:
        for p in bad[parser]:
            f.write(p)
            f.write("\n")
|
||||
|
|
@ -1,91 +0,0 @@
|
|||
// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
/*
 * Integer token codes. Numbering starts at 257 because parser generators
 * reserve 0-256 for char literals; every later enumerator takes the next
 * consecutive value, so the order below is significant.
 */
enum Token {
    /* multi-character comparison, shift and logical operators */
    SHL = 257, SHR, LE, EQEQ, NE, GE, ANDAND, OROR,

    /* compound-assignment operators */
    SHLEQ, SHREQ, MINUSEQ, ANDEQ, OREQ, PLUSEQ, STAREQ, SLASHEQ,
    CARETEQ, PERCENTEQ,

    /* multi-character punctuation */
    DOTDOT, DOTDOTDOT, MOD_SEP, RARROW, FAT_ARROW,

    /* literals */
    LIT_BYTE, LIT_CHAR, LIT_INTEGER, LIT_FLOAT,
    LIT_STR, LIT_STR_RAW, LIT_BYTE_STR, LIT_BYTE_STR_RAW,

    /* identifiers and lifetimes */
    IDENT, UNDERSCORE, LIFETIME,

    /* keywords */
    SELF, STATIC, AS, BREAK, CRATE, ELSE, ENUM, EXTERN,
    FALSE, FN, FOR, IF, IMPL, IN, LET, LOOP,
    MATCH, MOD, MOVE, MUT, PRIV, PUB, REF, RETURN,
    STRUCT, TRUE, TRAIT, TYPE, UNSAFE, USE, WHILE, CONTINUE,
    PROC, BOX, CONST, WHERE, TYPEOF,

    /* doc comments */
    INNER_DOC_COMMENT, OUTER_DOC_COMMENT,

    SHEBANG, SHEBANG_LINE, STATIC_LIFETIME
};
|
||||
|
|
@ -1,361 +0,0 @@
|
|||
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
#![feature(plugin, rustc_private)]
|
||||
|
||||
extern crate syntax;
|
||||
extern crate syntax_pos;
|
||||
extern crate rustc;
|
||||
|
||||
#[macro_use]
|
||||
extern crate log;
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::env;
|
||||
use std::fs::File;
|
||||
use std::io::{BufRead, Read};
|
||||
use std::path::Path;
|
||||
|
||||
use syntax::parse::lexer;
|
||||
use rustc::dep_graph::DepGraph;
|
||||
use rustc::session::{self, config};
|
||||
use rustc::middle::cstore::DummyCrateStore;
|
||||
|
||||
use std::rc::Rc;
|
||||
use syntax::ast;
|
||||
use syntax::codemap;
|
||||
use syntax::parse::token::{self, BinOpToken, DelimToken, Lit, Token};
|
||||
use syntax::parse::lexer::TokenAndSpan;
|
||||
use syntax_pos::Pos;
|
||||
|
||||
use syntax::symbol::{Symbol, keywords};
|
||||
|
||||
fn parse_token_list(file: &str) -> HashMap<String, token::Token> {
|
||||
fn id() -> token::Token {
|
||||
Token::Ident(ast::Ident::with_empty_ctxt(keywords::Invalid.name()))
|
||||
}
|
||||
|
||||
let mut res = HashMap::new();
|
||||
|
||||
res.insert("-1".to_string(), Token::Eof);
|
||||
|
||||
for line in file.split('\n') {
|
||||
let eq = match line.trim().rfind('=') {
|
||||
Some(val) => val,
|
||||
None => continue
|
||||
};
|
||||
|
||||
let val = &line[..eq];
|
||||
let num = &line[eq + 1..];
|
||||
|
||||
let tok = match val {
|
||||
"SHR" => Token::BinOp(BinOpToken::Shr),
|
||||
"DOLLAR" => Token::Dollar,
|
||||
"LT" => Token::Lt,
|
||||
"STAR" => Token::BinOp(BinOpToken::Star),
|
||||
"FLOAT_SUFFIX" => id(),
|
||||
"INT_SUFFIX" => id(),
|
||||
"SHL" => Token::BinOp(BinOpToken::Shl),
|
||||
"LBRACE" => Token::OpenDelim(DelimToken::Brace),
|
||||
"RARROW" => Token::RArrow,
|
||||
"LIT_STR" => Token::Literal(Lit::Str_(keywords::Invalid.name()), None),
|
||||
"DOTDOT" => Token::DotDot,
|
||||
"MOD_SEP" => Token::ModSep,
|
||||
"DOTDOTDOT" => Token::DotDotDot,
|
||||
"NOT" => Token::Not,
|
||||
"AND" => Token::BinOp(BinOpToken::And),
|
||||
"LPAREN" => Token::OpenDelim(DelimToken::Paren),
|
||||
"ANDAND" => Token::AndAnd,
|
||||
"AT" => Token::At,
|
||||
"LBRACKET" => Token::OpenDelim(DelimToken::Bracket),
|
||||
"LIT_STR_RAW" => Token::Literal(Lit::StrRaw(keywords::Invalid.name(), 0), None),
|
||||
"RPAREN" => Token::CloseDelim(DelimToken::Paren),
|
||||
"SLASH" => Token::BinOp(BinOpToken::Slash),
|
||||
"COMMA" => Token::Comma,
|
||||
"LIFETIME" => Token::Lifetime(
|
||||
ast::Ident::with_empty_ctxt(keywords::Invalid.name())),
|
||||
"CARET" => Token::BinOp(BinOpToken::Caret),
|
||||
"TILDE" => Token::Tilde,
|
||||
"IDENT" => id(),
|
||||
"PLUS" => Token::BinOp(BinOpToken::Plus),
|
||||
"LIT_CHAR" => Token::Literal(Lit::Char(keywords::Invalid.name()), None),
|
||||
"LIT_BYTE" => Token::Literal(Lit::Byte(keywords::Invalid.name()), None),
|
||||
"EQ" => Token::Eq,
|
||||
"RBRACKET" => Token::CloseDelim(DelimToken::Bracket),
|
||||
"COMMENT" => Token::Comment,
|
||||
"DOC_COMMENT" => Token::DocComment(keywords::Invalid.name()),
|
||||
"DOT" => Token::Dot,
|
||||
"EQEQ" => Token::EqEq,
|
||||
"NE" => Token::Ne,
|
||||
"GE" => Token::Ge,
|
||||
"PERCENT" => Token::BinOp(BinOpToken::Percent),
|
||||
"RBRACE" => Token::CloseDelim(DelimToken::Brace),
|
||||
"BINOP" => Token::BinOp(BinOpToken::Plus),
|
||||
"POUND" => Token::Pound,
|
||||
"OROR" => Token::OrOr,
|
||||
"LIT_INTEGER" => Token::Literal(Lit::Integer(keywords::Invalid.name()), None),
|
||||
"BINOPEQ" => Token::BinOpEq(BinOpToken::Plus),
|
||||
"LIT_FLOAT" => Token::Literal(Lit::Float(keywords::Invalid.name()), None),
|
||||
"WHITESPACE" => Token::Whitespace,
|
||||
"UNDERSCORE" => Token::Underscore,
|
||||
"MINUS" => Token::BinOp(BinOpToken::Minus),
|
||||
"SEMI" => Token::Semi,
|
||||
"COLON" => Token::Colon,
|
||||
"FAT_ARROW" => Token::FatArrow,
|
||||
"OR" => Token::BinOp(BinOpToken::Or),
|
||||
"GT" => Token::Gt,
|
||||
"LE" => Token::Le,
|
||||
"LIT_BINARY" => Token::Literal(Lit::ByteStr(keywords::Invalid.name()), None),
|
||||
"LIT_BINARY_RAW" => Token::Literal(
|
||||
Lit::ByteStrRaw(keywords::Invalid.name(), 0), None),
|
||||
"QUESTION" => Token::Question,
|
||||
"SHEBANG" => Token::Shebang(keywords::Invalid.name()),
|
||||
_ => continue,
|
||||
};
|
||||
|
||||
res.insert(num.to_string(), tok);
|
||||
}
|
||||
|
||||
debug!("Token map: {:?}", res);
|
||||
res
|
||||
}
|
||||
|
||||
fn str_to_binop(s: &str) -> token::BinOpToken {
|
||||
match s {
|
||||
"+" => BinOpToken::Plus,
|
||||
"/" => BinOpToken::Slash,
|
||||
"-" => BinOpToken::Minus,
|
||||
"*" => BinOpToken::Star,
|
||||
"%" => BinOpToken::Percent,
|
||||
"^" => BinOpToken::Caret,
|
||||
"&" => BinOpToken::And,
|
||||
"|" => BinOpToken::Or,
|
||||
"<<" => BinOpToken::Shl,
|
||||
">>" => BinOpToken::Shr,
|
||||
_ => panic!("Bad binop str `{}`", s),
|
||||
}
|
||||
}
|
||||
|
||||
/// Assuming a string/byte string literal, strip out the leading/trailing
|
||||
/// hashes and surrounding quotes/raw/byte prefix.
|
||||
fn fix(mut lit: &str) -> ast::Name {
|
||||
let prefix: Vec<char> = lit.chars().take(2).collect();
|
||||
if prefix[0] == 'r' {
|
||||
if prefix[1] == 'b' {
|
||||
lit = &lit[2..]
|
||||
} else {
|
||||
lit = &lit[1..];
|
||||
}
|
||||
} else if prefix[0] == 'b' {
|
||||
lit = &lit[1..];
|
||||
}
|
||||
|
||||
let leading_hashes = count(lit);
|
||||
|
||||
// +1/-1 to adjust for single quotes
|
||||
Symbol::intern(&lit[leading_hashes + 1..lit.len() - leading_hashes - 1])
|
||||
}
|
||||
|
||||
/// Assuming a char/byte literal, strip the 'b' prefix and the single quotes.
|
||||
fn fixchar(mut lit: &str) -> ast::Name {
|
||||
let prefix = lit.chars().next().unwrap();
|
||||
if prefix == 'b' {
|
||||
lit = &lit[1..];
|
||||
}
|
||||
|
||||
Symbol::intern(&lit[1..lit.len() - 1])
|
||||
}
|
||||
|
||||
/// Returns how many `#` characters `lit` starts with.
fn count(lit: &str) -> usize {
    let mut hashes = 0;
    for ch in lit.chars() {
        if ch != '#' {
            break;
        }
        hashes += 1;
    }
    hashes
}
|
||||
|
||||
fn parse_antlr_token(s: &str, tokens: &HashMap<String, token::Token>, surrogate_pairs_pos: &[usize],
|
||||
has_bom: bool)
|
||||
-> TokenAndSpan {
|
||||
// old regex:
|
||||
// \[@(?P<seq>\d+),(?P<start>\d+):(?P<end>\d+)='(?P<content>.+?)',<(?P<toknum>-?\d+)>,\d+:\d+]
|
||||
let start = s.find("[@").unwrap();
|
||||
let comma = start + s[start..].find(",").unwrap();
|
||||
let colon = comma + s[comma..].find(":").unwrap();
|
||||
let content_start = colon + s[colon..].find("='").unwrap();
|
||||
// Use rfind instead of find, because we don't want to stop at the content
|
||||
let content_end = content_start + s[content_start..].rfind("',<").unwrap();
|
||||
let toknum_end = content_end + s[content_end..].find(">,").unwrap();
|
||||
|
||||
let start = &s[comma + 1 .. colon];
|
||||
let end = &s[colon + 1 .. content_start];
|
||||
let content = &s[content_start + 2 .. content_end];
|
||||
let toknum = &s[content_end + 3 .. toknum_end];
|
||||
|
||||
let not_found = format!("didn't find token {:?} in the map", toknum);
|
||||
let proto_tok = tokens.get(toknum).expect(¬_found);
|
||||
|
||||
let nm = Symbol::intern(content);
|
||||
|
||||
debug!("What we got: content (`{}`), proto: {:?}", content, proto_tok);
|
||||
|
||||
let real_tok = match *proto_tok {
|
||||
Token::BinOp(..) => Token::BinOp(str_to_binop(content)),
|
||||
Token::BinOpEq(..) => Token::BinOpEq(str_to_binop(&content[..content.len() - 1])),
|
||||
Token::Literal(Lit::Str_(..), n) => Token::Literal(Lit::Str_(fix(content)), n),
|
||||
Token::Literal(Lit::StrRaw(..), n) => Token::Literal(Lit::StrRaw(fix(content),
|
||||
count(content)), n),
|
||||
Token::Literal(Lit::Char(..), n) => Token::Literal(Lit::Char(fixchar(content)), n),
|
||||
Token::Literal(Lit::Byte(..), n) => Token::Literal(Lit::Byte(fixchar(content)), n),
|
||||
Token::DocComment(..) => Token::DocComment(nm),
|
||||
Token::Literal(Lit::Integer(..), n) => Token::Literal(Lit::Integer(nm), n),
|
||||
Token::Literal(Lit::Float(..), n) => Token::Literal(Lit::Float(nm), n),
|
||||
Token::Literal(Lit::ByteStr(..), n) => Token::Literal(Lit::ByteStr(nm), n),
|
||||
Token::Literal(Lit::ByteStrRaw(..), n) => Token::Literal(Lit::ByteStrRaw(fix(content),
|
||||
count(content)), n),
|
||||
Token::Ident(..) => Token::Ident(ast::Ident::with_empty_ctxt(nm)),
|
||||
Token::Lifetime(..) => Token::Lifetime(ast::Ident::with_empty_ctxt(nm)),
|
||||
ref t => t.clone()
|
||||
};
|
||||
|
||||
let start_offset = if real_tok == Token::Eof {
|
||||
1
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
let offset = if has_bom { 1 } else { 0 };
|
||||
|
||||
let mut lo = start.parse::<u32>().unwrap() - start_offset - offset;
|
||||
let mut hi = end.parse::<u32>().unwrap() + 1 - offset;
|
||||
|
||||
// Adjust the span: For each surrogate pair already encountered, subtract one position.
|
||||
lo -= surrogate_pairs_pos.binary_search(&(lo as usize)).unwrap_or_else(|x| x) as u32;
|
||||
hi -= surrogate_pairs_pos.binary_search(&(hi as usize)).unwrap_or_else(|x| x) as u32;
|
||||
|
||||
let sp = syntax_pos::Span {
|
||||
lo: syntax_pos::BytePos(lo),
|
||||
hi: syntax_pos::BytePos(hi),
|
||||
expn_id: syntax_pos::NO_EXPANSION
|
||||
};
|
||||
|
||||
TokenAndSpan {
|
||||
tok: real_tok,
|
||||
sp: sp
|
||||
}
|
||||
}
|
||||
|
||||
fn tok_cmp(a: &token::Token, b: &token::Token) -> bool {
|
||||
match a {
|
||||
&Token::Ident(id) => match b {
|
||||
&Token::Ident(id2) => id == id2,
|
||||
_ => false
|
||||
},
|
||||
_ => a == b
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks that the two spans agree: same `expn_id`, and the raw `lo`/`hi` of
/// `antlr_sp` equal the `lo`/`hi` of `rust_sp` after the latter have been
/// passed through `cm.bytepos_to_file_charpos`.
fn span_cmp(antlr_sp: codemap::Span, rust_sp: codemap::Span, cm: &codemap::CodeMap) -> bool {
    if antlr_sp.expn_id != rust_sp.expn_id {
        return false;
    }

    let rust_lo = cm.bytepos_to_file_charpos(rust_sp.lo).to_usize();
    if antlr_sp.lo.to_usize() != rust_lo {
        return false;
    }

    let rust_hi = cm.bytepos_to_file_charpos(rust_sp.hi).to_usize();
    antlr_sp.hi.to_usize() == rust_hi
}
|
||||
|
||||
fn main() {
|
||||
fn next(r: &mut lexer::StringReader) -> TokenAndSpan {
|
||||
use syntax::parse::lexer::Reader;
|
||||
r.next_token()
|
||||
}
|
||||
|
||||
let mut args = env::args().skip(1);
|
||||
let filename = args.next().unwrap();
|
||||
if filename.find("parse-fail").is_some() {
|
||||
return;
|
||||
}
|
||||
|
||||
// Rust's lexer
|
||||
let mut code = String::new();
|
||||
File::open(&Path::new(&filename)).unwrap().read_to_string(&mut code).unwrap();
|
||||
|
||||
let surrogate_pairs_pos: Vec<usize> = code.chars().enumerate()
|
||||
.filter(|&(_, c)| c as usize > 0xFFFF)
|
||||
.map(|(n, _)| n)
|
||||
.enumerate()
|
||||
.map(|(x, n)| x + n)
|
||||
.collect();
|
||||
|
||||
let has_bom = code.starts_with("\u{feff}");
|
||||
|
||||
debug!("Pairs: {:?}", surrogate_pairs_pos);
|
||||
|
||||
let options = config::basic_options();
|
||||
let session = session::build_session(options, &DepGraph::new(false), None,
|
||||
syntax::errors::registry::Registry::new(&[]),
|
||||
Rc::new(DummyCrateStore));
|
||||
let filemap = session.parse_sess.codemap()
|
||||
.new_filemap("<n/a>".to_string(), code);
|
||||
let mut lexer = lexer::StringReader::new(session.diagnostic(), filemap);
|
||||
let cm = session.codemap();
|
||||
|
||||
// ANTLR
|
||||
let mut token_file = File::open(&Path::new(&args.next().unwrap())).unwrap();
|
||||
let mut token_list = String::new();
|
||||
token_file.read_to_string(&mut token_list).unwrap();
|
||||
let token_map = parse_token_list(&token_list);
|
||||
|
||||
let stdin = std::io::stdin();
|
||||
let lock = stdin.lock();
|
||||
let lines = lock.lines();
|
||||
let antlr_tokens = lines.map(|l| parse_antlr_token(l.unwrap().trim(),
|
||||
&token_map,
|
||||
&surrogate_pairs_pos,
|
||||
has_bom));
|
||||
|
||||
for antlr_tok in antlr_tokens {
|
||||
let rustc_tok = next(&mut lexer);
|
||||
if rustc_tok.tok == Token::Eof && antlr_tok.tok == Token::Eof {
|
||||
continue
|
||||
}
|
||||
|
||||
assert!(span_cmp(antlr_tok.sp, rustc_tok.sp, cm), "{:?} and {:?} have different spans",
|
||||
rustc_tok,
|
||||
antlr_tok);
|
||||
|
||||
macro_rules! matches {
|
||||
( $($x:pat),+ ) => (
|
||||
match rustc_tok.tok {
|
||||
$($x => match antlr_tok.tok {
|
||||
$x => {
|
||||
if !tok_cmp(&rustc_tok.tok, &antlr_tok.tok) {
|
||||
// FIXME #15677: needs more robust escaping in
|
||||
// antlr
|
||||
warn!("Different names for {:?} and {:?}", rustc_tok, antlr_tok);
|
||||
}
|
||||
}
|
||||
_ => panic!("{:?} is not {:?}", antlr_tok, rustc_tok)
|
||||
},)*
|
||||
ref c => assert!(c == &antlr_tok.tok, "{:?} is not {:?}", antlr_tok, rustc_tok)
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
matches!(
|
||||
Token::Literal(Lit::Byte(..), _),
|
||||
Token::Literal(Lit::Char(..), _),
|
||||
Token::Literal(Lit::Integer(..), _),
|
||||
Token::Literal(Lit::Float(..), _),
|
||||
Token::Literal(Lit::Str_(..), _),
|
||||
Token::Literal(Lit::StrRaw(..), _),
|
||||
Token::Literal(Lit::ByteStr(..), _),
|
||||
Token::Literal(Lit::ByteStrRaw(..), _),
|
||||
Token::Ident(..),
|
||||
Token::Lifetime(..),
|
||||
Token::Interpolated(..),
|
||||
Token::DocComment(..),
|
||||
Token::Shebang(..)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,473 +0,0 @@
|
|||
lexer grammar Xidcontinue;
|
||||
|
||||
fragment XID_Continue:
|
||||
'\u0030' .. '\u0039'
|
||||
| '\u0041' .. '\u005a'
|
||||
| '\u005f'
|
||||
| '\u0061' .. '\u007a'
|
||||
| '\u00aa'
|
||||
| '\u00b5'
|
||||
| '\u00b7'
|
||||
| '\u00ba'
|
||||
| '\u00c0' .. '\u00d6'
|
||||
| '\u00d8' .. '\u00f6'
|
||||
| '\u00f8' .. '\u0236'
|
||||
| '\u0250' .. '\u02c1'
|
||||
| '\u02c6' .. '\u02d1'
|
||||
| '\u02e0' .. '\u02e4'
|
||||
| '\u02ee'
|
||||
| '\u0300' .. '\u0357'
|
||||
| '\u035d' .. '\u036f'
|
||||
| '\u0386'
|
||||
| '\u0388' .. '\u038a'
|
||||
| '\u038c'
|
||||
| '\u038e' .. '\u03a1'
|
||||
| '\u03a3' .. '\u03ce'
|
||||
| '\u03d0' .. '\u03f5'
|
||||
| '\u03f7' .. '\u03fb'
|
||||
| '\u0400' .. '\u0481'
|
||||
| '\u0483' .. '\u0486'
|
||||
| '\u048a' .. '\u04ce'
|
||||
| '\u04d0' .. '\u04f5'
|
||||
| '\u04f8' .. '\u04f9'
|
||||
| '\u0500' .. '\u050f'
|
||||
| '\u0531' .. '\u0556'
|
||||
| '\u0559'
|
||||
| '\u0561' .. '\u0587'
|
||||
| '\u0591' .. '\u05a1'
|
||||
| '\u05a3' .. '\u05b9'
|
||||
| '\u05bb' .. '\u05bd'
|
||||
| '\u05bf'
|
||||
| '\u05c1' .. '\u05c2'
|
||||
| '\u05c4'
|
||||
| '\u05d0' .. '\u05ea'
|
||||
| '\u05f0' .. '\u05f2'
|
||||
| '\u0610' .. '\u0615'
|
||||
| '\u0621' .. '\u063a'
|
||||
| '\u0640' .. '\u0658'
|
||||
| '\u0660' .. '\u0669'
|
||||
| '\u066e' .. '\u06d3'
|
||||
| '\u06d5' .. '\u06dc'
|
||||
| '\u06df' .. '\u06e8'
|
||||
| '\u06ea' .. '\u06fc'
|
||||
| '\u06ff'
|
||||
| '\u0710' .. '\u074a'
|
||||
| '\u074d' .. '\u074f'
|
||||
| '\u0780' .. '\u07b1'
|
||||
| '\u0901' .. '\u0939'
|
||||
| '\u093c' .. '\u094d'
|
||||
| '\u0950' .. '\u0954'
|
||||
| '\u0958' .. '\u0963'
|
||||
| '\u0966' .. '\u096f'
|
||||
| '\u0981' .. '\u0983'
|
||||
| '\u0985' .. '\u098c'
|
||||
| '\u098f' .. '\u0990'
|
||||
| '\u0993' .. '\u09a8'
|
||||
| '\u09aa' .. '\u09b0'
|
||||
| '\u09b2'
|
||||
| '\u09b6' .. '\u09b9'
|
||||
| '\u09bc' .. '\u09c4'
|
||||
| '\u09c7' .. '\u09c8'
|
||||
| '\u09cb' .. '\u09cd'
|
||||
| '\u09d7'
|
||||
| '\u09dc' .. '\u09dd'
|
||||
| '\u09df' .. '\u09e3'
|
||||
| '\u09e6' .. '\u09f1'
|
||||
| '\u0a01' .. '\u0a03'
|
||||
| '\u0a05' .. '\u0a0a'
|
||||
| '\u0a0f' .. '\u0a10'
|
||||
| '\u0a13' .. '\u0a28'
|
||||
| '\u0a2a' .. '\u0a30'
|
||||
| '\u0a32' .. '\u0a33'
|
||||
| '\u0a35' .. '\u0a36'
|
||||
| '\u0a38' .. '\u0a39'
|
||||
| '\u0a3c'
|
||||
| '\u0a3e' .. '\u0a42'
|
||||
| '\u0a47' .. '\u0a48'
|
||||
| '\u0a4b' .. '\u0a4d'
|
||||
| '\u0a59' .. '\u0a5c'
|
||||
| '\u0a5e'
|
||||
| '\u0a66' .. '\u0a74'
|
||||
| '\u0a81' .. '\u0a83'
|
||||
| '\u0a85' .. '\u0a8d'
|
||||
| '\u0a8f' .. '\u0a91'
|
||||
| '\u0a93' .. '\u0aa8'
|
||||
| '\u0aaa' .. '\u0ab0'
|
||||
| '\u0ab2' .. '\u0ab3'
|
||||
| '\u0ab5' .. '\u0ab9'
|
||||
| '\u0abc' .. '\u0ac5'
|
||||
| '\u0ac7' .. '\u0ac9'
|
||||
| '\u0acb' .. '\u0acd'
|
||||
| '\u0ad0'
|
||||
| '\u0ae0' .. '\u0ae3'
|
||||
| '\u0ae6' .. '\u0aef'
|
||||
| '\u0b01' .. '\u0b03'
|
||||
| '\u0b05' .. '\u0b0c'
|
||||
| '\u0b0f' .. '\u0b10'
|
||||
| '\u0b13' .. '\u0b28'
|
||||
| '\u0b2a' .. '\u0b30'
|
||||
| '\u0b32' .. '\u0b33'
|
||||
| '\u0b35' .. '\u0b39'
|
||||
| '\u0b3c' .. '\u0b43'
|
||||
| '\u0b47' .. '\u0b48'
|
||||
| '\u0b4b' .. '\u0b4d'
|
||||
| '\u0b56' .. '\u0b57'
|
||||
| '\u0b5c' .. '\u0b5d'
|
||||
| '\u0b5f' .. '\u0b61'
|
||||
| '\u0b66' .. '\u0b6f'
|
||||
| '\u0b71'
|
||||
| '\u0b82' .. '\u0b83'
|
||||
| '\u0b85' .. '\u0b8a'
|
||||
| '\u0b8e' .. '\u0b90'
|
||||
| '\u0b92' .. '\u0b95'
|
||||
| '\u0b99' .. '\u0b9a'
|
||||
| '\u0b9c'
|
||||
| '\u0b9e' .. '\u0b9f'
|
||||
| '\u0ba3' .. '\u0ba4'
|
||||
| '\u0ba8' .. '\u0baa'
|
||||
| '\u0bae' .. '\u0bb5'
|
||||
| '\u0bb7' .. '\u0bb9'
|
||||
| '\u0bbe' .. '\u0bc2'
|
||||
| '\u0bc6' .. '\u0bc8'
|
||||
| '\u0bca' .. '\u0bcd'
|
||||
| '\u0bd7'
|
||||
| '\u0be7' .. '\u0bef'
|
||||
| '\u0c01' .. '\u0c03'
|
||||
| '\u0c05' .. '\u0c0c'
|
||||
| '\u0c0e' .. '\u0c10'
|
||||
| '\u0c12' .. '\u0c28'
|
||||
| '\u0c2a' .. '\u0c33'
|
||||
| '\u0c35' .. '\u0c39'
|
||||
| '\u0c3e' .. '\u0c44'
|
||||
| '\u0c46' .. '\u0c48'
|
||||
| '\u0c4a' .. '\u0c4d'
|
||||
| '\u0c55' .. '\u0c56'
|
||||
| '\u0c60' .. '\u0c61'
|
||||
| '\u0c66' .. '\u0c6f'
|
||||
| '\u0c82' .. '\u0c83'
|
||||
| '\u0c85' .. '\u0c8c'
|
||||
| '\u0c8e' .. '\u0c90'
|
||||
| '\u0c92' .. '\u0ca8'
|
||||
| '\u0caa' .. '\u0cb3'
|
||||
| '\u0cb5' .. '\u0cb9'
|
||||
| '\u0cbc' .. '\u0cc4'
|
||||
| '\u0cc6' .. '\u0cc8'
|
||||
| '\u0cca' .. '\u0ccd'
|
||||
| '\u0cd5' .. '\u0cd6'
|
||||
| '\u0cde'
|
||||
| '\u0ce0' .. '\u0ce1'
|
||||
| '\u0ce6' .. '\u0cef'
|
||||
| '\u0d02' .. '\u0d03'
|
||||
| '\u0d05' .. '\u0d0c'
|
||||
| '\u0d0e' .. '\u0d10'
|
||||
| '\u0d12' .. '\u0d28'
|
||||
| '\u0d2a' .. '\u0d39'
|
||||
| '\u0d3e' .. '\u0d43'
|
||||
| '\u0d46' .. '\u0d48'
|
||||
| '\u0d4a' .. '\u0d4d'
|
||||
| '\u0d57'
|
||||
| '\u0d60' .. '\u0d61'
|
||||
| '\u0d66' .. '\u0d6f'
|
||||
| '\u0d82' .. '\u0d83'
|
||||
| '\u0d85' .. '\u0d96'
|
||||
| '\u0d9a' .. '\u0db1'
|
||||
| '\u0db3' .. '\u0dbb'
|
||||
| '\u0dbd'
|
||||
| '\u0dc0' .. '\u0dc6'
|
||||
| '\u0dca'
|
||||
| '\u0dcf' .. '\u0dd4'
|
||||
| '\u0dd6'
|
||||
| '\u0dd8' .. '\u0ddf'
|
||||
| '\u0df2' .. '\u0df3'
|
||||
| '\u0e01' .. '\u0e3a'
|
||||
| '\u0e40' .. '\u0e4e'
|
||||
| '\u0e50' .. '\u0e59'
|
||||
| '\u0e81' .. '\u0e82'
|
||||
| '\u0e84'
|
||||
| '\u0e87' .. '\u0e88'
|
||||
| '\u0e8a'
|
||||
| '\u0e8d'
|
||||
| '\u0e94' .. '\u0e97'
|
||||
| '\u0e99' .. '\u0e9f'
|
||||
| '\u0ea1' .. '\u0ea3'
|
||||
| '\u0ea5'
|
||||
| '\u0ea7'
|
||||
| '\u0eaa' .. '\u0eab'
|
||||
| '\u0ead' .. '\u0eb9'
|
||||
| '\u0ebb' .. '\u0ebd'
|
||||
| '\u0ec0' .. '\u0ec4'
|
||||
| '\u0ec6'
|
||||
| '\u0ec8' .. '\u0ecd'
|
||||
| '\u0ed0' .. '\u0ed9'
|
||||
| '\u0edc' .. '\u0edd'
|
||||
| '\u0f00'
|
||||
| '\u0f18' .. '\u0f19'
|
||||
| '\u0f20' .. '\u0f29'
|
||||
| '\u0f35'
|
||||
| '\u0f37'
|
||||
| '\u0f39'
|
||||
| '\u0f3e' .. '\u0f47'
|
||||
| '\u0f49' .. '\u0f6a'
|
||||
| '\u0f71' .. '\u0f84'
|
||||
| '\u0f86' .. '\u0f8b'
|
||||
| '\u0f90' .. '\u0f97'
|
||||
| '\u0f99' .. '\u0fbc'
|
||||
| '\u0fc6'
|
||||
| '\u1000' .. '\u1021'
|
||||
| '\u1023' .. '\u1027'
|
||||
| '\u1029' .. '\u102a'
|
||||
| '\u102c' .. '\u1032'
|
||||
| '\u1036' .. '\u1039'
|
||||
| '\u1040' .. '\u1049'
|
||||
| '\u1050' .. '\u1059'
|
||||
| '\u10a0' .. '\u10c5'
|
||||
| '\u10d0' .. '\u10f8'
|
||||
| '\u1100' .. '\u1159'
|
||||
| '\u115f' .. '\u11a2'
|
||||
| '\u11a8' .. '\u11f9'
|
||||
| '\u1200' .. '\u1206'
|
||||
| '\u1208' .. '\u1246'
|
||||
| '\u1248'
|
||||
| '\u124a' .. '\u124d'
|
||||
| '\u1250' .. '\u1256'
|
||||
| '\u1258'
|
||||
| '\u125a' .. '\u125d'
|
||||
| '\u1260' .. '\u1286'
|
||||
| '\u1288'
|
||||
| '\u128a' .. '\u128d'
|
||||
| '\u1290' .. '\u12ae'
|
||||
| '\u12b0'
|
||||
| '\u12b2' .. '\u12b5'
|
||||
| '\u12b8' .. '\u12be'
|
||||
| '\u12c0'
|
||||
| '\u12c2' .. '\u12c5'
|
||||
| '\u12c8' .. '\u12ce'
|
||||
| '\u12d0' .. '\u12d6'
|
||||
| '\u12d8' .. '\u12ee'
|
||||
| '\u12f0' .. '\u130e'
|
||||
| '\u1310'
|
||||
| '\u1312' .. '\u1315'
|
||||
| '\u1318' .. '\u131e'
|
||||
| '\u1320' .. '\u1346'
|
||||
| '\u1348' .. '\u135a'
|
||||
| '\u1369' .. '\u1371'
|
||||
| '\u13a0' .. '\u13f4'
|
||||
| '\u1401' .. '\u166c'
|
||||
| '\u166f' .. '\u1676'
|
||||
| '\u1681' .. '\u169a'
|
||||
| '\u16a0' .. '\u16ea'
|
||||
| '\u16ee' .. '\u16f0'
|
||||
| '\u1700' .. '\u170c'
|
||||
| '\u170e' .. '\u1714'
|
||||
| '\u1720' .. '\u1734'
|
||||
| '\u1740' .. '\u1753'
|
||||
| '\u1760' .. '\u176c'
|
||||
| '\u176e' .. '\u1770'
|
||||
| '\u1772' .. '\u1773'
|
||||
| '\u1780' .. '\u17b3'
|
||||
| '\u17b6' .. '\u17d3'
|
||||
| '\u17d7'
|
||||
| '\u17dc' .. '\u17dd'
|
||||
| '\u17e0' .. '\u17e9'
|
||||
| '\u180b' .. '\u180d'
|
||||
| '\u1810' .. '\u1819'
|
||||
| '\u1820' .. '\u1877'
|
||||
| '\u1880' .. '\u18a9'
|
||||
| '\u1900' .. '\u191c'
|
||||
| '\u1920' .. '\u192b'
|
||||
| '\u1930' .. '\u193b'
|
||||
| '\u1946' .. '\u196d'
|
||||
| '\u1970' .. '\u1974'
|
||||
| '\u1d00' .. '\u1d6b'
|
||||
| '\u1e00' .. '\u1e9b'
|
||||
| '\u1ea0' .. '\u1ef9'
|
||||
| '\u1f00' .. '\u1f15'
|
||||
| '\u1f18' .. '\u1f1d'
|
||||
| '\u1f20' .. '\u1f45'
|
||||
| '\u1f48' .. '\u1f4d'
|
||||
| '\u1f50' .. '\u1f57'
|
||||
| '\u1f59'
|
||||
| '\u1f5b'
|
||||
| '\u1f5d'
|
||||
| '\u1f5f' .. '\u1f7d'
|
||||
| '\u1f80' .. '\u1fb4'
|
||||
| '\u1fb6' .. '\u1fbc'
|
||||
| '\u1fbe'
|
||||
| '\u1fc2' .. '\u1fc4'
|
||||
| '\u1fc6' .. '\u1fcc'
|
||||
| '\u1fd0' .. '\u1fd3'
|
||||
| '\u1fd6' .. '\u1fdb'
|
||||
| '\u1fe0' .. '\u1fec'
|
||||
| '\u1ff2' .. '\u1ff4'
|
||||
| '\u1ff6' .. '\u1ffc'
|
||||
| '\u203f' .. '\u2040'
|
||||
| '\u2054'
|
||||
| '\u2071'
|
||||
| '\u207f'
|
||||
| '\u20d0' .. '\u20dc'
|
||||
| '\u20e1'
|
||||
| '\u20e5' .. '\u20ea'
|
||||
| '\u2102'
|
||||
| '\u2107'
|
||||
| '\u210a' .. '\u2113'
|
||||
| '\u2115'
|
||||
| '\u2118' .. '\u211d'
|
||||
| '\u2124'
|
||||
| '\u2126'
|
||||
| '\u2128'
|
||||
| '\u212a' .. '\u2131'
|
||||
| '\u2133' .. '\u2139'
|
||||
| '\u213d' .. '\u213f'
|
||||
| '\u2145' .. '\u2149'
|
||||
| '\u2160' .. '\u2183'
|
||||
| '\u3005' .. '\u3007'
|
||||
| '\u3021' .. '\u302f'
|
||||
| '\u3031' .. '\u3035'
|
||||
| '\u3038' .. '\u303c'
|
||||
| '\u3041' .. '\u3096'
|
||||
| '\u3099' .. '\u309a'
|
||||
| '\u309d' .. '\u309f'
|
||||
| '\u30a1' .. '\u30ff'
|
||||
| '\u3105' .. '\u312c'
|
||||
| '\u3131' .. '\u318e'
|
||||
| '\u31a0' .. '\u31b7'
|
||||
| '\u31f0' .. '\u31ff'
|
||||
| '\u3400' .. '\u4db5'
|
||||
| '\u4e00' .. '\u9fa5'
|
||||
| '\ua000' .. '\ua48c'
|
||||
| '\uac00' .. '\ud7a3'
|
||||
| '\uf900' .. '\ufa2d'
|
||||
| '\ufa30' .. '\ufa6a'
|
||||
| '\ufb00' .. '\ufb06'
|
||||
| '\ufb13' .. '\ufb17'
|
||||
| '\ufb1d' .. '\ufb28'
|
||||
| '\ufb2a' .. '\ufb36'
|
||||
| '\ufb38' .. '\ufb3c'
|
||||
| '\ufb3e'
|
||||
| '\ufb40' .. '\ufb41'
|
||||
| '\ufb43' .. '\ufb44'
|
||||
| '\ufb46' .. '\ufbb1'
|
||||
| '\ufbd3' .. '\ufc5d'
|
||||
| '\ufc64' .. '\ufd3d'
|
||||
| '\ufd50' .. '\ufd8f'
|
||||
| '\ufd92' .. '\ufdc7'
|
||||
| '\ufdf0' .. '\ufdf9'
|
||||
| '\ufe00' .. '\ufe0f'
|
||||
| '\ufe20' .. '\ufe23'
|
||||
| '\ufe33' .. '\ufe34'
|
||||
| '\ufe4d' .. '\ufe4f'
|
||||
| '\ufe71'
|
||||
| '\ufe73'
|
||||
| '\ufe77'
|
||||
| '\ufe79'
|
||||
| '\ufe7b'
|
||||
| '\ufe7d'
|
||||
| '\ufe7f' .. '\ufefc'
|
||||
| '\uff10' .. '\uff19'
|
||||
| '\uff21' .. '\uff3a'
|
||||
| '\uff3f'
|
||||
| '\uff41' .. '\uff5a'
|
||||
| '\uff65' .. '\uffbe'
|
||||
| '\uffc2' .. '\uffc7'
|
||||
| '\uffca' .. '\uffcf'
|
||||
| '\uffd2' .. '\uffd7'
|
||||
| '\uffda' .. '\uffdc'
|
||||
| '\ud800' '\udc00' .. '\udc0a'
|
||||
| '\ud800' '\udc0d' .. '\udc25'
|
||||
| '\ud800' '\udc28' .. '\udc39'
|
||||
| '\ud800' '\udc3c' .. '\udc3c'
|
||||
| '\ud800' '\udc3f' .. '\udc4c'
|
||||
| '\ud800' '\udc50' .. '\udc5c'
|
||||
| '\ud800' '\udc80' .. '\udcf9'
|
||||
| '\ud800' '\udf00' .. '\udf1d'
|
||||
| '\ud800' '\udf30' .. '\udf49'
|
||||
| '\ud800' '\udf80' .. '\udf9c'
|
||||
| '\ud801' '\ue000' .. '\ue09c'
|
||||
| '\ud801' '\ue0a0' .. '\ue0a8'
|
||||
| '\ud802' '\ue400' .. '\ue404'
|
||||
| '\ud802' '\u0808'
|
||||
| '\ud802' '\ue40a' .. '\ue434'
|
||||
| '\ud802' '\ue437' .. '\ue437'
|
||||
| '\ud802' '\u083c'
|
||||
| '\ud802' '\u083f'
|
||||
| '\ud834' '\uad65' .. '\uad68'
|
||||
| '\ud834' '\uad6d' .. '\uad71'
|
||||
| '\ud834' '\uad7b' .. '\uad81'
|
||||
| '\ud834' '\uad85' .. '\uad8a'
|
||||
| '\ud834' '\uadaa' .. '\uadac'
|
||||
| '\ud835' '\ub000' .. '\ub053'
|
||||
| '\ud835' '\ub056' .. '\ub09b'
|
||||
| '\ud835' '\ub09e' .. '\ub09e'
|
||||
| '\ud835' '\ud4a2'
|
||||
| '\ud835' '\ub0a5' .. '\ub0a5'
|
||||
| '\ud835' '\ub0a9' .. '\ub0ab'
|
||||
| '\ud835' '\ub0ae' .. '\ub0b8'
|
||||
| '\ud835' '\ud4bb'
|
||||
| '\ud835' '\ub0bd' .. '\ub0c2'
|
||||
| '\ud835' '\ub0c5' .. '\ub104'
|
||||
| '\ud835' '\ub107' .. '\ub109'
|
||||
| '\ud835' '\ub10d' .. '\ub113'
|
||||
| '\ud835' '\ub116' .. '\ub11b'
|
||||
| '\ud835' '\ub11e' .. '\ub138'
|
||||
| '\ud835' '\ub13b' .. '\ub13d'
|
||||
| '\ud835' '\ub140' .. '\ub143'
|
||||
| '\ud835' '\ud546'
|
||||
| '\ud835' '\ub14a' .. '\ub14f'
|
||||
| '\ud835' '\ub152' .. '\ub2a2'
|
||||
| '\ud835' '\ub2a8' .. '\ub2bf'
|
||||
| '\ud835' '\ub2c2' .. '\ub2d9'
|
||||
| '\ud835' '\ub2dc' .. '\ub2f9'
|
||||
| '\ud835' '\ub2fc' .. '\ub313'
|
||||
| '\ud835' '\ub316' .. '\ub333'
|
||||
| '\ud835' '\ub336' .. '\ub34d'
|
||||
| '\ud835' '\ub350' .. '\ub36d'
|
||||
| '\ud835' '\ub370' .. '\ub387'
|
||||
| '\ud835' '\ub38a' .. '\ub3a7'
|
||||
| '\ud835' '\ub3aa' .. '\ub3c1'
|
||||
| '\ud835' '\ub3c4' .. '\ub3c8'
|
||||
| '\ud835' '\ub3ce' .. '\ub3fe'
|
||||
| '\ud840' '\udc00' .. '\udffe'
|
||||
| '\ud841' '\ue000' .. '\ue3fe'
|
||||
| '\ud842' '\ue400' .. '\ue7fe'
|
||||
| '\ud843' '\ue800' .. '\uebfe'
|
||||
| '\ud844' '\uec00' .. '\ueffe'
|
||||
| '\ud845' '\uf000' .. '\uf3fe'
|
||||
| '\ud846' '\uf400' .. '\uf7fe'
|
||||
| '\ud847' '\uf800' .. '\ufbfe'
|
||||
| '\ud848' '\ufc00' .. '\ufffe'
|
||||
| '\ud849' '\u0000' .. '\u03fe'
|
||||
| '\ud84a' '\u0400' .. '\u07fe'
|
||||
| '\ud84b' '\u0800' .. '\u0bfe'
|
||||
| '\ud84c' '\u0c00' .. '\u0ffe'
|
||||
| '\ud84d' '\u1000' .. '\u13fe'
|
||||
| '\ud84e' '\u1400' .. '\u17fe'
|
||||
| '\ud84f' '\u1800' .. '\u1bfe'
|
||||
| '\ud850' '\u1c00' .. '\u1ffe'
|
||||
| '\ud851' '\u2000' .. '\u23fe'
|
||||
| '\ud852' '\u2400' .. '\u27fe'
|
||||
| '\ud853' '\u2800' .. '\u2bfe'
|
||||
| '\ud854' '\u2c00' .. '\u2ffe'
|
||||
| '\ud855' '\u3000' .. '\u33fe'
|
||||
| '\ud856' '\u3400' .. '\u37fe'
|
||||
| '\ud857' '\u3800' .. '\u3bfe'
|
||||
| '\ud858' '\u3c00' .. '\u3ffe'
|
||||
| '\ud859' '\u4000' .. '\u43fe'
|
||||
| '\ud85a' '\u4400' .. '\u47fe'
|
||||
| '\ud85b' '\u4800' .. '\u4bfe'
|
||||
| '\ud85c' '\u4c00' .. '\u4ffe'
|
||||
| '\ud85d' '\u5000' .. '\u53fe'
|
||||
| '\ud85e' '\u5400' .. '\u57fe'
|
||||
| '\ud85f' '\u5800' .. '\u5bfe'
|
||||
| '\ud860' '\u5c00' .. '\u5ffe'
|
||||
| '\ud861' '\u6000' .. '\u63fe'
|
||||
| '\ud862' '\u6400' .. '\u67fe'
|
||||
| '\ud863' '\u6800' .. '\u6bfe'
|
||||
| '\ud864' '\u6c00' .. '\u6ffe'
|
||||
| '\ud865' '\u7000' .. '\u73fe'
|
||||
| '\ud866' '\u7400' .. '\u77fe'
|
||||
| '\ud867' '\u7800' .. '\u7bfe'
|
||||
| '\ud868' '\u7c00' .. '\u7ffe'
|
||||
| '\ud869' '\u8000' .. '\u82d5'
|
||||
| '\ud87e' '\ud400' .. '\ud61c'
|
||||
| '\udb40' '\udd00' .. '\uddee'
|
||||
;
|
||||
|
|
@ -1,379 +0,0 @@
|
|||
lexer grammar Xidstart;
|
||||
|
||||
fragment XID_Start :
|
||||
'\u0041' .. '\u005a'
|
||||
| '_'
|
||||
| '\u0061' .. '\u007a'
|
||||
| '\u00aa'
|
||||
| '\u00b5'
|
||||
| '\u00ba'
|
||||
| '\u00c0' .. '\u00d6'
|
||||
| '\u00d8' .. '\u00f6'
|
||||
| '\u00f8' .. '\u0236'
|
||||
| '\u0250' .. '\u02c1'
|
||||
| '\u02c6' .. '\u02d1'
|
||||
| '\u02e0' .. '\u02e4'
|
||||
| '\u02ee'
|
||||
| '\u0386'
|
||||
| '\u0388' .. '\u038a'
|
||||
| '\u038c'
|
||||
| '\u038e' .. '\u03a1'
|
||||
| '\u03a3' .. '\u03ce'
|
||||
| '\u03d0' .. '\u03f5'
|
||||
| '\u03f7' .. '\u03fb'
|
||||
| '\u0400' .. '\u0481'
|
||||
| '\u048a' .. '\u04ce'
|
||||
| '\u04d0' .. '\u04f5'
|
||||
| '\u04f8' .. '\u04f9'
|
||||
| '\u0500' .. '\u050f'
|
||||
| '\u0531' .. '\u0556'
|
||||
| '\u0559'
|
||||
| '\u0561' .. '\u0587'
|
||||
| '\u05d0' .. '\u05ea'
|
||||
| '\u05f0' .. '\u05f2'
|
||||
| '\u0621' .. '\u063a'
|
||||
| '\u0640' .. '\u064a'
|
||||
| '\u066e' .. '\u066f'
|
||||
| '\u0671' .. '\u06d3'
|
||||
| '\u06d5'
|
||||
| '\u06e5' .. '\u06e6'
|
||||
| '\u06ee' .. '\u06ef'
|
||||
| '\u06fa' .. '\u06fc'
|
||||
| '\u06ff'
|
||||
| '\u0710'
|
||||
| '\u0712' .. '\u072f'
|
||||
| '\u074d' .. '\u074f'
|
||||
| '\u0780' .. '\u07a5'
|
||||
| '\u07b1'
|
||||
| '\u0904' .. '\u0939'
|
||||
| '\u093d'
|
||||
| '\u0950'
|
||||
| '\u0958' .. '\u0961'
|
||||
| '\u0985' .. '\u098c'
|
||||
| '\u098f' .. '\u0990'
|
||||
| '\u0993' .. '\u09a8'
|
||||
| '\u09aa' .. '\u09b0'
|
||||
| '\u09b2'
|
||||
| '\u09b6' .. '\u09b9'
|
||||
| '\u09bd'
|
||||
| '\u09dc' .. '\u09dd'
|
||||
| '\u09df' .. '\u09e1'
|
||||
| '\u09f0' .. '\u09f1'
|
||||
| '\u0a05' .. '\u0a0a'
|
||||
| '\u0a0f' .. '\u0a10'
|
||||
| '\u0a13' .. '\u0a28'
|
||||
| '\u0a2a' .. '\u0a30'
|
||||
| '\u0a32' .. '\u0a33'
|
||||
| '\u0a35' .. '\u0a36'
|
||||
| '\u0a38' .. '\u0a39'
|
||||
| '\u0a59' .. '\u0a5c'
|
||||
| '\u0a5e'
|
||||
| '\u0a72' .. '\u0a74'
|
||||
| '\u0a85' .. '\u0a8d'
|
||||
| '\u0a8f' .. '\u0a91'
|
||||
| '\u0a93' .. '\u0aa8'
|
||||
| '\u0aaa' .. '\u0ab0'
|
||||
| '\u0ab2' .. '\u0ab3'
|
||||
| '\u0ab5' .. '\u0ab9'
|
||||
| '\u0abd'
|
||||
| '\u0ad0'
|
||||
| '\u0ae0' .. '\u0ae1'
|
||||
| '\u0b05' .. '\u0b0c'
|
||||
| '\u0b0f' .. '\u0b10'
|
||||
| '\u0b13' .. '\u0b28'
|
||||
| '\u0b2a' .. '\u0b30'
|
||||
| '\u0b32' .. '\u0b33'
|
||||
| '\u0b35' .. '\u0b39'
|
||||
| '\u0b3d'
|
||||
| '\u0b5c' .. '\u0b5d'
|
||||
| '\u0b5f' .. '\u0b61'
|
||||
| '\u0b71'
|
||||
| '\u0b83'
|
||||
| '\u0b85' .. '\u0b8a'
|
||||
| '\u0b8e' .. '\u0b90'
|
||||
| '\u0b92' .. '\u0b95'
|
||||
| '\u0b99' .. '\u0b9a'
|
||||
| '\u0b9c'
|
||||
| '\u0b9e' .. '\u0b9f'
|
||||
| '\u0ba3' .. '\u0ba4'
|
||||
| '\u0ba8' .. '\u0baa'
|
||||
| '\u0bae' .. '\u0bb5'
|
||||
| '\u0bb7' .. '\u0bb9'
|
||||
| '\u0c05' .. '\u0c0c'
|
||||
| '\u0c0e' .. '\u0c10'
|
||||
| '\u0c12' .. '\u0c28'
|
||||
| '\u0c2a' .. '\u0c33'
|
||||
| '\u0c35' .. '\u0c39'
|
||||
| '\u0c60' .. '\u0c61'
|
||||
| '\u0c85' .. '\u0c8c'
|
||||
| '\u0c8e' .. '\u0c90'
|
||||
| '\u0c92' .. '\u0ca8'
|
||||
| '\u0caa' .. '\u0cb3'
|
||||
| '\u0cb5' .. '\u0cb9'
|
||||
| '\u0cbd'
|
||||
| '\u0cde'
|
||||
| '\u0ce0' .. '\u0ce1'
|
||||
| '\u0d05' .. '\u0d0c'
|
||||
| '\u0d0e' .. '\u0d10'
|
||||
| '\u0d12' .. '\u0d28'
|
||||
| '\u0d2a' .. '\u0d39'
|
||||
| '\u0d60' .. '\u0d61'
|
||||
| '\u0d85' .. '\u0d96'
|
||||
| '\u0d9a' .. '\u0db1'
|
||||
| '\u0db3' .. '\u0dbb'
|
||||
| '\u0dbd'
|
||||
| '\u0dc0' .. '\u0dc6'
|
||||
| '\u0e01' .. '\u0e30'
|
||||
| '\u0e32'
|
||||
| '\u0e40' .. '\u0e46'
|
||||
| '\u0e81' .. '\u0e82'
|
||||
| '\u0e84'
|
||||
| '\u0e87' .. '\u0e88'
|
||||
| '\u0e8a'
|
||||
| '\u0e8d'
|
||||
| '\u0e94' .. '\u0e97'
|
||||
| '\u0e99' .. '\u0e9f'
|
||||
| '\u0ea1' .. '\u0ea3'
|
||||
| '\u0ea5'
|
||||
| '\u0ea7'
|
||||
| '\u0eaa' .. '\u0eab'
|
||||
| '\u0ead' .. '\u0eb0'
|
||||
| '\u0eb2'
|
||||
| '\u0ebd'
|
||||
| '\u0ec0' .. '\u0ec4'
|
||||
| '\u0ec6'
|
||||
| '\u0edc' .. '\u0edd'
|
||||
| '\u0f00'
|
||||
| '\u0f40' .. '\u0f47'
|
||||
| '\u0f49' .. '\u0f6a'
|
||||
| '\u0f88' .. '\u0f8b'
|
||||
| '\u1000' .. '\u1021'
|
||||
| '\u1023' .. '\u1027'
|
||||
| '\u1029' .. '\u102a'
|
||||
| '\u1050' .. '\u1055'
|
||||
| '\u10a0' .. '\u10c5'
|
||||
| '\u10d0' .. '\u10f8'
|
||||
| '\u1100' .. '\u1159'
|
||||
| '\u115f' .. '\u11a2'
|
||||
| '\u11a8' .. '\u11f9'
|
||||
| '\u1200' .. '\u1206'
|
||||
| '\u1208' .. '\u1246'
|
||||
| '\u1248'
|
||||
| '\u124a' .. '\u124d'
|
||||
| '\u1250' .. '\u1256'
|
||||
| '\u1258'
|
||||
| '\u125a' .. '\u125d'
|
||||
| '\u1260' .. '\u1286'
|
||||
| '\u1288'
|
||||
| '\u128a' .. '\u128d'
|
||||
| '\u1290' .. '\u12ae'
|
||||
| '\u12b0'
|
||||
| '\u12b2' .. '\u12b5'
|
||||
| '\u12b8' .. '\u12be'
|
||||
| '\u12c0'
|
||||
| '\u12c2' .. '\u12c5'
|
||||
| '\u12c8' .. '\u12ce'
|
||||
| '\u12d0' .. '\u12d6'
|
||||
| '\u12d8' .. '\u12ee'
|
||||
| '\u12f0' .. '\u130e'
|
||||
| '\u1310'
|
||||
| '\u1312' .. '\u1315'
|
||||
| '\u1318' .. '\u131e'
|
||||
| '\u1320' .. '\u1346'
|
||||
| '\u1348' .. '\u135a'
|
||||
| '\u13a0' .. '\u13f4'
|
||||
| '\u1401' .. '\u166c'
|
||||
| '\u166f' .. '\u1676'
|
||||
| '\u1681' .. '\u169a'
|
||||
| '\u16a0' .. '\u16ea'
|
||||
| '\u16ee' .. '\u16f0'
|
||||
| '\u1700' .. '\u170c'
|
||||
| '\u170e' .. '\u1711'
|
||||
| '\u1720' .. '\u1731'
|
||||
| '\u1740' .. '\u1751'
|
||||
| '\u1760' .. '\u176c'
|
||||
| '\u176e' .. '\u1770'
|
||||
| '\u1780' .. '\u17b3'
|
||||
| '\u17d7'
|
||||
| '\u17dc'
|
||||
| '\u1820' .. '\u1877'
|
||||
| '\u1880' .. '\u18a8'
|
||||
| '\u1900' .. '\u191c'
|
||||
| '\u1950' .. '\u196d'
|
||||
| '\u1970' .. '\u1974'
|
||||
| '\u1d00' .. '\u1d6b'
|
||||
| '\u1e00' .. '\u1e9b'
|
||||
| '\u1ea0' .. '\u1ef9'
|
||||
| '\u1f00' .. '\u1f15'
|
||||
| '\u1f18' .. '\u1f1d'
|
||||
| '\u1f20' .. '\u1f45'
|
||||
| '\u1f48' .. '\u1f4d'
|
||||
| '\u1f50' .. '\u1f57'
|
||||
| '\u1f59'
|
||||
| '\u1f5b'
|
||||
| '\u1f5d'
|
||||
| '\u1f5f' .. '\u1f7d'
|
||||
| '\u1f80' .. '\u1fb4'
|
||||
| '\u1fb6' .. '\u1fbc'
|
||||
| '\u1fbe'
|
||||
| '\u1fc2' .. '\u1fc4'
|
||||
| '\u1fc6' .. '\u1fcc'
|
||||
| '\u1fd0' .. '\u1fd3'
|
||||
| '\u1fd6' .. '\u1fdb'
|
||||
| '\u1fe0' .. '\u1fec'
|
||||
| '\u1ff2' .. '\u1ff4'
|
||||
| '\u1ff6' .. '\u1ffc'
|
||||
| '\u2071'
|
||||
| '\u207f'
|
||||
| '\u2102'
|
||||
| '\u2107'
|
||||
| '\u210a' .. '\u2113'
|
||||
| '\u2115'
|
||||
| '\u2118' .. '\u211d'
|
||||
| '\u2124'
|
||||
| '\u2126'
|
||||
| '\u2128'
|
||||
| '\u212a' .. '\u2131'
|
||||
| '\u2133' .. '\u2139'
|
||||
| '\u213d' .. '\u213f'
|
||||
| '\u2145' .. '\u2149'
|
||||
| '\u2160' .. '\u2183'
|
||||
| '\u3005' .. '\u3007'
|
||||
| '\u3021' .. '\u3029'
|
||||
| '\u3031' .. '\u3035'
|
||||
| '\u3038' .. '\u303c'
|
||||
| '\u3041' .. '\u3096'
|
||||
| '\u309d' .. '\u309f'
|
||||
| '\u30a1' .. '\u30fa'
|
||||
| '\u30fc' .. '\u30ff'
|
||||
| '\u3105' .. '\u312c'
|
||||
| '\u3131' .. '\u318e'
|
||||
| '\u31a0' .. '\u31b7'
|
||||
| '\u31f0' .. '\u31ff'
|
||||
| '\u3400' .. '\u4db5'
|
||||
| '\u4e00' .. '\u9fa5'
|
||||
| '\ua000' .. '\ua48c'
|
||||
| '\uac00' .. '\ud7a3'
|
||||
| '\uf900' .. '\ufa2d'
|
||||
| '\ufa30' .. '\ufa6a'
|
||||
| '\ufb00' .. '\ufb06'
|
||||
| '\ufb13' .. '\ufb17'
|
||||
| '\ufb1d'
|
||||
| '\ufb1f' .. '\ufb28'
|
||||
| '\ufb2a' .. '\ufb36'
|
||||
| '\ufb38' .. '\ufb3c'
|
||||
| '\ufb3e'
|
||||
| '\ufb40' .. '\ufb41'
|
||||
| '\ufb43' .. '\ufb44'
|
||||
| '\ufb46' .. '\ufbb1'
|
||||
| '\ufbd3' .. '\ufc5d'
|
||||
| '\ufc64' .. '\ufd3d'
|
||||
| '\ufd50' .. '\ufd8f'
|
||||
| '\ufd92' .. '\ufdc7'
|
||||
| '\ufdf0' .. '\ufdf9'
|
||||
| '\ufe71'
|
||||
| '\ufe73'
|
||||
| '\ufe77'
|
||||
| '\ufe79'
|
||||
| '\ufe7b'
|
||||
| '\ufe7d'
|
||||
| '\ufe7f' .. '\ufefc'
|
||||
| '\uff21' .. '\uff3a'
|
||||
| '\uff41' .. '\uff5a'
|
||||
| '\uff66' .. '\uff9d'
|
||||
| '\uffa0' .. '\uffbe'
|
||||
| '\uffc2' .. '\uffc7'
|
||||
| '\uffca' .. '\uffcf'
|
||||
| '\uffd2' .. '\uffd7'
|
||||
| '\uffda' .. '\uffdc'
|
||||
| '\ud800' '\udc00' .. '\udc0a'
|
||||
| '\ud800' '\udc0d' .. '\udc25'
|
||||
| '\ud800' '\udc28' .. '\udc39'
|
||||
| '\ud800' '\udc3c' .. '\udc3c'
|
||||
| '\ud800' '\udc3f' .. '\udc4c'
|
||||
| '\ud800' '\udc50' .. '\udc5c'
|
||||
| '\ud800' '\udc80' .. '\udcf9'
|
||||
| '\ud800' '\udf00' .. '\udf1d'
|
||||
| '\ud800' '\udf30' .. '\udf49'
|
||||
| '\ud800' '\udf80' .. '\udf9c'
|
||||
| '\ud801' '\ue000' .. '\ue09c'
|
||||
| '\ud802' '\ue400' .. '\ue404'
|
||||
| '\ud802' '\u0808'
|
||||
| '\ud802' '\ue40a' .. '\ue434'
|
||||
| '\ud802' '\ue437' .. '\ue437'
|
||||
| '\ud802' '\u083c'
|
||||
| '\ud802' '\u083f'
|
||||
| '\ud835' '\ub000' .. '\ub053'
|
||||
| '\ud835' '\ub056' .. '\ub09b'
|
||||
| '\ud835' '\ub09e' .. '\ub09e'
|
||||
| '\ud835' '\ud4a2'
|
||||
| '\ud835' '\ub0a5' .. '\ub0a5'
|
||||
| '\ud835' '\ub0a9' .. '\ub0ab'
|
||||
| '\ud835' '\ub0ae' .. '\ub0b8'
|
||||
| '\ud835' '\ud4bb'
|
||||
| '\ud835' '\ub0bd' .. '\ub0c2'
|
||||
| '\ud835' '\ub0c5' .. '\ub104'
|
||||
| '\ud835' '\ub107' .. '\ub109'
|
||||
| '\ud835' '\ub10d' .. '\ub113'
|
||||
| '\ud835' '\ub116' .. '\ub11b'
|
||||
| '\ud835' '\ub11e' .. '\ub138'
|
||||
| '\ud835' '\ub13b' .. '\ub13d'
|
||||
| '\ud835' '\ub140' .. '\ub143'
|
||||
| '\ud835' '\ud546'
|
||||
| '\ud835' '\ub14a' .. '\ub14f'
|
||||
| '\ud835' '\ub152' .. '\ub2a2'
|
||||
| '\ud835' '\ub2a8' .. '\ub2bf'
|
||||
| '\ud835' '\ub2c2' .. '\ub2d9'
|
||||
| '\ud835' '\ub2dc' .. '\ub2f9'
|
||||
| '\ud835' '\ub2fc' .. '\ub313'
|
||||
| '\ud835' '\ub316' .. '\ub333'
|
||||
| '\ud835' '\ub336' .. '\ub34d'
|
||||
| '\ud835' '\ub350' .. '\ub36d'
|
||||
| '\ud835' '\ub370' .. '\ub387'
|
||||
| '\ud835' '\ub38a' .. '\ub3a7'
|
||||
| '\ud835' '\ub3aa' .. '\ub3c1'
|
||||
| '\ud835' '\ub3c4' .. '\ub3c8'
|
||||
| '\ud840' '\udc00' .. '\udffe'
|
||||
| '\ud841' '\ue000' .. '\ue3fe'
|
||||
| '\ud842' '\ue400' .. '\ue7fe'
|
||||
| '\ud843' '\ue800' .. '\uebfe'
|
||||
| '\ud844' '\uec00' .. '\ueffe'
|
||||
| '\ud845' '\uf000' .. '\uf3fe'
|
||||
| '\ud846' '\uf400' .. '\uf7fe'
|
||||
| '\ud847' '\uf800' .. '\ufbfe'
|
||||
| '\ud848' '\ufc00' .. '\ufffe'
|
||||
| '\ud849' '\u0000' .. '\u03fe'
|
||||
| '\ud84a' '\u0400' .. '\u07fe'
|
||||
| '\ud84b' '\u0800' .. '\u0bfe'
|
||||
| '\ud84c' '\u0c00' .. '\u0ffe'
|
||||
| '\ud84d' '\u1000' .. '\u13fe'
|
||||
| '\ud84e' '\u1400' .. '\u17fe'
|
||||
| '\ud84f' '\u1800' .. '\u1bfe'
|
||||
| '\ud850' '\u1c00' .. '\u1ffe'
|
||||
| '\ud851' '\u2000' .. '\u23fe'
|
||||
| '\ud852' '\u2400' .. '\u27fe'
|
||||
| '\ud853' '\u2800' .. '\u2bfe'
|
||||
| '\ud854' '\u2c00' .. '\u2ffe'
|
||||
| '\ud855' '\u3000' .. '\u33fe'
|
||||
| '\ud856' '\u3400' .. '\u37fe'
|
||||
| '\ud857' '\u3800' .. '\u3bfe'
|
||||
| '\ud858' '\u3c00' .. '\u3ffe'
|
||||
| '\ud859' '\u4000' .. '\u43fe'
|
||||
| '\ud85a' '\u4400' .. '\u47fe'
|
||||
| '\ud85b' '\u4800' .. '\u4bfe'
|
||||
| '\ud85c' '\u4c00' .. '\u4ffe'
|
||||
| '\ud85d' '\u5000' .. '\u53fe'
|
||||
| '\ud85e' '\u5400' .. '\u57fe'
|
||||
| '\ud85f' '\u5800' .. '\u5bfe'
|
||||
| '\ud860' '\u5c00' .. '\u5ffe'
|
||||
| '\ud861' '\u6000' .. '\u63fe'
|
||||
| '\ud862' '\u6400' .. '\u67fe'
|
||||
| '\ud863' '\u6800' .. '\u6bfe'
|
||||
| '\ud864' '\u6c00' .. '\u6ffe'
|
||||
| '\ud865' '\u7000' .. '\u73fe'
|
||||
| '\ud866' '\u7400' .. '\u77fe'
|
||||
| '\ud867' '\u7800' .. '\u7bfe'
|
||||
| '\ud868' '\u7c00' .. '\u7ffe'
|
||||
| '\ud869' '\u8000' .. '\u82d5'
|
||||
| '\ud87e' '\ud400' .. '\ud61c'
|
||||
;
|
||||
Loading…
Add table
Add a link
Reference in a new issue