Auto merge of #50052 - nnethercote:char_lit, r=Mark-Simulacrum

Avoid allocating when parsing \u{...} literals.

`char_lit` uses an allocation in order to ignore '_' chars in \u{...}
literals. This patch changes it to not do that by processing the chars
more directly.

This improves various rustc-perf benchmark measurements by up to 6%,
particularly regex, futures, clap, coercions, hyper, and encoding.

rustc-perf results, on a stage 2 build with jemalloc disabled:

<details>

```
regex-check
	avg: -5.4%	min: -6.5%	max: -2.7%
futures-check
	avg: -3.5%	min: -5.3%	max: -1.7%
regex-opt
	avg: -2.0%	min: -5.1%	max: -0.2%
regex
	avg: -2.3%	min: -5.0%	max: -0.6%
futures-opt
	avg: -3.0%	min: -4.8%	max: -1.1%
futures
	avg: -3.1%	min: -4.8%	max: -1.3%
clap-rs-check
	avg: -1.8%	min: -3.5%	max: -0.9%
coercions-check
	avg: -2.0%	min: -3.3%	max: -1.0%
hyper-check
	avg: -2.2%	min: -3.1%	max: -1.3%
hyper
	avg: -1.3%	min: -2.4%	max: -0.3%
hyper-opt
	avg: -0.9%	min: -2.3%	max: -0.1%
coercions
	avg: -1.1%	min: -2.2%	max: -0.4%
encoding-check
	avg: -1.7%	min: -2.2%	max: -0.9%
clap-rs-opt
	avg: -0.7%	min: -2.2%	max: 0.0%
coercions-opt
	avg: -1.2%	min: -2.1%	max: -0.3%
clap-rs
	avg: -0.8%	min: -1.9%	max: -0.4%
encoding-opt
	avg: -1.0%	min: -1.9%	max: -0.3%
encoding
	avg: -1.1%	min: -1.9%	max: -0.4%
piston-image-check
	avg: -0.7%	min: -1.3%	max: -0.3%
inflate-opt
	avg: -0.3%	min: -0.9%	max: -0.0%
piston-image
	avg: -0.3%	min: -0.8%	max: -0.1%
piston-image-opt
	avg: -0.3%	min: -0.7%	max: -0.1%
syn-check
	avg: -0.3%	min: -0.6%	max: -0.1%
deep-vector
	avg: 0.1%	min: -0.1%	max: 0.5%
syn-opt
	avg: -0.1%	min: -0.4%	max: 0.0%
html5ever
	avg: -0.2%	min: -0.4%	max: -0.0%
deep-vector-check
	avg: 0.0%	min: -0.3%	max: 0.3%
syn
	avg: -0.2%	min: -0.3%	max: -0.1%
html5ever-check
	avg: -0.3%	min: -0.3%	max: -0.2%
issue-46449-check
	avg: -0.1%	min: -0.2%	max: 0.2%
html5ever-opt
	avg: -0.0%	min: -0.2%	max: 0.1%
deep-vector-opt
	avg: -0.0%	min: -0.2%	max: 0.1%
issue-46449-opt
	avg: -0.0%	min: -0.2%	max: 0.1%
unify-linearly-check
	avg: -0.0%	min: -0.2%	max: 0.1%
helloworld-check
	avg: 0.0%	min: -0.0%	max: 0.2%
parser-check
	avg: -0.0%	min: -0.2%	max: 0.0%
inflate
	avg: 0.0%	min: -0.0%	max: 0.1%
tokio-webpush-simple-check
	avg: -0.1%	min: -0.1%	max: -0.0%
regression-31157-check
	avg: 0.0%	min: -0.1%	max: 0.1%
issue-46449
	avg: 0.0%	min: -0.1%	max: 0.1%
tuple-stress-opt
	avg: 0.0%	min: -0.0%	max: 0.1%
tuple-stress-check
	avg: -0.0%	min: -0.1%	max: 0.1%
tuple-stress
	avg: 0.0%	min: -0.0%	max: 0.1%
deeply-nested-check
	avg: 0.0%	min: -0.0%	max: 0.1%
regression-31157
	avg: -0.0%	min: -0.1%	max: 0.1%
deeply-nested-opt
	avg: -0.0%	min: -0.1%	max: 0.1%
parser-opt
	avg: -0.0%	min: -0.1%	max: 0.0%
parser
	avg: 0.1%	min: 0.0%	max: 0.1%
tokio-webpush-simple
	avg: -0.0%	min: -0.1%	max: 0.1%
regression-31157-opt
	avg: -0.0%	min: -0.1%	max: 0.1%
helloworld-opt
	avg: 0.0%	min: -0.0%	max: 0.1%
unify-linearly-opt
	avg: 0.0%	min: -0.0%	max: 0.1%
unused-warnings-check
	avg: 0.0%	min: 0.0%	max: 0.1%
tokio-webpush-simple-opt
	avg: -0.0%	min: -0.1%	max: 0.0%
helloworld
	avg: -0.0%	min: -0.0%	max: 0.1%
unused-warnings
	avg: 0.0%	min: -0.0%	max: 0.0%
deeply-nested
	avg: -0.0%	min: -0.0%	max: -0.0%
unused-warnings-opt
	avg: 0.0%	min: -0.0%	max: 0.0%
unify-linearly
	avg: 0.0%	min: -0.0%	max: 0.0%
inflate-check
	avg: 0.0%	min: -0.0%	max: 0.0%
```

</details>
This commit is contained in:
bors 2018-04-20 10:40:25 +00:00
commit 85f5dd489e

View file

@ -271,8 +271,16 @@ pub fn char_lit(lit: &str, diag: Option<(Span, &Handler)>) -> (char, isize) {
'u' => {
assert_eq!(lit.as_bytes()[2], b'{');
let idx = lit.find('}').unwrap();
let s = &lit[3..idx].chars().filter(|&c| c != '_').collect::<String>();
let v = u32::from_str_radix(&s, 16).unwrap();
// All digits and '_' are ascii, so treat each byte as a char.
let mut v: u32 = 0;
for c in lit[3..idx].bytes() {
let c = char::from(c);
if c != '_' {
let x = c.to_digit(16).unwrap();
v = v.checked_mul(16).unwrap().checked_add(x).unwrap();
}
}
let c = char::from_u32(v).unwrap_or_else(|| {
if let Some((span, diag)) = diag {
let mut diag = diag.struct_span_err(span, "invalid unicode character escape");