diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs index 2feb81324338..b9650cab507e 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs @@ -1099,6 +1099,60 @@ pub unsafe fn vcaleq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { vcageq_f64(b, a) } +/// Floating-point convert to higher precision long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtl))] +pub unsafe fn vcvt_f64_f32(a: float32x2_t) -> float64x2_t { + simd_cast(a) +} + +/// Floating-point convert to higher precision long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtl))] +pub unsafe fn vcvt_high_f64_f32(a: float32x4_t) -> float64x2_t { + let b: float32x2_t = simd_shuffle2(a, a, [2, 3]); + simd_cast(b) +} + +/// Floating-point convert to lower precision narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtn))] +pub unsafe fn vcvt_f32_f64(a: float64x2_t) -> float32x2_t { + simd_cast(a) +} + +/// Floating-point convert to lower precision narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtn))] +pub unsafe fn vcvt_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t { + simd_shuffle4(a, simd_cast(b), [0, 1, 2, 3]) +} + +/// Floating-point convert to lower precision narrow, rounding to odd +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtxn))] +pub unsafe fn vcvtx_f32_f64(a: float64x2_t) -> float32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtxn.v2f32.v2f64")] + fn vcvtx_f32_f64_(a: float64x2_t) -> float32x2_t; + } + vcvtx_f32_f64_(a) +} + +/// Floating-point convert to lower precision narrow, rounding to odd +#[inline] +#[target_feature(enable = "neon")] 
+#[cfg_attr(test, assert_instr(fcvtxn))] +pub unsafe fn vcvtx_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t { + simd_shuffle4(a, vcvtx_f32_f64(b), [0, 1, 2, 3]) +} + /// Multiply #[inline] #[target_feature(enable = "neon")] @@ -2366,6 +2420,56 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vcvt_f64_f32() { + let a: f32x2 = f32x2::new(-1.2, 1.2); + let e: f64x2 = f64x2::new(-1.2f32 as f64, 1.2f32 as f64); + let r: f64x2 = transmute(vcvt_f64_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvt_high_f64_f32() { + let a: f32x4 = f32x4::new(-1.2, 1.2, 2.3, 3.4); + let e: f64x2 = f64x2::new(2.3f32 as f64, 3.4f32 as f64); + let r: f64x2 = transmute(vcvt_high_f64_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvt_f32_f64() { + let a: f64x2 = f64x2::new(-1.2, 1.2); + let e: f32x2 = f32x2::new(-1.2f64 as f32, 1.2f64 as f32); + let r: f32x2 = transmute(vcvt_f32_f64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvt_high_f32_f64() { + let a: f32x2 = f32x2::new(-1.2, 1.2); + let b: f64x2 = f64x2::new(-2.3, 3.4); + let e: f32x4 = f32x4::new(-1.2, 1.2, -2.3f64 as f32, 3.4f64 as f32); + let r: f32x4 = transmute(vcvt_high_f32_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtx_f32_f64() { + let a: f64x2 = f64x2::new(-1.0, 2.0); + let e: f32x2 = f32x2::new(-1.0, 2.0); + let r: f32x2 = transmute(vcvtx_f32_f64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtx_high_f32_f64() { + let a: f32x2 = f32x2::new(-1.0, 2.0); + let b: f64x2 = f64x2::new(-3.0, 4.0); + let e: f32x4 = f32x4::new(-1.0, 2.0, -3.0, 4.0); + let r: f32x4 = transmute(vcvtx_high_f32_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vmul_f64() { let a: f64 = 1.0; diff --git 
a/library/stdarch/crates/stdarch-gen/neon.spec b/library/stdarch/crates/stdarch-gen/neon.spec index df6870c06c40..a50f3956ac9c 100644 --- a/library/stdarch/crates/stdarch-gen/neon.spec +++ b/library/stdarch/crates/stdarch-gen/neon.spec @@ -527,7 +527,7 @@ generate int*_t /// Unsigned count leading sign bits name = vclz -multi_fn = transmute, [self-signed-ext, transmute(a)] +multi_fn = transmute, {self-signed-ext, transmute(a)} a = MIN, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, MAX validate BITS, BITS, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, BITS_M1, 0 @@ -589,6 +589,69 @@ generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t arm = vacge.s generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t +/// Floating-point convert to higher precision long +name = vcvt +double-suffixes +fn = simd_cast +a = -1.2, 1.2 +validate -1.2f32 as f64, 1.2f32 as f64 + +aarch64 = fcvtl +generate float32x2_t:float64x2_t + +/// Floating-point convert to higher precision long +name = vcvt_high +double-suffixes +multi_fn = simd_shuffle2, b:float32x2_t, a, a, [2, 3] +multi_fn = simd_cast, b +a = -1.2, 1.2, 2.3, 3.4 +validate 2.3f32 as f64, 3.4f32 as f64 + +aarch64 = fcvtl +generate float32x4_t:float64x2_t + +/// Floating-point convert to lower precision narrow +name = vcvt +double-suffixes +fn = simd_cast +a = -1.2, 1.2 +validate -1.2f64 as f32, 1.2f64 as f32 + +aarch64 = fcvtn +generate float64x2_t:float32x2_t + +/// Floating-point convert to lower precision narrow +name = vcvt_high +double-suffixes +multi_fn = simd_shuffle4, a, {simd_cast, b}, [0, 1, 2, 3] +a = -1.2, 1.2 +b = -2.3, 3.4 +validate -1.2, 1.2, -2.3f64 as f32, 3.4f64 as f32 + +aarch64 = fcvtn +generate float32x2_t:float64x2_t:float32x4_t + +/// Floating-point convert to lower precision narrow, rounding to odd +name = vcvtx +double-suffixes +a = -1.0, 2.0 +validate -1.0, 2.0 + +aarch64 = fcvtxn +link-aarch64 = 
fcvtxn._EXT2_._EXT_ +generate float64x2_t:float32x2_t + +/// Floating-point convert to lower precision narrow, rounding to odd +name = vcvtx_high +double-suffixes +multi_fn = simd_shuffle4, a, {vcvtx-doubleself-noext, b}, [0, 1, 2, 3] +a = -1.0, 2.0 +b = -3.0, 4.0 +validate -1.0, 2.0, -3.0, 4.0 + +aarch64 = fcvtxn +generate float32x2_t:float64x2_t:float32x4_t + /// Saturating subtract name = vqsub a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 diff --git a/library/stdarch/crates/stdarch-gen/src/main.rs b/library/stdarch/crates/stdarch-gen/src/main.rs index e9c49d8a9528..90ec610d9bef 100644 --- a/library/stdarch/crates/stdarch-gen/src/main.rs +++ b/library/stdarch/crates/stdarch-gen/src/main.rs @@ -160,6 +160,20 @@ fn type_to_unsigned_suffix(t: &str) -> &str { } } +fn type_to_double_suffixes<'a>(out_t: &'a str, in_t: &'a str) -> &'a str { + match (out_t, in_t) { + ("float32x2_t", "float64x2_t") => "_f32_f64", + ("float64x2_t", "float32x2_t") => "_f64_f32", + ("float64x2_t", "float32x4_t") => "_f64_f32", + ("float32x4_t", "float64x2_t") => "_f32_f64", + ("int32x2_t", "float32x2_t") => "_s32_f32", + ("int32x4_t", "float32x4_t") => "q_s32_f32", + ("int64x1_t", "float64x1_t") => "_s64_f64", + ("int64x2_t", "float64x2_t") => "q_s64_f64", + (_, _) => panic!("unknown type: {}, {}", out_t, in_t), + } +} + fn type_to_global_type(t: &str) -> &str { match t { "int8x8_t" => "i8x8", @@ -388,15 +402,21 @@ fn gen_aarch64( current_aarch64: &Option<String>, link_aarch64: &Option<String>, in_t: &str, + in_t2: &str, out_t: &str, current_tests: &[(Vec<String>, Vec<String>, Vec<String>)], + double_suffixes: bool, para_num: i32, fixed: &Vec<String>, multi_fn: &Vec<String>, ) -> (String, String) { let _global_t = type_to_global_type(in_t); let _global_ret_t = type_to_global_type(out_t); - let name = format!("{}{}", current_name, type_to_suffix(in_t)); + let name = if double_suffixes { + format!("{}{}", current_name, type_to_double_suffixes(out_t, in_t2)) + } else { + format!("{}{}", current_name, type_to_suffix(in_t2)) + }; 
let current_fn = if let Some(current_fn) = current_fn.clone() { if link_aarch64.is_some() { panic!( @@ -440,7 +460,7 @@ fn gen_aarch64( format!("a: {}", in_t) } 2 => { - format!("a: {}, b: {}", in_t, in_t) + format!("a: {}, b: {}", in_t, in_t2) } _ => unimplemented!("unknown para_num"), }, @@ -455,7 +475,14 @@ fn gen_aarch64( if i > 0 { calls.push_str("\n "); } - calls.push_str(&get_call(&multi_fn[i], current_name, in_t, out_t, fixed)); + calls.push_str(&get_call( + &multi_fn[i], + current_name, + in_t, + in_t2, + out_t, + fixed, + )); } calls } else { @@ -466,7 +493,7 @@ fn gen_aarch64( r#"pub unsafe fn {}(a: {}, b: {}) -> {} {{ {}{}(a, b) }}"#, - name, in_t, in_t, out_t, ext_c, current_fn, + name, in_t, in_t2, out_t, ext_c, current_fn, ), (0, 1, 0) => format!( r#"pub unsafe fn {}(a: {}) -> {} {{ @@ -499,7 +526,7 @@ fn gen_aarch64( r#"pub unsafe fn {}(a: {}, b: {}) -> {} {{ {}{} }}"#, - name, in_t, in_t, out_t, ext_c, multi_calls, + name, in_t, in_t2, out_t, ext_c, multi_calls, ), (_, _, _) => String::new(), }; @@ -517,9 +544,12 @@ fn gen_aarch64( let test = gen_test( &name, &in_t, + &in_t2, &out_t, current_tests, type_len(in_t), + type_len(in_t2), + type_len(out_t), para_num, ); (function, test) @@ -528,9 +558,12 @@ fn gen_aarch64( fn gen_test( name: &str, in_t: &str, + in_t2: &str, out_t: &str, current_tests: &[(Vec<String>, Vec<String>, Vec<String>)], - len: usize, + len_in: usize, + len_in2: usize, + len_out: usize, para_num: i32, ) -> String { let mut test = format!( @@ -540,9 +573,9 @@ fn gen_test( name, ); for (a, b, e) in current_tests { - let a: Vec<String> = a.iter().take(len).cloned().collect(); - let b: Vec<String> = b.iter().take(len).cloned().collect(); - let e: Vec<String> = e.iter().take(len).cloned().collect(); + let a: Vec<String> = a.iter().take(len_in).cloned().collect(); + let b: Vec<String> = b.iter().take(len_in2).cloned().collect(); + let e: Vec<String> = e.iter().take(len_out).cloned().collect(); let t = { match para_num { 1 => { @@ -569,7 +602,7 @@ fn gen_test( assert_eq!(r, e); "#, values(in_t, &a), - 
values(in_t, &b), + values(in_t2, &b), values(out_t, &e), type_to_global_type(out_t), name @@ -597,15 +630,21 @@ fn gen_arm( current_aarch64: &Option<String>, link_aarch64: &Option<String>, in_t: &str, + in_t2: &str, out_t: &str, current_tests: &[(Vec<String>, Vec<String>, Vec<String>)], + double_suffixes: bool, para_num: i32, fixed: &Vec<String>, multi_fn: &Vec<String>, ) -> (String, String) { let _global_t = type_to_global_type(in_t); let _global_ret_t = type_to_global_type(out_t); - let name = format!("{}{}", current_name, type_to_suffix(in_t)); + let name = if double_suffixes { + format!("{}{}", current_name, type_to_double_suffixes(out_t, in_t2)) + } else { + format!("{}{}", current_name, type_to_suffix(in_t2)) + }; let current_aarch64 = current_aarch64 .clone() .unwrap_or_else(|| current_arm.to_string()); @@ -655,7 +694,7 @@ fn gen_arm( format!("a: {}", in_t) } 2 => { - format!("a: {}, b: {}", in_t, in_t) + format!("a: {}, b: {}", in_t, in_t2) } _ => unimplemented!("unknown para_num"), }, @@ -670,7 +709,14 @@ fn gen_arm( if i > 0 { calls.push_str("\n "); } - calls.push_str(&get_call(&multi_fn[i], current_name, in_t, out_t, fixed)); + calls.push_str(&get_call( + &multi_fn[i], + current_name, + in_t, + in_t2, + out_t, + fixed, + )); } calls } else { @@ -681,7 +727,7 @@ fn gen_arm( r#"pub unsafe fn {}(a: {}, b: {}) -> {} {{ {}{}(a, b) }}"#, - name, in_t, in_t, out_t, ext_c, current_fn, + name, in_t, in_t2, out_t, ext_c, current_fn, ), (0, 1, 0) => format!( r#"pub unsafe fn {}(a: {}) -> {} {{ @@ -714,7 +760,7 @@ fn gen_arm( r#"pub unsafe fn {}(a: {}, b: {}) -> {} {{ {}{} }}"#, - name, in_t, in_t, out_t, ext_c, multi_calls, + name, in_t, in_t2, out_t, ext_c, multi_calls, ), (_, _, _) => String::new(), }; @@ -736,9 +782,12 @@ fn gen_arm( let test = gen_test( &name, &in_t, + &in_t2, &out_t, current_tests, type_len(in_t), + type_len(in_t2), + type_len(out_t), para_num, ); @@ -819,6 +868,7 @@ fn get_call( in_str: &str, current_name: &str, in_t: &str, + in_t2: &str, out_t: &str, fixed: &Vec<String>, ) -> String { @@ -830,20 +880,20 
@@ fn get_call( let mut i = 1; while i < params.len() { let s = &params[i]; - if s.starts_with('[') { + if s.starts_with('{') { let mut sub_fn = String::new(); - let mut brackets = 1; + let mut paranthes = 0; while i < params.len() { if !sub_fn.is_empty() { sub_fn.push_str(", "); } sub_fn.push_str(&params[i]); - if params[i].starts_with('[') { - brackets += 1; + if params[i].starts_with('{') { + paranthes += 1; } - if params[i].ends_with("]") { - brackets -= 1; - if brackets == 0 { + if params[i].ends_with('}') { + paranthes -= 1; + if paranthes == 0 { break; } } @@ -853,6 +903,7 @@ fn get_call( &sub_fn[1..sub_fn.len() - 1], current_name, in_t, + in_t2, out_t, fixed, ); @@ -868,6 +919,8 @@ fn get_call( re = Some((re_params[0].clone(), in_t.to_string())); } else if re_params[1] == "out_t" { re = Some((re_params[0].clone(), out_t.to_string())); + } else { + re = Some((re_params[0].clone(), re_params[1].clone())); } } else { if !param_str.is_empty() { @@ -891,11 +944,13 @@ fn get_call( fn_format[0].clone() }; if fn_format[1] == "self" { - fn_name.push_str(type_to_suffix(in_t)); + fn_name.push_str(type_to_suffix(in_t2)); } else if fn_format[1] == "signed" { - fn_name.push_str(type_to_signed_suffix(in_t)); + fn_name.push_str(type_to_signed_suffix(in_t2)); } else if fn_format[1] == "unsigned" { - fn_name.push_str(type_to_unsigned_suffix(in_t)); + fn_name.push_str(type_to_unsigned_suffix(in_t2)); + } else if fn_format[1] == "doubleself" { + fn_name.push_str(type_to_double_suffixes(out_t, in_t2)); } else { fn_name.push_str(&fn_format[1]); }; @@ -932,6 +987,7 @@ fn main() -> io::Result<()> { let mut link_arm: Option<String> = None; let mut link_aarch64: Option<String> = None; let mut para_num = 2; + let mut double_suffixes = false; let mut a: Vec<String> = Vec::new(); let mut b: Vec<String> = Vec::new(); let mut fixed: Vec<String> = Vec::new(); @@ -1007,6 +1063,7 @@ mod test { link_arm = None; current_tests = Vec::new(); para_num = 2; + double_suffixes = false; a = Vec::new(); b = Vec::new(); fixed = Vec::new(); @@ 
-1022,6 +1079,8 @@ mod test { current_arm = Some(String::from(&line[6..])); } else if line.starts_with("aarch64 = ") { current_aarch64 = Some(String::from(&line[10..])); + } else if line.starts_with("double-suffixes") { + double_suffixes = true; } else if line.starts_with("a = ") { a = line[4..].split(',').map(|v| v.trim().to_string()).collect(); } else if line.starts_with("b = ") { @@ -1054,13 +1113,20 @@ mod test { for line in types { let spec: Vec<&str> = line.split(':').map(|e| e.trim()).collect(); let in_t; + let in_t2; let out_t; if spec.len() == 1 { in_t = spec[0]; + in_t2 = spec[0]; out_t = spec[0]; } else if spec.len() == 2 { in_t = spec[0]; + in_t2 = spec[0]; out_t = spec[1]; + } else if spec.len() == 3 { + in_t = spec[0]; + in_t2 = spec[1]; + out_t = spec[2]; } else { panic!("Bad spec: {}", line) } @@ -1078,8 +1144,10 @@ mod test { &current_aarch64, &link_aarch64, &in_t, + &in_t2, &out_t, &current_tests, + double_suffixes, para_num, &fixed, &multi_fn, @@ -1094,8 +1162,10 @@ mod test { &current_aarch64, &link_aarch64, &in_t, + &in_t2, &out_t, &current_tests, + double_suffixes, para_num, &fixed, &multi_fn,