From 77a9effd3dde6eabcda086ccd36dd1bfb5fb88ab Mon Sep 17 00:00:00 2001 From: Arpan Kapoor Date: Fri, 24 Mar 2023 13:06:20 +0530 Subject: [PATCH 1/3] Optimize bitreverse codegen --- example/mini_core_hello_world.rs | 5 + src/intrinsic/mod.rs | 161 +++++++------------------------ 2 files changed, 41 insertions(+), 125 deletions(-) diff --git a/example/mini_core_hello_world.rs b/example/mini_core_hello_world.rs index 993a31e68eab..4dff341e67d0 100644 --- a/example/mini_core_hello_world.rs +++ b/example/mini_core_hello_world.rs @@ -168,6 +168,11 @@ fn main() { world as Box; assert_eq!(intrinsics::bitreverse(0b10101000u8), 0b00010101u8); + assert_eq!(intrinsics::bitreverse(0xddccu16), 0x33bbu16); + assert_eq!(intrinsics::bitreverse(0xffee_ddccu32), 0x33bb77ffu32); + assert_eq!(intrinsics::bitreverse(0x1234_5678_ffee_ddccu64), 0x33bb77ff1e6a2c48u64); + // != cannot be applied to type u128? + // assert_eq!(intrinsics::bitreverse(0x1234_5678_ffee_ddcc_1234_5678_ffee_ddccu128), 0x33bb77ff1e6a2c4833bb77ff1e6a2c48u128); assert_eq!(intrinsics::bswap(0xabu8), 0xabu8); assert_eq!(intrinsics::bswap(0xddccu16), 0xccddu16); diff --git a/src/intrinsic/mod.rs b/src/intrinsic/mod.rs index 2590e0e3af44..6f856e481bca 100644 --- a/src/intrinsic/mod.rs +++ b/src/intrinsic/mod.rs @@ -549,141 +549,52 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { let context = &self.cx.context; let result = match width { - 8 => { + 8 | 16 | 32 | 64 => { + let mask = ((1u128 << width) - 1) as u64; + let (m0, m1, m2) = if width > 16 { + ( + context.new_rvalue_from_long(typ, (0x5555555555555555u64 & mask) as i64), + context.new_rvalue_from_long(typ, (0x3333333333333333u64 & mask) as i64), + context.new_rvalue_from_long(typ, (0x0f0f0f0f0f0f0f0fu64 & mask) as i64), + ) + } else { + ( + context.new_rvalue_from_int(typ, (0x5555u64 & mask) as i32), + context.new_rvalue_from_int(typ, (0x3333u64 & mask) as i32), + context.new_rvalue_from_int(typ, (0x0f0fu64 & mask) as i32), + ) + }; + let one = context.new_rvalue_from_int(typ, 1); + let two = context.new_rvalue_from_int(typ, 2); + let four = context.new_rvalue_from_int(typ, 4); + // First step. - let left = self.and(value, context.new_rvalue_from_int(typ, 0xF0)); - let left = self.lshr(left, context.new_rvalue_from_int(typ, 4)); - let right = self.and(value, context.new_rvalue_from_int(typ, 0x0F)); - let right = self.shl(right, context.new_rvalue_from_int(typ, 4)); + let left = self.lshr(value, one); + let left = self.and(left, m0); + let right = self.and(value, m0); + let right = self.shl(right, one); let step1 = self.or(left, right); // Second step. - let left = self.and(step1, context.new_rvalue_from_int(typ, 0xCC)); - let left = self.lshr(left, context.new_rvalue_from_int(typ, 2)); - let right = self.and(step1, context.new_rvalue_from_int(typ, 0x33)); - let right = self.shl(right, context.new_rvalue_from_int(typ, 2)); + let left = self.lshr(step1, two); + let left = self.and(left, m1); + let right = self.and(step1, m1); + let right = self.shl(right, two); let step2 = self.or(left, right); // Third step. - let left = self.and(step2, context.new_rvalue_from_int(typ, 0xAA)); - let left = self.lshr(left, context.new_rvalue_from_int(typ, 1)); - let right = self.and(step2, context.new_rvalue_from_int(typ, 0x55)); - let right = self.shl(right, context.new_rvalue_from_int(typ, 1)); - let step3 = self.or(left, right); - - step3 - }, - 16 => { - // First step. - let left = self.and(value, context.new_rvalue_from_int(typ, 0x5555)); - let left = self.shl(left, context.new_rvalue_from_int(typ, 1)); - let right = self.and(value, context.new_rvalue_from_int(typ, 0xAAAA)); - let right = self.lshr(right, context.new_rvalue_from_int(typ, 1)); - let step1 = self.or(left, right); - - // Second step. - let left = self.and(step1, context.new_rvalue_from_int(typ, 0x3333)); - let left = self.shl(left, context.new_rvalue_from_int(typ, 2)); - let right = self.and(step1, context.new_rvalue_from_int(typ, 0xCCCC)); - let right = self.lshr(right, context.new_rvalue_from_int(typ, 2)); - let step2 = self.or(left, right); - - // Third step. - let left = self.and(step2, context.new_rvalue_from_int(typ, 0x0F0F)); - let left = self.shl(left, context.new_rvalue_from_int(typ, 4)); - let right = self.and(step2, context.new_rvalue_from_int(typ, 0xF0F0)); - let right = self.lshr(right, context.new_rvalue_from_int(typ, 4)); + let left = self.lshr(step2, four); + let left = self.and(left, m2); + let right = self.and(step2, m2); + let right = self.shl(right, four); let step3 = self.or(left, right); // Fourth step. - let left = self.and(step3, context.new_rvalue_from_int(typ, 0x00FF)); - let left = self.shl(left, context.new_rvalue_from_int(typ, 8)); - let right = self.and(step3, context.new_rvalue_from_int(typ, 0xFF00)); - let right = self.lshr(right, context.new_rvalue_from_int(typ, 8)); - let step4 = self.or(left, right); - - step4 - }, - 32 => { - // TODO(antoyo): Refactor with other implementations. - // First step. - let left = self.and(value, context.new_rvalue_from_long(typ, 0x55555555)); - let left = self.shl(left, context.new_rvalue_from_long(typ, 1)); - let right = self.and(value, context.new_rvalue_from_long(typ, 0xAAAAAAAA)); - let right = self.lshr(right, context.new_rvalue_from_long(typ, 1)); - let step1 = self.or(left, right); - - // Second step. - let left = self.and(step1, context.new_rvalue_from_long(typ, 0x33333333)); - let left = self.shl(left, context.new_rvalue_from_long(typ, 2)); - let right = self.and(step1, context.new_rvalue_from_long(typ, 0xCCCCCCCC)); - let right = self.lshr(right, context.new_rvalue_from_long(typ, 2)); - let step2 = self.or(left, right); - - // Third step. - let left = self.and(step2, context.new_rvalue_from_long(typ, 0x0F0F0F0F)); - let left = self.shl(left, context.new_rvalue_from_long(typ, 4)); - let right = self.and(step2, context.new_rvalue_from_long(typ, 0xF0F0F0F0)); - let right = self.lshr(right, context.new_rvalue_from_long(typ, 4)); - let step3 = self.or(left, right); - - // Fourth step. - let left = self.and(step3, context.new_rvalue_from_long(typ, 0x00FF00FF)); - let left = self.shl(left, context.new_rvalue_from_long(typ, 8)); - let right = self.and(step3, context.new_rvalue_from_long(typ, 0xFF00FF00)); - let right = self.lshr(right, context.new_rvalue_from_long(typ, 8)); - let step4 = self.or(left, right); - - // Fifth step. - let left = self.and(step4, context.new_rvalue_from_long(typ, 0x0000FFFF)); - let left = self.shl(left, context.new_rvalue_from_long(typ, 16)); - let right = self.and(step4, context.new_rvalue_from_long(typ, 0xFFFF0000)); - let right = self.lshr(right, context.new_rvalue_from_long(typ, 16)); - let step5 = self.or(left, right); - - step5 - }, - 64 => { - // First step. - let left = self.shl(value, context.new_rvalue_from_long(typ, 32)); - let right = self.lshr(value, context.new_rvalue_from_long(typ, 32)); - let step1 = self.or(left, right); - - // Second step. - let left = self.and(step1, context.new_rvalue_from_long(typ, 0x0001FFFF0001FFFF)); - let left = self.shl(left, context.new_rvalue_from_long(typ, 15)); - let right = self.and(step1, context.new_rvalue_from_long(typ, 0xFFFE0000FFFE0000u64 as i64)); // TODO(antoyo): transmute the number instead? - let right = self.lshr(right, context.new_rvalue_from_long(typ, 17)); - let step2 = self.or(left, right); - - // Third step. - let left = self.lshr(step2, context.new_rvalue_from_long(typ, 10)); - let left = self.xor(step2, left); - let temp = self.and(left, context.new_rvalue_from_long(typ, 0x003F801F003F801F)); - - let left = self.shl(temp, context.new_rvalue_from_long(typ, 10)); - let left = self.or(temp, left); - let step3 = self.xor(left, step2); - - // Fourth step. - let left = self.lshr(step3, context.new_rvalue_from_long(typ, 4)); - let left = self.xor(step3, left); - let temp = self.and(left, context.new_rvalue_from_long(typ, 0x0E0384210E038421)); - - let left = self.shl(temp, context.new_rvalue_from_long(typ, 4)); - let left = self.or(temp, left); - let step4 = self.xor(left, step3); - - // Fifth step. - let left = self.lshr(step4, context.new_rvalue_from_long(typ, 2)); - let left = self.xor(step4, left); - let temp = self.and(left, context.new_rvalue_from_long(typ, 0x2248884222488842)); - - let left = self.shl(temp, context.new_rvalue_from_long(typ, 2)); - let left = self.or(temp, left); - let step5 = self.xor(left, step4); - - step5 + if width == 8 { + step3 + } else { + self.gcc_bswap(step3, width) + } }, 128 => { // TODO(antoyo): find a more efficient implementation? From 6e1a79c6a4630d1cd0bd143919ffac3c598f0e3d Mon Sep 17 00:00:00 2001 From: Arpan Kapoor Date: Sun, 26 Mar 2023 13:19:57 +0530 Subject: [PATCH 2/3] Add u128 PartialEq impl in mini_core.rs --- example/mini_core.rs | 11 +++++++++++ example/mini_core_hello_world.rs | 3 +-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/example/mini_core.rs b/example/mini_core.rs index 637b8dc53fef..018bbaf025da 100644 --- a/example/mini_core.rs +++ b/example/mini_core.rs @@ -59,6 +59,7 @@ unsafe impl Copy for u8 {} unsafe impl Copy for u16 {} unsafe impl Copy for u32 {} unsafe impl Copy for u64 {} +unsafe impl Copy for u128 {} unsafe impl Copy for usize {} unsafe impl Copy for i8 {} unsafe impl Copy for i16 {} @@ -79,6 +80,7 @@ unsafe impl Sync for u8 {} unsafe impl Sync for u16 {} unsafe impl Sync for u32 {} unsafe impl Sync for u64 {} +unsafe impl Sync for u128 {} unsafe impl Sync for usize {} unsafe impl Sync for i8 {} unsafe impl Sync for i16 {} @@ -294,6 +296,15 @@ impl PartialEq for u64 { } } +impl PartialEq for u128 { + fn eq(&self, other: &u128) -> bool { + (*self) == (*other) + } + fn ne(&self, other: &u128) -> bool { + (*self) != (*other) + } +} + impl PartialEq for usize { fn eq(&self, other: &usize) -> bool { (*self) == (*other) diff --git a/example/mini_core_hello_world.rs b/example/mini_core_hello_world.rs index 4dff341e67d0..ad244814208f 100644 --- a/example/mini_core_hello_world.rs +++ b/example/mini_core_hello_world.rs @@ -171,8 +171,7 @@ fn main() { assert_eq!(intrinsics::bitreverse(0xddccu16), 0x33bbu16); assert_eq!(intrinsics::bitreverse(0xffee_ddccu32), 0x33bb77ffu32); assert_eq!(intrinsics::bitreverse(0x1234_5678_ffee_ddccu64), 0x33bb77ff1e6a2c48u64); - // != cannot be applied to type u128? - // assert_eq!(intrinsics::bitreverse(0x1234_5678_ffee_ddcc_1234_5678_ffee_ddccu128), 0x33bb77ff1e6a2c4833bb77ff1e6a2c48u128); + assert_eq!(intrinsics::bitreverse(0x1234_5678_ffee_ddcc_1234_5678_ffee_ddccu128), 0x33bb77ff1e6a2c4833bb77ff1e6a2c48u128); assert_eq!(intrinsics::bswap(0xabu8), 0xabu8); assert_eq!(intrinsics::bswap(0xddccu16), 0xccddu16); From 68b8500235fe2641afa6a36f1786a9d5d5105065 Mon Sep 17 00:00:00 2001 From: Arpan Kapoor Date: Mon, 27 Mar 2023 20:33:05 +0530 Subject: [PATCH 3/3] move u128 test to std_example --- example/mini_core.rs | 11 ----------- example/mini_core_hello_world.rs | 1 - example/std_example.rs | 1 + 3 files changed, 1 insertion(+), 12 deletions(-) diff --git a/example/mini_core.rs b/example/mini_core.rs index 018bbaf025da..637b8dc53fef 100644 --- a/example/mini_core.rs +++ b/example/mini_core.rs @@ -59,7 +59,6 @@ unsafe impl Copy for u8 {} unsafe impl Copy for u16 {} unsafe impl Copy for u32 {} unsafe impl Copy for u64 {} -unsafe impl Copy for u128 {} unsafe impl Copy for usize {} unsafe impl Copy for i8 {} unsafe impl Copy for i16 {} @@ -80,7 +79,6 @@ unsafe impl Sync for u8 {} unsafe impl Sync for u16 {} unsafe impl Sync for u32 {} unsafe impl Sync for u64 {} -unsafe impl Sync for u128 {} unsafe impl Sync for usize {} unsafe impl Sync for i8 {} unsafe impl Sync for i16 {} @@ -296,15 +294,6 @@ impl PartialEq for u64 { } } -impl PartialEq for u128 { - fn eq(&self, other: &u128) -> bool { - (*self) == (*other) - } - fn ne(&self, other: &u128) -> bool { - (*self) != (*other) - } -} - impl PartialEq for usize { fn eq(&self, other: &usize) -> bool { (*self) == (*other) diff --git a/example/mini_core_hello_world.rs b/example/mini_core_hello_world.rs index ad244814208f..1c8ca6b9d7c8 100644 --- a/example/mini_core_hello_world.rs +++ b/example/mini_core_hello_world.rs @@ -171,7 +171,6 @@ fn main() { assert_eq!(intrinsics::bitreverse(0xddccu16), 0x33bbu16); assert_eq!(intrinsics::bitreverse(0xffee_ddccu32), 0x33bb77ffu32); assert_eq!(intrinsics::bitreverse(0x1234_5678_ffee_ddccu64), 0x33bb77ff1e6a2c48u64); - assert_eq!(intrinsics::bitreverse(0x1234_5678_ffee_ddcc_1234_5678_ffee_ddccu128), 0x33bb77ff1e6a2c4833bb77ff1e6a2c48u128); assert_eq!(intrinsics::bswap(0xabu8), 0xabu8); assert_eq!(intrinsics::bswap(0xddccu16), 0xccddu16); diff --git a/example/std_example.rs b/example/std_example.rs index 5c171c49fd19..18f2ddcde126 100644 --- a/example/std_example.rs +++ b/example/std_example.rs @@ -58,6 +58,7 @@ fn main() { assert_eq!(0b0000000000000000000000000010000010000000000000000000000000000000_0000000000100000000000000000000000001000000000000100000000000000u128.leading_zeros(), 26); assert_eq!(0b0000000000000000000000000010000000000000000000000000000000000000_0000000000000000000000000000000000001000000000000000000010000000u128.trailing_zeros(), 7); + assert_eq!(0x1234_5678_ffee_ddcc_1234_5678_ffee_ddccu128.reverse_bits(), 0x33bb77ff1e6a2c4833bb77ff1e6a2c48u128); let _d = 0i128.checked_div(2i128); let _d = 0u128.checked_div(2u128);