Merge pull request #556 from CodeLinaro/bcain/hexagon_comp_rt

This commit is contained in:
Amanieu d'Antras 2023-12-14 20:21:11 +00:00 committed by GitHub
commit bc0781c7a0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
25 changed files with 3100 additions and 0 deletions

View file

@ -0,0 +1,55 @@
#![cfg(not(feature = "no-asm"))]
use core::arch::global_asm;
global_asm!(include_str!("hexagon/func_macro.s"), options(raw));
global_asm!(include_str!("hexagon/dfaddsub.s"), options(raw));
global_asm!(include_str!("hexagon/dfdiv.s"), options(raw));
global_asm!(include_str!("hexagon/dffma.s"), options(raw));
global_asm!(include_str!("hexagon/dfminmax.s"), options(raw));
global_asm!(include_str!("hexagon/dfmul.s"), options(raw));
global_asm!(include_str!("hexagon/dfsqrt.s"), options(raw));
global_asm!(include_str!("hexagon/divdi3.s"), options(raw));
global_asm!(include_str!("hexagon/divsi3.s"), options(raw));
global_asm!(include_str!("hexagon/fastmath2_dlib_asm.s"), options(raw));
global_asm!(include_str!("hexagon/fastmath2_ldlib_asm.s"), options(raw));
global_asm!(
include_str!("hexagon/memcpy_forward_vp4cp4n2.s"),
options(raw)
);
global_asm!(
include_str!("hexagon/memcpy_likely_aligned.s"),
options(raw)
);
global_asm!(include_str!("hexagon/moddi3.s"), options(raw));
global_asm!(include_str!("hexagon/modsi3.s"), options(raw));
global_asm!(include_str!("hexagon/sfdiv_opt.s"), options(raw));
global_asm!(include_str!("hexagon/sfsqrt_opt.s"), options(raw));
global_asm!(include_str!("hexagon/udivdi3.s"), options(raw));
global_asm!(include_str!("hexagon/udivmoddi4.s"), options(raw));
global_asm!(include_str!("hexagon/udivmodsi4.s"), options(raw));
global_asm!(include_str!("hexagon/udivsi3.s"), options(raw));
global_asm!(include_str!("hexagon/umoddi3.s"), options(raw));
global_asm!(include_str!("hexagon/umodsi3.s"), options(raw));

View file

@ -0,0 +1,321 @@
// Double-precision floating-point add and subtract.
//
// Entry points: __hexagon_adddf3 and __hexagon_subdf3. The .set directives below
// also export them under __qdsp_*, __hexagon_fast_* and __hexagon_fast2_* names.
// Operands are read from r1:0 and r3:2; the result is left in r1:0 and control
// returns through r31.
.text
.global __hexagon_adddf3
.global __hexagon_subdf3
.type __hexagon_adddf3, @function
.type __hexagon_subdf3, @function
.global __qdsp_adddf3 ; .set __qdsp_adddf3, __hexagon_adddf3
.global __hexagon_fast_adddf3 ; .set __hexagon_fast_adddf3, __hexagon_adddf3
.global __hexagon_fast2_adddf3 ; .set __hexagon_fast2_adddf3, __hexagon_adddf3
.global __qdsp_subdf3 ; .set __qdsp_subdf3, __hexagon_subdf3
.global __hexagon_fast_subdf3 ; .set __hexagon_fast_subdf3, __hexagon_subdf3
.global __hexagon_fast2_subdf3 ; .set __hexagon_fast2_subdf3, __hexagon_subdf3
.p2align 5
__hexagon_adddf3:
// r4/r5 = 11-bit exponent fields of the two operands; r13:12 = constant with
// only bit 61 set, the implicit-one position for the mantissas built below.
{
r4 = extractu(r1,#11,#20)
r5 = extractu(r3,#11,#20)
r13:12 = combine(##0x20000000,#0)
}
// p3 = both operands match dfclass mask #2 (normal numbers per the Hexagon ISA);
// two compares writing one predicate in the same packet are ANDed.
// p2 = the second operand has the larger exponent field.
{
p3 = dfclass(r1:0,#2)
p3 = dfclass(r3:2,#2)
r9:8 = r13:12
p2 = cmp.gtu(r5,r4)
}
// Leave for the slow path unless both are normal. Otherwise swap (all reads in
// a packet see the old values) so r1:0 / r4 describe the larger-exponent operand.
{
if (!p3) jump .Ladd_abnormal
if (p2) r1:0 = r3:2
if (p2) r3:2 = r1:0
if (p2) r5:4 = combine(r4,r5)
}
// Insert the 52 fraction bits at bit 9 of each fixed-point mantissa.
// r15 = exponent difference, r7 = 62 (shift clamp), r6 = 1 (sticky bit).
{
r13:12 = insert(r1:0,#52,#11 -2)
r9:8 = insert(r3:2,#52,#11 -2)
r15 = sub(r4,r5)
r7:6 = combine(#62,#1)
}
// Shared tail; also entered from .Ladd_abnormal once a normal/subnormal pair
// has been set up in the same register layout.
.Ladd_continue:
{
r15 = min(r15,r7)
r11:10 = neg(r13:12)
p2 = cmp.gt(r1,#-1)
r14 = #0
}
// Apply the first operand's sign, capture the bits that the alignment shift
// is about to drop from the second mantissa, then align it.
{
if (!p2) r13:12 = r11:10
r11:10 = extractu(r9:8,r15:14)
r9:8 = ASR(r9:8,r15)
r15:14 = #0
}
// Any dropped bit becomes a sticky bit in the aligned mantissa.
// r5 = exponent adjustment applied after the integer-to-double conversion.
{
p1 = cmp.eq(r11:10,r15:14)
if (!p1.new) r8 = or(r8,r6)
r5 = add(r4,#-1024 -60)
p3 = cmp.gt(r3,#-1)
}
// Compute both sum and difference; the next packet picks by the second sign.
{
r13:12 = add(r13:12,r9:8)
r11:10 = sub(r13:12,r9:8)
r7:6 = combine(#54,##2045)
}
// p0 = (r4 > 54) AND (r4 <= 2045); outside that exponent window the result may
// overflow or underflow, so take the careful path.
{
p0 = cmp.gtu(r4,r7)
p0 = !cmp.gtu(r4,r6)
if (!p0.new) jump:nt .Ladd_ovf_unf
if (!p3) r13:12 = r11:10
}
// Convert the signed 64-bit result to double; an all-zero result is special.
{
r1:0 = convert_d2df(r13:12)
p0 = cmp.eq(r13,#0)
p0 = cmp.eq(r12,#0)
if (p0.new) jump:nt .Ladd_zero
}
// Add the exponent adjustment into the exponent field (bit 20 upward of r1).
{
r1 += asl(r5,#20)
jumpr r31
}
.falign
// Subtraction: flip the sign bit of the second operand and reuse the add code.
__hexagon_subdf3:
{
r3 = togglebit(r3,#31)
jump __qdsp_adddf3
}
.falign
// Exact-zero result: return +0, or set the sign bit when the 2-bit USR field at
// bits 23:22 equals 2 (presumably the round-toward-negative-infinity mode --
// confirm against the USR layout in the Hexagon manual).
.Ladd_zero:
{
r28 = USR
r1:0 = #0
r3 = #1
}
{
r28 = extractu(r28,#2,#22)
r3 = asl(r3,#31)
}
{
p0 = cmp.eq(r28,#2)
if (p0.new) r1 = xor(r1,r3)
jumpr r31
}
.falign
// Exponent outside the fast-path window: redo the conversion, then decide
// between overflow, a plain normal return, and building a subnormal result.
.Ladd_ovf_unf:
{
r1:0 = convert_d2df(r13:12)
p0 = cmp.eq(r13,#0)
p0 = cmp.eq(r12,#0)
if (p0.new) jump:nt .Ladd_zero
}
{
r28 = extractu(r1,#11,#20)
r1 += asl(r5,#20)
}
// r5 = final exponent field value (adjustment + exponent produced by the convert).
{
r5 = add(r5,r28)
r3:2 = combine(##0x00100000,#0)
}
{
p0 = cmp.gt(r5,##1024 +1024 -2)
if (p0.new) jump:nt .Ladd_ovf
}
// Positive exponent: the value already in r1:0 is the answer.
// Otherwise r28 = 1 - exponent is the right shift needed to denormalise.
{
p0 = cmp.gt(r5,#0)
if (p0.new) jumpr:t r31
r28 = sub(#1,r5)
}
// r3:2 = implicit-one bit plus the 52 fraction bits; r1:0 keeps only the sign
// bit of the integer result after the final 63-bit insert.
{
r3:2 = insert(r1:0,#52,#0)
r1:0 = r13:12
}
{
r3:2 = lsr(r3:2,r28)
}
{
r1:0 = insert(r3:2,#63,#0)
jumpr r31
}
.falign
// Overflow: OR 0x28 into USR (presumably the overflow and inexact flag bits --
// TODO confirm), then return either the largest finite magnitude or infinity,
// chosen from the USR rounding field and the result sign. The sign bit of
// r1:0 survives because only the low 63 bits are inserted.
.Ladd_ovf:
{
r1:0 = r13:12
r28 = USR
r13:12 = combine(##0x7fefffff,#-1)
}
{
r5 = extractu(r28,#2,#22)
r28 = or(r28,#0x28)
r9:8 = combine(##0x7ff00000,#0)
}
// r28 receives the old r5 (rounding field); r5 becomes field XOR sign.
{
USR = r28
r5 ^= lsr(r1,#31)
r28 = r5
}
{
p0 = !cmp.eq(r28,#1)
p0 = !cmp.eq(r5,#2)
if (p0.new) r13:12 = r9:8
}
{
r1:0 = insert(r13:12,#63,#0)
}
// The comparison result is unused; presumably executed only so a pending
// floating-point exception is realised -- confirm.
{
p0 = dfcmp.eq(r1:0,r1:0)
jumpr r31
}
// At least one operand is not a normal number. Reorder so r1:0 holds the
// larger magnitude (compared with the sign bit stripped), then classify.
.Ladd_abnormal:
{
r13:12 = extractu(r1:0,#63,#0)
r9:8 = extractu(r3:2,#63,#0)
}
{
p3 = cmp.gtu(r13:12,r9:8)
if (!p3.new) r1:0 = r3:2
if (!p3.new) r3:2 = r1:0
}
// Mask 0x0f covers every class except NaN, so a miss means r1:0 is a NaN.
{
p0 = dfclass(r1:0,#0x0f)
if (!p0.new) jump:nt .Linvalid_nan_add
if (!p3) r13:12 = r9:8
if (!p3) r9:8 = r13:12
}
// Larger operand is infinite.
{
p1 = dfclass(r1:0,#0x08)
if (p1.new) jump:nt .Linf_add
}
// Smaller operand is zero.
{
p2 = dfclass(r3:2,#0x01)
if (p2.new) jump:nt .LB_zero
r13:12 = #0
}
// Larger operand is subnormal, hence both are.
{
p0 = dfclass(r1:0,#4)
if (p0.new) jump:nt .Ladd_two_subnormal
r13:12 = combine(##0x20000000,#0)
}
// Remaining case: normal + subnormal. The subnormal gets exponent 1 (r5) and
// no implicit-one bit, then the common tail is reused.
{
r4 = extractu(r1,#11,#20)
r5 = #1
r9:8 = asl(r9:8,#11 -2)
}
{
r13:12 = insert(r1:0,#52,#11 -2)
r15 = sub(r4,r5)
r7:6 = combine(#62,#1)
jump .Ladd_continue
}
// Two subnormals: add them as signed integers and re-attach the sign.
.Ladd_two_subnormal:
{
r13:12 = extractu(r1:0,#63,#0)
r9:8 = extractu(r3:2,#63,#0)
}
{
r13:12 = neg(r13:12)
r9:8 = neg(r9:8)
p0 = cmp.gt(r1,#-1)
p1 = cmp.gt(r3,#-1)
}
{
if (p0) r13:12 = r1:0
if (p1) r9:8 = r3:2
}
{
r13:12 = add(r13:12,r9:8)
}
{
r9:8 = neg(r13:12)
p0 = cmp.gt(r13,#-1)
r3:2 = #0
}
{
if (!p0) r1:0 = r9:8
if (p0) r1:0 = r13:12
r3 = ##0x80000000
}
// A zero sum needs the rounding-mode dependent sign handling.
{
if (!p0) r1 = or(r1,r3)
p0 = dfcmp.eq(r1:0,r3:2)
if (p0.new) jump:nt .Lzero_plus_zero
}
{
jumpr r31
}
// NaN operand: the convert_df2sf results are discarded (presumably executed
// only so a signalling NaN raises the invalid flag -- confirm). The all-ones
// pattern returned in r1:0 is a NaN encoding.
.Linvalid_nan_add:
{
r28 = convert_df2sf(r1:0)
p0 = dfclass(r3:2,#0x0f)
if (p0.new) r3:2 = r1:0
}
{
r2 = convert_df2sf(r3:2)
r1:0 = #-1
jumpr r31
}
.falign
// Smaller operand is zero (r13:12 was cleared in the dispatching packet):
// return the larger operand unchanged unless it is zero as well.
.LB_zero:
{
p0 = dfcmp.eq(r13:12,r1:0)
if (!p0.new) jumpr:t r31
}
// Both values are zero: identical bit patterns are returned as is; for
// +0 with -0 the sign follows the USR field at bits 23:22 (value 2 -> -0).
.Lzero_plus_zero:
{
p0 = cmp.eq(r1:0,r3:2)
if (p0.new) jumpr:t r31
}
{
r28 = USR
}
{
r28 = extractu(r28,#2,#22)
r1:0 = #0
}
{
p0 = cmp.eq(r28,#2)
if (p0.new) r1 = ##0x80000000
jumpr r31
}
// Larger operand is infinite. Return it unless the other operand is an
// infinity whose high word differs (opposite sign); in that case convert the
// single-precision pattern 0x7f800001 to produce a NaN result.
.Linf_add:
{
p0 = !cmp.eq(r1,r3)
p0 = dfclass(r3:2,#8)
if (!p0.new) jumpr:t r31
}
{
r2 = ##0x7f800001
}
{
r1:0 = convert_sf2df(r2)
jumpr r31
}
.size __hexagon_adddf3,.-__hexagon_adddf3

View file

@ -0,0 +1,372 @@
// Double-precision floating-point divide.
//
// Entry point: __hexagon_divdf3 (also exported as __qdsp_divdf3,
// __hexagon_fast_divdf3 and __hexagon_fast2_divdf3).
// Dividend is read from r1:0, divisor from r3:2; the quotient is left in r1:0
// and control returns through r31.
.text
.global __hexagon_divdf3
.type __hexagon_divdf3,@function
.global __qdsp_divdf3 ; .set __qdsp_divdf3, __hexagon_divdf3
.global __hexagon_fast_divdf3 ; .set __hexagon_fast_divdf3, __hexagon_divdf3
.global __hexagon_fast2_divdf3 ; .set __hexagon_fast2_divdf3, __hexagon_divdf3
.p2align 5
__hexagon_divdf3:
// p2 = both operands are normal (dfclass mask 0x02, results ANDed).
// r13 = divisor high word, r12 = dividend high word; the top bit of r28 is the
// XOR of the two signs.
{
p2 = dfclass(r1:0,#0x02)
p2 = dfclass(r3:2,#0x02)
r13:12 = combine(r3,r1)
r28 = xor(r1,r3)
}
// r6 = top 23 fraction bits of the divisor; r8 = single-precision seed
// pattern. Both are still written when the jump to the slow path is taken.
{
if (!p2) jump .Ldiv_abnormal
r7:6 = extractu(r3:2,#23,#52 -23)
r8 = ##0x3f800001
}
// r9 = divisor fraction as a single-precision value; r13/r12 = exponent
// fields; p3 = quotient is non-negative.
{
r9 = or(r8,r6)
r13 = extractu(r13,#11,#52 -32)
r12 = extractu(r12,#11,#52 -32)
p3 = cmp.gt(r28,#-1)
}
// Re-entered from .Ldiv_abnormal after subnormal operands were normalised.
.Ldenorm_continue:
// Seed a single-precision reciprocal of the divisor fraction; r12 becomes the
// exponent difference.
{
r11,p0 = sfrecipa(r8,r9)
r10 = and(r8,#-2)
r28 = #1
r12 = sub(r12,r13)
}
// Two refinement rounds of the reciprocal estimate (r11) using :lib
// multiply-accumulates. Meanwhile the exponent fields of r1 and r3 are
// replaced by 1, leaving the implicit-one bit set above the fraction.
{
r10 -= sfmpy(r11,r9):lib
r1 = insert(r28,#11 +1,#52 -32)
r13 = ##0x00800000 << 3
}
{
r11 += sfmpy(r11,r10):lib
r3 = insert(r28,#11 +1,#52 -32)
r10 = and(r8,#-2)
}
{
r10 -= sfmpy(r11,r9):lib
r5 = #-0x3ff +1
r4 = #0x3ff -1
}
// p1 = exponent difference is inside (-0x3fe, 0x3fe], i.e. no overflow or
// underflow handling is needed.
{
r11 += sfmpy(r11,r10):lib
p1 = cmp.gt(r12,r5)
p1 = !cmp.gt(r12,r4)
}
// r13 = integer reciprocal built from the 23 fraction bits of the estimate;
// r5:4 = quotient accumulator.
{
r13 = insert(r11,#23,#3)
r5:4 = #0
r12 = add(r12,#-61)
}
{
r13 = add(r13,#((-3) << 3))
}
// Four quotient-digit rounds. Each one multiplies the reciprocal (r13) by the
// high word of the running remainder (r1:0), subtracts digit*divisor from the
// shifted remainder, and accumulates the digit (r7) into r5:4 at a lower
// position each time (<<14, >>1, >>16, >>31).
{ r7:6 = mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASL(r7:6, # ( 14 )); r1:0 -= asl(r15:14, # 32); }
{ r7:6 = mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASR(r7:6, # ( 1 )); r1:0 -= asl(r15:14, # 32); }
{ r7:6 = mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASR(r7:6, # ( 16 )); r1:0 -= asl(r15:14, # 32); }
{ r7:6 = mpyu(r13,r1); r1:0 = asl(r1:0,# ( 15 )); }; { r6 = # 0; r1:0 -= mpyu(r7,r2); r15:14 = mpyu(r7,r3); }; { r5:4 += ASR(r7:6, # ( 31 )); r1:0 -= asl(r15:14, # 32); r7:6=# ( 0 ); }
// Final correction: if the remainder is still >= the divisor, subtract it once
// more and add 2 to the quotient.
{
r15:14 = sub(r1:0,r3:2)
p0 = cmp.gtu(r3:2,r1:0)
if (!p0.new) r6 = #2
}
{
r5:4 = add(r5:4,r7:6)
if (!p0) r1:0 = r15:14
r15:14 = #0
}
// A non-zero remainder sets the sticky bit (r28 = 1) in the quotient.
{
p0 = cmp.eq(r1:0,r15:14)
if (!p0.new) r4 = or(r4,r28)
}
// Apply the sign, convert to double, and add the exponent when it is in range.
{
r7:6 = neg(r5:4)
}
{
if (!p3) r5:4 = r7:6
}
{
r1:0 = convert_d2df(r5:4)
if (!p1) jump .Ldiv_ovf_unf
}
{
r1 += asl(r12,#52 -32)
jumpr r31
}
// Exponent outside the safe window: work out the final exponent field (r12)
// and dispatch to overflow, near-underflow, or subnormal construction.
.Ldiv_ovf_unf:
{
r1 += asl(r12,#52 -32)
r13 = extractu(r1,#11,#52 -32)
}
{
r7:6 = abs(r5:4)
r12 = add(r12,r13)
}
{
p0 = cmp.gt(r12,##0x3ff +0x3ff)
if (p0.new) jump:nt .Ldiv_ovf
}
{
p0 = cmp.gt(r12,#0)
if (p0.new) jump:nt .Lpossible_unf2
}
// Subnormal result: normalise the magnitude, shift it right by the (clamped)
// underflow amount while keeping a sticky bit, and re-convert.
{
r13 = add(clb(r7:6),#-1)
r12 = sub(#7,r12)
r10 = USR
r11 = #63
}
{
r13 = min(r12,r11)
r11 = or(r10,#0x030)
r7:6 = asl(r7:6,r13)
r12 = #0
}
{
r15:14 = extractu(r7:6,r13:12)
r7:6 = lsr(r7:6,r13)
r3:2 = #1
}
{
p0 = cmp.gtu(r3:2,r15:14)
if (!p0.new) r6 = or(r2,r6)
r7 = setbit(r7,#52 -32+4)
}
// USR gets 0x30 ORed in only when any of the low four bits is set (presumably
// the underflow and inexact flags for an inexact subnormal -- TODO confirm).
{
r5:4 = neg(r7:6)
p0 = bitsclr(r6,#(1<<4)-1)
if (!p0.new) r10 = r11
}
{
USR = r10
if (p3) r5:4 = r7:6
r10 = #-0x3ff -(52 +4)
}
{
r1:0 = convert_d2df(r5:4)
}
{
r1 += asl(r10,#52 -32)
jumpr r31
}
// Small but positive exponent: USR |= 0x30 only when the magnitude equals
// 0x00100000_00000000 and the low 15 bits of r7 are all set; otherwise the
// value already in r1:0 is returned untouched.
.Lpossible_unf2:
{
r3:2 = extractu(r1:0,#63,#0)
r15:14 = combine(##0x00100000,#0)
r10 = #0x7FFF
}
{
p0 = dfcmp.eq(r15:14,r3:2)
p0 = bitsset(r7,r10)
}
{
if (!p0) jumpr r31
r10 = USR
}
{
r10 = or(r10,#0x30)
}
{
USR = r10
}
{
p0 = dfcmp.eq(r1:0,r1:0)
jumpr r31
}
// Overflow: USR |= 0x28, then return the largest finite magnitude or infinity
// depending on the USR field at bits 23:22 XORed with the result sign.
.Ldiv_ovf:
{
r10 = USR
r3:2 = combine(##0x7fefffff,#-1)
r1 = mux(p3,#0,#-1)
}
{
r7:6 = combine(##0x7ff00000,#0)
r5 = extractu(r10,#2,#22)
r10 = or(r10,#0x28)
}
// r4 receives the old r5 (rounding field); r5 becomes field XOR sign.
{
USR = r10
r5 ^= lsr(r1,#31)
r4 = r5
}
{
p0 = !cmp.eq(r4,#1)
p0 = !cmp.eq(r5,#2)
if (p0.new) r3:2 = r7:6
p0 = dfcmp.eq(r3:2,r3:2)
}
{
r1:0 = insert(r3:2,#63,#0)
jumpr r31
}
// At least one operand is zero, subnormal, infinite or NaN.
.Ldiv_abnormal:
// p0 = neither operand is NaN; p1 = both infinite; p2 = both zero.
{
p0 = dfclass(r1:0,#0x0F)
p0 = dfclass(r3:2,#0x0F)
p3 = cmp.gt(r28,#-1)
}
{
p1 = dfclass(r1:0,#0x08)
p1 = dfclass(r3:2,#0x08)
}
{
p2 = dfclass(r1:0,#0x01)
p2 = dfclass(r3:2,#0x01)
}
{
if (!p0) jump .Ldiv_nan
if (p1) jump .Ldiv_invalid
}
{
if (p2) jump .Ldiv_invalid
}
// p2 false: dividend is zero or divisor is infinite -> zero result.
// p1 false: dividend is infinite or divisor is zero -> infinite result.
{
p2 = dfclass(r1:0,#(0x0F ^ 0x01))
p2 = dfclass(r3:2,#(0x0F ^ 0x08))
}
{
p1 = dfclass(r1:0,#(0x0F ^ 0x08))
p1 = dfclass(r3:2,#(0x0F ^ 0x01))
}
{
if (!p2) jump .Ldiv_zero_result
if (!p1) jump .Ldiv_inf_result
}
// Remaining case: at least one subnormal. p0/p1 record which operands are
// normal; subnormal ones are shifted up and given a compensated exponent.
{
p0 = dfclass(r1:0,#0x02)
p1 = dfclass(r3:2,#0x02)
r10 = ##0x00100000
}
{
r13:12 = combine(r3,r1)
r1 = insert(r10,#11 +1,#52 -32)
r3 = insert(r10,#11 +1,#52 -32)
}
{
if (p0) r1 = or(r1,r10)
if (p1) r3 = or(r3,r10)
}
{
r5 = add(clb(r1:0),#-11)
r4 = add(clb(r3:2),#-11)
r10 = #1
}
{
r12 = extractu(r12,#11,#52 -32)
r13 = extractu(r13,#11,#52 -32)
}
{
r1:0 = asl(r1:0,r5)
r3:2 = asl(r3:2,r4)
if (!p0) r12 = sub(r10,r5)
if (!p1) r13 = sub(r10,r4)
}
{
r7:6 = extractu(r3:2,#23,#52 -23)
}
// r8 still holds the seed pattern written by the entry packets.
{
r9 = or(r8,r6)
jump .Ldenorm_continue
}
// Zero quotient carrying the XOR of the operand signs.
.Ldiv_zero_result:
{
r1 = xor(r1,r3)
r3:2 = #0
}
{
r1:0 = insert(r3:2,#63,#0)
jumpr r31
}
// Infinite quotient carrying the XOR of the operand signs. When the divisor
// is zero and the dividend is not infinite, 0x04 is ORed into USR first
// (presumably the divide-by-zero flag -- TODO confirm).
.Ldiv_inf_result:
{
p2 = dfclass(r3:2,#0x01)
p2 = dfclass(r1:0,#(0x0F ^ 0x08))
}
{
r10 = USR
if (!p2) jump 1f
r1 = xor(r1,r3)
}
{
r10 = or(r10,#0x04)
}
{
USR = r10
}
1:
{
r3:2 = combine(##0x7ff00000,#0)
p0 = dfcmp.uo(r3:2,r3:2)
}
{
r1:0 = insert(r3:2,#63,#0)
jumpr r31
}
// NaN operand: copy the NaN over the non-NaN operand, run both through
// convert_df2sf (results discarded; presumably to raise invalid for a
// signalling NaN -- confirm) and return the all-ones NaN pattern.
.Ldiv_nan:
{
p0 = dfclass(r1:0,#0x10)
p1 = dfclass(r3:2,#0x10)
if (!p0.new) r1:0 = r3:2
if (!p1.new) r3:2 = r1:0
}
{
r5 = convert_df2sf(r1:0)
r4 = convert_df2sf(r3:2)
}
{
r1:0 = #-1
jumpr r31
}
// 0/0 or inf/inf: return the double conversion of the single-precision
// pattern 0x7f800001 (a NaN encoding).
.Ldiv_invalid:
{
r10 = ##0x7f800001
}
{
r1:0 = convert_sf2df(r10)
jumpr r31
}
.size __hexagon_divdf3,.-__hexagon_divdf3

View file

@ -0,0 +1,536 @@
// Double-precision fused multiply-add.
//
// Entry points: __hexagon_fmadf4, __hexagon_fmadf5 and fma (all the same
// address), plus the alias __qdsp_fmadf5.
// The multiplicands are read from r1:0 and r3:2 and the addend from r5:4; the
// result is left in r1:0. The main path allocates a 32-byte frame and saves
// r17:16 / r19:18 in it.
.text
.global __hexagon_fmadf4
.type __hexagon_fmadf4,@function
.global __hexagon_fmadf5
.type __hexagon_fmadf5,@function
.global fma
.type fma,@function
.global __qdsp_fmadf5 ; .set __qdsp_fmadf5, __hexagon_fmadf5
.p2align 5
__hexagon_fmadf4:
__hexagon_fmadf5:
fma:
// p0 = both multiplicands are normal (dfclass mask #2, results ANDed).
{
p0 = dfclass(r1:0,#2)
p0 = dfclass(r3:2,#2)
r13:12 = #0
r15:14 = #0
}
// Place the 52 fraction bits of each multiplicand at bit 8; r7 holds the
// implicit-one bit for the high words.
{
r13:12 = insert(r1:0,#52,#11 -3)
r15:14 = insert(r3:2,#52,#11 -3)
r7 = ##0x10000000
allocframe(#32)
}
// Start the 64x64 multiply (low x low) and OR in the implicit ones.
{
r9:8 = mpyu(r12,r14)
if (!p0) jump .Lfma_abnormal_ab
r13 = or(r13,r7)
r15 = or(r15,r7)
}
// Addend must be normal too; r11:10 starts as its implicit-one bit.
{
p0 = dfclass(r5:4,#2)
if (!p0.new) jump:nt .Lfma_abnormal_c
r11:10 = combine(r7,#0)
r7:6 = combine(#0,r9)
}
// Re-entry point used by .Lfma_abnormal_c once a subnormal addend has been
// given exponent field 1 and r11:10 has been cleared (no implicit one).
.Lfma_abnormal_c_restart:
// Cross products are accumulated while the callee-saved pairs are spilled.
{
r7:6 += mpyu(r14,r13)
r11:10 = insert(r5:4,#52,#11 -3)
memd(r29+#0) = r17:16
memd(r29+#8) = r19:18
}
{
r7:6 += mpyu(r12,r15)
r19:18 = neg(r11:10)
p0 = cmp.gt(r5,#-1)
r28 = xor(r1,r3)
}
// r18/r19 = exponent fields of the multiplicands; the addend mantissa is
// negated when the addend is negative.
{
r18 = extractu(r1,#11,#20)
r19 = extractu(r3,#11,#20)
r17:16 = combine(#0,r7)
if (!p0) r11:10 = r19:18
}
// r17:16 / r9:8 = high / low 64 bits of the product; r18 = sum of the
// multiplicand exponents; r19 = addend exponent.
{
r17:16 += mpyu(r13,r15)
r9:8 = combine(r6,r8)
r18 = add(r18,r19)
r19 = extractu(r5,#11,#20)
}
{
r18 = add(r18,#-1023 +(4))
p3 = !cmp.gt(r28,#-1)
r7:6 = #0
r15:14 = #0
}
// Negate the 128-bit product when the multiplicand signs differ (p3/p0), and
// order exponents so r18 holds the larger one (p1 = addend exponent larger).
{
r7:6 = sub(r7:6,r9:8,p3):carry
p0 = !cmp.gt(r28,#-1)
p1 = cmp.gt(r19,r18)
if (p1.new) r19:18 = combine(r18,r19)
}
{
r15:14 = sub(r15:14,r17:16,p3):carry
if (p0) r9:8 = r7:6
r7:6 = #0
r19 = sub(r18,r19)
}
// When the addend has the larger exponent, swap the roles of product and
// addend so the value about to be right-shifted is always the smaller one.
{
if (p0) r17:16 = r15:14
p0 = cmp.gt(r19,#63)
if (p1) r9:8 = r7:6
if (p1) r7:6 = r9:8
}
{
if (p1) r17:16 = r11:10
if (p1) r11:10 = r17:16
if (p0) r19 = add(r19,#-64)
r28 = #63
}
// 128-bit arithmetic right shift of r11:10 / r7:6 by the exponent difference
// (clamped), with shifted-out bits captured in r5:4 for the sticky bit.
{
if (p0) r7:6 = r11:10
r28 = asr(r11,#31)
r13 = min(r19,r28)
r12 = #0
}
{
if (p0) r11:10 = combine(r28,r28)
r5:4 = extract(r7:6,r13:12)
r7:6 = lsr(r7:6,r13)
r12 = sub(#64,r13)
}
{
r15:14 = #0
r28 = #-2
r7:6 |= lsl(r11:10,r12)
r11:10 = asr(r11:10,r13)
}
{
p3 = cmp.gtu(r5:4,r15:14)
if (p3.new) r6 = and(r6,r28)
r15:14 = #1
r5:4 = #0
}
// 128-bit add of the aligned values; p3 feeds the carry-in.
{
r9:8 = add(r7:6,r9:8,p3):carry
}
{
r17:16 = add(r11:10,r17:16,p3):carry
r28 = #62
}
// If the high 64 bits are pure sign extension (clb-2 == 62), shift the whole
// 128-bit value up by 62 bits before normalising.
{
r12 = add(clb(r17:16),#-2)
if (!cmp.eq(r12.new,r28)) jump:t 1f
}
{
r11:10 = extractu(r9:8,#62,#2)
r9:8 = asl(r9:8,#62)
r18 = add(r18,#-62)
}
{
r17:16 = insert(r11:10,#62,#0)
}
{
r12 = add(clb(r17:16),#-2)
}
.falign
// Normalise: shift left by r12, pull in bits from the low half, and keep the
// remaining low bits in r5:4 as sticky information.
1:
{
r11:10 = asl(r17:16,r12)
r5:4 |= asl(r9:8,r12)
r13 = sub(#64,r12)
r18 = sub(r18,r12)
}
{
r11:10 |= lsr(r9:8,r13)
p2 = cmp.gtu(r15:14,r5:4)
r28 = #1023 +1023 -2
}
// p0 = exponent within (1, 2044]; otherwise take the careful path.
{
if (!p2) r10 = or(r10,r14)
p0 = !cmp.gt(r18,r28)
p0 = cmp.gt(r18,#1)
if (!p0.new) jump:nt .Lfma_ovf_unf
}
// p0 = integer result is zero (1 > r11:10 unsigned).
{
p0 = cmp.gtu(r15:14,r11:10)
r1:0 = convert_d2df(r11:10)
r18 = add(r18,#-1023 -60)
r17:16 = memd(r29+#0)
}
{
r1 += asl(r18,#20)
r19:18 = memd(r29+#8)
if (!p0) dealloc_return
}
// Exact-zero sum: +0, or -0 when the USR field at bits 23:22 equals 2.
.Ladd_yields_zero:
{
r28 = USR
r1:0 = #0
}
{
r28 = extractu(r28,#2,#22)
r17:16 = memd(r29+#0)
r19:18 = memd(r29+#8)
}
{
p0 = cmp.eq(r28,#2)
if (p0.new) r1 = ##0x80000000
dealloc_return
}
// Exponent outside the safe window: compute the final exponent field (r6)
// and dispatch to overflow, near-underflow, or subnormal construction.
.Lfma_ovf_unf:
{
p0 = cmp.gtu(r15:14,r11:10)
if (p0.new) jump:nt .Ladd_yields_zero
}
{
r1:0 = convert_d2df(r11:10)
r18 = add(r18,#-1023 -60)
r28 = r18
}
{
r1 += asl(r18,#20)
r7 = extractu(r1,#11,#20)
}
{
r6 = add(r18,r7)
r17:16 = memd(r29+#0)
r19:18 = memd(r29+#8)
r9:8 = abs(r11:10)
}
{
p0 = cmp.gt(r6,##1023 +1023)
if (p0.new) jump:nt .Lfma_ovf
}
{
p0 = cmp.gt(r6,#0)
if (p0.new) jump:nt .Lpossible_unf0
}
// Subnormal result: normalise the magnitude, shift right by the clamped
// underflow amount keeping a sticky bit, then re-convert.
{
r7 = add(clb(r9:8),#-2)
r6 = sub(#1+5,r28)
p3 = cmp.gt(r11,#-1)
}
{
r6 = add(r6,r7)
r9:8 = asl(r9:8,r7)
r1 = USR
r28 = #63
}
{
r7 = min(r6,r28)
r6 = #0
r0 = #0x0030
}
{
r3:2 = extractu(r9:8,r7:6)
r9:8 = asr(r9:8,r7)
}
{
p0 = cmp.gtu(r15:14,r3:2)
if (!p0.new) r8 = or(r8,r14)
r9 = setbit(r9,#20 +3)
}
// USR |= 0x30 only when any of the low three bits is set (presumably the
// underflow and inexact flags -- TODO confirm).
{
r11:10 = neg(r9:8)
p1 = bitsclr(r8,#(1<<3)-1)
if (!p1.new) r1 = or(r1,r0)
r3:2 = #0
}
{
if (p3) r11:10 = r9:8
USR = r1
r28 = #-1023 -(52 +3)
}
{
r1:0 = convert_d2df(r11:10)
}
{
r1 += asl(r28,#20)
dealloc_return
}
// Small positive exponent: return r1:0 as is unless its low word is zero and
// no bit of 0x7fefffff is set in its high word; in that case USR |= 0x30 is
// applied when the low 15 bits of r9 are all set.
.Lpossible_unf0:
{
r28 = ##0x7fefffff
r9:8 = abs(r11:10)
}
{
p0 = cmp.eq(r0,#0)
p0 = bitsclr(r1,r28)
if (!p0.new) dealloc_return:t
r28 = #0x7fff
}
{
p0 = bitsset(r9,r28)
r3 = USR
r2 = #0x0030
}
{
if (p0) r3 = or(r3,r2)
}
{
USR = r3
}
{
p0 = dfcmp.eq(r1:0,r1:0)
dealloc_return
}
// Overflow: USR |= 0x28, result keeps the sign of the integer sum.
// NOTE(review): the p0 built from the two rounding-field compares below is
// overwritten by the dfcmp.eq in the following packet before anything reads
// it, and dfcmp.eq of 0x7ff00000_00000000 with itself is true, so r11:10 is
// always replaced by infinity. The overflow handlers in dfaddsub.s, dfmul.s
// and dfdiv.s keep the conditional move in the same packet as the compares and
// can return the largest finite value instead -- confirm whether this
// difference is intended.
.Lfma_ovf:
{
r28 = USR
r11:10 = combine(##0x7fefffff,#-1)
r1:0 = r11:10
}
{
r9:8 = combine(##0x7ff00000,#0)
r3 = extractu(r28,#2,#22)
r28 = or(r28,#0x28)
}
{
USR = r28
r3 ^= lsr(r1,#31)
r2 = r3
}
{
p0 = !cmp.eq(r2,#1)
p0 = !cmp.eq(r3,#2)
}
{
p0 = dfcmp.eq(r9:8,r9:8)
if (p0.new) r11:10 = r9:8
}
{
r1:0 = insert(r11:10,#63,#0)
dealloc_return
}
// A multiplicand is not normal. The frame is released first because every
// exit from here either returns with jumpr or restarts at fma.
.Lfma_abnormal_ab:
{
r9:8 = extractu(r1:0,#63,#0)
r11:10 = extractu(r3:2,#63,#0)
deallocframe
}
// Reorder so r1:0 holds the multiplicand with the larger magnitude.
{
p3 = cmp.gtu(r9:8,r11:10)
if (!p3.new) r1:0 = r3:2
if (!p3.new) r3:2 = r1:0
}
{
p0 = dfclass(r1:0,#0x0f)
if (!p0.new) jump:nt .Lnan
if (!p3) r9:8 = r11:10
if (!p3) r11:10 = r9:8
}
// p1 = infinity times a non-zero, non-NaN value; p0 = infinity times zero;
// p2 = smaller multiplicand is zero.
{
p1 = dfclass(r1:0,#0x08)
p1 = dfclass(r3:2,#0x0e)
}
{
p0 = dfclass(r1:0,#0x08)
p0 = dfclass(r3:2,#0x01)
}
{
if (p1) jump .Lab_inf
p2 = dfclass(r3:2,#0x01)
}
{
if (p0) jump .Linvalid
if (p2) jump .Lab_true_zero
r28 = ##0x7c000000
}
// Remaining case: the smaller multiplicand is subnormal. If the larger one
// has none of the exponent bits in 0x7c000000 set, use the tiny shortcut.
{
p0 = bitsclr(r1,r28)
if (p0.new) jump:nt .Lfma_ab_tiny
}
// Otherwise normalise the subnormal, take the shift out of the other
// multiplicand's exponent, and retry from the top.
{
r28 = add(clb(r11:10),#-11)
}
{
r11:10 = asl(r11:10,r28)
}
{
r3:2 = insert(r11:10,#63,#0)
r1 -= asl(r28,#20)
}
jump fma
// Both multiplicands are replaced by magnitude 0x00100000_00000000 (their
// sign bits are kept by the 63-bit insert) and the routine is retried.
.Lfma_ab_tiny:
r9:8 = combine(##0x00100000,#0)
{
r1:0 = insert(r9:8,#63,#0)
r3:2 = insert(r9:8,#63,#0)
}
jump fma
// Infinite product: fold the other multiplicand's sign into r1:0, then check
// the addend for NaN and for an infinity.
.Lab_inf:
{
r3:2 = lsr(r3:2,#63)
p0 = dfclass(r5:4,#0x10)
}
{
r1:0 ^= asl(r3:2,#63)
if (p0) jump .Lnan
}
{
p1 = dfclass(r5:4,#0x08)
if (p1.new) jump:nt .Lfma_inf_plus_inf
}
{
jumpr r31
}
.falign
// Infinite product plus infinite addend: equal values return the infinity,
// otherwise the operation is invalid.
.Lfma_inf_plus_inf:
{
p0 = dfcmp.eq(r1:0,r5:4)
if (!p0.new) jump:nt .Linvalid
}
{
jumpr r31
}
// NaN handling: non-NaN operands are overwritten with r1:0, each value is run
// through convert_df2sf (results discarded; presumably to raise invalid for
// a signalling NaN -- confirm) and the all-ones NaN pattern is returned.
.Lnan:
{
p0 = dfclass(r3:2,#0x10)
p1 = dfclass(r5:4,#0x10)
if (!p0.new) r3:2 = r1:0
if (!p1.new) r5:4 = r1:0
}
{
r3 = convert_df2sf(r3:2)
r2 = convert_df2sf(r5:4)
}
{
r3 = convert_df2sf(r1:0)
r1:0 = #-1
jumpr r31
}
// Invalid operation: return the double conversion of the single-precision
// pattern 0x7f800001 (a NaN encoding).
.Linvalid:
{
r28 = ##0x7f800001
}
{
r1:0 = convert_sf2df(r28)
jumpr r31
}
// Product is exactly zero (r3:2 is the zero multiplicand).
.Lab_true_zero:
{
p0 = dfclass(r5:4,#0x10)
if (p0.new) jump:nt .Lnan
if (p0.new) r1:0 = r5:4
}
// p0 = addend compares equal to zero; r1 = sign bit of the other multiplicand.
{
p0 = dfcmp.eq(r3:2,r5:4)
r1 = lsr(r1,#31)
}
// r3:2 becomes the signed zero product; a non-zero addend is the result.
{
r3 ^= asl(r1,#31)
if (!p0) r1:0 = r5:4
if (!p0) jumpr r31
}
// Zero plus zero: identical bit patterns return that zero; otherwise the
// sign follows the USR field at bits 23:22 (value 2 -> -0).
{
p0 = cmp.eq(r3:2,r5:4)
if (p0.new) jumpr:t r31
r1:0 = r3:2
}
{
r28 = USR
}
{
r28 = extractu(r28,#2,#22)
r1:0 = #0
}
{
p0 = cmp.eq(r28,#2)
if (p0.new) r1 = ##0x80000000
jumpr r31
}
.falign
// Addend is not normal (multiplicands are). The frame is released up front.
.Lfma_abnormal_c:
{
p0 = dfclass(r5:4,#0x10)
if (p0.new) jump:nt .Lnan
if (p0.new) r1:0 = r5:4
deallocframe
}
// Infinite addend is the result.
{
p0 = dfclass(r5:4,#0x08)
if (p0.new) r1:0 = r5:4
if (p0.new) jumpr:nt r31
}
// Zero addend: the result is just the product, so tail-jump to the multiply
// routine.
{
p0 = dfclass(r5:4,#0x01)
if (p0.new) jump:nt __hexagon_muldf3
r28 = #1
}
// Subnormal addend: re-allocate the frame, give the addend exponent field 1,
// clear r11:10 so no implicit-one bit is present, and rejoin the main path.
{
allocframe(#32)
r11:10 = #0
r5 = insert(r28,#11,#20)
jump .Lfma_abnormal_c_restart
}
.size fma,.-fma

View file

@ -0,0 +1,51 @@
// Double-precision minimum and maximum.
//
// Exports __hexagon_mindf3 / fmin and __hexagon_maxdf3 / fmax, plus the
// __qdsp_mindf3 / __qdsp_maxdf3 aliases. Operands are read from r1:0 and r3:2;
// the result is left in r1:0.
.text
.global __hexagon_mindf3
.global __hexagon_maxdf3
.global fmin
.type fmin,@function
.global fmax
.type fmax,@function
.type __hexagon_mindf3,@function
.type __hexagon_maxdf3,@function
.global __qdsp_mindf3 ; .set __qdsp_mindf3, __hexagon_mindf3
.global __qdsp_maxdf3 ; .set __qdsp_maxdf3, __hexagon_maxdf3
.p2align 5
// Minimum of r1:0 and r3:2.
__hexagon_mindf3:
fmin:
// p0 = first operand is NaN (dfclass mask 0x10); p1 = first > second.
// r5:4 keeps a copy of the original first operand.
{
p0 = dfclass(r1:0,#0x10)
p1 = dfcmp.gt(r1:0,r3:2)
r5:4 = r1:0
}
// Take the second operand when the first is NaN or greater. p2 is computed
// from the values as they were before this packet's writes; when the operands
// do not compare equal the selection is final.
{
if (p0) r1:0 = r3:2
if (p1) r1:0 = r3:2
p2 = dfcmp.eq(r1:0,r3:2)
if (!p2.new) jumpr:t r31
}
// Operands compare equal: OR the bit patterns, so that for +0 and -0 the
// result has the sign bit set (-0).
{
r1:0 = or(r5:4,r3:2)
jumpr r31
}
.size __hexagon_mindf3,.-__hexagon_mindf3
.falign
// Maximum of the doubles in r1:0 and r3:2; result in r1:0.
__hexagon_maxdf3:
fmax:
// p0 = first operand is NaN (dfclass mask 0x10); p1 = second > first.
// r5:4 keeps a copy of the original first operand.
{
p0 = dfclass(r1:0,#0x10)
p1 = dfcmp.gt(r3:2,r1:0)
r5:4 = r1:0
}
// Take the second operand when the first is NaN or smaller. p2 is computed
// from the values as they were before this packet's writes; when the operands
// do not compare equal the selection is final.
{
if (p0) r1:0 = r3:2
if (p1) r1:0 = r3:2
p2 = dfcmp.eq(r1:0,r3:2)
if (!p2.new) jumpr:t r31
}
// Operands compare equal: AND the bit patterns, so that for +0 and -0 the
// result has the sign bit clear (+0).
{
r1:0 = and(r5:4,r3:2)
jumpr r31
}
.size __hexagon_maxdf3,.-__hexagon_maxdf3

View file

@ -0,0 +1,309 @@
// Double-precision floating-point multiply.
//
// Entry point: __hexagon_muldf3 (also exported as __qdsp_muldf3,
// __hexagon_fast_muldf3 and __hexagon_fast2_muldf3).
// Operands are read from r1:0 and r3:2; the product is left in r1:0 and
// control returns through r31.
.text
.global __hexagon_muldf3
.type __hexagon_muldf3,@function
.global __qdsp_muldf3 ; .set __qdsp_muldf3, __hexagon_muldf3
.global __hexagon_fast_muldf3 ; .set __hexagon_fast_muldf3, __hexagon_muldf3
.global __hexagon_fast2_muldf3 ; .set __hexagon_fast2_muldf3, __hexagon_muldf3
.p2align 5
__hexagon_muldf3:
// p0 = both operands are normal (dfclass mask #2, results ANDed).
// r13:12 starts as the implicit-one bit (bit 62) for the first mantissa.
{
p0 = dfclass(r1:0,#2)
p0 = dfclass(r3:2,#2)
r13:12 = combine(##0x40000000,#0)
}
// First mantissa: 52 fraction bits inserted at bit 10. Second mantissa: the
// operand shifted left by 10; its top two bits are fixed up in the next packet.
// r9:8 = 1 is reused later as the sticky bit / smallest-magnitude constant.
{
r13:12 = insert(r1:0,#52,#11 -1)
r5:4 = asl(r3:2,#11 -1)
r28 = #-1024
r9:8 = #1
}
// Top two bits of the second mantissa are forced to 01 (implicit one).
{
r7:6 = mpyu(r4,r13)
r5:4 = insert(r9:8,#2,#62)
}
// 64x64 -> 128-bit multiply assembled from 32x32 partial products.
{
r15:14 = mpyu(r12,r4)
r7:6 += mpyu(r12,r5)
}
{
r7:6 += lsr(r15:14,#32)
r11:10 = mpyu(r13,r5)
r5:4 = combine(##1024 +1024 -4,#0)
}
// r11:10 = high 64 bits of the product; p1 = the discarded low words are zero.
{
r11:10 += lsr(r7:6,#32)
if (!p0) jump .Lmul_abnormal
p1 = cmp.eq(r14,#0)
p1 = cmp.eq(r6,#0)
}
// Non-zero discarded bits become a sticky bit; r6/r7 = exponent fields.
{
if (!p1) r10 = or(r10,r8)
r6 = extractu(r1,#11,#20)
r7 = extractu(r3,#11,#20)
}
// r6 = exponent sum - 1024; the top bit of r28 = XOR of the operand signs.
{
r15:14 = neg(r11:10)
r6 += add(r28,r7)
r28 = xor(r1,r3)
}
// Negate for a negative product (p2.new is produced in this same packet).
// p0 = 0 < r6 <= 2044 (r4 = 0, r5 = 2044); otherwise take the careful path.
{
if (!p2.new) r11:10 = r15:14
p2 = cmp.gt(r28,#-1)
p0 = !cmp.gt(r6,r5)
p0 = cmp.gt(r6,r4)
if (!p0.new) jump:nt .Lmul_ovf_unf
}
{
r1:0 = convert_d2df(r11:10)
r6 = add(r6,#-1024 -58)
}
{
r1 += asl(r6,#20)
jumpr r31
}
.falign
// Small positive exponent (entered from .Lmul_ovf_unf with r4 = 0x7FEFFFFF):
// return r1:0 as is unless its low word is zero and no bit of r4 is set in its
// high word; in that case USR |= 0x30 is applied when the low 15 bits of r13
// are all set.
.Lpossible_unf1:
{
p0 = cmp.eq(r0,#0)
p0 = bitsclr(r1,r4)
if (!p0.new) jumpr:t r31
r5 = #0x7fff
}
{
p0 = bitsset(r13,r5)
r4 = USR
r5 = #0x030
}
{
if (p0) r4 = or(r4,r5)
}
{
USR = r4
}
{
p0 = dfcmp.eq(r1:0,r1:0)
jumpr r31
}
.falign
// Exponent outside the safe window: compute the final exponent field (r7) and
// dispatch to overflow, near-underflow, or subnormal construction.
.Lmul_ovf_unf:
{
r1:0 = convert_d2df(r11:10)
r13:12 = abs(r11:10)
r7 = add(r6,#-1024 -58)
}
{
r1 += asl(r7,#20)
r7 = extractu(r1,#11,#20)
r4 = ##0x7FEFFFFF
}
{
r7 += add(r6,##-1024 -58)
r5 = #0
}
{
p0 = cmp.gt(r7,##1024 +1024 -2)
if (p0.new) jump:nt .Lmul_ovf
}
{
p0 = cmp.gt(r7,#0)
if (p0.new) jump:nt .Lpossible_unf1
r5 = sub(r6,r5)
r28 = #63
}
// Subnormal result: shift the magnitude right by (5 - r6), clamped to 63,
// keeping a sticky bit, then re-convert.
{
r4 = #0
r5 = sub(#5,r5)
}
{
p3 = cmp.gt(r11,#-1)
r5 = min(r5,r28)
r11:10 = r13:12
}
{
r28 = USR
r15:14 = extractu(r11:10,r5:4)
}
{
r11:10 = asr(r11:10,r5)
r4 = #0x0030
r1 = insert(r9,#11,#20)
}
{
p0 = cmp.gtu(r9:8,r15:14)
if (!p0.new) r10 = or(r10,r8)
r11 = setbit(r11,#20 +3)
}
// USR |= 0x30 only when any of the low three bits is set (presumably the
// underflow and inexact flags -- TODO confirm).
{
r15:14 = neg(r11:10)
p1 = bitsclr(r10,#0x7)
if (!p1.new) r28 = or(r4,r28)
}
{
if (!p3) r11:10 = r15:14
USR = r28
}
{
r1:0 = convert_d2df(r11:10)
p0 = dfcmp.eq(r1:0,r1:0)
}
// Clear the upper ten exponent-field bits (r9 is 0) left by the conversion.
{
r1 = insert(r9,#11 -1,#20 +1)
jumpr r31
}
.falign
// Overflow: USR |= 0x28, then return the largest finite magnitude or infinity
// depending on the USR field at bits 23:22 XORed with the result sign. The
// sign of r1:0 survives because only the low 63 bits are inserted.
.Lmul_ovf:
{
r28 = USR
r13:12 = combine(##0x7fefffff,#-1)
r1:0 = r11:10
}
{
r14 = extractu(r28,#2,#22)
r28 = or(r28,#0x28)
r5:4 = combine(##0x7ff00000,#0)
}
// r28 receives the old r14 (rounding field); r14 becomes field XOR sign.
{
USR = r28
r14 ^= lsr(r1,#31)
r28 = r14
}
{
p0 = !cmp.eq(r28,#1)
p0 = !cmp.eq(r14,#2)
if (p0.new) r13:12 = r5:4
p0 = dfcmp.eq(r1:0,r1:0)
}
{
r1:0 = insert(r13:12,#63,#0)
jumpr r31
}
// At least one operand is not normal. Reorder so r1:0 holds the larger
// magnitude (sign bit stripped), then classify.
.Lmul_abnormal:
{
r13:12 = extractu(r1:0,#63,#0)
r5:4 = extractu(r3:2,#63,#0)
}
{
p3 = cmp.gtu(r13:12,r5:4)
if (!p3.new) r1:0 = r3:2
if (!p3.new) r3:2 = r1:0
}
// Mask 0x0f covers every class except NaN.
{
p0 = dfclass(r1:0,#0x0f)
if (!p0.new) jump:nt .Linvalid_nan
if (!p3) r13:12 = r5:4
if (!p3) r5:4 = r13:12
}
// p1 = infinity times a non-zero, non-NaN value; p0 = infinity times zero;
// p2 = smaller operand is zero.
{
p1 = dfclass(r1:0,#0x08)
p1 = dfclass(r3:2,#0x0e)
}
{
p0 = dfclass(r1:0,#0x08)
p0 = dfclass(r3:2,#0x01)
}
{
if (p1) jump .Ltrue_inf
p2 = dfclass(r3:2,#0x01)
}
{
if (p0) jump .Linvalid_zeroinf
if (p2) jump .Ltrue_zero
r28 = ##0x7c000000
}
// Remaining case: the smaller operand is subnormal. If the larger one has
// none of the exponent bits in 0x7c000000 set, use the tiny shortcut.
{
p0 = bitsclr(r1,r28)
if (p0.new) jump:nt .Lmul_tiny
}
// Otherwise normalise the subnormal, take the shift out of the other
// operand's exponent, and retry from the top.
{
r28 = cl0(r5:4)
}
{
r28 = add(r28,#-11)
}
{
r5:4 = asl(r5:4,r28)
}
{
r3:2 = insert(r5:4,#63,#0)
r1 -= asl(r28,#20)
}
jump __hexagon_muldf3
// Product too small to represent: USR |= 0x30. The result carries the XOR of
// the operand signs; its magnitude is r9:8 (= 1, the smallest subnormal) or
// zero, depending on the USR field at bits 23:22 and the sign.
.Lmul_tiny:
{
r28 = USR
r1:0 = xor(r1:0,r3:2)
}
{
r28 = or(r28,#0x30)
r1:0 = insert(r9:8,#63,#0)
r5 = extractu(r28,#2,#22)
}
{
USR = r28
p0 = cmp.gt(r5,#1)
if (!p0.new) r0 = #0
r5 ^= lsr(r1,#31)
}
{
p0 = cmp.eq(r5,#3)
if (!p0.new) r0 = #0
jumpr r31
}
// Infinity times zero: OR 2 into USR (presumably the invalid flag -- TODO
// confirm) and return the all-ones NaN pattern.
.Linvalid_zeroinf:
{
r28 = USR
}
{
r1:0 = #-1
r28 = or(r28,#2)
}
{
USR = r28
}
{
p0 = dfcmp.uo(r1:0,r1:0)
jumpr r31
}
// NaN operand: run the operands through convert_df2sf (results discarded;
// presumably to raise invalid for a signalling NaN -- confirm) and return the
// all-ones NaN pattern.
.Linvalid_nan:
{
p0 = dfclass(r3:2,#0x0f)
r28 = convert_df2sf(r1:0)
if (p0.new) r3:2 = r1:0
}
{
r2 = convert_df2sf(r3:2)
r1:0 = #-1
jumpr r31
}
.falign
// Zero result: swap so the zero is in r1:0, then fall through to apply the
// other operand's sign.
.Ltrue_zero:
{
r1:0 = r3:2
r3:2 = r1:0
}
// Infinite (or zero) result: XOR the sign bit of r3 into r1:0 and return.
.Ltrue_inf:
{
r3 = extract(r3,#1,#31)
}
{
r1 ^= asl(r3,#31)
jumpr r31
}
.size __hexagon_muldf3,.-__hexagon_muldf3

View file

@ -0,0 +1,277 @@
// Double-precision square root.
//
// Entry points: __hexagon_sqrtdf2 and __hexagon_sqrt (same address), each also
// exported under __qdsp_*, __hexagon_fast_* and __hexagon_fast2_* aliases.
// The operand is read from r1:0 and the result is left in r1:0.
// NOTE(review): `.type sqrt,@function` below names a symbol for which no
// label is defined in the visible part of this file -- confirm it is intended.
.text
.global __hexagon_sqrtdf2
.type __hexagon_sqrtdf2,@function
.global __hexagon_sqrt
.type __hexagon_sqrt,@function
.global __qdsp_sqrtdf2 ; .set __qdsp_sqrtdf2, __hexagon_sqrtdf2; .type __qdsp_sqrtdf2,@function
.global __qdsp_sqrt ; .set __qdsp_sqrt, __hexagon_sqrt; .type __qdsp_sqrt,@function
.global __hexagon_fast_sqrtdf2 ; .set __hexagon_fast_sqrtdf2, __hexagon_sqrtdf2; .type __hexagon_fast_sqrtdf2,@function
.global __hexagon_fast_sqrt ; .set __hexagon_fast_sqrt, __hexagon_sqrt; .type __hexagon_fast_sqrt,@function
.global __hexagon_fast2_sqrtdf2 ; .set __hexagon_fast2_sqrtdf2, __hexagon_sqrtdf2; .type __hexagon_fast2_sqrtdf2,@function
.global __hexagon_fast2_sqrt ; .set __hexagon_fast2_sqrt, __hexagon_sqrt; .type __hexagon_fast2_sqrt,@function
.type sqrt,@function
.p2align 5
__hexagon_sqrtdf2:
__hexagon_sqrt:
// r14 = 24 bits of the operand starting at bit 29 (low exponent bit plus the
// top 23 fraction bits); r28 = exponent field; r5 = single-precision seed
// pattern, r4 = 1.
{
r15:14 = extractu(r1:0,#23 +1,#52 -23)
r28 = extractu(r1,#11,#52 -32)
r5:4 = combine(##0x3f000004,#1)
}
// Fast path requires a normal, non-negative operand (both compares ANDed).
// r9 = single-precision value built from the seed and the operand bits.
{
p2 = dfclass(r1:0,#0x02)
p2 = cmp.gt(r1,#-1)
if (!p2.new) jump:nt .Lsqrt_abnormal
r9 = or(r5,r14)
}
// Re-entered from .Lsqrt_abnormal after a subnormal operand was normalised.
.Ldenormal_restart:
// Seed 1/sqrt with sfinvsqrta; r11:10 keeps the operand bits.
{
r11:10 = r1:0
r7,p0 = sfinvsqrta(r9)
r5 = and(r5,#-16)
r3:2 = #0
}
// Single-precision refinement rounds using :lib multiply-accumulates.
{
r3 += sfmpy(r7,r9):lib
r2 += sfmpy(r7,r5):lib
r6 = r5
r9 = and(r28,#1)
}
// The exponent field of r11 is replaced by 1, leaving the implicit-one bit
// set above the fraction; p1 = exponent field is odd.
{
r6 -= sfmpy(r3,r2):lib
r11 = insert(r4,#11 +1,#52 -32)
p1 = cmp.gtu(r9,#0)
}
{
r3 += sfmpy(r3,r6):lib
r2 += sfmpy(r2,r6):lib
r6 = r5
r9 = mux(p1,#8,#9)
}
// The mantissa shift depends on exponent parity (8 bits if odd, 9 if even).
{
r6 -= sfmpy(r3,r2):lib
r11:10 = asl(r11:10,r9)
r9 = mux(p1,#3,#2)
}
{
r2 += sfmpy(r2,r6):lib
r15:14 = asl(r11:10,r9)
}
// Turn the refined single-precision estimate into an integer multiplier (r8).
{
r2 = and(r2,##0x007fffff)
}
{
r2 = add(r2,##0x00800000 - 3)
r9 = mux(p1,#7,#8)
}
{
r8 = asl(r2,r9)
r9 = mux(p1,#15-(1+1),#15-(1+0))
}
// r13:12 = running root estimate; the rounds below each form the remainder
// operand - root^2 from partial products, multiply its high word by r8, and
// add the correction at a position 16 bits lower each time (r9 += 16).
{
r13:12 = mpyu(r8,r15)
}
// cmp.eq(r0,r0) is always true, so p1, p2 and p3 are all set here; they serve
// as carry-in (no borrow) for the :carry subtractions further down.
{
r1:0 = asl(r11:10,#15)
r15:14 = mpyu(r13,r13)
p1 = cmp.eq(r0,r0)
}
{
r1:0 -= asl(r15:14,#15)
r15:14 = mpyu(r13,r12)
p2 = cmp.eq(r0,r0)
}
{
r1:0 -= lsr(r15:14,#16)
p3 = cmp.eq(r0,r0)
}
{
r1:0 = mpyu(r1,r8)
}
{
r13:12 += lsr(r1:0,r9)
r9 = add(r9,#16)
r1:0 = asl(r11:10,#31)
}
{
r15:14 = mpyu(r13,r13)
r1:0 -= mpyu(r13,r12)
}
{
r1:0 -= asl(r15:14,#31)
r15:14 = mpyu(r12,r12)
}
{
r1:0 -= lsr(r15:14,#33)
}
{
r1:0 = mpyu(r1,r8)
}
{
r13:12 += lsr(r1:0,r9)
r9 = add(r9,#16)
r1:0 = asl(r11:10,#47)
}
{
r15:14 = mpyu(r13,r13)
}
{
r1:0 -= asl(r15:14,#47)
r15:14 = mpyu(r13,r12)
}
{
r1:0 -= asl(r15:14,#16)
r15:14 = mpyu(r12,r12)
}
{
r1:0 -= lsr(r15:14,#17)
}
{
r1:0 = mpyu(r1,r8)
}
{
r13:12 += lsr(r1:0,r9)
}
// Exact 128-bit remainder: square the root estimate with full partial
// products and subtract it from the operand (r15:14 / r1:0 = high / low).
{
r3:2 = mpyu(r13,r12)
r5:4 = mpyu(r12,r12)
r15:14 = #0
r1:0 = #0
}
{
r3:2 += lsr(r5:4,#33)
r5:4 += asl(r3:2,#33)
p1 = cmp.eq(r0,r0)
}
{
r7:6 = mpyu(r13,r13)
r1:0 = sub(r1:0,r5:4,p1):carry
r9:8 = #1
}
// r9:8 = 2*root + 1, the amount the square grows when the root is bumped by 1.
{
r7:6 += lsr(r3:2,#31)
r9:8 += asl(r13:12,#1)
}
// Two conditional "root += 1" correction steps: each trial-subtracts r9:8 from
// the remainder with carry tracking in p2 / p3 and commits only if the
// predicate stays set (no borrow).
{
r15:14 = sub(r11:10,r7:6,p1):carry
r5:4 = sub(r1:0,r9:8,p2):carry
r7:6 = #1
r11:10 = #0
}
{
r3:2 = sub(r15:14,r11:10,p2):carry
r7:6 = add(r13:12,r7:6)
r28 = add(r28,#-0x3ff)
}
{
if (p2) r13:12 = r7:6
if (p2) r1:0 = r5:4
if (p2) r15:14 = r3:2
}
// The unbiased exponent is halved for the result.
{
r5:4 = sub(r1:0,r9:8,p3):carry
r7:6 = #1
r28 = asr(r28,#1)
}
{
r3:2 = sub(r15:14,r11:10,p3):carry
r7:6 = add(r13:12,r7:6)
}
{
if (p3) r13:12 = r7:6
if (p3) r1:0 = r5:4
r2 = #1
}
// A non-zero low remainder sets the sticky bit so the final conversion rounds
// as inexact.
{
p0 = cmp.eq(r1:0,r11:10)
if (!p0.new) r12 = or(r12,r2)
r3 = cl0(r13:12)
r28 = add(r28,#-63)
}
// Convert the unsigned root to double and patch in the exponent.
{
r1:0 = convert_ud2df(r13:12)
r28 = add(r28,r3)
}
{
r1 += asl(r28,#52 -32)
jumpr r31
}
// Operand is not a positive normal number.
.Lsqrt_abnormal:
// Zero (either sign) is returned unchanged.
{
p0 = dfclass(r1:0,#0x01)
if (p0.new) jumpr:t r31
}
{
p0 = dfclass(r1:0,#0x10)
if (p0.new) jump:nt .Lsqrt_nan
}
// Negative non-zero operand: invalid. r28 is loaded with the NaN seed only on
// that path.
{
p0 = cmp.gt(r1,#-1)
if (!p0.new) jump:nt .Lsqrt_invalid_neg
if (!p0.new) r28 = ##0x7F800001
}
// Positive infinity is returned unchanged.
{
p0 = dfclass(r1:0,#0x08)
if (p0.new) jumpr:nt r31
}
// Remaining case: positive subnormal. Normalise the fraction, derive the
// compensated exponent in r28, rebuild the seed inputs and rejoin the main
// path.
{
r1:0 = extractu(r1:0,#52,#0)
}
{
r28 = add(clb(r1:0),#-11)
}
{
r1:0 = asl(r1:0,r28)
r28 = sub(#1,r28)
}
{
r1 = insert(r28,#1,#52 -32)
}
{
r3:2 = extractu(r1:0,#23 +1,#52 -23)
r5 = ##0x3f000004
}
{
r9 = or(r5,r2)
r5 = and(r5,#-16)
jump .Ldenormal_restart
}
// NaN operand: convert_df2sf result is discarded (presumably to raise invalid
// for a signalling NaN -- confirm); return the all-ones NaN pattern.
.Lsqrt_nan:
{
r28 = convert_df2sf(r1:0)
r1:0 = #-1
jumpr r31
}
// Negative operand: return the double conversion of 0x7F800001 (a NaN
// encoding) held in r28.
.Lsqrt_invalid_neg:
{
r1:0 = convert_sf2df(r28)
jumpr r31
}
.size __hexagon_sqrt,.-__hexagon_sqrt
.size __hexagon_sqrtdf2,.-__hexagon_sqrtdf2

View file

@ -0,0 +1,64 @@
// Signed 64-bit integer division: r1:0 / r3:2, quotient returned in r1:0.
// Also exported as __qdsp_divdi3.
// FUNCTION_BEGIN / FUNCTION_END are assembler macros defined outside this file
// (presumably in func_macro.s -- confirm).
FUNCTION_BEGIN __hexagon_divdi3
// p2 / p3 = sign bits of dividend / divisor.
{
p2 = tstbit(r1,#31)
p3 = tstbit(r3,#31)
}
// Work on magnitudes from here on.
{
r1:0 = abs(r1:0)
r3:2 = abs(r3:2)
}
// r5:4 = |divisor|, r3:2 = |dividend| (running remainder); r6/r7 = leading
// zero counts.
{
r6 = cl0(r1:0)
r7 = cl0(r3:2)
r5:4 = r3:2
r3:2 = r1:0
}
// p3 = signs differ (quotient must be negated). r10 = how far the divisor can
// be shifted left to line up with the dividend. r1:0 = quotient accumulator,
// r15:14 = current quotient bit.
{
p3 = xor(p2,p3)
r10 = sub(r7,r6)
r1:0 = #0
r15:14 = #1
}
// r13:12 = divisor aligned to the dividend; loop runs r10 + 1 times.
{
r11 = add(r10,#1)
r13:12 = lsl(r5:4,r10)
r15:14 = lsl(r15:14,r10)
}
{
p0 = cmp.gtu(r5:4,r3:2)
loop0(1f,r11)
}
// |divisor| > |dividend|: quotient is zero, skip the loop.
{
if (p0) jump .hexagon_divdi3_return
}
.falign
// Shift-and-subtract loop: one quotient bit per iteration.
1:
{
p0 = cmp.gtu(r13:12,r3:2)
}
{
r7:6 = sub(r3:2, r13:12)
r9:8 = add(r1:0, r15:14)
}
// When the shifted divisor fits (p0 false), commit the subtraction and set
// the quotient bit; otherwise keep both unchanged.
{
r1:0 = vmux(p0, r1:0, r9:8)
r3:2 = vmux(p0, r3:2, r7:6)
}
{
r15:14 = lsr(r15:14, #1)
r13:12 = lsr(r13:12, #1)
}:endloop0
// Apply the sign: return -quotient when the operand signs differed.
.hexagon_divdi3_return:
{
r3:2 = neg(r1:0)
}
{
r1:0 = vmux(p3,r3:2,r1:0)
jumpr r31
}
FUNCTION_END __hexagon_divdi3
.globl __qdsp_divdi3
.set __qdsp_divdi3, __hexagon_divdi3

View file

@ -0,0 +1,53 @@
// Signed 32-bit integer division: r0 / r1, quotient returned in r0.
// Also exported as __qdsp_divsi3.
// FUNCTION_BEGIN / FUNCTION_END are assembler macros defined outside this file
// (presumably in func_macro.s -- confirm).
FUNCTION_BEGIN __hexagon_divsi3
// All reads in a packet see the values from before the packet, so this sets
// r1 = |dividend| and r2 = |divisor| while p0 / p1 record the operand signs.
{
p0 = cmp.ge(r0,#0)
p1 = cmp.ge(r1,#0)
r1 = abs(r0)
r2 = abs(r1)
}
// r3 / r4 = leading-zero counts; r5 = |dividend| - |divisor|;
// p2 = |divisor| > |dividend|.
{
r3 = cl0(r1)
r4 = cl0(r2)
r5 = sub(r1,r2)
p2 = cmp.gtu(r2,r1)
}
// Quotient 0 when the divisor is larger. p1 = signs differ.
// p0 = |divisor| > |dividend| - |divisor|, i.e. the magnitude quotient is 1.
{
r0 = #0
p1 = xor(p0,p1)
p0 = cmp.gtu(r2,r5)
if (p2) jumpr r31
}
// Quotient +/-1 shortcut; otherwise r4 = number of loop iterations and
// r3 = quotient bit.
{
r0 = mux(p1,#-1,#1)
if (p0) jumpr r31
r4 = sub(r4,r3)
r3 = #1
}
// Shift the quotient bit (r3) and the divisor (r2) left together.
{
r0 = #0
r3:2 = vlslw(r3:2,r4)
loop0(1f,r4)
}
.falign
// Shift-and-subtract loop: subtract the shifted divisor when it fits and add
// the matching quotient bit, then shift both right by one.
1:
{
p0 = cmp.gtu(r2,r1)
if (!p0.new) r1 = sub(r1,r2)
if (!p0.new) r0 = add(r0,r3)
r3:2 = vlsrw(r3:2,#1)
}:endloop0
// Final (unshifted) step, then return directly when the signs matched.
{
p0 = cmp.gtu(r2,r1)
if (!p0.new) r0 = add(r0,r3)
if (!p1) jumpr r31
}
// Signs differed: negate the quotient.
{
r0 = neg(r0)
jumpr r31
}
FUNCTION_END __hexagon_divsi3
.globl __qdsp_divsi3
.set __qdsp_divsi3, __hexagon_divsi3

View file

@ -0,0 +1,266 @@
// fast2_dadd_asm: adds the 64-bit values in R1:0 and R3:2, result in R1:0.
//
// NOTE(review): the number format is not described in this file; the notes
// below are read off the instruction sequence. The low halfword of each low
// register (R0, R2) is used as a signed 16-bit exponent; it is cleared before
// the remaining bits are shifted and added as a signed 64-bit mantissa, and
// the result exponent is written back into the low halfword of R0.
.text
.global fast2_dadd_asm
.type fast2_dadd_asm, @function
fast2_dadd_asm:
.falign
// R4 / R5 = sign-extended exponents; R6 (low half of the VABSDIFFH result) =
// absolute exponent difference; R9 = 62 is the shift clamp.
{
R7:6 = VABSDIFFH(R1:0, R3:2)
R9 = #62
R4 = SXTH(R0)
R5 = SXTH(R2)
// R8 = larger exponent + 1 (provisional result exponent).
} {
R6 = SXTH(R6)
P0 = CMP.GT(R4, R5);
if ( P0.new) R8 = add(R4, #1)
if (!P0.new) R8 = add(R5, #1)
// The operand with the larger exponent is shifted right by 1; the other one
// by the clamped difference + 1 (set in the next packet).
} {
if ( P0) R4 = #1
if (!P0) R5 = #1
R0.L = #0
R6 = MIN(R6, R9)
} {
if (!P0) R4 = add(R6, #1)
if ( P0) R5 = add(R6, #1)
R2.L = #0
R11:10 = #0
} {
R1:0 = ASR(R1:0, R4)
R3:2 = ASR(R3:2, R5)
// R11:10 becomes 0x00000000_00008001, the value returned at .Ldenorma.
} {
R1:0 = add(R1:0, R3:2)
R10.L = #0x8001
} {
R4 = clb(R1:0)
R9 = #58
// R4 = normalisation shift; p0 = shift exceeds 58.
} {
R4 = add(R4, #-1)
p0 = cmp.gt(R4, R9)
} {
R1:0 = ASL(R1:0, R4)
R8 = SUB(R8, R4)
if(p0) jump .Ldenorma
// Normal exit: store the result exponent in the low 16 bits of R0.
} {
R0 = insert(R8, #16, #0)
jumpr r31
}
// Normalisation shift above 58: return the fixed pattern built in R11:10.
.Ldenorma:
{
R1:0 = R11:10
jumpr r31
}
// fast2_dsub_asm: subtracts the 64-bit value in R3:2 from the one in R1:0,
// result in R1:0.
//
// NOTE(review): the number format is not described in this file; the notes
// below are read off the instruction sequence. The low halfword of each low
// register (R0, R2) is used as a signed 16-bit exponent; it is cleared before
// the remaining bits are shifted and subtracted as a signed 64-bit mantissa,
// and the result exponent is written back into the low halfword of R0.
.text
.global fast2_dsub_asm
.type fast2_dsub_asm, @function
fast2_dsub_asm:
.falign
// R4 / R5 = sign-extended exponents; R6 (low half of the VABSDIFFH result) =
// absolute exponent difference; R9 = 62 is the shift clamp.
{
R7:6 = VABSDIFFH(R1:0, R3:2)
R9 = #62
R4 = SXTH(R0)
R5 = SXTH(R2)
// R8 = larger exponent + 1 (provisional result exponent).
} {
R6 = SXTH(R6)
P0 = CMP.GT(R4, R5);
if ( P0.new) R8 = add(R4, #1)
if (!P0.new) R8 = add(R5, #1)
// The operand with the larger exponent is shifted right by 1; the other one
// by the clamped difference + 1 (set in the next packet).
} {
if ( P0) R4 = #1
if (!P0) R5 = #1
R0.L = #0
R6 = MIN(R6, R9)
} {
if (!P0) R4 = add(R6, #1)
if ( P0) R5 = add(R6, #1)
R2.L = #0
R11:10 = #0
} {
R1:0 = ASR(R1:0, R4)
R3:2 = ASR(R3:2, R5)
// R11:10 becomes 0x00000000_00008001, the value returned at .Ldenorm.
} {
R1:0 = sub(R1:0, R3:2)
R10.L = #0x8001
} {
R4 = clb(R1:0)
R9 = #58
// R4 = normalisation shift; p0 = shift exceeds 58.
} {
R4 = add(R4, #-1)
p0 = cmp.gt(R4, R9)
} {
R1:0 = ASL(R1:0, R4)
R8 = SUB(R8, R4)
if(p0) jump .Ldenorm
// Normal exit: store the result exponent in the low 16 bits of R0.
} {
R0 = insert(R8, #16, #0)
jumpr r31
}
// Normalisation shift above 58: return the fixed pattern built in R11:10.
.Ldenorm:
{
R1:0 = R11:10
jumpr r31
}
// fast2_dmpy_asm: multiplies the 64-bit values in R1:0 and R3:2, result in
// R1:0.
//
// NOTE(review): the number format is not described in this file; the notes
// below are read off the instruction sequence. The low halfword of R0 / R2 is
// used as a signed 16-bit exponent, the remaining bits as a signed mantissa;
// the result exponent is written into the low halfword of R0.
.text
.global fast2_dmpy_asm
.type fast2_dmpy_asm, @function
fast2_dmpy_asm:
.falign
// R4 / R5 = sign-extended exponents; R12 / R13 = upper halfwords of the low
// mantissa words.
{
R13= lsr(R2, #16)
R5 = sxth(R2)
R4 = sxth(R0)
R12= lsr(R0, #16)
}
// High x high product plus the first cross term; exponent halfword of R0 is
// cleared.
{
R11:10 = mpy(R1, R3)
R7:6 = mpy(R1, R13)
R0.L = #0x0
R15:14 = #0
}
// R15:14 becomes 0x80000000_00000000 (most negative mantissa) for the
// saturation test below.
{
R11:10 = add(R11:10, R11:10)
R7:6 += mpy(R3, R12)
R2.L = #0x0
R15.H = #0x8000
}
// p1 = both mantissas are identical. R12 becomes the exponent halfword used
// on the small-result exit (low half set to 0x8001).
{
R7:6 = asr(R7:6, #15)
R12.L = #0x8001
p1 = cmp.eq(R1:0, R3:2)
}
// R8 = exponent sum; p2 = first mantissa equals 0x80000000_00000000.
{
R7:6 = add(R7:6, R11:10)
R8 = add(R4, R5)
p2 = cmp.eq(R1:0, R15:14)
}
{
R9 = clb(R7:6)
R3:2 = abs(R7:6)
R11 = #58
}
// p1 = both mantissas are the most negative value -> saturate.
// R9 = normalisation shift; p0 = shift exceeds 58.
{
p1 = and(p1, p2)
R8 = sub(R8, R9)
R9 = add(R9, #-1)
p0 = cmp.gt(R9, R11)
}
{
R8 = add(R8, #1)
R1:0 = asl(R7:6, R9)
if(p1) jump .Lsat
}
// Normal exit: store the result exponent in the low 16 bits of R0.
{
R0 = insert(R8,#16, #0)
if(!p0) jumpr r31
}
// Normalisation shift above 58: the exponent halfword is replaced with the
// low half of R12 (0x8001).
{
R0 = insert(R12,#16, #0)
jumpr r31
}
// Saturation: mantissa 0x7fffffff_ffffffff with the computed exponent in the
// low 16 bits of R0.
.Lsat:
{
R1:0 = #-1
}
{
R1:0 = lsr(R1:0, #1)
}
{
R0 = insert(R8,#16, #0)
jumpr r31
}
// fast2_qd2f_asm: converts the value in R1:0 to a 32-bit result in R0.
//
// NOTE(review): formats are not described in this file; the notes below are
// read off the instruction sequence. R1 is used as a signed 32-bit mantissa and
// the low halfword of R0 as a signed exponent; the result is assembled as sign
// bit, 8-bit exponent at bit 23 and a 23-bit fraction, which matches the IEEE
// single-precision layout -- confirm against the callers.
.text
.global fast2_qd2f_asm
.type fast2_qd2f_asm, @function
fast2_qd2f_asm:
.falign
// R3 = |mantissa| (saturating); R4 = exponent; R5 = 0x40 rounding increment.
{
R3 = abs(R1):sat
R4 = sxth(R0)
R5 = #0x40
R6.L = #0xffc0
}
// p2 = exponent > 126 (too large); p3 = exponent >= -126 (not too small).
// R6 becomes 0x7fffffc0.
{
R0 = extractu(R3, #8, #0)
p2 = cmp.gt(R4, #126)
p3 = cmp.ge(R4, #-126)
R6.H = #0x7fff
}
// When the low 8 bits are exactly 0x40 the rounding increment is dropped.
// R4 = exponent + 126 (biased).
{
p1 = cmp.eq(R0,#0x40)
if(p1.new) R5 = #0
R4 = add(R4, #126)
if(!p3) jump .Lmin
}
// p0 = all bits of 0x7fffffc0 are set in |mantissa|, i.e. rounding would
// carry out of the mantissa.
{
p0 = bitsset(R3, R6)
R0.L = #0x0000
R2 = add(R3, R5)
R7 = lsr(R6, #8)
}
// On carry-out bump the exponent and use a zero fraction. R0 = 0x80000000
// sign mask.
{
if(p0) R4 = add(R4, #1)
if(p0) R3 = #0
R2 = lsr(R2, #7)
R0.H = #0x8000
}
// R0 = sign bit of the mantissa; R6 = exponent field; R3 = fraction bits.
{
R0 = and(R0, R1)
R6 &= asl(R4, #23)
if(!p0) R3 = and(R2, R7)
if(p2) jump .Lmax
}
{
R0 += add(R6, R3)
jumpr r31
}
// Exponent too large: return 0x7f7fffff. Both halves of R0 are overwritten,
// so the sign computed above is not kept.
.Lmax:
{
R0.L = #0xffff;
}
{
R0.H = #0x7f7f;
jumpr r31
}
// Exponent too small: return 0.
.Lmin:
{
R0 = #0x0
jumpr r31
}
// fast2_f2qd_asm: converts the 32-bit value in R0 to a 64-bit result in R1:0.
//
// NOTE(review): formats are not described in this file; the notes below are
// read off the instruction sequence. The input is taken apart as sign bit,
// 8-bit exponent at bit 23 and 23-bit fraction (the IEEE single-precision
// layout); the result has a signed mantissa in R1 and a 16-bit exponent in R0
// -- confirm against the callers.
.text
.global fast2_f2qd_asm
.type fast2_f2qd_asm, @function
fast2_f2qd_asm:
.falign
// R1 = fraction moved up by 7 bits; p0 = input sign; R3 = input << 1, used to
// detect +/-0.
{
R1 = asl(R0, #7)
p0 = tstbit(R0, #31)
R5:4 = #0
R3 = add(R0,R0)
}
// Set the implicit-one bit (bit 30); R0 = exponent field; p1 = input is +/-0.
// R5:4 becomes 0x00000000_00008001, the value returned at .Lminqd.
{
R1 = setbit(R1, #30)
R0= extractu(R0,#8,#23)
R4.L = #0x8001
p1 = cmp.eq(R3, #0)
}
// Drop bit 31 of the mantissa and subtract 126 from the exponent.
{
R1= extractu(R1, #31, #0)
R0= add(R0, #-126)
R2 = #0
if(p1) jump .Lminqd
}
// Keep the exponent as 16 bits and negate the mantissa for negative inputs.
{
R0 = zxth(R0)
if(p0) R1= sub(R2, R1)
jumpr r31
}
// Zero input: return the fixed pattern built in R5:4.
.Lminqd:
{
R1:0 = R5:4
jumpr r31
}

View file

@ -0,0 +1,187 @@
// fast2_ldadd_asm: add two "fast2 long double" values.
//
// As read by this code:
//   r0      = address where the result is written (copied to r7)
//   r29+0   = 64-bit mantissa of operand a,  r29+8  = 32-bit exponent of a
//   r29+16  = 64-bit mantissa of operand b,  r29+24 = 32-bit exponent of b
// The result mantissa is stored at r7+0 and the result exponent at r7+8.
// NOTE(review): presumably a struct-return call with both operands passed
// on the stack -- confirm against the C prototype.
//
// Reminder: instructions inside one { } packet read register values from
// before the packet.
.text
.global fast2_ldadd_asm
.type fast2_ldadd_asm, @function
fast2_ldadd_asm:
.falign
// Load both exponents and save the result pointer.
{
R4 = memw(r29+#8)
R5 = memw(r29+#24)
r7 = r0
}
// R6 = exponent difference (saturating), P0 = (exp a > exp b),
// R8 = larger exponent + 1.
{
R6 = sub(R4, R5):sat
P0 = CMP.GT(R4, R5);
if ( P0.new) R8 = add(R4, #1)
if (!P0.new) R8 = add(R5, #1)
} {
// The operand with the larger exponent is shifted right by 1.
R6 = abs(R6):sat
if ( P0) R4 = #1
if (!P0) R5 = #1
R9 = #62
} {
// Cap the exponent difference at 62 and load both mantissas.
R6 = MIN(R6, R9)
R1:0 = memd(r29+#0)
R3:2 = memd(r29+#16)
} {
// The operand with the smaller exponent is shifted right by difference + 1.
if (!P0) R4 = add(R6, #1)
if ( P0) R5 = add(R6, #1)
} {
// Align both mantissas.
R1:0 = ASR(R1:0, R4)
R3:2 = ASR(R3:2, R5)
} {
// Sum; R3:2 = 0 is the mantissa stored on the .Ldenorma1 path.
R1:0 = add(R1:0, R3:2)
R3:2 = #0
} {
// R4 = number of leading redundant sign bits; start building
// R9 = 0x80000001.
R4 = clb(R1:0)
R9.L =#0x0001
} {
// R8 = exponent - (clb - 1), R4 = clb - 1 (normalising shift),
// p0 = clb > 58 (compare uses the pre-decrement R4).
R8 -= add(R4, #-1)
R4 = add(R4, #-1)
p0 = cmp.gt(R4, #58)
R9.H =#0x8000
} {
// Normal case: store the exponent and normalise the mantissa.
if(!p0)memw(r7+#8) = R8
R1:0 = ASL(R1:0, R4)
if(p0) jump .Ldenorma1
} {
memd(r7+#0) = R1:0
jumpr r31
}
// clb > 58: store mantissa 0 and exponent 0x80000001.
.Ldenorma1:
memd(r7+#0) = R3:2
{
memw(r7+#8) = R9
jumpr r31
}
// fast2_ldsub_asm: subtract two "fast2 long double" values (a - b).
//
// As read by this code:
//   r0      = address where the result is written (copied to r7)
//   r29+0   = 64-bit mantissa of operand a,  r29+8  = 32-bit exponent of a
//   r29+16  = 64-bit mantissa of operand b,  r29+24 = 32-bit exponent of b
// The result mantissa is stored at r7+0 and the result exponent at r7+8.
// NOTE(review): presumably a struct-return call with both operands passed
// on the stack -- confirm against the C prototype.
//
// Reminder: instructions inside one { } packet read register values from
// before the packet.
.text
.global fast2_ldsub_asm
.type fast2_ldsub_asm, @function
fast2_ldsub_asm:
.falign
// Load both exponents and save the result pointer.
{
R4 = memw(r29+#8)
R5 = memw(r29+#24)
r7 = r0
}
// R6 = exponent difference (saturating), P0 = (exp a > exp b),
// R8 = larger exponent + 1.
{
R6 = sub(R4, R5):sat
P0 = CMP.GT(R4, R5);
if ( P0.new) R8 = add(R4, #1)
if (!P0.new) R8 = add(R5, #1)
} {
// The operand with the larger exponent is shifted right by 1.
R6 = abs(R6):sat
if ( P0) R4 = #1
if (!P0) R5 = #1
R9 = #62
} {
// Cap the exponent difference at 62 and load both mantissas.
R6 = min(R6, R9)
R1:0 = memd(r29+#0)
R3:2 = memd(r29+#16)
} {
// The operand with the smaller exponent is shifted right by difference + 1.
if (!P0) R4 = add(R6, #1)
if ( P0) R5 = add(R6, #1)
} {
// Align both mantissas.
R1:0 = ASR(R1:0, R4)
R3:2 = ASR(R3:2, R5)
} {
// Difference; R3:2 = 0 is the mantissa stored on the .Ldenorma_s path.
R1:0 = sub(R1:0, R3:2)
R3:2 = #0
} {
// R4 = number of leading redundant sign bits; start building
// R9 = 0x80000001.
R4 = clb(R1:0)
R9.L =#0x0001
} {
// R8 = exponent - (clb - 1), R4 = clb - 1 (normalising shift),
// p0 = clb > 58 (compare uses the pre-decrement R4).
R8 -= add(R4, #-1)
R4 = add(R4, #-1)
p0 = cmp.gt(R4, #58)
R9.H =#0x8000
} {
// Normal case: store the exponent and normalise the mantissa.
if(!p0)memw(r7+#8) = R8
R1:0 = asl(R1:0, R4)
if(p0) jump .Ldenorma_s
} {
memd(r7+#0) = R1:0
jumpr r31
}
// clb > 58: store mantissa 0 and exponent 0x80000001.
.Ldenorma_s:
memd(r7+#0) = R3:2
{
memw(r7+#8) = R9
jumpr r31
}
// fast2_ldmpy_asm: multiply two "fast2 long double" values.
//
// As read by this code:
//   R0      = address where the result is written
//   r29+0   = 64-bit mantissa of operand a,  r29+8  = 32-bit exponent of a
//   r29+16  = 64-bit mantissa of operand b,  r29+24 = 32-bit exponent of b
// The result mantissa is stored at R0+0 and the result exponent at R0+8.
// NOTE(review): presumably a struct-return call with both operands passed
// on the stack -- confirm against the C prototype.
//
// Reminder: instructions inside one { } packet read register values from
// before the packet.
.text
.global fast2_ldmpy_asm
.type fast2_ldmpy_asm, @function
fast2_ldmpy_asm:
.falign
// Load both mantissas (a -> R15:14, b -> R3:2).
{
R15:14 = memd(r29+#0)
R3:2 = memd(r29+#16)
R13:12 = #0
}
// R8/R9 = bits 31..1 of the low words of b/a;
// R13:12 = 0x8000000000000000.
{
R8= extractu(R2, #31, #1)
R9= extractu(R14, #31, #1)
R13.H = #0x8000
}
// High-word product, first cross product, and both exponents.
{
R11:10 = mpy(R15, R3)
R7:6 = mpy(R15, R8)
R4 = memw(r29+#8)
R5 = memw(r29+#24)
}
// Double the high product and accumulate the second cross product.
{
R11:10 = add(R11:10, R11:10)
R7:6 += mpy(R3, R9)
}
// Scale the cross-product sum down; start building R8 = 0x80000001;
// p1 = (mantissa a == mantissa b).
{
R7:6 = asr(R7:6, #30)
R8.L = #0x0001
p1 = cmp.eq(R15:14, R3:2)
}
// Full mantissa product, sum of exponents, p2 = (mantissa b == 0x8000...0).
{
R7:6 = add(R7:6, R11:10)
R4= add(R4, R5)
p2 = cmp.eq(R3:2, R13:12)
}
// R9 = leading redundant sign bits; p1 = both mantissas are 0x8000...0.
{
R9 = clb(R7:6)
R8.H = #0x8000
p1 = and(p1, p2)
}
// R4 = exponent sum - (clb - 1), R9 = clb - 1 (normalising shift).
{
R4-= add(R9, #-1)
R9 = add(R9, #-1)
if(p1) jump .Lsat1
}
// Normalise, store the exponent, and branch away if the shift (clb - 1,
// already decremented by the previous packet) is greater than 58.
{
R7:6 = asl(R7:6, R9)
memw(R0+#8) = R4
p0 = cmp.gt(R9, #58)
if(p0.new) jump:NT .Ldenorm1
}
{
memd(R0+#0) = R7:6
jumpr r31
}
// Both mantissas were 0x8000...0: undo the normalisation adjustment
// (R4 becomes exponent sum + 1) and store mantissa 0x4000000000000000.
.Lsat1:
{
R13:12 = #0
R4+= add(R9, #1)
}
{
R13.H = #0x4000
memw(R0+#8) = R4
}
{
memd(R0+#0) = R13:12
jumpr r31
}
// Shift greater than 58: overwrite the exponent already stored with
// 0x80000001 and store mantissa 0.
.Ldenorm1:
{
memw(R0+#8) = R8
R15:14 = #0
}
{
memd(R0+#0) = R15:14
jumpr r31
}

View file

@ -0,0 +1,12 @@
// FUNCTION_BEGIN <symbol>: open a global function definition.
// Switches to the .text section, aligns to 2^5 = 32 bytes, exports the
// symbol, marks it as a function and emits its label.
// Pair every use with FUNCTION_END for the same symbol.
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
// FUNCTION_END <symbol>: close a function opened with FUNCTION_BEGIN by
// recording its size (current location minus the symbol's address).
.macro FUNCTION_END name
.size \name, . - \name
.endm

View file

@ -0,0 +1,91 @@
// hexagon_memcpy_forward_vp4cp4n2: copy 32-bit words forward in memory.
//
// As used by this code:
//   r0 = destination address (written with word stores, post-incremented)
//   r1 = source address      (read with word/doubleword loads)
//   r2 = number of 32-bit words to copy
// No alignment checks are performed here.
// NOTE(review): both pointers are presumably required to be 4-byte aligned,
// and the routine appears intended for compiler-generated calls only --
// confirm against the compiler's documentation.
//
// Structure: a word-by-word prolog up to the next 4096-byte source page
// boundary, a main loop copying whole pages (1024 words), and a word-by-word
// epilog for the remainder. An l2fetch is issued before each phase.
.text
.globl hexagon_memcpy_forward_vp4cp4n2
.balign 32
.type hexagon_memcpy_forward_vp4cp4n2,@function
hexagon_memcpy_forward_vp4cp4n2:
// r3 = 4096 - src; r5 = word count / 8 (number of 32-byte blocks).
{
r3 = sub(##4096, r1)
r5 = lsr(r2, #3)
}
// r3 = words left before the end of the current source page (bits 11..2),
// r4 = 32-byte blocks left in the page (bits 11..5 of the old r3).
// If r1 is already page-aligned both are 0.
{
r3 = extractu(r3, #10, #2)
r4 = extractu(r3, #7, #5)
}
// Never go past the requested length.
{
r3 = minu(r2, r3)
r4 = minu(r5, r4)
}
// 2105344 = 0x202000; the block count is OR-ed into the low bits to form
// the l2fetch descriptor. NOTE(review): field layout (stride/width/height)
// should be confirmed against the ISA manual.
{
r4 = or(r4, ##2105344)
p0 = cmp.eq(r3, #0)
if (p0.new) jump:nt .Lskipprolog
}
l2fetch(r1, r4)
// r2 = words remaining after the prolog.
{
loop0(.Lprolog, r3)
r2 = sub(r2, r3)
}
.falign
// Prolog: one word per iteration.
.Lprolog:
{
r4 = memw(r1++#4)
memw(r0++#4) = r4.new
} :endloop0
.Lskipprolog:
// r3 = number of whole pages left (1024 words each).
{
r3 = lsr(r2, #10)
if (cmp.eq(r3.new, #0)) jump:nt .Lskipmain
}
// Outer loop over pages; r2 = r2 & 1023; 2105472 = 0x202080.
{
loop1(.Lout, r3)
r2 = extractu(r2, #10, #0)
r3 = ##2105472
}
.falign
// Per page: prefetch, then 512 iterations of 8 bytes each.
.Lout:
l2fetch(r1, r3)
loop0(.Lpage, #512)
.falign
// One doubleword load, written out as two separate word stores
// (the second store uses r0 from before the post-increment).
.Lpage:
r5:4 = memd(r1++#8)
{
memw(r0++#8) = r4
memw(r0+#4) = r5
} :endloop0:endloop1
.Lskipmain:
// r4 = 32-byte blocks remaining; return immediately if nothing is left.
{
r3 = ##2105344
r4 = lsr(r2, #3)
p0 = cmp.eq(r2, #0)
if (p0.new) jumpr:nt r31
}
{
r3 = or(r3, r4)
loop0(.Lepilog, r2)
}
l2fetch(r1, r3)
.falign
// Epilog: one word per iteration.
.Lepilog:
{
r4 = memw(r1++#4)
memw(r0++#4) = r4.new
} :endloop0
jumpr r31
.size hexagon_memcpy_forward_vp4cp4n2, . - hexagon_memcpy_forward_vp4cp4n2

View file

@ -0,0 +1,42 @@
// __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes
//
// As used by this code: r0 = destination, r1 = source, r2 = byte count.
// If both pointers are 8-byte aligned the copy is done with doubleword
// loads/stores and r0 is restored to the original destination before
// returning; otherwise control is transferred to memcpy with r0-r2 untouched.
// NOTE(review): per the symbol name the caller guarantees a length of at
// least 32 bytes that is a multiple of 8; the loop count (r2/8 - 3) relies
// on that -- confirm at call sites.
FUNCTION_BEGIN __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes
// Both compares target p0 in the same packet, so p0 is true only when the
// low 3 bits of BOTH r1 and r0 are clear. r3 starts at -3 because three
// doublewords are stored outside the loop.
{
p0 = bitsclr(r1,#7)
p0 = bitsclr(r0,#7)
if (p0.new) r5:4 = memd(r1)
r3 = #-3
}
// Unaligned: fall back to memcpy. Aligned: store doubleword 0, load
// doubleword 1, and finish the loop count r3 = r2/8 - 3.
{
if (!p0) jump .Lmemcpy_call
if (p0) memd(r0++#8) = r5:4
if (p0) r5:4 = memd(r1+#8)
r3 += lsr(r2,#3)
}
// Store doubleword 1, load doubleword 2, advance the source past the three
// doublewords already loaded.
{
memd(r0++#8) = r5:4
r5:4 = memd(r1+#16)
r1 = add(r1,#24)
loop0(1f,r3)
}
.falign
// Software-pipelined copy: store the previously loaded doubleword while
// loading the next one.
1:
{
memd(r0++#8) = r5:4
r5:4 = memd(r1++#8)
}:endloop0
// Store the last doubleword; r0 has advanced by r2 - 8, so subtracting
// (r2 - 8) restores the original destination pointer in r0.
{
memd(r0) = r5:4
r0 -= add(r2,#-8)
jumpr r31
}
FUNCTION_END __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes
// Fallback for unaligned pointers: tail-jump to memcpy through the PLT.
.Lmemcpy_call:
jump memcpy@PLT
// Alias under the __qdsp_ name.
.globl __qdsp_memcpy_likely_aligned_min32bytes_mult8bytes
.set __qdsp_memcpy_likely_aligned_min32bytes_mult8bytes, __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes

View file

@ -0,0 +1,63 @@
// __hexagon_moddi3: signed 64-bit remainder.
// As used by this code: dividend in r1:0, divisor in r3:2, result in r1:0.
// The remainder is computed on absolute values with a shift-and-subtract
// loop and then given the sign of the dividend.
// There is no explicit check for a zero divisor.
FUNCTION_BEGIN __hexagon_moddi3
// p3 = dividend is negative.
{
p3 = tstbit(r1,#31)
}
{
r1:0 = abs(r1:0)
r3:2 = abs(r3:2)
}
// Leading-zero counts; r5:4 = |divisor|, r3:2 = |dividend| (running remainder).
{
r6 = cl0(r1:0)
r7 = cl0(r3:2)
r5:4 = r3:2
r3:2 = r1:0
}
// r10 = shift that lines the divisor's top bit up with the dividend's;
// r1:0 = quotient accumulator, r15:14 = current quotient bit.
{
r10 = sub(r7,r6)
r1:0 = #0
r15:14 = #1
}
// Loop count is shift + 1; r13:12 = shifted divisor.
{
r11 = add(r10,#1)
r13:12 = lsl(r5:4,r10)
r15:14 = lsl(r15:14,r10)
}
{
p0 = cmp.gtu(r5:4,r3:2)
loop0(1f,r11)
}
// |divisor| > |dividend|: the remainder is |dividend|, skip the loop.
{
if (p0) jump .hexagon_moddi3_return
}
.falign
1:
// p0 = shifted divisor does not fit into the remainder.
{
p0 = cmp.gtu(r13:12,r3:2)
}
// Candidate remainder and quotient for the "fits" case.
{
r7:6 = sub(r3:2, r13:12)
r9:8 = add(r1:0, r15:14)
}
// Keep the old values when p0 is set, otherwise take the candidates.
{
r1:0 = vmux(p0, r1:0, r9:8)
r3:2 = vmux(p0, r3:2, r7:6)
}
// Move on to the next lower bit.
{
r15:14 = lsr(r15:14, #1)
r13:12 = lsr(r13:12, #1)
}:endloop0
.hexagon_moddi3_return:
// Return -remainder if the dividend was negative, else +remainder.
{
r1:0 = neg(r3:2)
}
{
r1:0 = vmux(p3,r1:0,r3:2)
jumpr r31
}
FUNCTION_END __hexagon_moddi3
// Alias under the __qdsp_ name.
.globl __qdsp_moddi3
.set __qdsp_moddi3, __hexagon_moddi3

View file

@ -0,0 +1,44 @@
// __hexagon_modsi3: signed 32-bit remainder.
// As used by this code: dividend in r0, divisor in r1, result in r0.
// Works on absolute values and negates the result when the dividend was
// negative. There is no explicit check for a zero divisor.
FUNCTION_BEGIN __hexagon_modsi3
// p2 = dividend is non-negative; r2 = |dividend|, r1 = |divisor|.
{
p2 = cmp.ge(r0,#0)
r2 = abs(r0)
r1 = abs(r1)
}
// p0 compares the values from before this packet's writes.
{
r3 = cl0(r2)
r4 = cl0(r1)
p0 = cmp.gtu(r1,r2)
}
// r3 = alignment shift. If |divisor| > |dividend| the dividend (still in
// r0, sign included) already is the remainder.
{
r3 = sub(r4,r3)
if (p0) jumpr r31
}
// p1 = shift is zero; r0 = running remainder; r2 = shifted divisor.
{
p1 = cmp.eq(r3,#0)
loop0(1f,r3)
r0 = r2
r2 = lsl(r1,r3)
}
.falign
1:
// Subtract the shifted divisor when it fits, then move down one bit.
// NOTE(review): with a zero shift the loop body presumably still executes
// once, so r1 is zeroed to make the final subtraction below a no-op --
// confirm hardware-loop behaviour for a zero count in the ISA manual.
{
p0 = cmp.gtu(r2,r0)
if (!p0.new) r0 = sub(r0,r2)
r2 = lsr(r2,#1)
if (p1) r1 = #0
}:endloop0
// Last step at shift 0 (subtracts r1); return directly for a non-negative
// dividend.
{
p0 = cmp.gtu(r2,r0)
if (!p0.new) r0 = sub(r0,r1)
if (p2) jumpr r31
}
// Negative dividend: negate the remainder.
{
r0 = neg(r0)
jumpr r31
}
FUNCTION_END __hexagon_modsi3
// Alias under the __qdsp_ name.
.globl __qdsp_modsi3
.set __qdsp_modsi3, __hexagon_modsi3

View file

@ -0,0 +1,42 @@
// __hexagon_divsf3: single-precision division, r0 / r1, result in r0.
// Starts from the hardware reciprocal approximation (sfrecipa) and refines
// both the reciprocal and the quotient with fused multiply-accumulate steps.
// NOTE(review): the roles given to sffixupn/sffixupd (adjusted numerator and
// denominator) and to p0 (scaling control for the final :scale step) follow
// the instruction names -- confirm against the ISA manual.
//
// Reminder: instructions inside one { } packet read register values from
// before the packet.
FUNCTION_BEGIN __hexagon_divsf3
// r2 = reciprocal estimate, r4 = adjusted denominator, r3 = 1.0f.
{
r2,p0 = sfrecipa(r0,r1)
r4 = sffixupd(r0,r1)
r3 = ##0x3f800000
}
// r5 = adjusted numerator, r3 = 1.0 - den * recip (error term),
// r7 = copy of 1.0 (r3 before this packet), r6 = sign-bit mask.
{
r5 = sffixupn(r0,r1)
r3 -= sfmpy(r4,r2):lib
r6 = ##0x80000000
r7 = r3
}
// Refine the reciprocal; reset r3 to 1.0; r6 = numerator;
// r0 = sign bit of the numerator only (uses r6 = 0x80000000 from before
// this packet), the starting value for the quotient accumulation.
{
r2 += sfmpy(r3,r2):lib
r3 = r7
r6 = r5
r0 = and(r6,r5)
}
// New error term; first quotient estimate r0 += num * recip.
{
r3 -= sfmpy(r4,r2):lib
r0 += sfmpy(r5,r2):lib
}
// Refine the reciprocal again; r6 = num - quot * den (residual).
{
r2 += sfmpy(r3,r2):lib
r6 -= sfmpy(r0,r4):lib
}
// Correct the quotient with the residual.
{
r0 += sfmpy(r6,r2):lib
}
// Residual of the corrected quotient.
{
r5 -= sfmpy(r0,r4):lib
}
// Final correction using the :scale form with p0.
{
r0 += sfmpy(r5,r2,p0):scale
jumpr r31
}
FUNCTION_END __hexagon_divsf3
// Aliases: __qdsp_, __hexagon_fast_ and __hexagon_fast2_ names all resolve
// to the same routine.
.global __qdsp_divsf3 ; .set __qdsp_divsf3, __hexagon_divsf3
.global __hexagon_fast_divsf3 ; .set __hexagon_fast_divsf3, __hexagon_divsf3
.global __hexagon_fast2_divsf3 ; .set __hexagon_fast2_divsf3, __hexagon_divsf3

View file

@ -0,0 +1,49 @@
// __hexagon_sqrtf: single-precision square root of r0, result in r0.
// Starts from the hardware inverse-square-root approximation (sfinvsqrta)
// and refines two running values with fused multiply-accumulate steps:
//   r0 ("S") ~ sqrt(x)   and   r1 ("H") ~ 0.5 / sqrt(x).
// NOTE(review): the meaning of sffixupr (adjusted radicand), of p0 (scaling
// control for the final :scale step) and of sfclass(..,#1) (tests for zero)
// follows the instruction names/encodings -- confirm against the ISA manual.
//
// Reminder: instructions inside one { } packet read register values from
// before the packet.
FUNCTION_BEGIN __hexagon_sqrtf
// r3 = 1/sqrt(x) estimate, r5 = adjusted x, r4 = 0.5f, S = H = 0.
{
r3,p0 = sfinvsqrta(r0)
r5 = sffixupr(r0)
r4 = ##0x3f000000
r1:0 = combine(#0,#0)
}
// S = est * x, H = est * 0.5; r2 = 0.5, r3 = x for the next step.
{
r0 += sfmpy(r3,r5):lib
r1 += sfmpy(r3,r4):lib
r2 = r4
r3 = r5
}
// r2 = 0.5 - S*H (error term); p1 = class test on x with mask #1.
{
r2 -= sfmpy(r0,r1):lib
p1 = sfclass(r5,#1)
}
// Refine S and H with the error term; reload r2 = 0.5, r3 = x.
{
r0 += sfmpy(r0,r2):lib
r1 += sfmpy(r1,r2):lib
r2 = r4
r3 = r5
}
// r2 = 0.5 - S*H, r3 = x - S*S (residual).
{
r2 -= sfmpy(r0,r1):lib
r3 -= sfmpy(r0,r0):lib
}
// S += H * residual, H += H * error; reload r2 = 0.5, r3 = x.
{
r0 += sfmpy(r1,r3):lib
r1 += sfmpy(r1,r2):lib
r2 = r4
r3 = r5
}
// Final residual. When p1 is set, OR the adjusted input into the result
// (presumably so a zero input keeps its sign bit -- TODO confirm).
{
r3 -= sfmpy(r0,r0):lib
if (p1) r0 = or(r0,r5)
}
// Final correction using the :scale form with p0.
{
r0 += sfmpy(r1,r3,p0):scale
jumpr r31
}
FUNCTION_END __hexagon_sqrtf
// Aliases: __qdsp_, __hexagon_fast_ and __hexagon_fast2_ names all resolve
// to the same routine.
.global __qdsp_sqrtf ; .set __qdsp_sqrtf, __hexagon_sqrtf
.global __hexagon_fast_sqrtf ; .set __hexagon_fast_sqrtf, __hexagon_sqrtf
.global __hexagon_fast2_sqrtf ; .set __hexagon_fast2_sqrtf, __hexagon_sqrtf

View file

@ -0,0 +1,50 @@
// __hexagon_udivdi3: unsigned 64-bit division.
// As used by this code: dividend in r1:0, divisor in r3:2, quotient
// returned in r1:0. Shift-and-subtract, one quotient bit per iteration.
// There is no explicit check for a zero divisor.
FUNCTION_BEGIN __hexagon_udivdi3
// Leading-zero counts; r5:4 = divisor, r3:2 = dividend (running remainder).
{
r6 = cl0(r1:0)
r7 = cl0(r3:2)
r5:4 = r3:2
r3:2 = r1:0
}
// r10 = shift that lines the divisor's top bit up with the dividend's;
// r1:0 = quotient accumulator, r15:14 = current quotient bit.
{
r10 = sub(r7,r6)
r1:0 = #0
r15:14 = #1
}
// Loop count is shift + 1; r13:12 = shifted divisor.
{
r11 = add(r10,#1)
r13:12 = lsl(r5:4,r10)
r15:14 = lsl(r15:14,r10)
}
{
p0 = cmp.gtu(r5:4,r3:2)
loop0(1f,r11)
}
// divisor > dividend: quotient is 0 (already in r1:0).
{
if (p0) jumpr r31
}
.falign
1:
// p0 = shifted divisor does not fit into the remainder.
{
p0 = cmp.gtu(r13:12,r3:2)
}
// Candidate remainder and quotient for the "fits" case.
{
r7:6 = sub(r3:2, r13:12)
r9:8 = add(r1:0, r15:14)
}
// Keep the old values when p0 is set, otherwise take the candidates.
{
r1:0 = vmux(p0, r1:0, r9:8)
r3:2 = vmux(p0, r3:2, r7:6)
}
// Move on to the next lower bit.
{
r15:14 = lsr(r15:14, #1)
r13:12 = lsr(r13:12, #1)
}:endloop0
{
jumpr r31
}
FUNCTION_END __hexagon_udivdi3
// Alias under the __qdsp_ name.
.globl __qdsp_udivdi3
.set __qdsp_udivdi3, __hexagon_udivdi3

View file

@ -0,0 +1,50 @@
// __hexagon_udivmoddi4: unsigned 64-bit division with remainder.
// As used by this code: dividend in r1:0, divisor in r3:2. On return the
// quotient is in r1:0 and the remainder is left in r3:2; nothing is written
// to memory.
// NOTE(review): returning the remainder in r3:2 is a register-based
// convention, not the pointer-argument form of the generic __udivmoddi4 --
// confirm callers expect this.
// There is no explicit check for a zero divisor.
FUNCTION_BEGIN __hexagon_udivmoddi4
// Leading-zero counts; r5:4 = divisor, r3:2 = dividend (running remainder).
{
r6 = cl0(r1:0)
r7 = cl0(r3:2)
r5:4 = r3:2
r3:2 = r1:0
}
// r10 = shift that lines the divisor's top bit up with the dividend's;
// r1:0 = quotient accumulator, r15:14 = current quotient bit.
{
r10 = sub(r7,r6)
r1:0 = #0
r15:14 = #1
}
// Loop count is shift + 1; r13:12 = shifted divisor.
{
r11 = add(r10,#1)
r13:12 = lsl(r5:4,r10)
r15:14 = lsl(r15:14,r10)
}
{
p0 = cmp.gtu(r5:4,r3:2)
loop0(1f,r11)
}
// divisor > dividend: quotient 0 in r1:0, remainder = dividend in r3:2.
{
if (p0) jumpr r31
}
.falign
1:
// p0 = shifted divisor does not fit into the remainder.
{
p0 = cmp.gtu(r13:12,r3:2)
}
// Candidate remainder and quotient for the "fits" case.
{
r7:6 = sub(r3:2, r13:12)
r9:8 = add(r1:0, r15:14)
}
// Keep the old values when p0 is set, otherwise take the candidates.
{
r1:0 = vmux(p0, r1:0, r9:8)
r3:2 = vmux(p0, r3:2, r7:6)
}
// Move on to the next lower bit.
{
r15:14 = lsr(r15:14, #1)
r13:12 = lsr(r13:12, #1)
}:endloop0
{
jumpr r31
}
FUNCTION_END __hexagon_udivmoddi4
// Alias under the __qdsp_ name.
.globl __qdsp_udivmoddi4
.set __qdsp_udivmoddi4, __hexagon_udivmoddi4

View file

@ -0,0 +1,39 @@
// __hexagon_udivmodsi4: unsigned 32-bit division with remainder.
// As used by this code: dividend in r0, divisor in r1. On return the
// quotient is in r0 and the remainder in r1.
// There is no explicit check for a zero divisor.
//
// Reminder: instructions inside one { } packet read register values from
// before the packet.
FUNCTION_BEGIN __hexagon_udivmodsi4
// Leading-zero counts; r5 = 1 (quotient bit), r4 = 0;
// p0 = divisor > dividend.
{
r2 = cl0(r0)
r3 = cl0(r1)
r5:4 = combine(#1,#0)
p0 = cmp.gtu(r1,r0)
}
// r6 = alignment shift; r4 = divisor; r1 = dividend (running remainder),
// r0 = 0 (quotient; combine reads r4 from before this packet).
// If divisor > dividend, return quotient 0 and remainder = dividend.
{
r6 = sub(r3,r2)
r4 = r1
r1:0 = combine(r0,r4)
if (p0) jumpr r31
}
// r3 = 1 << shift (quotient bit), r2 = divisor << shift.
// NOTE(review): with a zero shift the loop body presumably still executes
// once, so r4 is zeroed to make the final subtraction below a no-op --
// confirm hardware-loop behaviour for a zero count in the ISA manual.
{
r3:2 = vlslw(r5:4,r6)
loop0(1f,r6)
p0 = cmp.eq(r6,#0)
if (p0.new) r4 = #0
}
.falign
1:
// When the shifted divisor fits: subtract it and set the quotient bit.
// Then shift both the divisor and the bit right by one.
{
p0 = cmp.gtu(r2,r1)
if (!p0.new) r1 = sub(r1,r2)
if (!p0.new) r0 = add(r0,r3)
r3:2 = vlsrw(r3:2,#1)
}:endloop0
// Last step at shift 0 (subtracts r4, the unshifted divisor or 0).
{
p0 = cmp.gtu(r2,r1)
if (!p0.new) r1 = sub(r1,r4)
if (!p0.new) r0 = add(r0,r3)
jumpr r31
}
FUNCTION_END __hexagon_udivmodsi4
// Alias under the __qdsp_ name.
.globl __qdsp_udivmodsi4
.set __qdsp_udivmodsi4, __hexagon_udivmodsi4

View file

@ -0,0 +1,36 @@
// __hexagon_udivsi3: unsigned 32-bit division.
// As used by this code: dividend in r0, divisor in r1, quotient returned
// in r0 (r1 is used as the running remainder).
// There is no explicit check for a zero divisor.
//
// Reminder: instructions inside one { } packet read register values from
// before the packet.
FUNCTION_BEGIN __hexagon_udivsi3
// Leading-zero counts; r5 = 1 (quotient bit), r4 = 0;
// p0 = divisor > dividend.
{
r2 = cl0(r0)
r3 = cl0(r1)
r5:4 = combine(#1,#0)
p0 = cmp.gtu(r1,r0)
}
// r6 = alignment shift; r4 = divisor; r1 = dividend (running remainder),
// r0 = 0 (quotient; combine reads r4 from before this packet).
// If divisor > dividend, return quotient 0.
{
r6 = sub(r3,r2)
r4 = r1
r1:0 = combine(r0,r4)
if (p0) jumpr r31
}
// r3 = 1 << shift (quotient bit), r2 = divisor << shift.
{
r3:2 = vlslw(r5:4,r6)
loop0(1f,r6)
}
.falign
1:
// When the shifted divisor fits: subtract it and set the quotient bit.
// Then shift both the divisor and the bit right by one.
{
p0 = cmp.gtu(r2,r1)
if (!p0.new) r1 = sub(r1,r2)
if (!p0.new) r0 = add(r0,r3)
r3:2 = vlsrw(r3:2,#1)
}:endloop0
// Last quotient bit; the remainder is not updated because it is not
// returned.
{
p0 = cmp.gtu(r2,r1)
if (!p0.new) r0 = add(r0,r3)
jumpr r31
}
FUNCTION_END __hexagon_udivsi3
// Alias under the __qdsp_ name.
.globl __qdsp_udivsi3
.set __qdsp_udivsi3, __hexagon_udivsi3

View file

@ -0,0 +1,53 @@
// __hexagon_umoddi3: unsigned 64-bit remainder.
// As used by this code: dividend in r1:0, divisor in r3:2, remainder
// returned in r1:0. Shift-and-subtract, one quotient bit per iteration.
// There is no explicit check for a zero divisor.
FUNCTION_BEGIN __hexagon_umoddi3
// Leading-zero counts; r5:4 = divisor, r3:2 = dividend (running remainder).
{
r6 = cl0(r1:0)
r7 = cl0(r3:2)
r5:4 = r3:2
r3:2 = r1:0
}
// r10 = shift that lines the divisor's top bit up with the dividend's;
// r1:0 = quotient accumulator, r15:14 = current quotient bit.
{
r10 = sub(r7,r6)
r1:0 = #0
r15:14 = #1
}
// Loop count is shift + 1; r13:12 = shifted divisor.
{
r11 = add(r10,#1)
r13:12 = lsl(r5:4,r10)
r15:14 = lsl(r15:14,r10)
}
{
p0 = cmp.gtu(r5:4,r3:2)
loop0(1f,r11)
}
// divisor > dividend: the remainder is the dividend, skip the loop.
{
if (p0) jump .hexagon_umoddi3_return
}
.falign
1:
// p0 = shifted divisor does not fit into the remainder.
{
p0 = cmp.gtu(r13:12,r3:2)
}
// Candidate remainder and quotient for the "fits" case.
{
r7:6 = sub(r3:2, r13:12)
r9:8 = add(r1:0, r15:14)
}
// Keep the old values when p0 is set, otherwise take the candidates.
{
r1:0 = vmux(p0, r1:0, r9:8)
r3:2 = vmux(p0, r3:2, r7:6)
}
// Move on to the next lower bit.
{
r15:14 = lsr(r15:14, #1)
r13:12 = lsr(r13:12, #1)
}:endloop0
.hexagon_umoddi3_return:
// Move the remainder into the return registers.
{
r1:0 = r3:2
jumpr r31
}
FUNCTION_END __hexagon_umoddi3
// Alias under the __qdsp_ name.
.globl __qdsp_umoddi3
.set __qdsp_umoddi3, __hexagon_umoddi3

View file

@ -0,0 +1,34 @@
// __hexagon_umodsi3: unsigned 32-bit remainder.
// As used by this code: dividend in r0, divisor in r1, remainder returned
// in r0. There is no explicit check for a zero divisor.
//
// Reminder: instructions inside one { } packet read register values from
// before the packet.
FUNCTION_BEGIN __hexagon_umodsi3
// Leading-zero counts; p0 = divisor > dividend.
{
r2 = cl0(r0)
r3 = cl0(r1)
p0 = cmp.gtu(r1,r0)
}
// r2 = alignment shift. If divisor > dividend the dividend in r0 already
// is the remainder.
{
r2 = sub(r3,r2)
if (p0) jumpr r31
}
// Loop count = shift, p1 = shift is zero, r2 = divisor << shift
// (all three read the shift from before this packet).
{
loop0(1f,r2)
p1 = cmp.eq(r2,#0)
r2 = lsl(r1,r2)
}
.falign
1:
// Subtract the shifted divisor when it fits, then move down one bit.
// NOTE(review): with a zero shift the loop body presumably still executes
// once, so r1 is zeroed to make the final subtraction below a no-op --
// confirm hardware-loop behaviour for a zero count in the ISA manual.
{
p0 = cmp.gtu(r2,r0)
if (!p0.new) r0 = sub(r0,r2)
r2 = lsr(r2,#1)
if (p1) r1 = #0
}:endloop0
// Last step at shift 0 (subtracts r1, the unshifted divisor or 0).
{
p0 = cmp.gtu(r2,r0)
if (!p0.new) r0 = sub(r0,r1)
jumpr r31
}
FUNCTION_END __hexagon_umodsi3
// Alias under the __qdsp_ name.
.globl __qdsp_umodsi3
.set __qdsp_umodsi3, __hexagon_umodsi3

View file

@ -1,6 +1,7 @@
#![cfg_attr(feature = "compiler-builtins", compiler_builtins)]
#![cfg_attr(not(feature = "no-asm"), feature(asm))]
#![feature(abi_unadjusted)]
#![feature(asm_experimental_arch)]
#![cfg_attr(not(feature = "no-asm"), feature(global_asm))]
#![feature(cfg_target_has_atomic)]
#![feature(compiler_builtins)]
@ -70,6 +71,9 @@ pub mod aarch64_linux;
))]
pub mod arm_linux;
#[cfg(target_arch = "hexagon")]
pub mod hexagon;
#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
pub mod riscv;