From 52dba9843b4f9d2ec2da3f8509ed6c8e48e091e6 Mon Sep 17 00:00:00 2001 From: Andrey Zgarbul Date: Sat, 14 Jul 2018 00:52:28 +0300 Subject: [PATCH] rem_pio2_large comments --- .../libm/src/math/rem_pio2_large.rs | 216 +++++++++--------- 1 file changed, 105 insertions(+), 111 deletions(-) diff --git a/library/compiler-builtins/libm/src/math/rem_pio2_large.rs b/library/compiler-builtins/libm/src/math/rem_pio2_large.rs index deb985f1d1cc..809724df4194 100644 --- a/library/compiler-builtins/libm/src/math/rem_pio2_large.rs +++ b/library/compiler-builtins/libm/src/math/rem_pio2_large.rs @@ -1,117 +1,20 @@ use super::scalbn; use super::floor; -/// double x[],y[]; int e0,nx,prec; -/// -/// __rem_pio2_large return the last three digits of N with -/// y = x - N*pi/2 -/// so that |y| < pi/2. -/// -/// The method is to compute the integer (mod 8) and fraction parts of -/// (2/pi)*x without doing the full multiplication. In general we -/// skip the part of the product that are known to be a huge integer ( -/// more accurately, = 0 mod 8 ). Thus the number of operations are -/// independent of the exponent of the input. -/// -/// (2/pi) is represented by an array of 24-bit integers in ipio2[]. -/// -/// Input parameters: -/// x[] The input value (must be positive) is broken into nx -/// pieces of 24-bit integers in double precision format. -/// x[i] will be the i-th 24 bit of x. The scaled exponent -/// of x[0] is given in input parameter e0 (i.e., x[0]*2^e0 -/// match x's up to 24 bits. -/// -/// Example of breaking a double positive z into x[0]+x[1]+x[2]: -/// e0 = ilogb(z)-23 -/// z = scalbn(z,-e0) -/// for i = 0,1,2 -/// x[i] = floor(z) -/// z = (z-x[i])*2**24 -/// -/// y[] ouput result in an array of double precision numbers. -/// The dimension of y[] is: -/// 24-bit precision 1 -/// 53-bit precision 2 -/// 64-bit precision 2 -/// 113-bit precision 3 -/// The actual value is the sum of them. Thus for 113-bit -/// precison, one may have to do something like: -/// -/// long double t,w,r_head, r_tail; -/// t = (long double)y[2] + (long double)y[1]; -/// w = (long double)y[0]; -/// r_head = t+w; -/// r_tail = w - (r_head - t); -/// -/// e0 The exponent of x[0]. Must be <= 16360 or you need to -/// expand the ipio2 table. -/// -/// prec an integer indicating the precision: -/// 0 24 bits (single) -/// 1 53 bits (double) -/// 2 64 bits (extended) -/// 3 113 bits (quad) -/// External function: -/// double scalbn(), floor(); -/// -/// -/// Here is the description of some local variables: -/// -/// jk jk+1 is the initial number of terms of ipio2[] needed -/// in the computation. The minimum and recommended value -/// for jk is 3,4,4,6 for single, double, extended, and quad. -/// jk+1 must be 2 larger than you might expect so that our -/// recomputation test works. (Up to 24 bits in the integer -/// part (the 24 bits of it that we compute) and 23 bits in -/// the fraction part may be lost to cancelation before we -/// recompute.) -/// -/// jz local integer variable indicating the number of -/// terms of ipio2[] used. -/// -/// jx nx - 1 -/// -/// jv index for pointing to the suitable ipio2[] for the -/// computation. In general, we want -/// ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8 -/// is an integer. Thus -/// e0-3-24*jv >= 0 or (e0-3)/24 >= jv -/// Hence jv = max(0,(e0-3)/24). -/// -/// jp jp+1 is the number of terms in PIo2[] needed, jp = jk. -/// -/// q[] double array with integral value, representing the -/// 24-bits chunk of the product of x and 2/pi. -/// -/// q0 the corresponding exponent of q[0]. Note that the -/// exponent for q[i] would be q0-24*i. -/// -/// PIo2[] double precision array, obtained by cutting pi/2 -/// into 24 bits chunks. -/// -/// f[] ipio2[] in floating point -/// -/// iq[] integer array by breaking up q[] in 24-bits chunk. -/// -/// fq[] final product of x*(2/pi) in fq[0],..,fq[jk] -/// -/// ih integer. If >0 it indicates q[] is >= 0.5, hence -/// it also indicates the *sign* of the result. +// initial value for jk +const INIT_JK : [usize; 4] = [3,4,4,6]; -const INIT_JK : [usize; 4] = [3,4,4,6]; /* initial value for jk */ - -/// Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi -/// -/// integer array, contains the (24*i)-th to (24*i+23)-th -/// bit of 2/pi after binary point. The corresponding -/// floating value is -/// -/// ipio2[i] * 2^(-24(i+1)). -/// -/// NB: This table must have at least (e0-3)/24 + jk terms. -/// For quad precision (e0 <= 16360, jk = 6), this is 686. -#[cfg(not(ldbl_max_exp_more1024))] +// Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi +// +// integer array, contains the (24*i)-th to (24*i+23)-th +// bit of 2/pi after binary point. The corresponding +// floating value is +// +// ipio2[i] * 2^(-24(i+1)). +// +// NB: This table must have at least (e0-3)/24 + jk terms. +// For quad precision (e0 <= 16360, jk = 6), this is 686. +#[cfg(target_pointer_width = "32")] const IPIO2 : [i32; 66] = [ 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, @@ -126,7 +29,7 @@ const IPIO2 : [i32; 66] = [ 0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B, ]; -#[cfg(ldbl_max_exp_more1024)] +#[cfg(target_pointer_width = "64")] const IPIO2 : [i32; 690] = [ 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, @@ -256,6 +159,97 @@ const PIO2 : [f64; 8] = [ 2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */ ]; +// fn rem_pio2_large(x : &[f64], y : &mut [f64], e0 : i32, prec : usize) -> i32 +// +// Input parameters: +// x[] The input value (must be positive) is broken into nx +// pieces of 24-bit integers in double precision format. +// x[i] will be the i-th 24 bit of x. The scaled exponent +// of x[0] is given in input parameter e0 (i.e., x[0]*2^e0 +// match x's up to 24 bits. +// +// Example of breaking a double positive z into x[0]+x[1]+x[2]: +// e0 = ilogb(z)-23 +// z = scalbn(z,-e0) +// for i = 0,1,2 +// x[i] = floor(z) +// z = (z-x[i])*2**24 +// +// y[] ouput result in an array of double precision numbers. +// The dimension of y[] is: +// 24-bit precision 1 +// 53-bit precision 2 +// 64-bit precision 2 +// 113-bit precision 3 +// The actual value is the sum of them. Thus for 113-bit +// precison, one may have to do something like: +// +// long double t,w,r_head, r_tail; +// t = (long double)y[2] + (long double)y[1]; +// w = (long double)y[0]; +// r_head = t+w; +// r_tail = w - (r_head - t); +// +// e0 The exponent of x[0]. Must be <= 16360 or you need to +// expand the ipio2 table. +// +// prec an integer indicating the precision: +// 0 24 bits (single) +// 1 53 bits (double) +// 2 64 bits (extended) +// 3 113 bits (quad) +// +// Here is the description of some local variables: +// +// jk jk+1 is the initial number of terms of ipio2[] needed +// in the computation. The minimum and recommended value +// for jk is 3,4,4,6 for single, double, extended, and quad. +// jk+1 must be 2 larger than you might expect so that our +// recomputation test works. (Up to 24 bits in the integer +// part (the 24 bits of it that we compute) and 23 bits in +// the fraction part may be lost to cancelation before we +// recompute.) +// +// jz local integer variable indicating the number of +// terms of ipio2[] used. +// +// jx nx - 1 +// +// jv index for pointing to the suitable ipio2[] for the +// computation. In general, we want +// ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8 +// is an integer. Thus +// e0-3-24*jv >= 0 or (e0-3)/24 >= jv +// Hence jv = max(0,(e0-3)/24). +// +// jp jp+1 is the number of terms in PIo2[] needed, jp = jk. +// +// q[] double array with integral value, representing the +// 24-bits chunk of the product of x and 2/pi. +// +// q0 the corresponding exponent of q[0]. Note that the +// exponent for q[i] would be q0-24*i. +// +// PIo2[] double precision array, obtained by cutting pi/2 +// into 24 bits chunks. +// +// f[] ipio2[] in floating point +// +// iq[] integer array by breaking up q[] in 24-bits chunk. +// +// fq[] final product of x*(2/pi) in fq[0],..,fq[jk] +// +// ih integer. If >0 it indicates q[] is >= 0.5, hence +// it also indicates the *sign* of the result. + +/// Return the last three digits of N with y = x - N*pi/2 +/// so that |y| < pi/2. +/// +/// The method is to compute the integer (mod 8) and fraction parts of +/// (2/pi)*x without doing the full multiplication. In general we +/// skip the part of the product that are known to be a huge integer ( +/// more accurately, = 0 mod 8 ). Thus the number of operations are +/// independent of the exponent of the input. #[inline] pub(crate) fn rem_pio2_large(x : &[f64], y : &mut [f64], e0 : i32, prec : usize) -> i32 { let x1p24 = f64::from_bits(0x4170000000000000); // 0x1p24 === 2 ^ 24