From 52dba9843b4f9d2ec2da3f8509ed6c8e48e091e6 Mon Sep 17 00:00:00 2001
From: Andrey Zgarbul <zgarbul.andrey@gmail.com>
Date: Sat, 14 Jul 2018 00:52:28 +0300
Subject: [PATCH] rem_pio2_large comments

---
 .../libm/src/math/rem_pio2_large.rs           | 216 +++++++++---------
 1 file changed, 105 insertions(+), 111 deletions(-)

diff --git a/library/compiler-builtins/libm/src/math/rem_pio2_large.rs b/library/compiler-builtins/libm/src/math/rem_pio2_large.rs
index deb985f1d1cc..809724df4194 100644
--- a/library/compiler-builtins/libm/src/math/rem_pio2_large.rs
+++ b/library/compiler-builtins/libm/src/math/rem_pio2_large.rs
@@ -1,117 +1,20 @@
 use super::scalbn;
 use super::floor;
 
-/// double x[],y[]; int e0,nx,prec;
-///
-/// __rem_pio2_large return the last three digits of N with
-///              y = x - N*pi/2
-/// so that |y| < pi/2.
-///
-/// The method is to compute the integer (mod 8) and fraction parts of
-/// (2/pi)*x without doing the full multiplication. In general we
-/// skip the part of the product that are known to be a huge integer (
-/// more accurately, = 0 mod 8 ). Thus the number of operations are
-/// independent of the exponent of the input.
-///
-/// (2/pi) is represented by an array of 24-bit integers in ipio2[].
-///
-/// Input parameters:
-///      x[]     The input value (must be positive) is broken into nx
-///              pieces of 24-bit integers in double precision format.
-///              x[i] will be the i-th 24 bit of x. The scaled exponent
-///              of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
-///              match x's up to 24 bits.
-///
-///              Example of breaking a double positive z into x[0]+x[1]+x[2]:
-///                      e0 = ilogb(z)-23
-///                      z  = scalbn(z,-e0)
-///              for i = 0,1,2
-///                      x[i] = floor(z)
-///                      z    = (z-x[i])*2**24
-///
-///      y[]     ouput result in an array of double precision numbers.
-///              The dimension of y[] is:
-///                      24-bit  precision       1
-///                      53-bit  precision       2
-///                      64-bit  precision       2
-///                      113-bit precision       3
-///              The actual value is the sum of them. Thus for 113-bit
-///              precison, one may have to do something like:
-///
-///              long double t,w,r_head, r_tail;
-///              t = (long double)y[2] + (long double)y[1];
-///              w = (long double)y[0];
-///              r_head = t+w;
-///              r_tail = w - (r_head - t);
-///
-///      e0      The exponent of x[0]. Must be <= 16360 or you need to
-///              expand the ipio2 table.
-///
-///      prec    an integer indicating the precision:
-///                      0       24  bits (single)
-///                      1       53  bits (double)
-///                      2       64  bits (extended)
-///                      3       113 bits (quad)
-/// External function:
-///      double scalbn(), floor();
-///
-///
-/// Here is the description of some local variables:
-///
-///      jk      jk+1 is the initial number of terms of ipio2[] needed
-///              in the computation. The minimum and recommended value
-///              for jk is 3,4,4,6 for single, double, extended, and quad.
-///              jk+1 must be 2 larger than you might expect so that our
-///              recomputation test works. (Up to 24 bits in the integer
-///              part (the 24 bits of it that we compute) and 23 bits in
-///              the fraction part may be lost to cancelation before we
-///              recompute.)
-///
-///      jz      local integer variable indicating the number of
-///              terms of ipio2[] used.
-///
-///      jx      nx - 1
-///
-///      jv      index for pointing to the suitable ipio2[] for the
-///              computation. In general, we want
-///                      ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
-///              is an integer. Thus
-///                      e0-3-24*jv >= 0 or (e0-3)/24 >= jv
-///              Hence jv = max(0,(e0-3)/24).
-///
-///      jp      jp+1 is the number of terms in PIo2[] needed, jp = jk.
-///
-///      q[]     double array with integral value, representing the
-///              24-bits chunk of the product of x and 2/pi.
-///
-///      q0      the corresponding exponent of q[0]. Note that the
-///              exponent for q[i] would be q0-24*i.
-///
-///      PIo2[]  double precision array, obtained by cutting pi/2
-///              into 24 bits chunks.
-///
-///      f[]     ipio2[] in floating point
-///
-///      iq[]    integer array by breaking up q[] in 24-bits chunk.
-///
-///      fq[]    final product of x*(2/pi) in fq[0],..,fq[jk]
-///
-///      ih      integer. If >0 it indicates q[] is >= 0.5, hence
-///              it also indicates the *sign* of the result.
+// initial value for jk
+const INIT_JK : [usize; 4] = [3,4,4,6];
 
-const INIT_JK : [usize; 4] = [3,4,4,6]; /* initial value for jk */
-
-/// Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
-///
-///              integer array, contains the (24*i)-th to (24*i+23)-th
-///              bit of 2/pi after binary point. The corresponding
-///              floating value is
-///
-///                      ipio2[i] * 2^(-24(i+1)).
-///
-/// NB: This table must have at least (e0-3)/24 + jk terms.
-///     For quad precision (e0 <= 16360, jk = 6), this is 686.
-#[cfg(not(ldbl_max_exp_more1024))]
+// Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
+//
+//              integer array, contains the (24*i)-th to (24*i+23)-th
+//              bit of 2/pi after binary point. The corresponding
+//              floating value is
+//
+//                      ipio2[i] * 2^(-24(i+1)).
+//
+// NB: This table must have at least (e0-3)/24 + jk terms.
+//     For quad precision (e0 <= 16360, jk = 6), this is 686.
+#[cfg(target_pointer_width = "32")]
 const IPIO2 : [i32; 66] = [
     0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
     0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
@@ -126,7 +29,7 @@ const IPIO2 : [i32; 66] = [
     0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
 ];
 
-#[cfg(ldbl_max_exp_more1024)]
+#[cfg(target_pointer_width = "64")]
 const IPIO2 : [i32; 690] = [
     0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
     0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
@@ -256,6 +159,97 @@ const PIO2 : [f64; 8] = [
     2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
 ];
 
+// fn rem_pio2_large(x : &[f64], y : &mut [f64], e0 : i32, prec : usize) -> i32
+//
+// Input parameters:
+//      x[]     The input value (must be positive) is broken into nx
+//              pieces of 24-bit integers in double precision format.
+//              x[i] will be the i-th 24 bit of x. The scaled exponent
+//              of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
+//              match x's up to 24 bits.
+//
+//              Example of breaking a double positive z into x[0]+x[1]+x[2]:
+//                      e0 = ilogb(z)-23
+//                      z  = scalbn(z,-e0)
+//              for i = 0,1,2
+//                      x[i] = floor(z)
+//                      z    = (z-x[i])*2**24
+//
+//      y[]     ouput result in an array of double precision numbers.
+//              The dimension of y[] is:
+//                      24-bit  precision       1
+//                      53-bit  precision       2
+//                      64-bit  precision       2
+//                      113-bit precision       3
+//              The actual value is the sum of them. Thus for 113-bit
+//              precison, one may have to do something like:
+//
+//              long double t,w,r_head, r_tail;
+//              t = (long double)y[2] + (long double)y[1];
+//              w = (long double)y[0];
+//              r_head = t+w;
+//              r_tail = w - (r_head - t);
+//
+//      e0      The exponent of x[0]. Must be <= 16360 or you need to
+//              expand the ipio2 table.
+//
+//      prec    an integer indicating the precision:
+//                      0       24  bits (single)
+//                      1       53  bits (double)
+//                      2       64  bits (extended)
+//                      3       113 bits (quad)
+//
+// Here is the description of some local variables:
+//
+//      jk      jk+1 is the initial number of terms of ipio2[] needed
+//              in the computation. The minimum and recommended value
+//              for jk is 3,4,4,6 for single, double, extended, and quad.
+//              jk+1 must be 2 larger than you might expect so that our
+//              recomputation test works. (Up to 24 bits in the integer
+//              part (the 24 bits of it that we compute) and 23 bits in
+//              the fraction part may be lost to cancelation before we
+//              recompute.)
+//
+//      jz      local integer variable indicating the number of
+//              terms of ipio2[] used.
+//
+//      jx      nx - 1
+//
+//      jv      index for pointing to the suitable ipio2[] for the
+//              computation. In general, we want
+//                      ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
+//              is an integer. Thus
+//                      e0-3-24*jv >= 0 or (e0-3)/24 >= jv
+//              Hence jv = max(0,(e0-3)/24).
+//
+//      jp      jp+1 is the number of terms in PIo2[] needed, jp = jk.
+//
+//      q[]     double array with integral value, representing the
+//              24-bits chunk of the product of x and 2/pi.
+//
+//      q0      the corresponding exponent of q[0]. Note that the
+//              exponent for q[i] would be q0-24*i.
+//
+//      PIo2[]  double precision array, obtained by cutting pi/2
+//              into 24 bits chunks.
+//
+//      f[]     ipio2[] in floating point
+//
+//      iq[]    integer array by breaking up q[] in 24-bits chunk.
+//
+//      fq[]    final product of x*(2/pi) in fq[0],..,fq[jk]
+//
+//      ih      integer. If >0 it indicates q[] is >= 0.5, hence
+//              it also indicates the *sign* of the result.
+
+/// Return the last three digits of N with y = x - N*pi/2
+/// so that |y| < pi/2.
+///
+/// The method is to compute the integer (mod 8) and fraction parts of
+/// (2/pi)*x without doing the full multiplication. In general we
+/// skip the part of the product that are known to be a huge integer (
+/// more accurately, = 0 mod 8 ). Thus the number of operations are
+/// independent of the exponent of the input.
 #[inline]
 pub(crate) fn rem_pio2_large(x : &[f64], y : &mut [f64], e0 : i32, prec : usize) -> i32 {
     let x1p24  = f64::from_bits(0x4170000000000000); // 0x1p24 === 2 ^ 24