Wrong default precision for %a

#1797

stb

nothings

Issue Details

3 months ago

stefano-zanotti-88

View on GitHub

I Want to Work on This Issue

stefano-zanotti-88

opened 3 months ago

Author

The standard says:

if the precision is missing and FLT_RADIX is a power of 2, then the precision is sufficient for an exact representation of the value

Instead, stb_sprintf uses a default precision of 6 (just as it does for %e, %f and %g). Fix: replace https://github.com/nothings/stb/blob/f0569113c93ad095470c54bf34a17b36646bbbb5/stb_sprintf.h#L643-L659 with:

if (stbsp__real_to_parts((stbsp__int64 *)&n64, &dp, fv))
   fl |= STBSP__NEGATIVE;

if (dp != -1023)
   n64 |= (((stbsp__uint64)1) << 52);

if (pr == -1) {
   // 52 fractional bits of mantissa (pr is only for the fraction, we ignore the implicit integral 1)
   // means exactly 13 nibbles/chars of precision
   if (dp == -1023 && !n64) { // +-0.0 -> precision 0
      pr = 0;
   } else {
      if (1) { // naive
         pr = 13;
         stbsp__uint64 m = n64;
         while((m & 0xF) == 0) {
            --pr;
            m >>= 4;
         }
      } else { // chunks
         stbsp__uint64 m = n64;
         pr = 13;
         if ((m & 0xFFFFFFFu) == 0) { pr -= 7; m >>= 7*4; }
         if ((m &     0xFFFu) == 0) { pr -= 3; m >>= 3*4; }
         if ((m &      0xFFu) == 0) { pr -= 2; m >>= 2*4; }
         if ((m &       0xFu) == 0) { pr -= 1; m >>= 1*4; }
      }
   }
}

s = num + 64;

stbsp__lead_sign(fl, lead);

if (dp == -1023)
   dp = (n64) ? -1022 : 0;
n64 <<= (64 - 56);
if (pr < 13)
   n64 += ((((stbsp__uint64)8) << 56) >> (pr * 4));

I don't know which of the two variants above (naive/chunks) would be better here. Another option might be a different bit hack, such as one of these: https://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightLinear

Test cases (now vs correct):

printf("%a", 2.23e-308);           // "0x1.009117p-1022" vs "0x1.0091177587f83p-1022"
printf("%a", 0x1p-1074);           // "0x0.000000p-1022" vs "0x0.0000000000001p-1022"
printf("%a", 0x0p0);               // "0x0.000000p+0"    vs "0x0p+0"
printf("%a", 0x1p0);               // "0x1.000000p+0"    vs "0x1p+0"
printf("%a", 0x1.8p0);             // "0x1.800000p+0"    vs "0x1.8p+0"
printf("%a", 0x1.cp0);             // "0x1.c00000p+0"    vs "0x1.cp+0"
printf("%a", 0x1.ep0);             // "0x1.e00000p+0"    vs "0x1.ep+0"
printf("%a", 0x1.fp0);             // "0x1.f00000p+0"    vs "0x1.fp+0"
printf("%a", 0x1.f8p0);            // "0x1.f80000p+0"    vs "0x1.f8p+0"
printf("%a", 0x1.ffp0);            // "0x1.ff0000p+0"    vs "0x1.ffp+0"
printf("%a", 0x1.fffp0);           // "0x1.fff000p+0"    vs "0x1.fffp+0"
printf("%a", 0x1.ffffp0);          // "0x1.ffff00p+0"    vs "0x1.ffffp+0"
printf("%a", 0x1.fffffp0);         // "0x1.fffff0p+0"    vs "0x1.fffffp+0"
printf("%a", 0x1.ffffffp0);        // "0x1.ffffffp+0"    vs "0x1.ffffffp+0"
printf("%a", 0x1.fffffffp0);       // "0x2.000000p+0"    vs "0x1.fffffffp+0"
printf("%a", 0x1.ffffffffp0);      // "0x2.000000p+0"    vs "0x1.ffffffffp+0"
printf("%a", 0x1.fffffffffp0);     // "0x2.000000p+0"    vs "0x1.fffffffffp+0"
printf("%a", 0x1.ffffffffffp0);    // "0x2.000000p+0"    vs "0x1.ffffffffffp+0"
printf("%a", 0x1.fffffffffffp0);   // "0x2.000000p+0"    vs "0x1.fffffffffffp+0"
printf("%a", 0x1.ffffffffffffp0);  // "0x2.000000p+0"    vs "0x1.ffffffffffffp+0"
printf("%a", 0x1.fffffffffffffp0); // "0x2.000000p+0"    vs "0x1.fffffffffffffp+0"
printf("%#a", 0x0p0);              // "0x0.000000p+0"    vs "0x0.p+0"
printf("%#a", 0x1p0);              // "0x1.000000p+0"    vs "0x1.p+0"

Note that the wrong handling of '#' for %a is discussed in #1791. The last two test cases do not produce the correct output with just the present fix -- the #1791 fix is needed as well.

I Want to Work on This Issue

stb

nothings

Issue Details

3 months ago

stefano-zanotti-88

View on GitHub

I Want to Work on This Issue

Wrong default precision for %a

#1797

stefano-zanotti-88

opened 3 months ago

Author

The standard says:

if the precision is missing and FLT_RADIX is a power of 2, then the precision is sufficient for an exact representation of the value

Instead, stb_sprintf uses a default precision of 6 (just as it does for %e, %f and %g). Fix: replace https://github.com/nothings/stb/blob/f0569113c93ad095470c54bf34a17b36646bbbb5/stb_sprintf.h#L643-L659 with:

if (stbsp__real_to_parts((stbsp__int64 *)&n64, &dp, fv))
   fl |= STBSP__NEGATIVE;

if (dp != -1023)
   n64 |= (((stbsp__uint64)1) << 52);

if (pr == -1) {
   // 52 fractional bits of mantissa (pr is only for the fraction, we ignore the implicit integral 1)
   // means exactly 13 nibbles/chars of precision
   if (dp == -1023 && !n64) { // +-0.0 -> precision 0
      pr = 0;
   } else {
      if (1) { // naive
         pr = 13;
         stbsp__uint64 m = n64;
         while((m & 0xF) == 0) {
            --pr;
            m >>= 4;
         }
      } else { // chunks
         stbsp__uint64 m = n64;
         pr = 13;
         if ((m & 0xFFFFFFFu) == 0) { pr -= 7; m >>= 7*4; }
         if ((m &     0xFFFu) == 0) { pr -= 3; m >>= 3*4; }
         if ((m &      0xFFu) == 0) { pr -= 2; m >>= 2*4; }
         if ((m &       0xFu) == 0) { pr -= 1; m >>= 1*4; }
      }
   }
}

s = num + 64;

stbsp__lead_sign(fl, lead);

if (dp == -1023)
   dp = (n64) ? -1022 : 0;
n64 <<= (64 - 56);
if (pr < 13)
   n64 += ((((stbsp__uint64)8) << 56) >> (pr * 4));

I don't know which of the two variants above (naive/chunks) would be better here. Another option might be a different bit hack, such as one of these: https://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightLinear

Test cases (now vs correct):

printf("%a", 2.23e-308);           // "0x1.009117p-1022" vs "0x1.0091177587f83p-1022"
printf("%a", 0x1p-1074);           // "0x0.000000p-1022" vs "0x0.0000000000001p-1022"
printf("%a", 0x0p0);               // "0x0.000000p+0"    vs "0x0p+0"
printf("%a", 0x1p0);               // "0x1.000000p+0"    vs "0x1p+0"
printf("%a", 0x1.8p0);             // "0x1.800000p+0"    vs "0x1.8p+0"
printf("%a", 0x1.cp0);             // "0x1.c00000p+0"    vs "0x1.cp+0"
printf("%a", 0x1.ep0);             // "0x1.e00000p+0"    vs "0x1.ep+0"
printf("%a", 0x1.fp0);             // "0x1.f00000p+0"    vs "0x1.fp+0"
printf("%a", 0x1.f8p0);            // "0x1.f80000p+0"    vs "0x1.f8p+0"
printf("%a", 0x1.ffp0);            // "0x1.ff0000p+0"    vs "0x1.ffp+0"
printf("%a", 0x1.fffp0);           // "0x1.fff000p+0"    vs "0x1.fffp+0"
printf("%a", 0x1.ffffp0);          // "0x1.ffff00p+0"    vs "0x1.ffffp+0"
printf("%a", 0x1.fffffp0);         // "0x1.fffff0p+0"    vs "0x1.fffffp+0"
printf("%a", 0x1.ffffffp0);        // "0x1.ffffffp+0"    vs "0x1.ffffffp+0"
printf("%a", 0x1.fffffffp0);       // "0x2.000000p+0"    vs "0x1.fffffffp+0"
printf("%a", 0x1.ffffffffp0);      // "0x2.000000p+0"    vs "0x1.ffffffffp+0"
printf("%a", 0x1.fffffffffp0);     // "0x2.000000p+0"    vs "0x1.fffffffffp+0"
printf("%a", 0x1.ffffffffffp0);    // "0x2.000000p+0"    vs "0x1.ffffffffffp+0"
printf("%a", 0x1.fffffffffffp0);   // "0x2.000000p+0"    vs "0x1.fffffffffffp+0"
printf("%a", 0x1.ffffffffffffp0);  // "0x2.000000p+0"    vs "0x1.ffffffffffffp+0"
printf("%a", 0x1.fffffffffffffp0); // "0x2.000000p+0"    vs "0x1.fffffffffffffp+0"
printf("%#a", 0x0p0);              // "0x0.000000p+0"    vs "0x0.p+0"
printf("%#a", 0x1p0);              // "0x1.000000p+0"    vs "0x1.p+0"

Note that the wrong handling of '#' for %a is discussed in #1791. The last two test cases do not produce the correct output with just the present fix -- the #1791 fix is needed as well.

I Want to Work on This Issue