Wrong default precision for %a
The standard says:
if the precision is missing and FLT_RADIX is a power of 2, then the precision is sufficient for an exact representation of the value
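Instead, stb_sprintf falls back to a default precision of 6 (the same default it uses for %e, %f and %g), so short values are padded with trailing zeros and long mantissas are rounded. Fix: replace the code at https://github.com/nothings/stb/blob/f0569113c93ad095470c54bf34a17b36646bbbb5/stb_sprintf.h#L643-L659 with: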
```c
if (stbsp__real_to_parts((stbsp__int64 *)&n64, &dp, fv))
   fl |= STBSP__NEGATIVE;

if (dp != -1023)
   n64 |= (((stbsp__uint64)1) << 52);

if (pr == -1) {
   // 52 fractional bits of mantissa (pr is only for the fraction, we ignore the implicit integral 1)
   // means exactly 13 nibbles/chars of precision
   if (dp == -1023 && !n64) { // +-0.0 -> precision 0
      pr = 0;
   } else {
      if (1) { // naive
         pr = 13;
         stbsp__uint64 m = n64;
         while((m & 0xF) == 0) {
            --pr;
            m >>= 4;
         }
      } else { // chunks
         stbsp__uint64 m = n64;
         pr = 13;
         if ((m & 0xFFFFFFFu) == 0) { pr -= 7; m >>= 7*4; }
         if ((m & 0xFFFu) == 0) { pr -= 3; m >>= 3*4; }
         if ((m & 0xFFu) == 0) { pr -= 2; m >>= 2*4; }
         if ((m & 0xFu) == 0) { pr -= 1; m >>= 1*4; }
      }
   }
}

s = num + 64;
stbsp__lead_sign(fl, lead);

if (dp == -1023)
   dp = (n64) ? -1022 : 0;
n64 <<= (64 - 56);
if (pr < 13)
   n64 += ((((stbsp__uint64)8) << 56) >> (pr * 4));
```
I don't know which of the two variants above (naive/chunks) would be better here; another option would be a bit hack for counting trailing zeros, such as the ones listed at https://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightLinear
Test cases (now vs correct):
```c
printf("%a", 2.23e-308);             // "0x1.009117p-1022" vs "0x1.0091177587f83p-1022"
printf("%a", 0x1p-1074);             // "0x0.000000p-1022" vs "0x0.0000000000001p-1022"
printf("%a", 0x0p0);                 // "0x0.000000p+0" vs "0x0p+0"
printf("%a", 0x1p0);                 // "0x1.000000p+0" vs "0x1p+0"
printf("%a", 0x1.8p0);               // "0x1.800000p+0" vs "0x1.8p+0"
printf("%a", 0x1.cp0);               // "0x1.c00000p+0" vs "0x1.cp+0"
printf("%a", 0x1.ep0);               // "0x1.e00000p+0" vs "0x1.ep+0"
printf("%a", 0x1.fp0);               // "0x1.f00000p+0" vs "0x1.fp+0"
printf("%a", 0x1.f8p0);              // "0x1.f80000p+0" vs "0x1.f8p+0"
printf("%a", 0x1.ffp0);              // "0x1.ff0000p+0" vs "0x1.ffp+0"
printf("%a", 0x1.fffp0);             // "0x1.fff000p+0" vs "0x1.fffp+0"
printf("%a", 0x1.ffffp0);            // "0x1.ffff00p+0" vs "0x1.ffffp+0"
printf("%a", 0x1.fffffp0);           // "0x1.fffff0p+0" vs "0x1.fffffp+0"
printf("%a", 0x1.ffffffp0);          // "0x1.ffffffp+0" vs "0x1.ffffffp+0"
printf("%a", 0x1.fffffffp0);         // "0x2.000000p+0" vs "0x1.fffffffp+0"
printf("%a", 0x1.ffffffffp0);        // "0x2.000000p+0" vs "0x1.ffffffffp+0"
printf("%a", 0x1.fffffffffp0);       // "0x2.000000p+0" vs "0x1.fffffffffp+0"
printf("%a", 0x1.ffffffffffp0);      // "0x2.000000p+0" vs "0x1.ffffffffffp+0"
printf("%a", 0x1.fffffffffffp0);     // "0x2.000000p+0" vs "0x1.fffffffffffp+0"
printf("%a", 0x1.ffffffffffffp0);    // "0x2.000000p+0" vs "0x1.ffffffffffffp+0"
printf("%a", 0x1.fffffffffffffp0);   // "0x2.000000p+0" vs "0x1.fffffffffffffp+0"
printf("%#a", 0x0p0);                // "0x0.000000p+0" vs "0x0.p+0"
printf("%#a", 0x1p0);                // "0x1.000000p+0" vs "0x1.p+0"
```
Note that the wrong handling of '#' for %a is discussed in #1791. The last two test cases do not produce the correct output with just the present fix -- the #1791 fix is needed as well.
Wrong default precision for %a
The standard says:
if the precision is missing and FLT_RADIX is a power of 2, then the precision is sufficient for an exact representation of the value
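Instead, stb_sprintf falls back to a default precision of 6 (the same default it uses for %e, %f and %g), so short values are padded with trailing zeros and long mantissas are rounded. Fix: replace the code at https://github.com/nothings/stb/blob/f0569113c93ad095470c54bf34a17b36646bbbb5/stb_sprintf.h#L643-L659 with: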
```c
if (stbsp__real_to_parts((stbsp__int64 *)&n64, &dp, fv))
   fl |= STBSP__NEGATIVE;

if (dp != -1023)
   n64 |= (((stbsp__uint64)1) << 52);

if (pr == -1) {
   // 52 fractional bits of mantissa (pr is only for the fraction, we ignore the implicit integral 1)
   // means exactly 13 nibbles/chars of precision
   if (dp == -1023 && !n64) { // +-0.0 -> precision 0
      pr = 0;
   } else {
      if (1) { // naive
         pr = 13;
         stbsp__uint64 m = n64;
         while((m & 0xF) == 0) {
            --pr;
            m >>= 4;
         }
      } else { // chunks
         stbsp__uint64 m = n64;
         pr = 13;
         if ((m & 0xFFFFFFFu) == 0) { pr -= 7; m >>= 7*4; }
         if ((m & 0xFFFu) == 0) { pr -= 3; m >>= 3*4; }
         if ((m & 0xFFu) == 0) { pr -= 2; m >>= 2*4; }
         if ((m & 0xFu) == 0) { pr -= 1; m >>= 1*4; }
      }
   }
}

s = num + 64;
stbsp__lead_sign(fl, lead);

if (dp == -1023)
   dp = (n64) ? -1022 : 0;
n64 <<= (64 - 56);
if (pr < 13)
   n64 += ((((stbsp__uint64)8) << 56) >> (pr * 4));
```
I don't know which of the two variants above (naive/chunks) would be better here; another option would be a bit hack for counting trailing zeros, such as the ones listed at https://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightLinear
Test cases (now vs correct):
```c
printf("%a", 2.23e-308);             // "0x1.009117p-1022" vs "0x1.0091177587f83p-1022"
printf("%a", 0x1p-1074);             // "0x0.000000p-1022" vs "0x0.0000000000001p-1022"
printf("%a", 0x0p0);                 // "0x0.000000p+0" vs "0x0p+0"
printf("%a", 0x1p0);                 // "0x1.000000p+0" vs "0x1p+0"
printf("%a", 0x1.8p0);               // "0x1.800000p+0" vs "0x1.8p+0"
printf("%a", 0x1.cp0);               // "0x1.c00000p+0" vs "0x1.cp+0"
printf("%a", 0x1.ep0);               // "0x1.e00000p+0" vs "0x1.ep+0"
printf("%a", 0x1.fp0);               // "0x1.f00000p+0" vs "0x1.fp+0"
printf("%a", 0x1.f8p0);              // "0x1.f80000p+0" vs "0x1.f8p+0"
printf("%a", 0x1.ffp0);              // "0x1.ff0000p+0" vs "0x1.ffp+0"
printf("%a", 0x1.fffp0);             // "0x1.fff000p+0" vs "0x1.fffp+0"
printf("%a", 0x1.ffffp0);            // "0x1.ffff00p+0" vs "0x1.ffffp+0"
printf("%a", 0x1.fffffp0);           // "0x1.fffff0p+0" vs "0x1.fffffp+0"
printf("%a", 0x1.ffffffp0);          // "0x1.ffffffp+0" vs "0x1.ffffffp+0"
printf("%a", 0x1.fffffffp0);         // "0x2.000000p+0" vs "0x1.fffffffp+0"
printf("%a", 0x1.ffffffffp0);        // "0x2.000000p+0" vs "0x1.ffffffffp+0"
printf("%a", 0x1.fffffffffp0);       // "0x2.000000p+0" vs "0x1.fffffffffp+0"
printf("%a", 0x1.ffffffffffp0);      // "0x2.000000p+0" vs "0x1.ffffffffffp+0"
printf("%a", 0x1.fffffffffffp0);     // "0x2.000000p+0" vs "0x1.fffffffffffp+0"
printf("%a", 0x1.ffffffffffffp0);    // "0x2.000000p+0" vs "0x1.ffffffffffffp+0"
printf("%a", 0x1.fffffffffffffp0);   // "0x2.000000p+0" vs "0x1.fffffffffffffp+0"
printf("%#a", 0x0p0);                // "0x0.000000p+0" vs "0x0.p+0"
printf("%#a", 0x1p0);                // "0x1.000000p+0" vs "0x1.p+0"
```
Note that the wrong handling of '#' for %a is discussed in #1791. The last two test cases do not produce the correct output with just the present fix -- the #1791 fix is needed as well.