From 69fa6118d4f22e9d7cf2f9591889b8774cbc2e3e Mon Sep 17 00:00:00 2001 From: Tim Kientzle Date: Wed, 2 Jul 2025 10:58:31 -0700 Subject: [PATCH 01/19] Reimplement floating-point description implementation in Swift. Note: This is still incomplete. I need to finish porting the Float16 and Float80 support before it can fully replace the existing C implementation. --- stdlib/public/core/CMakeLists.txt | 1 + .../public/core/FloatingPointToString.swift | 1291 +++++++++++++++++ .../public/core/FloatingPointTypes.swift.gyb | 22 + stdlib/public/core/GroupInfo.json | 1 + 4 files changed, 1315 insertions(+) create mode 100644 stdlib/public/core/FloatingPointToString.swift diff --git a/stdlib/public/core/CMakeLists.txt b/stdlib/public/core/CMakeLists.txt index 8b8760d557864..6c63ff15e2219 100644 --- a/stdlib/public/core/CMakeLists.txt +++ b/stdlib/public/core/CMakeLists.txt @@ -261,6 +261,7 @@ split_embedded_sources( NORMAL AtomicInt.swift.gyb EMBEDDED FloatingPointParsing.swift.gyb + EMBEDDED FloatingPointToString.swift EMBEDDED FloatingPointTypes.swift.gyb EMBEDDED IntegerTypes.swift.gyb EMBEDDED LegacyInt128.swift.gyb diff --git a/stdlib/public/core/FloatingPointToString.swift b/stdlib/public/core/FloatingPointToString.swift new file mode 100644 index 0000000000000..bbe5019b71905 --- /dev/null +++ b/stdlib/public/core/FloatingPointToString.swift @@ -0,0 +1,1291 @@ +//===--- FloatingPointToString.swift -------------------------*- Swift -*-===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2018-2020 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors +// +//===---------------------------------------------------------------------===// +// +// Converts floating-point types to "optimal" text formats. 
+// +// The "optimal" form is one with a minimum number of significant +// digits which will parse to exactly the original value. This form +// is ideal for JSON serialization and general printing where you +// don't have specific requirements on the number of significant +// digits. +// +//===---------------------------------------------------------------------===// +/// +/// For binary16, this code uses a simple approach that is normally +/// implemented with variable-length arithmetic. However, due to +/// the limited range of binary16, this can be implemented simply +/// with only 32-bit integer arithmetic. +/// +/// For other formats, we use a modified form of the Grisu2 +/// algorithm from Florian Loitsch; "Printing Floating-Point Numbers +/// Quickly and Accurately with Integers", 2010. +/// https://doi.org/10.1145/1806596.1806623 +/// +/// Some of the Grisu2 modifications were suggested by the "Errol +/// paper": Marc Andrysco, Ranjit Jhala, Sorin Lerner; "Printing +/// Floating-Point Numbers: A Faster, Always Correct Method", 2016. +/// https://doi.org/10.1145/2837614.2837654 +/// In particular, the Errol paper explored the impact of higher-precision +/// fixed-width arithmetic on Grisu2 and showed a way to rapidly test +/// the correctness of such algorithms. +/// +/// A few further improvements were inspired by the Ryu algorithm +/// from Ulf Adams; "Ryū: fast float-to-string conversion", 2018. +/// https://doi.org/10.1145/3296979.3192369 +/// +/// In summary, this implementation is: +/// +/// * Fast. It uses only fixed-width integer arithmetic and has +/// constant memory requirements. For double-precision values on +/// 64-bit processors, it is competitive with Ryu. For double-precision +/// values on 32-bit processors, and higher-precision values on all +/// processors, it is considerably faster. +/// +/// * Always Accurate. Converting the decimal form back to binary +/// will always yield exactly the same value. 
For the IEEE 754 +/// formats, the round-trip will produce exactly the same bit +/// pattern in memory. +/// +/// * Always Short. This always selects an accurate result with the +/// minimum number of significant digits. +/// +/// * Always Close. Among all accurate, short results, this always +/// chooses the result that is closest to the exact floating-point +/// value. (In case of an exact tie, it rounds the last digit even.) +/// +/// Beyond the requirements above, the precise text form has been +/// tuned to try to maximize readability: +/// * Always include a '.' or an 'e' so the result is obviously +/// a floating-point value +/// * Exponential form always has 1 digit before the decimal point +/// * When present, a '.' is never the first or last character +/// * There is a consecutive range of integer values that can be +/// represented in double (-2^54...2^54). Never use exponential +/// form for integral numbers in this range. +/// * Generally follow existing practice: Don't use exponential +/// form for fractional values bigger than 10^-4; always write at +/// least 2 digits for an exponent. +/// * Apart from the above, we do prefer shorter output. + +/// +/// This Swift implementation was ported from an earlier C version; +/// the output is exactly the same in all cases. +/// A few notes on the Swift transcription: +/// * We use MutableSpan and MutableRawSpan to +/// identify blocks of working memory. +/// * We use unsafe/unchecked operations extensively, supported +/// by several years of analysis and testing to ensure that +/// no unsafety actually occurs. For Float32, that testing +/// was exhaustive -- we verified all 4 billion possible Float32 values. +/// * The Swift code uses an idiom of building up to 8 ASCII characters +/// in a UInt64 and then writing the whole block to memory. 
///
// ----------------------------------------------------------------------------

// Implement the legacy ABI on top of the new one

/// Legacy-ABI entry point that formats a `Float32` as ASCII text.
///
/// The digits are produced by `Float32ToASCII` somewhere inside the
/// buffer and then moved so the text begins at `textBuffer[0]`.
///
/// - Parameters:
///   - textBuffer: Destination storage for the ASCII text.
///   - bufferLength: Number of bytes available at `textBuffer`.
///     The formatter traps (via `precondition`) on fewer than 32 bytes.
///   - value: The value to format.
///   - debug: Not consulted by this implementation.
/// - Returns: The number of bytes of text now at the start of
///   `textBuffer` (0 when the Span-based formatter is unavailable).
@_silgen_name("swift_float32ToString2")
internal func _float32ToStringImpl2(
  _ textBuffer: UnsafeMutablePointer<UTF8.CodeUnit>,
  _ bufferLength: UInt,
  _ value: Float32,
  _ debug: Bool
) -> UInt64 {
  // Code below works with raw memory.
  var buffer = unsafe MutableSpan<UTF8.CodeUnit>(
    _unchecked: textBuffer,
    count: Int(bufferLength))
  let textRange = Float32ToASCII(value: value, buffer: &buffer)
  let textLength = textRange.upperBound - textRange.lowerBound

  // Move the text to the start of the buffer
  if textRange.lowerBound != 0 {
    unsafe _memmove(
      dest: textBuffer,
      src: textBuffer + textRange.lowerBound,
      size: UInt(truncatingIfNeeded: textLength))
  }
  return UInt64(truncatingIfNeeded: textLength)
}

/// Legacy-ABI entry point that formats a `Float64` as ASCII text.
///
/// The digits are produced by `Float64ToASCII` somewhere inside the
/// buffer and then moved so the text begins at `textBuffer[0]`.
///
/// - Parameters:
///   - textBuffer: Destination storage for the ASCII text.
///   - bufferLength: Number of bytes available at `textBuffer`.
///     The formatter traps (via `precondition`) on fewer than 32 bytes.
///   - value: The value to format.
///   - debug: Not consulted by this implementation.
/// - Returns: The number of bytes of text now at the start of
///   `textBuffer` (0 when the Span-based formatter is unavailable).
@_silgen_name("swift_float64ToString2")
internal func _float64ToStringImpl2(
  _ textBuffer: UnsafeMutablePointer<UTF8.CodeUnit>,
  _ bufferLength: UInt,
  _ value: Float64,
  _ debug: Bool
) -> UInt64 {
  // Code below works with raw memory.
  var buffer = unsafe MutableSpan<UTF8.CodeUnit>(
    _unchecked: textBuffer,
    count: Int(bufferLength))
  let textRange = Float64ToASCII(value: value, buffer: &buffer)
  let textLength = textRange.upperBound - textRange.lowerBound

  // Move the text to the start of the buffer
  if textRange.lowerBound != 0 {
    unsafe _memmove(
      dest: textBuffer,
      src: textBuffer + textRange.lowerBound,
      size: UInt(truncatingIfNeeded: textLength))
  }
  return UInt64(truncatingIfNeeded: textLength)
}

/// Writes the shortest round-trippable text for `f` into `utf8Buffer`.
///
/// This is an availability shim around `_Float32ToASCII`.
///
/// - Parameters:
///   - f: The value to format.
///   - utf8Buffer: Working storage; must hold at least 32 bytes.
/// - Returns: The range of `utf8Buffer` holding the text. The text
///   generally does not start at offset 0. Returns the empty range
///   `0..<0` when the implementation is unavailable.
internal func Float32ToASCII(
  value f: Float32,
  buffer utf8Buffer: inout MutableSpan<UTF8.CodeUnit>
) -> Range<Int> {
  if #available(macOS 9999, *) {
    return _Float32ToASCII(value: f, buffer: &utf8Buffer)
  } else {
    return 0..<0
  }
}

/// Formats a `Float32` into `utf8Buffer` and returns the range of
/// bytes holding the result.
///
/// Infinity, NaN and zero are delegated to `infinity`, `nan_details`
/// and `zero`; everything else goes through the eight steps below and
/// ends in `finishFormatting`.
///
/// - Precondition: `utf8Buffer.count >= 32`.
@available(macOS 9999, *)
fileprivate func _Float32ToASCII(
  value f: Float32,
  buffer utf8Buffer: inout MutableSpan<UTF8.CodeUnit>
) -> Range<Int> {
  // Note: The algorithm here is the same as for Float64, only
  // with narrower arithmetic. Refer to `_Float64ToASCII` for
  // more detailed comments and explanation.

  // We need a MutableRawSpan in order to use wide store/load operations
  precondition(utf8Buffer.count >= 32)
  var buffer = utf8Buffer.mutableBytes

  // Step 1: Handle the special cases, decompose the input

  let binaryExponent: Int
  let significand: Float.RawSignificand
  let exponentBias = (1 << (Float.exponentBitCount - 1)) - 2 // 126
  if f.exponentBitPattern == 0xff {
    if f.isInfinite {
      return infinity(buffer: &buffer, sign: f.sign)
    } else { // f.isNaN
      let quietBit =
        (f.significandBitPattern >> (Float.significandBitCount - 1)) & 1
      let payloadMask = UInt32(1 << (Float.significandBitCount - 2)) - 1
      let payload32 = f.significandBitPattern & payloadMask
      // NOTE(review): `quiet` is passed as true when the top significand
      // bit is clear; confirm this matches what `nan_details` expects.
      return nan_details(
        buffer: &buffer,
        sign: f.sign,
        quiet: quietBit == 0,
        payload: UInt128(truncatingIfNeeded: payload32))
    }
  } else if f.exponentBitPattern == 0 {
    if f.isZero {
      return zero(buffer: &buffer, sign: f.sign)
    } else { // f.isSubnormal
      binaryExponent = 1 - exponentBias
      significand = f.significandBitPattern &<< Float.exponentBitCount
    }
  } else {
    binaryExponent = Int(f.exponentBitPattern) &- exponentBias
    significand =
      (f.significandBitPattern &+ (1 << Float.significandBitCount))
      &<< Float.exponentBitCount
  }

  // Step 2: Determine the exact unscaled target interval

  // A zero significand field marks a power of two, where the gap to
  // the next-lower value is half the gap to the next-higher one.
  let isBoundary = (f.significandBitPattern == 0)
  let halfUlp: Float.RawSignificand = 1 << (Float.exponentBitCount - 1)
  let quarterUlp = halfUlp >> 1
  let upperMidpointExact = significand &+ halfUlp
  let lowerMidpointExact = significand &- (isBoundary ? quarterUlp : halfUlp)
  let isOddSignificand = ((f.significandBitPattern & 1) != 0)

  // Step 3: Estimate the base 10 exponent

  var base10Exponent = decimalExponentFor2ToThe(binaryExponent)

  // Step 4: Compute power-of-10 scale factor

  var powerOfTenRoundedDown: UInt64 = 0
  var powerOfTenRoundedUp: UInt64 = 0

  let bulkFirstDigits = 1
  let powerOfTenExponent = intervalContainingPowerOf10_Binary32(
    -base10Exponent &+ bulkFirstDigits &- 1,
    &powerOfTenRoundedDown, &powerOfTenRoundedUp)
  let extraBits = binaryExponent &+ powerOfTenExponent

  // Step 5: Scale the interval (with rounding)

  // Experimentally, 11 is as large as we can go here without introducing errors.
  // We need 7 to generate 2 digits at a time below.
  // 11 should allow us to generate 3 digits at a time, but
  // that doesn't seem to be any faster.
  let integerBits = 11
  let fractionBits = 64 - integerBits
  var u: UInt64
  var l: UInt64
  if isOddSignificand {
    // Narrow the interval (odd significand)
    let u1 = multiply64x32RoundingDown(powerOfTenRoundedDown, upperMidpointExact)
    u = u1 >> (integerBits - extraBits)
    let l1 = multiply64x32RoundingUp(powerOfTenRoundedUp, lowerMidpointExact)
    let bias = UInt64((1 &<< (integerBits &- extraBits)) &- 1)
    l = (l1 &+ bias) >> (integerBits &- extraBits)
  } else {
    // Widen the interval (even significand)
    let u1 = multiply64x32RoundingUp(powerOfTenRoundedUp, upperMidpointExact)
    let bias = UInt64((1 &<< (integerBits &- extraBits)) &- 1)
    u = (u1 &+ bias) >> (integerBits &- extraBits)
    let l1 = multiply64x32RoundingDown(powerOfTenRoundedDown, lowerMidpointExact)
    l = l1 >> (integerBits &- extraBits)
  }

  // Step 6: Align first digit, adjust exponent

  while u < (1 &<< fractionBits) {
    base10Exponent &-= 1
    l &*= 10
    u &*= 10
  }

  // Step 7: Generate decimal digits into the destination buffer

  var t = u
  var delta = u &- l
  let fractionMask: UInt64 = (1 << fractionBits) - 1

  // Write 8 leading zeros to the beginning of the buffer:
  unsafe buffer.storeBytes(
    of: 0x3030303030303030,
    toUncheckedByteOffset: 0,
    as: UInt64.self)

  // Overwrite the first digit at index 7:
  let firstDigit = 7
  let digit = (t >> fractionBits) &+ 0x30
  t &= fractionMask
  unsafe buffer.storeBytes(
    of: UInt8(truncatingIfNeeded: digit),
    toUncheckedByteOffset: firstDigit,
    as: UInt8.self)
  var nextDigit = firstDigit &+ 1

  // Generate 2 digits at a time...
  while (delta &* 10) < ((t &* 10) & fractionMask) {
    delta &*= 100
    t &*= 100
    let d12 = Int(truncatingIfNeeded: t >> fractionBits)
    let text = unsafe asciiDigitTable[unchecked: d12]
    unsafe buffer.storeBytes(
      of: text,
      toUncheckedByteOffset: nextDigit,
      as: UInt16.self)
    nextDigit &+= 2
    t &= fractionMask
  }

  // ... and a final single digit, if necessary
  if delta < t {
    delta &*= 10
    t &*= 10
    let text = 0x30 + UInt8(truncatingIfNeeded: t >> fractionBits)
    unsafe buffer.storeBytes(
      of: text,
      toUncheckedByteOffset: nextDigit,
      as: UInt8.self)
    nextDigit &+= 1
    t &= fractionMask
  }

  // Adjust the final digit to be closer to the original value
  if delta > t &+ (1 &<< fractionBits) {
    let skew: UInt64
    if isBoundary {
      skew = delta &- delta / 3 &- t
    } else {
      skew = delta / 2 &- t
    }
    let one = UInt64(1) << (64 - integerBits)
    let lastAccurateBit = UInt64(1) << 24
    // Fraction bits of `skew` at or above `lastAccurateBit`.
    let accurateFractionMask = (one - 1) & ~(lastAccurateBit - 1)
    let oneHalf = one >> 1
    var lastDigit = unsafe buffer.unsafeLoad(
      fromUncheckedByteOffset: nextDigit &- 1,
      as: UInt8.self)
    if ((skew &+ (lastAccurateBit >> 1)) & accurateFractionMask) == oneHalf {
      // Skew is integer + 1/2, round even after adjustment
      let adjust = skew >> (64 - integerBits)
      lastDigit &-= UInt8(truncatingIfNeeded: adjust)
      lastDigit &= ~1
    } else {
      // Round nearest
      let adjust = (skew &+ oneHalf) >> (64 - integerBits)
      lastDigit &-= UInt8(truncatingIfNeeded: adjust)
    }
    unsafe buffer.storeBytes(
      of: lastDigit,
      toUncheckedByteOffset: nextDigit &- 1,
      as: UInt8.self)
  }

  // Step 8: Finish formatting
  let forceExponential =
    (binaryExponent > 25) || (binaryExponent == 25 && !isBoundary)
  return finishFormatting(
    &buffer, f.sign, firstDigit, nextDigit,
    forceExponential, base10Exponent)
}

/// Writes the shortest round-trippable text for `d` into `utf8Buffer`.
///
/// This is an availability shim around `_Float64ToASCII`.
///
/// - Parameters:
///   - d: The value to format.
///   - utf8Buffer: Working storage; must hold at least 32 bytes.
/// - Returns: The range of `utf8Buffer` holding the text. The text
///   generally does not start at offset 0. Returns the empty range
///   `0..<0` when the implementation is unavailable.
internal func Float64ToASCII(
  value d: Float64,
  buffer utf8Buffer: inout MutableSpan<UTF8.CodeUnit>
) -> Range<Int> {
  if #available(macOS 9999, *) {
    return _Float64ToASCII(value: d, buffer: &utf8Buffer)
  } else {
    return 0..<0
  }
}

/// Formats a `Float64` into `utf8Buffer` and returns the range of
/// bytes holding the result.
///
/// Infinity, NaN and zero are delegated to `infinity`, `nan_details`
/// and `zero`; everything else goes through the eight steps below and
/// ends in `finishFormatting`.
///
/// - Precondition: `utf8Buffer.count >= 32`.
@available(macOS 9999, *)
fileprivate func _Float64ToASCII(
  value d: Float64,
  buffer utf8Buffer: inout MutableSpan<UTF8.CodeUnit>
) -> Range<Int> {
  // We need a MutableRawSpan in order to use wide store/load operations
  precondition(utf8Buffer.count >= 32)
  var buffer = utf8Buffer.mutableBytes

  //
  // Step 1: Handle the special cases, decompose the input
  //
  let binaryExponent: Int
  let significand: Double.RawSignificand
  let exponentBias = (1 << (Double.exponentBitCount - 1)) - 2 // 1022

  if d.exponentBitPattern == 0x7ff {
    if d.isInfinite {
      return infinity(buffer: &buffer, sign: d.sign)
    } else { // d.isNaN
      let quietBit =
        (d.significandBitPattern >> (Double.significandBitCount - 1)) & 1
      let payloadMask = UInt64(1 << (Double.significandBitCount - 2)) - 1
      let payload64 = d.significandBitPattern & payloadMask
      // NOTE(review): `quiet` is passed as true when the top significand
      // bit is clear; confirm this matches what `nan_details` expects.
      return nan_details(
        buffer: &buffer,
        sign: d.sign,
        quiet: quietBit == 0,
        payload: UInt128(truncatingIfNeeded: payload64))
    }
  } else if d.exponentBitPattern == 0 {
    if d.isZero {
      return zero(buffer: &buffer, sign: d.sign)
    } else { // d.isSubnormal
      binaryExponent = 1 - exponentBias
      significand = d.significandBitPattern &<< Double.exponentBitCount
    }
  } else {
    binaryExponent = Int(d.exponentBitPattern) &- exponentBias
    significand =
      (d.significandBitPattern &+ (1 << Double.significandBitCount))
      &<< Double.exponentBitCount
  }
  // The input has been decomposed as significand * 2^binaryExponent,
  // where `significand` is a 64-bit fraction with the binary
  // point at the far left.

  // Step 2: Determine the exact unscaled target interval

  // Grisu-style algorithms construct the shortest decimal digit
  // sequence within a specific interval. To build the appropriate
  // interval, we start by computing the midpoints between this
  // floating-point value and the adjacent ones. Note that this
  // step is an exact computation.

  // A zero significand field marks a power of two, where the gap to
  // the next-lower value is half the gap to the next-higher one.
  let isBoundary = (d.significandBitPattern == 0)
  let halfUlp: Double.RawSignificand = 1 << (Double.exponentBitCount - 1)
  let quarterUlp = halfUlp >> 1
  let upperMidpointExact = significand &+ halfUlp
  let lowerMidpointExact = significand &- (isBoundary ? quarterUlp : halfUlp)
  let isOddSignificand = ((d.significandBitPattern & 1) != 0)

  // Step 3: Estimate the base 10 exponent

  // Grisu algorithms are based in part on a simple technique for
  // generating a base-10 form for a binary floating-point number.
  // Start with a binary floating-point number `f * 2^e` and then
  // estimate the decimal exponent `p`. You can then rewrite your
  // original number as:
  //
  // ```
  //     f * 2^e * 10^-p * 10^p
  // ```
  //
  // The last term is part of our output, and a good estimate for
  // `p` will ensure that `2^e * 10^-p` is close to 1. Multiplying
  // the first three terms then yields a fraction suitable for
  // producing the decimal digits. Here we use a very fast estimate
  // of `p` that is never off by more than 1; we'll have
  // opportunities later to correct any error.

  var base10Exponent = decimalExponentFor2ToThe(binaryExponent)

  // Step 4: Compute power-of-10 scale factor

  // Compute `10^-p` to 128-bit precision. We generate
  // both over- and under-estimates to ensure we can exactly
  // bound the later use of these values.
  // The `powerOfTenRounded{Up,Down}` values are 128-bit
  // pure fractions with the decimal point at the far left.

  var powerOfTenRoundedDown: UInt128 = 0
  var powerOfTenRoundedUp: UInt128 = 0

  // Note the extra factor of 10^bulkFirstDigits -- that will give
  // us a headstart on digit generation later on. (In contrast, Ryu
  // uses an extra factor of 10^17 here to get all the digits up
  // front, but then has to back out any extra digits. Doing that
  // with a 17-digit value requires 64-bit division, which is the
  // root cause of Ryu's poor performance on 32-bit processors. We
  // also might have to back out extra digits if 7 is too many, but
  // will only need 32-bit division in that case.)

  let bulkFirstDigits = 7
  let bulkFirstDigitFactor = 1000000 // 10^(bulkFirstDigits - 1)

  let powerOfTenExponent = intervalContainingPowerOf10_Binary64(
    -base10Exponent &+ bulkFirstDigits &- 1,
    &powerOfTenRoundedDown, &powerOfTenRoundedUp)

  let extraBits = binaryExponent + powerOfTenExponent

  // Step 5: Scale the interval (with rounding)

  // As mentioned above, the final digit generation works
  // with an interval, so we actually apply the scaling
  // to the upper and lower midpoint values separately.

  // As part of the scaling here, we'll switch from a pure
  // fraction with zero bit integer portion and 128-bit fraction
  // to a fixed-point form with 32 bits in the integer portion.

  let integerBits = 32
  let roundingBias = UInt128(
    (1 &<< UInt64(truncatingIfNeeded: integerBits &- extraBits)) &- 1)
  var u: UInt128
  var l: UInt128
  if isOddSignificand {
    // Case A: Narrow the interval (odd significand)

    // Loitsch' original Grisu2 always rounds so as to narrow the
    // interval. Since our digit generation will select a value
    // within the scaled interval, narrowing the interval
    // guarantees that we will find a digit sequence that converts
    // back to the original value.

    // This ensures accuracy but, as explained in Loitsch' paper,
    // this carries a risk that there will be a shorter digit
    // sequence outside of our narrowed interval that we will
    // miss. This risk obviously gets lower with increased
    // precision, but it wasn't until the Errol paper that anyone
    // had a good way to test whether a particular implementation
    // had sufficient precision. That paper shows a way to enumerate
    // the worst-case numbers; those numbers that are extremely close
    // to the mid-points between adjacent floating-point values.
    // These are the values that might sit just outside of the
    // narrowed interval. By testing these values, we can verify
    // the correctness of our implementation.

    // Multiply out the upper midpoint, rounding down...
    let u1 = multiply128x64RoundingDown(powerOfTenRoundedDown, upperMidpointExact)
    // Account for residual binary exponent and adjust
    // to the fixed-point format
    u = u1 >> (integerBits - extraBits)

    // Conversely for the lower midpoint...
    let l1 = multiply128x64RoundingUp(powerOfTenRoundedUp, lowerMidpointExact)
    l = (l1 + roundingBias) >> (integerBits - extraBits)
  } else {
    // Case B: Widen the interval (even significand)

    // As explained in Errol Theorem 6, in certain cases there is
    // a short decimal representation at the exact boundary of the
    // scaled interval. When such a number is converted back to
    // binary, it will get rounded to the adjacent even
    // significand.

    // So when the significand is even, we round so as to widen
    // the interval in order to ensure that the exact midpoints
    // are considered. Of course, this ensures that we find a
    // short result but carries a risk of selecting a result
    // outside of the exact scaled interval (which would be
    // inaccurate).
    // (This technique of rounding differently for even/odd significands
    // seems to be new; I've not seen it described in any of the
    // papers on floating-point printing.)

    // The same testing approach described above (based on results
    // in the Errol paper) also applies
    // to this case.

    let u1 = multiply128x64RoundingUp(powerOfTenRoundedUp, upperMidpointExact)
    u = (u1 &+ roundingBias) >> (integerBits - extraBits)
    let l1 = multiply128x64RoundingDown(powerOfTenRoundedDown, lowerMidpointExact)
    l = l1 >> (integerBits - extraBits)
  }

  // Step 6: Align the first digit, adjust exponent

  // Calculations above used an estimate for the power-of-ten scale.
  // Here, we compensate for any error in that estimate by testing
  // whether we have the expected number of digits in the integer
  // portion and correcting as necessary. This also serves to
  // prune leading zeros from subnormals.

  // Except for subnormals, this loop never runs more than once.
  // For subnormals, this might run as many as 16 times.
  let minimumU = UInt128(bulkFirstDigitFactor) << (128 - integerBits)
  while u < minimumU {
    base10Exponent -= 1
    l &*= 10
    u &*= 10
  }

  // Step 7: Produce decimal digits

  // One standard approach generates digits for the scaled upper and
  // lower boundaries and stops at the first digit that
  // differs. For example, note that 0.1234 is the shortest decimal
  // between u = 0.123456 and l = 0.123345.

  // Grisu optimizes this by generating digits for the upper bound
  // (multiplying by 10 to isolate each digit) while simultaneously
  // scaling the interval width `delta`. As we remove each digit
  // from the upper bound, the remainder is the difference between
  // the base-10 value generated so far and the true upper bound.
  // When that remainder is less than the scaled width of the
  // interval, we know the current digits specify a value within the
  // target interval.

  // The logic below actually blends three different digit-generation
  // strategies:
  // * The first digits are already in the integer portion of the
  //   fixed-point value, thanks to the `bulkFirstDigits` factor above.
  //   We can just break those down and write them out.
  // * If we generated too many digits, we use a Ryu-inspired technique
  //   to backtrack.
  // * If we generated too few digits (the usual case), we use an
  //   optimized form of the Grisu2 method to produce the remaining
  //   values.

  //
  // Generate digits and build the output.
  //

  // Generate digits for `t` with interval width `delta = u - l`
  // As above, these are fixed-point with 32-bit integer, 96-bit fraction
  var t = u
  var delta = u &- l
  let fractionMask = (UInt128(1) << 96) - 1

  var nextDigit = 5
  var firstDigit = nextDigit
  unsafe buffer.storeBytes(
    of: 0x3030303030303030 as UInt64,
    toUncheckedByteOffset: 0,
    as: UInt64.self)

  // Our initial scaling gave us the first 7 digits already:
  let d12345678 = UInt32(truncatingIfNeeded: t._high >> 32)
  t &= fractionMask

  if delta >= t {
    // Oops! We have too many digits. Back out the extra ones to
    // get the right answer. This is similar to Ryu, but since
    // we've only produced seven digits, we only need 32-bit
    // arithmetic here. (Ryu needs 64-bit arithmetic to back out
    // digits, which severely compromises performance on 32-bit
    // processors. The same problem occurs with Ryu for 128-bit
    // floats on 64-bit processors.)
    // A few notes:
    // * Our target hardware always supports 32-bit hardware division,
    //   so this should be reasonably fast.
    // * For small integers (like "2.0"), Ryu would have to back out 16
    //   digits; we only have to back out 6.
    // * Very few double-precision values actually need fewer than 7
    //   digits. So this is rarely used except in workloads that
    //   specifically use double for small integers.

    // Why this is critical for performance: In order to use the
    // 8-digits-at-a-time optimization below, we need at least 30
    // bits in the integer part of our fixed-point format above.
    // If we only use bulkDigits = 1, that leaves only 128 - 30 =
    // 98 bit accuracy for our scaling step, which isn't enough
    // (experiments suggest that binary64 needs ~110 bits for
    // correctness). So we have to use a large bulkDigits value
    // to make full use of the 128-bit scaling above, which forces
    // us to have some form of logic to handle the case of too
    // many digits. The alternatives are either to use >128 bit
    // arithmetic, or to back up and repeat the original scaling
    // with bulkDigits = 1.

    let uHigh = u._high
    let lHigh = (l &+ UInt128(UInt64.max))._high
    // Recover the original value from the interval ends. At a
    // power-of-two boundary the lower half-interval is half as wide
    // as the upper one, so the value sits at (u + 2l) / 3 rather
    // than at the simple midpoint. This must test the significand
    // bit field: the `significand` property is never zero for the
    // finite nonzero values that reach this point.
    let tHigh: UInt64
    if isBoundary {
      tHigh = (uHigh &+ lHigh &* 2) / 3
    } else {
      tHigh = (uHigh &+ lHigh) / 2
    }
    var u0 = UInt32(truncatingIfNeeded: uHigh >> (64 - integerBits))
    var l0 = UInt32(truncatingIfNeeded: lHigh >> (64 - integerBits))
    if lHigh & ((1 << (64 - integerBits)) - 1) != 0 {
      l0 &+= 1
    }
    var t0 = UInt32(truncatingIfNeeded: tHigh >> (64 - integerBits))
    var t0digits = 8

    var u1 = u0 / 10
    var l1 = (l0 &+ 9) / 10
    var trailingZeros = (t == 0)
    var droppedDigit = UInt32(
      truncatingIfNeeded: ((tHigh &* 10) >> (64 - integerBits)) % 10)
    while u1 >= l1 && u1 != 0 {
      u0 = u1
      l0 = l1
      trailingZeros = trailingZeros && (droppedDigit == 0)
      droppedDigit = t0 % 10
      t0 /= 10
      t0digits -= 1
      u1 = u0 / 10
      l1 = (l0 &+ 9) / 10
    }
    // Correct the final digit
    if droppedDigit > 5 || (droppedDigit == 5 && !trailingZeros) { // > 0.5000
      t0 &+= 1
    } else if droppedDigit == 5 && trailingZeros { // == 0.5000
      t0 &+= 1
      t0 &= ~1
    }
    // t0 has t0digits digits. Write them out
    let text = intToEightDigits(t0) >> ((8 - t0digits) * 8)
    unsafe buffer.storeBytes(
      of: text,
      toUncheckedByteOffset: nextDigit,
      as: UInt64.self)
    nextDigit &+= t0digits
    firstDigit &+= 1
  } else {
    // Our initial scaling did not produce too many digits. The
    // `d12345678` value holds the first 7 digits (plus a leading
    // zero). The remainder of this algorithm is basically just a
    // heavily-optimized variation of Grisu2.

    // Write out exactly 8 digits, assuming little-endian.
    let chars = intToEightDigits(d12345678)
    unsafe buffer.storeBytes(
      of: chars,
      toUncheckedByteOffset: nextDigit,
      as: UInt64.self)
    nextDigit &+= 8
    firstDigit &+= 1

    // >90% of random binary64 values need at least 15 digits.
    // We already have seven, try grabbing the next 8 digits all at once.
    let TenToTheEighth = 100000000 as UInt128 // 10^(15-bulkFirstDigits)
    let d0 = delta * TenToTheEighth
    var t0 = t * TenToTheEighth
    let next8Digits = UInt32(truncatingIfNeeded: t0._high >> 32)
    t0 &= fractionMask
    if d0 < t0 {
      // We got 8 more digits! (So number is at least 15 digits)
      // Write them out:
      let chars = intToEightDigits(next8Digits)
      unsafe buffer.storeBytes(
        of: chars,
        toUncheckedByteOffset: nextDigit,
        as: UInt64.self)
      nextDigit &+= 8
      t = t0
      delta = d0
    }

    // Generate remaining digits one at a time, following Grisu:
    while delta < t {
      delta &*= 10
      t &*= 10
      unsafe buffer.storeBytes(
        of: UInt8(truncatingIfNeeded: t._high >> 32) &+ 0x30,
        toUncheckedByteOffset: nextDigit,
        as: UInt8.self)
      nextDigit &+= 1
      t &= fractionMask
    }

    // Adjust the final digit to be closer to the original value.
    // This accounts for the fact that sometimes there is more than
    // one shortest digit sequence.

    // For example, consider how the above would work if you had the
    // value 0.1234 and computed u = 0.1257, l = 0.1211. The above
    // digit generation works with `u`, so produces 0.125. But the
    // values 0.122, 0.123, and 0.124 are just as short and 0.123 is
    // therefore the best choice, since it's closest to the original
    // value.

    // We know delta and t are both less than 10.0 here, so we can
    // shed some excess integer bits to simplify the following:
    let adjustIntegerBits = 4 // Integer bits for "adjust" phase
    let deltaHigh64 = UInt64(
      truncatingIfNeeded: delta >> (64 - integerBits + adjustIntegerBits))
    let tHigh64 = UInt64(
      truncatingIfNeeded: t >> (64 - integerBits + adjustIntegerBits))

    let one = UInt64(1) << (64 - adjustIntegerBits)
    let adjustFractionMask = one - 1
    let oneHalf = one >> 1
    if deltaHigh64 >= tHigh64 &+ one {
      // The `skew` is the difference between our
      // computed digits and the original exact value.
      let skew: UInt64
      if isBoundary {
        skew = deltaHigh64 &- deltaHigh64 / 3 &- tHigh64
      } else {
        skew = deltaHigh64 / 2 &- tHigh64
      }

      // We use the `skew` to figure out whether there's
      // a better base-10 value than our current one.
      var lastDigit = unsafe buffer.unsafeLoad(
        fromUncheckedByteOffset: nextDigit - 1,
        as: UInt8.self)
      if (skew & adjustFractionMask) == oneHalf {
        // Difference is an integer + exactly 1/2, so ...
        let adjust = skew >> (64 - adjustIntegerBits)
        lastDigit &-= UInt8(truncatingIfNeeded: adjust)
        // ... we round the last digit even.
        lastDigit &= ~1
        // Note: "exactly" 1/2 is a subtle point above; this
        // determination relies on various roundings canceling
        // out, and proving correctness requires proper
        // testing. Testing so far has validated the
        // correctness of this code. However, even if that
        // were not true, this only affects whether we choose
        // the theoretically-ideal even final digit when an
        // odd final digit would otherwise satisfy all
        // requirements.
      } else {
        let adjust = (skew + oneHalf) >> (64 - adjustIntegerBits)
        lastDigit &-= UInt8(truncatingIfNeeded: adjust)
      }
      unsafe buffer.storeBytes(
        of: lastDigit,
        toUncheckedByteOffset: nextDigit - 1,
        as: UInt8.self)
    }
  }

  // Step 8: Finalize formatting by rearranging
  // the digits and filling in decimal points,
  // exponents, and zero padding.
  let forceExponential =
    (binaryExponent > 54) || (binaryExponent == 54 && !isBoundary)
  return finishFormatting(
    &buffer, d.sign, firstDigit, nextDigit,
    forceExponential, base10Exponent)
}

/// Turns a run of ASCII digits already in `buffer` into the final text
/// and returns the byte range that holds it.
///
/// Depending on the exponent, the result takes one of four shapes:
/// exponential ("1.2345e+67"), small fraction ("0.000123"), digits with
/// an embedded point ("123456.789"), or an integer padded with zeros
/// and a trailing ".0". A leading "-" is added when `sign` is `.minus`.
///
/// - Parameters:
///   - buffer: Working storage holding the digits. Bytes before
///     `firstDigit` and after `nextDigit` are overwritten as needed.
///   - sign: Sign of the value being formatted.
///   - firstDigit: Byte offset of the first significant digit.
///   - nextDigit: Byte offset just past the last significant digit.
///   - forceExponential: Use exponential form even when the exponent
///     alone would not require it.
///   - base10Exponent: Decimal exponent of the first digit.
/// - Returns: The range of `buffer` occupied by the finished text.
@available(macOS 9999, *)
// TODO: This doesn't guarantee inlining in all cases :(
@inline(__always)
fileprivate func finishFormatting(
  _ buffer: inout MutableRawSpan,
  _ sign: FloatingPointSign,
  _ firstDigit: Int,
  _ nextDigit: Int,
  _ forceExponential: Bool,
  _ base10Exponent: Int
) -> Range<Int> {
  // Performance note: This could be made noticeably faster by
  // writing the output consistently in exponential form with no
  // decimal point, e.g., "31415926e-07". But the extra cost seems
  // worthwhile to achieve "3.1415926" instead.
  var firstDigit = firstDigit
  var nextDigit = nextDigit

  let digitCount = nextDigit &- firstDigit
  if base10Exponent < -4 || forceExponential {
    // Exponential form: "-1.23456789e+123"
    // Rewrite "123456789" => "1.23456789" by moving the first
    // digit to the left one byte and overwriting a period.
    // (This is one reason we left empty space to the left of the digits.)
    // We don't do this for single-digit significands: "1e+78", "5e-324"
    if digitCount > 1 {
      let t = unsafe buffer.unsafeLoad(
        fromUncheckedByteOffset: firstDigit,
        as: UInt8.self)
      unsafe buffer.storeBytes(
        of: 0x2e, // "."
        toUncheckedByteOffset: firstDigit,
        as: UInt8.self)
      firstDigit &-= 1
      unsafe buffer.storeBytes(
        of: t,
        toUncheckedByteOffset: firstDigit,
        as: UInt8.self)
    }
    // Append the exponent:
    unsafe buffer.storeBytes(
      of: 0x65, // "e"
      toUncheckedByteOffset: nextDigit,
      as: UInt8.self)
    nextDigit &+= 1
    var e = base10Exponent
    let expSign: UInt8
    if base10Exponent < 0 {
      expSign = 0x2d // "-"
      e = 0 &- e
    } else {
      expSign = 0x2b // "+"
    }
    unsafe buffer.storeBytes(
      of: expSign,
      toUncheckedByteOffset: nextDigit,
      as: UInt8.self)
    nextDigit &+= 1
    if e > 99 {
      if e > 999 {
        // Four-digit exponent: write the two leading digits as a pair.
        let d = asciiDigitTable[e / 100]
        unsafe buffer.storeBytes(
          of: d,
          toUncheckedByteOffset: nextDigit,
          as: UInt16.self)
        nextDigit &+= 2
      } else {
        // Three-digit exponent: write the single leading digit.
        let d = 0x30 &+ UInt8(truncatingIfNeeded: (e / 100))
        unsafe buffer.storeBytes(
          of: d,
          toUncheckedByteOffset: nextDigit,
          as: UInt8.self)
        nextDigit &+= 1
      }
      e = e % 100
    }
    // Always write the final two exponent digits.
    let d = unsafe asciiDigitTable[unchecked: e]
    unsafe buffer.storeBytes(
      of: d,
      toUncheckedByteOffset: nextDigit,
      as: UInt16.self)
    nextDigit &+= 2
  } else if base10Exponent < 0 {
    // "-0.000123456789"
    // We need up to 5 leading characters before the digits.
    // Note that the formatters above all insert extra leading "0" characters
    // to the beginning of the buffer, so we don't need to memset() here,
    // just back up the start to include them...
    firstDigit &+= base10Exponent - 1
    // ... and then overwrite a decimal point to get "0." at the beginning
    buffer.storeBytes(
      of: 0x2e, // "."
      toByteOffset: firstDigit &+ 1,
      as: UInt8.self)
  } else if base10Exponent &+ 1 < digitCount {
    // "123456.789"
    // We move the first digits forward one position
    // so we can insert a decimal point in the middle.
    // Note: This is the only case where we actually move
    // more than one digit around in the buffer.
    // TODO: Find out how to use C memmove() here
    firstDigit &-= 1
    // Only the `base10Exponent + 1` integer digits need to move; the
    // slot after them receives the decimal point below.
    for i in 0...base10Exponent {
      let t = unsafe buffer.unsafeLoad(
        fromUncheckedByteOffset: firstDigit &+ i &+ 1,
        as: UInt8.self)
      unsafe buffer.storeBytes(
        of: t,
        toUncheckedByteOffset: firstDigit &+ i,
        as: UInt8.self)
    }
    unsafe buffer.storeBytes(
      of: 0x2e, // "."
      toUncheckedByteOffset: firstDigit &+ base10Exponent &+ 1,
      as: UInt8.self)
  } else {
    // "12345678900.0"
    // Fill trailing zeros, put ".0" at the end
    // so the result is obviously floating-point.
    let zeroEnd = firstDigit &+ base10Exponent &+ 3
    // TODO: Find out how to use C memset() here:
    // Blast 8 "0" digits into the buffer
    unsafe buffer.storeBytes(
      of: 0x3030303030303030 as UInt64,
      toUncheckedByteOffset: nextDigit,
      as: UInt64.self)
    // Add more "0" digits if needed...
    // (Note: Can't use a standard range loop because nextDigit+8
    // can legitimately be larger than zeroEnd here.)
    var i = nextDigit + 8
    while i < zeroEnd {
      unsafe buffer.storeBytes(
        of: 0x30, // "0"
        toUncheckedByteOffset: i,
        as: UInt8.self)
      i &+= 1
    }
    nextDigit = zeroEnd
    unsafe buffer.storeBytes(
      of: 0x2e, // "."
      toUncheckedByteOffset: nextDigit &- 2,
      as: UInt8.self)
  }
  if sign == .minus {
    unsafe buffer.storeBytes(
      of: 0x2d, // "-"
      toUncheckedByteOffset: firstDigit &- 1,
      as: UInt8.self)
    firstDigit &-= 1
  }

  return unsafe Range(_uncheckedBounds: (lower: firstDigit, upper: nextDigit))
}

// Table with ASCII strings for all 2-digit decimal numbers.
// Stored as little-endian UInt16s for efficiency
//
// Entry `k` (0...99) holds the two ASCII digits of `k`: the low byte is
// the tens digit and the high byte is the ones digit, so a little-endian
// 16-bit store writes the digits in reading order.
// NOTE(review): callers store these entries with native byte order, so
// a big-endian target would need `.littleEndian` at the store sites.
@available(macOS 9999, *)
fileprivate let asciiDigitTable: InlineArray<100, UInt16> = [
  0x3030, 0x3130, 0x3230, 0x3330, 0x3430,
  0x3530, 0x3630, 0x3730, 0x3830, 0x3930,
  0x3031, 0x3131, 0x3231, 0x3331, 0x3431,
  0x3531, 0x3631, 0x3731, 0x3831, 0x3931,
  0x3032, 0x3132, 0x3232, 0x3332, 0x3432,
  0x3532, 0x3632, 0x3732, 0x3832, 0x3932,
  0x3033, 0x3133, 0x3233, 0x3333, 0x3433,
  0x3533, 0x3633, 0x3733, 0x3833, 0x3933,
  0x3034, 0x3134, 0x3234, 0x3334, 0x3434,
  0x3534, 0x3634, 0x3734, 0x3834, 0x3934,
  0x3035, 0x3135, 0x3235, 0x3335, 0x3435,
  0x3535, 0x3635, 0x3735, 0x3835, 0x3935,
  0x3036, 0x3136, 0x3236, 0x3336, 0x3436,
  0x3536, 0x3636, 0x3736, 0x3836, 0x3936,
  0x3037, 0x3137, 0x3237, 0x3337, 0x3437,
  0x3537, 0x3637, 0x3737, 0x3837, 0x3937,
  0x3038, 0x3138, 0x3238, 0x3338, 0x3438,
  0x3538, 0x3638, 0x3738, 0x3838, 0x3938,
  0x3039, 0x3139, 0x3239, 0x3339, 0x3439,
  0x3539, 0x3639, 0x3739, 0x3839, 0x3939
]

// Writes "inf" or "-inf" at the start of `buffer` and returns the range
// of bytes that make up the text.
//
// All four bytes are written with a single 32-bit store. The constant is
// spelled as a little-endian word, so it is converted with
// `.littleEndian` (a no-op on little-endian targets) to keep the bytes
// in reading order on any host.
fileprivate func infinity(buffer: inout MutableRawSpan, sign: FloatingPointSign) -> Range<Int> {
  if sign == .minus {
    buffer.storeBytes(of: UInt32(0x666e692d).littleEndian,
                      toByteOffset: 0,
                      as: UInt32.self) // "-inf"
    return 0..<4
  } else {
    // The fourth byte is a NUL that is not part of the returned range.
    buffer.storeBytes(of: UInt32(0x00666e69).littleEndian,
                      toByteOffset: 0,
                      as: UInt32.self) // "inf\0"
    return 0..<3
  }
}

// Writes "0.0" or "-0.0" at the start of `buffer` and returns the range
// of bytes that make up the text. Uses the same single-store technique
// as `infinity(buffer:sign:)`.
fileprivate func zero(buffer: inout MutableRawSpan, sign: FloatingPointSign) -> Range<Int> {
  if sign == .minus {
    buffer.storeBytes(of: UInt32(0x302e302d).littleEndian,
                      toByteOffset: 0,
                      as: UInt32.self) // "-0.0"
    return 0..<4
  } else {
    // The fourth byte is a NUL that is not part of the returned range.
    buffer.storeBytes(of: UInt32(0x00302e30).littleEndian,
                      toByteOffset: 0,
                      as: UInt32.self) // "0.0\0"
    return 0..<3
  }
}

// ASCII codes for the lowercase hexadecimal digits "0"..."9", "a"..."f",
// indexed by nibble value.
@available(macOS 9999, *)
fileprivate let hexdigits: InlineArray<16, UInt8> = [
  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
  0x38, 0x39, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66
]

// Appends `value` in lowercase hex at `offset`, omitting leading zero
// digits, and advances `offset` past the text written.
// At least one digit is always written, so zero produces "0".
@available(macOS 9999, *)
fileprivate func hexWithoutLeadingZeros(buffer: inout MutableRawSpan, offset: inout Int, value: UInt64) {
  // Find the most significant non-zero nibble. The `shift > 0` guard
  // stops at the lowest nibble so that one digit is always emitted.
  var shift = 60
  while (shift > 0) && ((value >> shift) & 0xf == 0) {
    shift -= 4
  }
  // Emit the remaining nibbles, most significant first.
  while shift >= 0 {
    let d = hexdigits[Int(truncatingIfNeeded: (value >> shift) & 0xf)]
    shift -= 4
    buffer.storeBytes(of: d, toByteOffset: offset, as: UInt8.self)
    offset += 1
  }
}

// Appends `value` as exactly 16 lowercase hex digits at `offset`
// (leading zeros included) and advances `offset` by 16.
// Used for the low half of a payload whose high half is non-zero.
@available(macOS 9999, *)
fileprivate func hexWithLeadingZeros(buffer: inout MutableRawSpan, offset: inout Int, value: UInt64) {
  var shift = 60
  while shift >= 0 {
    let d = hexdigits[Int(truncatingIfNeeded: (value >> shift) & 0xf)]
    shift -= 4
    buffer.storeBytes(of: d, toByteOffset: offset, as: UInt8.self)
    offset += 1
  }
}

// Writes the text form of a NaN at the start of `buffer` and returns the
// range of bytes written.
//
// The output is `[-][s]nan[(0x<payload>)]`:
//   * "-" is written when `sign` is `.minus`.
//   * "s" is written when the `quiet:` argument is `true`.
//   * "(0x...)" is written only for a non-zero payload, in lowercase hex
//     without leading zeros.
//
// NOTE: despite its label, the `quiet:` argument selects the *signaling*
// spelling: `true` produces "snan". The callers in this file pass
// `quietBit == 0`, i.e. `true` for a signaling NaN. The external label
// is kept for source compatibility; the internal name reflects what the
// flag actually does.
//
// All stores are bounds-checked, so a buffer that is too small traps
// instead of being overrun.
@available(macOS 9999, *)
fileprivate func nan_details(buffer: inout MutableRawSpan,
                             sign: FloatingPointSign,
                             quiet isSignaling: Bool,
                             payload: UInt128) -> Range<Int>
{
  // value is a NaN of some sort
  var i = 0
  if sign == .minus {
    buffer.storeBytes(of: 0x2d, toByteOffset: 0, as: UInt8.self) // "-"
    i = 1
  }
  if isSignaling {
    buffer.storeBytes(of: 0x73, toByteOffset: i, as: UInt8.self) // "s"
    i += 1
  }
  buffer.storeBytes(of: 0x6e, toByteOffset: i, as: UInt8.self) // "n"
  buffer.storeBytes(of: 0x61, toByteOffset: i + 1, as: UInt8.self) // "a"
  buffer.storeBytes(of: 0x6e, toByteOffset: i + 2, as: UInt8.self) // "n"
  i += 3
  if payload != 0 {
    buffer.storeBytes(of: 0x28, toByteOffset: i, as: UInt8.self) // "("
    i += 1
    buffer.storeBytes(of: 0x30, toByteOffset: i, as: UInt8.self) // "0"
    i += 1
    buffer.storeBytes(of: 0x78, toByteOffset: i, as: UInt8.self) // "x"
    i += 1
    if payload._high == 0 {
      hexWithoutLeadingZeros(buffer: &buffer, offset: &i, value: payload._low)
    } else {
      // Only the high half may drop leading zeros; the low half must
      // contribute all 16 digits.
      hexWithoutLeadingZeros(buffer: &buffer, offset: &i, value: payload._high)
      hexWithLeadingZeros(buffer: &buffer, offset: &i, value: payload._low)
    }
    buffer.storeBytes(of: 0x29, toByteOffset: i, as: UInt8.self) // ")"
    i += 1
  }
  return 0..<i
}

// Converts `n` into eight ASCII decimal digits packed into a UInt64.
//
// The most significant digit ends up in the low-order byte, so storing
// the result as a little-endian 64-bit word writes the digits in
// reading order (e.g. 12345678 -> "12345678", 42 -> "00000042").
//
// The digits are split by repeated halving: one number of 8 digits ->
// two of 4 -> four of 2 -> eight of 1. Each halving step handles all
// lanes at once with a multiply-and-shift in place of a division.
// NOTE(review): the multiply/shift constants are only exact for lane
// values below 10000 / 100, so `n` is presumably required to be less
// than 100_000_000 -- confirm against callers.
@inline(__always)
fileprivate func intToEightDigits(_ n: UInt32) -> UInt64 {
  // Break into two numbers of 4 decimal digits each
  let div8 = n / 10000
  let mod8 = n &- div8 &* 10000
  let fours = UInt64(div8) | (UInt64(mod8) << 32)

  // Break into 4 numbers of 2 decimal digits each
  // (x * 10486) >> 20 stands in for x / 100 within each 32-bit lane.
  let mask100: UInt64 = 0x0000007f0000007f
  let div4 = ((fours &* 10486) >> 20) & mask100
  let mod4 = fours &- 100 &* div4
  let pairs = div4 | (mod4 &<< 16)

  // Break into 8 numbers of a single decimal digit each
  // (x * 103) >> 10 stands in for x / 10 within each 16-bit lane.
  let mask10: UInt64 = 0x000f000f000f000f
  let div2 = ((pairs &* 103) >> 10) & mask10
  let mod2 = pairs &- 10 &* div2
  let singles = div2 | (mod2 &<< 8)

  // Convert 8 digits to ASCII characters
  return singles &+ 0x3030303030303030
}

// Returns floor(lhs * rhs / 2^32): the upper 64 bits of the 96-bit
// product, rounded down. The product is formed from the two 32-bit
// halves of `lhs`, so no intermediate value exceeds 64 bits.
@inline(__always)
fileprivate func multiply64x32RoundingDown(_ lhs: UInt64, _ rhs: UInt32) -> UInt64 {
  let mask32 = UInt64(UInt32.max)
  let t = ((lhs & mask32) * UInt64(rhs)) >> 32
  return t + (lhs >> 32) * UInt64(rhs)
}

// Returns ceil(lhs * rhs / 2^32): the upper 64 bits of the 96-bit
// product, rounded up. Adding 2^32 - 1 to the low partial product
// before shifting turns any non-zero discarded bits into a carry.
@inline(__always)
fileprivate func multiply64x32RoundingUp(_ lhs: UInt64, _ rhs: UInt32) -> UInt64 {
  let mask32 = UInt64(UInt32.max)
  let t = (((lhs & mask32) * UInt64(rhs)) + mask32) >> 32
  return t + (lhs >> 32) * UInt64(rhs)
}

// Arithmetic on fractions:
// E.g., `128x64` multiplies a 0.128 fixed-point
// value by a 0.64 fixed-point fraction, returning
// a 0.128 value that's been rounded down from the
// exact 192-bit result.
+@available(SwiftStdlib 6.0, *) +@inline(__always) +fileprivate func multiply128x64RoundingDown(_ lhs: UInt128, _ rhs: UInt64) -> UInt128 { + let lhsHigh = UInt128(truncatingIfNeeded: lhs._high) + let lhsLow = UInt128(truncatingIfNeeded: lhs._low) + let rhs128 = UInt128(truncatingIfNeeded: rhs) + return (lhsHigh &* rhs128) &+ ((lhsLow &* rhs128) >> 64) +} + +@available(SwiftStdlib 6.0, *) +@inline(__always) +fileprivate func multiply128x64RoundingUp(_ lhs: UInt128, _ rhs: UInt64) -> UInt128 { + let lhsHigh = UInt128(truncatingIfNeeded: lhs._high) + let lhsLow = UInt128(truncatingIfNeeded: lhs._low) + let rhs128 = UInt128(truncatingIfNeeded: rhs) + let h = lhsHigh &* rhs128 + let l = lhsLow &* rhs128 + let bias = (UInt128(1) << 64) &- 1 + return h + ((l &+ bias) &>> 64) +} + +@available(macOS 9999, *) +@inline(__always) +fileprivate func intervalContainingPowerOf10_Binary32(_ p: Int, _ lower: inout UInt64, _ upper: inout UInt64) -> Int { + if p >= 0 { + let base = powersOf10_Exact128[p &* 2 &+ 1] + lower = base + if p < 28 { + upper = base + } else { + upper = base &+ 1 + } + } else { + let base = powersOf10_negativeBinary32[p &+ 40] + lower = base + upper = base &+ 1 + } + return binaryExponentFor10ToThe(p) +} + +@available(macOS 9999, *) +@inline(__always) +fileprivate func intervalContainingPowerOf10_Binary64(_ p: Int, _ lower: inout UInt128, _ upper: inout UInt128) -> Int { + if p >= 0 && p <= 55 { + let upper64 = powersOf10_Exact128[p &* 2 &+ 1] + let lower64 = powersOf10_Exact128[p &* 2] + upper = UInt128(_low: lower64, _high: upper64) + lower = upper + return binaryExponentFor10ToThe(p) + } + + let index = p &+ 400 + let mainPower = index / 28 + let baseHigh = powersOf10_Binary64[mainPower &* 2 &+ 1] + let baseLow = powersOf10_Binary64[mainPower &* 2] + let extraPower = index &- mainPower &* 28 + let baseExponent = binaryExponentFor10ToThe(p &- extraPower) + + if extraPower == 0 { + lower = UInt128(_low: baseLow, _high: baseHigh) + upper = lower &+ 1 + return 
baseExponent + } else { + let extra = powersOf10_Exact128[extraPower &* 2 &+ 1] + lower = ((UInt128(truncatingIfNeeded:baseHigh) &* UInt128(truncatingIfNeeded:extra)) + &+ ((UInt128(truncatingIfNeeded:baseLow) &* UInt128(truncatingIfNeeded:extra)) &>> 64)) + upper = lower &+ 2 + return baseExponent &+ binaryExponentFor10ToThe(extraPower) + } +} + +@inline(__always) +fileprivate func binaryExponentFor10ToThe(_ p: Int) -> Int { + return Int(((Int64(p) &* 55732705) >> 24) &+ 1) +} + +@inline(__always) +fileprivate func decimalExponentFor2ToThe(_ p: Int) -> Int { + return Int((Int64(p) &* 20201781) >> 26) +} + +@available(macOS 9999, *) +fileprivate let powersOf10_negativeBinary32: InlineArray<_, UInt64> = [ + 0x8b61313bbabce2c6, // x 2^-132 ~= 10^-40 + 0xae397d8aa96c1b77, // x 2^-129 ~= 10^-39 + 0xd9c7dced53c72255, // x 2^-126 ~= 10^-38 + 0x881cea14545c7575, // x 2^-122 ~= 10^-37 + 0xaa242499697392d2, // x 2^-119 ~= 10^-36 + 0xd4ad2dbfc3d07787, // x 2^-116 ~= 10^-35 + 0x84ec3c97da624ab4, // x 2^-112 ~= 10^-34 + 0xa6274bbdd0fadd61, // x 2^-109 ~= 10^-33 + 0xcfb11ead453994ba, // x 2^-106 ~= 10^-32 + 0x81ceb32c4b43fcf4, // x 2^-102 ~= 10^-31 + 0xa2425ff75e14fc31, // x 2^-99 ~= 10^-30 + 0xcad2f7f5359a3b3e, // x 2^-96 ~= 10^-29 + 0xfd87b5f28300ca0d, // x 2^-93 ~= 10^-28 + 0x9e74d1b791e07e48, // x 2^-89 ~= 10^-27 + 0xc612062576589dda, // x 2^-86 ~= 10^-26 + 0xf79687aed3eec551, // x 2^-83 ~= 10^-25 + 0x9abe14cd44753b52, // x 2^-79 ~= 10^-24 + 0xc16d9a0095928a27, // x 2^-76 ~= 10^-23 + 0xf1c90080baf72cb1, // x 2^-73 ~= 10^-22 + 0x971da05074da7bee, // x 2^-69 ~= 10^-21 + 0xbce5086492111aea, // x 2^-66 ~= 10^-20 + 0xec1e4a7db69561a5, // x 2^-63 ~= 10^-19 + 0x9392ee8e921d5d07, // x 2^-59 ~= 10^-18 + 0xb877aa3236a4b449, // x 2^-56 ~= 10^-17 + 0xe69594bec44de15b, // x 2^-53 ~= 10^-16 + 0x901d7cf73ab0acd9, // x 2^-49 ~= 10^-15 + 0xb424dc35095cd80f, // x 2^-46 ~= 10^-14 + 0xe12e13424bb40e13, // x 2^-43 ~= 10^-13 + 0x8cbccc096f5088cb, // x 2^-39 ~= 10^-12 + 0xafebff0bcb24aafe, // x 
2^-36 ~= 10^-11 + 0xdbe6fecebdedd5be, // x 2^-33 ~= 10^-10 + 0x89705f4136b4a597, // x 2^-29 ~= 10^-9 + 0xabcc77118461cefc, // x 2^-26 ~= 10^-8 + 0xd6bf94d5e57a42bc, // x 2^-23 ~= 10^-7 + 0x8637bd05af6c69b5, // x 2^-19 ~= 10^-6 + 0xa7c5ac471b478423, // x 2^-16 ~= 10^-5 + 0xd1b71758e219652b, // x 2^-13 ~= 10^-4 + 0x83126e978d4fdf3b, // x 2^-9 ~= 10^-3 + 0xa3d70a3d70a3d70a, // x 2^-6 ~= 10^-2 + 0xcccccccccccccccc, // x 2^-3 ~= 10^-1 +] + +@available(macOS 9999, *) +fileprivate let powersOf10_Exact128: InlineArray<_, UInt64> = [ + // Low order ... high order + 0x0000000000000000, 0x8000000000000000, // x 2^1 == 10^0 exactly + 0x0000000000000000, 0xa000000000000000, // x 2^4 == 10^1 exactly + 0x0000000000000000, 0xc800000000000000, // x 2^7 == 10^2 exactly + 0x0000000000000000, 0xfa00000000000000, // x 2^10 == 10^3 exactly + 0x0000000000000000, 0x9c40000000000000, // x 2^14 == 10^4 exactly + 0x0000000000000000, 0xc350000000000000, // x 2^17 == 10^5 exactly + 0x0000000000000000, 0xf424000000000000, // x 2^20 == 10^6 exactly + 0x0000000000000000, 0x9896800000000000, // x 2^24 == 10^7 exactly + 0x0000000000000000, 0xbebc200000000000, // x 2^27 == 10^8 exactly + 0x0000000000000000, 0xee6b280000000000, // x 2^30 == 10^9 exactly + 0x0000000000000000, 0x9502f90000000000, // x 2^34 == 10^10 exactly + 0x0000000000000000, 0xba43b74000000000, // x 2^37 == 10^11 exactly + 0x0000000000000000, 0xe8d4a51000000000, // x 2^40 == 10^12 exactly + 0x0000000000000000, 0x9184e72a00000000, // x 2^44 == 10^13 exactly + 0x0000000000000000, 0xb5e620f480000000, // x 2^47 == 10^14 exactly + 0x0000000000000000, 0xe35fa931a0000000, // x 2^50 == 10^15 exactly + 0x0000000000000000, 0x8e1bc9bf04000000, // x 2^54 == 10^16 exactly + 0x0000000000000000, 0xb1a2bc2ec5000000, // x 2^57 == 10^17 exactly + 0x0000000000000000, 0xde0b6b3a76400000, // x 2^60 == 10^18 exactly + 0x0000000000000000, 0x8ac7230489e80000, // x 2^64 == 10^19 exactly + 0x0000000000000000, 0xad78ebc5ac620000, // x 2^67 == 10^20 exactly + 
0x0000000000000000, 0xd8d726b7177a8000, // x 2^70 == 10^21 exactly + 0x0000000000000000, 0x878678326eac9000, // x 2^74 == 10^22 exactly + 0x0000000000000000, 0xa968163f0a57b400, // x 2^77 == 10^23 exactly + 0x0000000000000000, 0xd3c21bcecceda100, // x 2^80 == 10^24 exactly + 0x0000000000000000, 0x84595161401484a0, // x 2^84 == 10^25 exactly + 0x0000000000000000, 0xa56fa5b99019a5c8, // x 2^87 == 10^26 exactly + 0x0000000000000000, 0xcecb8f27f4200f3a, // x 2^90 == 10^27 exactly + 0x4000000000000000, 0x813f3978f8940984, // x 2^94 == 10^28 exactly + 0x5000000000000000, 0xa18f07d736b90be5, // x 2^97 == 10^29 exactly + 0xa400000000000000, 0xc9f2c9cd04674ede, // x 2^100 == 10^30 exactly + 0x4d00000000000000, 0xfc6f7c4045812296, // x 2^103 == 10^31 exactly + 0xf020000000000000, 0x9dc5ada82b70b59d, // x 2^107 == 10^32 exactly + 0x6c28000000000000, 0xc5371912364ce305, // x 2^110 == 10^33 exactly + 0xc732000000000000, 0xf684df56c3e01bc6, // x 2^113 == 10^34 exactly + 0x3c7f400000000000, 0x9a130b963a6c115c, // x 2^117 == 10^35 exactly + 0x4b9f100000000000, 0xc097ce7bc90715b3, // x 2^120 == 10^36 exactly + 0x1e86d40000000000, 0xf0bdc21abb48db20, // x 2^123 == 10^37 exactly + 0x1314448000000000, 0x96769950b50d88f4, // x 2^127 == 10^38 exactly + 0x17d955a000000000, 0xbc143fa4e250eb31, // x 2^130 == 10^39 exactly + 0x5dcfab0800000000, 0xeb194f8e1ae525fd, // x 2^133 == 10^40 exactly + 0x5aa1cae500000000, 0x92efd1b8d0cf37be, // x 2^137 == 10^41 exactly + 0xf14a3d9e40000000, 0xb7abc627050305ad, // x 2^140 == 10^42 exactly + 0x6d9ccd05d0000000, 0xe596b7b0c643c719, // x 2^143 == 10^43 exactly + 0xe4820023a2000000, 0x8f7e32ce7bea5c6f, // x 2^147 == 10^44 exactly + 0xdda2802c8a800000, 0xb35dbf821ae4f38b, // x 2^150 == 10^45 exactly + 0xd50b2037ad200000, 0xe0352f62a19e306e, // x 2^153 == 10^46 exactly + 0x4526f422cc340000, 0x8c213d9da502de45, // x 2^157 == 10^47 exactly + 0x9670b12b7f410000, 0xaf298d050e4395d6, // x 2^160 == 10^48 exactly + 0x3c0cdd765f114000, 0xdaf3f04651d47b4c, // x 
2^163 == 10^49 exactly + 0xa5880a69fb6ac800, 0x88d8762bf324cd0f, // x 2^167 == 10^50 exactly + 0x8eea0d047a457a00, 0xab0e93b6efee0053, // x 2^170 == 10^51 exactly + 0x72a4904598d6d880, 0xd5d238a4abe98068, // x 2^173 == 10^52 exactly + 0x47a6da2b7f864750, 0x85a36366eb71f041, // x 2^177 == 10^53 exactly + 0x999090b65f67d924, 0xa70c3c40a64e6c51, // x 2^180 == 10^54 exactly + 0xfff4b4e3f741cf6d, 0xd0cf4b50cfe20765, // x 2^183 == 10^55 exactly +] + +@available(macOS 9999, *) +fileprivate let powersOf10_Binary64: InlineArray<_, UInt64> = [ + // low-order half, high-order half + 0x3931b850df08e738, 0x95fe7e07c91efafa, // x 2^-1328 ~= 10^-400 + 0xba954f8e758fecb3, 0x9774919ef68662a3, // x 2^-1235 ~= 10^-372 + 0x9028bed2939a635c, 0x98ee4a22ecf3188b, // x 2^-1142 ~= 10^-344 + 0x47b233c92125366e, 0x9a6bb0aa55653b2d, // x 2^-1049 ~= 10^-316 + 0x4ee367f9430aec32, 0x9becce62836ac577, // x 2^-956 ~= 10^-288 + 0x6f773fc3603db4a9, 0x9d71ac8fada6c9b5, // x 2^-863 ~= 10^-260 + 0xc47bc5014a1a6daf, 0x9efa548d26e5a6e1, // x 2^-770 ~= 10^-232 + 0x80e8a40eccd228a4, 0xa086cfcd97bf97f3, // x 2^-677 ~= 10^-204 + 0xb8ada00e5a506a7c, 0xa21727db38cb002f, // x 2^-584 ~= 10^-176 + 0xc13e60d0d2e0ebba, 0xa3ab66580d5fdaf5, // x 2^-491 ~= 10^-148 + 0xc2974eb4ee658828, 0xa54394fe1eedb8fe, // x 2^-398 ~= 10^-120 + 0xcb4ccd500f6bb952, 0xa6dfbd9fb8e5b88e, // x 2^-305 ~= 10^-92 + 0x3f2398d747b36224, 0xa87fea27a539e9a5, // x 2^-212 ~= 10^-64 + 0xdde50bd1d5d0b9e9, 0xaa242499697392d2, // x 2^-119 ~= 10^-36 + 0xfdc20d2b36ba7c3d, 0xabcc77118461cefc, // x 2^-26 ~= 10^-8 + 0x0000000000000000, 0xad78ebc5ac620000, // x 2^67 == 10^20 exactly + 0x9670b12b7f410000, 0xaf298d050e4395d6, // x 2^160 == 10^48 exactly + 0x3b25a55f43294bcb, 0xb0de65388cc8ada8, // x 2^253 ~= 10^76 + 0x58edec91ec2cb657, 0xb2977ee300c50fe7, // x 2^346 ~= 10^104 + 0x29babe4598c311fb, 0xb454e4a179dd1877, // x 2^439 ~= 10^132 + 0x577b986b314d6009, 0xb616a12b7fe617aa, // x 2^532 ~= 10^160 + 0x0c11ed6d538aeb2f, 0xb7dcbf5354e9bece, // x 2^625 ~= 
10^188 + 0x6d953e2bd7173692, 0xb9a74a0637ce2ee1, // x 2^718 ~= 10^216 + 0x9d6d1ad41abe37f1, 0xbb764c4ca7a4440f, // x 2^811 ~= 10^244 + 0x4b2d8644d8a74e18, 0xbd49d14aa79dbc82, // x 2^904 ~= 10^272 + 0xe0470a63e6bd56c3, 0xbf21e44003acdd2c, // x 2^997 ~= 10^300 + 0x505f522e53053ff2, 0xc0fe908895cf3b44, // x 2^1090 ~= 10^328 + 0xcca845ab2beafa9a, 0xc2dfe19c8c055535, // x 2^1183 ~= 10^356 + 0x1027fff56784f444, 0xc4c5e310aef8aa17, // x 2^1276 ~= 10^384 +] diff --git a/stdlib/public/core/FloatingPointTypes.swift.gyb b/stdlib/public/core/FloatingPointTypes.swift.gyb index 5f50ea00d5c72..6131af9631670 100644 --- a/stdlib/public/core/FloatingPointTypes.swift.gyb +++ b/stdlib/public/core/FloatingPointTypes.swift.gyb @@ -126,6 +126,28 @@ extension ${Self}: CustomDebugStringConvertible { UnsafeBufferPointer(start: bufferPtr, count: length)) } } + +%if bits == 32 or bits == 64: + // Temporary `debugDescription2` that uses the new Swift implementation. + // `debugDescription` above is still using the old C implementation + // for now so we can compare performance and results between the two. 
+ @available(macOS 15, *) + public var debugDescription2: String { + if #available(macOS 9999, *) { + var buffer = InlineArray<32, UTF8.CodeUnit>(repeating: 0) + var span = buffer.mutableSpan + let textRange = Float${bits}ToASCII(value: self, buffer: &span) + let textStart = unsafe span._start().assumingMemoryBound(to: UTF8.CodeUnit.self) + textRange.lowerBound + let textLength = textRange.upperBound - textRange.lowerBound + + let textBuff = unsafe UnsafeBufferPointer(_uncheckedStart: textStart, + count: textLength) + return unsafe String._fromASCII(textBuff) + } else { + fatalError() + } + } +%end } ${Availability(bits)} diff --git a/stdlib/public/core/GroupInfo.json b/stdlib/public/core/GroupInfo.json index 61917c68bb17d..fd4996552580c 100644 --- a/stdlib/public/core/GroupInfo.json +++ b/stdlib/public/core/GroupInfo.json @@ -179,6 +179,7 @@ "Floating": [ "FloatingPoint.swift", "FloatingPointParsing.swift", + "FloatingPointToString.swift", "FloatingPointTypes.swift", "FloatingPointRandom.swift"], "Vector": [ From 0e1906135b5abda979e7e94dcf91e2dc6dc028cf Mon Sep 17 00:00:00 2001 From: Tim Kientzle Date: Tue, 8 Jul 2025 16:36:09 -0700 Subject: [PATCH 02/19] Add Float16 support to the Swift implementation --- .../public/core/FloatingPointToString.swift | 263 +++++++++++++++++- .../public/core/FloatingPointTypes.swift.gyb | 7 +- 2 files changed, 257 insertions(+), 13 deletions(-) diff --git a/stdlib/public/core/FloatingPointToString.swift b/stdlib/public/core/FloatingPointToString.swift index bbe5019b71905..1d8277ee9471b 100644 --- a/stdlib/public/core/FloatingPointToString.swift +++ b/stdlib/public/core/FloatingPointToString.swift @@ -21,9 +21,9 @@ //===---------------------------------------------------------------------===// /// /// For binary16, this code uses a simple approach that is normally -/// implemented with variable-length arithmetic. 
However, due to -/// the limited range of binary16, this can be implemented simply -/// with only 32-bit integer arithmetic. +/// implemented with variable-length arithmetic. However, due to the +/// limited range of binary16, this can be implemented with only +/// 32-bit integer arithmetic. /// /// For other formats, we use a modified form of the Grisu2 /// algorithm from Florian Loitsch; "Printing Floating-Point Numbers @@ -42,6 +42,9 @@ /// from Ulf Anders; "Ryū: fast float-to-string conversion", 2018. /// https://doi.org/10.1145/3296979.3192369 /// +/// The full algorithm is extensively commented in the Float64 version +/// below; refer to that for details. +/// /// In summary, this implementation is: /// /// * Fast. It uses only fixed-width integer arithmetic and has @@ -88,6 +91,9 @@ /// was exhaustive -- we verified all 4 billion possible Float32 values. /// * The Swift code uses an idiom of building up to 8 ASCII characters /// in a UInt64 and then writing the whole block to memory. +/// * The Swift version is slightly faster than the C version; +/// mostly thanks to various minor algorithmic tweaks that were +/// found during the translation process. 
/// // ---------------------------------------------------------------------------- @@ -134,6 +140,238 @@ internal func _float64ToStringImpl2( return UInt64(truncatingIfNeeded: textLength) } +#if !arch(x86_64) +internal func Float16ToASCII( + value f: Float16, + buffer utf8Buffer: inout MutableSpan) -> Range +{ + if #available(macOS 9999, *) { + return _Float16ToASCII(value: f, buffer: &utf8Buffer) + } else { + return 0..<0 + } +} + +@available(macOS 9999, *) +fileprivate func _Float16ToASCII( + value f: Float16, + buffer utf8Buffer: inout MutableSpan) -> Range +{ + // We need a MutableRawSpan in order to use wide store/load operations + precondition(utf8Buffer.count >= 32) + var buffer = utf8Buffer.mutableBytes + + // Step 1: Handle various input cases: + let binaryExponent: Int + let significand: Float16.RawSignificand + let exponentBias = (1 << (Float16.exponentBitCount - 1)) - 2; // 14 + if (f.exponentBitPattern == 0x1f) { // NaN or Infinity + if (f.isInfinite) { + return infinity(buffer: &buffer, sign: f.sign) + } else { // f.isNaN + let quietBit = (f.significandBitPattern >> (Float16.significandBitCount - 1)) & 1; + let payloadMask = UInt16(1 &<< (Float16.significandBitCount - 2)) - 1 + let payload16 = f.significandBitPattern & payloadMask + return nan_details(buffer: &buffer, + sign: f.sign, + quiet: quietBit == 0, + payloadHigh: 0, + payloadLow: UInt64(truncatingIfNeeded:payload16)) + } + } else if (f.exponentBitPattern == 0) { + if (f.isZero) { + return zero(buffer: &buffer, sign: f.sign) + } else { // Subnormal + binaryExponent = 1 - exponentBias + significand = f.significandBitPattern &<< 2 + } + } else { // normal + binaryExponent = Int(f.exponentBitPattern) &- exponentBias + let hiddenBit = Float16.RawSignificand(1) << Float16.significandBitCount + significand = (f.significandBitPattern &+ hiddenBit) &<< 2 + } + + // Step 2: Determine the exact target interval + let halfUlp: Float16.RawSignificand = 2 + let quarterUlp = halfUlp >> 1 + let 
upperMidpointExact = significand &+ halfUlp + let lowerMidpointExact = significand &- ((f.significandBitPattern == 0) ? quarterUlp : halfUlp) + + var firstDigit = 1 + var nextDigit = firstDigit + + // Step 3: If it's < 10^-5, format as exponential form + if binaryExponent < -13 || (binaryExponent == -13 && significand < 0x1a38) { + var decimalExponent = -5 + var u = (UInt32(upperMidpointExact) << (28 - 13 &+ binaryExponent)) &* 100000 + var l = (UInt32(lowerMidpointExact) << (28 - 13 &+ binaryExponent)) &* 100000 + var t = (UInt32(significand) << (28 - 13 &+ binaryExponent)) &* 100000 + let mask = (UInt32(1) << 28) - 1 + if t < ((1 << 28) / 10) { + u &*= 100 + l &*= 100 + t &*= 100 + decimalExponent &-= 2 + } + if t < (1 << 28) { + u &*= 10 + l &*= 10 + t &*= 10 + decimalExponent &-= 1 + } + let uDigit = u >> 28 + if uDigit == (l >> 28) { + // More than one digit, so write first digit, ".", then the rest + unsafe buffer.storeBytes(of: 0x30 + UInt8(truncatingIfNeeded: uDigit), + toUncheckedByteOffset: nextDigit, + as: UInt8.self) + nextDigit &+= 1 + unsafe buffer.storeBytes(of: 0x2e, + toUncheckedByteOffset: nextDigit, + as: UInt8.self) + nextDigit &+= 1 + while true { + u = (u & mask) &* 10 + l = (l & mask) &* 10 + t = (t & mask) &* 10 + let uDigit = u >> 28 + if uDigit != (l >> 28) { + // Stop before emitting the last digit + break + } + unsafe buffer.storeBytes(of: 0x30 &+ UInt8(truncatingIfNeeded: uDigit), + toUncheckedByteOffset: nextDigit, + as: UInt8.self) + nextDigit &+= 1 + } + } + let digit = 0x30 &+ (t &+ (1 &<< 27)) >> 28 + unsafe buffer.storeBytes(of: UInt8(truncatingIfNeeded: digit), + toUncheckedByteOffset: nextDigit, + as: UInt8.self) + nextDigit &+= 1 + unsafe buffer.storeBytes(of: 0x65, // "e" + toUncheckedByteOffset: nextDigit, + as: UInt8.self) + nextDigit &+= 1 + unsafe buffer.storeBytes(of: 0x2d, // "-" + toUncheckedByteOffset: nextDigit, + as: UInt8.self) + nextDigit &+= 1 + unsafe buffer.storeBytes(of: UInt8(truncatingIfNeeded: 
-decimalExponent / 10 &+ 0x30), + toUncheckedByteOffset: nextDigit, + as: UInt8.self) + nextDigit &+= 1 + unsafe buffer.storeBytes(of: UInt8(truncatingIfNeeded: -decimalExponent % 10 &+ 0x30), + toUncheckedByteOffset: nextDigit, + as: UInt8.self) + nextDigit &+= 1 + + } else { + + // Step 4: Greater than 10^-5, so use decimal format "123.45" + // (Note: Float16 is never big enough to need exponential for + // positive exponents) + // First, split into integer and fractional parts: + + let intPart : Float16.RawSignificand + let fractionPart : Float16.RawSignificand + if binaryExponent < 13 { + intPart = significand >> (13 &- binaryExponent) + fractionPart = significand &- (intPart &<< (13 &- binaryExponent)) + } else { + intPart = significand &<< (binaryExponent &- 13) + fractionPart = significand &- (intPart >> (binaryExponent &- 13)) + } + + // Step 5: Emit the integer part + let text = intToEightDigits(UInt32(intPart)) + unsafe buffer.storeBytes(of: text, + toUncheckedByteOffset: nextDigit, + as: UInt64.self) + nextDigit &+= 8 + + // Skip leading zeros + if intPart < 10 { + firstDigit &+= 7 + } else if intPart < 100 { + firstDigit &+= 6 + } else if intPart < 1000 { + firstDigit &+= 5 + } else if intPart < 10000 { + firstDigit &+= 4 + } else { + firstDigit &+= 3 + } + + // After the integer part comes a period... 
+ unsafe buffer.storeBytes(of: 0x2e, + toUncheckedByteOffset: nextDigit, + as: UInt8.self) + nextDigit &+= 1 + + if fractionPart == 0 { + // Step 6: No fraction, so ".0" and we're done + unsafe buffer.storeBytes(of: 0x30, + toUncheckedByteOffset: nextDigit, + as: UInt8.self) + nextDigit &+= 1 + } else { + // Step 7: Emit the fractional part by repeatedly + // multiplying by 10 to produce successive digits: + var u = UInt32(upperMidpointExact) &<< (28 - 13 &+ binaryExponent) + var l = UInt32(lowerMidpointExact) &<< (28 - 13 &+ binaryExponent) + var t = UInt32(fractionPart) &<< (28 - 13 &+ binaryExponent) + let mask = (UInt32(1) << 28) - 1 + var uDigit: UInt8 = 0 + var lDigit: UInt8 = 0 + while true { + u = (u & mask) &* 10 + l = (l & mask) &* 10 + // This actually overflows, but we only need the + // low-order bits, so it doesn't matter. + t = (t & mask) &* 10 + uDigit = UInt8(truncatingIfNeeded: u >> 28) + lDigit = UInt8(truncatingIfNeeded: l >> 28) + if uDigit != lDigit { + break + } + + unsafe buffer.storeBytes(of: 0x30 &+ uDigit, + toUncheckedByteOffset: nextDigit, + as: UInt8.self) + nextDigit &+= 1 + } + t &+= 1 << 27 + if (t & mask) == 0 { // Exactly 1/2 + t = (t >> 28) & ~1 // Round last digit even + // Without this next check, 0.015625 == 2^-6 prints + // as "0.01562" which does not round-trip correctly. + // With this, we get "0.01563" which does. + // It affects no other value. + if t <= lDigit && l > 0 { + t += 1 + } + } else { + t >>= 28 + } + unsafe buffer.storeBytes(of: UInt8(truncatingIfNeeded: 0x30 + t), + toUncheckedByteOffset: nextDigit, + as: UInt8.self) + nextDigit &+= 1 + } + } + if f.sign == .minus { + unsafe buffer.storeBytes(of: 0x2d, + toUncheckedByteOffset: firstDigit &- 1, + as: UInt8.self) // "-" + firstDigit &-= 1 + } + return firstDigit..) 
-> Range @@ -173,7 +411,8 @@ fileprivate func _Float32ToASCII( return nan_details(buffer: &buffer, sign: f.sign, quiet: quietBit == 0, - payload: UInt128(truncatingIfNeeded:payload32)) + payloadHigh: 0, + payloadLow: UInt64(truncatingIfNeeded:payload32)) } } else if (f.exponentBitPattern == 0) { if (f.isZero) { @@ -362,7 +601,8 @@ fileprivate func _Float64ToASCII( return nan_details(buffer: &buffer, sign: d.sign, quiet: quietBit == 0, - payload: UInt128(truncatingIfNeeded:payload64)) + payloadHigh: 0, + payloadLow: UInt64(truncatingIfNeeded:payload64)) } } else if (d.exponentBitPattern == 0) { if (d.isZero) { @@ -987,7 +1227,8 @@ fileprivate func hexWithLeadingZeros(buffer: inout MutableRawSpan, offset: inout fileprivate func nan_details(buffer: inout MutableRawSpan, sign: FloatingPointSign, quiet: Bool, - payload: UInt128) -> Range + payloadHigh: UInt64, + payloadLow: UInt64) -> Range { // value is a NaN of some sort var i = 0 @@ -1003,18 +1244,18 @@ fileprivate func nan_details(buffer: inout MutableRawSpan, buffer.storeBytes(of: 0x61, toByteOffset: i + 1, as: UInt8.self) // "a" buffer.storeBytes(of: 0x6e, toByteOffset: i + 2, as: UInt8.self) // "n" i += 3 - if payload != 0 { + if payloadHigh != 0 || payloadLow != 0 { buffer.storeBytes(of: 0x28, toByteOffset: i, as: UInt8.self) // "(" i += 1 buffer.storeBytes(of: 0x30, toByteOffset: i, as: UInt8.self) // "0" i += 1 buffer.storeBytes(of: 0x78, toByteOffset: i, as: UInt8.self) // "x" i += 1 - if payload._high == 0 { - hexWithoutLeadingZeros(buffer: &buffer, offset: &i, value: payload._low) + if payloadHigh == 0 { + hexWithoutLeadingZeros(buffer: &buffer, offset: &i, value: payloadLow) } else { - hexWithoutLeadingZeros(buffer: &buffer, offset: &i, value: payload._high) - hexWithLeadingZeros(buffer: &buffer, offset: &i, value: payload._low) + hexWithoutLeadingZeros(buffer: &buffer, offset: &i, value: payloadHigh) + hexWithLeadingZeros(buffer: &buffer, offset: &i, value: payloadLow) } buffer.storeBytes(of: 0x29, 
toByteOffset: i, as: UInt8.self) // ")" i += 1 diff --git a/stdlib/public/core/FloatingPointTypes.swift.gyb b/stdlib/public/core/FloatingPointTypes.swift.gyb index 6131af9631670..f3ed1f013ad5b 100644 --- a/stdlib/public/core/FloatingPointTypes.swift.gyb +++ b/stdlib/public/core/FloatingPointTypes.swift.gyb @@ -126,8 +126,11 @@ extension ${Self}: CustomDebugStringConvertible { UnsafeBufferPointer(start: bufferPtr, count: length)) } } +} -%if bits == 32 or bits == 64: +%if bits == 16 or bits == 32 or bits == 64: +${Availability(bits)} +extension ${Self} { // Temporary `debugDescription2` that uses the new Swift implementation. // `debugDescription` above is still using the old C implementation // for now so we can compare performance and results between the two. @@ -147,8 +150,8 @@ extension ${Self}: CustomDebugStringConvertible { fatalError() } } -%end } +%end ${Availability(bits)} extension ${Self}: TextOutputStreamable { From 6f5d30d2030b4dabbdc635fe21e1b8752d427988 Mon Sep 17 00:00:00 2001 From: Tim Kientzle Date: Wed, 16 Jul 2025 12:55:00 -0700 Subject: [PATCH 03/19] Float16 tweaks: balancing safety and performance --- .../public/core/FloatingPointToString.swift | 95 ++++++++++++------- 1 file changed, 61 insertions(+), 34 deletions(-) diff --git a/stdlib/public/core/FloatingPointToString.swift b/stdlib/public/core/FloatingPointToString.swift index 1d8277ee9471b..2444064fcb859 100644 --- a/stdlib/public/core/FloatingPointToString.swift +++ b/stdlib/public/core/FloatingPointToString.swift @@ -72,8 +72,8 @@ /// * Exponential form always has 1 digit before the decimal point /// * When present, a '.' is never the first or last character /// * There is a consecutive range of integer values that can be -/// represented in double (-2^54...2^54). Never use exponential -/// form for integral numbers in this range. +/// represented in any particular type (-2^54...2^54 for double). +/// Never use exponential form for integral numbers in this range. 
/// * Generally follow existing practice: Don't use use exponential /// form for fractional values bigger than 10^-4; always write at /// least 2 digits for an exponent. @@ -97,7 +97,33 @@ /// // ---------------------------------------------------------------------------- +// Float16 is not currently supported on Intel macOS. +// (This will change once there's a fully-stable Float16 +// ABI on that platform.) +#if !((os(macOS) || targetEnvironment(macCatalyst)) && arch(x86_64)) // Implement the legacy ABI on top of the new one +@_silgen_name("swift_float16ToString2") +internal func _float16ToStringImpl2( + _ textBuffer: UnsafeMutablePointer, + _ bufferLength: UInt, + _ value: Float16, + _ debug: Bool) -> UInt64 { + // Code below works with raw memory. + var buffer = unsafe MutableSpan(_unchecked: textBuffer, + count: Int(bufferLength)) + let textRange = Float16ToASCII(value: value, buffer: &buffer) + let textLength = textRange.upperBound - textRange.lowerBound + + // Move the text to the start of the buffer + if textRange.lowerBound != 0 { + unsafe _memmove(dest: textBuffer, + src: textBuffer + textRange.lowerBound, + size: UInt(truncatingIfNeeded: textLength)) + } + return UInt64(truncatingIfNeeded: textLength) +} +#endif + @_silgen_name("swift_float32ToString2") internal func _float32ToStringImpl2( _ textBuffer: UnsafeMutablePointer, @@ -140,7 +166,7 @@ internal func _float64ToStringImpl2( return UInt64(truncatingIfNeeded: textLength) } -#if !arch(x86_64) +#if !((os(macOS) || targetEnvironment(macCatalyst)) && arch(x86_64)) internal func Float16ToASCII( value f: Float16, buffer utf8Buffer: inout MutableSpan) -> Range @@ -200,6 +226,12 @@ fileprivate func _Float16ToASCII( var firstDigit = 1 var nextDigit = firstDigit + // Emit the text form differently depending on what range it's in. 
+ // We use `storeBytes(of:toUncheckedByteOffset:as:)` for most of + // the output, but are careful to use the checked/safe form + // `storeBytes(of:toByteOffset:as:)` for the last byte so that we + // reliably crash if we overflow the provided buffer. + // Step 3: If it's < 10^-5, format as exponential form if binaryExponent < -13 || (binaryExponent == -13 && significand < 0x1a38) { var decimalExponent = -5 @@ -262,9 +294,10 @@ fileprivate func _Float16ToASCII( toUncheckedByteOffset: nextDigit, as: UInt8.self) nextDigit &+= 1 - unsafe buffer.storeBytes(of: UInt8(truncatingIfNeeded: -decimalExponent % 10 &+ 0x30), - toUncheckedByteOffset: nextDigit, - as: UInt8.self) + // Last write on this branch, so use a safe checked store + buffer.storeBytes(of: UInt8(truncatingIfNeeded: -decimalExponent % 10 &+ 0x30), + toByteOffset: nextDigit, + as: UInt8.self) nextDigit &+= 1 } else { @@ -312,9 +345,10 @@ fileprivate func _Float16ToASCII( if fractionPart == 0 { // Step 6: No fraction, so ".0" and we're done - unsafe buffer.storeBytes(of: 0x30, - toUncheckedByteOffset: nextDigit, - as: UInt8.self) + // Last write on this branch, so use a checked store + buffer.storeBytes(of: 0x30, + toByteOffset: nextDigit, + as: UInt8.self) nextDigit &+= 1 } else { // Step 7: Emit the fractional part by repeatedly @@ -328,15 +362,14 @@ fileprivate func _Float16ToASCII( while true { u = (u & mask) &* 10 l = (l & mask) &* 10 - // This actually overflows, but we only need the - // low-order bits, so it doesn't matter. - t = (t & mask) &* 10 uDigit = UInt8(truncatingIfNeeded: u >> 28) lDigit = UInt8(truncatingIfNeeded: l >> 28) if uDigit != lDigit { + t = (t & mask) &* 10 break } - + // This overflows, but we don't care at this point. 
+ t &*= 10 unsafe buffer.storeBytes(of: 0x30 &+ uDigit, toUncheckedByteOffset: nextDigit, as: UInt8.self) @@ -345,26 +378,28 @@ fileprivate func _Float16ToASCII( t &+= 1 << 27 if (t & mask) == 0 { // Exactly 1/2 t = (t >> 28) & ~1 // Round last digit even - // Without this next check, 0.015625 == 2^-6 prints - // as "0.01562" which does not round-trip correctly. - // With this, we get "0.01563" which does. - // It affects no other value. - if t <= lDigit && l > 0 { + // Rounding `t` even can end up moving `t` below + // `l`. Detect and correct for this possibility. + // Exhaustive testing shows that the only input value + // affected by this is 0.015625 == 2^-6, which + // incorrectly prints as "0.01562" without this fix. + if t < lDigit || (t == lDigit && l > 0) { t += 1 } } else { t >>= 28 } - unsafe buffer.storeBytes(of: UInt8(truncatingIfNeeded: 0x30 + t), - toUncheckedByteOffset: nextDigit, - as: UInt8.self) + // Last write on this branch, so use a checked store + buffer.storeBytes(of: UInt8(truncatingIfNeeded: 0x30 + t), + toByteOffset: nextDigit, + as: UInt8.self) nextDigit &+= 1 } } if f.sign == .minus { - unsafe buffer.storeBytes(of: 0x2d, - toUncheckedByteOffset: firstDigit &- 1, - as: UInt8.self) // "-" + buffer.storeBytes(of: 0x2d, + toByteOffset: firstDigit &- 1, + as: UInt8.self) // "-" firstDigit &-= 1 } return firstDigit..> (64 - adjustIntegerBits) var t = unsafe buffer.unsafeLoad(fromUncheckedByteOffset: nextDigit - 1, @@ -1175,6 +1201,7 @@ fileprivate let asciiDigitTable: InlineArray<100, UInt16> = [ 0x3539, 0x3639, 0x3739, 0x3839, 0x3939 ] +// The constants below assume we're on a little-endian processor fileprivate func infinity(buffer: inout MutableRawSpan, sign: FloatingPointSign) -> Range { if sign == .minus { buffer.storeBytes(of: 0x666e692d, toByteOffset: 0, as: UInt32.self) // "-inf" @@ -1233,7 +1260,7 @@ fileprivate func nan_details(buffer: inout MutableRawSpan, // value is a NaN of some sort var i = 0 if sign == .minus { - 
buffer.storeBytes(of: 0x2d, toByteOffset: 0, as: UInt8.self) + buffer.storeBytes(of: 0x2d, toByteOffset: 0, as: UInt8.self) // "-" i = 1 } if quiet { From 2734ef1d9f65f488743f84ab1a8eb9fca6cc244e Mon Sep 17 00:00:00 2001 From: Tim Kientzle Date: Wed, 16 Jul 2025 14:15:03 -0700 Subject: [PATCH 04/19] Float32/64 safety improvements --- .../public/core/FloatingPointToString.swift | 54 +++++++++---------- 1 file changed, 25 insertions(+), 29 deletions(-) diff --git a/stdlib/public/core/FloatingPointToString.swift b/stdlib/public/core/FloatingPointToString.swift index 2444064fcb859..fffe2135d4a9e 100644 --- a/stdlib/public/core/FloatingPointToString.swift +++ b/stdlib/public/core/FloatingPointToString.swift @@ -925,9 +925,9 @@ fileprivate func _Float64ToASCII( } // t0 has t0digits digits. Write them out let text = intToEightDigits(t0) >> ((8 - t0digits) * 8) - unsafe buffer.storeBytes(of: text, - toUncheckedByteOffset: nextDigit, - as: UInt64.self) + buffer.storeBytes(of: text, + toByteOffset: nextDigit, + as: UInt64.self) nextDigit &+= t0digits firstDigit &+= 1 } else { @@ -1004,28 +1004,24 @@ fileprivate func _Float64ToASCII( skew = deltaHigh64 / 2 &- tHigh64 } + var lastDigit = unsafe buffer.unsafeLoad(fromUncheckedByteOffset: nextDigit - 1, + as: UInt8.self) + // We use the `skew` to figure out whether there's // a better base-10 value than our current one. if (skew & adjustFractionMask) == oneHalf { // Difference is an integer + exactly 1/2, so ... let adjust = skew >> (64 - adjustIntegerBits) - var t = unsafe buffer.unsafeLoad(fromUncheckedByteOffset: nextDigit - 1, - as: UInt8.self) - t &-= UInt8(truncatingIfNeeded: adjust) + lastDigit &-= UInt8(truncatingIfNeeded: adjust) // ... we round the last digit even. 
- t &= ~1 - unsafe buffer.storeBytes(of: t, - toUncheckedByteOffset: nextDigit - 1, - as: UInt8.self) + lastDigit &= ~1 } else { let adjust = (skew + oneHalf) >> (64 - adjustIntegerBits) - var t = unsafe buffer.unsafeLoad(fromUncheckedByteOffset: nextDigit - 1, - as: UInt8.self) - t &-= UInt8(truncatingIfNeeded: adjust) - unsafe buffer.storeBytes(of: t, - toUncheckedByteOffset: nextDigit - 1, - as: UInt8.self) + lastDigit &-= UInt8(truncatingIfNeeded: adjust) } + buffer.storeBytes(of: lastDigit, + toByteOffset: nextDigit - 1, + as: UInt8.self) } } @@ -1074,7 +1070,7 @@ fileprivate func finishFormatting(_ buffer: inout MutableRawSpan, as: UInt8.self) } // Append the exponent: - unsafe buffer.storeBytes(of: 0x65, + unsafe buffer.storeBytes(of: 0x65, // "e" toUncheckedByteOffset: nextDigit, as: UInt8.self) nextDigit &+= 1 @@ -1107,9 +1103,9 @@ fileprivate func finishFormatting(_ buffer: inout MutableRawSpan, e = e % 100 } let d = unsafe asciiDigitTable[unchecked: e] - unsafe buffer.storeBytes(of: d, - toUncheckedByteOffset: nextDigit, - as: UInt16.self) + buffer.storeBytes(of: d, + toByteOffset: nextDigit, + as: UInt16.self) nextDigit &+= 2 } else if base10Exponent < 0 { // "-0.000123456789" @@ -1137,9 +1133,9 @@ fileprivate func finishFormatting(_ buffer: inout MutableRawSpan, toUncheckedByteOffset: firstDigit &+ i, as: UInt8.self) } - unsafe buffer.storeBytes(of: 0x2e, - toUncheckedByteOffset: firstDigit &+ base10Exponent &+ 1, - as: UInt8.self) + buffer.storeBytes(of: 0x2e, + toByteOffset: firstDigit &+ base10Exponent &+ 1, + as: UInt8.self) } else { // "12345678900.0" // Fill trailing zeros, put ".0" at the end @@ -1161,14 +1157,14 @@ fileprivate func finishFormatting(_ buffer: inout MutableRawSpan, i &+= 1 } nextDigit = zeroEnd - unsafe buffer.storeBytes(of: 0x2e, - toUncheckedByteOffset: nextDigit &- 2, - as: UInt8.self) + buffer.storeBytes(of: 0x2e, + toByteOffset: nextDigit &- 2, + as: UInt8.self) } if sign == .minus { - unsafe buffer.storeBytes(of: 0x2d, - 
toUncheckedByteOffset: firstDigit &- 1, - as: UInt8.self) // "-" + buffer.storeBytes(of: 0x2d, // "-" + toByteOffset: firstDigit &- 1, + as: UInt8.self) firstDigit &-= 1 } From 2c59c720e96ae280a32ac7c7c45135a9ae276f02 Mon Sep 17 00:00:00 2001 From: Tim Kientzle Date: Thu, 17 Jul 2025 09:02:55 -0700 Subject: [PATCH 05/19] Initial Float80 support + some general reorganization and commenting --- .../public/core/FloatingPointToString.swift | 955 ++++++++++++++++-- .../public/core/FloatingPointTypes.swift.gyb | 4 +- 2 files changed, 895 insertions(+), 64 deletions(-) diff --git a/stdlib/public/core/FloatingPointToString.swift b/stdlib/public/core/FloatingPointToString.swift index fffe2135d4a9e..f4fbd41792fa6 100644 --- a/stdlib/public/core/FloatingPointToString.swift +++ b/stdlib/public/core/FloatingPointToString.swift @@ -2,7 +2,7 @@ // // This source file is part of the Swift.org open source project // -// Copyright (c) 2018-2020 Apple Inc. and the Swift project authors +// Copyright (c) 2018-2025 Apple Inc. and the Swift project authors // Licensed under Apache License v2.0 with Runtime Library Exception // // See https://swift.org/LICENSE.txt for license information @@ -85,10 +85,11 @@ /// A few notes on the Swift transcription: /// * We use MutableSpan and MutableRawSpan to /// identify blocks of working memory. -/// * We use unsafe/unchecked operations extensively, supported -/// by several years of analysis and testing to ensure that -/// no unsafety actually occurs. For Float32, that testing -/// was exhaustive -- we verified all 4 billion possible Float32 values. +/// * We use unsafe/unchecked operations extensively, supported by +/// several years of analysis and testing of the original C +/// implementation to ensure that no unsafety actually occurs. For +/// Float32, that testing was exhaustive -- we verified all 4 +/// billion possible Float32 values. 
/// * The Swift code uses an idiom of building up to 8 ASCII characters /// in a UInt64 and then writing the whole block to memory. /// * The Swift version is slightly faster than the C version; @@ -97,11 +98,19 @@ /// // ---------------------------------------------------------------------------- + +// ================================================================ +// +// Float16 +// +// ================================================================ + // Float16 is not currently supported on Intel macOS. // (This will change once there's a fully-stable Float16 // ABI on that platform.) #if !((os(macOS) || targetEnvironment(macCatalyst)) && arch(x86_64)) -// Implement the legacy ABI on top of the new one + +// Support Legacy ABI on top of new implementation @_silgen_name("swift_float16ToString2") internal func _float16ToStringImpl2( _ textBuffer: UnsafeMutablePointer, @@ -122,51 +131,7 @@ internal func _float16ToStringImpl2( } return UInt64(truncatingIfNeeded: textLength) } -#endif - -@_silgen_name("swift_float32ToString2") -internal func _float32ToStringImpl2( - _ textBuffer: UnsafeMutablePointer, - _ bufferLength: UInt, - _ value: Float32, - _ debug: Bool) -> UInt64 { - // Code below works with raw memory. - var buffer = unsafe MutableSpan(_unchecked: textBuffer, - count: Int(bufferLength)) - let textRange = Float32ToASCII(value: value, buffer: &buffer) - let textLength = textRange.upperBound - textRange.lowerBound - - // Move the text to the start of the buffer - if textRange.lowerBound != 0 { - unsafe _memmove(dest: textBuffer, - src: textBuffer + textRange.lowerBound, - size: UInt(truncatingIfNeeded: textLength)) - } - return UInt64(truncatingIfNeeded: textLength) -} - -@_silgen_name("swift_float64ToString2") -internal func _float64ToStringImpl2( - _ textBuffer: UnsafeMutablePointer, - _ bufferLength: UInt, - _ value: Float64, - _ debug: Bool) -> UInt64 { - // Code below works with raw memory. 
- var buffer = unsafe MutableSpan(_unchecked: textBuffer, - count: Int(bufferLength)) - let textRange = Float64ToASCII(value: value, buffer: &buffer) - let textLength = textRange.upperBound - textRange.lowerBound - - // Move the text to the start of the buffer - if textRange.lowerBound != 0 { - unsafe _memmove(dest: textBuffer, - src: textBuffer + textRange.lowerBound, - size: UInt(truncatingIfNeeded: textLength)) - } - return UInt64(truncatingIfNeeded: textLength) -} -#if !((os(macOS) || targetEnvironment(macCatalyst)) && arch(x86_64)) internal func Float16ToASCII( value f: Float16, buffer utf8Buffer: inout MutableSpan) -> Range @@ -200,7 +165,7 @@ fileprivate func _Float16ToASCII( let payload16 = f.significandBitPattern & payloadMask return nan_details(buffer: &buffer, sign: f.sign, - quiet: quietBit == 0, + quiet: quietBit != 0, payloadHigh: 0, payloadLow: UInt64(truncatingIfNeeded:payload16)) } @@ -406,6 +371,33 @@ fileprivate func _Float16ToASCII( } #endif +// ================================================================ +// +// Float32 +// +// ================================================================ + +// Support Legacy ABI on top of new implementation +@_silgen_name("swift_float32ToString2") +internal func _float32ToStringImpl2( + _ textBuffer: UnsafeMutablePointer, + _ bufferLength: UInt, + _ value: Float32, + _ debug: Bool) -> UInt64 { + // Code below works with raw memory. 
+ var buffer = unsafe MutableSpan(_unchecked: textBuffer, + count: Int(bufferLength)) + let textRange = Float32ToASCII(value: value, buffer: &buffer) + let textLength = textRange.upperBound - textRange.lowerBound + + // Move the text to the start of the buffer + if textRange.lowerBound != 0 { + unsafe _memmove(dest: textBuffer, + src: textBuffer + textRange.lowerBound, + size: UInt(truncatingIfNeeded: textLength)) + } + return UInt64(truncatingIfNeeded: textLength) +} internal func Float32ToASCII( value f: Float32, @@ -445,7 +437,7 @@ fileprivate func _Float32ToASCII( let payload32 = f.significandBitPattern & payloadMask return nan_details(buffer: &buffer, sign: f.sign, - quiet: quietBit == 0, + quiet: quietBit != 0, payloadHigh: 0, payloadLow: UInt64(truncatingIfNeeded:payload32)) } @@ -599,6 +591,34 @@ fileprivate func _Float32ToASCII( forceExponential, base10Exponent) } +// ================================================================ +// +// Float64 +// +// ================================================================ + +// Support Legacy ABI on top of new implementation +@_silgen_name("swift_float64ToString2") +internal func _float64ToStringImpl2( + _ textBuffer: UnsafeMutablePointer, + _ bufferLength: UInt, + _ value: Float64, + _ debug: Bool) -> UInt64 { + // Code below works with raw memory. 
+ var buffer = unsafe MutableSpan(_unchecked: textBuffer, + count: Int(bufferLength)) + let textRange = Float64ToASCII(value: value, buffer: &buffer) + let textLength = textRange.upperBound - textRange.lowerBound + + // Move the text to the start of the buffer + if textRange.lowerBound != 0 { + unsafe _memmove(dest: textBuffer, + src: textBuffer + textRange.lowerBound, + size: UInt(truncatingIfNeeded: textLength)) + } + return UInt64(truncatingIfNeeded: textLength) +} + internal func Float64ToASCII( value d: Float64, buffer utf8Buffer: inout MutableSpan) -> Range @@ -635,7 +655,7 @@ fileprivate func _Float64ToASCII( let payload64 = d.significandBitPattern & payloadMask return nan_details(buffer: &buffer, sign: d.sign, - quiet: quietBit == 0, + quiet: quietBit != 0, payloadHigh: 0, payloadLow: UInt64(truncatingIfNeeded:payload64)) } @@ -1034,9 +1054,381 @@ fileprivate func _Float64ToASCII( forceExponential, base10Exponent) } + +// ================================================================ +// +// Float80 +// +// ================================================================ +#if ((os(macOS) || targetEnvironment(macCatalyst) || os(Linux)) && arch(x86_64)) + +// Support Legacy ABI on top of new implementation +@_silgen_name("swift_float80ToString2") +internal func _float80ToStringImpl2( + _ textBuffer: UnsafeMutablePointer, + _ bufferLength: UInt, + _ value: Float80, + _ debug: Bool) -> UInt64 { + // Code below works with raw memory. 
+ var buffer = unsafe MutableSpan(_unchecked: textBuffer, + count: Int(bufferLength)) + let textRange = Float80ToASCII(value: value, buffer: &buffer) + let textLength = textRange.upperBound - textRange.lowerBound + + // Move the text to the start of the buffer + if textRange.lowerBound != 0 { + unsafe _memmove(dest: textBuffer, + src: textBuffer + textRange.lowerBound, + size: UInt(truncatingIfNeeded: textLength)) + } + return UInt64(truncatingIfNeeded: textLength) +} + +internal func Float80ToASCII( + value d: Float80, + buffer utf8Buffer: inout MutableSpan) -> Range +{ + if #available(macOS 9999, *) { + return _Float80ToASCII(value: d, buffer: &utf8Buffer) + } else { + return 0..<0 + } +} + +@available(macOS 9999, *) +fileprivate func _Float80ToASCII( + value f: Float80, + buffer utf8Buffer: inout MutableSpan) -> Range +{ + // We need a MutableRawSpan in order to use wide store/load operations + precondition(utf8Buffer.count >= 32) + var buffer = utf8Buffer.mutableBytes + + // Step 1: Handle special cases, decompose the input + + // The Intel 80-bit floating point format has some quirks that + // make this a lot more complex than the corresponding logic for + // the IEEE 754 portable formats. + + // f.significandBitPattern is processed to try to mimic the + // semantics of IEEE portable formats. But for the following, + // we need the actual raw bits: + let rawSignificand = f._representation.explicitSignificand + let binaryExponent: Int + let significand: Float80.RawSignificand + let exponentBias = (1 << (Float80.exponentBitCount - 1)) - 2; // 16382 + let isBoundary = f.significandBitPattern == 0 + if f.exponentBitPattern == 0x7fff { // NaN or Infinity + // 80387 semantics and 80287 semantics differ somewhat; + // we follow 80387 semantics here. 
+ // See: Wikipedia.org "Extended Precision" + // See: Intel's "Floating Point Reference Sheet" + // https://software.intel.com/content/dam/develop/external/us/en/documents/floating-point-reference-sheet.pdf + let selector = rawSignificand >> 62 + let payload = rawSignificand & ((1 << 62) - 1) + switch selector { + case 0: // ∞ or snan on 287, invalid on 387 + fallthrough + case 1: // Pseudo-NaN: snan on 287, invalid on 387 + // Invalid patterns treated as plain "nan" + return nan_details(buffer: &buffer, + sign: .plus, + quiet: true, + payloadHigh: 0, + payloadLow: payload) + case 2: + if payload == 0 { // snan on 287, ∞ on 387 + return infinity(buffer: &buffer, sign: f.sign) + } else { // snan on 287 and 387 + return nan_details(buffer: &buffer, + sign: f.sign, + quiet: false, + payloadHigh: 0, + payloadLow: payload) + } + case 3: + // Zero payload and sign bit set is "indefinite" (treated as qNaN here), + // otherwise qNaN on 387, sNaN on 287 + return nan_details(buffer: &buffer, + sign: f.sign, + quiet: true, + payloadHigh: 0, + payloadLow: payload) + default: + fatalError() + } + } else if f.exponentBitPattern == 0 { + if rawSignificand == 0 { // Zero + return zero(buffer: &buffer, sign: f.sign) + } else { // subnormal + binaryExponent = 1 - exponentBias + significand = rawSignificand + } + } else if rawSignificand >> 63 == 1 { // Normal + binaryExponent = Int(bitPattern:f.exponentBitPattern) - exponentBias + significand = rawSignificand + } else { + return nan_details(buffer: &buffer, + sign: .plus, + quiet: true, + payloadHigh: 0, + payloadLow: 0) + } + + // Step 2: Determine the exact unscaled target interval + let halfUlp = UInt64(1) << 63 + let quarterUlp = halfUlp >> 1 + let threeQuarterUlp = halfUlp + quarterUlp + // Significand is the upper 64 bits of our 128-bit fraction + // Upper midpoint adds 1/2 ULP: + let upperMidpointExact = UInt128(_low: halfUlp, _high: significand) + // Lower midpoint subtracts 1 ULP and then adds 1/2 or 3/4 ULP: + let
lowerMidpointExact = UInt128(_low: isBoundary ? threeQuarterUlp : halfUlp, + _high: significand - 1) + + return _backend_256bit(buffer: &buffer, + upperMidpointExact: upperMidpointExact, + lowerMidpointExact: lowerMidpointExact, + sign: f.sign, + isBoundary: isBoundary, + isOddSignificand: (f.significandBitPattern & 1) != 0, + binaryExponent: binaryExponent, + forceExponential: binaryExponent > 65 || (binaryExponent == 65 && !isBoundary)) +} +#endif + +// ================================================================ +// +// Float128 +// +// ================================================================ + +#if false +// Note: We don't need _float128ToStringImpl, since that's only for backwards compatibility, +// and the legacy ABI never supported Float128. + +internal func Float128ToASCII( + value d: Float128, + buffer utf8Buffer: inout MutableSpan) -> Range +{ + if #available(macOS 9999, *) { + return _Float128ToASCII(value: d, buffer: &utf8Buffer) + } else { + return 0..<0 + } +} + +@available(macOS 9999, *) +fileprivate func _Float128ToASCII( + value d: Float128, + buffer utf8Buffer: inout MutableSpan) -> Range +{ + // TODO: Write Me! + + // Note: All the interesting parts are already implemented in _backend_256bit(...), + // so this can easily be implemented someday by just copying _Float80ToASCII + // and making the obvious changes. (See the introductory parts of + // _Float64ToASCII for the structure common to all IEEE 754 formats.) +} +#endif + +// ================================================================ +// +// Float80/Float128 common backend +// +// This uses 256-bit fixed-width arithmetic to efficiently compute the +// optimal form for a decomposed float80 or binary128 value. It is +// less heavily commented than the 128-bit Double implementation +// above; see that implementation for detailed explanation of the +// logic here. +// +// Float80 could be handled more efficiently with 192-bit fixed-width +// arithmetic.
But the code size savings from sharing this logic +// between float80 and binary128 are substantial, and the resulting +// float80 performance is still much better than competing +// implementations. +// +// Also in the interest of code size savings, this eschews some of the +// optimizations used by the 128-bit Double implementation above. +// Those optimizations are simple to reintroduce if you're interested +// in further performance improvements. +// +// If you are interested in extreme code size, you can also use this +// backend for binary32 and binary64, eliminating the separate 128-bit +// implementation. That variation offers surprisingly reasonable +// performance overall. +// +// ================================================================ +#if ((os(macOS) || targetEnvironment(macCatalyst) || os(Linux)) && arch(x86_64)) + +@available(macOS 9999, *) +fileprivate func _backend_256bit( + buffer: inout MutableRawSpan, + upperMidpointExact: UInt128, + lowerMidpointExact: UInt128, + sign: FloatingPointSign, + isBoundary: Bool, + isOddSignificand: Bool, + binaryExponent: Int, + forceExponential: Bool) -> Range { + + // Step 3: Estimate the base 10 exponent + var base10Exponent = decimalExponentFor2ToThe(binaryExponent) + + // Step 4: Compute a power-of-10 scale factor + var powerOfTenRoundedDown = UInt256() + var powerOfTenRoundedUp = UInt256() + let powerOfTenExponent = intervalContainingPowerOf10_Binary128(p: -base10Exponent, + lower: &powerOfTenRoundedDown, + upper: &powerOfTenRoundedUp) + let extraBits = binaryExponent + powerOfTenExponent + + // Step 5: Scale the interval (with rounding) + let integerBits = 14 + let high64FractionBits = 64 - integerBits + var u: UInt256 + var l: UInt256 + if isOddSignificand { + // Narrow the interval (odd significand) + u = powerOfTenRoundedDown + u.multiplyRoundingDown(by: upperMidpointExact) + u.shiftRightRoundingDown(by: integerBits - extraBits) + + l = powerOfTenRoundedUp + l.multiplyRoundingUp(by: 
lowerMidpointExact) + l.shiftRightRoundingUp(by: integerBits - extraBits) + } else { + // Widen the interval (even significand) + u = powerOfTenRoundedUp + u.multiplyRoundingUp(by: upperMidpointExact) + u.shiftRightRoundingUp(by: integerBits - extraBits) + + l = powerOfTenRoundedDown + l.multiplyRoundingDown(by: lowerMidpointExact) + l.shiftRightRoundingDown(by: integerBits - extraBits) + } + + // Step 6: Align first digit, adjust exponent + while u.high._high < (UInt64(1) << high64FractionBits) { + base10Exponent -= 1 + l.multiply(by: UInt32(10)) + u.multiply(by: UInt32(10)) + } + var t = u + var delta = u - l + + // Step 7: Generate digits + + // Include 8 "0" characters at the beginning of the buffer for finishFormatting to use + buffer.storeBytes(of: 0x3030303030303030, + toByteOffset: 0, + as: UInt64.self) + // Start writing digits just after that + let firstDigit = 8 + var nextDigit = firstDigit + buffer.storeBytes(of: 0x30 + UInt8(truncatingIfNeeded: t.extractIntegerPart(integerBits)), + toByteOffset: nextDigit, + as: UInt8.self) + nextDigit &+= 1 + + // It would be nice to generate 8 digits at a time + // and take advantage of intToEightDigits, but + // our integer portion has only 14 bits. We can't make + // that bigger without either sacrificing too much + // precision for correct Float128 or folding the first + // digits into the scaling (as we do with Double) which + // would require a back-out phase here. + + // If there is at least one more digit possible... 
+ if delta < t { + + // Try grabbing four digits at a time + var d0 = delta + var t0 = t + d0.multiply(by: 10000) + t0.multiply(by: 10000) + var d1234 = t0.extractIntegerPart(integerBits) + while d0 < t0 { + let d12 = d1234 / 100 + let d34 = d1234 % 100 + unsafe buffer.storeBytes(of: asciiDigitTable[Int(bitPattern:d12)], + toUncheckedByteOffset: nextDigit, + as: UInt16.self) + unsafe buffer.storeBytes(of: asciiDigitTable[Int(bitPattern:d34)], + toUncheckedByteOffset: nextDigit + 2, + as: UInt16.self) + nextDigit &+= 4 + t = t0 + delta = d0 + d0.multiply(by: 10000) + t0.multiply(by: 10000) + d1234 = t0.extractIntegerPart(integerBits) + } + + // Finish by generating one digit at a time... + while delta < t { + delta.multiply(by: UInt32(10)) + t.multiply(by: UInt32(10)) + let digit = UInt8(truncatingIfNeeded: t.extractIntegerPart(integerBits)) + buffer.storeBytes(of: 0x30 + digit, + toByteOffset: nextDigit, + as: UInt8.self) + nextDigit &+= 1 + } + } + + // Adjust the final digit to be closer to the original value + // We've already consumed most of our available precision, and only + // need a couple of integer bits, so we can narrow down to + // 64 bits here. 
+ let deltaHigh64 = delta.high._high + let tHigh64 = t.high._high + if deltaHigh64 >= tHigh64 + (UInt64(1) << high64FractionBits) { + let skew: UInt64 + if isBoundary { + skew = deltaHigh64 - deltaHigh64 / 3 - tHigh64 + } else { + skew = deltaHigh64 / 2 - tHigh64 + } + let one = UInt64(1) << high64FractionBits + let fractionMask = one - 1 + let oneHalf = one >> 1 + var lastDigit = unsafe buffer.unsafeLoad(fromUncheckedByteOffset: nextDigit - 1, + as: UInt8.self) + if (skew & fractionMask) == oneHalf { + let adjust = skew >> high64FractionBits + lastDigit &-= UInt8(truncatingIfNeeded: adjust) + lastDigit &= ~1 + } else { + let adjust = (skew + oneHalf) >> high64FractionBits + lastDigit &-= UInt8(truncatingIfNeeded: adjust) + } + buffer.storeBytes(of: lastDigit, + toByteOffset: nextDigit - 1, + as: UInt8.self) + } + + return finishFormatting(&buffer, sign, firstDigit, nextDigit, + forceExponential, base10Exponent) +} +#endif + +// ================================================================ +// +// Common Helper functions +// +// ================================================================ + +// Code above computes the appropriate significant digits and stores +// them in `buffer` between `firstDigit` and `nextDigit`. +// `finishFormatting` converts this into the final text form, +// inserting decimal points, minus signs, exponents, etc, as +// necessary. To minimize the work here, this assumes that there are +// at least 5 unused bytes at the beginning of `buffer` before +// `firstDigit` and that those bytes are filled with `"0"` (0x30) +// characters. 
+ @available(macOS 9999, *) -// TODO: This doesn't guarantee inlining in all cases :( -@inline(__always) fileprivate func finishFormatting(_ buffer: inout MutableRawSpan, _ sign: FloatingPointSign, _ firstDigit: Int, @@ -1259,7 +1651,7 @@ fileprivate func nan_details(buffer: inout MutableRawSpan, buffer.storeBytes(of: 0x2d, toByteOffset: 0, as: UInt8.self) // "-" i = 1 } - if quiet { + if !quiet { buffer.storeBytes(of: 0x73, toByteOffset: i, as: UInt8.self) // "s" i += 1 } @@ -1315,6 +1707,20 @@ fileprivate func intToEightDigits(_ n: UInt32) -> UInt64 { return singles &+ 0x3030303030303030 } +// ================================================================ +// +// Arithmetic Helpers +// +// The code above works with fixed-point values. Standard +// addition/subtraction/comparison works fine, but we need rounding +// control when multiplying such values. +// +// For example, `multiply128x64RoundingDown` multiplies a 0.128 +// fixed-point value by a 0.64 fixed-point fraction, returning a 0.128 +// value that's been rounded down from the exact 192-bit result. +// +// ================================================================ + @inline(__always) fileprivate func multiply64x32RoundingDown(_ lhs: UInt64, _ rhs: UInt32) -> UInt64 { let mask32 = UInt64(UInt32.max) @@ -1329,11 +1735,6 @@ fileprivate func multiply64x32RoundingUp(_ lhs: UInt64, _ rhs: UInt32) -> UInt64 return t + (lhs >> 32) * UInt64(rhs) } -// Arithmetic on fractions: -// E.g., `128x64` multiplies a 0.128 fixed-point -// value by a 0.64 fixed-point fraction, returning -// a 0.128 value that's been rounded down from the -// exact 192-bit result.
@available(SwiftStdlib 6.0, *)
 @inline(__always)
 fileprivate func multiply128x64RoundingDown(_ lhs: UInt128, _ rhs: UInt64) -> UInt128 {
@@ -1355,6 +1756,194 @@ fileprivate func multiply128x64RoundingUp(_ lhs: UInt128, _ rhs: UInt64) -> UInt
   return h + ((l &+ bias) &>> 64)
 }
 
+#if ((os(macOS) || targetEnvironment(macCatalyst) || os(Linux)) && arch(x86_64))
+// Custom 256-bit unsigned integer type, with various arithmetic helpers as methods.
+
+// Used by 80- and 128-bit floating point formatting logic above...
+@available(macOS 15, *)
+fileprivate struct UInt256 {
+  /// Most-significant 128 bits.
+  var high: UInt128
+  /// Least-significant 128 bits.
+  var low: UInt128
+
+  /// Creates the value zero.
+  init() {
+    self.high = 0
+    self.low = 0
+  }
+
+  /// Assembles a value from four 64-bit words, most significant first.
+  init(high: UInt64, _ midHigh: UInt64, _ midLow: UInt64, low: UInt64) {
+    self.high = UInt128(_low: midHigh, _high: high)
+    self.low = UInt128(_low: low, _high: midLow)
+  }
+
+  /// Assembles a value from its two 128-bit halves.
+  init(high: UInt128, low: UInt128) {
+    self.high = high
+    self.low = low
+  }
+
+  /// Shifts right by `shift` bits (0 <= shift < 32), discarding the
+  /// bits shifted out (rounds toward zero).
+  mutating func shiftRightRoundingDown(by shift: Int) {
+    assert(shift < 32 && shift >= 0)
+    var t = UInt128(low._low >> shift)
+    t |= UInt128(low._high) &<< (64 - shift)
+    let newlow = t._low
+    t = UInt128(t._high)
+    t |= UInt128(high._low) &<< (64 - shift)
+    low = UInt128(_low: newlow, _high: t._low)
+    t = UInt128(t._high)
+    t |= UInt128(high._high) &<< (64 - shift)
+    high = t
+  }
+
+  /// Shifts right by `shift` bits (0 <= shift < 32), rounding up: the
+  /// result is incremented if any discarded bit was nonzero.
+  mutating func shiftRightRoundingUp(by shift: Int) {
+    assert(shift < 32 && shift >= 0)
+    let bias = (UInt64(1) &<< shift) - 1
+    // Add the bias in 128 bits: a UInt64 `+` would trap whenever the
+    // low word is within `bias` of UInt64.max.  The carry then has to
+    // ripple upward, so the partial words are combined with `&+`
+    // rather than `|` (a carry can overlap bit 0 of the next word).
+    var t = (UInt128(low._low) &+ UInt128(bias)) &>> shift
+    t &+= UInt128(low._high) &<< (64 - shift)
+    let newlow = t._low
+    t = UInt128(t._high)
+    t &+= UInt128(high._low) &<< (64 - shift)
+    low = UInt128(_low: newlow, _high: t._low)
+    t = UInt128(t._high)
+    t &+= UInt128(high._high) &<< (64 - shift)
+    high = t
+  }
+
+  /// Multiplies in place by a 32-bit value.  The product must fit in
+  /// 256 bits (checked by the trailing assertion).
+  mutating func multiply(by rhs: UInt32) {
+    var t = UInt128(low._low) &* UInt128(rhs)
+    let newlow = t._low
+    t = UInt128(t._high) &+ UInt128(low._high) &* UInt128(rhs)
+    low = UInt128(_low: newlow, _high: t._low)
+    t = UInt128(t._high) &+ UInt128(high._low) &* UInt128(rhs)
+    let newmidhigh = t._low
+    t = UInt128(t._high) &+ UInt128(high._high) &* UInt128(rhs)
+    high = UInt128(_low: newmidhigh, _high: t._low)
+    assert(t._high == 0)
+  }
+
+  /// Multiplies by `rhs` and keeps the upper 256 bits of the 384-bit
+  /// product, discarding the low 128 bits (rounds down).
+  mutating func multiplyRoundingDown(by rhs: UInt128) {
+    var current = UInt128(low._low) &* UInt128(rhs._low)
+
+    current = UInt128(current._high)
+    var t = UInt128(low._low) &* UInt128(rhs._high)
+    current += UInt128(t._low)
+    var next = UInt128(t._high)
+    t = UInt128(low._high) &* UInt128(rhs._low)
+    current += UInt128(t._low)
+    next += UInt128(t._high)
+
+    current = next + UInt128(current._high)
+    t = UInt128(low._high) &* UInt128(rhs._high)
+    current += UInt128(t._low)
+    next = UInt128(t._high)
+    t = UInt128(high._low) &* UInt128(rhs._low)
+    current += UInt128(t._low)
+    next += UInt128(t._high)
+    let newlow = current._low
+
+    current = next + UInt128(current._high)
+    t = UInt128(high._low) &* UInt128(rhs._high)
+    current += UInt128(t._low)
+    next = UInt128(t._high)
+    t = UInt128(high._high) &* UInt128(rhs._low)
+    current += UInt128(t._low)
+    next += UInt128(t._high)
+    low = UInt128(_low: newlow, _high: current._low)
+
+    current = next + UInt128(current._high)
+    t = UInt128(high._high) &* UInt128(rhs._high)
+    high = current + t
+  }
+
+  /// Multiplies by `rhs` and keeps the upper 256 bits of the 384-bit
+  /// product, rounded up: biases of `UInt64.max` are added at the two
+  /// discarded word positions so any nonzero discarded bit carries in.
+  mutating func multiplyRoundingUp(by rhs: UInt128) {
+    var current = UInt128(low._low) &* UInt128(rhs._low)
+    current += UInt128(UInt64.max)
+
+    current = UInt128(current._high)
+    var t = UInt128(low._low) &* UInt128(rhs._high)
+    current += UInt128(t._low)
+    var next = UInt128(t._high)
+    t = UInt128(low._high) &* UInt128(rhs._low)
+    current += UInt128(t._low)
+    next += UInt128(t._high)
+    current += UInt128(UInt64.max)
+
+    current = next + UInt128(current._high)
+    t = UInt128(low._high) &* UInt128(rhs._high)
+    current += UInt128(t._low)
+    next = UInt128(t._high)
+    t = UInt128(high._low) &* UInt128(rhs._low)
+    current += UInt128(t._low)
+    next += UInt128(t._high)
+    let newlow = current._low
+
+    current = next + UInt128(current._high)
+    t = UInt128(high._low) &* UInt128(rhs._high)
+    current += UInt128(t._low)
+    next = UInt128(t._high)
+    t = UInt128(high._high) &* UInt128(rhs._low)
+    current += UInt128(t._low)
+    next += UInt128(t._high)
+    low = UInt128(_low: newlow, _high: current._low)
+
+    current = next + UInt128(current._high)
+    t = UInt128(high._high) &* UInt128(rhs._high)
+    high = current + t
+  }
+
+  /// Removes and returns the top `bits` bits of the value, leaving the
+  /// remaining bits in place.
+  mutating func extractIntegerPart(_ bits: Int) -> UInt {
+    assert(bits < 64)
+    let integral = high._high >> (64 &- bits)
+    high = UInt128(_low: high._low,
+                   _high: high._high &- (integral &<< (64 &- bits)))
+    return UInt(truncatingIfNeeded: integral)
+  }
+
+  static func - (lhs: UInt256, rhs: UInt256) -> UInt256 {
+    var t = UInt128(lhs.low._low) &+ UInt128(~rhs.low._low) &+ 1
+    let newlowlow = t._low
+    t = UInt128(t._high) &+ UInt128(lhs.low._high) &+ UInt128(~rhs.low._high)
+    let newlow = UInt128(_low: newlowlow, _high: t._low)
+    t = UInt128(t._high) &+ UInt128(lhs.high._low) &+ UInt128(~rhs.high._low)
+    let newhigh = UInt128(_low: t._low, _high: t._high &+ lhs.high._high &+ ~rhs.high._high)
+    return UInt256(high: newhigh, low: newlow)
+  }
+
+  /// Orders values by `high`, then by `low`.
+  static func < (lhs: UInt256, rhs: UInt256) -> Bool {
+    return (lhs.high < rhs.high)
+      || (lhs.high == rhs.high
+          && lhs.low < rhs.low)
+  }
+}
+#endif
+
+// ================================================================
+//
+// Powers of 10
+//
+// ================================================================
+
 @available(macOS 9999, *)
 @inline(__always)
 fileprivate func intervalContainingPowerOf10_Binary32(_ p: Int, _ lower: inout UInt64, _ upper: inout UInt64) -> Int {
@@ -1415,6 +2004,23 @@ fileprivate func decimalExponentFor2ToThe(_ p: Int) -> Int {
   return Int((Int64(p) &* 20201781) >> 26)
 }
 
+// Each of the constant values here have an implicit binary point at
+// the extreme left and when not exact, are rounded _down_ from the
+// exact values.
For example, the first row of the first table says +// that: +// +// 0x0.8b61313bbabce2c6 x 2^-132 +// +// is the result of rounding down the exact binary value of 10^-40 to +// 64 significant bits. The logic above uses these tables to compute +// bounds for the exact value of the power of 10. + +// Note the binary exponent is not stored; it is computed by the +// `binaryExponentFor10ToThe(p)` function. + +// This covers the negative powers of 10 for Float32. +// Positive powers of 10 come from the next table below. +// Table size: 320 bytes @available(macOS 9999, *) fileprivate let powersOf10_negativeBinary32: InlineArray<_, UInt64> = [ 0x8b61313bbabce2c6, // x 2^-132 ~= 10^-40 @@ -1459,6 +2042,21 @@ fileprivate let powersOf10_negativeBinary32: InlineArray<_, UInt64> = [ 0xcccccccccccccccc, // x 2^-3 ~= 10^-1 ] +// All the powers of 10 that can be represented exactly +// in 128 bits, represented as binary floating-point values +// using the same convention as in the previous table, only +// with 128 bit significands. + +// This table is used in four places: +// * The high order 64 bits are used for positive powers of 10 +// when converting Float32. +// * The full 128-bit value is used for 10^0 through 10^55 for Float64. +// * The first 28 entries are combined with the next table for +// all other Float64 values. +// * This is combined with the 256-bit table below for Float80/Float128 +// support. + +// Table size: 896 bytes @available(macOS 9999, *) fileprivate let powersOf10_Exact128: InlineArray<_, UInt64> = [ // Low order ... high order @@ -1520,6 +2118,18 @@ fileprivate let powersOf10_Exact128: InlineArray<_, UInt64> = [ 0xfff4b4e3f741cf6d, 0xd0cf4b50cfe20765, // x 2^183 == 10^55 exactly ] +// Every 28th power of 10 across the full range of Double. 
+// Combined with a 64-bit exact power of 10 from the previous +// table, this lets us reconstruct a 128-bit lower bound for +// any power of 10 across the full range of double with a single +// 64-bit by 128-bit multiplication. + +// The published algorithms generally use a full table here of +// 800 128-bit values (6400 bytes). Breaking it into two tables +// gives a significant code-size savings for a modest performance +// penalty. + +// Table size: 464 bytes @available(macOS 9999, *) fileprivate let powersOf10_Binary64: InlineArray<_, UInt64> = [ // low-order half, high-order half @@ -1553,3 +2163,226 @@ fileprivate let powersOf10_Binary64: InlineArray<_, UInt64> = [ 0xcca845ab2beafa9a, 0xc2dfe19c8c055535, // x 2^1183 ~= 10^356 0x1027fff56784f444, 0xc4c5e310aef8aa17, // x 2^1276 ~= 10^384 ] + +#if ((os(macOS) || targetEnvironment(macCatalyst) || os(Linux)) && arch(x86_64)) + +// Needed by 80- and 128-bit formatters above + +// We could cut this in half by keeping only the positive powers and doing +// a single additional 256-bit multiplication by 10^-4984 to recover the negative powers. + +// Table size: 5728 bytes +@available(macOS 9999, *) +fileprivate let powersOf10_Binary128: InlineArray<_, UInt64> = [ + // Low-order ... 
high-order + 0xaec2e6aff96b46ae, 0xf91044c2eff84750, 0x2b55c9e70e00c557, 0xb6536903bf8f2bda, // x 2^-16556 ~= 10^-4984 + 0xda1b3c3dd3889587, 0x73a7380aba84a6b1, 0xbddb2dfde3f8a6e3, 0xb9e5428330737362, // x 2^-16370 ~= 10^-4928 + 0xa2d23c57cfebb9ec, 0x9f165c039ead6d77, 0x88227fdfc13ab53d, 0xbd89006346a9a34d, // x 2^-16184 ~= 10^-4872 + 0x0333d510cf27e5a5, 0x4e3cc383eaa17b7b, 0xe05fe4207ca3d508, 0xc13efc51ade7df64, // x 2^-15998 ~= 10^-4816 + 0xff242c569bc1f539, 0x5c67ba58680c4cce, 0x3c55f3f947fef0e9, 0xc50791bd8dd72edb, // x 2^-15812 ~= 10^-4760 + 0xe4b75ae27bec50bf, 0x25b0419765fdfcdb, 0x0915564d8ab057ee, 0xc8e31de056f89c19, // x 2^-15626 ~= 10^-4704 + 0x548b1e80a94f3434, 0xe418e9217ce83755, 0x801e38463183fc88, 0xccd1ffc6bba63e21, // x 2^-15440 ~= 10^-4648 + 0x541950a0fdc2b4d9, 0xeea173da1f0eb7b4, 0xcfadf6b2aa7c4f43, 0xd0d49859d60d40a3, // x 2^-15254 ~= 10^-4592 + 0x7e64501be95ad76b, 0x451e855d8acef835, 0x9e601e707a2c3488, 0xd4eb4a687c0253e8, // x 2^-15068 ~= 10^-4536 + 0xdadd9645f360cb51, 0xf290163350ecb3eb, 0xa8edffdccfe4db4b, 0xd9167ab0c1965798, // x 2^-14882 ~= 10^-4480 + 0x7e447db3018ffbdf, 0x4fa1860c08a85923, 0xb17cd86e7fcece75, 0xdd568fe9ab559344, // x 2^-14696 ~= 10^-4424 + 0x61cd4655bf64d265, 0xb19fd88fe285b3bc, 0x1151250681d59705, 0xe1abf2cd11206610, // x 2^-14510 ~= 10^-4368 + 0xa5703f5ce7a619ec, 0x361243a84b55574d, 0x025a8e1e5dbb41d6, 0xe6170e21b2910457, // x 2^-14324 ~= 10^-4312 + 0xb93897a6cf5d3e61, 0x18746fcc6a190db9, 0x66e849253e5da0c2, 0xea984ec57de69f13, // x 2^-14138 ~= 10^-4256 + 0x309043d12ab5b0ac, 0x79c93cff11f09319, 0xf5a7800f23ef67b8, 0xef3023b80a732d93, // x 2^-13952 ~= 10^-4200 + 0xa3baa84c049b52b9, 0xbec466ee1b586342, 0x0e85fc7f4edbd3ca, 0xf3defe25478e074a, // x 2^-13766 ~= 10^-4144 + 0xd1f4628316b15c7a, 0xae16192410d3135e, 0x4268a54f70bd28c4, 0xf8a551706112897c, // x 2^-13580 ~= 10^-4088 + 0x9eb9296cc5749dba, 0x48324e275376dfdd, 0x5052e9289f0f2333, 0xfd83933eda772c0b, // x 2^-13394 ~= 10^-4032 + 0xff6aae669a5a0d8a, 0x24fed95087b9006e, 
0x01b02378a405b421, 0x813d1dc1f0c754d6, // x 2^-13207 ~= 10^-3976 + 0xf993f18de00dc89b, 0x15617da021b89f92, 0xb782db1fc6aba49b, 0x83c4e245ed051dc1, // x 2^-13021 ~= 10^-3920 + 0xc6a0d64a712172b1, 0x2217669197ac1504, 0x4250be2eeba87d15, 0x86595584116caf3c, // x 2^-12835 ~= 10^-3864 + 0x0bdc0c67a220687b, 0x44a66a6d6fd6537b, 0x3f1f93f1943ca9b6, 0x88fab70d8b44952a, // x 2^-12649 ~= 10^-3808 + 0xb60b57164ad28122, 0xde5bd4572c25a830, 0x2c87f18b39478aa2, 0x8ba947b223e5783e, // x 2^-12463 ~= 10^-3752 + 0xbd59568efdb9bfee, 0x292f8f2c98d7f44c, 0x4054f5360249ebd1, 0x8e6549867da7d11a, // x 2^-12277 ~= 10^-3696 + 0x9fa0721e66791acc, 0x1789061d717d454c, 0xc1187fa0c18adbbe, 0x912effea7015b2c5, // x 2^-12091 ~= 10^-3640 + 0x982b64e953ac4e27, 0x45efb05f20cf48b3, 0x4b4de34e0ebc3e06, 0x9406af8f83fd6265, // x 2^-11905 ~= 10^-3584 + 0xa53f5950eec21dca, 0x3bd8754763bdbca1, 0xac73f0226eff5ea1, 0x96ec9e7f9004839b, // x 2^-11719 ~= 10^-3528 + 0x320e19f88f1161b7, 0x72e93fe0cce7cfd9, 0x2184706ea46a4c38, 0x99e11423765ec1d0, // x 2^-11533 ~= 10^-3472 + 0x491aba48dfc0e36e, 0xd3de560ee34022b2, 0xddadb80577b906bd, 0x9ce4594a044e0f1b, // x 2^-11347 ~= 10^-3416 + 0x06789d038697142f, 0x7a466a75be73db21, 0x60dbd8aa443b560f, 0x9ff6b82ef415d222, // x 2^-11161 ~= 10^-3360 + 0x40ed8056af76ac43, 0x08251c601e346456, 0x7401c6f091f87727, 0xa3187c82120dace6, // x 2^-10975 ~= 10^-3304 + 0x8c643ee307bffec6, 0xf369a11c6f66c05a, 0x4d5b32f713d7f476, 0xa649f36e8583e81a, // x 2^-10789 ~= 10^-3248 + 0xe32f5e080e36b4be, 0x3adf30ff2eb163d4, 0xb4b39dd9ddb8d317, 0xa98b6ba23e2300c7, // x 2^-10603 ~= 10^-3192 + 0x6b9d538c192cfb1b, 0x1c5af3bd4d2c60b5, 0xec41c1793d69d0d1, 0xacdd3555869159d1, // x 2^-10417 ~= 10^-3136 + 0x1adadaeedf7d699c, 0x71043692494aa743, 0x3ca5a7540d9d56c9, 0xb03fa252bd05a815, // x 2^-10231 ~= 10^-3080 + 0xec3e4e5fc6b03617, 0x47c9b16afe8fdf74, 0x92e1bc1fbb33f18d, 0xb3b305fe328e571f, // x 2^-10045 ~= 10^-3024 + 0x1d42fa68b12bdb23, 0xac46a7b3f2b4b34e, 0xa908fd4a88728b6a, 0xb737b55e31cdde04, // x 2^-9859 ~= 
10^-2968 + 0x887dede507f2b618, 0x359a8fa0d014b9a7, 0x7c4c65d15c614c56, 0xbace07232df1c802, // x 2^-9673 ~= 10^-2912 + 0x504708e718b4b669, 0xfb4d9440822af452, 0xef84cc99cb4c5d17, 0xbe7653b01aae13e5, // x 2^-9487 ~= 10^-2856 + 0x5b7977525516bff0, 0x75913092420c9b35, 0xcfc147ade4843a24, 0xc230f522ee0a7fc2, // x 2^-9301 ~= 10^-2800 + 0xad5d11883cc1302b, 0x860a754894b9a0bc, 0x4668677d5f46c29b, 0xc5fe475d4cd35cff, // x 2^-9115 ~= 10^-2744 + 0x42032f9f971bfc07, 0x9fb576046ab35018, 0x474b3cb1fe1d6a7f, 0xc9dea80d6283a34c, // x 2^-8929 ~= 10^-2688 + 0xd3e7fbb72403a4dd, 0x8ca223055819af54, 0xd6ea3b733029ef0b, 0xcdd276b6e582284f, // x 2^-8743 ~= 10^-2632 + 0xba2431d885f2b7d9, 0xc9879fc42869f610, 0x3736730a9e47fef8, 0xd1da14bc489025ea, // x 2^-8557 ~= 10^-2576 + 0xa11edbcd65dd1844, 0xcb8edae81a295887, 0x3d24e68dc1027246, 0xd5f5e5681a4b9285, // x 2^-8371 ~= 10^-2520 + 0xa0f076652f69ad08, 0x9d19c341f5f42f2a, 0x742ab8f3864562c8, 0xda264df693ac3e30, // x 2^-8185 ~= 10^-2464 + 0x29f760ef115f2824, 0xe0ee47c041c9de0f, 0x8c119f3680212413, 0xde6bb59f56672cda, // x 2^-7999 ~= 10^-2408 + 0x8b90230b3409c9d3, 0x9d76eef2c1543e65, 0x43190b523f872b9c, 0xe2c6859f5c284230, // x 2^-7813 ~= 10^-2352 + 0xd44ce9993bc6611e, 0x777c9b2dfbede079, 0x2a0969bf88679396, 0xe7372943179706fc, // x 2^-7627 ~= 10^-2296 + 0xe8c5f5a63fd0fbd1, 0x0ccc12293f1d7a58, 0x131565be33dda91a, 0xebbe0df0c8201ac5, // x 2^-7441 ~= 10^-2240 + 0xdb97988dd6b776f4, 0xeb2106f435f7e1d5, 0xccfb1cc2ef1f44de, 0xf05ba3330181c750, // x 2^-7255 ~= 10^-2184 + 0x2fcbc8df94a1d54b, 0x796d0a8120801513, 0x5f8385b3a882ff4c, 0xf5105ac3681f2716, // x 2^-7069 ~= 10^-2128 + 0xc8700c11071a40f5, 0x23cb9e9df9331fe4, 0x166c15f456786c27, 0xf9dca895a3226409, // x 2^-6883 ~= 10^-2072 + 0x9589f4637a50cbb5, 0xea8242b0030e4a51, 0x6c656c3b1f2c9d91, 0xfec102e2857bc1f9, // x 2^-6697 ~= 10^-2016 + 0xc4be56c83349136c, 0x6188db81ac8e775d, 0xfa70b9a2ca60b004, 0x81def119b76837c8, // x 2^-6510 ~= 10^-1960 + 0xb85d39054658b363, 0xe7df06bc613fda21, 0x6a22490e8e9ec98b, 
0x8469e0b6f2b8bd9b, // x 2^-6324 ~= 10^-1904 + 0x800b1e1349fef248, 0x469cfd2e6ca32a77, 0x69138459b0fa72d4, 0x87018eefb53c6325, // x 2^-6138 ~= 10^-1848 + 0xb62593291c768919, 0xc098e6ed0bfbd6f6, 0x6c83ad1260ff20f4, 0x89a63ba4c497b50e, // x 2^-5952 ~= 10^-1792 + 0x92ee7fce474479d3, 0xe02017175bf040c6, 0xd82ef2860273de8d, 0x8c5827f711735b46, // x 2^-5766 ~= 10^-1736 + 0x7b0e6375ca8c77d9, 0x5f07e1e10097d47f, 0x416d7f9ab1e67580, 0x8f17964dfc3961f2, // x 2^-5580 ~= 10^-1680 + 0xc8d869ed561af1ce, 0x8b6648e941de779b, 0x56700866b85d57fe, 0x91e4ca5db93dbfec, // x 2^-5394 ~= 10^-1624 + 0xfc04df783488a410, 0x64d1f15da2c146b1, 0x43cf71d5c4fd7868, 0x94c0092dd4ef9511, // x 2^-5208 ~= 10^-1568 + 0xfbaf03b48a965a64, 0x9b6122aa2b72a13c, 0x387898a6e22f821b, 0x97a9991fd8b3afc0, // x 2^-5022 ~= 10^-1512 + 0x50f7f7c13119aadd, 0xe415d8b25694250a, 0x8f8857e875e7774e, 0x9aa1c1f6110c0dd0, // x 2^-4836 ~= 10^-1456 + 0xce214403545fd685, 0xf36d1ad779b90e09, 0xa5c58d5f91a476d7, 0x9da8ccda75b341b5, // x 2^-4650 ~= 10^-1400 + 0x63ddfb68f971b0c5, 0x2822e38faf74b26e, 0x6e1f7f1642ebaac8, 0xa0bf0465b455e921, // x 2^-4464 ~= 10^-1344 + 0xf0d00cec9daf7444, 0x6bf3eea6f661a32a, 0xfad2be1679765f27, 0xa3e4b4a65e97b76a, // x 2^-4278 ~= 10^-1288 + 0x463b4ab4bd478f57, 0x6f6583b5b36d5426, 0x800cfab80c4e2eb1, 0xa71a2b283c14fba6, // x 2^-4092 ~= 10^-1232 + 0xef163df2fa96e983, 0xa825f32bc8f6b080, 0x850b0c5976b21027, 0xaa5fb6fbc115010b, // x 2^-3906 ~= 10^-1176 + 0x7db1b3f8e100eb43, 0x2862b1f61d64ddc3, 0x61363686961a41e5, 0xadb5a8bdaaa53051, // x 2^-3720 ~= 10^-1120 + 0xfd349cf00ba1e09a, 0x6d282fe1b7112879, 0xc6f075c4b81fc72d, 0xb11c529ec0d87268, // x 2^-3534 ~= 10^-1064 + 0xf7221741b221cf6f, 0x3739f15b06ac3c76, 0xb4e4be5b6455ef96, 0xb494086bbfea00c3, // x 2^-3348 ~= 10^-1008 + 0xc4e5a2f864c403bb, 0x6e33cdcda4367276, 0x24d256c540a50309, 0xb81d1f9569068d8e, // x 2^-3162 ~= 10^-952 + 0x276e3f0f67f0553b, 0x00de73d9d5be6974, 0x6d4aa5b50bb5dc0d, 0xbbb7ef38bb827f2d, // x 2^-2976 ~= 10^-896 + 0x51a34a3e674484ed, 
0x1fb6069f8b26f840, 0x925624c0d7d93317, 0xbf64d0275747de70, // x 2^-2790 ~= 10^-840 + 0xcc775c8cb6de1dbc, 0x6d60d02eac6309ee, 0x8e5a2e5116baf191, 0xc3241cf0094a8e70, // x 2^-2604 ~= 10^-784 + 0x6023c8fa17d7b105, 0x069cf8f51d2e5e65, 0xb0560c246f90e9e8, 0xc6f631e782d57096, // x 2^-2418 ~= 10^-728 + 0x92c17acb2d08d5fd, 0xc26ffb8e81532725, 0x2ffff1289a804c5a, 0xcadb6d313c8736fc, // x 2^-2232 ~= 10^-672 + 0x47df78ab9e92897a, 0xc02b302a892b81dc, 0xa855e127113c887b, 0xced42ec885d9dbbe, // x 2^-2046 ~= 10^-616 + 0xdaf2dec03ec0c322, 0x72db3bc15b0c7014, 0xe00bad8dfc0d8c8e, 0xd2e0d889c213fd60, // x 2^-1860 ~= 10^-560 + 0xd3a04799e4473ac8, 0xa116409a2fdf1e9e, 0xc654d07271e6c39f, 0xd701ce3bd387bf47, // x 2^-1674 ~= 10^-504 + 0x5c8a5dc65d745a24, 0x2726c48a85389fa7, 0x84c663cee6b86e7c, 0xdb377599b6074244, // x 2^-1488 ~= 10^-448 + 0xd7ebc61ba77a9e66, 0x8bf77d4bc59b35b1, 0xcb285ceb2fed040d, 0xdf82365c497b5453, // x 2^-1302 ~= 10^-392 + 0x744ce999bfed213a, 0x363b1f2c568dc3e2, 0xfd1b1b2308169b25, 0xe3e27a444d8d98b7, // x 2^-1116 ~= 10^-336 + 0x6a40608fe10de7e7, 0xf910f9f648232f14, 0xd1b3400f8f9cff68, 0xe858ad248f5c22c9, // x 2^-930 ~= 10^-280 + 0x9bdbfc21260dd1ad, 0x4609ac5c7899ca36, 0xa4f8bf5635246428, 0xece53cec4a314ebd, // x 2^-744 ~= 10^-224 + 0xd88181aad19d7454, 0xf80f36174730ca34, 0xdc44e6c3cb279ac1, 0xf18899b1bc3f8ca1, // x 2^-558 ~= 10^-168 + 0xee19bfa6947f8e02, 0xaa09501d5954a559, 0x4d4617b5ff4a16d5, 0xf64335bcf065d37d, // x 2^-372 ~= 10^-112 + 0xebbc75a03b4d60e6, 0xac2e4f162cfad40a, 0xeed6e2f0f0d56712, 0xfb158592be068d2e, // x 2^-186 ~= 10^-56 + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x8000000000000000, // x 2^1 == 10^0 exactly + 0x0000000000000000, 0x2000000000000000, 0xbff8f10e7a8921a4, 0x82818f1281ed449f, // x 2^187 == 10^56 exactly + 0x51775f71e92bf2f2, 0x74a7ef0198791097, 0x03e2cf6bc604ddb0, 0x850fadc09923329e, // x 2^373 ~= 10^112 + 0xb204b3d9686f55b5, 0xfb118fc9c217a1d2, 0x90fb44d2f05d0842, 0x87aa9aff79042286, // x 2^559 ~= 10^168 + 
0xd7924bff833149fa, 0xbc10c5c5cda97c8d, 0x82bd6b70d99aaa6f, 0x8a5296ffe33cc92f, // x 2^745 ~= 10^224 + 0xa67d072d3c7fa14b, 0x7ec63730f500b406, 0xdb0b487b6423e1e8, 0x8d07e33455637eb2, // x 2^931 ~= 10^280 + 0x546f2a35dc367e47, 0x949063d8a46f0c0e, 0x213a4f0aa5e8a7b1, 0x8fcac257558ee4e6, // x 2^1117 ~= 10^336 + 0x50611a621c0ee3ae, 0x202d895116aa96be, 0x1c306f5d1b0b5fdf, 0x929b7871de7f22b9, // x 2^1303 ~= 10^392 + 0xffa6738a27dcf7a3, 0x3c11d8430d5c4802, 0xa7ea9c8838ce9437, 0x957a4ae1ebf7f3d3, // x 2^1489 ~= 10^448 + 0x5bf36c0f40bde99d, 0x284ba600ee9f6303, 0xbf1d49cacccd5e68, 0x9867806127ece4f4, // x 2^1675 ~= 10^504 + 0xa6e937834ed12e58, 0x73f26eb82f6b8066, 0x655494c5c95d77f2, 0x9b63610bb9243e46, // x 2^1861 ~= 10^560 + 0x0cd4b7660adc6930, 0x8f868688f8eb79eb, 0x02e008393fd60b55, 0x9e6e366733f85561, // x 2^2047 ~= 10^616 + 0x3efb9807d86d3c6a, 0x84c10a1d22f5adc5, 0x55e04dba4b3bd4dd, 0xa1884b69ade24964, // x 2^2233 ~= 10^672 + 0xf065089401df33b4, 0x1fc02370c451a755, 0x44b222741eb1ebbf, 0xa4b1ec80f47c84ad, // x 2^2419 ~= 10^728 + 0xa62d0da836fce7d5, 0x75933380ceb5048c, 0x1cf4a5c3bc09fa6f, 0xa7eb6799e8aec999, // x 2^2605 ~= 10^784 + 0x7a400df820f096c2, 0x802c4085068d2dd5, 0x3c4a575151b294dc, 0xab350c27feb90acc, // x 2^2791 ~= 10^840 + 0xf48b51375df06e86, 0x412fe9e72afd355e, 0x870a8d87239d8f35, 0xae8f2b2ce3d5dbe9, // x 2^2977 ~= 10^896 + 0x881883521930127c, 0xe53fd3fcb5b4df25, 0xdd929f09c3eff5ac, 0xb1fa17404a30e5e8, // x 2^3163 ~= 10^952 + 0x270cd9f1348eb326, 0x37ed82fe9c75fccf, 0x1931b583a9431d7e, 0xb5762497dbf17a9e, // x 2^3349 ~= 10^1008 + 0x8919b01a5b3d9ec1, 0x6a7669bdfc6f699c, 0xe30db03e0f8dd286, 0xb903a90f561d25e2, // x 2^3535 ~= 10^1064 + 0xf0461526b4201aa5, 0x7fe40defe17e55f5, 0x9eb5cb19647508c5, 0xbca2fc30cc19f090, // x 2^3721 ~= 10^1120 + 0xd67bf35422978bbf, 0x0dbb1c416ebe661f, 0x24bd4c00042ad125, 0xc054773d149bf26b, // x 2^3907 ~= 10^1176 + 0xdd093192ef5508d0, 0x6eac3085943ccc0f, 0x7ea30dbd7ea479e3, 0xc418753460cdcca9, // x 2^4093 ~= 10^1232 + 0xfe4ff20db6d25dc2, 
0x5d5d5a9519e34a42, 0x764f4cf916b4dece, 0xc7ef52defe87b751, // x 2^4279 ~= 10^1288 + 0xd8adfb2e00494c5e, 0x72435286baf0e84e, 0xbeb7fbdc1cbe8b37, 0xcbd96ed6466cf081, // x 2^4465 ~= 10^1344 + 0xe07c1e4384f594af, 0x0c6b90b8874d5189, 0xdce472c619aa3f63, 0xcfd7298db6cb9672, // x 2^4651 ~= 10^1400 + 0x5dd902c68fa448cf, 0xea8d16bd9544e48e, 0xe47defc14a406e4f, 0xd3e8e55c3c1f43d0, // x 2^4837 ~= 10^1456 + 0x1223d79357bedca8, 0xeae6c2843752ac35, 0xb7157c60a24a0569, 0xd80f0685a81b2a81, // x 2^5023 ~= 10^1512 + 0xcff72d64bc79e429, 0xccc52c236decd778, 0xfb0b98f6bbc4f0cb, 0xdc49f3445824e360, // x 2^5209 ~= 10^1568 + 0x3731f76b905dffbb, 0x5e2bddd7d12a9e42, 0xc6c6c1764e047e15, 0xe09a13d30c2dba62, // x 2^5395 ~= 10^1624 + 0xeb58d8ef2ada7c09, 0xbc1a3b726b789947, 0x87e8dcfc09dbc33a, 0xe4ffd276eedce658, // x 2^5581 ~= 10^1680 + 0x249a5c06dc5d5db7, 0xa8f09440be97bfe6, 0xb1a3642a8da3cf4f, 0xe97b9b89d001dab3, // x 2^5767 ~= 10^1736 + 0xbf34ff7963028cd9, 0xc20578fa3851488b, 0x2d4070f33b21ab7b, 0xee0ddd84924ab88c, // x 2^5953 ~= 10^1792 + 0x002d0511317361d5, 0xd6919e041129a1a7, 0xa2bf0c63a814e04e, 0xf2b70909cd3fd35c, // x 2^6139 ~= 10^1848 + 0x1fa87f28acf1dcd2, 0xe7a0a88981d1a0f9, 0x08f13995cf9c2747, 0xf77790f0a48a45ce, // x 2^6325 ~= 10^1904 + 0x1b6ff8afbe589b72, 0xc851bb3f9aeb1211, 0x7a37993eb21444fa, 0xfc4fea4fd590b40a, // x 2^6511 ~= 10^1960 + 0xef23a4cbc039f0c2, 0xbb3f8498a972f18e, 0xb7b1ada9cdeba84d, 0x80a046447e3d49f1, // x 2^6698 ~= 10^2016 + 0x2cc44f2b602b6231, 0xf231f4b7996b7278, 0x0cc6866c5d69b2cb, 0x8324f8aa08d7d411, // x 2^6884 ~= 10^2072 + 0x822c97629a3a4c69, 0x8a9afcdbc940e6f9, 0x7fe2b4308dcbf1a3, 0x85b64a659077660e, // x 2^7070 ~= 10^2128 + 0xf66cfcf42d4896b0, 0x1f11852a20ed33c5, 0x1d73ef3eaac3c964, 0x88547abb1d8e5bd9, // x 2^7256 ~= 10^2184 + 0x63093ad0caadb06c, 0x31be1482014cdaf0, 0x1e34291b1ef566c7, 0x8affca2bd1f88549, // x 2^7442 ~= 10^2240 + 0xab50f69048738e9a, 0xa126c32ff4882be8, 0x9e9383d73d486881, 0x8db87a7c1e56d873, // x 2^7628 ~= 10^2296 + 0xe57e659432b0a73e, 
0x47a0e15dfc7986b8, 0x9cc5ee51962c011a, 0x907eceba168949b3, // x 2^7814 ~= 10^2352 + 0x8a6ff950599f8ae5, 0xd1cbbb7d005a76d3, 0x413407cfeeac9743, 0x93530b43e5e2c129, // x 2^8000 ~= 10^2408 + 0xd4e6b6e847550caa, 0x56a3106227b87706, 0x7efa7d29c44e11b7, 0x963575ce63b6332d, // x 2^8186 ~= 10^2464 + 0xd835c90b09842263, 0xb69f01a641da2a42, 0x5a848859645d1c6f, 0x9926556bc8defe43, // x 2^8372 ~= 10^2520 + 0x9b0ae73c204ecd61, 0x0794fd5e5a51ac2f, 0x51edea897b34601f, 0x9c25f29286e9ddb6, // x 2^8558 ~= 10^2576 + 0x3130484fb0a61d89, 0x32b7105223a27365, 0xb50008d92529e91f, 0x9f3497244186fca4, // x 2^8744 ~= 10^2632 + 0x8cd036553f38a1e8, 0x5e997e9f45d7897d, 0xf09e780bcc8238d9, 0xa2528e74eaf101fc, // x 2^8930 ~= 10^2688 + 0xe1f8b43b08b5d0ef, 0xa0eaf3f62dc1777c, 0x3a5828869701a165, 0xa580255203f84b47, // x 2^9116 ~= 10^2744 + 0x3c7f62e3154fa708, 0x5786f3927eb15bd5, 0x8b231a70eb5444ce, 0xa8bdaa0a0064fa44, // x 2^9302 ~= 10^2800 + 0x1ebc24a19cd70a2a, 0x843fddd10c7006b8, 0xfa1bde1f473556a4, 0xac0b6c73d065f8cc, // x 2^9488 ~= 10^2856 + 0x46b6aae34cfd26fc, 0x00db7d919b136c68, 0x7730e00421da4d55, 0xaf69bdf68fc6a740, // x 2^9674 ~= 10^2912 + 0x1c4edcb83fc4c49d, 0x61c0edd56bbcb3e8, 0x7f959cb702329d14, 0xb2d8f1915ba88ca5, // x 2^9860 ~= 10^2968 + 0x428c840d247382fe, 0x9cc3b1569b1325a4, 0x40c3a071220f5567, 0xb6595be34f821493, // x 2^10046 ~= 10^3024 + 0xbeb82e734787ec63, 0xbeff12280d5a1676, 0x11c48d02b8326bd3, 0xb9eb5333aa272e9b, // x 2^10232 ~= 10^3080 + 0x302349e12f45c73f, 0xb494bcc96d53e49c, 0x566765461bd2f61b, 0xbd8f2f7a1ba47d6d, // x 2^10418 ~= 10^3136 + 0x5704ebf5f16946ce, 0x431388ec68ac7a26, 0xb889018e4f6e9a52, 0xc1454a673cb9b1ce, // x 2^10604 ~= 10^3192 + 0x5a30431166af9b23, 0x132d031fc1d1fec0, 0xf85333a94848659f, 0xc50dff6d30c3aefc, // x 2^10790 ~= 10^3248 + 0x7573d4b3ffe4ba3b, 0xf888498a40220657, 0x1a1aeae7cf8a9d3d, 0xc8e9abc872eb2bc1, // x 2^10976 ~= 10^3304 + 0xb5eaef7441511eb9, 0xc9cf998035a91664, 0x12e29f09d9061609, 0xccd8ae88cf70ad84, // x 2^11162 ~= 10^3360 + 
0x73aed4f1908f4d01, 0x8c53e7beeca4578f, 0xdf7601457ca20b35, 0xd0db689a89f2f9b1, // x 2^11348 ~= 10^3416 + 0x5adbd55696e1cdd9, 0x4949d09424b87626, 0xcbdcd02f23cc7690, 0xd4f23ccfb1916df5, // x 2^11534 ~= 10^3472 + 0x3f500ccf4ea03593, 0x9b80aac81b50762a, 0x44289dd21b589d7a, 0xd91d8fe9a3d019cc, // x 2^11720 ~= 10^3528 + 0x134ca67a679b84ae, 0x8909e424a112a3cd, 0x95aa118ec1d08317, 0xdd5dc8a2bf27f3f7, // x 2^11906 ~= 10^3584 + 0xe89e3cf733d9ff40, 0x014344660a175c36, 0x72c4d2cad73b0a7b, 0xe1b34fb846321d04, // x 2^12092 ~= 10^3640 + 0x68c0a2c6c02dae9a, 0x0b11160a6edb5f57, 0xe20a88f1134f906d, 0xe61e8ff47461cda9, // x 2^12278 ~= 10^3696 + 0x47fa54906741561a, 0xaa13acba1e5511f5, 0xc7c91d5c341ed39d, 0xea9ff638c54554e1, // x 2^12464 ~= 10^3752 + 0x365460ed91271c24, 0xabe33496aff629b4, 0xf659ede2159a45ec, 0xef37f1886f4b6690, // x 2^12650 ~= 10^3808 + 0xe4cbf4acc7fba37f, 0x350e915f7055b1b8, 0x78d946bab954b82f, 0xf3e6f313130ef0ef, // x 2^12836 ~= 10^3864 + 0xe692accdfa5bd859, 0xf4d4d3202379829e, 0xc9b1474d8f89c269, 0xf8ad6e3fa030bd15, // x 2^13022 ~= 10^3920 + 0xeca0018ea3b8d1b4, 0xe878edb67072c26d, 0x6b1d2745340e7b14, 0xfd8bd8b770cb469e, // x 2^13208 ~= 10^3976 + 0xce5fec949ab87cf7, 0x0151dcd7a53488c3, 0xf22e502fcdd4bca2, 0x81415538ce493bd5, // x 2^13395 ~= 10^4032 + 0x5e1731fbff8c032e, 0xe752f53c2f8fa6c1, 0x7c1735fc3b813c8c, 0x83c92edf425b292d, // x 2^13581 ~= 10^4088 + 0xb552102ea83f47e6, 0xdf0fd2002ff6b3a3, 0x0367500a8e9a178f, 0x865db7a9ccd2839e, // x 2^13767 ~= 10^4144 + 0x76507bafe00ec873, 0x71b256ecd954434c, 0xc9ac50475e25293a, 0x88ff2f2bade74531, // x 2^13953 ~= 10^4200 + 0x5e2075ba289a360b, 0xac376f28b45e5acc, 0x0879b2e5f6ee8b1c, 0x8badd636cc48b341, // x 2^14139 ~= 10^4256 + 0xab87d85e6311e801, 0xb7f786d14d58173d, 0x2f33c652bd12fab7, 0x8e69eee1f23f2be5, // x 2^14325 ~= 10^4312 + 0x7fed9b68d77255be, 0x35dc241819de7182, 0xad6a6308a8e8b557, 0x9133bc8f2a130fe5, // x 2^14511 ~= 10^4368 + 0x728ae72899d4bd12, 0xe5413d9414142a55, 0x9dbaa465efe141a0, 0x940b83f23a55842a, // x 
2^14697 ~= 10^4424 + 0x0f7740145246fb8f, 0x186ef2c39acb4103, 0x888c9ab2fc5b3437, 0x96f18b1742aad751, // x 2^14883 ~= 10^4480 + 0xd8bb0fba2183c6ef, 0xbf66d66cc34f0197, 0xba00864671d1053f, 0x99e6196979b978f1, // x 2^15069 ~= 10^4536 + 0x9b71ed2ceb790e49, 0x6faac32d59cc1f5d, 0x61d59d402aae4fea, 0x9ce977ba0ce3a0bd, // x 2^15255 ~= 10^4592 + 0xa0aa6d5e63991cfb, 0x19482fa0ac45669c, 0x803c1cd864033781, 0x9ffbf04722750449, // x 2^15441 ~= 10^4648 + 0x95a9949e04b8bff3, 0x900aa3c2f02ac9d4, 0xa28a151725a55e10, 0xa31dcec2fef14b30, // x 2^15627 ~= 10^4704 + 0x3acf9496dade0ce9, 0xbd8ecf923d23bec0, 0x5b8452af2302fe13, 0xa64f605b4e3352cd, // x 2^15813 ~= 10^4760 + 0x6204425d2b58e822, 0xdee162a8a1248550, 0x82b84cabc828bf93, 0xa990f3c09110c544, // x 2^15999 ~= 10^4816 + 0x091a2658e0639f32, 0x66fa2184cee0b861, 0x8d29dd5122e4278d, 0xace2d92db0390b59, // x 2^16185 ~= 10^4872 + 0x80acda113324758a, 0xded179c26d9ab828, 0x58f8fde02c03a6c6, 0xb045626fb50a35e7, // x 2^16371 ~= 10^4928 + 0x7128a8aad239ce8f, 0x8737bd250290cd5b, 0xd950102978dbd0ff, 0xb3b8e2eda91a232d, // x 2^16557 ~= 10^4984 +] + +@available(macOS 9999, *) +fileprivate func intervalContainingPowerOf10_Binary128(p: Int, lower: inout UInt256, upper: inout UInt256) -> Int { + if p >= 0 && p <= 55 { + let exactLow = powersOf10_Exact128[p * 2] + let exactHigh = powersOf10_Exact128[p * 2 + 1] + lower = UInt256(high: exactHigh, exactLow, 0, low: 0) + upper = lower + return binaryExponentFor10ToThe(p) + } + + let index = p + 4984 + let offset = (index / 56) * 4 + lower = UInt256(high: powersOf10_Binary128[offset + 3], + powersOf10_Binary128[offset + 2], + powersOf10_Binary128[offset + 1], + low: powersOf10_Binary128[offset + 0]) + let extraPower = index % 56 + var e = binaryExponentFor10ToThe(p - extraPower) + + if extraPower > 0 { + let extra = UInt128(_low: powersOf10_Exact128[extraPower * 2], + _high: powersOf10_Exact128[extraPower * 2 + 1]) + lower.multiplyRoundingDown(by: extra) + e += binaryExponentFor10ToThe(extraPower) + } + 
upper = lower + upper.low += 2 + return e +} +#endif diff --git a/stdlib/public/core/FloatingPointTypes.swift.gyb b/stdlib/public/core/FloatingPointTypes.swift.gyb index f3ed1f013ad5b..662c588ad1219 100644 --- a/stdlib/public/core/FloatingPointTypes.swift.gyb +++ b/stdlib/public/core/FloatingPointTypes.swift.gyb @@ -128,7 +128,6 @@ extension ${Self}: CustomDebugStringConvertible { } } -%if bits == 16 or bits == 32 or bits == 64: ${Availability(bits)} extension ${Self} { // Temporary `debugDescription2` that uses the new Swift implementation. @@ -137,7 +136,7 @@ extension ${Self} { @available(macOS 15, *) public var debugDescription2: String { if #available(macOS 9999, *) { - var buffer = InlineArray<32, UTF8.CodeUnit>(repeating: 0) + var buffer = InlineArray<64, UTF8.CodeUnit>(repeating: 0) var span = buffer.mutableSpan let textRange = Float${bits}ToASCII(value: self, buffer: &span) let textStart = unsafe span._start().assumingMemoryBound(to: UTF8.CodeUnit.self) + textRange.lowerBound @@ -151,7 +150,6 @@ extension ${Self} { } } } -%end ${Availability(bits)} extension ${Self}: TextOutputStreamable { From cbb83740439372f7ade7c387620e68edb90995b3 Mon Sep 17 00:00:00 2001 From: Tim Kientzle Date: Wed, 23 Jul 2025 07:15:54 -0700 Subject: [PATCH 06/19] Float80 performance --- .../public/core/FloatingPointToString.swift | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/stdlib/public/core/FloatingPointToString.swift b/stdlib/public/core/FloatingPointToString.swift index f4fbd41792fa6..ae6ba083c9e73 100644 --- a/stdlib/public/core/FloatingPointToString.swift +++ b/stdlib/public/core/FloatingPointToString.swift @@ -1281,7 +1281,7 @@ fileprivate func _backend_256bit( let powerOfTenExponent = intervalContainingPowerOf10_Binary128(p: -base10Exponent, lower: &powerOfTenRoundedDown, upper: &powerOfTenRoundedUp) - let extraBits = binaryExponent + powerOfTenExponent + let extraBits = binaryExponent &+ powerOfTenExponent // Step 5: Scale the 
interval (with rounding) let integerBits = 14 @@ -1292,30 +1292,30 @@ fileprivate func _backend_256bit( // Narrow the interval (odd significand) u = powerOfTenRoundedDown u.multiplyRoundingDown(by: upperMidpointExact) - u.shiftRightRoundingDown(by: integerBits - extraBits) + u.shiftRightRoundingDown(by: integerBits &- extraBits) l = powerOfTenRoundedUp l.multiplyRoundingUp(by: lowerMidpointExact) - l.shiftRightRoundingUp(by: integerBits - extraBits) + l.shiftRightRoundingUp(by: integerBits &- extraBits) } else { // Widen the interval (even significand) u = powerOfTenRoundedUp u.multiplyRoundingUp(by: upperMidpointExact) - u.shiftRightRoundingUp(by: integerBits - extraBits) + u.shiftRightRoundingUp(by: integerBits &- extraBits) l = powerOfTenRoundedDown l.multiplyRoundingDown(by: lowerMidpointExact) - l.shiftRightRoundingDown(by: integerBits - extraBits) + l.shiftRightRoundingDown(by: integerBits &- extraBits) } // Step 6: Align first digit, adjust exponent while u.high._high < (UInt64(1) << high64FractionBits) { - base10Exponent -= 1 + base10Exponent &-= 1 l.multiply(by: UInt32(10)) u.multiply(by: UInt32(10)) } var t = u - var delta = u - l + var delta = u &- l // Step 7: Generate digits @@ -1355,7 +1355,7 @@ fileprivate func _backend_256bit( toUncheckedByteOffset: nextDigit, as: UInt16.self) unsafe buffer.storeBytes(of: asciiDigitTable[Int(bitPattern:d34)], - toUncheckedByteOffset: nextDigit + 2, + toUncheckedByteOffset: nextDigit &+ 2, as: UInt16.self) nextDigit &+= 4 t = t0 @@ -1370,8 +1370,8 @@ fileprivate func _backend_256bit( delta.multiply(by: UInt32(10)) t.multiply(by: UInt32(10)) let digit = UInt8(truncatingIfNeeded: t.extractIntegerPart(integerBits)) - buffer.storeBytes(of: 0x30 + digit, - toByteOffset: nextDigit, + buffer.storeBytes(of: 0x30 &+ digit, + toUncheckedByteOffset: nextDigit, as: UInt8.self) nextDigit &+= 1 } @@ -1383,17 +1383,17 @@ fileprivate func _backend_256bit( // 64 bits here. 
let deltaHigh64 = delta.high._high let tHigh64 = t.high._high - if deltaHigh64 >= tHigh64 + (UInt64(1) << high64FractionBits) { + if deltaHigh64 >= tHigh64 &+ (UInt64(1) << high64FractionBits) { let skew: UInt64 if isBoundary { - skew = deltaHigh64 - deltaHigh64 / 3 - tHigh64 + skew = deltaHigh64 &- deltaHigh64 / 3 &- tHigh64 } else { - skew = deltaHigh64 / 2 - tHigh64 + skew = deltaHigh64 / 2 &- tHigh64 } let one = UInt64(1) << high64FractionBits let fractionMask = one - 1 let oneHalf = one >> 1 - var lastDigit = unsafe buffer.unsafeLoad(fromUncheckedByteOffset: nextDigit - 1, + var lastDigit = unsafe buffer.unsafeLoad(fromUncheckedByteOffset: nextDigit &- 1, as: UInt8.self) if (skew & fractionMask) == oneHalf { let adjust = skew >> high64FractionBits @@ -1404,7 +1404,7 @@ fileprivate func _backend_256bit( lastDigit &-= UInt8(truncatingIfNeeded: adjust) } buffer.storeBytes(of: lastDigit, - toByteOffset: nextDigit - 1, + toByteOffset: nextDigit &- 1, as: UInt8.self) } @@ -1897,7 +1897,7 @@ fileprivate struct UInt256 { return UInt(truncatingIfNeeded: integral) } - static func - (lhs: UInt256, rhs: UInt256) -> UInt256 { + static func &- (lhs: UInt256, rhs: UInt256) -> UInt256 { var t = UInt128(lhs.low._low) &+ UInt128(~rhs.low._low) &+ 1 let newlowlow = t._low t = UInt128(t._high) &+ UInt128(lhs.low._high) &+ UInt128(~rhs.low._high) From 06ced916e0914f4fac1672caf981961bc4cd8642 Mon Sep 17 00:00:00 2001 From: Tim Kientzle Date: Wed, 23 Jul 2025 07:26:35 -0700 Subject: [PATCH 07/19] Comment updates --- .../public/core/FloatingPointToString.swift | 54 +++++++++++++------ 1 file changed, 37 insertions(+), 17 deletions(-) diff --git a/stdlib/public/core/FloatingPointToString.swift b/stdlib/public/core/FloatingPointToString.swift index ae6ba083c9e73..4f4c630e01d6c 100644 --- a/stdlib/public/core/FloatingPointToString.swift +++ b/stdlib/public/core/FloatingPointToString.swift @@ -36,7 +36,7 @@ /// https://doi.org/10.1145/2837614.2837654 /// In particular, the Errol paper 
explored the impact of higher-precision /// fixed-width arithmetic on Grisu2 and showed a way to rapidly test -/// the correctness of such algorithms. +/// the correctness of Grisu-style algorithms. /// /// A few further improvements were inspired by the Ryu algorithm /// from Ulf Anders; "Ryū: fast float-to-string conversion", 2018. @@ -53,10 +53,12 @@ /// values on 32-bit processors, and higher-precision values on all /// processors, it is considerably faster. /// -/// * Always Accurate. Converting the decimal form back to binary -/// will always yield exactly the same value. For the IEEE 754 -/// formats, the round-trip will produce exactly the same bit -/// pattern in memory. +/// * Always Accurate. Except for NaNs, converting the decimal form +/// back to binary will always yield an equal value. For the IEEE +/// 754 formats, the round trip will produce exactly the same bit +/// pattern in memory. This assumes, of course, that the conversion +/// from text to binary uses a correctly-rounded algorithm such as +/// Clinger 1990 or Eisel-Lemire 2021. /// /// * Always Short. This always selects an accurate result with the /// minimum number of significant digits. @@ -73,12 +75,27 @@ /// * When present, a '.' is never the first or last character /// * There is a consecutive range of integer values that can be /// represented in any particular type (-2^54...2^54 for double). -/// Never use exponential form for integral numbers in this range. +/// We do not use exponential form for integral numbers in this +/// range. /// * Generally follow existing practice: Don't use use exponential /// form for fractional values bigger than 10^-4; always write at /// least 2 digits for an exponent. /// * Apart from the above, we do prefer shorter output. +/// Note: If you want to compare performance of this implementation +/// versus some others, keep in mind that this implementation does +/// deliberately sacrifice some performance. 
Any attempt to compare +/// the performance of this implementation to others should +/// try to compensate for the following: +/// * The output ergonomics described above do take some time. +/// It would be faster to always emit the form "123456e-78". +/// (See `finishFormatting()`) +/// * The implementations in published papers generally include +/// large tables with every power of 10 computed out. We've +/// factored these tables down to conserve code size, which +/// requires some additional work to reconstruct the needed power +/// of 10. (See the `intervalContainingPowerOf10_*` functions) + /// /// This Swift implementation was ported from an earlier C version; /// the output is exactly the same in all cases. @@ -90,7 +107,7 @@ /// implementation to ensure that no unsafety actually occurs. For /// Float32, that testing was exhaustive -- we verified all 4 /// billion possible Float32 values. -/// * The Swift code uses an idiom of building up to 8 ASCII characters +/// * The Swift code uses an idiom of building up to 8 digit characters /// in a UInt64 and then writing the whole block to memory. /// * The Swift version is slightly faster than the C version; /// mostly thanks to various minor algorithmic tweaks that were @@ -348,6 +365,7 @@ fileprivate func _Float16ToASCII( // Exhaustive testing shows that the only input value // affected by this is 0.015625 == 2^-6, which // incorrectly prints as "0.01562" without this fix. + // With this, it prints correctly as "0.01563" if t < lDigit || (t == lDigit && l > 0) { t += 1 } @@ -965,10 +983,12 @@ fileprivate func _Float64ToASCII( firstDigit &+= 1 // >90% of random binary64 values need at least 15 digits. - // We already have seven, try grabbing the next 8 digits all at once. + // We have seven so there's probably at least 8 more, which + // we can grab all at once. 
let TenToTheEighth = 100000000 as UInt128; // 10^(15-bulkFirstDigits) let d0 = delta * TenToTheEighth var t0 = t * TenToTheEighth + // The integer part of t0 is the next 8 digits let next8Digits = UInt32(truncatingIfNeeded: t0._high >> 32) t0 &= fractionMask if d0 < t0 { @@ -1204,8 +1224,9 @@ fileprivate func _Float80ToASCII( // ================================================================ #if false -// Note: We don't need _float128ToStringImpl, since that's only for backwards compatibility, -// and the legacy ABI never supported Float128. +// Note: We don't need _float128ToStringImpl, since that's only for +// backwards compatibility, and the legacy ABI never supported +// Float128. internal func Float128ToASCII( value d: Float128, @@ -1331,13 +1352,12 @@ fileprivate func _backend_256bit( as: UInt8.self) nextDigit &+= 1 - // It would be nice to generate 8 digits at a time - // and take advantage of intToEightDigits, but - // our integer portion has only 14 bits. We can't make - // that bigger without either sacrificing too much - // precision for correct Float128 or folding the first - // digits into the scaling (as we do with Double) which - // would require a back-out phase here. + // It would be nice to generate 8 digits at a time and take + // advantage of intToEightDigits, but our integer portion has only + // 14 bits. We can't make that bigger without either sacrificing + // too much precision for correct Float128 or folding the first + // digits into the scaling, which would require a back-out phase + // here (Double does both). // If there is at least one more digit possible... 
if delta < t { From 9793d59eacac3724177f64d70dc0be1a266703a8 Mon Sep 17 00:00:00 2001 From: Tim Kientzle Date: Fri, 1 Aug 2025 15:04:42 -0700 Subject: [PATCH 08/19] Fix availability --- .../public/core/FloatingPointToString.swift | 96 ++++++++++--------- 1 file changed, 50 insertions(+), 46 deletions(-) diff --git a/stdlib/public/core/FloatingPointToString.swift b/stdlib/public/core/FloatingPointToString.swift index 4f4c630e01d6c..7459ccda27548 100644 --- a/stdlib/public/core/FloatingPointToString.swift +++ b/stdlib/public/core/FloatingPointToString.swift @@ -112,6 +112,8 @@ /// * The Swift version is slightly faster than the C version; /// mostly thanks to various minor algorithmic tweaks that were /// found during the translation process. +/// * Most of this file is annotated for SwiftStdlib 6.2 +/// because it relies on UInt128, MutableSpan, and InlineArray. /// // ---------------------------------------------------------------------------- @@ -128,8 +130,9 @@ #if !((os(macOS) || targetEnvironment(macCatalyst)) && arch(x86_64)) // Support Legacy ABI on top of new implementation -@_silgen_name("swift_float16ToString2") -internal func _float16ToStringImpl2( +@available(SwiftStdlib 6.2, *) +@_silgen_name("swift_float16ToString") +public func _float16ToStringImpl( _ textBuffer: UnsafeMutablePointer, _ bufferLength: UInt, _ value: Float16, @@ -153,14 +156,14 @@ internal func Float16ToASCII( value f: Float16, buffer utf8Buffer: inout MutableSpan) -> Range { - if #available(macOS 9999, *) { + if #available(SwiftStdlib 6.2, *) { return _Float16ToASCII(value: f, buffer: &utf8Buffer) } else { return 0..<0 } } -@available(macOS 9999, *) +@available(SwiftStdlib 6.2, *) fileprivate func _Float16ToASCII( value f: Float16, buffer utf8Buffer: inout MutableSpan) -> Range @@ -396,8 +399,8 @@ fileprivate func _Float16ToASCII( // ================================================================ // Support Legacy ABI on top of new implementation 
-@_silgen_name("swift_float32ToString2") -internal func _float32ToStringImpl2( +@_silgen_name("swift_float32ToString") +public func _float32ToStringImpl( _ textBuffer: UnsafeMutablePointer, _ bufferLength: UInt, _ value: Float32, @@ -421,14 +424,14 @@ internal func Float32ToASCII( value f: Float32, buffer utf8Buffer: inout MutableSpan) -> Range { - if #available(macOS 9999, *) { + if #available(SwiftStdlib 6.2, *) { return _Float32ToASCII(value: f, buffer: &utf8Buffer) } else { return 0..<0 } } -@available(macOS 9999, *) +@available(SwiftStdlib 6.2, *) fileprivate func _Float32ToASCII( value f: Float32, buffer utf8Buffer: inout MutableSpan) -> Range @@ -616,8 +619,8 @@ fileprivate func _Float32ToASCII( // ================================================================ // Support Legacy ABI on top of new implementation -@_silgen_name("swift_float64ToString2") -internal func _float64ToStringImpl2( +@_silgen_name("swift_float64ToString") +public func _float64ToStringImpl( _ textBuffer: UnsafeMutablePointer, _ bufferLength: UInt, _ value: Float64, @@ -641,14 +644,14 @@ internal func Float64ToASCII( value d: Float64, buffer utf8Buffer: inout MutableSpan) -> Range { - if #available(macOS 9999, *) { + if #available(SwiftStdlib 6.2, *) { return _Float64ToASCII(value: d, buffer: &utf8Buffer) } else { return 0..<0 } } -@available(macOS 9999, *) +@available(SwiftStdlib 6.2, *) fileprivate func _Float64ToASCII( value d: Float64, buffer utf8Buffer: inout MutableSpan) -> Range @@ -1080,11 +1083,15 @@ fileprivate func _Float64ToASCII( // Float80 // // ================================================================ -#if ((os(macOS) || targetEnvironment(macCatalyst) || os(Linux)) && arch(x86_64)) + +// Float80 is only available on Intel x86/x86_64 processors on certain operating systems +// This matches the condition for the Float80 type + +#if !(os(Windows) || os(Android) || ($Embedded && !os(Linux) && !(os(macOS) || os(iOS) || os(watchOS) || os(tvOS)))) && (arch(i386) || 
arch(x86_64)) // Support Legacy ABI on top of new implementation -@_silgen_name("swift_float80ToString2") -internal func _float80ToStringImpl2( +@_silgen_name("swift_float80ToString") +internal func _float80ToStringImpl( _ textBuffer: UnsafeMutablePointer, _ bufferLength: UInt, _ value: Float80, @@ -1108,14 +1115,14 @@ internal func Float80ToASCII( value d: Float80, buffer utf8Buffer: inout MutableSpan) -> Range { - if #available(macOS 9999, *) { + if #available(SwiftStdlib 6.2, *) { return _Float80ToASCII(value: d, buffer: &utf8Buffer) } else { return 0..<0 } } -@available(macOS 9999, *) +@available(SwiftStdlib 6.2, *) fileprivate func _Float80ToASCII( value f: Float80, buffer utf8Buffer: inout MutableSpan) -> Range @@ -1232,14 +1239,14 @@ internal func Float128ToASCII( value d: Float128, buffer utf8Buffer: inout MutableSpan) -> Range { - if #available(macOS 9999, *) { + if #available(SwiftStdlib 6.2, *) { return _Float128ToASCII(value: d, buffer: &utf8Buffer) } else { return 0..<0 } } -@available(macOS 9999, *) +@available(SwiftStdlib 6.2, *) fileprivate func _Float128ToASCII( value d: Float128, buffer utf8Buffer: inout MutableSpan) -> Range @@ -1282,7 +1289,7 @@ fileprivate func _Float128ToASCII( // ================================================================ #if ((os(macOS) || targetEnvironment(macCatalyst) || os(Linux)) && arch(x86_64)) -@available(macOS 9999, *) +@available(SwiftStdlib 6.2, *) fileprivate func _backend_256bit( buffer: inout MutableRawSpan, upperMidpointExact: UInt128, @@ -1390,9 +1397,9 @@ fileprivate func _backend_256bit( delta.multiply(by: UInt32(10)) t.multiply(by: UInt32(10)) let digit = UInt8(truncatingIfNeeded: t.extractIntegerPart(integerBits)) - buffer.storeBytes(of: 0x30 &+ digit, - toUncheckedByteOffset: nextDigit, - as: UInt8.self) + unsafe buffer.storeBytes(of: 0x30 &+ digit, + toUncheckedByteOffset: nextDigit, + as: UInt8.self) nextDigit &+= 1 } } @@ -1448,7 +1455,7 @@ fileprivate func _backend_256bit( // `firstDigit` and 
that those bytes are filled with `"0"` (0x30) // characters. -@available(macOS 9999, *) +@available(SwiftStdlib 6.2, *) fileprivate func finishFormatting(_ buffer: inout MutableRawSpan, _ sign: FloatingPointSign, _ firstDigit: Int, @@ -1585,7 +1592,7 @@ fileprivate func finishFormatting(_ buffer: inout MutableRawSpan, // Table with ASCII strings for all 2-digit decimal numbers. // Stored as little-endian UInt16s for efficiency -@available(macOS 9999, *) +@available(SwiftStdlib 6.2, *) fileprivate let asciiDigitTable: InlineArray<100, UInt16> = [ 0x3030, 0x3130, 0x3230, 0x3330, 0x3430, 0x3530, 0x3630, 0x3730, 0x3830, 0x3930, @@ -1610,6 +1617,7 @@ fileprivate let asciiDigitTable: InlineArray<100, UInt16> = [ ] // The constants below assume we're on a little-endian processor +@available(SwiftStdlib 6.2, *) fileprivate func infinity(buffer: inout MutableRawSpan, sign: FloatingPointSign) -> Range { if sign == .minus { buffer.storeBytes(of: 0x666e692d, toByteOffset: 0, as: UInt32.self) // "-inf" @@ -1620,6 +1628,7 @@ fileprivate func infinity(buffer: inout MutableRawSpan, sign: FloatingPointSign) } } +@available(SwiftStdlib 6.2, *) fileprivate func zero(buffer: inout MutableRawSpan, sign: FloatingPointSign) -> Range { if sign == .minus { buffer.storeBytes(of: 0x302e302d, toByteOffset: 0, as: UInt32.self) // "-0.0" @@ -1630,10 +1639,10 @@ fileprivate func zero(buffer: inout MutableRawSpan, sign: FloatingPointSign) -> } } -@available(macOS 9999, *) +@available(SwiftStdlib 6.2, *) fileprivate let hexdigits: InlineArray<16, UInt8> = [ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66 ] -@available(macOS 9999, *) +@available(SwiftStdlib 6.2, *) fileprivate func hexWithoutLeadingZeros(buffer: inout MutableRawSpan, offset: inout Int, value: UInt64) { var shift = 60 while (shift > 0) && ((value >> shift) & 0xf == 0) { @@ -1647,7 +1656,7 @@ fileprivate func hexWithoutLeadingZeros(buffer: inout MutableRawSpan, offset: in } } 
-@available(macOS 9999, *) +@available(SwiftStdlib 6.2, *) fileprivate func hexWithLeadingZeros(buffer: inout MutableRawSpan, offset: inout Int, value: UInt64) { var shift = 60 while shift >= 0 { @@ -1658,7 +1667,7 @@ fileprivate func hexWithLeadingZeros(buffer: inout MutableRawSpan, offset: inout } } -@available(macOS 9999, *) +@available(SwiftStdlib 6.2, *) fileprivate func nan_details(buffer: inout MutableRawSpan, sign: FloatingPointSign, quiet: Bool, @@ -1704,6 +1713,7 @@ fileprivate func nan_details(buffer: inout MutableRawSpan, // // This implementation is based on work by Paul Khuong: // https://pvk.ca/Blog/2017/12/22/appnexus-common-framework-its-out-also-how-to-print-integers-faster/ +@available(SwiftStdlib 6.2, *) @inline(__always) fileprivate func intToEightDigits(_ n: UInt32) -> UInt64 { // Break into two numbers of 4 decimal digits each @@ -1755,7 +1765,7 @@ fileprivate func multiply64x32RoundingUp(_ lhs: UInt64, _ rhs: UInt32) -> UInt64 return t + (lhs >> 32) * UInt64(rhs) } -@available(SwiftStdlib 6.0, *) +@available(SwiftStdlib 6.2, *) @inline(__always) fileprivate func multiply128x64RoundingDown(_ lhs: UInt128, _ rhs: UInt64) -> UInt128 { let lhsHigh = UInt128(truncatingIfNeeded: lhs._high) @@ -1764,7 +1774,7 @@ fileprivate func multiply128x64RoundingDown(_ lhs: UInt128, _ rhs: UInt64) -> UI return (lhsHigh &* rhs128) &+ ((lhsLow &* rhs128) >> 64) } -@available(SwiftStdlib 6.0, *) +@available(SwiftStdlib 6.2, *) @inline(__always) fileprivate func multiply128x64RoundingUp(_ lhs: UInt128, _ rhs: UInt64) -> UInt128 { let lhsHigh = UInt128(truncatingIfNeeded: lhs._high) @@ -1776,11 +1786,9 @@ fileprivate func multiply128x64RoundingUp(_ lhs: UInt128, _ rhs: UInt64) -> UInt return h + ((l &+ bias) &>> 64) } -#if ((os(macOS) || targetEnvironment(macCatalyst) || os(Linux)) && arch(x86_64)) // Custom 256-bit unsigned integer type, with various arithmetic helpers as methods. - // Used by 80- and 128-bit floating point formatting logic above... 
-@available(macOS 15, *) +@available(SwiftStdlib 6.2, *) fileprivate struct UInt256 { var high: UInt128 var low: UInt128 @@ -1910,7 +1918,7 @@ fileprivate struct UInt256 { } mutating func extractIntegerPart(_ bits: Int) -> UInt { - assert(bits < 64) + assert(bits < 16) let integral = high._high >> (64 &- bits) high = UInt128(_low: high._low, _high: high._high &- (integral &<< (64 &- bits))) @@ -1933,7 +1941,6 @@ fileprivate struct UInt256 { && lhs.low < rhs.low) } } -#endif // ================================================================ // @@ -1941,7 +1948,7 @@ fileprivate struct UInt256 { // // ================================================================ -@available(macOS 9999, *) +@available(SwiftStdlib 6.2, *) @inline(__always) fileprivate func intervalContainingPowerOf10_Binary32(_ p: Int, _ lower: inout UInt64, _ upper: inout UInt64) -> Int { if p >= 0 { @@ -1960,7 +1967,7 @@ fileprivate func intervalContainingPowerOf10_Binary32(_ p: Int, _ lower: inout U return binaryExponentFor10ToThe(p) } -@available(macOS 9999, *) +@available(SwiftStdlib 6.2, *) @inline(__always) fileprivate func intervalContainingPowerOf10_Binary64(_ p: Int, _ lower: inout UInt128, _ upper: inout UInt128) -> Int { if p >= 0 && p <= 55 { @@ -2018,7 +2025,7 @@ fileprivate func decimalExponentFor2ToThe(_ p: Int) -> Int { // This covers the negative powers of 10 for Float32. // Positive powers of 10 come from the next table below. // Table size: 320 bytes -@available(macOS 9999, *) +@available(SwiftStdlib 6.2, *) fileprivate let powersOf10_negativeBinary32: InlineArray<_, UInt64> = [ 0x8b61313bbabce2c6, // x 2^-132 ~= 10^-40 0xae397d8aa96c1b77, // x 2^-129 ~= 10^-39 @@ -2077,7 +2084,7 @@ fileprivate let powersOf10_negativeBinary32: InlineArray<_, UInt64> = [ // support. // Table size: 896 bytes -@available(macOS 9999, *) +@available(SwiftStdlib 6.2, *) fileprivate let powersOf10_Exact128: InlineArray<_, UInt64> = [ // Low order ... 
high order 0x0000000000000000, 0x8000000000000000, // x 2^1 == 10^0 exactly @@ -2150,7 +2157,7 @@ fileprivate let powersOf10_Exact128: InlineArray<_, UInt64> = [ // penalty. // Table size: 464 bytes -@available(macOS 9999, *) +@available(SwiftStdlib 6.2, *) fileprivate let powersOf10_Binary64: InlineArray<_, UInt64> = [ // low-order half, high-order half 0x3931b850df08e738, 0x95fe7e07c91efafa, // x 2^-1328 ~= 10^-400 @@ -2184,15 +2191,13 @@ fileprivate let powersOf10_Binary64: InlineArray<_, UInt64> = [ 0x1027fff56784f444, 0xc4c5e310aef8aa17, // x 2^1276 ~= 10^384 ] -#if ((os(macOS) || targetEnvironment(macCatalyst) || os(Linux)) && arch(x86_64)) - // Needed by 80- and 128-bit formatters above // We could cut this in half by keeping only the positive powers and doing // a single additional 256-bit multiplication by 10^-4984 to recover the negative powers. // Table size: 5728 bytes -@available(macOS 9999, *) +@available(SwiftStdlib 6.2, *) fileprivate let powersOf10_Binary128: InlineArray<_, UInt64> = [ // Low-order ... 
high-order 0xaec2e6aff96b46ae, 0xf91044c2eff84750, 0x2b55c9e70e00c557, 0xb6536903bf8f2bda, // x 2^-16556 ~= 10^-4984 @@ -2376,7 +2381,7 @@ fileprivate let powersOf10_Binary128: InlineArray<_, UInt64> = [ 0x7128a8aad239ce8f, 0x8737bd250290cd5b, 0xd950102978dbd0ff, 0xb3b8e2eda91a232d, // x 2^16557 ~= 10^4984 ] -@available(macOS 9999, *) +@available(SwiftStdlib 6.2, *) fileprivate func intervalContainingPowerOf10_Binary128(p: Int, lower: inout UInt256, upper: inout UInt256) -> Int { if p >= 0 && p <= 55 { let exactLow = powersOf10_Exact128[p * 2] @@ -2405,4 +2410,3 @@ fileprivate func intervalContainingPowerOf10_Binary128(p: Int, lower: inout UInt upper.low += 2 return e } -#endif From 0934d80148d3d2848b158f4b31d598fde00303a2 Mon Sep 17 00:00:00 2001 From: Tim Kientzle Date: Fri, 1 Aug 2025 15:20:06 -0700 Subject: [PATCH 09/19] Remove old C implementation --- Runtimes/Core/runtime/CMakeLists.txt | 1 - include/swift/Runtime/SwiftDtoa.h | 302 -- .../public/core/FloatingPointTypes.swift.gyb | 59 +- stdlib/public/core/Runtime.swift | 110 - stdlib/public/runtime/CMakeLists.txt | 1 - stdlib/public/runtime/SwiftDtoa.cpp | 2769 ----------------- stdlib/public/stubs/Stubs.cpp | 41 - 7 files changed, 24 insertions(+), 3259 deletions(-) delete mode 100644 include/swift/Runtime/SwiftDtoa.h delete mode 100644 stdlib/public/runtime/SwiftDtoa.cpp diff --git a/Runtimes/Core/runtime/CMakeLists.txt b/Runtimes/Core/runtime/CMakeLists.txt index d226c7380fe71..795852313f946 100644 --- a/Runtimes/Core/runtime/CMakeLists.txt +++ b/Runtimes/Core/runtime/CMakeLists.txt @@ -52,7 +52,6 @@ add_library(swiftRuntime OBJECT RefCount.cpp ReflectionMirror.cpp RuntimeInvocationsTracking.cpp - SwiftDtoa.cpp SwiftTLSContext.cpp ThreadingError.cpp Tracing.cpp diff --git a/include/swift/Runtime/SwiftDtoa.h b/include/swift/Runtime/SwiftDtoa.h deleted file mode 100644 index 3b95cbe3e1775..0000000000000 --- a/include/swift/Runtime/SwiftDtoa.h +++ /dev/null @@ -1,302 +0,0 @@ -//===--- SwiftDtoa.h 
---------------------------------------------*- c -*-===// -// -// This source file is part of the Swift.org open source project -// -// Copyright (c) 2018, 2020 Apple Inc. and the Swift project authors -// Licensed under Apache License v2.0 with Runtime Library Exception -// -// See https://swift.org/LICENSE.txt for license information -// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors -// -//===---------------------------------------------------------------------===// -// -/// About SwiftDtoa -/// =============== -/// -/// SwiftDtoa is the C implementation that supports the `.description` -/// and `.debugDescription` properties for the standard Swift -/// floating-point types. These functions produce the "optimal form" -/// for the binary floating point value. The optimal form is a -/// decimal representation that satisfies the following properties: -/// -/// 1. Accurate. Parsing the value back to a binary floating-point -/// value of the same precision will exactly yield the original -/// value. For example, `Double(d.description) == d` for all `Double` -/// values `d` (except for NaN values, of course). -/// -/// 2. Short. Of all accurate results, the returned value will -/// contain the minimum number of significant digits. Note that -/// this is not quite the same as C++ `to_chars` which promises the -/// minimal number of characters. -/// -/// 3. Close. Of all accurate, short results, the value printed will -/// be the one that is closest to the exact binary floating-point -/// value. -/// -/// The optimal form is the ideal textual form for use in JSON and -/// similar interchange formats because it is accurate, compact, and -/// can be generated very quickly. It is also ideal for logging and -/// debugging use; the accuracy guarantees that the result can be -/// cut-and-pasted to obtain the exact original value, and the -/// shortness property eliminates unnecessary digits that can be -/// confusing to readers. 
-/// -/// Algorithms that produce such output have been known since at least -/// 1990, when Steele and White published their Dragon4 algorithm. -/// However, the earliest algorithms required high-precision -/// arithmetic which limited their use. Starting in 2010 with the -/// publication of Grisu3, there has been a surge of interest and -/// there are now a number of algorithms that can produce optimal -/// forms very quickly. This particular implementation is loosely -/// based on Grisu2 but incorporates concepts from Errol and Ryu that -/// make it significantly faster and ensure accuracy in all cases. -/// -/// About SwiftDtoa v1 -/// ------------------ -/// -/// The first version of SwiftDtoa was committed to the Swift runtime -/// in 2018. It supported Swift's Float, Double, and Float80 formats. -/// -/// About SwiftDtoa v1a -/// ------------------- -/// -/// Version 1a of SwiftDtoa added support for Float16. -/// -/// About SwiftDtoa v2 -/// ------------------ -/// -/// Version 2 of SwiftDtoa is a major overhaul with a number of -/// algorithmic improvements to make it faster (especially for Float16 -/// and Float80), smaller, and more portable (the code only requires -/// C99 and makes no use of C or C++ floating-point facilities). It -/// also includes experimental support for IEEE 754 quad-precision -/// binary128 format, which is not currently supported by Swift. -// -//===---------------------------------------------------------------------===// - -#ifndef SWIFT_DTOA_H -#define SWIFT_DTOA_H - -#define __STDC_WANT_IEC_60559_TYPES_EXT__ // FLT16_MAX -#include -#include -#include -#include - -// -// IEEE 754 Binary16 support (also known as "half-precision") -// - -// Enable this by default. -// Force disable: -DSWIFT_DTOA_BINARY16_SUPPORT=0 -#ifndef SWIFT_DTOA_BINARY16_SUPPORT - #define SWIFT_DTOA_BINARY16_SUPPORT 1 -#endif - -/// Does this platform support needs to pass _Float16 as a float in -/// C function? 
-#ifndef SWIFT_DTOA_PASS_FLOAT16_AS_FLOAT -// Windows does not define FLT16_MAX even though it supports _Float16 as argument. -# if (!defined(FLT16_MAX) || defined(__wasm__)) && !defined(_WIN32) -# define SWIFT_DTOA_PASS_FLOAT16_AS_FLOAT 1 -# else -# define SWIFT_DTOA_PASS_FLOAT16_AS_FLOAT 0 -# endif -#endif - -// -// IEEE 754 Binary32 support (also known as "single-precision") -// - -// Does "float" on this system use binary32 format? -// (Almost all modern systems do this.) -#if (FLT_RADIX == 2) && (FLT_MANT_DIG == 24) && (FLT_MIN_EXP == -125) && (FLT_MAX_EXP == 128) - #define FLOAT_IS_BINARY32 1 -#else - #undef FLOAT_IS_BINARY32 -#endif - -// We can format binary32 values even if the local C environment -// does not support it. But `float` == binary32 almost everywhere, -// so we enable it by default. -// Force disable: -DSWIFT_DTOA_BINARY32_SUPPORT=0 -#ifndef SWIFT_DTOA_BINARY32_SUPPORT - #define SWIFT_DTOA_BINARY32_SUPPORT 1 -#endif - -// -// IEEE 754 Binary64 support (also known as "double-precision") -// - -// Does "double" on this system use binary64 format? -// (Almost all modern systems do this.) -#if (FLT_RADIX == 2) && (DBL_MANT_DIG == 53) && (DBL_MIN_EXP == -1021) && (DBL_MAX_EXP == 1024) - #define DOUBLE_IS_BINARY64 1 -#else - #undef DOUBLE_IS_BINARY64 -#endif - -// Does "long double" on this system use binary64 format? -// (Windows, for example.) -#if (FLT_RADIX == 2) && (LDBL_MANT_DIG == 53) && (LDBL_MIN_EXP == -1021) && (LDBL_MAX_EXP == 1024) - #define LONG_DOUBLE_IS_BINARY64 1 -#else - #undef LONG_DOUBLE_IS_BINARY64 -#endif - -// We can format binary64 values even if the local C environment -// does not support it. But `double` == binary64 almost everywhere, -// so we enable it by default. -// Force disable: -DSWIFT_DTOA_BINARY64_SUPPORT=0 -#ifndef SWIFT_DTOA_BINARY64_SUPPORT - #define SWIFT_DTOA_BINARY64_SUPPORT 1 -#endif - -// -// Intel x87 Float80 support -// - -// Is "long double" on this system the same as Float80? 
-// (macOS, Linux, and FreeBSD when running on x86 or x86_64 processors.) -#if (FLT_RADIX == 2) && (LDBL_MANT_DIG == 64) && (LDBL_MIN_EXP == -16381) && (LDBL_MAX_EXP == 16384) - #define LONG_DOUBLE_IS_FLOAT80 1 -#else - #undef LONG_DOUBLE_IS_FLOAT80 -#endif - -// We can format float80 values even if the local C environment -// does not support it. However, by default, we only enable it for -// environments where float80 == long double. -// Force enable: -DSWIFT_DTOA_FLOAT80_SUPPORT=1 -// Force disable: -DSWIFT_DTOA_FLOAT80_SUPPORT=0 -#ifndef SWIFT_DTOA_FLOAT80_SUPPORT - #if LONG_DOUBLE_IS_FLOAT80 - #define SWIFT_DTOA_FLOAT80_SUPPORT 1 - #endif -#endif - -// -// IEEE 754 Binary128 support -// - -// Is "long double" on this system the same as Binary128? -// (Android on LP64 hardware.) -#if (FLT_RADIX == 2) && (LDBL_MANT_DIG == 113) && (LDBL_MIN_EXP == -16381) && (LDBL_MAX_EXP == 16384) - #define LONG_DOUBLE_IS_BINARY128 1 -#else - #undef LONG_DOUBLE_IS_BINARY128 -#endif - -// We can format binary128 values even if the local C environment -// does not support it. However, by default, we only enable it for -// environments where binary128 == long double. -// Force enable: -DSWIFT_DTOA_BINARY128_SUPPORT=1 -// Force disable: -DSWIFT_DTOA_BINARY128_SUPPORT=0 -#ifndef SWIFT_DTOA_BINARY128_SUPPORT - #if LONG_DOUBLE_IS_BINARY128 - #define SWIFT_DTOA_BINARY128_SUPPORT 1 - #endif -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -// Format a floating point value as an ASCII string -// -// Input: -// * `d` is the number to be formatted -// * `dest` is a buffer of length `length` -// -// Output: -// * Return value is the length of the string placed into `dest` -// or zero if the buffer is too small. -// * For infinity, it copies "inf" or "-inf". -// * For NaN, it outputs a Swift-style detailed dump, including -// sign, signaling/quiet, and payload (if any). Typical output: -// "nan", "-nan", "-snan(0x1234)". -// * For zero, it outputs "0.0" or "-0.0" depending on the sign. 
-// * The destination buffer is always null-terminated (even on error) -// unless the length is zero. -// -// Note: If you want to customize the output for Infinity, zero, or -// Nan, you can easily write a wrapper function that uses `fpclassify` -// to identify those cases and only calls through to these functions -// for normal and subnormal values. -// -// Guarantees: -// -// * Accurate. If you parse the result back to the same floating-point -// format via an accurate algorithm (such as Clinger's algorithm), -// the resulting value will be _exactly_ equal to the original value. -// On most systems, this implies that using `strtod` to parse the -// output of `swift_dtoa_optimal_double` will yield exactly the -// original value. -// -// * Short. No other accurate result will have fewer digits. -// -// * Close. If there are multiple possible decimal forms that are -// both accurate and short, the form computed here will be -// closest to the original binary value. -// -// Naming: The `_p` forms take a `const void *` pointing to the value -// in memory. These forms do not require any support from the local C -// environment. In particular, they should work correctly even on -// systems with no floating-point support. Forms ending in a C -// floating-point type (e.g., "_float", "_double") are identical but -// take the corresponding argument type. These forms obviously -// require the C environment to support passing floating-point types as -// function arguments. - -#if SWIFT_DTOA_BINARY16_SUPPORT -size_t swift_dtoa_optimal_binary16_p(const void *, char *dest, size_t length); -#if !SWIFT_DTOA_PASS_FLOAT16_AS_FLOAT -// If `_Float16` is defined, provide this convenience wrapper. 
-size_t swift_dtoa_optimal_binary16(_Float16, char *dest, size_t length); -#endif -#endif - -#if SWIFT_DTOA_BINARY32_SUPPORT -size_t swift_dtoa_optimal_binary32_p(const void *, char *dest, size_t length); -#if FLOAT_IS_BINARY32 -// If `float` happens to be binary32, define the convenience wrapper. -size_t swift_dtoa_optimal_float(float, char *dest, size_t length); -#endif -#endif - -#if SWIFT_DTOA_BINARY64_SUPPORT -size_t swift_dtoa_optimal_binary64_p(const void *, char *dest, size_t length); -#if DOUBLE_IS_BINARY64 -// If `double` happens to be binary64, define the convenience wrapper. -size_t swift_dtoa_optimal_double(double, char *dest, size_t length); -#endif -#if LONG_DOUBLE_IS_BINARY64 -// If `long double` happens to be binary64, define the convenience wrapper. -size_t swift_dtoa_optimal_long_double(long double, char *dest, size_t length); -#endif -#endif - -#if SWIFT_DTOA_FLOAT80_SUPPORT -// Universal entry point works on all platforms, regardless of -// whether the local system has direct support for float80 -size_t swift_dtoa_optimal_float80_p(const void *, char *dest, size_t length); -#if LONG_DOUBLE_IS_FLOAT80 -// If 'long double' happens to be float80, define a convenience wrapper. -size_t swift_dtoa_optimal_long_double(long double, char *dest, size_t length); -#endif -#endif - -#if SWIFT_DTOA_BINARY128_SUPPORT -// Universal entry point works on all platforms, regardless of -// whether the local system has direct support for float80 -size_t swift_dtoa_optimal_binary128_p(const void *, char *dest, size_t length); -#if LONG_DOUBLE_IS_BINARY128 -// If 'long double' happens to be binary128, define a convenience wrapper. 
-size_t swift_dtoa_optimal_long_double(long double, char *dest, size_t length); -#endif -#endif - -#ifdef __cplusplus -} -#endif -#endif diff --git a/stdlib/public/core/FloatingPointTypes.swift.gyb b/stdlib/public/core/FloatingPointTypes.swift.gyb index 662c588ad1219..3fb80cfd6c828 100644 --- a/stdlib/public/core/FloatingPointTypes.swift.gyb +++ b/stdlib/public/core/FloatingPointTypes.swift.gyb @@ -104,11 +104,7 @@ extension ${Self}: CustomStringConvertible { if isNaN { return "nan" } else { - var (buffer, length) = _float${bits}ToString(self, debug: false) - return unsafe buffer.withBytes { (bufferPtr) in - unsafe String._fromASCII( - UnsafeBufferPointer(start: bufferPtr, count: length)) - } + return debugDescription } } } @@ -120,31 +116,16 @@ extension ${Self}: CustomDebugStringConvertible { /// This property has the same value as the `description` property, except /// that NaN values are printed in an extended format. public var debugDescription: String { - var (buffer, length) = _float${bits}ToString(self, debug: true) - return unsafe buffer.withBytes { (bufferPtr) in - unsafe String._fromASCII( - UnsafeBufferPointer(start: bufferPtr, count: length)) - } - } -} - -${Availability(bits)} -extension ${Self} { - // Temporary `debugDescription2` that uses the new Swift implementation. - // `debugDescription` above is still using the old C implementation - // for now so we can compare performance and results between the two. 
- @available(macOS 15, *) - public var debugDescription2: String { - if #available(macOS 9999, *) { - var buffer = InlineArray<64, UTF8.CodeUnit>(repeating: 0) - var span = buffer.mutableSpan - let textRange = Float${bits}ToASCII(value: self, buffer: &span) - let textStart = unsafe span._start().assumingMemoryBound(to: UTF8.CodeUnit.self) + textRange.lowerBound - let textLength = textRange.upperBound - textRange.lowerBound - - let textBuff = unsafe UnsafeBufferPointer(_uncheckedStart: textStart, - count: textLength) - return unsafe String._fromASCII(textBuff) + if #available(SwiftStdlib 6.2, *) { + var buffer = InlineArray<64, UTF8.CodeUnit>(repeating: 0) + var span = buffer.mutableSpan + let textRange = Float${bits}ToASCII(value: self, buffer: &span) + let textStart = unsafe span._start().assumingMemoryBound(to: UTF8.CodeUnit.self) + textRange.lowerBound + let textLength = textRange.upperBound - textRange.lowerBound + + let textBuff = unsafe UnsafeBufferPointer(_uncheckedStart: textStart, + count: textLength) + return unsafe String._fromASCII(textBuff) } else { fatalError() } @@ -153,11 +134,19 @@ extension ${Self} { ${Availability(bits)} extension ${Self}: TextOutputStreamable { - public func write(to target: inout Target) where Target: TextOutputStream { - var (buffer, length) = _float${bits}ToString(self, debug: true) - unsafe buffer.withBytes { (bufferPtr) in - let bufPtr = unsafe UnsafeBufferPointer(start: bufferPtr, count: length) - unsafe target._writeASCII(bufPtr) +public func write(to target: inout Target) where Target: TextOutputStream { + if #available(SwiftStdlib 6.2, *) { + var buffer = InlineArray<64, UTF8.CodeUnit>(repeating: 0) + var span = buffer.mutableSpan + let textRange = Float${bits}ToASCII(value: self, buffer: &span) + let textStart = unsafe span._start().assumingMemoryBound(to: UTF8.CodeUnit.self) + textRange.lowerBound + let textLength = textRange.upperBound - textRange.lowerBound + + let textBuff = unsafe 
UnsafeBufferPointer(_uncheckedStart: textStart, + count: textLength) + unsafe target._writeASCII(textBuff) + } else { + fatalError() } } } diff --git a/stdlib/public/core/Runtime.swift b/stdlib/public/core/Runtime.swift index e9398496dfbb0..e92160f9eac79 100644 --- a/stdlib/public/core/Runtime.swift +++ b/stdlib/public/core/Runtime.swift @@ -339,116 +339,6 @@ internal struct _Buffer72 { } } -#if !((os(macOS) || targetEnvironment(macCatalyst)) && arch(x86_64)) -#if arch(wasm32) -// Note that this takes a Float32 argument instead of Float16, because clang -// doesn't have _Float16 on all platforms yet. -@available(SwiftStdlib 5.3, *) -typealias _CFloat16Argument = Float32 -#else -@available(SwiftStdlib 5.3, *) -typealias _CFloat16Argument = Float16 -#endif - -@available(SwiftStdlib 5.3, *) -@_silgen_name("swift_float16ToString") -internal func _float16ToStringImpl( - _ buffer: UnsafeMutablePointer, - _ bufferLength: UInt, - _ value: _CFloat16Argument, - _ debug: Bool -) -> Int - -@available(SwiftStdlib 5.3, *) -internal func _float16ToString( - _ value: Float16, - debug: Bool -) -> (buffer: _Buffer32, length: Int) { - _internalInvariant(MemoryLayout<_Buffer32>.size == 32) - var buffer = _Buffer32() - let length = unsafe buffer.withBytes { (bufferPtr) in - unsafe _float16ToStringImpl(bufferPtr, 32, _CFloat16Argument(value), debug) - } - return (buffer, length) -} -#endif - -// Returns a UInt64, but that value is the length of the string, so it's -// guaranteed to fit into an Int. This is part of the ABI, so we can't -// trivially change it to Int. Callers can safely convert the result -// to any integer type without checks, however. 
-@_silgen_name("swift_float32ToString") -internal func _float32ToStringImpl( - _ buffer: UnsafeMutablePointer, - _ bufferLength: UInt, - _ value: Float32, - _ debug: Bool -) -> UInt64 - -internal func _float32ToString( - _ value: Float32, - debug: Bool -) -> (buffer: _Buffer32, length: Int) { - _internalInvariant(MemoryLayout<_Buffer32>.size == 32) - var buffer = _Buffer32() - let length = unsafe buffer.withBytes { (bufferPtr) in unsafe Int( - truncatingIfNeeded: _float32ToStringImpl(bufferPtr, 32, value, debug) - )} - return (buffer, length) -} - -// Returns a UInt64, but that value is the length of the string, so it's -// guaranteed to fit into an Int. This is part of the ABI, so we can't -// trivially change it to Int. Callers can safely convert the result -// to any integer type without checks, however. -@_silgen_name("swift_float64ToString") -internal func _float64ToStringImpl( - _ buffer: UnsafeMutablePointer, - _ bufferLength: UInt, - _ value: Float64, - _ debug: Bool -) -> UInt64 - -internal func _float64ToString( - _ value: Float64, - debug: Bool -) -> (buffer: _Buffer32, length: Int) { - _internalInvariant(MemoryLayout<_Buffer32>.size == 32) - var buffer = _Buffer32() - let length = unsafe buffer.withBytes { (bufferPtr) in unsafe Int( - truncatingIfNeeded: _float64ToStringImpl(bufferPtr, 32, value, debug) - )} - return (buffer, length) -} - - -#if !(os(Windows) || os(Android) || ($Embedded && !os(Linux) && !(os(macOS) || os(iOS) || os(watchOS) || os(tvOS)))) && (arch(i386) || arch(x86_64)) - -// Returns a UInt64, but that value is the length of the string, so it's -// guaranteed to fit into an Int. This is part of the ABI, so we can't -// trivially change it to Int. Callers can safely convert the result -// to any integer type without checks, however. 
-@_silgen_name("swift_float80ToString") -internal func _float80ToStringImpl( - _ buffer: UnsafeMutablePointer, - _ bufferLength: UInt, - _ value: Float80, - _ debug: Bool -) -> UInt64 - -internal func _float80ToString( - _ value: Float80, - debug: Bool -) -> (buffer: _Buffer32, length: Int) { - _internalInvariant(MemoryLayout<_Buffer32>.size == 32) - var buffer = _Buffer32() - let length = unsafe buffer.withBytes { (bufferPtr) in Int( - truncatingIfNeeded: unsafe _float80ToStringImpl(bufferPtr, 32, value, debug) - )} - return (buffer, length) -} -#endif - #if !$Embedded // Returns a UInt64, but that value is the length of the string, so it's // guaranteed to fit into an Int. This is part of the ABI, so we can't diff --git a/stdlib/public/runtime/CMakeLists.txt b/stdlib/public/runtime/CMakeLists.txt index 407382ff6b38a..7e554f7132406 100644 --- a/stdlib/public/runtime/CMakeLists.txt +++ b/stdlib/public/runtime/CMakeLists.txt @@ -76,7 +76,6 @@ set(swift_runtime_sources RefCount.cpp ReflectionMirror.cpp RuntimeInvocationsTracking.cpp - SwiftDtoa.cpp SwiftTLSContext.cpp ThreadingError.cpp Tracing.cpp diff --git a/stdlib/public/runtime/SwiftDtoa.cpp b/stdlib/public/runtime/SwiftDtoa.cpp deleted file mode 100644 index 9dda6d7fcaed4..0000000000000 --- a/stdlib/public/runtime/SwiftDtoa.cpp +++ /dev/null @@ -1,2769 +0,0 @@ -//===--- SwiftDtoa.cpp ---------------------------------------------*- C++ -*-===// -// -// This source file is part of the Swift.org open source project -// -// Copyright (c) 2018-2020 Apple Inc. and the Swift project authors -// Licensed under Apache License v2.0 with Runtime Library Exception -// -// See https://swift.org/LICENSE.txt for license information -// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors -// -//===---------------------------------------------------------------------===// -// -// Note: This source file is used in different projects where it gets -// compiled variously as ".c" or ".cpp". 
Please keep the code clean -// portable C so others can share your improvements. -// -/// For binary16, this uses a simple approach that is normally -/// implemented with variable-length arithmetic. However, due to -/// the limited range of binary16, this can be implemented simply -/// with only 32-bit integer arithmetic. -/// -/// For other formats, SwiftDtoa uses a modified form of the Grisu2 -/// algorithm from Florian Loitsch; "Printing Floating-Point Numbers -/// Quickly and Accurately with Integers", 2010. -/// https://doi.org/10.1145/1806596.1806623 -/// -/// Some of the Grisu2 modifications were suggested by the "Errol -/// paper": Marc Andrysco, Ranjit Jhala, Sorin Lerner; "Printing -/// Floating-Point Numbers: A Faster, Always Correct Method", 2016. -/// https://doi.org/10.1145/2837614.2837654 -/// In particular, the Errol paper explored the impact of higher-precision -/// fixed-width arithmetic on Grisu2 and showed a way to rapidly test -/// the correctness of such algorithms. -/// -/// A few further improvements were inspired by the Ryu algorithm -/// from Ulf Anders; "Ryū: fast float-to-string conversion", 2018. -/// https://doi.org/10.1145/3296979.3192369 -/// -/// In summary, this implementation is: -/// -/// * Fast. It uses only fixed-width integer arithmetic and has -/// constant memory requirements. For double-precision values on -/// 64-bit processors, it is competitive with Ryu. For double-precision -/// values on 32-bit processors, and higher-precision values on all -/// processors, it is considerably faster. -/// -/// * Always Accurate. Converting the decimal form back to binary -/// will always yield exactly the same value. For the IEEE 754 -/// formats, the round-trip will produce exactly the same bit -/// pattern in memory. -/// -/// * Always Short. This always selects an accurate result with the -/// minimum number of significant digits. -/// -/// * Always Close. 
Among all accurate, short results, this always -/// chooses the result that is closest to the exact floating-point -/// value. (In case of an exact tie, it rounds the last digit even.) -/// -/// * Portable. The code is written in portable C99. The core -/// implementations utilize only fixed-size integer arithmetic. -/// 128-bit integer support is utilized if present but is not -/// necessary. There are thin wrappers that accept platform-native -/// floating point types and delegate to the portable core -/// functions. -/// -// ---------------------------------------------------------------------------- - -#include -#include -#include -#include -#include -#include -#include - -#include "swift/Runtime/SwiftDtoa.h" - -#if defined(__SIZEOF_INT128__) - // We get a significant speed boost if we can use the __uint128_t - // type that's present in GCC and Clang on 64-bit architectures. In - // particular, we can do 128-bit arithmetic directly and can - // represent 256-bit integers as collections of 64-bit elements. - #define HAVE_UINT128_T 1 -#else - // On 32-bit, we use slower code that manipulates 128-bit - // and 256-bit integers as collections of 32-bit elements. - #define HAVE_UINT128_T 0 -#endif - -// -// Predefine various arithmetic helpers. Most implementations and extensive -// comments are at the bottom of this file. -// - -#if SWIFT_DTOA_BINARY32_SUPPORT || SWIFT_DTOA_BINARY64_SUPPORT || SWIFT_DTOA_FLOAT80_SUPPORT || SWIFT_DTOA_BINARY128_SUPPORT -// The power-of-10 tables do not directly store the associated binary -// exponent. That's because the binary exponent is a simple linear -// function of the decimal power (and vice versa), so it's just as -// fast (and uses much less memory) to compute it: - -// The binary exponent corresponding to a particular power of 10. -// This matches the power-of-10 tables across the full range of binary128. 
-#define binaryExponentFor10ToThe(p) ((int)(((((int64_t)(p)) * 55732705) >> 24) + 1)) - -// A decimal exponent that approximates a particular binary power. -#define decimalExponentFor2ToThe(e) ((int)(((int64_t)e * 20201781) >> 26)) -#endif - -// -// Helper functions used only by the single-precision binary32 formatter -// - -#if SWIFT_DTOA_BINARY32_SUPPORT -static uint64_t multiply64x32RoundingDown(uint64_t lhs, uint32_t rhs) { - static const uint64_t mask32 = UINT32_MAX; - uint64_t t = ((lhs & mask32) * rhs) >> 32; - return t + (lhs >> 32) * rhs; -} -static uint64_t multiply64x32RoundingUp(uint64_t lhs, uint32_t rhs) { - static const uint64_t mask32 = UINT32_MAX; - uint64_t t = (((lhs & mask32) * rhs) + mask32) >> 32; - return t + (lhs >> 32) * rhs; -} -static void intervalContainingPowerOf10_Binary32(int p, uint64_t *lower, uint64_t *upper, int *exponent); -#endif - -// -// Helpers used by binary32, binary64, float80, and binary128. -// - -#if SWIFT_DTOA_BINARY32_SUPPORT || SWIFT_DTOA_BINARY64_SUPPORT || SWIFT_DTOA_FLOAT80_SUPPORT || SWIFT_DTOA_BINARY128_SUPPORT -#if HAVE_UINT128_T -typedef __uint128_t swift_uint128_t; -#define initialize128WithHighLow64(dest, high64, low64) ((dest) = ((__uint128_t)(high64) << 64) | (low64)) -#define shiftLeft128(u128, shift) (*(u128) <<= shift) -#else -typedef struct { - uint32_t low, b, c, high; -} swift_uint128_t; -#define initialize128WithHighLow64(dest, high64, low64) \ - ((dest).low = (uint32_t)(low64), \ - (dest).b = (uint32_t)((low64) >> 32), \ - (dest).c = (uint32_t)(high64), \ - (dest).high = (uint32_t)((high64) >> 32)) -static void shiftLeft128(swift_uint128_t *, int shift); -#endif -inline static int finishFormatting(char *, size_t, char *, char *, int, int); -#endif - - -// -// Helper functions needed by the binary64 formatter. 
-// - -#if SWIFT_DTOA_BINARY64_SUPPORT -#if HAVE_UINT128_T -#define isLessThan128x128(lhs, rhs) ((lhs) < (rhs)) -#define subtract128x128(lhs, rhs) (*(lhs) -= (rhs)) -#define multiply128xu32(lhs, rhs) (*(lhs) *= (rhs)) -#define initialize128WithHigh64(dest, value) ((dest) = (__uint128_t)(value) << 64) -#define extractHigh64From128(arg) ((uint64_t)((arg) >> 64)) -#define is128bitZero(arg) ((arg) == 0) -static int extractIntegerPart128(__uint128_t *fixed128, int integerBits) { - const int fractionBits = 128 - integerBits; - int integerPart = (int)(*fixed128 >> fractionBits); - const swift_uint128_t fixedPointMask = (((__uint128_t)1 << fractionBits) - 1); - *fixed128 &= fixedPointMask; - return integerPart; -} -#define shiftRightRoundingDown128(val, shift) ((val) >> (shift)) -#define shiftRightRoundingUp128(val, shift) (((val) + (((uint64_t)1 << (shift)) - 1)) >> (shift)) - -#else - -static int isLessThan128x128(swift_uint128_t lhs, swift_uint128_t rhs); -static void subtract128x128(swift_uint128_t *lhs, swift_uint128_t rhs); -static void multiply128xu32(swift_uint128_t *lhs, uint32_t rhs); -#define initialize128WithHigh64(dest, value) \ - ((dest).low = (dest).b = 0, \ - (dest).c = (uint32_t)(value), \ - (dest).high = (uint32_t)((value) >> 32)) -#define extractHigh64From128(arg) (((uint64_t)(arg).high << 32)|((arg).c)) -#define is128bitZero(dest) \ - (((dest).low | (dest).b | (dest).c | (dest).high) == 0) -// Treat a uint128_t as a fixed-point value with `integerBits` bits in -// the integer portion. Return the integer portion and zero it out. 
-static int extractIntegerPart128(swift_uint128_t *fixed128, int integerBits) { - const int highFractionBits = 32 - integerBits; - int integerPart = (int)(fixed128->high >> highFractionBits); - fixed128->high &= ((uint32_t)1 << highFractionBits) - 1; - return integerPart; -} -static swift_uint128_t shiftRightRoundingDown128(swift_uint128_t lhs, int shift); -static swift_uint128_t shiftRightRoundingUp128(swift_uint128_t lhs, int shift); -#endif -static swift_uint128_t multiply128x64RoundingDown(swift_uint128_t lhs, uint64_t rhs); -static swift_uint128_t multiply128x64RoundingUp(swift_uint128_t lhs, uint64_t rhs); -static void intervalContainingPowerOf10_Binary64(int p, swift_uint128_t *lower, swift_uint128_t *upper, int *exponent); -#endif - -// -// Helper functions used by the 256-bit backend needed for -// float80 and binary128 -// - -#if SWIFT_DTOA_FLOAT80_SUPPORT || SWIFT_DTOA_BINARY128_SUPPORT -#if HAVE_UINT128_T -// A 256-bit unsigned integer type stored as 3 64-bit words -typedef struct {uint64_t low, midlow, midhigh, high;} swift_uint256_t; -#define initialize256WithHighMidLow64(dest, high64, midhigh64, midlow64, low64) \ - ((dest).low = (low64), \ - (dest).midlow = (midlow64), \ - (dest).midhigh = (midhigh64), \ - (dest).high = (high64)) -#define is256bitZero(dest) \ - (((dest).low | (dest).midlow | (dest).midhigh | (dest).high) == 0) -static int extractIntegerPart256(swift_uint256_t *fixed256, int integerBits) { - int integerPart = (int)(fixed256->high >> (64 - integerBits)); - const uint64_t fixedPointMask = (((uint64_t)1 << (64 - integerBits)) - 1); - fixed256->high &= fixedPointMask; - return integerPart; -} -#else -// A 256-bit unsigned integer type stored as 8 32-bit words -typedef struct { uint32_t elt[8]; } swift_uint256_t; // [0]=low, [7]=high -#define initialize256WithHighMidLow64(dest, high64, midhigh64, midlow64, low64) \ - ((dest).elt[0] = (uint64_t)(low64), \ - (dest).elt[1] = (uint64_t)(low64) >> 32, \ - (dest).elt[2] = (uint64_t)(midlow64), 
\ - (dest).elt[3] = (uint64_t)(midlow64) >> 32, \ - (dest).elt[4] = (uint64_t)(midhigh64), \ - (dest).elt[5] = (uint64_t)(midhigh64) >> 32, \ - (dest).elt[6] = (uint64_t)(high64), \ - (dest).elt[7] = (uint64_t)(high64) >> 32) -#define is256bitZero(dest) \ - (((dest).elt[0] | (dest).elt[1] | (dest).elt[2] | (dest).elt[3] \ -| (dest).elt[4] | (dest).elt[5] | (dest).elt[6] | (dest).elt[7]) == 0) -static int extractIntegerPart256(swift_uint256_t *fixed256, int integerBits) { - int integerPart = (int)(fixed256->elt[7] >> (32 - integerBits)); - const uint64_t fixedPointMask = (((uint64_t)1 << (32 - integerBits)) - 1); - fixed256->elt[7] &= fixedPointMask; - return integerPart; -} -#endif -static void multiply256xu32(swift_uint256_t *lhs, uint32_t rhs); -// Multiply a 256-bit fraction times a 128-bit fraction, with controlled rounding -static void multiply256x128RoundingDown(swift_uint256_t *lhs, swift_uint128_t rhs); -static void multiply256x128RoundingUp(swift_uint256_t *lhs, swift_uint128_t rhs); -static void subtract256x256(swift_uint256_t *lhs, swift_uint256_t rhs); -static int isLessThan256x256(swift_uint256_t lhs, swift_uint256_t rhs); -static void shiftRightRoundingDown256(swift_uint256_t *lhs, int shift); -static void shiftRightRoundingUp256(swift_uint256_t *lhs, int shift); -static void intervalContainingPowerOf10_Binary128(int p, swift_uint256_t *lower, swift_uint256_t *upper, int *exponent); -static size_t _swift_dtoa_256bit_backend(char *, size_t, swift_uint128_t, swift_uint128_t, int, int, int, int, bool); -#endif - - -// A table of all two-digit decimal numbers -#if SWIFT_DTOA_BINARY16_SUPPORT || SWIFT_DTOA_BINARY32_SUPPORT || SWIFT_DTOA_BINARY64_SUPPORT || SWIFT_DTOA_FLOAT80_SUPPORT || SWIFT_DTOA_BINARY128_SUPPORT -static const char asciiDigitTable[] = - "0001020304050607080910111213141516171819" - "2021222324252627282930313233343536373839" - "4041424344454647484950515253545556575859" - "6061626364656667686970717273747576777879" - 
"8081828384858687888990919293949596979899"; -#endif - -// ================================================================ -// -// Helpers to output formatted results for infinity, zero, and NaN -// -// ================================================================ - -static size_t infinity(char *dest, size_t len, int negative) { - if (negative) { - if (len >= 5) { - memcpy(dest, "-inf", 5); - return 4; - } - } else { - if (len >= 4) { - memcpy(dest, "inf", 4); - return 3; - } - } - if (len > 0) { - dest[0] = '\0'; - } - return 0; -} - -static size_t zero(char *dest, size_t len, int negative) { - if (negative) { - if (len >= 5) { - memcpy(dest, "-0.0", 5); - return 4; - } - } else { - if (len >= 4) { - memcpy(dest, "0.0", 4); - return 3; - } - } - if (len > 0) { - dest[0] = '\0'; - } - return 0; -} - -static size_t nan_details(char *dest, size_t len, int negative, int quiet, uint64_t payloadHigh, uint64_t payloadLow) { - const char *sign = negative ? "-" : ""; - const char *signalingChar = quiet ? 
"" : "s"; - char buff[64]; - if (payloadLow != 0) { - if (payloadHigh != 0) { - snprintf(buff, sizeof(buff), "%s%snan(0x%" PRIx64 "%016" PRIx64 ")", - sign, signalingChar, payloadHigh, payloadLow); - } else { - snprintf(buff, sizeof(buff), "%s%snan(0x%" PRIx64 ")", - sign, signalingChar, payloadLow); - } - } else { - snprintf(buff, sizeof(buff), "%s%snan", - sign, signalingChar); - } - size_t nanlen = strlen(buff); - if (nanlen < len) { - memcpy(dest, buff, nanlen + 1); - return nanlen; - } - if (len > 0) { - dest[0] = '\0'; - } - return 0; -} - - -// ================================================================ -// -// BINARY16 -// -// ================================================================ - - -#if SWIFT_DTOA_BINARY16_SUPPORT -#if !SWIFT_DTOA_PASS_FLOAT16_AS_FLOAT -// Format a C `_Float16` -size_t swift_dtoa_optimal_binary16(_Float16 d, char *dest, size_t length) { - return swift_dtoa_optimal_binary16_p(&d, dest, length); -} -#endif - -// Format an IEEE 754 binary16 half-precision floating point value -// into an optimal text form. - -// This does not assume that the C environment has any support -// for binary16. - -// Because binary16 has such a limited range, a simple exact -// implementation can fit in 32 bit arithmetic. Since we can easily -// verify every single binary16 value, this can be experimentally -// optimized. 
-size_t swift_dtoa_optimal_binary16_p(const void *f, char *dest, size_t length) { - static const int significandBitCount = 10; - static const uint32_t significandMask - = ((uint32_t)1 << significandBitCount) - 1; - static const int exponentBitCount = 5; - static const int exponentMask = (1 << exponentBitCount) - 1; - // See comments in swift_dtoa_optimal_binary64_p - static const int64_t exponentBias = (1 << (exponentBitCount - 1)) - 2; // 14 - - if (length < 1) { - return 0; - } - - // Step 0: Deconstruct IEEE 754 binary16 format - uint16_t raw = *(const uint16_t *)f; - int exponentBitPattern = (raw >> significandBitCount) & exponentMask; - uint16_t significandBitPattern = raw & significandMask; - int negative = raw >> 15; - - // Step 1: Handle the various input cases: - int binaryExponent; - uint16_t significand; - int isBoundary = significandBitPattern == 0; - if (exponentBitPattern == exponentMask) { // NaN or Infinity - if (isBoundary) { // Infinity - return infinity(dest, length, negative); - } else { - const int quiet = (significandBitPattern >> (significandBitCount - 1)) & 1; - uint16_t payload = significandBitPattern & ((1U << (significandBitCount - 2)) - 1); - return nan_details(dest, length, negative, quiet, 0, payload); - } - } else if (exponentBitPattern == 0) { - if (isBoundary) { // Zero - return zero(dest, length, negative); - } else { // Subnormal - binaryExponent = 1 - exponentBias; - significand = significandBitPattern; - } - } else { // normal - binaryExponent = exponentBitPattern - exponentBias; - uint16_t hiddenBit = (uint32_t)1 << (uint32_t)significandBitCount; - uint16_t fullSignificand = significandBitPattern + hiddenBit; - significand = fullSignificand; - } - - // Step 2: Determine the exact target interval - significand <<= 2; - static const uint16_t halfUlp = 2; - uint32_t upperMidpointExact = significand + halfUlp; - - static const uint16_t quarterUlp = 1; - uint32_t lowerMidpointExact - = significand - (isBoundary ? 
quarterUlp : halfUlp); - - // Shortest output from here is "1.0" plus null byte - if (length < 4) { - dest[0] = '\0'; - return 0; - } - - char *p = dest; - if (negative) { - *p++ = '-'; - } - - if (binaryExponent < -13 || (binaryExponent == -13 && significand < 0x1a38)) { - // Format values < 10^-5 as exponential form - // We know value < 10^-5, so we can do the first scaling step unconditionally - int decimalExponent = -5; - uint32_t u = (upperMidpointExact << (28 - 13 + binaryExponent)) * 100000; - uint32_t l = (lowerMidpointExact << (28 - 13 + binaryExponent)) * 100000; - uint32_t t = (significand << (28 - 13 + binaryExponent)) * 100000; - const uint32_t mask = (1 << 28) - 1; - if (t < ((1 << 28) / 10)) { - u *= 100; l *= 100; t *= 100; - decimalExponent -= 2; - } - if (t < (1 << 28)) { - u *= 10; l *= 10; t *= 10; - decimalExponent -= 1; - } - const int uDigit = u >> 28, lDigit = l >> 28; - if (uDigit == lDigit) { - // There's more than one digit, emit a '.' and the rest - if (p > dest + length - 6) { - dest[0] = '\0'; - return 0; - } - *p++ = (t >> 28) + '0'; - *p++ = '.'; - while (true) { - u = (u & mask) * 10; l = (l & mask) * 10; - const int uDigit = u >> 28, lDigit = l >> 28; - if (uDigit != lDigit) { - t = (t & mask) * 10; - break; - } - t *= 10; - *p++ = uDigit + '0'; - } - } - t = (t + (1 << 27)) >> 28; // Add 1/2 to round - if (p > dest + length - 6) { // Exactly 6 bytes written below - dest[0] = '\0'; - return 0; - } - *p++ = t + '0'; - memcpy(p, "e-", 2); - p += 2; - memcpy(p, asciiDigitTable + (-decimalExponent) * 2, 2); - p += 2; - *p = '\0'; - return p - dest; - } - - // Format the value using decimal format - - // There's an integer portion of no more than 5 digits - int intportion; - if (binaryExponent < 13) { - intportion = significand >> (13 - binaryExponent); - significand -= intportion << (13 - binaryExponent); - } else { - intportion = significand << (binaryExponent - 13); - significand -= intportion >> (binaryExponent - 13); - } - if 
(intportion < 10) { - if (p > dest + length - 3) { - dest[0] = '\0'; - return 0; - } - *p++ = intportion + '0'; // One digit is the most common case - } else if (intportion < 1000) { - // 2 or 3 digits - if (p > dest + length - 4) { - dest[0] = '\0'; - return 0; - } - if (intportion > 99) { - *p++ = intportion / 100 + '0'; - } - memcpy(p, asciiDigitTable + (intportion % 100) * 2, 2); - p += 2; - } else { - // 4 or 5 digits - if (p > dest + length - 6) { - dest[0] = '\0'; - return 0; - } - if (intportion > 9999) { - *p++ = intportion / 10000 + '0'; - intportion %= 10000; - } - memcpy(p, asciiDigitTable + (intportion / 100) * 2, 2); - memcpy(p + 2, asciiDigitTable + (intportion % 100) * 2, 2); - p += 4; - } - if (p > dest + length - 3) { - dest[0] = '\0'; - return 0; - } - *p++ = '.'; - if (significand == 0) { // No fraction, so we're done. - *p++ = '0'; - *p = '\0'; - return p - dest; - } - - // Format the fractional part - uint32_t u = upperMidpointExact << (28 - 13 + binaryExponent); - uint32_t l = lowerMidpointExact << (28 - 13 + binaryExponent); - uint32_t t = significand << (28 - 13 + binaryExponent); - const uint32_t mask = (1 << 28) - 1; - unsigned uDigit, lDigit; - while (true) { - u = (u & mask) * 10; l = (l & mask) * 10; - uDigit = u >> 28; lDigit = l >> 28; - if (uDigit != lDigit) { - t = (t & mask) * 10; - break; - } - t *= 10; - if (p > dest + length - 3) { - dest[0] = '\0'; - return 0; - } - *p++ = uDigit + '0'; - } - t += 1 << 27; // Add 1/2 - if ((t & mask) == 0) { // Was exactly 1/2 (now zero) - t = (t >> 28) & ~1; // Round even - } else { - t >>= 28; - } - if (t <= lDigit && l > 0) - t += 1; - *p++ = t + '0'; - *p = '\0'; - return p - dest; -} -#endif - -// ================================================================ -// -// BINARY32 -// -// ================================================================ - - -#if SWIFT_DTOA_BINARY32_SUPPORT -#if FLOAT_IS_BINARY32 -// Format a C `float` -size_t swift_dtoa_optimal_float(float d, char *dest, 
size_t length) { - return swift_dtoa_optimal_binary32_p(&d, dest, length); -} -#endif - -// Format an IEEE 754 single-precision binary32 format floating-point number. -size_t swift_dtoa_optimal_binary32_p(const void *f, char *dest, size_t length) -{ - static const int significandBitCount = FLT_MANT_DIG - 1; - static const uint32_t significandMask - = ((uint32_t)1 << significandBitCount) - 1; - static const int exponentBitCount = 8; - static const int exponentMask = (1 << exponentBitCount) - 1; - // See comments in swift_dtoa_optimal_binary64_p - static const int64_t exponentBias = (1 << (exponentBitCount - 1)) - 2; // 125 - - // Step 0: Deconstruct the target number - // Note: this strongly assumes IEEE 754 binary32 format - uint32_t raw = *(const uint32_t *)f; - int exponentBitPattern = (raw >> significandBitCount) & exponentMask; - uint32_t significandBitPattern = raw & significandMask; - int negative = raw >> 31; - - // Step 1: Handle the various input cases: - int binaryExponent; - uint32_t significand; - if (length < 1) { - return 0; - } else if (exponentBitPattern == exponentMask) { // NaN or Infinity - if (significandBitPattern == 0) { // Infinity - return infinity(dest, length, negative); - } else { // NaN - const int quiet = (significandBitPattern >> (significandBitCount - 1)) & 1; - uint32_t payload = raw & ((1UL << (significandBitCount - 2)) - 1); - return nan_details(dest, length, negative, quiet != 0, 0, payload); - } - } else if (exponentBitPattern == 0) { - if (significandBitPattern == 0) { // Zero - return zero(dest, length, negative); - } else { // Subnormal - binaryExponent = 1 - exponentBias; - significand = significandBitPattern << (32 - significandBitCount - 1); - } - } else { // normal - binaryExponent = exponentBitPattern - exponentBias; - uint32_t hiddenBit = (uint32_t)1 << (uint32_t)significandBitCount; - uint32_t fullSignificand = significandBitPattern + hiddenBit; - significand = fullSignificand << (32 - significandBitCount - 1); - } - - 
// Step 2: Determine the exact unscaled target interval - static const uint32_t halfUlp = (uint32_t)1 << (32 - significandBitCount - 2); - uint64_t upperMidpointExact = (uint64_t)(significand + halfUlp); - - int isBoundary = significandBitPattern == 0; - static const uint32_t quarterUlp = halfUlp >> 1; - uint64_t lowerMidpointExact - = (uint64_t)(significand - (isBoundary ? quarterUlp : halfUlp)); - - // Step 3: Estimate the base 10 exponent - int base10Exponent = decimalExponentFor2ToThe(binaryExponent); - - // Step 4: Compute a power-of-10 scale factor - uint64_t powerOfTenRoundedDown = 0; - uint64_t powerOfTenRoundedUp = 0; - int powerOfTenExponent = 0; - static const int bulkFirstDigits = 1; - intervalContainingPowerOf10_Binary32(-base10Exponent + bulkFirstDigits - 1, - &powerOfTenRoundedDown, - &powerOfTenRoundedUp, - &powerOfTenExponent); - const int extraBits = binaryExponent + powerOfTenExponent; - - // Step 5: Scale the interval (with rounding) - static const int integerBits = 8; - const int shift = integerBits - extraBits; - const int roundUpBias = (1 << shift) - 1; - static const int fractionBits = 64 - integerBits; - static const uint64_t fractionMask = ((uint64_t)1 << fractionBits) - (uint64_t)1; - uint64_t u, l; - if (significandBitPattern & 1) { - // Narrow the interval (odd significand) - uint64_t u1 = multiply64x32RoundingDown(powerOfTenRoundedDown, - upperMidpointExact); - u = u1 >> shift; // Rounding down - - uint64_t l1 = multiply64x32RoundingUp(powerOfTenRoundedUp, - lowerMidpointExact); - l = (l1 + roundUpBias) >> shift; // Rounding Up - } else { - // Widen the interval (even significand) - uint64_t u1 = multiply64x32RoundingUp(powerOfTenRoundedUp, - upperMidpointExact); - u = (u1 + roundUpBias) >> shift; // Rounding Up - - uint64_t l1 = multiply64x32RoundingDown(powerOfTenRoundedDown, - lowerMidpointExact); - l = l1 >> shift; // Rounding down - } - - // Step 6: Align first digit, adjust exponent - // In particular, this prunes leading zeros 
from subnormals - uint64_t t = u; - uint64_t delta = u - l; - while (t < (uint64_t)1 << fractionBits) { - base10Exponent -= 1; - t *= 10; - delta *= 10; - } - - // Step 7: Generate decimal digits into the destination buffer - char *p = dest; - if (p > dest + length - 3) { - dest[0] = '\0'; - return 0; - } - if (negative) { - *p++ = '-'; - } - char * const firstOutputChar = p; - // Format first digit as a 2-digit value to get a leading '0' - memcpy(p, asciiDigitTable + (t >> fractionBits) * 2, 2); - t &= fractionMask; - p += 2; - - // Emit two digits at a time - while ((delta * 10) < ((t * 10) & fractionMask)) { - if (p > dest + length - 3) { - dest[0] = '\0'; - return 0; - } - delta *= 100; - t *= 100; - memcpy(p, asciiDigitTable + (t >> fractionBits) * 2, 2); - t &= fractionMask; - p += 2; - } - - // Emit any final digit - if (delta < t) { - if (p > dest + length - 2) { - dest[0] = '\0'; - return 0; - } - delta *= 10; - t *= 10; - *p++ = '0' + (t >> fractionBits); - t &= fractionMask; - } - - // Adjust the final digit to be closer to the original value - if (delta > t + ((uint64_t)1 << fractionBits)) { - uint64_t skew; - if (isBoundary) { - skew = delta - delta / 3 - t; - } else { - skew = delta / 2 - t; - } - uint64_t one = (uint64_t)(1) << (64 - integerBits); - uint64_t lastAccurateBit = 1ULL << 24; - uint64_t fractionMask = (one - 1) & ~(lastAccurateBit - 1); - uint64_t oneHalf = one >> 1; - if (((skew + (lastAccurateBit >> 1)) & fractionMask) == oneHalf) { - // If the skew is exactly integer + 1/2, round the last - // digit even after adjustment - int adjust = (int)(skew >> (64 - integerBits)); - p[-1] -= adjust; - p[-1] &= ~1; - } else { - // Else round to nearest... 
- int adjust = (int)((skew + oneHalf) >> (64 - integerBits)); - p[-1] -= adjust; - } - } - - int forceExponential = binaryExponent > 25 || (binaryExponent == 25 && !isBoundary); - return finishFormatting(dest, length, p, firstOutputChar, forceExponential, base10Exponent); -} -#endif - - -// ================================================================ -// -// BINARY64 -// -// ================================================================ - -#if SWIFT_DTOA_BINARY64_SUPPORT -#if LONG_DOUBLE_IS_BINARY64 -size_t swift_dtoa_optimal_long_double(long double d, char *dest, size_t length) { - return swift_dtoa_optimal_binary64_p(&d, dest, length); -} -#endif -#if DOUBLE_IS_BINARY64 -size_t swift_dtoa_optimal_double(double d, char *dest, size_t length) { - return swift_dtoa_optimal_binary64_p(&d, dest, length); -} -#endif - -// Format an IEEE 754 double-precision binary64 format floating-point number. - -// The calling convention here assumes that C `double` is this format, -// but otherwise, this does not utilize any floating-point arithmetic -// or library routines. -size_t swift_dtoa_optimal_binary64_p(const void *d, char *dest, size_t length) -{ - // Bits in raw significand (not including hidden bit, if present) - static const int significandBitCount = DBL_MANT_DIG - 1; - static const uint64_t significandMask - = ((uint64_t)1 << significandBitCount) - 1; - // Bits in raw exponent - static const int exponentBitCount = 11; - static const int exponentMask = (1 << exponentBitCount) - 1; - // Note: IEEE 754 conventionally uses 1023 as the exponent - // bias. That's because they treat the significand as a - // fixed-point number with one bit (the hidden bit) integer - // portion. The logic here reconstructs the significand as a - // pure fraction, so we need to accommodate that when - // reconstructing the binary exponent. 
- static const int64_t exponentBias = (1 << (exponentBitCount - 1)) - 2; // 1022 - - // Step 0: Deconstruct an IEEE 754 binary64 double-precision value - uint64_t raw = *(const uint64_t *)d; - int exponentBitPattern = (raw >> significandBitCount) & exponentMask; - uint64_t significandBitPattern = raw & significandMask; - int negative = raw >> 63; - - // Step 1: Handle the various input cases: - if (length < 1) { - return 0; - } - int binaryExponent; - int isBoundary = significandBitPattern == 0; - uint64_t significand; - if (exponentBitPattern == exponentMask) { // NaN or Infinity - if (isBoundary) { // Infinity - return infinity(dest, length, negative); - } else { - const int quiet = (raw >> (significandBitCount - 1)) & 1; - uint64_t payload = raw & ((1ull << (significandBitCount - 2)) - 1); - return nan_details(dest, length, negative, quiet, 0, payload); - } - } else if (exponentBitPattern == 0) { - if (isBoundary) { // Zero - return zero(dest, length, negative); - } else { // subnormal - binaryExponent = 1 - exponentBias; - significand = significandBitPattern - << (64 - significandBitCount - 1); - } - } else { // normal - binaryExponent = exponentBitPattern - exponentBias; - uint64_t hiddenBit = (uint64_t)1 << significandBitCount; - uint64_t fullSignificand = significandBitPattern + hiddenBit; - significand = fullSignificand << (64 - significandBitCount - 1); - } - - // Step 2: Determine the exact unscaled target interval - - // Grisu-style algorithms construct the shortest decimal digit - // sequence within a specific interval. To build the appropriate - // interval, we start by computing the midpoints between this - // floating-point value and the adjacent ones. Note that this - // step is an exact computation. - - uint64_t halfUlp = (uint64_t)1 << (64 - significandBitCount - 2); - uint64_t quarterUlp = halfUlp >> 1; - uint64_t upperMidpointExact = significand + halfUlp; - - uint64_t lowerMidpointExact - = significand - (isBoundary ? 
quarterUlp : halfUlp); - - int isOddSignificand = (significandBitPattern & 1) != 0; - - // Step 3: Estimate the base 10 exponent - - // Grisu algorithms are based in part on a simple technique for - // generating a base-10 form for a binary floating-point number. - // Start with a binary floating-point number `f * 2^e` and then - // estimate the decimal exponent `p`. You can then rewrite your - // original number as: - // - // ``` - // f * 2^e * 10^-p * 10^p - // ``` - // - // The last term is part of our output, and a good estimate for - // `p` will ensure that `2^e * 10^-p` is close to 1. Multiplying - // the first three terms then yields a fraction suitable for - // producing the decimal digits. Here we use a very fast estimate - // of `p` that is never off by more than 1; we'll have - // opportunities later to correct any error. - - int base10Exponent = decimalExponentFor2ToThe(binaryExponent); - - // Step 4: Compute a power-of-10 scale factor - - // Compute `10^-p` to 128-bit precision. We generate - // both over- and under-estimates to ensure we can exactly - // bound the later use of these values. - swift_uint128_t powerOfTenRoundedDown; - swift_uint128_t powerOfTenRoundedUp; - int powerOfTenExponent = 0; - static const int bulkFirstDigits = 7; - static const int bulkFirstDigitFactor = 1000000; // 10^(bulkFirstDigits - 1) - // Note the extra factor of 10^bulkFirstDigits -- that will give - // us a headstart on digit generation later on. (In contrast, Ryu - // uses an extra factor of 10^17 here to get all the digits up - // front, but then has to back out any extra digits. Doing that - // with a 17-digit value requires 64-bit division, which is the - // root cause of Ryu's poor performance on 32-bit processors. We - // also might have to back out extra digits if 7 is too many, but - // will only need 32-bit division in that case.) 
- intervalContainingPowerOf10_Binary64(-base10Exponent + bulkFirstDigits - 1, - &powerOfTenRoundedDown, - &powerOfTenRoundedUp, - &powerOfTenExponent); - const int extraBits = binaryExponent + powerOfTenExponent; - - // Step 5: Scale the interval (with rounding) - - // As mentioned above, the final digit generation works - // with an interval, so we actually apply the scaling - // to the upper and lower midpoint values separately. - - // As part of the scaling here, we'll switch from a pure - // fraction with zero bit integer portion and 128-bit fraction - // to a fixed-point form with 32 bits in the integer portion. - static const int integerBits = 32; - - // We scale the interval in one of two different ways, - // depending on whether the significand is even or odd... - - swift_uint128_t u, l; - if (isOddSignificand) { - // Case A: Narrow the interval (odd significand) - - // Loitsch' original Grisu2 always rounds so as to narrow the - // interval. Since our digit generation will select a value - // within the scaled interval, narrowing the interval - // guarantees that we will find a digit sequence that converts - // back to the original value. - - // This ensures accuracy but, as explained in Loitsch' paper, - // this carries a risk that there will be a shorter digit - // sequence outside of our narrowed interval that we will - // miss. This risk obviously gets lower with increased - // precision, but it wasn't until the Errol paper that anyone - // had a good way to test whether a particular implementation - // had sufficient precision. That paper shows a way to enumerate - // the worst-case numbers; those numbers that are extremely close - // to the mid-points between adjacent floating-point values. - // These are the values that might sit just outside of the - // narrowed interval. By testing these values, we can verify - // the correctness of our implementation. - - // Multiply out the upper midpoint, rounding down... 
- swift_uint128_t u1 = multiply128x64RoundingDown(powerOfTenRoundedDown, - upperMidpointExact); - // Account for residual binary exponent and adjust - // to the fixed-point format - u = shiftRightRoundingDown128(u1, integerBits - extraBits); - - // Conversely for the lower midpoint... - swift_uint128_t l1 = multiply128x64RoundingUp(powerOfTenRoundedUp, - lowerMidpointExact); - l = shiftRightRoundingUp128(l1, integerBits - extraBits); - - } else { - // Case B: Widen the interval (even significand) - - // As explained in Errol Theorem 6, in certain cases there is - // a short decimal representation at the exact boundary of the - // scaled interval. When such a number is converted back to - // binary, it will get rounded to the adjacent even - // significand. - - // So when the significand is even, we round so as to widen - // the interval in order to ensure that the exact midpoints - // are considered. Of couse, this ensures that we find a - // short result but carries a risk of selecting a result - // outside of the exact scaled interval (which would be - // inaccurate). - - // The same testing approach described above (based on results - // in the Errol paper) also applies - // to this case. - - swift_uint128_t u1 = multiply128x64RoundingUp(powerOfTenRoundedUp, - upperMidpointExact); - u = shiftRightRoundingUp128(u1, integerBits - extraBits); - - swift_uint128_t l1 = multiply128x64RoundingDown(powerOfTenRoundedDown, - lowerMidpointExact); - l = shiftRightRoundingDown128(l1, integerBits - extraBits); - } - - // Step 6: Align first digit, adjust exponent - - // Calculations above used an estimate for the power-of-ten scale. - // Here, we compensate for any error in that estimate by testing - // whether we have the expected number of digits in the integer - // portion and correcting as necessary. This also serves to - // prune leading zeros from subnormals. - - // Except for subnormals, this loop should never run more than once. 
- // For subnormals, this might run as many as 16 + bulkFirstDigits - // times. -#if HAVE_UINT128_T - while (u < ((__uint128_t)bulkFirstDigitFactor << (128 - integerBits))) -#else - while (u.high < ((uint32_t)bulkFirstDigitFactor << (32 - integerBits))) -#endif - { - base10Exponent -= 1; - multiply128xu32(&l, 10); - multiply128xu32(&u, 10); - } - - // Step 7: Produce decimal digits - - // One standard approach generates digits for the scaled upper and - // lower boundaries and stops when at the first digit that - // differs. For example, note that 0.1234 is the shortest decimal - // between u = 0.123456 and l = 0.123345. - - // Grisu optimizes this by generating digits for the upper bound - // (multiplying by 10 to isolate each digit) while simultaneously - // scaling the interval width `delta`. As we remove each digit - // from the upper bound, the remainder is the difference between - // the base-10 value generated so far and the true upper bound. - // When that remainder is less than the scaled width of the - // interval, we know the current digits specify a value within the - // target interval. - - // The logic below actually blends three different digit-generation - // strategies: - // * The first digits are already in the integer portion of the - // fixed-point value, thanks to the `bulkFirstDigits` factor above. - // We can just break those down and write them out. - // * If we generated too many digits, we use a Ryu-inspired technique - // to backtrack. - // * If we generated too few digits (the usual case), we use an - // optimized form of the Grisu2 method to produce the remaining - // values. 
- - // Generate digits for `t` with interval width `delta = u - l` - swift_uint128_t t = u; - swift_uint128_t delta = u; - subtract128x128(&delta, l); - - char *p = dest; - if (negative) { - if (p >= dest + length) { - dest[0] = '\0'; - return 0; - } - *p++ = '-'; - } - char * const firstOutputChar = p; - - // The `bulkFirstDigits` adjustment above already set up the first 7 digits - // Format as 8 digits (with a leading zero that we'll exploit later on). - uint32_t d12345678 = extractIntegerPart128(&t, integerBits); - - if (!isLessThan128x128(delta, t)) { - // Oops! We have too many digits. Back out the extra ones to - // get the right answer. This is similar to Ryu, but since - // we've only produced seven digits, we only need 32-bit - // arithmetic here. A few notes: - // * Our target hardware always supports 32-bit hardware division, - // so this should be reasonably fast. - // * For small integers (like "2"), Ryu would have to back out 16 - // digits; we only have to back out 6. - // * Very few double-precision values actually need fewer than 7 - // digits. So this is rarely used except in workloads that - // specifically use double for small integers. This is more - // common for binary32, of course. - - // TODO: Add benchmarking for "small integers" -1000...1000 to - // verify that this does not unduly penalize those values. - - // Why this is critical for performance: In order to use the - // 8-digits-at-a-time optimization below, we need at least 30 - // bits in the integer part of our fixed-point format above. If - // we only use bulkDigits = 1, that leaves only 128 - 30 = 98 - // bit accuracy for our scaling step, which isn't enough - // (binary64 needs ~110 bits for correctness). So we have to - // use a large bulkDigits value to make full use of the 128-bit - // scaling above, which forces us to have some form of logic to - // handle the case of too many digits. 
The alternatives are to - // use >128 bit values (slower) or do some complex finessing of - // bit counts by working with powers of 5 instead of 10. - -#if HAVE_UINT128_T - uint64_t uHigh = u >> 64; - uint64_t lHigh = l >> 64; - if (0 != (uint64_t)l) { - lHigh += 1; - } -#else - uint64_t uHigh = ((uint64_t)u.high << 32) + u.c; - uint64_t lHigh = ((uint64_t)l.high << 32) + l.c; - if (0 != (l.b | l.low)) { - lHigh += 1; - } -#endif - uint64_t tHigh; - if (isBoundary) { - tHigh = (uHigh + lHigh * 2) / 3; - } else { - tHigh = (uHigh + lHigh) / 2; - } - - uint32_t u0 = uHigh >> (64 - integerBits); - uint32_t l0 = lHigh >> (64 - integerBits); - if ((lHigh & ((1ULL << (64 - integerBits)) - 1)) != 0) { - l0 += 1; - } - uint32_t t0 = tHigh >> (64 - integerBits); - int t0digits = 8; - - uint32_t u1 = u0 / 10; - uint32_t l1 = (l0 + 9) / 10; - int trailingZeros = is128bitZero(t); - int droppedDigit = ((tHigh * 10) >> (64 - integerBits)) % 10; - while (u1 >= l1 && u1 != 0) { - u0 = u1; - l0 = l1; - trailingZeros &= droppedDigit == 0; - droppedDigit = t0 % 10; - t0 /= 10; - t0digits--; - u1 = u0 / 10; - l1 = (l0 + 9) / 10; - } - // Correct the final digit - if (droppedDigit > 5 || (droppedDigit == 5 && !trailingZeros)) { - t0 += 1; - } else if (droppedDigit == 5 && trailingZeros) { - t0 += 1; - t0 &= ~1; - } - // t0 has t0digits digits. Write them out - if (p > dest + length - t0digits - 1) { // Make sure we have space - dest[0] = '\0'; - return 0; - } - int i = t0digits; - while (i > 1) { // Write out 2 digits at a time back-to-front - i -= 2; - memcpy(p + i, asciiDigitTable + (t0 % 100) * 2, 2); - t0 /= 100; - } - if (i > 0) { // Handle an odd number of digits - p[0] = t0 + '0'; - } - p += t0digits; // Move the pointer past the digits we just wrote - } else { - // - // Our initial scaling did not produce too many digits. - // The `d12345678` value holds the first 7 digits (plus - // a leading zero that will be useful later). 
We write - // those out and then incrementally generate as many - // more digits as necessary. The remainder of this - // algorithm is basically just Grisu2. - // - - if (p > dest + length - 9) { - dest[0] = '\0'; - return 0; - } - // Write out the 7 digits we got earlier + leading zero - int d1234 = d12345678 / 10000; - int d5678 = d12345678 % 10000; - int d78 = d5678 % 100; - int d56 = d5678 / 100; - memcpy(p + 6, asciiDigitTable + d78 * 2, 2); - memcpy(p + 4, asciiDigitTable + d56 * 2, 2); - int d34 = d1234 % 100; - int d12 = d1234 / 100; - memcpy(p + 2, asciiDigitTable + d34 * 2, 2); - memcpy(p, asciiDigitTable + d12 * 2, 2); - p += 8; - - // Seven digits wasn't enough, so let's get some more. - // Most binary64 values need >= 15 digits total. We already have seven, - // so try grabbing the next 8 digits all at once. - // (This is suboptimal for binary32, but the code savings - // from sharing this implementation are worth it.) - static const uint32_t bulkDigitFactor = 100000000; // 10^(15-bulkFirstDigits) - swift_uint128_t d0 = delta; - multiply128xu32(&d0, bulkDigitFactor); - swift_uint128_t t0 = t; - multiply128xu32(&t0, bulkDigitFactor); - int bulkDigits = extractIntegerPart128(&t0, integerBits); // 9 digits - if (isLessThan128x128(d0, t0)) { - if (p > dest + length - 9) { - dest[0] = '\0'; - return 0; - } - // Next 8 digits are good; add them to the output - int d1234 = bulkDigits / 10000; - int d5678 = bulkDigits % 10000; - int d78 = d5678 % 100; - int d56 = d5678 / 100; - memcpy(p + 6, asciiDigitTable + d78 * 2, 2); - memcpy(p + 4, asciiDigitTable + d56 * 2, 2); - int d34 = d1234 % 100; - int d12 = d1234 / 100; - memcpy(p + 2, asciiDigitTable + d34 * 2, 2); - memcpy(p, asciiDigitTable + d12 * 2, 2); - p += 8; - - t = t0; - delta = d0; - } - - // Finish up by generating and writing one digit at a time. 
- while (isLessThan128x128(delta, t)) { - if (p > dest + length - 2) { - dest[0] = '\0'; - return 0; - } - multiply128xu32(&delta, 10); - multiply128xu32(&t, 10); - *p++ = '0' + extractIntegerPart128(&t, integerBits); - } - - // Adjust the final digit to be closer to the original value. This accounts - // for the fact that sometimes there is more than one shortest digit - // sequence. - - // For example, consider how the above would work if you had the - // value 0.1234 and computed u = 0.1257, l = 0.1211. The above - // digit generation works with `u`, so produces 0.125. But the - // values 0.122, 0.123, and 0.124 are just as short and 0.123 is - // therefore the best choice, since it's closest to the original - // value. - - // We know delta and t are both less than 10.0 here, so we can - // shed some excess integer bits to simplify the following: - const int adjustIntegerBits = 4; // Integer bits for "adjust" phase - shiftLeft128(&delta, integerBits - adjustIntegerBits); - shiftLeft128(&t, integerBits - adjustIntegerBits); - - // Note: We've already consumed most of our available precision, - // so it's okay to just work in 64 bits for this... - uint64_t deltaHigh64 = extractHigh64From128(delta); - uint64_t tHigh64 = extractHigh64From128(t); - - // If `delta < t + 1.0`, then the interval is narrower than - // one decimal digit, so there is no other option. - if (deltaHigh64 >= tHigh64 + ((uint64_t)1 << (64 - adjustIntegerBits))) { - uint64_t skew; - if (isBoundary) { - // If we're at the boundary where the exponent shifts, - // then the original value is 1/3 of the way from - // the bottom of the interval ... - skew = deltaHigh64 - deltaHigh64 / 3 - tHigh64; - } else { - // ... otherwise it's exactly in the middle. - skew = deltaHigh64 / 2 - tHigh64; - } - - // The `skew` above is the difference between our - // computed digits and the original exact value. 
- // Use that to offset the final digit: - uint64_t one = (uint64_t)(1) << (64 - adjustIntegerBits); - uint64_t fractionMask = one - 1; - uint64_t oneHalf = one >> 1; - if ((skew & fractionMask) == oneHalf) { - int adjust = (int)(skew >> (64 - adjustIntegerBits)); - // If the skew is exactly integer + 1/2, round the - // last digit even after adjustment - p[-1] -= adjust; - p[-1] &= ~1; - } else { - // Else round to nearest... - int adjust = (int)((skew + oneHalf) >> (64 - adjustIntegerBits)); - p[-1] -= adjust; - } - } - } - - // Step 8: Shuffle digits into the final textual form - int forceExponential = binaryExponent > 54 || (binaryExponent == 54 && !isBoundary); - return finishFormatting(dest, length, p, firstOutputChar, forceExponential, base10Exponent); -} -#endif - -// ================================================================ -// -// FLOAT80 -// -// ================================================================ - -#if SWIFT_DTOA_FLOAT80_SUPPORT -#if LONG_DOUBLE_IS_FLOAT80 -size_t swift_dtoa_optimal_long_double(long double d, char *dest, size_t length) { - return swift_dtoa_optimal_float80_p(&d, dest, length); -} -#endif - -// Format an Intel x87 80-bit extended precision floating-point format -// This does not rely on the C environment for floating-point arithmetic -// or library support of any kind. -size_t swift_dtoa_optimal_float80_p(const void *d, char *dest, size_t length) -{ - static const int exponentBitCount = 15; - static const int exponentMask = (1 << exponentBitCount) - 1; - // See comments in swift_dtoa_optimal_binary64_p to understand - // why we use 16,382 instead of 16,383 here. 
- static const int64_t exponentBias = (1 << (exponentBitCount - 1)) - 2; // 16,382 - - // Step 0: Deconstruct the target number - // Note: this strongly assumes Intel 80-bit extended format in LSB - // byte order - const uint64_t *raw_p = (const uint64_t *)d; - int exponentBitPattern = raw_p[1] & exponentMask; - int negative = (raw_p[1] >> 15) & 1; - uint64_t significandBitPattern = raw_p[0]; - - // Step 1: Handle the various input cases: - int64_t binaryExponent; - uint64_t significand; - int isBoundary = (significandBitPattern & 0x7fffffffffffffff) == 0; - if (length < 1) { - return 0; - } else if (exponentBitPattern == exponentMask) { // NaN or Infinity - // Following 80387 semantics as documented in Wikipedia.org "Extended Precision" - // Also see Intel's "Floating Point Reference Sheet" - // https://software.intel.com/content/dam/develop/external/us/en/documents/floating-point-reference-sheet.pdf - int selector = significandBitPattern >> 62; // Top 2 bits - uint64_t payload = significandBitPattern & (((uint64_t)1 << 62) - 1); // bottom 62 bits - switch (selector) { - case 0: // ∞ or snan on 287, invalid on 387 - case 1: // Pseudo-NaN: snan on 287, invalid on 387 - break; - case 2: - if (payload == 0) { // snan on 287, ∞ on 387 - return infinity(dest, length, negative); - } else { // snan on 287 and 387 - return nan_details(dest, length, negative, 0 /* quiet */, 0, payload); - } - break; - case 3: - // Zero payload and sign bit set is "indefinite" (treated as qNaN here), - // Otherwise qNan on 387, sNaN on 287 - return nan_details(dest, length, negative, 1 /* quiet */, 0, payload); - } - // Handle "invalid" patterns as plain "nan" - return nan_details(dest, length, 0 /* negative */, 1 /* quiet */, 0, payload); - } else if (exponentBitPattern == 0) { - if (significandBitPattern == 0) { // Zero - return zero(dest, length, negative); - } else { // subnormal - binaryExponent = 1 - exponentBias; - significand = significandBitPattern; - } - } else if 
(significandBitPattern >> 63) { // Normal - binaryExponent = exponentBitPattern - exponentBias; - significand = significandBitPattern; - } else { - // Invalid pattern rejected by 80387 and later. - // Handle "invalid" patterns as plain "nan" - return nan_details(dest, length, 0 /* negative */, 1 /* quiet */, 0, 0); - } - - // Step 2: Determine the exact unscaled target interval - uint64_t halfUlp = (uint64_t)1 << 63; - uint64_t quarterUlp = halfUlp >> 1; - uint64_t threeQuarterUlp = halfUlp + quarterUlp; - swift_uint128_t upperMidpointExact, lowerMidpointExact; - initialize128WithHighLow64(upperMidpointExact, significand, halfUlp); - // Subtract 1/4 or 1/2 ULP by first subtracting 1 full ULP, then adding some back - initialize128WithHighLow64(lowerMidpointExact, significand - 1, isBoundary ? threeQuarterUlp : halfUlp); - - return _swift_dtoa_256bit_backend - ( - dest, - length, - upperMidpointExact, - lowerMidpointExact, - negative, - isBoundary, - (significandBitPattern & 1) != 0, - binaryExponent, - binaryExponent > 65 || (binaryExponent == 65 && !isBoundary) // forceExponential - ); - -} -#endif - -// ================================================================ -// -// BINARY128 -// -// ================================================================ - -#if SWIFT_DTOA_BINARY128_SUPPORT -#if LONG_DOUBLE_IS_BINARY128 -size_t swift_dtoa_optimal_long_double(long double d, char *dest, size_t length) { - return swift_dtoa_optimal_binary128_p(&d, dest, length); -} -#endif - -// Format an IEEE 754 binary128 quad-precision floating-point number. -// This does not rely on the C environment for floating-point arithmetic -// or library support of any kind. -size_t swift_dtoa_optimal_binary128_p(const void *d, char *dest, size_t length) -{ - static const int exponentBitCount = 15; - static const int exponentMask = (1 << exponentBitCount) - 1; - // See comments in swift_dtoa_optimal_binary64_p to understand - // why we use 16,382 instead of 16,383 here. 
- static const int64_t exponentBias = (1 << (exponentBitCount - 1)) - 2; // 16,382 - - // Step 0: Deconstruct the target number in IEEE 754 binary128 LSB format - const uint64_t *raw_p = (const uint64_t *)d; - int exponentBitPattern = (raw_p[1] >> 48) & exponentMask; - int negative = (raw_p[1] >> 63) & 1; - uint64_t significandHigh = raw_p[1] & 0xffffffffffffULL; - uint64_t significandLow = raw_p[0]; - - // Step 1: Handle the various input cases: - int64_t binaryExponent; - int isBoundary = (significandLow == 0) && (significandHigh == 0); - if (length < 1) { - return 0; - } else if (exponentBitPattern == exponentMask) { // NaN or Infinity - if (isBoundary) { // Infinity - return infinity(dest, length, negative); - } else { // NaN - int signaling = (significandHigh >> 47) & 1; - uint64_t payloadHigh = significandHigh & 0x3fffffffffffULL; - uint64_t payloadLow = significandLow; - return nan_details(dest, length, negative, signaling == 0, payloadHigh, payloadLow); - } - } else if (exponentBitPattern == 0) { - if (isBoundary) { // Zero - return zero(dest, length, negative); - } else { // subnormal - binaryExponent = 1 - exponentBias; - } - } else { // Normal - binaryExponent = exponentBitPattern - exponentBias; - significandHigh |= (1ULL << 48); - } - // Align significand to 0.113 fractional form - significandHigh <<= 15; - significandHigh |= significandLow >> (64 - 15); - significandLow <<= 15; - - // Step 2: Determine the exact unscaled target interval - uint64_t halfUlp = (uint64_t)1 << 14; - uint64_t quarterUlp = halfUlp >> 1; - swift_uint128_t upperMidpointExact, lowerMidpointExact; - initialize128WithHighLow64(upperMidpointExact, significandHigh, significandLow + halfUlp); - // Subtract 1/4 or 1/2 ULP - if (significandLow == 0) { - initialize128WithHighLow64(lowerMidpointExact, - significandHigh - 1, - significandLow - (isBoundary ? 
quarterUlp : halfUlp)); - } else { - initialize128WithHighLow64(lowerMidpointExact, - significandHigh, - significandLow - (isBoundary ? quarterUlp : halfUlp)); - } - - return _swift_dtoa_256bit_backend - ( - dest, - length, - upperMidpointExact, - lowerMidpointExact, - negative, - isBoundary, - (significandLow & 0x8000) != 0, - binaryExponent, - binaryExponent > 114 || (binaryExponent == 114 && !isBoundary) // forceExponential - ); -} -#endif - -// ================================================================ -// -// FLOAT80/BINARY128 common backend -// -// This uses 256-bit fixed-width arithmetic to efficiently compute the -// optimal form for a decomposed float80 or binary128 value. It is -// less heavily commented than the 128-bit version above; see that -// implementation for detailed explanation of the logic here. -// -// This sacrifices some performance for float80, which can be done -// more efficiently with 192-bit fixed-width arithmetic. But the code -// size savings from sharing this logic between float80 and binary128 -// are substantial, and the resulting float80 performance is still much -// better than most competing implementations. -// -// Also in the interest of code size savings, this eschews some of the -// optimizations used by the 128-bit backend above. Those -// optimizations are simple to reintroduce if you're interested in -// further performance improvements. -// -// If you are interested in extreme code size, you can also use this -// backend for binary32 and binary64, eliminating the separate 128-bit -// implementation. That variation offers surprisingly reasonable -// performance overall. 
//
// ================================================================

#if SWIFT_DTOA_FLOAT80_SUPPORT || SWIFT_DTOA_BINARY128_SUPPORT
// Digit-generation backend built on 256-bit fixed-point arithmetic.
// Compiled only when Float80 or Binary128 support is enabled.
//
// Parameters:
//   dest, length       - Output buffer and its size in bytes.
//   upperMidpointExact,
//   lowerMidpointExact - 128-bit bounds of the interval to print. They are
//                        multiplied by a power of ten below to form `u`
//                        and `l`. (Presumably the exact midpoints to the
//                        adjacent representable values -- confirm against
//                        the callers.)
//   negative           - Nonzero emits a leading '-'.
//   isBoundary         - Nonzero selects the asymmetric skew
//                        (delta - delta/3) instead of delta/2 when the
//                        final digit is adjusted.
//   isOddSignificand   - Nonzero rounds the scaled interval inward
//                        (narrower); zero rounds it outward (wider).
//   binaryExponent     - Binary exponent of the value; drives the decimal
//                        exponent estimate and the scaling shift.
//   forceExponential   - Passed through to finishFormatting().
//
// Returns whatever finishFormatting() returns, or 0 (after storing '\0'
// in dest[0]) as soon as one of the buffer-space checks fails.
//
// NOTE(review): the space checks form `dest + length - K` directly and
// dest[0] is written on failure, so this appears to assume `length` is at
// least a few bytes -- confirm that every caller guarantees that.
static size_t _swift_dtoa_256bit_backend
(
 char *dest,
 size_t length,
 swift_uint128_t upperMidpointExact,
 swift_uint128_t lowerMidpointExact,
 int negative,
 int isBoundary,
 int isOddSignificand,
 int binaryExponent,
 bool forceExponential
)
{
  // Step 3: Estimate the base 10 exponent
  int base10Exponent = decimalExponentFor2ToThe(binaryExponent);

  // Step 4: Compute a power-of-10 scale factor
  // The helper fills in a rounded-down and a rounded-up 256-bit
  // approximation of 10^(-base10Exponent) plus its binary exponent.
  swift_uint256_t powerOfTenRoundedDown;
  swift_uint256_t powerOfTenRoundedUp;
  int powerOfTenExponent = 0;
  intervalContainingPowerOf10_Binary128(-base10Exponent,
                                        &powerOfTenRoundedDown,
                                        &powerOfTenRoundedUp,
                                        &powerOfTenExponent);
  const int extraBits = binaryExponent + powerOfTenExponent;

  // Step 5: Scale the interval (with rounding)
  static const int integerBits = 14; // Enough for 4 decimal digits
  // Number of fraction bits held in the most significant word of a
  // swift_uint256_t (64-bit words with __uint128_t, 32-bit words without).
#if HAVE_UINT128_T
  static const int highFractionBits = 64 - integerBits;
#else
  static const int highFractionBits = 32 - integerBits;
#endif
  swift_uint256_t u, l;
  if (isOddSignificand) {
    // Narrow the interval (odd significand)
    u = powerOfTenRoundedDown;
    multiply256x128RoundingDown(&u, upperMidpointExact);
    shiftRightRoundingDown256(&u, integerBits - extraBits);

    l = powerOfTenRoundedUp;
    multiply256x128RoundingUp(&l, lowerMidpointExact);
    shiftRightRoundingUp256(&l, integerBits - extraBits);
  } else {
    // Widen the interval (even significand)
    u = powerOfTenRoundedUp;
    multiply256x128RoundingUp(&u, upperMidpointExact);
    shiftRightRoundingUp256(&u, integerBits - extraBits);

    l = powerOfTenRoundedDown;
    multiply256x128RoundingDown(&l, lowerMidpointExact);
    shiftRightRoundingDown256(&l, integerBits - extraBits);
  }

  // Step 6: Align first digit, adjust exponent
  // While the integer part of `u` is still zero, multiply both bounds by
  // ten and lower the decimal exponent to match.
#if HAVE_UINT128_T
  while (u.high < (uint64_t)1 << highFractionBits)
#else
  while (u.elt[7] < (uint64_t)1 << highFractionBits)
#endif
  {
    base10Exponent -= 1;
    multiply256xu32(&l, 10);
    multiply256xu32(&u, 10);
  }

  // `t` starts at the upper bound; `delta` is the width of the interval.
  swift_uint256_t t = u;
  swift_uint256_t delta = u;
  subtract256x256(&delta, l);

  // Step 7: Generate digits
  char *p = dest;
  if (p > dest + length - 4) { // Shortest output is "1.0" (4 bytes)
    dest[0] = '\0';
    return 0;
  }
  if (negative) {
    *p++ = '-';
  }
  char * const firstOutputChar = p;

  // Adjustment above already set up the first digit
  // The leading '0' is a placeholder that finishFormatting() rewrites.
  *p++ = '0';
  *p++ = '0' + extractIntegerPart256(&t, integerBits);

  // Generate 4 digits at a time
  // d0/t0 are look-ahead copies scaled by 10000; they are committed to
  // delta/t only while the scaled width is still smaller than the
  // scaled remainder.
  swift_uint256_t d0 = delta;
  multiply256xu32(&d0, 10000);
  swift_uint256_t t0 = t;
  multiply256xu32(&t0, 10000);
  int d1234 = extractIntegerPart256(&t0, integerBits);
  while (isLessThan256x256(d0, t0)) {
    if (p > dest + length - 5) {
      dest[0] = '\0';
      return 0;
    }
    int d34 = d1234 % 100;
    int d12 = d1234 / 100;
    // asciiDigitTable is indexed by (two-digit value * 2) and yields the
    // two ASCII characters for that value.
    memcpy(p + 2, asciiDigitTable + d34 * 2, 2);
    memcpy(p, asciiDigitTable + d12 * 2, 2);
    p += 4;
    t = t0;
    delta = d0;
    multiply256xu32(&d0, 10000);
    multiply256xu32(&t0, 10000);
    d1234 = extractIntegerPart256(&t0, integerBits);
  }

  // Generate one digit at a time...
  while (isLessThan256x256(delta, t)) {
    if (p > dest + length - 2) {
      dest[0] = '\0';
      return 0;
    }
    multiply256xu32(&delta, 10);
    multiply256xu32(&t, 10);
    *p++ = extractIntegerPart256(&t, integerBits) + '0';
  }

  // Adjust the final digit to be closer to the original value
  // We've already consumed most of our available precision, and only
  // need a couple of integer bits, so we can narrow down to
  // 64 bits here.
#if HAVE_UINT128_T
  uint64_t deltaHigh64 = delta.high;
  uint64_t tHigh64 = t.high;
#else
  uint64_t deltaHigh64 = ((uint64_t)delta.elt[7] << 32) + delta.elt[6];
  uint64_t tHigh64 = ((uint64_t)t.elt[7] << 32) + t.elt[6];
#endif
  // Only adjust when the interval extends at least one whole unit of the
  // last digit beyond the remainder.
  if (deltaHigh64 >= tHigh64 + ((uint64_t)1 << (64 - integerBits))) {
    uint64_t skew;
    if (isBoundary) {
      skew = deltaHigh64 - deltaHigh64 / 3 - tHigh64;
    } else {
      skew = deltaHigh64 / 2 - tHigh64;
    }
    // `one` is 1.0 in this fixed-point layout (integerBits integer bits
    // at the top of a 64-bit word).
    uint64_t one = (uint64_t)(1) << (64 - integerBits);
    uint64_t fractionMask = one - 1;
    uint64_t oneHalf = one >> 1;
    if ((skew & fractionMask) == oneHalf) {
      int adjust = (int)(skew >> (64 - integerBits));
      // If the skew is integer + 1/2, round the last digit even
      // after adjustment
      p[-1] -= adjust;
      p[-1] &= ~1;
    } else {
      // Else round to nearest...
      int adjust = (int)((skew + oneHalf) >> (64 - integerBits));
      p[-1] -= adjust;
    }
  }

  return finishFormatting(dest, length, p, firstOutputChar, forceExponential, base10Exponent);
}
#endif

#if SWIFT_DTOA_BINARY32_SUPPORT || SWIFT_DTOA_BINARY64_SUPPORT || SWIFT_DTOA_FLOAT80_SUPPORT || SWIFT_DTOA_BINARY128_SUPPORT
// Rewrites the raw digit string in place into its final textual form and
// NUL-terminates it.
//
// On entry, [firstOutputChar, p) holds a one-character placeholder followed
// by the generated digits (the examples below use "0123456").
//
// Parameters:
//   dest, length     - Start and size of the whole output buffer.
//   p                - One past the last generated digit.
//   firstOutputChar  - Position of the placeholder (after any sign).
//   forceExponential - Nonzero forces exponential notation.
//   base10Exponent   - Decimal exponent of the first generated digit.
//
// Layout selection:
//   exponent < -4 or forced -> "1.23456e-07" style
//   -4 <= exponent < 0      -> "0.00123456" style
//   fewer integer digits than generated digits -> "123.456" style
//   otherwise               -> "12345600.0" style
//
// Returns p - dest after the terminator is stored (i.e. the number of
// characters written, not counting the NUL), or 0 with dest[0] set to
// '\0' when a space check fails.
static int finishFormatting(char *dest, size_t length,
                            char *p,
                            char *firstOutputChar,
                            int forceExponential,
                            int base10Exponent)
{
  // Number of generated digits, excluding the placeholder.
  int digitCount = p - firstOutputChar - 1;
  if (base10Exponent < -4 || forceExponential) {
    // Exponential form: convert "0123456" => "1.23456e78"
    firstOutputChar[0] = firstOutputChar[1];
    if (digitCount > 1) {
      firstOutputChar[1] = '.';
    } else {
      // Single digit: no decimal point, so drop the now-duplicated char.
      p--;
    }
    // Add exponent at the end
    // Room for 'e', sign, two digits and the terminator.
    if (p > dest + length - 5) {
      dest[0] = '\0';
      return 0;
    }
    *p++ = 'e';
    if (base10Exponent < 0) {
      *p++ = '-';
      base10Exponent = -base10Exponent;
    } else {
      *p++ = '+';
    }
    if (base10Exponent > 99) {
      if (base10Exponent > 999) {
        // Four-digit exponent: emit the leading two digits as a pair.
        if (p > dest + length - 5) {
          dest[0] = '\0';
          return 0;
        }
        memcpy(p, asciiDigitTable + (base10Exponent / 100) * 2, 2);
        p += 2;
      } else {
        // Three-digit exponent: emit the hundreds digit alone.
        if (p > dest + length - 4) {
          dest[0] = '\0';
          return 0;
        }
        *p++ = (base10Exponent / 100) + '0';
      }
      base10Exponent %= 100;
    }
    // The final two exponent digits are always written.
    memcpy(p, asciiDigitTable + base10Exponent * 2, 2);
    p += 2;
  } else if (base10Exponent < 0) { // "0123456" => "0.00123456"
    // Slide digits back in buffer and prepend zeros and a period
    if (p > dest + length + base10Exponent - 1) {
      dest[0] = '\0';
      return 0;
    }
    memmove(firstOutputChar - base10Exponent, firstOutputChar, p - firstOutputChar);
    memset(firstOutputChar, '0', -base10Exponent);
    firstOutputChar[1] = '.';
    p += -base10Exponent;
  } else if (base10Exponent + 1 < digitCount) { // "0123456" => "123.456"
    // Slide integer digits forward and insert a '.'
    // The placeholder slot is reused, so the total length is unchanged.
    memmove(firstOutputChar, firstOutputChar + 1, base10Exponent + 1);
    firstOutputChar[base10Exponent + 1] = '.';
  } else { // "0123456" => "12345600.0"
    // Slide digits forward 1 and append suitable zeros and '.0'
    if (p + base10Exponent - digitCount > dest + length - 3) {
      dest[0] = '\0';
      return 0;
    }
    memmove(firstOutputChar, firstOutputChar + 1, p - firstOutputChar - 1);
    p -= 1;
    memset(p, '0', base10Exponent - digitCount + 1);
    p += base10Exponent - digitCount + 1;
    *p++ = '.';
    *p++ = '0';
  }
  *p = '\0';
  return p - dest;
}
#endif

// ================================================================
//
// Arithmetic helpers
//
// ================================================================

// The core algorithm relies heavily on fixed-point arithmetic with
// 128-bit and 256-bit integer values. (For binary32/64 and
// float80/binary128, respectively.) They also need precise control
// over all rounding.
//
// Note that most arithmetic operations are the same for integers and
// fractions, so we can just use the normal integer operations in most
// places. Multiplication however, is different for fixed-size
// fractions. Integer multiplication preserves the low-order part and
// discards the high-order part (ignoring overflow).
Fraction -// multiplication preserves the high-order part and discards the -// low-order part (rounding). So most of the arithmetic helpers here -// are for multiplication. - -// Note: With 64-bit GCC and Clang, we get a noticeable performance -// gain by using `__uint128_t`. Otherwise, we have to break things -// down into 32-bit chunks so we don't overflow 64-bit temporaries. - -#if SWIFT_DTOA_BINARY64_SUPPORT -// Multiply a 128-bit fraction by a 64-bit fraction, rounding down. -static swift_uint128_t multiply128x64RoundingDown(swift_uint128_t lhs, uint64_t rhs) { -#if HAVE_UINT128_T - uint64_t lhsl = (uint64_t)lhs; - uint64_t lhsh = (uint64_t)(lhs >> 64); - swift_uint128_t h = (swift_uint128_t)lhsh * rhs; - swift_uint128_t l = (swift_uint128_t)lhsl * rhs; - return h + (l >> 64); -#else - swift_uint128_t result; - static const uint64_t mask32 = UINT32_MAX; - uint64_t rhs0 = rhs & mask32; - uint64_t rhs1 = rhs >> 32; - uint64_t t = (lhs.low) * rhs0; - t >>= 32; - uint64_t a = (lhs.b) * rhs0; - uint64_t b = (lhs.low) * rhs1; - t += a + (b & mask32); - t >>= 32; - t += (b >> 32); - a = lhs.c * rhs0; - b = lhs.b * rhs1; - t += (a & mask32) + (b & mask32); - result.low = t; - t >>= 32; - t += (a >> 32) + (b >> 32); - a = lhs.high * rhs0; - b = lhs.c * rhs1; - t += (a & mask32) + (b & mask32); - result.b = t; - t >>= 32; - t += (a >> 32) + (b >> 32); - t += lhs.high * rhs1; - result.c = t; - result.high = t >> 32; - return result; -#endif -} - -// Multiply a 128-bit fraction by a 64-bit fraction, rounding up. 
-static swift_uint128_t multiply128x64RoundingUp(swift_uint128_t lhs, uint64_t rhs) { -#if HAVE_UINT128_T - uint64_t lhsl = (uint64_t)lhs; - uint64_t lhsh = (uint64_t)(lhs >> 64); - swift_uint128_t h = (swift_uint128_t)lhsh * rhs; - swift_uint128_t l = (swift_uint128_t)lhsl * rhs; - const static __uint128_t bias = ((__uint128_t)1 << 64) - 1; - return h + ((l + bias) >> 64); -#else - swift_uint128_t result; - static const uint64_t mask32 = UINT32_MAX; - uint64_t rhs0 = rhs & mask32; - uint64_t rhs1 = rhs >> 32; - uint64_t t = (lhs.low) * rhs0 + mask32; - t >>= 32; - uint64_t a = (lhs.b) * rhs0; - uint64_t b = (lhs.low) * rhs1; - t += (a & mask32) + (b & mask32) + mask32; - t >>= 32; - t += (a >> 32) + (b >> 32); - a = lhs.c * rhs0; - b = lhs.b * rhs1; - t += (a & mask32) + (b & mask32); - result.low = t; - t >>= 32; - t += (a >> 32) + (b >> 32); - a = lhs.high * rhs0; - b = lhs.c * rhs1; - t += (a & mask32) + (b & mask32); - result.b = t; - t >>= 32; - t += (a >> 32) + (b >> 32); - t += lhs.high * rhs1; - result.c = t; - result.high = t >> 32; - return result; -#endif -} - -#if !HAVE_UINT128_T -// Multiply a 128-bit fraction by a 32-bit integer in a 32-bit environment. -// (On 64-bit, we use a fast inline macro.) -static void multiply128xu32(swift_uint128_t *lhs, uint32_t rhs) { - uint64_t t = (uint64_t)(lhs->low) * rhs; - lhs->low = (uint32_t)t; - t = (t >> 32) + (uint64_t)(lhs->b) * rhs; - lhs->b = (uint32_t)t; - t = (t >> 32) + (uint64_t)(lhs->c) * rhs; - lhs->c = (uint32_t)t; - t = (t >> 32) + (uint64_t)(lhs->high) * rhs; - lhs->high = (uint32_t)t; -} - -// Compare two 128-bit integers in a 32-bit environment -// (On 64-bit, we use a fast inline macro.) 
-static int isLessThan128x128(swift_uint128_t lhs, swift_uint128_t rhs) { - return ((lhs.high < rhs.high) - || ((lhs.high == rhs.high) - && ((lhs.c < rhs.c) - || ((lhs.c == rhs.c) - && ((lhs.b < rhs.b) - || ((lhs.b == rhs.b) - && (lhs.low < rhs.low))))))); -} - -// Subtract 128-bit values in a 32-bit environment -static void subtract128x128(swift_uint128_t *lhs, swift_uint128_t rhs) { - uint64_t t = (uint64_t)lhs->low + (~rhs.low) + 1; - lhs->low = (uint32_t)t; - t = (t >> 32) + lhs->b + (~rhs.b); - lhs->b = (uint32_t)t; - t = (t >> 32) + lhs->c + (~rhs.c); - lhs->c = (uint32_t)t; - t = (t >> 32) + lhs->high + (~rhs.high); - lhs->high = (uint32_t)t; -} -#endif - -#if !HAVE_UINT128_T -// Shift a 128-bit integer right, rounding down. -static swift_uint128_t shiftRightRoundingDown128(swift_uint128_t lhs, int shift) { - // Note: Shift is always less than 32 - swift_uint128_t result; - uint64_t t = (uint64_t)lhs.low >> shift; - t += ((uint64_t)lhs.b << (32 - shift)); - result.low = t; - t >>= 32; - t += ((uint64_t)lhs.c << (32 - shift)); - result.b = t; - t >>= 32; - t += ((uint64_t)lhs.high << (32 - shift)); - result.c = t; - t >>= 32; - result.high = t; - return result; -} -#endif - -#if !HAVE_UINT128_T -// Shift a 128-bit integer right, rounding up. 
-static swift_uint128_t shiftRightRoundingUp128(swift_uint128_t lhs, int shift) { - swift_uint128_t result; - const uint64_t bias = (1 << shift) - 1; - uint64_t t = ((uint64_t)lhs.low + bias) >> shift; - t += ((uint64_t)lhs.b << (32 - shift)); - result.low = t; - t >>= 32; - t += ((uint64_t)lhs.c << (32 - shift)); - result.b = t; - t >>= 32; - t += ((uint64_t)lhs.high << (32 - shift)); - result.c = t; - t >>= 32; - result.high = t; - return result; -} -#endif -#endif - - // Shift a 128-bit integer left, discarding high bits -#if (SWIFT_DTOA_BINARY32_SUPPORT || SWIFT_DTOA_BINARY64_SUPPORT) && !HAVE_UINT128_T -static void shiftLeft128(swift_uint128_t *lhs, int shift) { - // Note: Shift is always less than 32 - uint64_t t = (uint64_t)lhs->high << (shift + 32); - t += (uint64_t)lhs->c << shift; - lhs->high = t >> 32; - t <<= 32; - t += (uint64_t)lhs->b << shift; - lhs->c = t >> 32; - t <<= 32; - t += (uint64_t)lhs->low << shift; - lhs->b = t >> 32; - lhs->low = t; -} -#endif - -#if SWIFT_DTOA_FLOAT80_SUPPORT || SWIFT_DTOA_BINARY128_SUPPORT -// Multiply a 256-bit fraction by a 32-bit integer. -// This is used in the digit generation to multiply by ten or -// 10,000. Note that rounding is never an issue. -// As used above, this will never overflow. -static void multiply256xu32(swift_uint256_t *lhs, uint32_t rhs) { -#if HAVE_UINT128_T - __uint128_t t = (__uint128_t)lhs->low * rhs; - lhs->low = (uint64_t)t; - t = (t >> 64) + (__uint128_t)lhs->midlow * rhs; - lhs->midlow = (uint64_t)t; - t = (t >> 64) + (__uint128_t)lhs->midhigh * rhs; - lhs->midhigh = (uint64_t)t; - t = (t >> 64) + (__uint128_t)lhs->high * rhs; - lhs->high = (uint64_t)t; -#else - uint64_t t = 0; - for (int i = 0; i < 8; ++i) { - t = (t >> 32) + (uint64_t)lhs->elt[i] * rhs; - lhs->elt[i] = t; - } -#endif -} - -// Multiply a 256-bit fraction by a 128-bit fraction, rounding down. 
static void multiply256x128RoundingDown(swift_uint256_t *lhs, swift_uint128_t rhs) {
  // In place: *lhs is replaced by the upper 256 bits of the 384-bit
  // product (*lhs) * rhs. The low 128 bits contribute only their carries,
  // so the result is truncated (rounded down).
#if HAVE_UINT128_T
  // A full multiply of four 64-bit values by two 64-bit values
  // yields six such components. We discard the bottom two (except
  // for carries) to get a rounded-down four-element result.
  //
  // `current` accumulates the column being produced; `next` collects the
  // high halves of the partial products, which belong to the following
  // column.

  // Column 0 (discarded): low * rhs.low
  __uint128_t current = (__uint128_t)lhs->low * (uint64_t)rhs;

  // Column 1 (discarded): low * rhs.high + midlow * rhs.low
  current = (current >> 64);
  __uint128_t t = (__uint128_t)lhs->low * (rhs >> 64);
  current += (uint64_t)t;
  __uint128_t next = t >> 64;
  t = (__uint128_t)lhs->midlow * (uint64_t)rhs;
  current += (uint64_t)t;
  next += t >> 64;

  // Column 2 -> result word `low`. lhs->low is no longer needed as an
  // input from here on, so it can be overwritten.
  current = next + (current >> 64);
  t = (__uint128_t)lhs->midlow * (rhs >> 64);
  current += (uint64_t)t;
  next = t >> 64;
  t = (__uint128_t)lhs->midhigh * (uint64_t)rhs;
  current += (uint64_t)t;
  next += t >> 64;
  lhs->low = (uint64_t)current;

  // Column 3 -> result word `midlow`.
  current = next + (current >> 64);
  t = (__uint128_t)lhs->midhigh * (rhs >> 64);
  current += (uint64_t)t;
  next = t >> 64;
  t = (__uint128_t)lhs->high * (uint64_t)rhs;
  current += (uint64_t)t;
  next += t >> 64;
  lhs->midlow = (uint64_t)current;

  // Columns 4 and 5 -> result words `midhigh` and `high`.
  current = next + (current >> 64);
  t = (__uint128_t)lhs->high * (rhs >> 64);
  current += t;
  lhs->midhigh = (uint64_t)current;
  lhs->high = (uint64_t)(current >> 64);
#else
  uint64_t a, b, c, d; // temporaries
  // Eight 32-bit values multiplied by 4 32-bit values. Oh my.
  static const uint64_t mask32 = UINT32_MAX;
  // `t` is the running column sum. The low halves of the partial
  // products are added before the shift, the high halves after it, so
  // they land in the next column.
  uint64_t t = 0;

  // Columns 0-3 fall below the 256-bit result; only carries are kept.
  a = (uint64_t)lhs->elt[0] * rhs.low;
  t += (a & mask32);
  t >>= 32;
  t += (a >> 32);

  a = (uint64_t)lhs->elt[0] * rhs.b;
  b = (uint64_t)lhs->elt[1] * rhs.low;
  t += (a & mask32) + (b & mask32);
  t >>= 32;
  t += (a >> 32) + (b >> 32);

  a = (uint64_t)lhs->elt[0] * rhs.c;
  b = (uint64_t)lhs->elt[1] * rhs.b;
  c = (uint64_t)lhs->elt[2] * rhs.low;
  t += (a & mask32) + (b & mask32) + (c & mask32);
  t >>= 32;
  t += (a >> 32) + (b >> 32) + (c >> 32);

  a = (uint64_t)lhs->elt[0] * rhs.high;
  b = (uint64_t)lhs->elt[1] * rhs.c;
  c = (uint64_t)lhs->elt[2] * rhs.b;
  d = (uint64_t)lhs->elt[3] * rhs.low;
  t += (a & mask32) + (b & mask32) + (c & mask32) + (d & mask32);
  t >>= 32;
  t += (a >> 32) + (b >> 32) + (c >> 32) + (d >> 32);

  // Columns 4-7 -> result words 0-3. Word i is written only after every
  // product that reads elt[i] has been formed.
  for (int i = 0; i < 4; ++i) {
    a = (uint64_t)lhs->elt[i + 1] * rhs.high;
    b = (uint64_t)lhs->elt[i + 2] * rhs.c;
    c = (uint64_t)lhs->elt[i + 3] * rhs.b;
    d = (uint64_t)lhs->elt[i + 4] * rhs.low;
    t += (a & mask32) + (b & mask32) + (c & mask32) + (d & mask32);
    lhs->elt[i] = t;
    t >>= 32;
    t += (a >> 32) + (b >> 32) + (c >> 32) + (d >> 32);
  }

  // Remaining columns have progressively fewer partial products.
  a = (uint64_t)lhs->elt[5] * rhs.high;
  b = (uint64_t)lhs->elt[6] * rhs.c;
  c = (uint64_t)lhs->elt[7] * rhs.b;
  t += (a & mask32) + (b & mask32) + (c & mask32);
  lhs->elt[4] = t;
  t >>= 32;
  t += (a >> 32) + (b >> 32) + (c >> 32);

  a = (uint64_t)lhs->elt[6] * rhs.high;
  b = (uint64_t)lhs->elt[7] * rhs.c;
  t += (a & mask32) + (b & mask32);
  lhs->elt[5] = t;
  t >>= 32;
  t += (a >> 32) + (b >> 32);

  t += (uint64_t)lhs->elt[7] * rhs.high;
  lhs->elt[6] = t;
  lhs->elt[7] = t >> 32;
#endif
}

// Multiply a 256-bit fraction by a 128-bit fraction, rounding up.
static void multiply256x128RoundingUp(swift_uint256_t *lhs, swift_uint128_t rhs) {
  // In place: *lhs is replaced by the upper 256 bits of the 384-bit
  // product (*lhs) * rhs, with a bias added to the discarded low-order
  // columns so that the kept part is rounded up.
#if HAVE_UINT128_T
  // Same as the rounding-down version, but we add
  // UINT128_MAX to the bottom two to force an extra
  // carry if they are non-zero.
  //
  // `current` accumulates the column being produced; `next` collects the
  // high halves of the partial products for the following column.

  // Column 0 (discarded): low * rhs.low, plus the low half of the bias.
  swift_uint128_t current = (swift_uint128_t)lhs->low * (uint64_t)rhs;
  current += UINT64_MAX;

  // Column 1 (discarded): low * rhs.high + midlow * rhs.low
  current = (current >> 64);
  swift_uint128_t t = (swift_uint128_t)lhs->low * (rhs >> 64);
  current += (uint64_t)t;
  swift_uint128_t next = t >> 64;
  t = (swift_uint128_t)lhs->midlow * (uint64_t)rhs;
  current += (uint64_t)t;
  next += t >> 64;
  // Round up by adding UINT128_MAX (upper half)
  current += UINT64_MAX;

  // Column 2 -> result word `low`.
  current = next + (current >> 64);
  t = (swift_uint128_t)lhs->midlow * (rhs >> 64);
  current += (uint64_t)t;
  next = t >> 64;
  t = (swift_uint128_t)lhs->midhigh * (uint64_t)rhs;
  current += (uint64_t)t;
  next += t >> 64;
  lhs->low = (uint64_t)current;

  // Column 3 -> result word `midlow`.
  current = next + (current >> 64);
  t = (swift_uint128_t)lhs->midhigh * (rhs >> 64);
  current += (uint64_t)t;
  next = t >> 64;
  t = (swift_uint128_t)lhs->high * (uint64_t)rhs;
  current += (uint64_t)t;
  next += t >> 64;
  lhs->midlow = (uint64_t)current;

  // Columns 4 and 5 -> result words `midhigh` and `high`.
  current = next + (current >> 64);
  t = (swift_uint128_t)lhs->high * (rhs >> 64);
  current += t;
  lhs->midhigh = (uint64_t)current;
  lhs->high = (uint64_t)(current >> 64);
#else
  uint64_t a, b, c, d; // temporaries
  // Eight 32-bit values multiplied by 4 32-bit values. Oh my.
  static const uint64_t mask32 = UINT32_MAX;
  // `t` is the running column sum. Each of the four discarded columns
  // has mask32 added before its carry is taken.
  uint64_t t = 0;

  // Columns 0-3 fall below the 256-bit result; only carries are kept.
  a = (uint64_t)lhs->elt[0] * rhs.low + mask32;
  t += (a & mask32);
  t >>= 32;
  t += (a >> 32);

  a = (uint64_t)lhs->elt[0] * rhs.b;
  b = (uint64_t)lhs->elt[1] * rhs.low;
  t += (a & mask32) + (b & mask32) + mask32;
  t >>= 32;
  t += (a >> 32) + (b >> 32);

  a = (uint64_t)lhs->elt[0] * rhs.c;
  b = (uint64_t)lhs->elt[1] * rhs.b;
  c = (uint64_t)lhs->elt[2] * rhs.low;
  t += (a & mask32) + (b & mask32) + (c & mask32) + mask32;
  t >>= 32;
  t += (a >> 32) + (b >> 32) + (c >> 32);

  a = (uint64_t)lhs->elt[0] * rhs.high;
  b = (uint64_t)lhs->elt[1] * rhs.c;
  c = (uint64_t)lhs->elt[2] * rhs.b;
  d = (uint64_t)lhs->elt[3] * rhs.low;
  t += (a & mask32) + (b & mask32) + (c & mask32) + (d & mask32) + mask32;
  t >>= 32;
  t += (a >> 32) + (b >> 32) + (c >> 32) + (d >> 32);

  // Columns 4-7 -> result words 0-3. Word i is written only after every
  // product that reads elt[i] has been formed.
  for (int i = 0; i < 4; ++i) {
    a = (uint64_t)lhs->elt[i + 1] * rhs.high;
    b = (uint64_t)lhs->elt[i + 2] * rhs.c;
    c = (uint64_t)lhs->elt[i + 3] * rhs.b;
    d = (uint64_t)lhs->elt[i + 4] * rhs.low;
    t += (a & mask32) + (b & mask32) + (c & mask32) + (d & mask32);
    lhs->elt[i] = t;
    t >>= 32;
    t += (a >> 32) + (b >> 32) + (c >> 32) + (d >> 32);
  }

  // Remaining columns have progressively fewer partial products.
  a = (uint64_t)lhs->elt[5] * rhs.high;
  b = (uint64_t)lhs->elt[6] * rhs.c;
  c = (uint64_t)lhs->elt[7] * rhs.b;
  t += (a & mask32) + (b & mask32) + (c & mask32);
  lhs->elt[4] = t;
  t >>= 32;
  t += (a >> 32) + (b >> 32) + (c >> 32);

  a = (uint64_t)lhs->elt[6] * rhs.high;
  b = (uint64_t)lhs->elt[7] * rhs.c;
  t += (a & mask32) + (b & mask32);
  lhs->elt[5] = t;
  t >>= 32;
  t += (a >> 32) + (b >> 32);

  t += (uint64_t)lhs->elt[7] * rhs.high;
  lhs->elt[6] = t;
  lhs->elt[7] = t >> 32;

#endif
}

// Subtract two 256-bit integers or fractions.
-static void subtract256x256(swift_uint256_t *lhs, swift_uint256_t rhs) { -#if HAVE_UINT128_T - swift_uint128_t t = (swift_uint128_t)lhs->low + (~rhs.low) + 1; - lhs->low = t; - t = (t >> 64) + lhs->midlow + (~rhs.midlow); - lhs->midlow = t; - t = (t >> 64) + lhs->midhigh + (~rhs.midhigh); - lhs->midhigh = t; - lhs->high += (t >> 64) + (~rhs.high); -#else - uint64_t t = ((uint64_t)1) << 32; - for (int i = 0; i < 8; i++) { - t = (t >> 32) + lhs->elt[i] + (~rhs.elt[i]); - lhs->elt[i] = t; - } -#endif -} - -// Compare two 256-bit integers or fractions. -static int isLessThan256x256(swift_uint256_t lhs, swift_uint256_t rhs) { -#if HAVE_UINT128_T - return (lhs.high < rhs.high) - || (lhs.high == rhs.high - && (lhs.midhigh < rhs.midhigh - || (lhs.midhigh == rhs.midhigh - && (lhs.midlow < rhs.midlow - || (lhs.midlow == rhs.midlow - && lhs.low < rhs.low))))); -#else - for (int i = 7; i >= 0; i--) { - if (lhs.elt[i] < rhs.elt[i]) { - return true; - } else if (lhs.elt[i] > rhs.elt[i]) { - return false; - } - } - return false; -#endif -} - -// Shift a 256-bit integer right (by less than 32 bits!), rounding down. -static void shiftRightRoundingDown256(swift_uint256_t *lhs, int shift) { -#if HAVE_UINT128_T - __uint128_t t = (__uint128_t)lhs->low >> shift; - t += ((__uint128_t)lhs->midlow << (64 - shift)); - lhs->low = t; - t >>= 64; - t += ((__uint128_t)lhs->midhigh << (64 - shift)); - lhs->midlow = t; - t >>= 64; - t += ((__uint128_t)lhs->high << (64 - shift)); - lhs->midhigh = t; - t >>= 64; - lhs->high = t; -#else - uint64_t t = (uint64_t)lhs->elt[0] >> shift; - for (int i = 0; i < 7; ++i) { - t += ((uint64_t)lhs->elt[i + 1] << (32 - shift)); - lhs->elt[i] = t; - t >>= 32; - } - lhs->elt[7] = t; -#endif -} - -// Shift a 256-bit integer right, rounding up. -// Note: The shift will always be less than 20. Someday, that -// might suggest a way to further optimize this. 
-static void shiftRightRoundingUp256(swift_uint256_t *lhs, int shift) { -#if HAVE_UINT128_T - const uint64_t bias = (1 << shift) - 1; - __uint128_t t = ((__uint128_t)lhs->low + bias) >> shift; - t += ((__uint128_t)lhs->midlow << (64 - shift)); - lhs->low = t; - t >>= 64; - t += ((__uint128_t)lhs->midhigh << (64 - shift)); - lhs->midlow = t; - t >>= 64; - t += ((__uint128_t)lhs->high << (64 - shift)); - lhs->midhigh = t; - t >>= 64; - lhs->high = t; -#else - const uint64_t bias = (1 << shift) - 1; - uint64_t t = ((uint64_t)lhs->elt[0] + bias) >> shift; - for (int i = 0; i < 7; ++i) { - t += ((uint64_t)lhs->elt[i + 1] << (32 - shift)); - lhs->elt[i] = t; - t >>= 32; - } - lhs->elt[7] = t; -#endif -} -#endif - -// ================================================================ -// -// Power of 10 calculation -// -// ================================================================ - -// -// ------------ Power-of-10 tables. -------------------------- -// -// Grisu-style algorithms rely on being able to rapidly -// find a high-precision approximation of any power of 10. -// These values were computed by a simple script that -// relied on Python's excellent variable-length -// integer support. 
- -#if SWIFT_DTOA_BINARY32_SUPPORT -// Table with negative powers of 10 to 64 bits -// -// Table size: 320 bytes -static uint64_t powersOf10_negativeBinary32[] = { - 0x8b61313bbabce2c6ULL, // x 2^-132 ~= 10^-40 - 0xae397d8aa96c1b77ULL, // x 2^-129 ~= 10^-39 - 0xd9c7dced53c72255ULL, // x 2^-126 ~= 10^-38 - 0x881cea14545c7575ULL, // x 2^-122 ~= 10^-37 - 0xaa242499697392d2ULL, // x 2^-119 ~= 10^-36 - 0xd4ad2dbfc3d07787ULL, // x 2^-116 ~= 10^-35 - 0x84ec3c97da624ab4ULL, // x 2^-112 ~= 10^-34 - 0xa6274bbdd0fadd61ULL, // x 2^-109 ~= 10^-33 - 0xcfb11ead453994baULL, // x 2^-106 ~= 10^-32 - 0x81ceb32c4b43fcf4ULL, // x 2^-102 ~= 10^-31 - 0xa2425ff75e14fc31ULL, // x 2^-99 ~= 10^-30 - 0xcad2f7f5359a3b3eULL, // x 2^-96 ~= 10^-29 - 0xfd87b5f28300ca0dULL, // x 2^-93 ~= 10^-28 - 0x9e74d1b791e07e48ULL, // x 2^-89 ~= 10^-27 - 0xc612062576589ddaULL, // x 2^-86 ~= 10^-26 - 0xf79687aed3eec551ULL, // x 2^-83 ~= 10^-25 - 0x9abe14cd44753b52ULL, // x 2^-79 ~= 10^-24 - 0xc16d9a0095928a27ULL, // x 2^-76 ~= 10^-23 - 0xf1c90080baf72cb1ULL, // x 2^-73 ~= 10^-22 - 0x971da05074da7beeULL, // x 2^-69 ~= 10^-21 - 0xbce5086492111aeaULL, // x 2^-66 ~= 10^-20 - 0xec1e4a7db69561a5ULL, // x 2^-63 ~= 10^-19 - 0x9392ee8e921d5d07ULL, // x 2^-59 ~= 10^-18 - 0xb877aa3236a4b449ULL, // x 2^-56 ~= 10^-17 - 0xe69594bec44de15bULL, // x 2^-53 ~= 10^-16 - 0x901d7cf73ab0acd9ULL, // x 2^-49 ~= 10^-15 - 0xb424dc35095cd80fULL, // x 2^-46 ~= 10^-14 - 0xe12e13424bb40e13ULL, // x 2^-43 ~= 10^-13 - 0x8cbccc096f5088cbULL, // x 2^-39 ~= 10^-12 - 0xafebff0bcb24aafeULL, // x 2^-36 ~= 10^-11 - 0xdbe6fecebdedd5beULL, // x 2^-33 ~= 10^-10 - 0x89705f4136b4a597ULL, // x 2^-29 ~= 10^-9 - 0xabcc77118461cefcULL, // x 2^-26 ~= 10^-8 - 0xd6bf94d5e57a42bcULL, // x 2^-23 ~= 10^-7 - 0x8637bd05af6c69b5ULL, // x 2^-19 ~= 10^-6 - 0xa7c5ac471b478423ULL, // x 2^-16 ~= 10^-5 - 0xd1b71758e219652bULL, // x 2^-13 ~= 10^-4 - 0x83126e978d4fdf3bULL, // x 2^-9 ~= 10^-3 - 0xa3d70a3d70a3d70aULL, // x 2^-6 ~= 10^-2 - 0xccccccccccccccccULL, // x 2^-3 ~= 
10^-1 -}; -#endif - -#if SWIFT_DTOA_BINARY32_SUPPORT || SWIFT_DTOA_BINARY64_SUPPORT || SWIFT_DTOA_FLOAT80_SUPPORT || SWIFT_DTOA_BINARY128_SUPPORT -// Tables with powers of 10 -// -// The constant powers of 10 here represent pure fractions -// with a binary point at the far left. (Each number in -// this first table is implicitly divided by 2^128.) -// -// Table size: 896 bytes -// -// A 64-bit significand allows us to exactly represent powers of 10 up -// to 10^27. In 128 bits, we can exactly represent powers of 10 up to -// 10^55. As with all of these tables, the binary exponent is not stored; -// it is computed by the `binaryExponentFor10ToThe(p)` function. -static const uint64_t powersOf10_Exact128[56 * 2] = { - // Low order ... high order - 0x0000000000000000ULL, 0x8000000000000000ULL, // x 2^1 == 10^0 exactly - 0x0000000000000000ULL, 0xa000000000000000ULL, // x 2^4 == 10^1 exactly - 0x0000000000000000ULL, 0xc800000000000000ULL, // x 2^7 == 10^2 exactly - 0x0000000000000000ULL, 0xfa00000000000000ULL, // x 2^10 == 10^3 exactly - 0x0000000000000000ULL, 0x9c40000000000000ULL, // x 2^14 == 10^4 exactly - 0x0000000000000000ULL, 0xc350000000000000ULL, // x 2^17 == 10^5 exactly - 0x0000000000000000ULL, 0xf424000000000000ULL, // x 2^20 == 10^6 exactly - 0x0000000000000000ULL, 0x9896800000000000ULL, // x 2^24 == 10^7 exactly - 0x0000000000000000ULL, 0xbebc200000000000ULL, // x 2^27 == 10^8 exactly - 0x0000000000000000ULL, 0xee6b280000000000ULL, // x 2^30 == 10^9 exactly - 0x0000000000000000ULL, 0x9502f90000000000ULL, // x 2^34 == 10^10 exactly - 0x0000000000000000ULL, 0xba43b74000000000ULL, // x 2^37 == 10^11 exactly - 0x0000000000000000ULL, 0xe8d4a51000000000ULL, // x 2^40 == 10^12 exactly - 0x0000000000000000ULL, 0x9184e72a00000000ULL, // x 2^44 == 10^13 exactly - 0x0000000000000000ULL, 0xb5e620f480000000ULL, // x 2^47 == 10^14 exactly - 0x0000000000000000ULL, 0xe35fa931a0000000ULL, // x 2^50 == 10^15 exactly - 0x0000000000000000ULL, 0x8e1bc9bf04000000ULL, // x 2^54 
== 10^16 exactly - 0x0000000000000000ULL, 0xb1a2bc2ec5000000ULL, // x 2^57 == 10^17 exactly - 0x0000000000000000ULL, 0xde0b6b3a76400000ULL, // x 2^60 == 10^18 exactly - 0x0000000000000000ULL, 0x8ac7230489e80000ULL, // x 2^64 == 10^19 exactly - 0x0000000000000000ULL, 0xad78ebc5ac620000ULL, // x 2^67 == 10^20 exactly - 0x0000000000000000ULL, 0xd8d726b7177a8000ULL, // x 2^70 == 10^21 exactly - 0x0000000000000000ULL, 0x878678326eac9000ULL, // x 2^74 == 10^22 exactly - 0x0000000000000000ULL, 0xa968163f0a57b400ULL, // x 2^77 == 10^23 exactly - 0x0000000000000000ULL, 0xd3c21bcecceda100ULL, // x 2^80 == 10^24 exactly - 0x0000000000000000ULL, 0x84595161401484a0ULL, // x 2^84 == 10^25 exactly - 0x0000000000000000ULL, 0xa56fa5b99019a5c8ULL, // x 2^87 == 10^26 exactly - 0x0000000000000000ULL, 0xcecb8f27f4200f3aULL, // x 2^90 == 10^27 exactly - 0x4000000000000000ULL, 0x813f3978f8940984ULL, // x 2^94 == 10^28 exactly - 0x5000000000000000ULL, 0xa18f07d736b90be5ULL, // x 2^97 == 10^29 exactly - 0xa400000000000000ULL, 0xc9f2c9cd04674edeULL, // x 2^100 == 10^30 exactly - 0x4d00000000000000ULL, 0xfc6f7c4045812296ULL, // x 2^103 == 10^31 exactly - 0xf020000000000000ULL, 0x9dc5ada82b70b59dULL, // x 2^107 == 10^32 exactly - 0x6c28000000000000ULL, 0xc5371912364ce305ULL, // x 2^110 == 10^33 exactly - 0xc732000000000000ULL, 0xf684df56c3e01bc6ULL, // x 2^113 == 10^34 exactly - 0x3c7f400000000000ULL, 0x9a130b963a6c115cULL, // x 2^117 == 10^35 exactly - 0x4b9f100000000000ULL, 0xc097ce7bc90715b3ULL, // x 2^120 == 10^36 exactly - 0x1e86d40000000000ULL, 0xf0bdc21abb48db20ULL, // x 2^123 == 10^37 exactly - 0x1314448000000000ULL, 0x96769950b50d88f4ULL, // x 2^127 == 10^38 exactly - 0x17d955a000000000ULL, 0xbc143fa4e250eb31ULL, // x 2^130 == 10^39 exactly - 0x5dcfab0800000000ULL, 0xeb194f8e1ae525fdULL, // x 2^133 == 10^40 exactly - 0x5aa1cae500000000ULL, 0x92efd1b8d0cf37beULL, // x 2^137 == 10^41 exactly - 0xf14a3d9e40000000ULL, 0xb7abc627050305adULL, // x 2^140 == 10^42 exactly - 
0x6d9ccd05d0000000ULL, 0xe596b7b0c643c719ULL, // x 2^143 == 10^43 exactly - 0xe4820023a2000000ULL, 0x8f7e32ce7bea5c6fULL, // x 2^147 == 10^44 exactly - 0xdda2802c8a800000ULL, 0xb35dbf821ae4f38bULL, // x 2^150 == 10^45 exactly - 0xd50b2037ad200000ULL, 0xe0352f62a19e306eULL, // x 2^153 == 10^46 exactly - 0x4526f422cc340000ULL, 0x8c213d9da502de45ULL, // x 2^157 == 10^47 exactly - 0x9670b12b7f410000ULL, 0xaf298d050e4395d6ULL, // x 2^160 == 10^48 exactly - 0x3c0cdd765f114000ULL, 0xdaf3f04651d47b4cULL, // x 2^163 == 10^49 exactly - 0xa5880a69fb6ac800ULL, 0x88d8762bf324cd0fULL, // x 2^167 == 10^50 exactly - 0x8eea0d047a457a00ULL, 0xab0e93b6efee0053ULL, // x 2^170 == 10^51 exactly - 0x72a4904598d6d880ULL, 0xd5d238a4abe98068ULL, // x 2^173 == 10^52 exactly - 0x47a6da2b7f864750ULL, 0x85a36366eb71f041ULL, // x 2^177 == 10^53 exactly - 0x999090b65f67d924ULL, 0xa70c3c40a64e6c51ULL, // x 2^180 == 10^54 exactly - 0xfff4b4e3f741cf6dULL, 0xd0cf4b50cfe20765ULL, // x 2^183 == 10^55 exactly -}; -#endif - -#if SWIFT_DTOA_BINARY64_SUPPORT -// Rounded values supporting the full range of binary64 -// -// Table size: 464 bytes -// -// We only store every 28th power of ten here. -// We can multiply by an exact 64-bit power of -// ten from the table above to reconstruct the -// significand for any power of 10. 
// Layout: each row is one 128-bit significand stored as two 64-bit
// elements (low-order half first). Row i approximates 10^(28 * i - 400),
// giving 29 rows from 10^-400 through 10^384. The binary exponent shown
// in each row's comment is not stored in the table.
static const uint64_t powersOf10_Binary64[] = {
  // low-order half, high-order half
  0x3931b850df08e738, 0x95fe7e07c91efafa, // x 2^-1328 ~= 10^-400
  0xba954f8e758fecb3, 0x9774919ef68662a3, // x 2^-1235 ~= 10^-372
  0x9028bed2939a635c, 0x98ee4a22ecf3188b, // x 2^-1142 ~= 10^-344
  0x47b233c92125366e, 0x9a6bb0aa55653b2d, // x 2^-1049 ~= 10^-316
  0x4ee367f9430aec32, 0x9becce62836ac577, // x 2^-956 ~= 10^-288
  0x6f773fc3603db4a9, 0x9d71ac8fada6c9b5, // x 2^-863 ~= 10^-260
  0xc47bc5014a1a6daf, 0x9efa548d26e5a6e1, // x 2^-770 ~= 10^-232
  0x80e8a40eccd228a4, 0xa086cfcd97bf97f3, // x 2^-677 ~= 10^-204
  0xb8ada00e5a506a7c, 0xa21727db38cb002f, // x 2^-584 ~= 10^-176
  0xc13e60d0d2e0ebba, 0xa3ab66580d5fdaf5, // x 2^-491 ~= 10^-148
  0xc2974eb4ee658828, 0xa54394fe1eedb8fe, // x 2^-398 ~= 10^-120
  0xcb4ccd500f6bb952, 0xa6dfbd9fb8e5b88e, // x 2^-305 ~= 10^-92
  0x3f2398d747b36224, 0xa87fea27a539e9a5, // x 2^-212 ~= 10^-64
  0xdde50bd1d5d0b9e9, 0xaa242499697392d2, // x 2^-119 ~= 10^-36
  0xfdc20d2b36ba7c3d, 0xabcc77118461cefc, // x 2^-26 ~= 10^-8
  0x0000000000000000, 0xad78ebc5ac620000, // x 2^67 == 10^20 exactly
  0x9670b12b7f410000, 0xaf298d050e4395d6, // x 2^160 == 10^48 exactly
  0x3b25a55f43294bcb, 0xb0de65388cc8ada8, // x 2^253 ~= 10^76
  0x58edec91ec2cb657, 0xb2977ee300c50fe7, // x 2^346 ~= 10^104
  0x29babe4598c311fb, 0xb454e4a179dd1877, // x 2^439 ~= 10^132
  0x577b986b314d6009, 0xb616a12b7fe617aa, // x 2^532 ~= 10^160
  0x0c11ed6d538aeb2f, 0xb7dcbf5354e9bece, // x 2^625 ~= 10^188
  0x6d953e2bd7173692, 0xb9a74a0637ce2ee1, // x 2^718 ~= 10^216
  0x9d6d1ad41abe37f1, 0xbb764c4ca7a4440f, // x 2^811 ~= 10^244
  0x4b2d8644d8a74e18, 0xbd49d14aa79dbc82, // x 2^904 ~= 10^272
  0xe0470a63e6bd56c3, 0xbf21e44003acdd2c, // x 2^997 ~= 10^300
  0x505f522e53053ff2, 0xc0fe908895cf3b44, // x 2^1090 ~= 10^328
  0xcca845ab2beafa9a, 0xc2dfe19c8c055535, // x 2^1183 ~= 10^356
  0x1027fff56784f444, 0xc4c5e310aef8aa17, // x 2^1276 ~= 10^384
};
#endif

#if
SWIFT_DTOA_FLOAT80_SUPPORT || SWIFT_DTOA_BINARY128_SUPPORT -// Every 56th power of 10 across the range of Float80/Binary128 -// -// Table size: 5,728 bytes -// -// Note: We could cut this in half at the cost of one additional -// 256-bit multiply by only storing the positive values and -// multiplying by 10^-4984 to obtain the negative ones. -static const uint64_t powersOf10_Binary128[] = { - // Low-order ... high-order - 0xaec2e6aff96b46aeULL, 0xf91044c2eff84750ULL, 0x2b55c9e70e00c557ULL, 0xb6536903bf8f2bdaULL, // x 2^-16556 ~= 10^-4984 - 0xda1b3c3dd3889587ULL, 0x73a7380aba84a6b1ULL, 0xbddb2dfde3f8a6e3ULL, 0xb9e5428330737362ULL, // x 2^-16370 ~= 10^-4928 - 0xa2d23c57cfebb9ecULL, 0x9f165c039ead6d77ULL, 0x88227fdfc13ab53dULL, 0xbd89006346a9a34dULL, // x 2^-16184 ~= 10^-4872 - 0x333d510cf27e5a5ULL, 0x4e3cc383eaa17b7bULL, 0xe05fe4207ca3d508ULL, 0xc13efc51ade7df64ULL, // x 2^-15998 ~= 10^-4816 - 0xff242c569bc1f539ULL, 0x5c67ba58680c4cceULL, 0x3c55f3f947fef0e9ULL, 0xc50791bd8dd72edbULL, // x 2^-15812 ~= 10^-4760 - 0xe4b75ae27bec50bfULL, 0x25b0419765fdfcdbULL, 0x915564d8ab057eeULL, 0xc8e31de056f89c19ULL, // x 2^-15626 ~= 10^-4704 - 0x548b1e80a94f3434ULL, 0xe418e9217ce83755ULL, 0x801e38463183fc88ULL, 0xccd1ffc6bba63e21ULL, // x 2^-15440 ~= 10^-4648 - 0x541950a0fdc2b4d9ULL, 0xeea173da1f0eb7b4ULL, 0xcfadf6b2aa7c4f43ULL, 0xd0d49859d60d40a3ULL, // x 2^-15254 ~= 10^-4592 - 0x7e64501be95ad76bULL, 0x451e855d8acef835ULL, 0x9e601e707a2c3488ULL, 0xd4eb4a687c0253e8ULL, // x 2^-15068 ~= 10^-4536 - 0xdadd9645f360cb51ULL, 0xf290163350ecb3ebULL, 0xa8edffdccfe4db4bULL, 0xd9167ab0c1965798ULL, // x 2^-14882 ~= 10^-4480 - 0x7e447db3018ffbdfULL, 0x4fa1860c08a85923ULL, 0xb17cd86e7fcece75ULL, 0xdd568fe9ab559344ULL, // x 2^-14696 ~= 10^-4424 - 0x61cd4655bf64d265ULL, 0xb19fd88fe285b3bcULL, 0x1151250681d59705ULL, 0xe1abf2cd11206610ULL, // x 2^-14510 ~= 10^-4368 - 0xa5703f5ce7a619ecULL, 0x361243a84b55574dULL, 0x25a8e1e5dbb41d6ULL, 0xe6170e21b2910457ULL, // x 2^-14324 ~= 10^-4312 - 
0xb93897a6cf5d3e61ULL, 0x18746fcc6a190db9ULL, 0x66e849253e5da0c2ULL, 0xea984ec57de69f13ULL, // x 2^-14138 ~= 10^-4256 - 0x309043d12ab5b0acULL, 0x79c93cff11f09319ULL, 0xf5a7800f23ef67b8ULL, 0xef3023b80a732d93ULL, // x 2^-13952 ~= 10^-4200 - 0xa3baa84c049b52b9ULL, 0xbec466ee1b586342ULL, 0xe85fc7f4edbd3caULL, 0xf3defe25478e074aULL, // x 2^-13766 ~= 10^-4144 - 0xd1f4628316b15c7aULL, 0xae16192410d3135eULL, 0x4268a54f70bd28c4ULL, 0xf8a551706112897cULL, // x 2^-13580 ~= 10^-4088 - 0x9eb9296cc5749dbaULL, 0x48324e275376dfddULL, 0x5052e9289f0f2333ULL, 0xfd83933eda772c0bULL, // x 2^-13394 ~= 10^-4032 - 0xff6aae669a5a0d8aULL, 0x24fed95087b9006eULL, 0x1b02378a405b421ULL, 0x813d1dc1f0c754d6ULL, // x 2^-13207 ~= 10^-3976 - 0xf993f18de00dc89bULL, 0x15617da021b89f92ULL, 0xb782db1fc6aba49bULL, 0x83c4e245ed051dc1ULL, // x 2^-13021 ~= 10^-3920 - 0xc6a0d64a712172b1ULL, 0x2217669197ac1504ULL, 0x4250be2eeba87d15ULL, 0x86595584116caf3cULL, // x 2^-12835 ~= 10^-3864 - 0xbdc0c67a220687bULL, 0x44a66a6d6fd6537bULL, 0x3f1f93f1943ca9b6ULL, 0x88fab70d8b44952aULL, // x 2^-12649 ~= 10^-3808 - 0xb60b57164ad28122ULL, 0xde5bd4572c25a830ULL, 0x2c87f18b39478aa2ULL, 0x8ba947b223e5783eULL, // x 2^-12463 ~= 10^-3752 - 0xbd59568efdb9bfeeULL, 0x292f8f2c98d7f44cULL, 0x4054f5360249ebd1ULL, 0x8e6549867da7d11aULL, // x 2^-12277 ~= 10^-3696 - 0x9fa0721e66791accULL, 0x1789061d717d454cULL, 0xc1187fa0c18adbbeULL, 0x912effea7015b2c5ULL, // x 2^-12091 ~= 10^-3640 - 0x982b64e953ac4e27ULL, 0x45efb05f20cf48b3ULL, 0x4b4de34e0ebc3e06ULL, 0x9406af8f83fd6265ULL, // x 2^-11905 ~= 10^-3584 - 0xa53f5950eec21dcaULL, 0x3bd8754763bdbca1ULL, 0xac73f0226eff5ea1ULL, 0x96ec9e7f9004839bULL, // x 2^-11719 ~= 10^-3528 - 0x320e19f88f1161b7ULL, 0x72e93fe0cce7cfd9ULL, 0x2184706ea46a4c38ULL, 0x99e11423765ec1d0ULL, // x 2^-11533 ~= 10^-3472 - 0x491aba48dfc0e36eULL, 0xd3de560ee34022b2ULL, 0xddadb80577b906bdULL, 0x9ce4594a044e0f1bULL, // x 2^-11347 ~= 10^-3416 - 0x6789d038697142fULL, 0x7a466a75be73db21ULL, 0x60dbd8aa443b560fULL, 
0x9ff6b82ef415d222ULL, // x 2^-11161 ~= 10^-3360 - 0x40ed8056af76ac43ULL, 0x8251c601e346456ULL, 0x7401c6f091f87727ULL, 0xa3187c82120dace6ULL, // x 2^-10975 ~= 10^-3304 - 0x8c643ee307bffec6ULL, 0xf369a11c6f66c05aULL, 0x4d5b32f713d7f476ULL, 0xa649f36e8583e81aULL, // x 2^-10789 ~= 10^-3248 - 0xe32f5e080e36b4beULL, 0x3adf30ff2eb163d4ULL, 0xb4b39dd9ddb8d317ULL, 0xa98b6ba23e2300c7ULL, // x 2^-10603 ~= 10^-3192 - 0x6b9d538c192cfb1bULL, 0x1c5af3bd4d2c60b5ULL, 0xec41c1793d69d0d1ULL, 0xacdd3555869159d1ULL, // x 2^-10417 ~= 10^-3136 - 0x1adadaeedf7d699cULL, 0x71043692494aa743ULL, 0x3ca5a7540d9d56c9ULL, 0xb03fa252bd05a815ULL, // x 2^-10231 ~= 10^-3080 - 0xec3e4e5fc6b03617ULL, 0x47c9b16afe8fdf74ULL, 0x92e1bc1fbb33f18dULL, 0xb3b305fe328e571fULL, // x 2^-10045 ~= 10^-3024 - 0x1d42fa68b12bdb23ULL, 0xac46a7b3f2b4b34eULL, 0xa908fd4a88728b6aULL, 0xb737b55e31cdde04ULL, // x 2^-9859 ~= 10^-2968 - 0x887dede507f2b618ULL, 0x359a8fa0d014b9a7ULL, 0x7c4c65d15c614c56ULL, 0xbace07232df1c802ULL, // x 2^-9673 ~= 10^-2912 - 0x504708e718b4b669ULL, 0xfb4d9440822af452ULL, 0xef84cc99cb4c5d17ULL, 0xbe7653b01aae13e5ULL, // x 2^-9487 ~= 10^-2856 - 0x5b7977525516bff0ULL, 0x75913092420c9b35ULL, 0xcfc147ade4843a24ULL, 0xc230f522ee0a7fc2ULL, // x 2^-9301 ~= 10^-2800 - 0xad5d11883cc1302bULL, 0x860a754894b9a0bcULL, 0x4668677d5f46c29bULL, 0xc5fe475d4cd35cffULL, // x 2^-9115 ~= 10^-2744 - 0x42032f9f971bfc07ULL, 0x9fb576046ab35018ULL, 0x474b3cb1fe1d6a7fULL, 0xc9dea80d6283a34cULL, // x 2^-8929 ~= 10^-2688 - 0xd3e7fbb72403a4ddULL, 0x8ca223055819af54ULL, 0xd6ea3b733029ef0bULL, 0xcdd276b6e582284fULL, // x 2^-8743 ~= 10^-2632 - 0xba2431d885f2b7d9ULL, 0xc9879fc42869f610ULL, 0x3736730a9e47fef8ULL, 0xd1da14bc489025eaULL, // x 2^-8557 ~= 10^-2576 - 0xa11edbcd65dd1844ULL, 0xcb8edae81a295887ULL, 0x3d24e68dc1027246ULL, 0xd5f5e5681a4b9285ULL, // x 2^-8371 ~= 10^-2520 - 0xa0f076652f69ad08ULL, 0x9d19c341f5f42f2aULL, 0x742ab8f3864562c8ULL, 0xda264df693ac3e30ULL, // x 2^-8185 ~= 10^-2464 - 0x29f760ef115f2824ULL, 
0xe0ee47c041c9de0fULL, 0x8c119f3680212413ULL, 0xde6bb59f56672cdaULL, // x 2^-7999 ~= 10^-2408 - 0x8b90230b3409c9d3ULL, 0x9d76eef2c1543e65ULL, 0x43190b523f872b9cULL, 0xe2c6859f5c284230ULL, // x 2^-7813 ~= 10^-2352 - 0xd44ce9993bc6611eULL, 0x777c9b2dfbede079ULL, 0x2a0969bf88679396ULL, 0xe7372943179706fcULL, // x 2^-7627 ~= 10^-2296 - 0xe8c5f5a63fd0fbd1ULL, 0xccc12293f1d7a58ULL, 0x131565be33dda91aULL, 0xebbe0df0c8201ac5ULL, // x 2^-7441 ~= 10^-2240 - 0xdb97988dd6b776f4ULL, 0xeb2106f435f7e1d5ULL, 0xccfb1cc2ef1f44deULL, 0xf05ba3330181c750ULL, // x 2^-7255 ~= 10^-2184 - 0x2fcbc8df94a1d54bULL, 0x796d0a8120801513ULL, 0x5f8385b3a882ff4cULL, 0xf5105ac3681f2716ULL, // x 2^-7069 ~= 10^-2128 - 0xc8700c11071a40f5ULL, 0x23cb9e9df9331fe4ULL, 0x166c15f456786c27ULL, 0xf9dca895a3226409ULL, // x 2^-6883 ~= 10^-2072 - 0x9589f4637a50cbb5ULL, 0xea8242b0030e4a51ULL, 0x6c656c3b1f2c9d91ULL, 0xfec102e2857bc1f9ULL, // x 2^-6697 ~= 10^-2016 - 0xc4be56c83349136cULL, 0x6188db81ac8e775dULL, 0xfa70b9a2ca60b004ULL, 0x81def119b76837c8ULL, // x 2^-6510 ~= 10^-1960 - 0xb85d39054658b363ULL, 0xe7df06bc613fda21ULL, 0x6a22490e8e9ec98bULL, 0x8469e0b6f2b8bd9bULL, // x 2^-6324 ~= 10^-1904 - 0x800b1e1349fef248ULL, 0x469cfd2e6ca32a77ULL, 0x69138459b0fa72d4ULL, 0x87018eefb53c6325ULL, // x 2^-6138 ~= 10^-1848 - 0xb62593291c768919ULL, 0xc098e6ed0bfbd6f6ULL, 0x6c83ad1260ff20f4ULL, 0x89a63ba4c497b50eULL, // x 2^-5952 ~= 10^-1792 - 0x92ee7fce474479d3ULL, 0xe02017175bf040c6ULL, 0xd82ef2860273de8dULL, 0x8c5827f711735b46ULL, // x 2^-5766 ~= 10^-1736 - 0x7b0e6375ca8c77d9ULL, 0x5f07e1e10097d47fULL, 0x416d7f9ab1e67580ULL, 0x8f17964dfc3961f2ULL, // x 2^-5580 ~= 10^-1680 - 0xc8d869ed561af1ceULL, 0x8b6648e941de779bULL, 0x56700866b85d57feULL, 0x91e4ca5db93dbfecULL, // x 2^-5394 ~= 10^-1624 - 0xfc04df783488a410ULL, 0x64d1f15da2c146b1ULL, 0x43cf71d5c4fd7868ULL, 0x94c0092dd4ef9511ULL, // x 2^-5208 ~= 10^-1568 - 0xfbaf03b48a965a64ULL, 0x9b6122aa2b72a13cULL, 0x387898a6e22f821bULL, 0x97a9991fd8b3afc0ULL, // x 2^-5022 ~= 10^-1512 - 
0x50f7f7c13119aaddULL, 0xe415d8b25694250aULL, 0x8f8857e875e7774eULL, 0x9aa1c1f6110c0dd0ULL, // x 2^-4836 ~= 10^-1456 - 0xce214403545fd685ULL, 0xf36d1ad779b90e09ULL, 0xa5c58d5f91a476d7ULL, 0x9da8ccda75b341b5ULL, // x 2^-4650 ~= 10^-1400 - 0x63ddfb68f971b0c5ULL, 0x2822e38faf74b26eULL, 0x6e1f7f1642ebaac8ULL, 0xa0bf0465b455e921ULL, // x 2^-4464 ~= 10^-1344 - 0xf0d00cec9daf7444ULL, 0x6bf3eea6f661a32aULL, 0xfad2be1679765f27ULL, 0xa3e4b4a65e97b76aULL, // x 2^-4278 ~= 10^-1288 - 0x463b4ab4bd478f57ULL, 0x6f6583b5b36d5426ULL, 0x800cfab80c4e2eb1ULL, 0xa71a2b283c14fba6ULL, // x 2^-4092 ~= 10^-1232 - 0xef163df2fa96e983ULL, 0xa825f32bc8f6b080ULL, 0x850b0c5976b21027ULL, 0xaa5fb6fbc115010bULL, // x 2^-3906 ~= 10^-1176 - 0x7db1b3f8e100eb43ULL, 0x2862b1f61d64ddc3ULL, 0x61363686961a41e5ULL, 0xadb5a8bdaaa53051ULL, // x 2^-3720 ~= 10^-1120 - 0xfd349cf00ba1e09aULL, 0x6d282fe1b7112879ULL, 0xc6f075c4b81fc72dULL, 0xb11c529ec0d87268ULL, // x 2^-3534 ~= 10^-1064 - 0xf7221741b221cf6fULL, 0x3739f15b06ac3c76ULL, 0xb4e4be5b6455ef96ULL, 0xb494086bbfea00c3ULL, // x 2^-3348 ~= 10^-1008 - 0xc4e5a2f864c403bbULL, 0x6e33cdcda4367276ULL, 0x24d256c540a50309ULL, 0xb81d1f9569068d8eULL, // x 2^-3162 ~= 10^-952 - 0x276e3f0f67f0553bULL, 0xde73d9d5be6974ULL, 0x6d4aa5b50bb5dc0dULL, 0xbbb7ef38bb827f2dULL, // x 2^-2976 ~= 10^-896 - 0x51a34a3e674484edULL, 0x1fb6069f8b26f840ULL, 0x925624c0d7d93317ULL, 0xbf64d0275747de70ULL, // x 2^-2790 ~= 10^-840 - 0xcc775c8cb6de1dbcULL, 0x6d60d02eac6309eeULL, 0x8e5a2e5116baf191ULL, 0xc3241cf0094a8e70ULL, // x 2^-2604 ~= 10^-784 - 0x6023c8fa17d7b105ULL, 0x69cf8f51d2e5e65ULL, 0xb0560c246f90e9e8ULL, 0xc6f631e782d57096ULL, // x 2^-2418 ~= 10^-728 - 0x92c17acb2d08d5fdULL, 0xc26ffb8e81532725ULL, 0x2ffff1289a804c5aULL, 0xcadb6d313c8736fcULL, // x 2^-2232 ~= 10^-672 - 0x47df78ab9e92897aULL, 0xc02b302a892b81dcULL, 0xa855e127113c887bULL, 0xced42ec885d9dbbeULL, // x 2^-2046 ~= 10^-616 - 0xdaf2dec03ec0c322ULL, 0x72db3bc15b0c7014ULL, 0xe00bad8dfc0d8c8eULL, 0xd2e0d889c213fd60ULL, // x 2^-1860 
~= 10^-560 - 0xd3a04799e4473ac8ULL, 0xa116409a2fdf1e9eULL, 0xc654d07271e6c39fULL, 0xd701ce3bd387bf47ULL, // x 2^-1674 ~= 10^-504 - 0x5c8a5dc65d745a24ULL, 0x2726c48a85389fa7ULL, 0x84c663cee6b86e7cULL, 0xdb377599b6074244ULL, // x 2^-1488 ~= 10^-448 - 0xd7ebc61ba77a9e66ULL, 0x8bf77d4bc59b35b1ULL, 0xcb285ceb2fed040dULL, 0xdf82365c497b5453ULL, // x 2^-1302 ~= 10^-392 - 0x744ce999bfed213aULL, 0x363b1f2c568dc3e2ULL, 0xfd1b1b2308169b25ULL, 0xe3e27a444d8d98b7ULL, // x 2^-1116 ~= 10^-336 - 0x6a40608fe10de7e7ULL, 0xf910f9f648232f14ULL, 0xd1b3400f8f9cff68ULL, 0xe858ad248f5c22c9ULL, // x 2^-930 ~= 10^-280 - 0x9bdbfc21260dd1adULL, 0x4609ac5c7899ca36ULL, 0xa4f8bf5635246428ULL, 0xece53cec4a314ebdULL, // x 2^-744 ~= 10^-224 - 0xd88181aad19d7454ULL, 0xf80f36174730ca34ULL, 0xdc44e6c3cb279ac1ULL, 0xf18899b1bc3f8ca1ULL, // x 2^-558 ~= 10^-168 - 0xee19bfa6947f8e02ULL, 0xaa09501d5954a559ULL, 0x4d4617b5ff4a16d5ULL, 0xf64335bcf065d37dULL, // x 2^-372 ~= 10^-112 - 0xebbc75a03b4d60e6ULL, 0xac2e4f162cfad40aULL, 0xeed6e2f0f0d56712ULL, 0xfb158592be068d2eULL, // x 2^-186 ~= 10^-56 - 0x0ULL, 0x0ULL, 0x0ULL, 0x8000000000000000ULL, // x 2^1 == 10^0 exactly - 0x0ULL, 0x2000000000000000ULL, 0xbff8f10e7a8921a4ULL, 0x82818f1281ed449fULL, // x 2^187 == 10^56 exactly - 0x51775f71e92bf2f2ULL, 0x74a7ef0198791097ULL, 0x3e2cf6bc604ddb0ULL, 0x850fadc09923329eULL, // x 2^373 ~= 10^112 - 0xb204b3d9686f55b5ULL, 0xfb118fc9c217a1d2ULL, 0x90fb44d2f05d0842ULL, 0x87aa9aff79042286ULL, // x 2^559 ~= 10^168 - 0xd7924bff833149faULL, 0xbc10c5c5cda97c8dULL, 0x82bd6b70d99aaa6fULL, 0x8a5296ffe33cc92fULL, // x 2^745 ~= 10^224 - 0xa67d072d3c7fa14bULL, 0x7ec63730f500b406ULL, 0xdb0b487b6423e1e8ULL, 0x8d07e33455637eb2ULL, // x 2^931 ~= 10^280 - 0x546f2a35dc367e47ULL, 0x949063d8a46f0c0eULL, 0x213a4f0aa5e8a7b1ULL, 0x8fcac257558ee4e6ULL, // x 2^1117 ~= 10^336 - 0x50611a621c0ee3aeULL, 0x202d895116aa96beULL, 0x1c306f5d1b0b5fdfULL, 0x929b7871de7f22b9ULL, // x 2^1303 ~= 10^392 - 0xffa6738a27dcf7a3ULL, 0x3c11d8430d5c4802ULL, 
0xa7ea9c8838ce9437ULL, 0x957a4ae1ebf7f3d3ULL, // x 2^1489 ~= 10^448 - 0x5bf36c0f40bde99dULL, 0x284ba600ee9f6303ULL, 0xbf1d49cacccd5e68ULL, 0x9867806127ece4f4ULL, // x 2^1675 ~= 10^504 - 0xa6e937834ed12e58ULL, 0x73f26eb82f6b8066ULL, 0x655494c5c95d77f2ULL, 0x9b63610bb9243e46ULL, // x 2^1861 ~= 10^560 - 0xcd4b7660adc6930ULL, 0x8f868688f8eb79ebULL, 0x2e008393fd60b55ULL, 0x9e6e366733f85561ULL, // x 2^2047 ~= 10^616 - 0x3efb9807d86d3c6aULL, 0x84c10a1d22f5adc5ULL, 0x55e04dba4b3bd4ddULL, 0xa1884b69ade24964ULL, // x 2^2233 ~= 10^672 - 0xf065089401df33b4ULL, 0x1fc02370c451a755ULL, 0x44b222741eb1ebbfULL, 0xa4b1ec80f47c84adULL, // x 2^2419 ~= 10^728 - 0xa62d0da836fce7d5ULL, 0x75933380ceb5048cULL, 0x1cf4a5c3bc09fa6fULL, 0xa7eb6799e8aec999ULL, // x 2^2605 ~= 10^784 - 0x7a400df820f096c2ULL, 0x802c4085068d2dd5ULL, 0x3c4a575151b294dcULL, 0xab350c27feb90accULL, // x 2^2791 ~= 10^840 - 0xf48b51375df06e86ULL, 0x412fe9e72afd355eULL, 0x870a8d87239d8f35ULL, 0xae8f2b2ce3d5dbe9ULL, // x 2^2977 ~= 10^896 - 0x881883521930127cULL, 0xe53fd3fcb5b4df25ULL, 0xdd929f09c3eff5acULL, 0xb1fa17404a30e5e8ULL, // x 2^3163 ~= 10^952 - 0x270cd9f1348eb326ULL, 0x37ed82fe9c75fccfULL, 0x1931b583a9431d7eULL, 0xb5762497dbf17a9eULL, // x 2^3349 ~= 10^1008 - 0x8919b01a5b3d9ec1ULL, 0x6a7669bdfc6f699cULL, 0xe30db03e0f8dd286ULL, 0xb903a90f561d25e2ULL, // x 2^3535 ~= 10^1064 - 0xf0461526b4201aa5ULL, 0x7fe40defe17e55f5ULL, 0x9eb5cb19647508c5ULL, 0xbca2fc30cc19f090ULL, // x 2^3721 ~= 10^1120 - 0xd67bf35422978bbfULL, 0xdbb1c416ebe661fULL, 0x24bd4c00042ad125ULL, 0xc054773d149bf26bULL, // x 2^3907 ~= 10^1176 - 0xdd093192ef5508d0ULL, 0x6eac3085943ccc0fULL, 0x7ea30dbd7ea479e3ULL, 0xc418753460cdcca9ULL, // x 2^4093 ~= 10^1232 - 0xfe4ff20db6d25dc2ULL, 0x5d5d5a9519e34a42ULL, 0x764f4cf916b4deceULL, 0xc7ef52defe87b751ULL, // x 2^4279 ~= 10^1288 - 0xd8adfb2e00494c5eULL, 0x72435286baf0e84eULL, 0xbeb7fbdc1cbe8b37ULL, 0xcbd96ed6466cf081ULL, // x 2^4465 ~= 10^1344 - 0xe07c1e4384f594afULL, 0xc6b90b8874d5189ULL, 0xdce472c619aa3f63ULL, 
0xcfd7298db6cb9672ULL, // x 2^4651 ~= 10^1400 - 0x5dd902c68fa448cfULL, 0xea8d16bd9544e48eULL, 0xe47defc14a406e4fULL, 0xd3e8e55c3c1f43d0ULL, // x 2^4837 ~= 10^1456 - 0x1223d79357bedca8ULL, 0xeae6c2843752ac35ULL, 0xb7157c60a24a0569ULL, 0xd80f0685a81b2a81ULL, // x 2^5023 ~= 10^1512 - 0xcff72d64bc79e429ULL, 0xccc52c236decd778ULL, 0xfb0b98f6bbc4f0cbULL, 0xdc49f3445824e360ULL, // x 2^5209 ~= 10^1568 - 0x3731f76b905dffbbULL, 0x5e2bddd7d12a9e42ULL, 0xc6c6c1764e047e15ULL, 0xe09a13d30c2dba62ULL, // x 2^5395 ~= 10^1624 - 0xeb58d8ef2ada7c09ULL, 0xbc1a3b726b789947ULL, 0x87e8dcfc09dbc33aULL, 0xe4ffd276eedce658ULL, // x 2^5581 ~= 10^1680 - 0x249a5c06dc5d5db7ULL, 0xa8f09440be97bfe6ULL, 0xb1a3642a8da3cf4fULL, 0xe97b9b89d001dab3ULL, // x 2^5767 ~= 10^1736 - 0xbf34ff7963028cd9ULL, 0xc20578fa3851488bULL, 0x2d4070f33b21ab7bULL, 0xee0ddd84924ab88cULL, // x 2^5953 ~= 10^1792 - 0x2d0511317361d5ULL, 0xd6919e041129a1a7ULL, 0xa2bf0c63a814e04eULL, 0xf2b70909cd3fd35cULL, // x 2^6139 ~= 10^1848 - 0x1fa87f28acf1dcd2ULL, 0xe7a0a88981d1a0f9ULL, 0x8f13995cf9c2747ULL, 0xf77790f0a48a45ceULL, // x 2^6325 ~= 10^1904 - 0x1b6ff8afbe589b72ULL, 0xc851bb3f9aeb1211ULL, 0x7a37993eb21444faULL, 0xfc4fea4fd590b40aULL, // x 2^6511 ~= 10^1960 - 0xef23a4cbc039f0c2ULL, 0xbb3f8498a972f18eULL, 0xb7b1ada9cdeba84dULL, 0x80a046447e3d49f1ULL, // x 2^6698 ~= 10^2016 - 0x2cc44f2b602b6231ULL, 0xf231f4b7996b7278ULL, 0xcc6866c5d69b2cbULL, 0x8324f8aa08d7d411ULL, // x 2^6884 ~= 10^2072 - 0x822c97629a3a4c69ULL, 0x8a9afcdbc940e6f9ULL, 0x7fe2b4308dcbf1a3ULL, 0x85b64a659077660eULL, // x 2^7070 ~= 10^2128 - 0xf66cfcf42d4896b0ULL, 0x1f11852a20ed33c5ULL, 0x1d73ef3eaac3c964ULL, 0x88547abb1d8e5bd9ULL, // x 2^7256 ~= 10^2184 - 0x63093ad0caadb06cULL, 0x31be1482014cdaf0ULL, 0x1e34291b1ef566c7ULL, 0x8affca2bd1f88549ULL, // x 2^7442 ~= 10^2240 - 0xab50f69048738e9aULL, 0xa126c32ff4882be8ULL, 0x9e9383d73d486881ULL, 0x8db87a7c1e56d873ULL, // x 2^7628 ~= 10^2296 - 0xe57e659432b0a73eULL, 0x47a0e15dfc7986b8ULL, 0x9cc5ee51962c011aULL, 
0x907eceba168949b3ULL, // x 2^7814 ~= 10^2352 - 0x8a6ff950599f8ae5ULL, 0xd1cbbb7d005a76d3ULL, 0x413407cfeeac9743ULL, 0x93530b43e5e2c129ULL, // x 2^8000 ~= 10^2408 - 0xd4e6b6e847550caaULL, 0x56a3106227b87706ULL, 0x7efa7d29c44e11b7ULL, 0x963575ce63b6332dULL, // x 2^8186 ~= 10^2464 - 0xd835c90b09842263ULL, 0xb69f01a641da2a42ULL, 0x5a848859645d1c6fULL, 0x9926556bc8defe43ULL, // x 2^8372 ~= 10^2520 - 0x9b0ae73c204ecd61ULL, 0x794fd5e5a51ac2fULL, 0x51edea897b34601fULL, 0x9c25f29286e9ddb6ULL, // x 2^8558 ~= 10^2576 - 0x3130484fb0a61d89ULL, 0x32b7105223a27365ULL, 0xb50008d92529e91fULL, 0x9f3497244186fca4ULL, // x 2^8744 ~= 10^2632 - 0x8cd036553f38a1e8ULL, 0x5e997e9f45d7897dULL, 0xf09e780bcc8238d9ULL, 0xa2528e74eaf101fcULL, // x 2^8930 ~= 10^2688 - 0xe1f8b43b08b5d0efULL, 0xa0eaf3f62dc1777cULL, 0x3a5828869701a165ULL, 0xa580255203f84b47ULL, // x 2^9116 ~= 10^2744 - 0x3c7f62e3154fa708ULL, 0x5786f3927eb15bd5ULL, 0x8b231a70eb5444ceULL, 0xa8bdaa0a0064fa44ULL, // x 2^9302 ~= 10^2800 - 0x1ebc24a19cd70a2aULL, 0x843fddd10c7006b8ULL, 0xfa1bde1f473556a4ULL, 0xac0b6c73d065f8ccULL, // x 2^9488 ~= 10^2856 - 0x46b6aae34cfd26fcULL, 0xdb7d919b136c68ULL, 0x7730e00421da4d55ULL, 0xaf69bdf68fc6a740ULL, // x 2^9674 ~= 10^2912 - 0x1c4edcb83fc4c49dULL, 0x61c0edd56bbcb3e8ULL, 0x7f959cb702329d14ULL, 0xb2d8f1915ba88ca5ULL, // x 2^9860 ~= 10^2968 - 0x428c840d247382feULL, 0x9cc3b1569b1325a4ULL, 0x40c3a071220f5567ULL, 0xb6595be34f821493ULL, // x 2^10046 ~= 10^3024 - 0xbeb82e734787ec63ULL, 0xbeff12280d5a1676ULL, 0x11c48d02b8326bd3ULL, 0xb9eb5333aa272e9bULL, // x 2^10232 ~= 10^3080 - 0x302349e12f45c73fULL, 0xb494bcc96d53e49cULL, 0x566765461bd2f61bULL, 0xbd8f2f7a1ba47d6dULL, // x 2^10418 ~= 10^3136 - 0x5704ebf5f16946ceULL, 0x431388ec68ac7a26ULL, 0xb889018e4f6e9a52ULL, 0xc1454a673cb9b1ceULL, // x 2^10604 ~= 10^3192 - 0x5a30431166af9b23ULL, 0x132d031fc1d1fec0ULL, 0xf85333a94848659fULL, 0xc50dff6d30c3aefcULL, // x 2^10790 ~= 10^3248 - 0x7573d4b3ffe4ba3bULL, 0xf888498a40220657ULL, 0x1a1aeae7cf8a9d3dULL, 
0xc8e9abc872eb2bc1ULL, // x 2^10976 ~= 10^3304 - 0xb5eaef7441511eb9ULL, 0xc9cf998035a91664ULL, 0x12e29f09d9061609ULL, 0xccd8ae88cf70ad84ULL, // x 2^11162 ~= 10^3360 - 0x73aed4f1908f4d01ULL, 0x8c53e7beeca4578fULL, 0xdf7601457ca20b35ULL, 0xd0db689a89f2f9b1ULL, // x 2^11348 ~= 10^3416 - 0x5adbd55696e1cdd9ULL, 0x4949d09424b87626ULL, 0xcbdcd02f23cc7690ULL, 0xd4f23ccfb1916df5ULL, // x 2^11534 ~= 10^3472 - 0x3f500ccf4ea03593ULL, 0x9b80aac81b50762aULL, 0x44289dd21b589d7aULL, 0xd91d8fe9a3d019ccULL, // x 2^11720 ~= 10^3528 - 0x134ca67a679b84aeULL, 0x8909e424a112a3cdULL, 0x95aa118ec1d08317ULL, 0xdd5dc8a2bf27f3f7ULL, // x 2^11906 ~= 10^3584 - 0xe89e3cf733d9ff40ULL, 0x14344660a175c36ULL, 0x72c4d2cad73b0a7bULL, 0xe1b34fb846321d04ULL, // x 2^12092 ~= 10^3640 - 0x68c0a2c6c02dae9aULL, 0xb11160a6edb5f57ULL, 0xe20a88f1134f906dULL, 0xe61e8ff47461cda9ULL, // x 2^12278 ~= 10^3696 - 0x47fa54906741561aULL, 0xaa13acba1e5511f5ULL, 0xc7c91d5c341ed39dULL, 0xea9ff638c54554e1ULL, // x 2^12464 ~= 10^3752 - 0x365460ed91271c24ULL, 0xabe33496aff629b4ULL, 0xf659ede2159a45ecULL, 0xef37f1886f4b6690ULL, // x 2^12650 ~= 10^3808 - 0xe4cbf4acc7fba37fULL, 0x350e915f7055b1b8ULL, 0x78d946bab954b82fULL, 0xf3e6f313130ef0efULL, // x 2^12836 ~= 10^3864 - 0xe692accdfa5bd859ULL, 0xf4d4d3202379829eULL, 0xc9b1474d8f89c269ULL, 0xf8ad6e3fa030bd15ULL, // x 2^13022 ~= 10^3920 - 0xeca0018ea3b8d1b4ULL, 0xe878edb67072c26dULL, 0x6b1d2745340e7b14ULL, 0xfd8bd8b770cb469eULL, // x 2^13208 ~= 10^3976 - 0xce5fec949ab87cf7ULL, 0x151dcd7a53488c3ULL, 0xf22e502fcdd4bca2ULL, 0x81415538ce493bd5ULL, // x 2^13395 ~= 10^4032 - 0x5e1731fbff8c032eULL, 0xe752f53c2f8fa6c1ULL, 0x7c1735fc3b813c8cULL, 0x83c92edf425b292dULL, // x 2^13581 ~= 10^4088 - 0xb552102ea83f47e6ULL, 0xdf0fd2002ff6b3a3ULL, 0x367500a8e9a178fULL, 0x865db7a9ccd2839eULL, // x 2^13767 ~= 10^4144 - 0x76507bafe00ec873ULL, 0x71b256ecd954434cULL, 0xc9ac50475e25293aULL, 0x88ff2f2bade74531ULL, // x 2^13953 ~= 10^4200 - 0x5e2075ba289a360bULL, 0xac376f28b45e5accULL, 
0x879b2e5f6ee8b1cULL, 0x8badd636cc48b341ULL, // x 2^14139 ~= 10^4256 - 0xab87d85e6311e801ULL, 0xb7f786d14d58173dULL, 0x2f33c652bd12fab7ULL, 0x8e69eee1f23f2be5ULL, // x 2^14325 ~= 10^4312 - 0x7fed9b68d77255beULL, 0x35dc241819de7182ULL, 0xad6a6308a8e8b557ULL, 0x9133bc8f2a130fe5ULL, // x 2^14511 ~= 10^4368 - 0x728ae72899d4bd12ULL, 0xe5413d9414142a55ULL, 0x9dbaa465efe141a0ULL, 0x940b83f23a55842aULL, // x 2^14697 ~= 10^4424 - 0xf7740145246fb8fULL, 0x186ef2c39acb4103ULL, 0x888c9ab2fc5b3437ULL, 0x96f18b1742aad751ULL, // x 2^14883 ~= 10^4480 - 0xd8bb0fba2183c6efULL, 0xbf66d66cc34f0197ULL, 0xba00864671d1053fULL, 0x99e6196979b978f1ULL, // x 2^15069 ~= 10^4536 - 0x9b71ed2ceb790e49ULL, 0x6faac32d59cc1f5dULL, 0x61d59d402aae4feaULL, 0x9ce977ba0ce3a0bdULL, // x 2^15255 ~= 10^4592 - 0xa0aa6d5e63991cfbULL, 0x19482fa0ac45669cULL, 0x803c1cd864033781ULL, 0x9ffbf04722750449ULL, // x 2^15441 ~= 10^4648 - 0x95a9949e04b8bff3ULL, 0x900aa3c2f02ac9d4ULL, 0xa28a151725a55e10ULL, 0xa31dcec2fef14b30ULL, // x 2^15627 ~= 10^4704 - 0x3acf9496dade0ce9ULL, 0xbd8ecf923d23bec0ULL, 0x5b8452af2302fe13ULL, 0xa64f605b4e3352cdULL, // x 2^15813 ~= 10^4760 - 0x6204425d2b58e822ULL, 0xdee162a8a1248550ULL, 0x82b84cabc828bf93ULL, 0xa990f3c09110c544ULL, // x 2^15999 ~= 10^4816 - 0x91a2658e0639f32ULL, 0x66fa2184cee0b861ULL, 0x8d29dd5122e4278dULL, 0xace2d92db0390b59ULL, // x 2^16185 ~= 10^4872 - 0x80acda113324758aULL, 0xded179c26d9ab828ULL, 0x58f8fde02c03a6c6ULL, 0xb045626fb50a35e7ULL, // x 2^16371 ~= 10^4928 - 0x7128a8aad239ce8fULL, 0x8737bd250290cd5bULL, 0xd950102978dbd0ffULL, 0xb3b8e2eda91a232dULL, // x 2^16557 ~= 10^4984 -}; -#endif - -#if SWIFT_DTOA_BINARY32_SUPPORT -// Given a power `p`, this returns three values: -// * 64-bit fractions `lower` and `upper` -// * integer `exponent` -// -// The returned values satisfy the following: -// ``` -// lower * 2^exponent <= 10^p <= upper * 2^exponent -// ``` -// -// Note: Max(*upper - *lower) = 3 -static void intervalContainingPowerOf10_Binary32(int p, uint64_t *lower, 
uint64_t *upper, int *exponent) { - if (p >= 0) { - uint64_t base = powersOf10_Exact128[p * 2 + 1]; - *lower = base; - if (p < 28) { - *upper = base; - } else { - *upper = base + 1; - } - } else { - uint64_t base = powersOf10_negativeBinary32[p + 40]; - *lower = base; - *upper = base + 1; - } - *exponent = binaryExponentFor10ToThe(p); -} -#endif - -#if SWIFT_DTOA_BINARY64_SUPPORT -// Given a power `p`, this returns three values: -// * 128-bit fractions `lower` and `upper` -// * integer `exponent` -// -// Note: This function takes on average about 10% of the total runtime -// for formatting a double, as the general case here requires several -// multiplications to accurately reconstruct the lower and upper -// bounds. -// -// The returned values satisfy the following: -// ``` -// lower * 2^exponent <= 10^p <= upper * 2^exponent -// ``` -// -// Note: Max(*upper - *lower) = 3 -static void intervalContainingPowerOf10_Binary64(int p, swift_uint128_t *lower, swift_uint128_t *upper, int *exponent) { - if (p >= 0 && p <= 55) { - // Use one 64-bit exact value - swift_uint128_t exact; - initialize128WithHighLow64(exact, - powersOf10_Exact128[p * 2 + 1], - powersOf10_Exact128[p * 2]); - *upper = exact; - *lower = exact; - *exponent = binaryExponentFor10ToThe(p); - return; - } - - // Multiply a 128-bit approximate value with a 64-bit exact value - int index = p + 400; - // Copy a pair of uint64_t into a swift_uint128_t - int mainPower = index / 28; - const uint64_t *base_p = powersOf10_Binary64 + mainPower * 2; - swift_uint128_t base; - initialize128WithHighLow64(base, base_p[1], base_p[0]); - int extraPower = index - mainPower * 28; - int baseExponent = binaryExponentFor10ToThe(p - extraPower); - - int e = baseExponent; - if (extraPower == 0) { - // We're using a tightly-rounded lower bound, so +1 gives a tightly-rounded upper bound - *lower = base; -#if HAVE_UINT128_T - *upper = *lower + 1; -#else - *upper = *lower; - upper->low += 1; -#endif - } else { - // We need to 
multiply two values to get a lower bound - int64_t extra = powersOf10_Exact128[extraPower * 2 + 1]; - e += binaryExponentFor10ToThe(extraPower); - *lower = multiply128x64RoundingDown(base, extra); - // +2 is enough to get an upper bound - // (Verified through exhaustive testing.) -#if HAVE_UINT128_T - *upper = *lower + 2; -#else - *upper = *lower; - upper->low += 2; -#endif - } - *exponent = e; -} -#endif - -#if SWIFT_DTOA_FLOAT80_SUPPORT || SWIFT_DTOA_BINARY128_SUPPORT -// As above, but returning 256-bit fractions suitable for -// converting float80/binary128. -static void intervalContainingPowerOf10_Binary128(int p, swift_uint256_t *lower, swift_uint256_t *upper, int *exponent) { - if (p >= 0 && p <= 55) { - // We have an exact form, return a zero-width interval - // and avoid the multiplication. - uint64_t exactLow = powersOf10_Exact128[p * 2]; - uint64_t exactHigh = powersOf10_Exact128[p * 2 + 1]; - initialize256WithHighMidLow64(*lower, exactHigh, exactLow, 0, 0); - *upper = *lower; - *exponent = binaryExponentFor10ToThe(p); - return; - } - - int index = p + 4984; - const uint64_t *base_p = powersOf10_Binary128 + (index / 56) * 4; - // The values in the table are always tightly rounded down, so we use that - // directly as a lower bound. - initialize256WithHighMidLow64(*lower, base_p[3], base_p[2], base_p[1], base_p[0]); - int extraPower = index % 56; - int e = binaryExponentFor10ToThe(p - extraPower); - - if (extraPower > 0) { - swift_uint128_t extra; - initialize128WithHighLow64(extra, - powersOf10_Exact128[extraPower * 2 + 1], - powersOf10_Exact128[extraPower * 2]); - multiply256x128RoundingDown(lower, extra); - e += binaryExponentFor10ToThe(extraPower); - } - // We could compute upper similar to lower using rounding-up - // multiplications, but this is faster. 
- // Since there's just one multiplication, we can prove that 2 is - // enough to get a true upper bound, and we've verified (through - // exhaustive testing) that the least-significant component never - // wraps. - *upper = *lower; -#if HAVE_UINT128_T - upper->low += 2; -#else - upper->elt[0] += 2; -#endif - - *exponent = e; -} -#endif diff --git a/stdlib/public/stubs/Stubs.cpp b/stdlib/public/stubs/Stubs.cpp index c44fbd7e226d6..19c116472b8fc 100644 --- a/stdlib/public/stubs/Stubs.cpp +++ b/stdlib/public/stubs/Stubs.cpp @@ -63,7 +63,6 @@ #endif #include "swift/Runtime/Debug.h" -#include "swift/Runtime/SwiftDtoa.h" #include "swift/Basic/Lazy.h" #include "swift/Threading/Thread.h" @@ -173,46 +172,6 @@ static locale_t getCLocale() { #endif #endif // SWIFT_STDLIB_HAS_LOCALE -#if SWIFT_DTOA_PASS_FLOAT16_AS_FLOAT -using _CFloat16Argument = float; -#else -using _CFloat16Argument = _Float16; -#endif - -SWIFT_CC(swift) SWIFT_RUNTIME_STDLIB_API -__swift_ssize_t swift_float16ToString(char *Buffer, size_t BufferLength, - _CFloat16Argument Value, bool Debug) { -#if SWIFT_DTOA_PASS_FLOAT16_AS_FLOAT - __fp16 v = Value; - return swift_dtoa_optimal_binary16_p(&v, Buffer, BufferLength); -#else - return swift_dtoa_optimal_binary16_p(&Value, Buffer, BufferLength); -#endif -} - -SWIFT_CC(swift) SWIFT_RUNTIME_STDLIB_API -uint64_t swift_float32ToString(char *Buffer, size_t BufferLength, - float Value, bool Debug) { - return swift_dtoa_optimal_float(Value, Buffer, BufferLength); -} - -SWIFT_CC(swift) SWIFT_RUNTIME_STDLIB_API -uint64_t swift_float64ToString(char *Buffer, size_t BufferLength, - double Value, bool Debug) { - return swift_dtoa_optimal_double(Value, Buffer, BufferLength); -} - -// We only support float80 on platforms that use that exact format for 'long double' -// This should match the conditionals in Runtime.swift -#if !defined(_WIN32) && !defined(__ANDROID__) && (defined(__i386__) || defined(__i686__) || defined(__x86_64__)) -SWIFT_CC(swift) SWIFT_RUNTIME_STDLIB_API 
-uint64_t swift_float80ToString(char *Buffer, size_t BufferLength, - long double Value, bool Debug) { - // SwiftDtoa.cpp automatically enables float80 on platforms that use it for 'long double' - return swift_dtoa_optimal_float80_p(&Value, Buffer, BufferLength); -} -#endif - #if SWIFT_STDLIB_HAS_STDIN /// \param[out] LinePtr Replaced with the pointer to the malloc()-allocated From 1dbd399d6164e8d8aa67a549f2d2f2f2c44a2ed6 Mon Sep 17 00:00:00 2001 From: Tim Kientzle Date: Mon, 4 Aug 2025 12:07:49 -0700 Subject: [PATCH 10/19] Add FP-to-string support to core library --- Runtimes/Core/core/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Runtimes/Core/core/CMakeLists.txt b/Runtimes/Core/core/CMakeLists.txt index 864b546b3f2cc..4a36f4884663c 100644 --- a/Runtimes/Core/core/CMakeLists.txt +++ b/Runtimes/Core/core/CMakeLists.txt @@ -80,6 +80,7 @@ add_library(swiftCore FlatMap.swift Flatten.swift FloatingPoint.swift + FloatingPointToString.swift Hashable.swift AnyHashable.swift # ORDER DEPENDENCY Hasher.swift From e24076f344fd8e7675fd4b6408e60a204469d149 Mon Sep 17 00:00:00 2001 From: Tim Kientzle Date: Mon, 4 Aug 2025 13:22:28 -0700 Subject: [PATCH 11/19] Match up conditional for Float80 support --- stdlib/public/core/FloatingPointToString.swift | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/stdlib/public/core/FloatingPointToString.swift b/stdlib/public/core/FloatingPointToString.swift index 7459ccda27548..2d83e17c0af4b 100644 --- a/stdlib/public/core/FloatingPointToString.swift +++ b/stdlib/public/core/FloatingPointToString.swift @@ -1287,7 +1287,8 @@ fileprivate func _Float128ToASCII( // performance overall. 
// // ================================================================ -#if ((os(macOS) || targetEnvironment(macCatalyst) || os(Linux)) && arch(x86_64)) + +#if !(os(Windows) || os(Android) || ($Embedded && !os(Linux) && !(os(macOS) || os(iOS) || os(watchOS) || os(tvOS)))) && (arch(i386) || arch(x86_64)) @available(SwiftStdlib 6.2, *) fileprivate func _backend_256bit( From d4371f70a4796bd3eb13d6dfb0572e9827301e90 Mon Sep 17 00:00:00 2001 From: Tim Kientzle Date: Tue, 5 Aug 2025 14:31:15 -0700 Subject: [PATCH 12/19] Try to match stdlib coding conventions --- .../public/core/FloatingPointToString.swift | 4011 +++++++++-------- .../public/core/FloatingPointTypes.swift.gyb | 4 +- 2 files changed, 2111 insertions(+), 1904 deletions(-) diff --git a/stdlib/public/core/FloatingPointToString.swift b/stdlib/public/core/FloatingPointToString.swift index 2d83e17c0af4b..5a804dd023100 100644 --- a/stdlib/public/core/FloatingPointToString.swift +++ b/stdlib/public/core/FloatingPointToString.swift @@ -136,259 +136,282 @@ public func _float16ToStringImpl( _ textBuffer: UnsafeMutablePointer, _ bufferLength: UInt, _ value: Float16, - _ debug: Bool) -> UInt64 { - // Code below works with raw memory. - var buffer = unsafe MutableSpan(_unchecked: textBuffer, - count: Int(bufferLength)) - let textRange = Float16ToASCII(value: value, buffer: &buffer) - let textLength = textRange.upperBound - textRange.lowerBound - - // Move the text to the start of the buffer - if textRange.lowerBound != 0 { - unsafe _memmove(dest: textBuffer, - src: textBuffer + textRange.lowerBound, - size: UInt(truncatingIfNeeded: textLength)) - } - return UInt64(truncatingIfNeeded: textLength) + _ debug: Bool +) -> UInt64 { + // Code below works with raw memory. 
+ var buffer = unsafe MutableSpan( + _unchecked: textBuffer, + count: Int(bufferLength)) + let textRange = _Float16ToASCII(value: value, buffer: &buffer) + let textLength = textRange.upperBound - textRange.lowerBound + + // Move the text to the start of the buffer + if textRange.lowerBound != 0 { + unsafe _memmove( + dest: textBuffer, + src: textBuffer + textRange.lowerBound, + size: UInt(truncatingIfNeeded: textLength)) + } + return UInt64(truncatingIfNeeded: textLength) } -internal func Float16ToASCII( +internal func _Float16ToASCII( value f: Float16, - buffer utf8Buffer: inout MutableSpan) -> Range -{ - if #available(SwiftStdlib 6.2, *) { - return _Float16ToASCII(value: f, buffer: &utf8Buffer) - } else { - return 0..<0 - } + buffer utf8Buffer: inout MutableSpan +) -> Range { + if #available(SwiftStdlib 6.2, *) { + return _Float16ToASCIIImpl(value: f, buffer: &utf8Buffer) + } else { + return 0..<0 + } } @available(SwiftStdlib 6.2, *) -fileprivate func _Float16ToASCII( +fileprivate func _Float16ToASCIIImpl( value f: Float16, - buffer utf8Buffer: inout MutableSpan) -> Range -{ - // We need a MutableRawSpan in order to use wide store/load operations - precondition(utf8Buffer.count >= 32) - var buffer = utf8Buffer.mutableBytes - - // Step 1: Handle various input cases: - let binaryExponent: Int - let significand: Float16.RawSignificand - let exponentBias = (1 << (Float16.exponentBitCount - 1)) - 2; // 14 - if (f.exponentBitPattern == 0x1f) { // NaN or Infinity - if (f.isInfinite) { - return infinity(buffer: &buffer, sign: f.sign) - } else { // f.isNaN - let quietBit = (f.significandBitPattern >> (Float16.significandBitCount - 1)) & 1; - let payloadMask = UInt16(1 &<< (Float16.significandBitCount - 2)) - 1 - let payload16 = f.significandBitPattern & payloadMask - return nan_details(buffer: &buffer, - sign: f.sign, - quiet: quietBit != 0, - payloadHigh: 0, - payloadLow: UInt64(truncatingIfNeeded:payload16)) - } - } else if (f.exponentBitPattern == 0) { - if (f.isZero) 
{ - return zero(buffer: &buffer, sign: f.sign) - } else { // Subnormal - binaryExponent = 1 - exponentBias - significand = f.significandBitPattern &<< 2 - } - } else { // normal - binaryExponent = Int(f.exponentBitPattern) &- exponentBias - let hiddenBit = Float16.RawSignificand(1) << Float16.significandBitCount - significand = (f.significandBitPattern &+ hiddenBit) &<< 2 + buffer utf8Buffer: inout MutableSpan +) -> Range { + // We need a MutableRawSpan in order to use wide store/load operations + precondition(utf8Buffer.count >= 32) + var buffer = utf8Buffer.mutableBytes + + // Step 1: Handle various input cases: + let binaryExponent: Int + let significand: Float16.RawSignificand + let exponentBias = (1 << (Float16.exponentBitCount - 1)) - 2; // 14 + if (f.exponentBitPattern == 0x1f) { // NaN or Infinity + if (f.isInfinite) { + return _infinity(buffer: &buffer, sign: f.sign) + } else { // f.isNaN + let quietBit = + (f.significandBitPattern >> (Float16.significandBitCount - 1)) & 1; + let payloadMask = UInt16(1 &<< (Float16.significandBitCount - 2)) - 1 + let payload16 = f.significandBitPattern & payloadMask + return nan_details( + buffer: &buffer, + sign: f.sign, + quiet: quietBit != 0, + payloadHigh: 0, + payloadLow: UInt64(truncatingIfNeeded:payload16)) } - - // Step 2: Determine the exact target interval - let halfUlp: Float16.RawSignificand = 2 - let quarterUlp = halfUlp >> 1 - let upperMidpointExact = significand &+ halfUlp - let lowerMidpointExact = significand &- ((f.significandBitPattern == 0) ? quarterUlp : halfUlp) - - var firstDigit = 1 - var nextDigit = firstDigit - - // Emit the text form differently depending on what range it's in. - // We use `storeBytes(of:toUncheckedByteOffset:as:)` for most of - // the output, but are careful to use the checked/safe form - // `storeBytes(of:toByteOffset:as:)` for the last byte so that we - // reliably crash if we overflow the provided buffer. 
- - // Step 3: If it's < 10^-5, format as exponential form - if binaryExponent < -13 || (binaryExponent == -13 && significand < 0x1a38) { - var decimalExponent = -5 - var u = (UInt32(upperMidpointExact) << (28 - 13 &+ binaryExponent)) &* 100000 - var l = (UInt32(lowerMidpointExact) << (28 - 13 &+ binaryExponent)) &* 100000 - var t = (UInt32(significand) << (28 - 13 &+ binaryExponent)) &* 100000 - let mask = (UInt32(1) << 28) - 1 - if t < ((1 << 28) / 10) { - u &*= 100 - l &*= 100 - t &*= 100 - decimalExponent &-= 2 - } - if t < (1 << 28) { - u &*= 10 - l &*= 10 - t &*= 10 - decimalExponent &-= 1 - } + } else if (f.exponentBitPattern == 0) { + if (f.isZero) { + return _zero(buffer: &buffer, sign: f.sign) + } else { // Subnormal + binaryExponent = 1 - exponentBias + significand = f.significandBitPattern &<< 2 + } + } else { // normal + binaryExponent = Int(f.exponentBitPattern) &- exponentBias + let hiddenBit = Float16.RawSignificand(1) << Float16.significandBitCount + significand = (f.significandBitPattern &+ hiddenBit) &<< 2 + } + + // Step 2: Determine the exact target interval + let halfUlp: Float16.RawSignificand = 2 + let quarterUlp = halfUlp >> 1 + let upperMidpointExact = + significand &+ halfUlp + let lowerMidpointExact = + significand &- ((f.significandBitPattern == 0) ? quarterUlp : halfUlp) + + var firstDigit = 1 + var nextDigit = firstDigit + + // Emit the text form differently depending on what range it's in. + // We use `storeBytes(of:toUncheckedByteOffset:as:)` for most of + // the output, but are careful to use the checked/safe form + // `storeBytes(of:toByteOffset:as:)` for the last byte so that we + // reliably crash if we overflow the provided buffer. 
+ + // Step 3: If it's < 10^-5, format as exponential form + if binaryExponent < -13 || (binaryExponent == -13 && significand < 0x1a38) { + var decimalExponent = -5 + var u = + (UInt32(upperMidpointExact) << (28 - 13 &+ binaryExponent)) &* 100000 + var l = + (UInt32(lowerMidpointExact) << (28 - 13 &+ binaryExponent)) &* 100000 + var t = + (UInt32(significand) << (28 - 13 &+ binaryExponent)) &* 100000 + let mask = (UInt32(1) << 28) - 1 + if t < ((1 << 28) / 10) { + u &*= 100 + l &*= 100 + t &*= 100 + decimalExponent &-= 2 + } + if t < (1 << 28) { + u &*= 10 + l &*= 10 + t &*= 10 + decimalExponent &-= 1 + } + let uDigit = u >> 28 + if uDigit == (l >> 28) { + // More than one digit, so write first digit, ".", then the rest + unsafe buffer.storeBytes( + of: 0x30 + UInt8(truncatingIfNeeded: uDigit), + toUncheckedByteOffset: nextDigit, + as: UInt8.self) + nextDigit &+= 1 + unsafe buffer.storeBytes( + of: 0x2e, + toUncheckedByteOffset: nextDigit, + as: UInt8.self) + nextDigit &+= 1 + while true { + u = (u & mask) &* 10 + l = (l & mask) &* 10 + t = (t & mask) &* 10 let uDigit = u >> 28 - if uDigit == (l >> 28) { - // More than one digit, so write first digit, ".", then the rest - unsafe buffer.storeBytes(of: 0x30 + UInt8(truncatingIfNeeded: uDigit), - toUncheckedByteOffset: nextDigit, - as: UInt8.self) - nextDigit &+= 1 - unsafe buffer.storeBytes(of: 0x2e, - toUncheckedByteOffset: nextDigit, - as: UInt8.self) - nextDigit &+= 1 - while true { - u = (u & mask) &* 10 - l = (l & mask) &* 10 - t = (t & mask) &* 10 - let uDigit = u >> 28 - if uDigit != (l >> 28) { - // Stop before emitting the last digit - break - } - unsafe buffer.storeBytes(of: 0x30 &+ UInt8(truncatingIfNeeded: uDigit), - toUncheckedByteOffset: nextDigit, - as: UInt8.self) - nextDigit &+= 1 - } + if uDigit != (l >> 28) { + // Stop before emitting the last digit + break } - let digit = 0x30 &+ (t &+ (1 &<< 27)) >> 28 - unsafe buffer.storeBytes(of: UInt8(truncatingIfNeeded: digit), - toUncheckedByteOffset: 
nextDigit, - as: UInt8.self) - nextDigit &+= 1 - unsafe buffer.storeBytes(of: 0x65, // "e" - toUncheckedByteOffset: nextDigit, - as: UInt8.self) - nextDigit &+= 1 - unsafe buffer.storeBytes(of: 0x2d, // "-" - toUncheckedByteOffset: nextDigit, - as: UInt8.self) - nextDigit &+= 1 - unsafe buffer.storeBytes(of: UInt8(truncatingIfNeeded: -decimalExponent / 10 &+ 0x30), - toUncheckedByteOffset: nextDigit, - as: UInt8.self) - nextDigit &+= 1 - // Last write on this branch, so use a safe checked store - buffer.storeBytes(of: UInt8(truncatingIfNeeded: -decimalExponent % 10 &+ 0x30), - toByteOffset: nextDigit, - as: UInt8.self) + unsafe buffer.storeBytes( + of: 0x30 &+ UInt8(truncatingIfNeeded: uDigit), + toUncheckedByteOffset: nextDigit, + as: UInt8.self) nextDigit &+= 1 + } + } + let digit = 0x30 &+ (t &+ (1 &<< 27)) >> 28 + unsafe buffer.storeBytes( + of: UInt8(truncatingIfNeeded: digit), + toUncheckedByteOffset: nextDigit, + as: UInt8.self) + nextDigit &+= 1 + unsafe buffer.storeBytes( + of: 0x65, // "e" + toUncheckedByteOffset: nextDigit, + as: UInt8.self) + nextDigit &+= 1 + unsafe buffer.storeBytes( + of: 0x2d, // "-" + toUncheckedByteOffset: nextDigit, + as: UInt8.self) + nextDigit &+= 1 + unsafe buffer.storeBytes( + of: UInt8(truncatingIfNeeded: -decimalExponent / 10 &+ 0x30), + toUncheckedByteOffset: nextDigit, + as: UInt8.self) + nextDigit &+= 1 + // Last write on this branch, so use a safe checked store + buffer.storeBytes( + of: UInt8(truncatingIfNeeded: -decimalExponent % 10 &+ 0x30), + toByteOffset: nextDigit, + as: UInt8.self) + nextDigit &+= 1 + } else { + + // Step 4: Greater than 10^-5, so use decimal format "123.45" + // (Note: Float16 is never big enough to need exponential for + // positive exponents) + // First, split into integer and fractional parts: + + let intPart : Float16.RawSignificand + let fractionPart : Float16.RawSignificand + if binaryExponent < 13 { + intPart = significand >> (13 &- binaryExponent) + fractionPart = significand &- (intPart 
&<< (13 &- binaryExponent)) + } else { + intPart = significand &<< (binaryExponent &- 13) + fractionPart = significand &- (intPart >> (binaryExponent &- 13)) + } + // Step 5: Emit the integer part + let text = _intToEightDigits(UInt32(intPart)) + unsafe buffer.storeBytes( + of: text, + toUncheckedByteOffset: nextDigit, + as: UInt64.self) + nextDigit &+= 8 + + // Skip leading zeros + if intPart < 10 { + firstDigit &+= 7 + } else if intPart < 100 { + firstDigit &+= 6 + } else if intPart < 1000 { + firstDigit &+= 5 + } else if intPart < 10000 { + firstDigit &+= 4 } else { + firstDigit &+= 3 + } - // Step 4: Greater than 10^-5, so use decimal format "123.45" - // (Note: Float16 is never big enough to need exponential for - // positive exponents) - // First, split into integer and fractional parts: - - let intPart : Float16.RawSignificand - let fractionPart : Float16.RawSignificand - if binaryExponent < 13 { - intPart = significand >> (13 &- binaryExponent) - fractionPart = significand &- (intPart &<< (13 &- binaryExponent)) - } else { - intPart = significand &<< (binaryExponent &- 13) - fractionPart = significand &- (intPart >> (binaryExponent &- 13)) - } + // After the integer part comes a period... 
+ unsafe buffer.storeBytes( + of: 0x2e, + toUncheckedByteOffset: nextDigit, + as: UInt8.self) + nextDigit &+= 1 - // Step 5: Emit the integer part - let text = intToEightDigits(UInt32(intPart)) - unsafe buffer.storeBytes(of: text, - toUncheckedByteOffset: nextDigit, - as: UInt64.self) - nextDigit &+= 8 - - // Skip leading zeros - if intPart < 10 { - firstDigit &+= 7 - } else if intPart < 100 { - firstDigit &+= 6 - } else if intPart < 1000 { - firstDigit &+= 5 - } else if intPart < 10000 { - firstDigit &+= 4 - } else { - firstDigit &+= 3 + if fractionPart == 0 { + // Step 6: No fraction, so ".0" and we're done + // Last write on this branch, so use a checked store + buffer.storeBytes( + of: 0x30, + toByteOffset: nextDigit, + as: UInt8.self) + nextDigit &+= 1 + } else { + // Step 7: Emit the fractional part by repeatedly + // multiplying by 10 to produce successive digits: + var u = UInt32(upperMidpointExact) &<< (28 - 13 &+ binaryExponent) + var l = UInt32(lowerMidpointExact) &<< (28 - 13 &+ binaryExponent) + var t = UInt32(fractionPart) &<< (28 - 13 &+ binaryExponent) + let mask = (UInt32(1) << 28) - 1 + var uDigit: UInt8 = 0 + var lDigit: UInt8 = 0 + while true { + u = (u & mask) &* 10 + l = (l & mask) &* 10 + uDigit = UInt8(truncatingIfNeeded: u >> 28) + lDigit = UInt8(truncatingIfNeeded: l >> 28) + if uDigit != lDigit { + t = (t & mask) &* 10 + break } - - // After the integer part comes a period... - unsafe buffer.storeBytes(of: 0x2e, - toUncheckedByteOffset: nextDigit, - as: UInt8.self) + // This overflows, but we don't care at this point. 
+ t &*= 10 + unsafe buffer.storeBytes( + of: 0x30 &+ uDigit, + toUncheckedByteOffset: nextDigit, + as: UInt8.self) nextDigit &+= 1 - - if fractionPart == 0 { - // Step 6: No fraction, so ".0" and we're done - // Last write on this branch, so use a checked store - buffer.storeBytes(of: 0x30, - toByteOffset: nextDigit, - as: UInt8.self) - nextDigit &+= 1 - } else { - // Step 7: Emit the fractional part by repeatedly - // multiplying by 10 to produce successive digits: - var u = UInt32(upperMidpointExact) &<< (28 - 13 &+ binaryExponent) - var l = UInt32(lowerMidpointExact) &<< (28 - 13 &+ binaryExponent) - var t = UInt32(fractionPart) &<< (28 - 13 &+ binaryExponent) - let mask = (UInt32(1) << 28) - 1 - var uDigit: UInt8 = 0 - var lDigit: UInt8 = 0 - while true { - u = (u & mask) &* 10 - l = (l & mask) &* 10 - uDigit = UInt8(truncatingIfNeeded: u >> 28) - lDigit = UInt8(truncatingIfNeeded: l >> 28) - if uDigit != lDigit { - t = (t & mask) &* 10 - break - } - // This overflows, but we don't care at this point. - t &*= 10 - unsafe buffer.storeBytes(of: 0x30 &+ uDigit, - toUncheckedByteOffset: nextDigit, - as: UInt8.self) - nextDigit &+= 1 - } - t &+= 1 << 27 - if (t & mask) == 0 { // Exactly 1/2 - t = (t >> 28) & ~1 // Round last digit even - // Rounding `t` even can end up moving `t` below - // `l`. Detect and correct for this possibility. - // Exhaustive testing shows that the only input value - // affected by this is 0.015625 == 2^-6, which - // incorrectly prints as "0.01562" without this fix. - // With this, it prints correctly as "0.01563" - if t < lDigit || (t == lDigit && l > 0) { - t += 1 - } - } else { - t >>= 28 - } - // Last write on this branch, so use a checked store - buffer.storeBytes(of: UInt8(truncatingIfNeeded: 0x30 + t), - toByteOffset: nextDigit, - as: UInt8.self) - nextDigit &+= 1 + } + t &+= 1 << 27 + if (t & mask) == 0 { // Exactly 1/2 + t = (t >> 28) & ~1 // Round last digit even + // Rounding `t` even can end up moving `t` below + // `l`. 
Detect and correct for this possibility. + // Exhaustive testing shows that the only input value + // affected by this is 0.015625 == 2^-6, which + // incorrectly prints as "0.01562" without this fix. + // With this, it prints correctly as "0.01563" + if t < lDigit || (t == lDigit && l > 0) { + t += 1 } + } else { + t >>= 28 + } + // Last write on this branch, so use a checked store + buffer.storeBytes( + of: UInt8(truncatingIfNeeded: 0x30 + t), + toByteOffset: nextDigit, + as: UInt8.self) + nextDigit &+= 1 } - if f.sign == .minus { - buffer.storeBytes(of: 0x2d, - toByteOffset: firstDigit &- 1, - as: UInt8.self) // "-" - firstDigit &-= 1 - } - return firstDigit.., _ bufferLength: UInt, _ value: Float32, - _ debug: Bool) -> UInt64 { - // Code below works with raw memory. - var buffer = unsafe MutableSpan(_unchecked: textBuffer, - count: Int(bufferLength)) - let textRange = Float32ToASCII(value: value, buffer: &buffer) - let textLength = textRange.upperBound - textRange.lowerBound - - // Move the text to the start of the buffer - if textRange.lowerBound != 0 { - unsafe _memmove(dest: textBuffer, - src: textBuffer + textRange.lowerBound, - size: UInt(truncatingIfNeeded: textLength)) - } - return UInt64(truncatingIfNeeded: textLength) + _ debug: Bool +) -> UInt64 { + // Code below works with raw memory. 
+ var buffer = unsafe MutableSpan( + _unchecked: textBuffer, + count: Int(bufferLength)) + let textRange = _Float32ToASCII(value: value, buffer: &buffer) + let textLength = textRange.upperBound - textRange.lowerBound + + // Move the text to the start of the buffer + if textRange.lowerBound != 0 { + unsafe _memmove( + dest: textBuffer, + src: textBuffer + textRange.lowerBound, + size: UInt(truncatingIfNeeded: textLength)) + } + return UInt64(truncatingIfNeeded: textLength) } -internal func Float32ToASCII( +internal func _Float32ToASCII( value f: Float32, - buffer utf8Buffer: inout MutableSpan) -> Range -{ - if #available(SwiftStdlib 6.2, *) { - return _Float32ToASCII(value: f, buffer: &utf8Buffer) - } else { - return 0..<0 - } + buffer utf8Buffer: inout MutableSpan +) -> Range { + if #available(SwiftStdlib 6.2, *) { + return _Float32ToASCIIImpl(value: f, buffer: &utf8Buffer) + } else { + return 0..<0 + } } @available(SwiftStdlib 6.2, *) -fileprivate func _Float32ToASCII( +fileprivate func _Float32ToASCIIImpl( value f: Float32, - buffer utf8Buffer: inout MutableSpan) -> Range -{ - // Note: The algorithm here is the same as for Float64, only - // with narrower arithmetic. Refer to `_Float64ToASCII` for - // more detailed comments and explanation. 
- - // We need a MutableRawSpan in order to use wide store/load operations - precondition(utf8Buffer.count >= 32) - var buffer = utf8Buffer.mutableBytes - - // Step 1: Handle the special cases, decompose the input - - let binaryExponent: Int - let significand: Float.RawSignificand - let exponentBias = (1 << (Float.exponentBitCount - 1)) - 2; // 126 - if (f.exponentBitPattern == 0xff) { - if (f.isInfinite) { - return infinity(buffer: &buffer, sign: f.sign) - } else { // f.isNaN - let quietBit = (f.significandBitPattern >> (Float.significandBitCount - 1)) & 1 - let payloadMask = UInt32(1 << (Float.significandBitCount - 2)) - 1 - let payload32 = f.significandBitPattern & payloadMask - return nan_details(buffer: &buffer, - sign: f.sign, - quiet: quietBit != 0, - payloadHigh: 0, - payloadLow: UInt64(truncatingIfNeeded:payload32)) - } - } else if (f.exponentBitPattern == 0) { - if (f.isZero) { - return zero(buffer: &buffer, sign: f.sign) - } else { // f.isSubnormal - binaryExponent = 1 - exponentBias - significand = f.significandBitPattern &<< Float.exponentBitCount - } - } else { - binaryExponent = Int(f.exponentBitPattern) &- exponentBias - significand = (f.significandBitPattern &+ (1 << Float.significandBitCount)) &<< Float.exponentBitCount + buffer utf8Buffer: inout MutableSpan +) -> Range { + // Note: The algorithm here is the same as for Float64, only + // with narrower arithmetic. Refer to `_Float64ToASCII` for + // more detailed comments and explanation. 
+ + // We need a MutableRawSpan in order to use wide store/load operations + precondition(utf8Buffer.count >= 32) + var buffer = utf8Buffer.mutableBytes + + // Step 1: Handle the special cases, decompose the input + + let binaryExponent: Int + let significand: Float.RawSignificand + let exponentBias = (1 << (Float.exponentBitCount - 1)) - 2; // 126 + if (f.exponentBitPattern == 0xff) { + if (f.isInfinite) { + return _infinity(buffer: &buffer, sign: f.sign) + } else { // f.isNaN + let quietBit = + (f.significandBitPattern >> (Float.significandBitCount - 1)) & 1 + let payloadMask = UInt32(1 << (Float.significandBitCount - 2)) - 1 + let payload32 = f.significandBitPattern & payloadMask + return nan_details( + buffer: &buffer, + sign: f.sign, + quiet: quietBit != 0, + payloadHigh: 0, + payloadLow: UInt64(truncatingIfNeeded:payload32)) } - - // Step 2: Determine the exact unscaled target interval - - let halfUlp: Float.RawSignificand = 1 << (Float.exponentBitCount - 1) - let quarterUlp = halfUlp >> 1 - let upperMidpointExact = significand &+ halfUlp - let lowerMidpointExact = significand &- ((f.significandBitPattern == 0) ? quarterUlp : halfUlp) - let isOddSignificand = ((f.significandBitPattern & 1) != 0) - - // Step 3: Estimate the base 10 exponent - - var base10Exponent = decimalExponentFor2ToThe(binaryExponent) - - // Step 4: Compute power-of-10 scale factor - - var powerOfTenRoundedDown: UInt64 = 0 - var powerOfTenRoundedUp: UInt64 = 0 - - let bulkFirstDigits = 1 - let powerOfTenExponent = intervalContainingPowerOf10_Binary32( - -base10Exponent &+ bulkFirstDigits &- 1, - &powerOfTenRoundedDown, &powerOfTenRoundedUp) - let extraBits = binaryExponent &+ powerOfTenExponent - - // Step 5: Scale the interval (with rounding) - - // Experimentally, 11 is as large as we can go here without introducing errors. - // We need 7 to generate 2 digits at a time below. - // 11 should allow us to generate 3 digits at a time, but - // that doesn't seem to be any faster. 
- let integerBits = 11 - let fractionBits = 64 - integerBits - var u: UInt64 - var l: UInt64 - if isOddSignificand { - // Narrow the interval (odd significand) - let u1 = multiply64x32RoundingDown(powerOfTenRoundedDown, upperMidpointExact) - u = u1 >> (integerBits - extraBits) - let l1 = multiply64x32RoundingUp(powerOfTenRoundedUp, lowerMidpointExact) - let bias = UInt64((1 &<< (integerBits &- extraBits)) &- 1) - l = (l1 &+ bias) >> (integerBits &- extraBits) - } else { - // Widen the interval (even significand) - let u1 = multiply64x32RoundingUp(powerOfTenRoundedUp, upperMidpointExact) - let bias = UInt64((1 &<< (integerBits &- extraBits)) &- 1) - u = (u1 &+ bias) >> (integerBits &- extraBits) - let l1 = multiply64x32RoundingDown(powerOfTenRoundedDown, lowerMidpointExact) - l = l1 >> (integerBits &- extraBits) + } else if (f.exponentBitPattern == 0) { + if (f.isZero) { + return _zero(buffer: &buffer, sign: f.sign) + } else { // f.isSubnormal + binaryExponent = 1 - exponentBias + significand = f.significandBitPattern &<< Float.exponentBitCount } - - // Step 6: Align first digit, adjust exponent - - while u < (1 &<< fractionBits) { - base10Exponent &-= 1 - l &*= 10 - u &*= 10 - } - - // Step 7: Generate decimal digits into the destination buffer - - var t = u - var delta = u &- l - let fractionMask: UInt64 = (1 << fractionBits) - 1 - - // Write 8 leading zeros to the beginning of the buffer: - unsafe buffer.storeBytes(of: 0x3030303030303030, - toUncheckedByteOffset: 0, - as: UInt64.self) - - // Overwrite the first digit at index 7: - let firstDigit = 7 - let digit = (t >> fractionBits) &+ 0x30 + } else { + binaryExponent = Int(f.exponentBitPattern) &- exponentBias + significand = + ((f.significandBitPattern &+ (1 << Float.significandBitCount)) + &<< Float.exponentBitCount) + } + + // Step 2: Determine the exact unscaled target interval + + let halfUlp: Float.RawSignificand = 1 << (Float.exponentBitCount - 1) + let quarterUlp = halfUlp >> 1 + let upperMidpointExact = 
+ significand &+ halfUlp + let lowerMidpointExact = + significand &- ((f.significandBitPattern == 0) ? quarterUlp : halfUlp) + let isOddSignificand = ((f.significandBitPattern & 1) != 0) + + // Step 3: Estimate the base 10 exponent + + var base10Exponent = decimalExponentFor2ToThe(binaryExponent) + + // Step 4: Compute power-of-10 scale factor + + var powerOfTenRoundedDown: UInt64 = 0 + var powerOfTenRoundedUp: UInt64 = 0 + + let bulkFirstDigits = 1 + let powerOfTenExponent = _intervalContainingPowerOf10_Binary32( + p: -base10Exponent &+ bulkFirstDigits &- 1, + lower: &powerOfTenRoundedDown, + upper: &powerOfTenRoundedUp) + let extraBits = binaryExponent &+ powerOfTenExponent + + // Step 5: Scale the interval (with rounding) + + // Experimentally, 11 is as large as we can go here without + // introducing errors. + // We need 7 to generate 2 digits at a time below. + // 11 should allow us to generate 3 digits at a time, but + // that doesn't seem to be any faster. + let integerBits = 11 + let fractionBits = 64 - integerBits + var u: UInt64 + var l: UInt64 + if isOddSignificand { + // Narrow the interval (odd significand) + let u1 = _multiply64x32RoundingDown( + powerOfTenRoundedDown, + upperMidpointExact) + u = u1 >> (integerBits - extraBits) + let l1 = _multiply64x32RoundingUp( + powerOfTenRoundedUp, + lowerMidpointExact) + let bias = UInt64((1 &<< (integerBits &- extraBits)) &- 1) + l = (l1 &+ bias) >> (integerBits &- extraBits) + } else { + // Widen the interval (even significand) + let u1 = _multiply64x32RoundingUp( + powerOfTenRoundedUp, + upperMidpointExact) + let bias = UInt64((1 &<< (integerBits &- extraBits)) &- 1) + u = (u1 &+ bias) >> (integerBits &- extraBits) + let l1 = _multiply64x32RoundingDown( + powerOfTenRoundedDown, + lowerMidpointExact) + l = l1 >> (integerBits &- extraBits) + } + + // Step 6: Align first digit, adjust exponent + + while u < (1 &<< fractionBits) { + base10Exponent &-= 1 + l &*= 10 + u &*= 10 + } + + // Step 7: Generate decimal 
digits into the destination buffer + + var t = u + var delta = u &- l + let fractionMask: UInt64 = (1 << fractionBits) - 1 + + // Write 8 leading zeros to the beginning of the buffer: + unsafe buffer.storeBytes( + of: 0x3030303030303030, + toUncheckedByteOffset: 0, + as: UInt64.self) + + // Overwrite the first digit at index 7: + let firstDigit = 7 + let digit = (t >> fractionBits) &+ 0x30 + t &= fractionMask + unsafe buffer.storeBytes( + of: UInt8(truncatingIfNeeded: digit), + toUncheckedByteOffset: firstDigit, + as: UInt8.self) + var nextDigit = firstDigit &+ 1 + + // Generate 2 digits at a time... + while (delta &* 10) < ((t &* 10) & fractionMask) { + delta &*= 100 + t &*= 100 + let d12 = Int(truncatingIfNeeded: t >> fractionBits) + let text = unsafe asciiDigitTable[unchecked: d12] + unsafe buffer.storeBytes( + of: text, + toUncheckedByteOffset: nextDigit, + as: UInt16.self) + nextDigit &+= 2 t &= fractionMask - unsafe buffer.storeBytes(of: UInt8(truncatingIfNeeded: digit), - toUncheckedByteOffset: firstDigit, - as: UInt8.self) - var nextDigit = firstDigit &+ 1 - - // Generate 2 digits at a time... - while (delta &* 10) < ((t &* 10) & fractionMask) { - delta &*= 100 - t &*= 100 - let d12 = Int(truncatingIfNeeded: t >> fractionBits) - let text = unsafe asciiDigitTable[unchecked: d12] - unsafe buffer.storeBytes(of: text, - toUncheckedByteOffset: nextDigit, - as: UInt16.self) - nextDigit &+= 2 - t &= fractionMask - } - - // ... and a final single digit, if necessary - if delta < t { - delta &*= 10 - t &*= 10 - let text = 0x30 + UInt8(truncatingIfNeeded: t >> fractionBits) - unsafe buffer.storeBytes(of: text, - toUncheckedByteOffset: nextDigit, - as: UInt8.self) - nextDigit &+= 1 - t &= fractionMask + } + + // ... 
and a final single digit, if necessary + if delta < t { + delta &*= 10 + t &*= 10 + let text = 0x30 + UInt8(truncatingIfNeeded: t >> fractionBits) + unsafe buffer.storeBytes( + of: text, + toUncheckedByteOffset: nextDigit, + as: UInt8.self) + nextDigit &+= 1 + t &= fractionMask + } + + // Adjust the final digit to be closer to the original value + let isBoundary = (f.significandBitPattern == 0) + if delta > t &+ (1 &<< fractionBits) { + let skew: UInt64 + if isBoundary { + skew = delta &- delta / 3 &- t + } else { + skew = delta / 2 &- t } - - // Adjust the final digit to be closer to the original value - let isBoundary = (f.significandBitPattern == 0) - if delta > t &+ (1 &<< fractionBits) { - let skew: UInt64 - if isBoundary { - skew = delta &- delta / 3 &- t - } else { - skew = delta / 2 &- t - } - let one = UInt64(1) << (64 - integerBits) - let lastAccurateBit = UInt64(1) << 24 - let fractionMask = (one - 1) & ~(lastAccurateBit - 1); - let oneHalf = one >> 1 - var lastDigit = unsafe buffer.unsafeLoad(fromUncheckedByteOffset: nextDigit &- 1, - as: UInt8.self) - if ((skew &+ (lastAccurateBit >> 1)) & fractionMask) == oneHalf { - // Skew is integer + 1/2, round even after adjustment - let adjust = skew >> (64 - integerBits) - lastDigit &-= UInt8(truncatingIfNeeded: adjust) - lastDigit &= ~1 - } else { - // Round nearest - let adjust = (skew &+ oneHalf) >> (64 - integerBits) - lastDigit &-= UInt8(truncatingIfNeeded: adjust) - } - unsafe buffer.storeBytes(of: lastDigit, - toUncheckedByteOffset: nextDigit &- 1, - as: UInt8.self) + let one = UInt64(1) << (64 - integerBits) + let lastAccurateBit = UInt64(1) << 24 + let fractionMask = (one - 1) & ~(lastAccurateBit - 1); + let oneHalf = one >> 1 + var lastDigit = unsafe buffer.unsafeLoad( + fromUncheckedByteOffset: nextDigit &- 1, + as: UInt8.self) + if ((skew &+ (lastAccurateBit >> 1)) & fractionMask) == oneHalf { + // Skew is integer + 1/2, round even after adjustment + let adjust = skew >> (64 - integerBits) + 
lastDigit &-= UInt8(truncatingIfNeeded: adjust) + lastDigit &= ~1 + } else { + // Round nearest + let adjust = (skew &+ oneHalf) >> (64 - integerBits) + lastDigit &-= UInt8(truncatingIfNeeded: adjust) } - - // Step 8: Finish formatting - let forceExponential = (binaryExponent > 25) || (binaryExponent == 25 && !isBoundary) - return finishFormatting(&buffer, f.sign, firstDigit, nextDigit, - forceExponential, base10Exponent) + unsafe buffer.storeBytes( + of: lastDigit, + toUncheckedByteOffset: nextDigit &- 1, + as: UInt8.self) + } + + // Step 8: Finish formatting + let forceExponential = + ((binaryExponent > 25) + || (binaryExponent == 25 && !isBoundary)) + return _finishFormatting( + buffer: &buffer, + sign: f.sign, + firstDigit: firstDigit, + nextDigit: nextDigit, + forceExponential: forceExponential, + base10Exponent: base10Exponent) } // ================================================================ @@ -624,457 +679,491 @@ public func _float64ToStringImpl( _ textBuffer: UnsafeMutablePointer, _ bufferLength: UInt, _ value: Float64, - _ debug: Bool) -> UInt64 { - // Code below works with raw memory. - var buffer = unsafe MutableSpan(_unchecked: textBuffer, - count: Int(bufferLength)) - let textRange = Float64ToASCII(value: value, buffer: &buffer) - let textLength = textRange.upperBound - textRange.lowerBound - - // Move the text to the start of the buffer - if textRange.lowerBound != 0 { - unsafe _memmove(dest: textBuffer, - src: textBuffer + textRange.lowerBound, - size: UInt(truncatingIfNeeded: textLength)) - } - return UInt64(truncatingIfNeeded: textLength) + _ debug: Bool +) -> UInt64 { + // Code below works with raw memory. 
+ var buffer = unsafe MutableSpan( + _unchecked: textBuffer, + count: Int(bufferLength)) + let textRange = _Float64ToASCII(value: value, buffer: &buffer) + let textLength = textRange.upperBound - textRange.lowerBound + + // Move the text to the start of the buffer + if textRange.lowerBound != 0 { + unsafe _memmove( + dest: textBuffer, + src: textBuffer + textRange.lowerBound, + size: UInt(truncatingIfNeeded: textLength)) + } + return UInt64(truncatingIfNeeded: textLength) } -internal func Float64ToASCII( +internal func _Float64ToASCII( value d: Float64, - buffer utf8Buffer: inout MutableSpan) -> Range -{ - if #available(SwiftStdlib 6.2, *) { - return _Float64ToASCII(value: d, buffer: &utf8Buffer) - } else { - return 0..<0 - } + buffer utf8Buffer: inout MutableSpan +) -> Range { + if #available(SwiftStdlib 6.2, *) { + return _Float64ToASCIIImpl(value: d, buffer: &utf8Buffer) + } else { + return 0..<0 + } } @available(SwiftStdlib 6.2, *) -fileprivate func _Float64ToASCII( +fileprivate func _Float64ToASCIIImpl( value d: Float64, - buffer utf8Buffer: inout MutableSpan) -> Range -{ - // We need a MutableRawSpan in order to use wide store/load operations - precondition(utf8Buffer.count >= 32) - var buffer = utf8Buffer.mutableBytes - - // - // Step 1: Handle the special cases, decompose the input - // - let binaryExponent: Int - let significand: Double.RawSignificand - let exponentBias = (1 << (Double.exponentBitCount - 1)) - 2; // 1022 - - if (d.exponentBitPattern == 0x7ff) { - if (d.isInfinite) { - return infinity(buffer: &buffer, sign: d.sign) - } else { // d.isNaN - let quietBit = (d.significandBitPattern >> (Double.significandBitCount - 1)) & 1 - let payloadMask = UInt64(1 << (Double.significandBitCount - 2)) - 1 - let payload64 = d.significandBitPattern & payloadMask - return nan_details(buffer: &buffer, - sign: d.sign, - quiet: quietBit != 0, - payloadHigh: 0, - payloadLow: UInt64(truncatingIfNeeded:payload64)) - } - } else if (d.exponentBitPattern == 0) { - if 
(d.isZero) { - return zero(buffer: &buffer, sign: d.sign) - } else { // d.isSubnormal - binaryExponent = 1 - exponentBias - significand = d.significandBitPattern &<< Double.exponentBitCount - } - } else { - binaryExponent = Int(d.exponentBitPattern) &- exponentBias - significand = (d.significandBitPattern &+ (1 << Double.significandBitCount)) &<< Double.exponentBitCount + buffer utf8Buffer: inout MutableSpan +) -> Range { + // We need a MutableRawSpan in order to use wide store/load operations + precondition(utf8Buffer.count >= 32) + var buffer = utf8Buffer.mutableBytes + + // + // Step 1: Handle the special cases, decompose the input + // + let binaryExponent: Int + let significand: Double.RawSignificand + let exponentBias = (1 << (Double.exponentBitCount - 1)) - 2; // 1022 + + if (d.exponentBitPattern == 0x7ff) { + if (d.isInfinite) { + return _infinity(buffer: &buffer, sign: d.sign) + } else { // d.isNaN + let quietBit = + (d.significandBitPattern >> (Double.significandBitCount - 1)) & 1 + let payloadMask = UInt64(1 << (Double.significandBitCount - 2)) - 1 + let payload64 = d.significandBitPattern & payloadMask + return nan_details( + buffer: &buffer, + sign: d.sign, + quiet: quietBit != 0, + payloadHigh: 0, + payloadLow: UInt64(truncatingIfNeeded:payload64)) + } + } else if (d.exponentBitPattern == 0) { + if (d.isZero) { + return _zero(buffer: &buffer, sign: d.sign) + } else { // d.isSubnormal + binaryExponent = 1 - exponentBias + significand = d.significandBitPattern &<< Double.exponentBitCount } - // The input has been decomposed as significand * 2^binaryExponent, - // where `significand` is a 64-bit fraction with the binary - // point at the far left. - - // Step 2: Determine the exact unscaled target interval - - // Grisu-style algorithms construct the shortest decimal digit - // sequence within a specific interval. To build the appropriate - // interval, we start by computing the midpoints between this - // floating-point value and the adjacent ones. 
Note that this - // step is an exact computation. - - let halfUlp: Double.RawSignificand = 1 << (Double.exponentBitCount - 1) - let quarterUlp = halfUlp >> 1 - let upperMidpointExact = significand &+ halfUlp - let lowerMidpointExact = significand &- ((d.significandBitPattern == 0) ? quarterUlp : halfUlp) - let isOddSignificand = ((d.significandBitPattern & 1) != 0) - - // Step 3: Estimate the base 10 exponent - - // Grisu algorithms are based in part on a simple technique for - // generating a base-10 form for a binary floating-point number. - // Start with a binary floating-point number `f * 2^e` and then - // estimate the decimal exponent `p`. You can then rewrite your - // original number as: - // - // ``` - // f * 2^e * 10^-p * 10^p - // ``` - // - // The last term is part of our output, and a good estimate for - // `p` will ensure that `2^e * 10^-p` is close to 1. Multiplying - // the first three terms then yields a fraction suitable for - // producing the decimal digits. Here we use a very fast estimate - // of `p` that is never off by more than 1; we'll have - // opportunities later to correct any error. - - var base10Exponent = decimalExponentFor2ToThe(binaryExponent) - - // Step 4: Compute power-of-10 scale factor - - // Compute `10^-p` to 128-bit precision. We generate - // both over- and under-estimates to ensure we can exactly - // bound the later use of these values. - // The `powerOfTenRounded{Up,Down}` values are 128-bit - // pure fractions with the decimal point at the far left. - - var powerOfTenRoundedDown: UInt128 = 0 - var powerOfTenRoundedUp: UInt128 = 0 - - // Note the extra factor of 10^bulkFirstDigits -- that will give - // us a headstart on digit generation later on. (In contrast, Ryu - // uses an extra factor of 10^17 here to get all the digits up - // front, but then has to back out any extra digits. Doing that - // with a 17-digit value requires 64-bit division, which is the - // root cause of Ryu's poor performance on 32-bit processors. 
We - // also might have to back out extra digits if 7 is too many, but - // will only need 32-bit division in that case.) - - let bulkFirstDigits = 7 - let bulkFirstDigitFactor = 1000000 // 10^(bulkFirstDigits - 1) - - let powerOfTenExponent = intervalContainingPowerOf10_Binary64( - -base10Exponent &+ bulkFirstDigits &- 1, - &powerOfTenRoundedDown, &powerOfTenRoundedUp) - - let extraBits = binaryExponent + powerOfTenExponent - - // Step 5: Scale the interval (with rounding) - - // As mentioned above, the final digit generation works - // with an interval, so we actually apply the scaling - // to the upper and lower midpoint values separately. - - // As part of the scaling here, we'll switch from a pure - // fraction with zero bit integer portion and 128-bit fraction - // to a fixed-point form with 32 bits in the integer portion. - - let integerBits = 32 - let roundingBias = UInt128((1 &<< UInt64(truncatingIfNeeded: integerBits &- extraBits)) &- 1) - var u: UInt128 - var l: UInt128 - if isOddSignificand { - // Case A: Narrow the interval (odd significand) - - // Loitsch' original Grisu2 always rounds so as to narrow the - // interval. Since our digit generation will select a value - // within the scaled interval, narrowing the interval - // guarantees that we will find a digit sequence that converts - // back to the original value. - - // This ensures accuracy but, as explained in Loitsch' paper, - // this carries a risk that there will be a shorter digit - // sequence outside of our narrowed interval that we will - // miss. This risk obviously gets lower with increased - // precision, but it wasn't until the Errol paper that anyone - // had a good way to test whether a particular implementation - // had sufficient precision. That paper shows a way to enumerate - // the worst-case numbers; those numbers that are extremely close - // to the mid-points between adjacent floating-point values. 
- // These are the values that might sit just outside of the - // narrowed interval. By testing these values, we can verify - // the correctness of our implementation. - - // Multiply out the upper midpoint, rounding down... - let u1 = multiply128x64RoundingDown(powerOfTenRoundedDown, upperMidpointExact) - // Account for residual binary exponent and adjust - // to the fixed-point format - u = u1 >> (integerBits - extraBits) - - // Conversely for the lower midpoint... - let l1 = multiply128x64RoundingUp(powerOfTenRoundedUp, lowerMidpointExact) - l = (l1 + roundingBias) >> (integerBits - extraBits) + } else { + binaryExponent = Int(d.exponentBitPattern) &- exponentBias + significand = + ((d.significandBitPattern &+ (1 << Double.significandBitCount)) + &<< Double.exponentBitCount) + } + // The input has been decomposed as significand * 2^binaryExponent, + // where `significand` is a 64-bit fraction with the binary + // point at the far left. + + // Step 2: Determine the exact unscaled target interval + + // Grisu-style algorithms construct the shortest decimal digit + // sequence within a specific interval. To build the appropriate + // interval, we start by computing the midpoints between this + // floating-point value and the adjacent ones. Note that this + // step is an exact computation. + + let halfUlp: Double.RawSignificand = 1 << (Double.exponentBitCount - 1) + let quarterUlp = halfUlp >> 1 + let upperMidpointExact = significand &+ halfUlp + let lowerMidpointExact = + significand &- ((d.significandBitPattern == 0) ? quarterUlp : halfUlp) + let isOddSignificand = ((d.significandBitPattern & 1) != 0) + + // Step 3: Estimate the base 10 exponent + + // Grisu algorithms are based in part on a simple technique for + // generating a base-10 form for a binary floating-point number. + // Start with a binary floating-point number `f * 2^e` and then + // estimate the decimal exponent `p`. 
You can then rewrite your + // original number as: + // + // ``` + // f * 2^e * 10^-p * 10^p + // ``` + // + // The last term is part of our output, and a good estimate for + // `p` will ensure that `2^e * 10^-p` is close to 1. Multiplying + // the first three terms then yields a fraction suitable for + // producing the decimal digits. Here we use a very fast estimate + // of `p` that is never off by more than 1; we'll have + // opportunities later to correct any error. + + var base10Exponent = decimalExponentFor2ToThe(binaryExponent) + + // Step 4: Compute power-of-10 scale factor + + // Compute `10^-p` to 128-bit precision. We generate + // both over- and under-estimates to ensure we can exactly + // bound the later use of these values. + // The `powerOfTenRounded{Up,Down}` values are 128-bit + // pure fractions with the decimal point at the far left. + + var powerOfTenRoundedDown: UInt128 = 0 + var powerOfTenRoundedUp: UInt128 = 0 + + // Note the extra factor of 10^bulkFirstDigits -- that will give + // us a headstart on digit generation later on. (In contrast, Ryu + // uses an extra factor of 10^17 here to get all the digits up + // front, but then has to back out any extra digits. Doing that + // with a 17-digit value requires 64-bit division, which is the + // root cause of Ryu's poor performance on 32-bit processors. We + // also might have to back out extra digits if 7 is too many, but + // will only need 32-bit division in that case.) 
+ + let bulkFirstDigits = 7 + let bulkFirstDigitFactor = 1000000 // 10^(bulkFirstDigits - 1) + + let powerOfTenExponent = _intervalContainingPowerOf10_Binary64( + p: -base10Exponent &+ bulkFirstDigits &- 1, + lower: &powerOfTenRoundedDown, + upper: &powerOfTenRoundedUp) + + let extraBits = binaryExponent + powerOfTenExponent + + // Step 5: Scale the interval (with rounding) + + // As mentioned above, the final digit generation works + // with an interval, so we actually apply the scaling + // to the upper and lower midpoint values separately. + + // As part of the scaling here, we'll switch from a pure + // fraction with zero bit integer portion and 128-bit fraction + // to a fixed-point form with 32 bits in the integer portion. + + let integerBits = 32 + let roundingBias = + UInt128((1 &<< UInt64(truncatingIfNeeded: integerBits &- extraBits)) &- 1) + var u: UInt128 + var l: UInt128 + if isOddSignificand { + // Case A: Narrow the interval (odd significand) + + // Loitsch' original Grisu2 always rounds so as to narrow the + // interval. Since our digit generation will select a value + // within the scaled interval, narrowing the interval + // guarantees that we will find a digit sequence that converts + // back to the original value. + + // This ensures accuracy but, as explained in Loitsch' paper, + // this carries a risk that there will be a shorter digit + // sequence outside of our narrowed interval that we will + // miss. This risk obviously gets lower with increased + // precision, but it wasn't until the Errol paper that anyone + // had a good way to test whether a particular implementation + // had sufficient precision. That paper shows a way to enumerate + // the worst-case numbers; those numbers that are extremely close + // to the mid-points between adjacent floating-point values. + // These are the values that might sit just outside of the + // narrowed interval. By testing these values, we can verify + // the correctness of our implementation. 
+ + // Multiply out the upper midpoint, rounding down... + let u1 = _multiply128x64RoundingDown( + powerOfTenRoundedDown, + upperMidpointExact) + // Account for residual binary exponent and adjust + // to the fixed-point format + u = u1 >> (integerBits - extraBits) + + // Conversely for the lower midpoint... + let l1 = _multiply128x64RoundingUp( + powerOfTenRoundedUp, + lowerMidpointExact) + l = (l1 + roundingBias) >> (integerBits - extraBits) + } else { + // Case B: Widen the interval (even significand) + + // As explained in Errol Theorem 6, in certain cases there is + // a short decimal representation at the exact boundary of the + // scaled interval. When such a number is converted back to + // binary, it will get rounded to the adjacent even + // significand. + + // So when the significand is even, we round so as to widen + // the interval in order to ensure that the exact midpoints + // are considered. Of course, this ensures that we find a + // short result but carries a risk of selecting a result + // outside of the exact scaled interval (which would be + // inaccurate). + // (This technique of rounding differently for even/odd significands + // seems to be new; I've not seen it described in any of the + // papers on floating-point printing.) + + // The same testing approach described above (based on results + // in the Errol paper) also applies + // to this case. + + let u1 = _multiply128x64RoundingUp( + powerOfTenRoundedUp, + upperMidpointExact) + u = (u1 &+ roundingBias) >> (integerBits - extraBits) + let l1 = _multiply128x64RoundingDown( + powerOfTenRoundedDown, + lowerMidpointExact) + l = l1 >> (integerBits - extraBits) + } + + // Step 6: Align the first digit, adjust exponent + + // Calculations above used an estimate for the power-of-ten scale. + // Here, we compensate for any error in that estimate by testing + // whether we have the expected number of digits in the integer + // portion and correcting as necessary.
This also serves to + // prune leading zeros from subnormals. + + // Except for subnormals, this loop never runs more than once. + // For subnormals, this might run as many as 16 times. + let minimumU = UInt128(bulkFirstDigitFactor) << (128 - integerBits) + while u < minimumU { + base10Exponent -= 1 + l &*= 10 + u &*= 10 + } + + // Step 7: Produce decimal digits + + // One standard approach generates digits for the scaled upper and + // lower boundaries and stops at the first digit that + // differs. For example, note that 0.1234 is the shortest decimal + // between u = 0.123456 and l = 0.123345. + + // Grisu optimizes this by generating digits for the upper bound + // (multiplying by 10 to isolate each digit) while simultaneously + // scaling the interval width `delta`. As we remove each digit + // from the upper bound, the remainder is the difference between + // the base-10 value generated so far and the true upper bound. + // When that remainder is less than the scaled width of the + // interval, we know the current digits specify a value within the + // target interval. + + // The logic below actually blends three different digit-generation + // strategies: + // * The first digits are already in the integer portion of the + // fixed-point value, thanks to the `bulkFirstDigits` factor above. + // We can just break those down and write them out. + // * If we generated too many digits, we use a Ryu-inspired technique + // to backtrack. + // * If we generated too few digits (the usual case), we use an + // optimized form of the Grisu2 method to produce the remaining + // values. + + // + // Generate digits and build the output. 
+ // + + // Generate digits for `t` with interval width `delta = u - l` + // As above, these are fixed-point with 32-bit integer, 96-bit fraction + var t = u + var delta = u &- l + let fractionMask = (UInt128(1) << 96) - 1 + + var nextDigit = 5 + var firstDigit = nextDigit + unsafe buffer.storeBytes( + of: 0x3030303030303030 as UInt64, + toUncheckedByteOffset: 0, + as: UInt64.self) + + // Our initial scaling gave us the first 7 digits already: + let d12345678 = UInt32(truncatingIfNeeded: t._high >> 32) + t &= fractionMask + + if delta >= t { + // Oops! We have too many digits. Back out the extra ones to + // get the right answer. This is similar to Ryu, but since + // we've only produced seven digits, we only need 32-bit + // arithmetic here. (Ryu needs 64-bit arithmetic to back out + // digits, which severely compromises performance on 32-bit + // processors. The same problem occurs with Ryu for 128-bit + // floats on 64-bit processors.) + // A few notes: + // * Our target hardware always supports 32-bit hardware division, + // so this should be reasonably fast. + // * For small integers (like "2.0"), Ryu would have to back out 16 + // digits; we only have to back out 6. + // * Very few double-precision values actually need fewer than 7 + // digits. So this is rarely used except in workloads that + // specifically use double for small integers. + + // Why this is critical for performance: In order to use the + // 8-digits-at-a-time optimization below, we need at least 30 + // bits in the integer part of our fixed-point format above. + // If we only use bulkDigits = 1, that leaves only 128 - 30 = + // 98 bit accuracy for our scaling step, which isn't enough + // (experiments suggest that binary64 needs ~110 bits for + // correctness). So we have to use a large bulkDigits value + // to make full use of the 128-bit scaling above, which forces + // us to have some form of logic to handle the case of too + // many digits. 
The alternatives are either to use >128 bit + // arithmetic, or to back up and repeat the original scaling + // with bulkDigits = 1. + + let uHigh = u._high + let lHigh = (l &+ UInt128(UInt64.max))._high + let tHigh: UInt64 + if d.significand == 0 { + tHigh = (uHigh &+ lHigh &* 2) / 3 } else { - // Case B: Widen the interval (even significand) - - // As explained in Errol Theorem 6, in certain cases there is - // a short decimal representation at the exact boundary of the - // scaled interval. When such a number is converted back to - // binary, it will get rounded to the adjacent even - // significand. - - // So when the significand is even, we round so as to widen - // the interval in order to ensure that the exact midpoints - // are considered. Of couse, this ensures that we find a - // short result but carries a risk of selecting a result - // outside of the exact scaled interval (which would be - // inaccurate). - // (This technique of rounding differently for even/odd significands - // seems to be new; I've not seen it described in any of the - // papers on floating-point printing.) - - // The same testing approach described above (based on results - // in the Errol paper) also applies - // to this case. - - let u1 = multiply128x64RoundingUp(powerOfTenRoundedUp, upperMidpointExact) - u = (u1 &+ roundingBias) >> (integerBits - extraBits) - let l1 = multiply128x64RoundingDown(powerOfTenRoundedDown, lowerMidpointExact) - l = l1 >> (integerBits - extraBits) + tHigh = (uHigh &+ lHigh) / 2 } - - // Step 6: Align the first digit, adjust exponent - - // Calculations above used an estimate for the power-of-ten scale. - // Here, we compensate for any error in that estimate by testing - // whether we have the expected number of digits in the integer - // portion and correcting as necessary. This also serves to - // prune leading zeros from subnormals. - - // Except for subnormals, this loop never runs more than once. 
- // For subnormals, this might run as many as 16 times. - let minimumU = UInt128(bulkFirstDigitFactor) << (128 - integerBits) - while u < minimumU { - base10Exponent -= 1 - l &*= 10 - u &*= 10 + var u0 = UInt32(truncatingIfNeeded: uHigh >> (64 - integerBits)) + var l0 = UInt32(truncatingIfNeeded: lHigh >> (64 - integerBits)) + if lHigh & ((1 << (64 - integerBits)) - 1) != 0 { + l0 &+= 1 + } + var t0 = UInt32(truncatingIfNeeded: tHigh >> (64 - integerBits)) + var t0digits = 8 + + var u1 = u0 / 10 + var l1 = (l0 &+ 9) / 10 + var trailingZeros = (t == 0) + var droppedDigit = UInt32( + truncatingIfNeeded: ((tHigh &* 10) >> (64 - integerBits)) % 10) + while u1 >= l1 && u1 != 0 { + u0 = u1 + l0 = l1 + trailingZeros = trailingZeros && (droppedDigit == 0) + droppedDigit = t0 % 10 + t0 /= 10 + t0digits -= 1 + u1 = u0 / 10 + l1 = (l0 &+ 9) / 10 + } + // Correct the final digit + if droppedDigit > 5 || (droppedDigit == 5 && !trailingZeros) { // > 0.5000 + t0 &+= 1 + } else if droppedDigit == 5 && trailingZeros { // == 0.5000 + t0 &+= 1 + t0 &= ~1 + } + // t0 has t0digits digits. Write them out + let text = _intToEightDigits(t0) >> ((8 - t0digits) * 8) + buffer.storeBytes( + of: text, + toByteOffset: nextDigit, + as: UInt64.self) + nextDigit &+= t0digits + firstDigit &+= 1 + } else { + // Our initial scaling did not produce too many digits. The + // `d12345678` value holds the first 7 digits (plus a leading + // zero). The remainder of this algorithm is basically just a + // heavily-optimized variation of Grisu2. + + // Write out exactly 8 digits, assuming little-endian. + let chars = _intToEightDigits(d12345678) + unsafe buffer.storeBytes( + of: chars, + toUncheckedByteOffset: nextDigit, + as: UInt64.self) + nextDigit &+= 8 + firstDigit &+= 1 + + // >90% of random binary64 values need at least 15 digits. + // We have seven so there's probably at least 8 more, which + // we can grab all at once. 
+ let TenToTheEighth = 100000000 as UInt128; // 10^(15-bulkFirstDigits) + let d0 = delta * TenToTheEighth + var t0 = t * TenToTheEighth + // The integer part of t0 is the next 8 digits + let next8Digits = UInt32(truncatingIfNeeded: t0._high >> 32) + t0 &= fractionMask + if d0 < t0 { + // We got 8 more digits! (So number is at least 15 digits) + // Write them out: + let chars = _intToEightDigits(next8Digits) + unsafe buffer.storeBytes( + of: chars, + toUncheckedByteOffset: nextDigit, + as: UInt64.self) + nextDigit &+= 8 + t = t0 + delta = d0 } - // Step 7: Produce decimal digits - - // One standard approach generates digits for the scaled upper and - // lower boundaries and stops at the first digit that - // differs. For example, note that 0.1234 is the shortest decimal - // between u = 0.123456 and l = 0.123345. - - // Grisu optimizes this by generating digits for the upper bound - // (multiplying by 10 to isolate each digit) while simultaneously - // scaling the interval width `delta`. As we remove each digit - // from the upper bound, the remainder is the difference between - // the base-10 value generated so far and the true upper bound. - // When that remainder is less than the scaled width of the - // interval, we know the current digits specify a value within the - // target interval. - - // The logic below actually blends three different digit-generation - // strategies: - // * The first digits are already in the integer portion of the - // fixed-point value, thanks to the `bulkFirstDigits` factor above. - // We can just break those down and write them out. - // * If we generated too many digits, we use a Ryu-inspired technique - // to backtrack. - // * If we generated too few digits (the usual case), we use an - // optimized form of the Grisu2 method to produce the remaining - // values. - - // - // Generate digits and build the output. 
- // - - // Generate digits for `t` with interval width `delta = u - l` - // As above, these are fixed-point with 32-bit integer, 96-bit fraction - var t = u - var delta = u &- l - let fractionMask = (UInt128(1) << 96) - 1 - - var nextDigit = 5 - var firstDigit = nextDigit - unsafe buffer.storeBytes(of: 0x3030303030303030 as UInt64, - toUncheckedByteOffset: 0, - as: UInt64.self) - - // Our initial scaling gave us the first 7 digits already: - let d12345678 = UInt32(truncatingIfNeeded: t._high >> 32) - t &= fractionMask - - if delta >= t { - // Oops! We have too many digits. Back out the extra ones to - // get the right answer. This is similar to Ryu, but since - // we've only produced seven digits, we only need 32-bit - // arithmetic here. (Ryu needs 64-bit arithmetic to back out - // digits, which severely compromises performance on 32-bit - // processors. The same problem occurs with Ryu for 128-bit - // floats on 64-bit processors.) - // A few notes: - // * Our target hardware always supports 32-bit hardware division, - // so this should be reasonably fast. - // * For small integers (like "2.0"), Ryu would have to back out 16 - // digits; we only have to back out 6. - // * Very few double-precision values actually need fewer than 7 - // digits. So this is rarely used except in workloads that - // specifically use double for small integers. - - // Why this is critical for performance: In order to use the - // 8-digits-at-a-time optimization below, we need at least 30 - // bits in the integer part of our fixed-point format above. - // If we only use bulkDigits = 1, that leaves only 128 - 30 = - // 98 bit accuracy for our scaling step, which isn't enough - // (experiments suggest that binary64 needs ~110 bits for - // correctness). So we have to use a large bulkDigits value - // to make full use of the 128-bit scaling above, which forces - // us to have some form of logic to handle the case of too - // many digits. 
The alternatives are either to use >128 bit - // arithmetic, or to back up and repeat the original scaling - // with bulkDigits = 1. - - let uHigh = u._high - let lHigh = (l &+ UInt128(UInt64.max))._high - let tHigh: UInt64 - if d.significand == 0 { - tHigh = (uHigh &+ lHigh &* 2) / 3 - } else { - tHigh = (uHigh &+ lHigh) / 2 - } - var u0 = UInt32(truncatingIfNeeded: uHigh >> (64 - integerBits)) - var l0 = UInt32(truncatingIfNeeded: lHigh >> (64 - integerBits)) - if lHigh & ((1 << (64 - integerBits)) - 1) != 0 { - l0 &+= 1 - } - var t0 = UInt32(truncatingIfNeeded: tHigh >> (64 - integerBits)) - var t0digits = 8 - - var u1 = u0 / 10 - var l1 = (l0 &+ 9) / 10 - var trailingZeros = (t == 0) - var droppedDigit = UInt32(truncatingIfNeeded: ((tHigh &* 10) >> (64 - integerBits)) % 10) - while u1 >= l1 && u1 != 0 { - u0 = u1 - l0 = l1 - trailingZeros = trailingZeros && (droppedDigit == 0) - droppedDigit = t0 % 10 - t0 /= 10 - t0digits -= 1 - u1 = u0 / 10 - l1 = (l0 &+ 9) / 10 - } - // Correct the final digit - if droppedDigit > 5 || (droppedDigit == 5 && !trailingZeros) { // > 0.5000 - t0 &+= 1 - } else if droppedDigit == 5 && trailingZeros { // == 0.5000 - t0 &+= 1 - t0 &= ~1 - } - // t0 has t0digits digits. Write them out - let text = intToEightDigits(t0) >> ((8 - t0digits) * 8) - buffer.storeBytes(of: text, - toByteOffset: nextDigit, - as: UInt64.self) - nextDigit &+= t0digits - firstDigit &+= 1 - } else { - // Our initial scaling did not produce too many digits. The - // `d12345678` value holds the first 7 digits (plus a leading - // zero). The remainder of this algorithm is basically just a - // heavily-optimized variation of Grisu2. - - // Write out exactly 8 digits, assuming little-endian. - let chars = intToEightDigits(d12345678) - unsafe buffer.storeBytes(of: chars, - toUncheckedByteOffset: nextDigit, - as: UInt64.self) - nextDigit &+= 8 - firstDigit &+= 1 - - // >90% of random binary64 values need at least 15 digits. 
- // We have seven so there's probably at least 8 more, which - // we can grab all at once. - let TenToTheEighth = 100000000 as UInt128; // 10^(15-bulkFirstDigits) - let d0 = delta * TenToTheEighth - var t0 = t * TenToTheEighth - // The integer part of t0 is the next 8 digits - let next8Digits = UInt32(truncatingIfNeeded: t0._high >> 32) - t0 &= fractionMask - if d0 < t0 { - // We got 8 more digits! (So number is at least 15 digits) - // Write them out: - let chars = intToEightDigits(next8Digits) - unsafe buffer.storeBytes(of: chars, - toUncheckedByteOffset: nextDigit, - as: UInt64.self) - nextDigit &+= 8 - t = t0 - delta = d0 - } - - // Generate remaining digits one at a time, following Grisu: - while (delta < t) { - delta &*= 10 - t &*= 10 - unsafe buffer.storeBytes(of: UInt8(truncatingIfNeeded: t._high >> 32) &+ 0x30, - toUncheckedByteOffset: nextDigit, - as: UInt8.self) - nextDigit &+= 1 - t &= fractionMask - } - - // Adjust the final digit to be closer to the original value. - // This accounts for the fact that sometimes there is more than - // one shortest digit sequence. - - // For example, consider how the above would work if you had the - // value 0.1234 and computed u = 0.1257, l = 0.1211. The above - // digit generation works with `u`, so produces 0.125. But the - // values 0.122, 0.123, and 0.124 are just as short and 0.123 is - // therefore the best choice, since it's closest to the original - // value. 
- - // We know delta and t are both less than 10.0 here, so we can - // shed some excess integer bits to simplify the following: - let adjustIntegerBits = 4 // Integer bits for "adjust" phase - let deltaHigh64 = UInt64(truncatingIfNeeded: delta >> (64 - integerBits + adjustIntegerBits)) - let tHigh64 = UInt64(truncatingIfNeeded: t >> (64 - integerBits + adjustIntegerBits)) - - let one = UInt64(1) << (64 - adjustIntegerBits) - let adjustFractionMask = one - 1; - let oneHalf = one >> 1; - if deltaHigh64 >= tHigh64 &+ one { - // The `skew` is the difference between our - // computed digits and the original exact value. - var skew: UInt64 - if (d.significandBitPattern == 0) { - skew = deltaHigh64 &- deltaHigh64 / 3 &- tHigh64 - } else { - skew = deltaHigh64 / 2 &- tHigh64 - } - - var lastDigit = unsafe buffer.unsafeLoad(fromUncheckedByteOffset: nextDigit - 1, - as: UInt8.self) - - // We use the `skew` to figure out whether there's - // a better base-10 value than our current one. - if (skew & adjustFractionMask) == oneHalf { - // Difference is an integer + exactly 1/2, so ... - let adjust = skew >> (64 - adjustIntegerBits) - lastDigit &-= UInt8(truncatingIfNeeded: adjust) - // ... we round the last digit even. - lastDigit &= ~1 - } else { - let adjust = (skew + oneHalf) >> (64 - adjustIntegerBits) - lastDigit &-= UInt8(truncatingIfNeeded: adjust) - } - buffer.storeBytes(of: lastDigit, - toByteOffset: nextDigit - 1, - as: UInt8.self) - } + // Generate remaining digits one at a time, following Grisu: + while (delta < t) { + delta &*= 10 + t &*= 10 + unsafe buffer.storeBytes( + of: UInt8(truncatingIfNeeded: t._high >> 32) &+ 0x30, + toUncheckedByteOffset: nextDigit, + as: UInt8.self) + nextDigit &+= 1 + t &= fractionMask } - // Step 8: Finalize formatting by rearranging - // the digits and filling in decimal points, - // exponents, and zero padding. 
- let isBoundary = (d.significandBitPattern == 0) - let forceExponential = (binaryExponent > 54) || (binaryExponent == 54 && !isBoundary) - return finishFormatting(&buffer, d.sign, firstDigit, nextDigit, - forceExponential, base10Exponent) + // Adjust the final digit to be closer to the original value. + // This accounts for the fact that sometimes there is more than + // one shortest digit sequence. + + // For example, consider how the above would work if you had the + // value 0.1234 and computed u = 0.1257, l = 0.1211. The above + // digit generation works with `u`, so produces 0.125. But the + // values 0.122, 0.123, and 0.124 are just as short and 0.123 is + // therefore the best choice, since it's closest to the original + // value. + + // We know delta and t are both less than 10.0 here, so we can + // shed some excess integer bits to simplify the following: + let adjustIntegerBits = 4 // Integer bits for "adjust" phase + let deltaHigh64 = UInt64( + truncatingIfNeeded: delta >> (64 - integerBits + adjustIntegerBits)) + let tHigh64 = UInt64( + truncatingIfNeeded: t >> (64 - integerBits + adjustIntegerBits)) + + let one = UInt64(1) << (64 - adjustIntegerBits) + let adjustFractionMask = one - 1; + let oneHalf = one >> 1; + if deltaHigh64 >= tHigh64 &+ one { + // The `skew` is the difference between our + // computed digits and the original exact value. + var skew: UInt64 + if (d.significandBitPattern == 0) { + skew = deltaHigh64 &- deltaHigh64 / 3 &- tHigh64 + } else { + skew = deltaHigh64 / 2 &- tHigh64 + } + + var lastDigit = unsafe buffer.unsafeLoad( + fromUncheckedByteOffset: nextDigit - 1, + as: UInt8.self) + + // We use the `skew` to figure out whether there's + // a better base-10 value than our current one. + if (skew & adjustFractionMask) == oneHalf { + // Difference is an integer + exactly 1/2, so ... + let adjust = skew >> (64 - adjustIntegerBits) + lastDigit &-= UInt8(truncatingIfNeeded: adjust) + // ... we round the last digit even. 
+ lastDigit &= ~1 + } else { + let adjust = (skew + oneHalf) >> (64 - adjustIntegerBits) + lastDigit &-= UInt8(truncatingIfNeeded: adjust) + } + buffer.storeBytes( + of: lastDigit, + toByteOffset: nextDigit - 1, + as: UInt8.self) + } + } + + // Step 8: Finalize formatting by rearranging + // the digits and filling in decimal points, + // exponents, and zero padding. + let isBoundary = (d.significandBitPattern == 0) + let forceExponential = + ((binaryExponent > 54) || (binaryExponent == 54 && !isBoundary)) + return _finishFormatting( + buffer: &buffer, + sign: d.sign, + firstDigit: firstDigit, + nextDigit: nextDigit, + forceExponential: forceExponential, + base10Exponent: base10Exponent) } @@ -1095,132 +1184,144 @@ internal func _float80ToStringImpl( _ textBuffer: UnsafeMutablePointer, _ bufferLength: UInt, _ value: Float80, - _ debug: Bool) -> UInt64 { - // Code below works with raw memory. - var buffer = unsafe MutableSpan(_unchecked: textBuffer, - count: Int(bufferLength)) - let textRange = Float80ToASCII(value: value, buffer: &buffer) - let textLength = textRange.upperBound - textRange.lowerBound - - // Move the text to the start of the buffer - if textRange.lowerBound != 0 { - unsafe _memmove(dest: textBuffer, - src: textBuffer + textRange.lowerBound, - size: UInt(truncatingIfNeeded: textLength)) - } - return UInt64(truncatingIfNeeded: textLength) + _ debug: Bool +) -> UInt64 { + // Code below works with raw memory. 
+ var buffer = unsafe MutableSpan( + _unchecked: textBuffer, + count: Int(bufferLength)) + let textRange = _Float80ToASCII(value: value, buffer: &buffer) + let textLength = textRange.upperBound - textRange.lowerBound + + // Move the text to the start of the buffer + if textRange.lowerBound != 0 { + unsafe _memmove( + dest: textBuffer, + src: textBuffer + textRange.lowerBound, + size: UInt(truncatingIfNeeded: textLength)) + } + return UInt64(truncatingIfNeeded: textLength) } -internal func Float80ToASCII( +internal func _Float80ToASCII( value d: Float80, - buffer utf8Buffer: inout MutableSpan) -> Range -{ - if #available(SwiftStdlib 6.2, *) { - return _Float80ToASCII(value: d, buffer: &utf8Buffer) - } else { - return 0..<0 - } + buffer utf8Buffer: inout MutableSpan +) -> Range { + if #available(SwiftStdlib 6.2, *) { + return _Float80ToASCIIImpl(value: d, buffer: &utf8Buffer) + } else { + return 0..<0 + } } @available(SwiftStdlib 6.2, *) -fileprivate func _Float80ToASCII( +fileprivate func _Float80ToASCIIImpl( value f: Float80, - buffer utf8Buffer: inout MutableSpan) -> Range -{ - // We need a MutableRawSpan in order to use wide store/load operations - precondition(utf8Buffer.count >= 32) - var buffer = utf8Buffer.mutableBytes - - // Step 1: Handle special cases, decompose the input - - // The Intel 80-bit floating point format has some quirks that - // make this a lot more complex than the corresponding logic for - // the IEEE 754 portable formats. - - // f.significandBitPattern is processed to try to mimic the - // semantics of IEEE portable formats. 
But for the following, - // we need the actual raw bits: - let rawSignificand = f._representation.explicitSignificand - let binaryExponent: Int - let significand: Float80.RawSignificand - let exponentBias = (1 << (Float80.exponentBitCount - 1)) - 2; // 16382 - let isBoundary = f.significandBitPattern == 0 - if f.exponentBitPattern == 0x7fff { // NaN or Infinity - // 80387 semantics and 80287 semantics differ somewhat; - // we follow 80387 semantics here. - // See: Wikipedia.org "Extended Precision" - // See: Intel's "Floating Point Reference Sheet" - // https://software.intel.com/content/dam/develop/external/us/en/documents/floating-point-reference-sheet.pdf - let selector = rawSignificand >> 62 - let payload = rawSignificand & ((1 << 62) - 1) - switch selector { - case 0: // ∞ or snan on 287, invalid on 387 - fallthrough - case 1: // Pseudo-NaN: snan on 287, invalid on 387 - // Invalid patterns treated as plain "nan" - return nan_details(buffer: &buffer, - sign: .plus, - quiet: true, - payloadHigh: 0, - payloadLow: payload) - case 2: - if payload == 0 { // snan on 287, ∞ on 387 - return infinity(buffer: &buffer, sign: f.sign) - } else { // snan on 287 and 387 - return nan_details(buffer: &buffer, - sign: f.sign, - quiet: false, - payloadHigh: 0, - payloadLow: payload) - } - case 3: - // Zero payload and sign bit set is "indefinite" (treated as qNaN here), - // otherwise qNaN on 387, sNaN on 287 - return nan_details(buffer: &buffer, - sign: f.sign, - quiet: true, - payloadHigh: 0, - payloadLow: payload) - default: - fatalError() - } - } else if f.exponentBitPattern == 0 { - if rawSignificand == 0 { // Zero - return zero(buffer: &buffer, sign: f.sign) - } else { // subnormal - binaryExponent = 1 - exponentBias - significand = rawSignificand - } - } else if rawSignificand >> 63 == 1 { // Normal - binaryExponent = Int(bitPattern:f.exponentBitPattern) - exponentBias - significand = rawSignificand - } else { - return nan_details(buffer: &buffer, - sign: .plus, - quiet: 
true, - payloadHigh: 0, - payloadLow: 0) + buffer utf8Buffer: inout MutableSpan +) -> Range { + // We need a MutableRawSpan in order to use wide store/load operations + precondition(utf8Buffer.count >= 32) + var buffer = utf8Buffer.mutableBytes + + // Step 1: Handle special cases, decompose the input + + // The Intel 80-bit floating point format has some quirks that + // make this a lot more complex than the corresponding logic for + // the IEEE 754 portable formats. + + // f.significandBitPattern is processed to try to mimic the + // semantics of IEEE portable formats. But for the following, + // we need the actual raw bits: + let rawSignificand = f._representation.explicitSignificand + let binaryExponent: Int + let significand: Float80.RawSignificand + let exponentBias = (1 << (Float80.exponentBitCount - 1)) - 2; // 16382 + let isBoundary = f.significandBitPattern == 0 + if f.exponentBitPattern == 0x7fff { // NaN or Infinity + // 80387 semantics and 80287 semantics differ somewhat; + // we follow 80387 semantics here. 
+ // See: Wikipedia.org "Extended Precision" + // See: Intel's "Floating Point Reference Sheet" + // https://software.intel.com/content/dam/develop/external/us/en/documents/floating-point-reference-sheet.pdf + let selector = rawSignificand >> 62 + let payload = rawSignificand & ((1 << 62) - 1) + switch selector { + case 0: // ∞ or snan on 287, invalid on 387 + fallthrough + case 1: // Pseudo-NaN: snan on 287, invalid on 387 + // Invalid patterns treated as plain "nan" + return nan_details( + buffer: &buffer, + sign: .plus, + quiet: true, + payloadHigh: 0, + payloadLow: payload) + case 2: + if payload == 0 { // snan on 287, ∞ on 387 + return _infinity(buffer: &buffer, sign: f.sign) + } else { // snan on 287 and 387 + return nan_details( + buffer: &buffer, + sign: f.sign, + quiet: false, + payloadHigh: 0, + payloadLow: payload) + } + case 3: + // Zero payload and sign bit set is "indefinite" (treated as qNaN here), + // otherwise qNaN on 387, sNaN on 287 + return nan_details( + buffer: &buffer, + sign: f.sign, + quiet: true, + payloadHigh: 0, + payloadLow: payload) + default: + fatalError() } - - // Step 2: Determine the exact unscaled target interval - let halfUlp = UInt64(1) << 63 - let quarterUlp = halfUlp >> 1 - let threeQuarterUlp = halfUlp + quarterUlp - // Significand is the upper 64 bits of our 128-bit franction - // Upper midpoint adds 1/2 ULP: - let upperMidpointExact = UInt128(_low: halfUlp, _high: significand) - // Lower midpoint subtracts 1 ULP and then adds 1/2 or 3/4 ULP: - let lowerMidpointExact = UInt128(_low: isBoundary ? 
threeQuarterUlp : halfUlp, - _high: significand - 1) - - return _backend_256bit(buffer: &buffer, - upperMidpointExact: upperMidpointExact, - lowerMidpointExact: lowerMidpointExact, - sign: f.sign, - isBoundary: isBoundary, - isOddSignificand: (f.significandBitPattern & 1) != 0, - binaryExponent: binaryExponent, - forceExponential: binaryExponent > 65 || (binaryExponent == 65 && !isBoundary)) + } else if f.exponentBitPattern == 0 { + if rawSignificand == 0 { // Zero + return _zero(buffer: &buffer, sign: f.sign) + } else { // subnormal + binaryExponent = 1 - exponentBias + significand = rawSignificand + } + } else if rawSignificand >> 63 == 1 { // Normal + binaryExponent = Int(bitPattern:f.exponentBitPattern) - exponentBias + significand = rawSignificand + } else { + return nan_details( + buffer: &buffer, + sign: .plus, + quiet: true, + payloadHigh: 0, + payloadLow: 0) + } + + // Step 2: Determine the exact unscaled target interval + let halfUlp = UInt64(1) << 63 + let quarterUlp = halfUlp >> 1 + let threeQuarterUlp = halfUlp + quarterUlp + // Significand is the upper 64 bits of our 128-bit franction + // Upper midpoint adds 1/2 ULP: + let upperMidpointExact = UInt128(_low: halfUlp, _high: significand) + // Lower midpoint subtracts 1 ULP and then adds 1/2 or 3/4 ULP: + let lowerMidpointExact = UInt128( + _low: isBoundary ? threeQuarterUlp : halfUlp, + _high: significand - 1) + + let forceExponential = + (binaryExponent > 65 + || (binaryExponent == 65 && !isBoundary)) + return _backend_256bit( + buffer: &buffer, + upperMidpointExact: upperMidpointExact, + lowerMidpointExact: lowerMidpointExact, + sign: f.sign, + isBoundary: isBoundary, + isOddSignificand: (f.significandBitPattern & 1) != 0, + binaryExponent: binaryExponent, + forceExponential: forceExponential) } #endif @@ -1235,28 +1336,28 @@ fileprivate func _Float80ToASCII( // backwards compatibility, and the legacy ABI never supported // Float128. 
-internal func Float128ToASCII( +internal func _Float128ToASCII( value d: Float128, - buffer utf8Buffer: inout MutableSpan) -> Range -{ - if #available(SwiftStdlib 6.2, *) { - return _Float128ToASCII(value: d, buffer: &utf8Buffer) - } else { - return 0..<0 - } + buffer utf8Buffer: inout MutableSpan +) -> Range { + if #available(SwiftStdlib 6.2, *) { + return _Float128ToASCIIImpl(value: d, buffer: &utf8Buffer) + } else { + return 0..<0 + } } @available(SwiftStdlib 6.2, *) -fileprivate func _Float128ToASCII( +fileprivate func _Float128ToASCIIImpl( value d: Float128, - buffer utf8Buffer: inout MutableSpan) -> Range -{ - // TODO: Write Me! - - // Note: All the interesting parts are already implemented in _backend_256bit(...), - // so this can easily be implemented someday by just copyihng _Float80ToASCII - // and making the obvious changes. (See the introductory parts of - // _Float64ToASCII for the structure common to all IEEE 754 formats.) + buffer utf8Buffer: inout MutableSpan +) -> Range { + // TODO: Write Me! + + // Note: All the interesting parts are already implemented in _backend_256bit(...), + // so this can easily be implemented someday by just copyihng _Float80ToASCII + // and making the obvious changes. (See the introductory parts of + // _Float64ToASCII for the structure common to all IEEE 754 formats.) 
} #endif @@ -1299,145 +1400,160 @@ fileprivate func _backend_256bit( isBoundary: Bool, isOddSignificand: Bool, binaryExponent: Int, - forceExponential: Bool) -> Range { - - // Step 3: Estimate the base 10 exponent - var base10Exponent = decimalExponentFor2ToThe(binaryExponent) - - // Step 4: Compute a power-of-10 scale factor - var powerOfTenRoundedDown = UInt256() - var powerOfTenRoundedUp = UInt256() - let powerOfTenExponent = intervalContainingPowerOf10_Binary128(p: -base10Exponent, - lower: &powerOfTenRoundedDown, - upper: &powerOfTenRoundedUp) - let extraBits = binaryExponent &+ powerOfTenExponent - - // Step 5: Scale the interval (with rounding) - let integerBits = 14 - let high64FractionBits = 64 - integerBits - var u: UInt256 - var l: UInt256 - if isOddSignificand { - // Narrow the interval (odd significand) - u = powerOfTenRoundedDown - u.multiplyRoundingDown(by: upperMidpointExact) - u.shiftRightRoundingDown(by: integerBits &- extraBits) - - l = powerOfTenRoundedUp - l.multiplyRoundingUp(by: lowerMidpointExact) - l.shiftRightRoundingUp(by: integerBits &- extraBits) - } else { - // Widen the interval (even significand) - u = powerOfTenRoundedUp - u.multiplyRoundingUp(by: upperMidpointExact) - u.shiftRightRoundingUp(by: integerBits &- extraBits) - - l = powerOfTenRoundedDown - l.multiplyRoundingDown(by: lowerMidpointExact) - l.shiftRightRoundingDown(by: integerBits &- extraBits) + forceExponential: Bool +) -> Range { + + // Step 3: Estimate the base 10 exponent + var base10Exponent = decimalExponentFor2ToThe(binaryExponent) + + // Step 4: Compute a power-of-10 scale factor + var powerOfTenRoundedDown = _UInt256() + var powerOfTenRoundedUp = _UInt256() + let powerOfTenExponent = _intervalContainingPowerOf10_Binary128( + p: -base10Exponent, + lower: &powerOfTenRoundedDown, + upper: &powerOfTenRoundedUp) + let extraBits = binaryExponent &+ powerOfTenExponent + + // Step 5: Scale the interval (with rounding) + let integerBits = 14 + let high64FractionBits = 64 
- integerBits + var u: _UInt256 + var l: _UInt256 + if isOddSignificand { + // Narrow the interval (odd significand) + u = powerOfTenRoundedDown + u.multiplyRoundingDown(by: upperMidpointExact) + u.shiftRightRoundingDown(by: integerBits &- extraBits) + + l = powerOfTenRoundedUp + l.multiplyRoundingUp(by: lowerMidpointExact) + l.shiftRightRoundingUp(by: integerBits &- extraBits) + } else { + // Widen the interval (even significand) + u = powerOfTenRoundedUp + u.multiplyRoundingUp(by: upperMidpointExact) + u.shiftRightRoundingUp(by: integerBits &- extraBits) + + l = powerOfTenRoundedDown + l.multiplyRoundingDown(by: lowerMidpointExact) + l.shiftRightRoundingDown(by: integerBits &- extraBits) + } + + // Step 6: Align first digit, adjust exponent + while u.high._high < (UInt64(1) << high64FractionBits) { + base10Exponent &-= 1 + l.multiply(by: UInt32(10)) + u.multiply(by: UInt32(10)) + } + var t = u + var delta = u &- l + + // Step 7: Generate digits + + // Include 8 "0" characters at the beginning of the buffer + // for finishFormatting to use + buffer.storeBytes( + of: 0x3030303030303030, + toByteOffset: 0, + as: UInt64.self) + // Start writing digits just after that + let firstDigit = 8 + var nextDigit = firstDigit + buffer.storeBytes( + of: 0x30 + UInt8(truncatingIfNeeded: t.extractIntegerPart(integerBits)), + toByteOffset: nextDigit, + as: UInt8.self) + nextDigit &+= 1 + + // It would be nice to generate 8 digits at a time and take + // advantage of intToEightDigits, but our integer portion has only + // 14 bits. We can't make that bigger without either sacrificing + // too much precision for correct Float128 or folding the first + // digits into the scaling (as we do with Double) which would + // require a back-out phase here (as we do with Double). + + // If there is at least one more digit possible... 
+ if delta < t { + + // Try grabbing four digits at a time + var d0 = delta + var t0 = t + d0.multiply(by: 10000) + t0.multiply(by: 10000) + var d1234 = t0.extractIntegerPart(integerBits) + while d0 < t0 { + let d12 = d1234 / 100 + let d34 = d1234 % 100 + unsafe buffer.storeBytes( + of: asciiDigitTable[Int(bitPattern:d12)], + toUncheckedByteOffset: nextDigit, + as: UInt16.self) + unsafe buffer.storeBytes( + of: asciiDigitTable[Int(bitPattern:d34)], + toUncheckedByteOffset: nextDigit &+ 2, + as: UInt16.self) + nextDigit &+= 4 + t = t0 + delta = d0 + d0.multiply(by: 10000) + t0.multiply(by: 10000) + d1234 = t0.extractIntegerPart(integerBits) } - // Step 6: Align first digit, adjust exponent - while u.high._high < (UInt64(1) << high64FractionBits) { - base10Exponent &-= 1 - l.multiply(by: UInt32(10)) - u.multiply(by: UInt32(10)) + // Finish by generating one digit at a time... + while delta < t { + delta.multiply(by: UInt32(10)) + t.multiply(by: UInt32(10)) + let digit = UInt8(truncatingIfNeeded: t.extractIntegerPart(integerBits)) + unsafe buffer.storeBytes( + of: 0x30 &+ digit, + toUncheckedByteOffset: nextDigit, + as: UInt8.self) + nextDigit &+= 1 } - var t = u - var delta = u &- l - - // Step 7: Generate digits - - // Include 8 "0" characters at the beginning of the buffer for finishFormatting to use - buffer.storeBytes(of: 0x3030303030303030, - toByteOffset: 0, - as: UInt64.self) - // Start writing digits just after that - let firstDigit = 8 - var nextDigit = firstDigit - buffer.storeBytes(of: 0x30 + UInt8(truncatingIfNeeded: t.extractIntegerPart(integerBits)), - toByteOffset: nextDigit, - as: UInt8.self) - nextDigit &+= 1 - - // It would be nice to generate 8 digits at a time and take - // advantage of intToEightDigits, but our integer portion has only - // 14 bits. 
We can't make that bigger without either sacrificing - // too much precision for correct Float128 or folding the first - // digits into the scaling (as we do with Double) which would - // require a back-out phase here (as we do with Double). - - // If there is at least one more digit possible... - if delta < t { - - // Try grabbing four digits at a time - var d0 = delta - var t0 = t - d0.multiply(by: 10000) - t0.multiply(by: 10000) - var d1234 = t0.extractIntegerPart(integerBits) - while d0 < t0 { - let d12 = d1234 / 100 - let d34 = d1234 % 100 - unsafe buffer.storeBytes(of: asciiDigitTable[Int(bitPattern:d12)], - toUncheckedByteOffset: nextDigit, - as: UInt16.self) - unsafe buffer.storeBytes(of: asciiDigitTable[Int(bitPattern:d34)], - toUncheckedByteOffset: nextDigit &+ 2, - as: UInt16.self) - nextDigit &+= 4 - t = t0 - delta = d0 - d0.multiply(by: 10000) - t0.multiply(by: 10000) - d1234 = t0.extractIntegerPart(integerBits) - } - - // Finish by generating one digit at a time... - while delta < t { - delta.multiply(by: UInt32(10)) - t.multiply(by: UInt32(10)) - let digit = UInt8(truncatingIfNeeded: t.extractIntegerPart(integerBits)) - unsafe buffer.storeBytes(of: 0x30 &+ digit, - toUncheckedByteOffset: nextDigit, - as: UInt8.self) - nextDigit &+= 1 - } + } + + // Adjust the final digit to be closer to the original value + // We've already consumed most of our available precision, and only + // need a couple of integer bits, so we can narrow down to + // 64 bits here. + let deltaHigh64 = delta.high._high + let tHigh64 = t.high._high + if deltaHigh64 >= tHigh64 &+ (UInt64(1) << high64FractionBits) { + let skew: UInt64 + if isBoundary { + skew = deltaHigh64 &- deltaHigh64 / 3 &- tHigh64 + } else { + skew = deltaHigh64 / 2 &- tHigh64 } - - // Adjust the final digit to be closer to the original value - // We've already consumed most of our available precision, and only - // need a couple of integer bits, so we can narrow down to - // 64 bits here. 
- let deltaHigh64 = delta.high._high - let tHigh64 = t.high._high - if deltaHigh64 >= tHigh64 &+ (UInt64(1) << high64FractionBits) { - let skew: UInt64 - if isBoundary { - skew = deltaHigh64 &- deltaHigh64 / 3 &- tHigh64 - } else { - skew = deltaHigh64 / 2 &- tHigh64 - } - let one = UInt64(1) << high64FractionBits - let fractionMask = one - 1 - let oneHalf = one >> 1 - var lastDigit = unsafe buffer.unsafeLoad(fromUncheckedByteOffset: nextDigit &- 1, - as: UInt8.self) - if (skew & fractionMask) == oneHalf { - let adjust = skew >> high64FractionBits - lastDigit &-= UInt8(truncatingIfNeeded: adjust) - lastDigit &= ~1 - } else { - let adjust = (skew + oneHalf) >> high64FractionBits - lastDigit &-= UInt8(truncatingIfNeeded: adjust) - } - buffer.storeBytes(of: lastDigit, - toByteOffset: nextDigit &- 1, - as: UInt8.self) + let one = UInt64(1) << high64FractionBits + let fractionMask = one - 1 + let oneHalf = one >> 1 + var lastDigit = unsafe buffer.unsafeLoad( + fromUncheckedByteOffset: nextDigit &- 1, + as: UInt8.self) + if (skew & fractionMask) == oneHalf { + let adjust = skew >> high64FractionBits + lastDigit &-= UInt8(truncatingIfNeeded: adjust) + lastDigit &= ~1 + } else { + let adjust = (skew + oneHalf) >> high64FractionBits + lastDigit &-= UInt8(truncatingIfNeeded: adjust) } - - return finishFormatting(&buffer, sign, firstDigit, nextDigit, - forceExponential, base10Exponent) + buffer.storeBytes( + of: lastDigit, + toByteOffset: nextDigit &- 1, + as: UInt8.self) + } + + return _finishFormatting( + buffer: &buffer, + sign: sign, + firstDigit: firstDigit, + nextDigit: nextDigit, + forceExponential: forceExponential, + base10Exponent: base10Exponent) } #endif @@ -1457,138 +1573,155 @@ fileprivate func _backend_256bit( // characters. 
@available(SwiftStdlib 6.2, *) -fileprivate func finishFormatting(_ buffer: inout MutableRawSpan, - _ sign: FloatingPointSign, - _ firstDigit: Int, - _ nextDigit: Int, - _ forceExponential: Bool, - _ base10Exponent: Int) -> Range -{ - // Performance note: This could be made noticeably faster by - // writing the output consistently in exponential form with no - // decimal point, e.g., "31415926e-07". But the extra cost seems - // worthwhile to achieve "3.1415926" instead. - var firstDigit = firstDigit - var nextDigit = nextDigit - - let digitCount = nextDigit &- firstDigit - if base10Exponent < -4 || forceExponential { - // Exponential form: "-1.23456789e+123" - // Rewrite "123456789" => "1.23456789" by moving the first - // digit to the left one byte and overwriting a period. - // (This is one reason we left empty space to the left of the digits.) - // We don't do this for single-digit significands: "1e+78", "5e-324" - if digitCount > 1 { - let t = unsafe buffer.unsafeLoad(fromUncheckedByteOffset: firstDigit, - as: UInt8.self) - unsafe buffer.storeBytes(of: 0x2e, - toUncheckedByteOffset: firstDigit, - as: UInt8.self) - firstDigit &-= 1 - unsafe buffer.storeBytes(of: t, - toUncheckedByteOffset: firstDigit, - as: UInt8.self) - } - // Append the exponent: - unsafe buffer.storeBytes(of: 0x65, // "e" - toUncheckedByteOffset: nextDigit, - as: UInt8.self) - nextDigit &+= 1 - var e = base10Exponent - let expSign: UInt8 - if base10Exponent < 0 { - expSign = 0x2d // "-" - e = 0 &- e - } else { - expSign = 0x2b // "+" - } - unsafe buffer.storeBytes(of: expSign, - toUncheckedByteOffset: nextDigit, - as: UInt8.self) - nextDigit &+= 1 - if e > 99 { - if e > 999 { - let d = asciiDigitTable[e / 100] - unsafe buffer.storeBytes(of: d, - toUncheckedByteOffset: nextDigit, - as: UInt16.self) - nextDigit &+= 2 - } else { - let d = 0x30 &+ UInt8(truncatingIfNeeded: (e / 100)) - unsafe buffer.storeBytes(of: d, - toUncheckedByteOffset: nextDigit, - as: UInt8.self) - nextDigit &+= 1 - } - e 
= e % 100 - } - let d = unsafe asciiDigitTable[unchecked: e] - buffer.storeBytes(of: d, - toByteOffset: nextDigit, - as: UInt16.self) +fileprivate func _finishFormatting( + buffer: inout MutableRawSpan, + sign: FloatingPointSign, + firstDigit: Int, + nextDigit: Int, + forceExponential: Bool, + base10Exponent: Int +) -> Range { + // Performance note: This could be made noticeably faster by + // writing the output consistently in exponential form with no + // decimal point, e.g., "31415926e-07". But the extra cost seems + // worthwhile to achieve "3.1415926" instead. + var firstDigit = firstDigit + var nextDigit = nextDigit + + let digitCount = nextDigit &- firstDigit + if base10Exponent < -4 || forceExponential { + // Exponential form: "-1.23456789e+123" + // Rewrite "123456789" => "1.23456789" by moving the first + // digit to the left one byte and overwriting a period. + // (This is one reason we left empty space to the left of the digits.) + // We don't do this for single-digit significands: "1e+78", "5e-324" + if digitCount > 1 { + let t = unsafe buffer.unsafeLoad( + fromUncheckedByteOffset: firstDigit, + as: UInt8.self) + unsafe buffer.storeBytes( + of: 0x2e, + toUncheckedByteOffset: firstDigit, + as: UInt8.self) + firstDigit &-= 1 + unsafe buffer.storeBytes( + of: t, + toUncheckedByteOffset: firstDigit, + as: UInt8.self) + } + // Append the exponent: + unsafe buffer.storeBytes( + of: 0x65, // "e" + toUncheckedByteOffset: nextDigit, + as: UInt8.self) + nextDigit &+= 1 + var e = base10Exponent + let expSign: UInt8 + if base10Exponent < 0 { + expSign = 0x2d // "-" + e = 0 &- e + } else { + expSign = 0x2b // "+" + } + unsafe buffer.storeBytes( + of: expSign, + toUncheckedByteOffset: nextDigit, + as: UInt8.self) + nextDigit &+= 1 + if e > 99 { + if e > 999 { + let d = asciiDigitTable[e / 100] + unsafe buffer.storeBytes( + of: d, + toUncheckedByteOffset: nextDigit, + as: UInt16.self) nextDigit &+= 2 + } else { + let d = 0x30 &+ UInt8(truncatingIfNeeded: (e / 100)) + 
unsafe buffer.storeBytes( + of: d, + toUncheckedByteOffset: nextDigit, + as: UInt8.self) + nextDigit &+= 1 + } + e = e % 100 + } + let d = unsafe asciiDigitTable[unchecked: e] + buffer.storeBytes( + of: d, + toByteOffset: nextDigit, + as: UInt16.self) + nextDigit &+= 2 } else if base10Exponent < 0 { - // "-0.000123456789" - // We need up to 5 leading characters before the digits. - // Note that the formatters above all insert extra leading "0" characters - // to the beginning of the buffer, so we don't need to memset() here, - // just back up the start to include them... - firstDigit &+= base10Exponent - 1 - // ... and then overwrite a decimal point to get "0." at the beginning - buffer.storeBytes(of: 0x2e, // "." - toByteOffset: firstDigit &+ 1, - as: UInt8.self) - } else if base10Exponent &+ 1 < digitCount { - // "123456.789" - // We move the first digits forward one position - // so we can insert a decimal point in the middle. - // Note: This is the only case where we actually move - // more than one digit around in the buffer. - // TODO: Find out how to use C memmove() here - firstDigit &-= 1 - for i in 0...(base10Exponent &+ 1) { - let t = unsafe buffer.unsafeLoad(fromUncheckedByteOffset: firstDigit &+ i &+ 1, - as: UInt8.self) - unsafe buffer.storeBytes(of: t, - toUncheckedByteOffset: firstDigit &+ i, - as: UInt8.self) - } - buffer.storeBytes(of: 0x2e, - toByteOffset: firstDigit &+ base10Exponent &+ 1, - as: UInt8.self) - } else { - // "12345678900.0" - // Fill trailing zeros, put ".0" at the end - // so the result is obviously floating-point. - let zeroEnd = firstDigit &+ base10Exponent &+ 3 - // TODO: Find out how to use C memset() here: - // Blast 8 "0" digits into the buffer - unsafe buffer.storeBytes(of: 0x3030303030303030 as UInt64, - toUncheckedByteOffset: nextDigit, - as: UInt64.self) - // Add more "0" digits if needed... - // (Note: Can't use a standard range loop because nextDigit+8 - // can legitimately be larger than zeroEnd here.) 
- var i = nextDigit + 8 - while i < zeroEnd { - unsafe buffer.storeBytes(of: 0x30, - toUncheckedByteOffset: i, - as: UInt8.self) - i &+= 1 - } - nextDigit = zeroEnd - buffer.storeBytes(of: 0x2e, - toByteOffset: nextDigit &- 2, - as: UInt8.self) + // "-0.000123456789" + // We need up to 5 leading characters before the digits. + // Note that the formatters above all insert extra leading "0" characters + // to the beginning of the buffer, so we don't need to memset() here, + // just back up the start to include them... + firstDigit &+= base10Exponent - 1 + // ... and then overwrite a decimal point to get "0." at the beginning + buffer.storeBytes( + of: 0x2e, // "." + toByteOffset: firstDigit &+ 1, + as: UInt8.self) + } else if base10Exponent &+ 1 < digitCount { + // "123456.789" + // We move the first digits forward one position + // so we can insert a decimal point in the middle. + // Note: This is the only case where we actually move + // more than one digit around in the buffer. + // TODO: Find out how to use C memmove() here + firstDigit &-= 1 + for i in 0...(base10Exponent &+ 1) { + let t = unsafe buffer.unsafeLoad( + fromUncheckedByteOffset: firstDigit &+ i &+ 1, + as: UInt8.self) + unsafe buffer.storeBytes( + of: t, + toUncheckedByteOffset: firstDigit &+ i, + as: UInt8.self) } - if sign == .minus { - buffer.storeBytes(of: 0x2d, // "-" - toByteOffset: firstDigit &- 1, - as: UInt8.self) - firstDigit &-= 1 + buffer.storeBytes( + of: 0x2e, + toByteOffset: firstDigit &+ base10Exponent &+ 1, + as: UInt8.self) + } else { + // "12345678900.0" + // Fill trailing zeros, put ".0" at the end + // so the result is obviously floating-point. + let zeroEnd = firstDigit &+ base10Exponent &+ 3 + // TODO: Find out how to use C memset() here: + // Blast 8 "0" digits into the buffer + unsafe buffer.storeBytes( + of: 0x3030303030303030 as UInt64, + toUncheckedByteOffset: nextDigit, + as: UInt64.self) + // Add more "0" digits if needed... 
+ // (Note: Can't use a standard range loop because nextDigit+8 + // can legitimately be larger than zeroEnd here.) + var i = nextDigit + 8 + while i < zeroEnd { + unsafe buffer.storeBytes( + of: 0x30, + toUncheckedByteOffset: i, + as: UInt8.self) + i &+= 1 } - - return unsafe Range(_uncheckedBounds: (lower: firstDigit, upper: nextDigit)) + nextDigit = zeroEnd + buffer.storeBytes( + of: 0x2e, + toByteOffset: nextDigit &- 2, + as: UInt8.self) + } + if sign == .minus { + buffer.storeBytes( + of: 0x2d, // "-" + toByteOffset: firstDigit &- 1, + as: UInt8.self) + firstDigit &-= 1 + } + + return unsafe Range(_uncheckedBounds: (lower: firstDigit, upper: nextDigit)) } // Table with ASCII strings for all 2-digit decimal numbers. @@ -1619,93 +1752,135 @@ fileprivate let asciiDigitTable: InlineArray<100, UInt16> = [ // The constants below assume we're on a little-endian processor @available(SwiftStdlib 6.2, *) -fileprivate func infinity(buffer: inout MutableRawSpan, sign: FloatingPointSign) -> Range { - if sign == .minus { - buffer.storeBytes(of: 0x666e692d, toByteOffset: 0, as: UInt32.self) // "-inf" - return 0..<4 - } else { - buffer.storeBytes(of: 0x00666e69, toByteOffset: 0, as: UInt32.self) // "inf\0" - return 0..<3 - } +fileprivate func _infinity( + buffer: inout MutableRawSpan, + sign: FloatingPointSign +) -> Range { + if sign == .minus { + buffer.storeBytes( + of: 0x666e692d, // "-inf" + toByteOffset: 0, + as: UInt32.self) + return 0..<4 + } else { + buffer.storeBytes( + of: 0x00666e69, // "inf\0" + toByteOffset: 0, + as: UInt32.self) + return 0..<3 + } } @available(SwiftStdlib 6.2, *) -fileprivate func zero(buffer: inout MutableRawSpan, sign: FloatingPointSign) -> Range { - if sign == .minus { - buffer.storeBytes(of: 0x302e302d, toByteOffset: 0, as: UInt32.self) // "-0.0" - return 0..<4 - } else { - buffer.storeBytes(of: 0x00302e30, toByteOffset: 0, as: UInt32.self) // "0.0\0" - return 0..<3 - } +fileprivate func _zero( + buffer: inout MutableRawSpan, + sign: 
FloatingPointSign +) -> Range { + if sign == .minus { + buffer.storeBytes( + of: 0x302e302d, // "-0.0" + toByteOffset: 0, + as: UInt32.self) + return 0..<4 + } else { + buffer.storeBytes( + of: 0x00302e30, // "0.0\0" + toByteOffset: 0, + as: UInt32.self) + return 0..<3 + } } @available(SwiftStdlib 6.2, *) -fileprivate let hexdigits: InlineArray<16, UInt8> = [ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66 ] +fileprivate let hexdigits: InlineArray<16, UInt8> = [ + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66 +] @available(SwiftStdlib 6.2, *) -fileprivate func hexWithoutLeadingZeros(buffer: inout MutableRawSpan, offset: inout Int, value: UInt64) { - var shift = 60 - while (shift > 0) && ((value >> shift) & 0xf == 0) { - shift -= 4 - } - while shift >= 0 { - let d = hexdigits[Int(truncatingIfNeeded: (value >> shift) & 0xf)] - shift -= 4 - buffer.storeBytes(of: d, toByteOffset: offset, as: UInt8.self) - offset += 1 - } +fileprivate func _hexWithoutLeadingZeros( + buffer: inout MutableRawSpan, + offset: inout Int, + value: UInt64 +) { + var shift = 60 + while (shift > 0) && ((value >> shift) & 0xf == 0) { + shift -= 4 + } + while shift >= 0 { + let d = hexdigits[Int(truncatingIfNeeded: (value >> shift) & 0xf)] + shift -= 4 + buffer.storeBytes( + of: d, + toByteOffset: offset, + as: UInt8.self) + offset += 1 + } } @available(SwiftStdlib 6.2, *) -fileprivate func hexWithLeadingZeros(buffer: inout MutableRawSpan, offset: inout Int, value: UInt64) { - var shift = 60 - while shift >= 0 { - let d = hexdigits[Int(truncatingIfNeeded: (value >> shift) & 0xf)] - shift -= 4 - buffer.storeBytes(of: d, toByteOffset: offset, as: UInt8.self) - offset += 1 - } +fileprivate func _hexWithLeadingZeros( + buffer: inout MutableRawSpan, + offset: inout Int, + value: UInt64 +) { + var shift = 60 + while shift >= 0 { + let d = hexdigits[Int(truncatingIfNeeded: (value >> shift) & 0xf)] + shift -= 
4 + buffer.storeBytes( + of: d, + toByteOffset: offset, + as: UInt8.self) + offset += 1 + } } @available(SwiftStdlib 6.2, *) -fileprivate func nan_details(buffer: inout MutableRawSpan, - sign: FloatingPointSign, - quiet: Bool, - payloadHigh: UInt64, - payloadLow: UInt64) -> Range -{ - // value is a NaN of some sort - var i = 0 - if sign == .minus { - buffer.storeBytes(of: 0x2d, toByteOffset: 0, as: UInt8.self) // "-" - i = 1 - } - if !quiet { - buffer.storeBytes(of: 0x73, toByteOffset: i, as: UInt8.self) // "s" - i += 1 - } - buffer.storeBytes(of: 0x6e, toByteOffset: i, as: UInt8.self) // "n" - buffer.storeBytes(of: 0x61, toByteOffset: i + 1, as: UInt8.self) // "a" - buffer.storeBytes(of: 0x6e, toByteOffset: i + 2, as: UInt8.self) // "n" - i += 3 - if payloadHigh != 0 || payloadLow != 0 { - buffer.storeBytes(of: 0x28, toByteOffset: i, as: UInt8.self) // "(" - i += 1 - buffer.storeBytes(of: 0x30, toByteOffset: i, as: UInt8.self) // "0" - i += 1 - buffer.storeBytes(of: 0x78, toByteOffset: i, as: UInt8.self) // "x" - i += 1 - if payloadHigh == 0 { - hexWithoutLeadingZeros(buffer: &buffer, offset: &i, value: payloadLow) - } else { - hexWithoutLeadingZeros(buffer: &buffer, offset: &i, value: payloadHigh) - hexWithLeadingZeros(buffer: &buffer, offset: &i, value: payloadLow) - } - buffer.storeBytes(of: 0x29, toByteOffset: i, as: UInt8.self) // ")" - i += 1 +fileprivate func nan_details( + buffer: inout MutableRawSpan, + sign: FloatingPointSign, + quiet: Bool, + payloadHigh: UInt64, + payloadLow: UInt64 +) -> Range { + // value is a NaN of some sort + var i = 0 + if sign == .minus { + buffer.storeBytes( + of: 0x2d, // "-" + toByteOffset: 0, + as: UInt8.self) + i = 1 + } + if !quiet { + buffer.storeBytes( + of: 0x73, // "s" + toByteOffset: i, + as: UInt8.self) + i += 1 + } + buffer.storeBytes(of: 0x6e, toByteOffset: i, as: UInt8.self) // "n" + buffer.storeBytes(of: 0x61, toByteOffset: i + 1, as: UInt8.self) // "a" + buffer.storeBytes(of: 0x6e, toByteOffset: i + 2, as: 
UInt8.self) // "n" + i += 3 + if payloadHigh != 0 || payloadLow != 0 { + buffer.storeBytes(of: 0x28, toByteOffset: i, as: UInt8.self) // "(" + i += 1 + buffer.storeBytes(of: 0x30, toByteOffset: i, as: UInt8.self) // "0" + i += 1 + buffer.storeBytes(of: 0x78, toByteOffset: i, as: UInt8.self) // "x" + i += 1 + if payloadHigh == 0 { + _hexWithoutLeadingZeros(buffer: &buffer, offset: &i, value: payloadLow) + } else { + _hexWithoutLeadingZeros(buffer: &buffer, offset: &i, value: payloadHigh) + _hexWithLeadingZeros(buffer: &buffer, offset: &i, value: payloadLow) } - return 0.. UInt64 { - // Break into two numbers of 4 decimal digits each - let div8 = n / 10000 - let mod8 = n &- div8 &* 10000 - let fours = UInt64(div8) | (UInt64(mod8) << 32) - - // Break into 4 numbers of 2 decimal digits each - let mask100: UInt64 = 0x0000007f0000007f - let div4 = ((fours &* 10486) >> 20) & mask100 - let mod4 = fours &- 100 &* div4 - let pairs = div4 | (mod4 &<< 16) - - // Break into 8 numbers of a single decimal digit each - let mask10: UInt64 = 0x000f000f000f000f - let div2 = ((pairs &* 103) >> 10) & mask10 - let mod2 = pairs &- 10 &* div2 - let singles = div2 | (mod2 &<< 8) - - // Convert 8 digits to ASCII characters - return singles &+ 0x3030303030303030 +fileprivate func _intToEightDigits(_ n: UInt32) -> UInt64 { + // Break into two numbers of 4 decimal digits each + let div8 = n / 10000 + let mod8 = n &- div8 &* 10000 + let fours = UInt64(div8) | (UInt64(mod8) << 32) + + // Break into 4 numbers of 2 decimal digits each + let mask100: UInt64 = 0x0000007f0000007f + let div4 = ((fours &* 10486) >> 20) & mask100 + let mod4 = fours &- 100 &* div4 + let pairs = div4 | (mod4 &<< 16) + + // Break into 8 numbers of a single decimal digit each + let mask10: UInt64 = 0x000f000f000f000f + let div2 = ((pairs &* 103) >> 10) & mask10 + let mod2 = pairs &- 10 &* div2 + let singles = div2 | (mod2 &<< 8) + + // Convert 8 digits to ASCII characters + return singles &+ 0x3030303030303030 } // 
================================================================ @@ -1753,194 +1928,210 @@ fileprivate func intToEightDigits(_ n: UInt32) -> UInt64 { // ================================================================ @inline(__always) -fileprivate func multiply64x32RoundingDown(_ lhs: UInt64, _ rhs: UInt32) -> UInt64 { - let mask32 = UInt64(UInt32.max) - let t = ((lhs & mask32) * UInt64(rhs)) >> 32 - return t + (lhs >> 32) * UInt64(rhs) +fileprivate func _multiply64x32RoundingDown( + _ lhs: UInt64, + _ rhs: UInt32 +) -> UInt64 { + let mask32 = UInt64(UInt32.max) + let t = ((lhs & mask32) * UInt64(rhs)) >> 32 + return t + (lhs >> 32) * UInt64(rhs) } @inline(__always) -fileprivate func multiply64x32RoundingUp(_ lhs: UInt64, _ rhs: UInt32) -> UInt64 { - let mask32 = UInt64(UInt32.max) - let t = (((lhs & mask32) * UInt64(rhs)) + mask32) >> 32 - return t + (lhs >> 32) * UInt64(rhs) +fileprivate func _multiply64x32RoundingUp( + _ lhs: UInt64, + _ rhs: UInt32 +) -> UInt64 { + let mask32 = UInt64(UInt32.max) + let t = (((lhs & mask32) * UInt64(rhs)) + mask32) >> 32 + return t + (lhs >> 32) * UInt64(rhs) } @available(SwiftStdlib 6.2, *) @inline(__always) -fileprivate func multiply128x64RoundingDown(_ lhs: UInt128, _ rhs: UInt64) -> UInt128 { - let lhsHigh = UInt128(truncatingIfNeeded: lhs._high) - let lhsLow = UInt128(truncatingIfNeeded: lhs._low) - let rhs128 = UInt128(truncatingIfNeeded: rhs) - return (lhsHigh &* rhs128) &+ ((lhsLow &* rhs128) >> 64) +fileprivate func _multiply128x64RoundingDown( + _ lhs: UInt128, + _ rhs: UInt64 +) -> UInt128 { + let lhsHigh = UInt128(truncatingIfNeeded: lhs._high) + let lhsLow = UInt128(truncatingIfNeeded: lhs._low) + let rhs128 = UInt128(truncatingIfNeeded: rhs) + return (lhsHigh &* rhs128) &+ ((lhsLow &* rhs128) >> 64) } @available(SwiftStdlib 6.2, *) @inline(__always) -fileprivate func multiply128x64RoundingUp(_ lhs: UInt128, _ rhs: UInt64) -> UInt128 { - let lhsHigh = UInt128(truncatingIfNeeded: lhs._high) - let lhsLow = 
UInt128(truncatingIfNeeded: lhs._low) - let rhs128 = UInt128(truncatingIfNeeded: rhs) - let h = lhsHigh &* rhs128 - let l = lhsLow &* rhs128 - let bias = (UInt128(1) << 64) &- 1 - return h + ((l &+ bias) &>> 64) +fileprivate func _multiply128x64RoundingUp( + _ lhs: UInt128, + _ rhs: UInt64 +) -> UInt128 { + let lhsHigh = UInt128(truncatingIfNeeded: lhs._high) + let lhsLow = UInt128(truncatingIfNeeded: lhs._low) + let rhs128 = UInt128(truncatingIfNeeded: rhs) + let h = lhsHigh &* rhs128 + let l = lhsLow &* rhs128 + let bias = (UInt128(1) << 64) &- 1 + return h + ((l &+ bias) &>> 64) } -// Custom 256-bit unsigned integer type, with various arithmetic helpers as methods. +// Custom 256-bit unsigned integer type, with various arithmetic +// helpers as methods. // Used by 80- and 128-bit floating point formatting logic above... @available(SwiftStdlib 6.2, *) -fileprivate struct UInt256 { - var high: UInt128 - var low: UInt128 - - init() { - self.high = 0 - self.low = 0 - } - - init(high: UInt64, _ midHigh: UInt64, _ midLow: UInt64, low: UInt64) { - self.high = UInt128(_low: midHigh, _high: high) - self.low = UInt128(_low: low, _high: midLow) - } - - init(high: UInt128, low: UInt128) { - self.high = high - self.low = low - } - - mutating func shiftRightRoundingDown(by shift: Int) { - assert(shift < 32 && shift >= 0) - var t = UInt128(low._low >> shift) - t |= UInt128(low._high) &<< (64 - shift) - let newlow = t._low - t = UInt128(t._high) - t |= UInt128(high._low) &<< (64 - shift) - low = UInt128(_low: newlow, _high: t._low) - t = UInt128(t._high) - t |= UInt128(high._high) &<< (64 - shift) - high = t - } - - mutating func shiftRightRoundingUp(by shift: Int) { - assert(shift < 32 && shift >= 0) - let bias = (UInt64(1) &<< shift) - 1 - var t = UInt128((low._low + bias) >> shift) - t |= UInt128(low._high) &<< (64 - shift) - let newlow = t._low - t = UInt128(t._high) - t |= UInt128(high._low) &<< (64 - shift) - low = UInt128(_low: newlow, _high: t._low) - t = 
UInt128(t._high) - t |= UInt128(high._high) &<< (64 - shift) - high = t - } - - mutating func multiply(by rhs: UInt32) { - var t = UInt128(low._low) &* UInt128(rhs) - let newlow = t._low - t = UInt128(t._high) &+ UInt128(low._high) &* UInt128(rhs) - low = UInt128(_low: newlow, _high: t._low) - t = UInt128(t._high) &+ UInt128(high._low) &* UInt128(rhs) - let newmidhigh = t._low - t = UInt128(t._high) &+ UInt128(high._high) &* UInt128(rhs) - high = UInt128(_low: newmidhigh, _high: t._low) - assert(t._high == 0) - } - - mutating func multiplyRoundingDown(by rhs: UInt128) { - var current = UInt128(low._low) * UInt128(rhs._low) - - current = UInt128(current._high) - var t = UInt128(low._low) &* UInt128(rhs._high) - current += UInt128(t._low) - var next = UInt128(t._high) - t = UInt128(low._high) &* UInt128(rhs._low) - current += UInt128(t._low) - next += UInt128(t._high) - - current = next + UInt128(current._high) - t = UInt128(low._high) &* UInt128(rhs._high) - current += UInt128(t._low) - next = UInt128(t._high) - t = UInt128(high._low) &* UInt128(rhs._low) - current += UInt128(t._low) - next += UInt128(t._high) - let newlow = current._low - - current = next + UInt128(current._high) - t = UInt128(high._low) &* UInt128(rhs._high) - current += UInt128(t._low) - next = UInt128(t._high) - t = UInt128(high._high) &* UInt128(rhs._low) - current += UInt128(t._low) - next += UInt128(t._high) - low = UInt128(_low: newlow, _high: current._low) - - current = next + UInt128(current._high) - t = UInt128(high._high) &* UInt128(rhs._high) - high = current + t - } - - mutating func multiplyRoundingUp(by rhs: UInt128) { - var current = UInt128(low._low) &* UInt128(rhs._low) - current += UInt128(UInt64.max) - - current = UInt128(current._high) - var t = UInt128(low._low) &* UInt128(rhs._high) - current += UInt128(t._low) - var next = UInt128(t._high) - t = UInt128(low._high) &* UInt128(rhs._low) - current += UInt128(t._low) - next += UInt128(t._high) - current += UInt128(UInt64.max) - 
- current = next + UInt128(current._high) - t = UInt128(low._high) &* UInt128(rhs._high) - current += UInt128(t._low) - next = UInt128(t._high) - t = UInt128(high._low) &* UInt128(rhs._low) - current += UInt128(t._low) - next += UInt128(t._high) - let newlow = current._low - - current = next + UInt128(current._high) - t = UInt128(high._low) &* UInt128(rhs._high) - current += UInt128(t._low) - next = UInt128(t._high) - t = UInt128(high._high) &* UInt128(rhs._low) - current += UInt128(t._low) - next += UInt128(t._high) - low = UInt128(_low: newlow, _high: current._low) - - current = next + UInt128(current._high) - t = UInt128(high._high) &* UInt128(rhs._high) - high = current + t - } - - mutating func extractIntegerPart(_ bits: Int) -> UInt { - assert(bits < 16) - let integral = high._high >> (64 &- bits) - high = UInt128(_low: high._low, - _high: high._high &- (integral &<< (64 &- bits))) - return UInt(truncatingIfNeeded: integral) - } - - static func &- (lhs: UInt256, rhs: UInt256) -> UInt256 { - var t = UInt128(lhs.low._low) &+ UInt128(~rhs.low._low) &+ 1 - let newlowlow = t._low - t = UInt128(t._high) &+ UInt128(lhs.low._high) &+ UInt128(~rhs.low._high) - let newlow = UInt128(_low: newlowlow, _high: t._low) - t = UInt128(t._high) &+ UInt128(lhs.high._low) &+ UInt128(~rhs.high._low) - let newhigh = UInt128(_low: t._low, _high: t._high &+ lhs.high._high &+ ~rhs.high._high) - return UInt256(high: newhigh, low: newlow) - } - - static func < (lhs: UInt256, rhs: UInt256) -> Bool { - return (lhs.high < rhs.high) - || (lhs.high == rhs.high - && lhs.low < rhs.low) - } +fileprivate struct _UInt256 { + var high: UInt128 + var low: UInt128 + + init() { + self.high = 0 + self.low = 0 + } + + init(high: UInt64, _ midHigh: UInt64, _ midLow: UInt64, low: UInt64) { + self.high = UInt128(_low: midHigh, _high: high) + self.low = UInt128(_low: low, _high: midLow) + } + + init(high: UInt128, low: UInt128) { + self.high = high + self.low = low + } + + mutating func 
shiftRightRoundingDown(by shift: Int) { + assert(shift < 32 && shift >= 0) + var t = UInt128(low._low >> shift) + t |= UInt128(low._high) &<< (64 - shift) + let newlow = t._low + t = UInt128(t._high) + t |= UInt128(high._low) &<< (64 - shift) + low = UInt128(_low: newlow, _high: t._low) + t = UInt128(t._high) + t |= UInt128(high._high) &<< (64 - shift) + high = t + } + + mutating func shiftRightRoundingUp(by shift: Int) { + assert(shift < 32 && shift >= 0) + let bias = (UInt64(1) &<< shift) - 1 + var t = UInt128((low._low + bias) >> shift) + t |= UInt128(low._high) &<< (64 - shift) + let newlow = t._low + t = UInt128(t._high) + t |= UInt128(high._low) &<< (64 - shift) + low = UInt128(_low: newlow, _high: t._low) + t = UInt128(t._high) + t |= UInt128(high._high) &<< (64 - shift) + high = t + } + + mutating func multiply(by rhs: UInt32) { + var t = UInt128(low._low) &* UInt128(rhs) + let newlow = t._low + t = UInt128(t._high) &+ UInt128(low._high) &* UInt128(rhs) + low = UInt128(_low: newlow, _high: t._low) + t = UInt128(t._high) &+ UInt128(high._low) &* UInt128(rhs) + let newmidhigh = t._low + t = UInt128(t._high) &+ UInt128(high._high) &* UInt128(rhs) + high = UInt128(_low: newmidhigh, _high: t._low) + assert(t._high == 0) + } + + mutating func multiplyRoundingDown(by rhs: UInt128) { + var current = UInt128(low._low) * UInt128(rhs._low) + + current = UInt128(current._high) + var t = UInt128(low._low) &* UInt128(rhs._high) + current += UInt128(t._low) + var next = UInt128(t._high) + t = UInt128(low._high) &* UInt128(rhs._low) + current += UInt128(t._low) + next += UInt128(t._high) + + current = next + UInt128(current._high) + t = UInt128(low._high) &* UInt128(rhs._high) + current += UInt128(t._low) + next = UInt128(t._high) + t = UInt128(high._low) &* UInt128(rhs._low) + current += UInt128(t._low) + next += UInt128(t._high) + let newlow = current._low + + current = next + UInt128(current._high) + t = UInt128(high._low) &* UInt128(rhs._high) + current += 
UInt128(t._low) + next = UInt128(t._high) + t = UInt128(high._high) &* UInt128(rhs._low) + current += UInt128(t._low) + next += UInt128(t._high) + low = UInt128(_low: newlow, _high: current._low) + + current = next + UInt128(current._high) + t = UInt128(high._high) &* UInt128(rhs._high) + high = current + t + } + + mutating func multiplyRoundingUp(by rhs: UInt128) { + var current = UInt128(low._low) &* UInt128(rhs._low) + current += UInt128(UInt64.max) + + current = UInt128(current._high) + var t = UInt128(low._low) &* UInt128(rhs._high) + current += UInt128(t._low) + var next = UInt128(t._high) + t = UInt128(low._high) &* UInt128(rhs._low) + current += UInt128(t._low) + next += UInt128(t._high) + current += UInt128(UInt64.max) + + current = next + UInt128(current._high) + t = UInt128(low._high) &* UInt128(rhs._high) + current += UInt128(t._low) + next = UInt128(t._high) + t = UInt128(high._low) &* UInt128(rhs._low) + current += UInt128(t._low) + next += UInt128(t._high) + let newlow = current._low + + current = next + UInt128(current._high) + t = UInt128(high._low) &* UInt128(rhs._high) + current += UInt128(t._low) + next = UInt128(t._high) + t = UInt128(high._high) &* UInt128(rhs._low) + current += UInt128(t._low) + next += UInt128(t._high) + low = UInt128(_low: newlow, _high: current._low) + + current = next + UInt128(current._high) + t = UInt128(high._high) &* UInt128(rhs._high) + high = current + t + } + + mutating func extractIntegerPart(_ bits: Int) -> UInt { + assert(bits < 16) + let integral = high._high >> (64 &- bits) + high = UInt128( + _low: high._low, + _high: high._high &- (integral &<< (64 &- bits))) + return UInt(truncatingIfNeeded: integral) + } + + static func &- (lhs: _UInt256, rhs: _UInt256) -> _UInt256 { + var t = UInt128(lhs.low._low) &+ UInt128(~rhs.low._low) &+ 1 + let newlowlow = t._low + t = UInt128(t._high) &+ UInt128(lhs.low._high) &+ UInt128(~rhs.low._high) + let newlow = UInt128(_low: newlowlow, _high: t._low) + t = UInt128(t._high) 
&+ UInt128(lhs.high._low) &+ UInt128(~rhs.high._low) + let newhigh = UInt128( + _low: t._low, + _high: t._high &+ lhs.high._high &+ ~rhs.high._high) + return _UInt256(high: newhigh, low: newlow) + } + + static func < (lhs: _UInt256, rhs: _UInt256) -> Bool { + return (lhs.high < rhs.high) + || (lhs.high == rhs.high + && lhs.low < rhs.low) + } } // ================================================================ @@ -1951,62 +2142,72 @@ fileprivate struct UInt256 { @available(SwiftStdlib 6.2, *) @inline(__always) -fileprivate func intervalContainingPowerOf10_Binary32(_ p: Int, _ lower: inout UInt64, _ upper: inout UInt64) -> Int { - if p >= 0 { - let base = powersOf10_Exact128[p &* 2 &+ 1] - lower = base - if p < 28 { - upper = base - } else { - upper = base &+ 1 - } +fileprivate func _intervalContainingPowerOf10_Binary32( + p: Int, + lower: inout UInt64, + upper: inout UInt64 +) -> Int { + if p >= 0 { + let base = powersOf10_Exact128[p &* 2 &+ 1] + lower = base + if p < 28 { + upper = base } else { - let base = powersOf10_negativeBinary32[p &+ 40] - lower = base - upper = base &+ 1 + upper = base &+ 1 } - return binaryExponentFor10ToThe(p) + } else { + let base = powersOf10_negativeBinary32[p &+ 40] + lower = base + upper = base &+ 1 + } + return binaryExponentFor10ToThe(p) } @available(SwiftStdlib 6.2, *) @inline(__always) -fileprivate func intervalContainingPowerOf10_Binary64(_ p: Int, _ lower: inout UInt128, _ upper: inout UInt128) -> Int { - if p >= 0 && p <= 55 { - let upper64 = powersOf10_Exact128[p &* 2 &+ 1] - let lower64 = powersOf10_Exact128[p &* 2] - upper = UInt128(_low: lower64, _high: upper64) - lower = upper - return binaryExponentFor10ToThe(p) - } - - let index = p &+ 400 - let mainPower = index / 28 - let baseHigh = powersOf10_Binary64[mainPower &* 2 &+ 1] - let baseLow = powersOf10_Binary64[mainPower &* 2] - let extraPower = index &- mainPower &* 28 - let baseExponent = binaryExponentFor10ToThe(p &- extraPower) - - if extraPower == 0 { - lower = 
UInt128(_low: baseLow, _high: baseHigh) - upper = lower &+ 1 - return baseExponent - } else { - let extra = powersOf10_Exact128[extraPower &* 2 &+ 1] - lower = ((UInt128(truncatingIfNeeded:baseHigh) &* UInt128(truncatingIfNeeded:extra)) - &+ ((UInt128(truncatingIfNeeded:baseLow) &* UInt128(truncatingIfNeeded:extra)) &>> 64)) - upper = lower &+ 2 - return baseExponent &+ binaryExponentFor10ToThe(extraPower) - } +fileprivate func _intervalContainingPowerOf10_Binary64( + p: Int, + lower: inout UInt128, + upper: inout UInt128 +) -> Int { + if p >= 0 && p <= 55 { + let upper64 = powersOf10_Exact128[p &* 2 &+ 1] + let lower64 = powersOf10_Exact128[p &* 2] + upper = UInt128(_low: lower64, _high: upper64) + lower = upper + return binaryExponentFor10ToThe(p) + } + + let index = p &+ 400 + let mainPower = index / 28 + let baseHigh = powersOf10_Binary64[mainPower &* 2 &+ 1] + let baseLow = powersOf10_Binary64[mainPower &* 2] + let extraPower = index &- mainPower &* 28 + let baseExponent = binaryExponentFor10ToThe(p &- extraPower) + + if extraPower == 0 { + lower = UInt128(_low: baseLow, _high: baseHigh) + upper = lower &+ 1 + return baseExponent + } else { + let extra = powersOf10_Exact128[extraPower &* 2 &+ 1] + lower = ((UInt128(truncatingIfNeeded:baseHigh) + &* UInt128(truncatingIfNeeded:extra)) + &+ ((UInt128(truncatingIfNeeded:baseLow) + &* UInt128(truncatingIfNeeded:extra)) &>> 64)) + upper = lower &+ 2 + return baseExponent &+ binaryExponentFor10ToThe(extraPower) + } } @inline(__always) fileprivate func binaryExponentFor10ToThe(_ p: Int) -> Int { - return Int(((Int64(p) &* 55732705) >> 24) &+ 1) + return Int(((Int64(p) &* 55732705) >> 24) &+ 1) } @inline(__always) fileprivate func decimalExponentFor2ToThe(_ p: Int) -> Int { - return Int((Int64(p) &* 20201781) >> 26) + return Int((Int64(p) &* 20201781) >> 26) } // Each of the constant values here have an implicit binary point at @@ -2028,46 +2229,46 @@ fileprivate func decimalExponentFor2ToThe(_ p: Int) -> Int { // 
Table size: 320 bytes @available(SwiftStdlib 6.2, *) fileprivate let powersOf10_negativeBinary32: InlineArray<_, UInt64> = [ - 0x8b61313bbabce2c6, // x 2^-132 ~= 10^-40 - 0xae397d8aa96c1b77, // x 2^-129 ~= 10^-39 - 0xd9c7dced53c72255, // x 2^-126 ~= 10^-38 - 0x881cea14545c7575, // x 2^-122 ~= 10^-37 - 0xaa242499697392d2, // x 2^-119 ~= 10^-36 - 0xd4ad2dbfc3d07787, // x 2^-116 ~= 10^-35 - 0x84ec3c97da624ab4, // x 2^-112 ~= 10^-34 - 0xa6274bbdd0fadd61, // x 2^-109 ~= 10^-33 - 0xcfb11ead453994ba, // x 2^-106 ~= 10^-32 - 0x81ceb32c4b43fcf4, // x 2^-102 ~= 10^-31 - 0xa2425ff75e14fc31, // x 2^-99 ~= 10^-30 - 0xcad2f7f5359a3b3e, // x 2^-96 ~= 10^-29 - 0xfd87b5f28300ca0d, // x 2^-93 ~= 10^-28 - 0x9e74d1b791e07e48, // x 2^-89 ~= 10^-27 - 0xc612062576589dda, // x 2^-86 ~= 10^-26 - 0xf79687aed3eec551, // x 2^-83 ~= 10^-25 - 0x9abe14cd44753b52, // x 2^-79 ~= 10^-24 - 0xc16d9a0095928a27, // x 2^-76 ~= 10^-23 - 0xf1c90080baf72cb1, // x 2^-73 ~= 10^-22 - 0x971da05074da7bee, // x 2^-69 ~= 10^-21 - 0xbce5086492111aea, // x 2^-66 ~= 10^-20 - 0xec1e4a7db69561a5, // x 2^-63 ~= 10^-19 - 0x9392ee8e921d5d07, // x 2^-59 ~= 10^-18 - 0xb877aa3236a4b449, // x 2^-56 ~= 10^-17 - 0xe69594bec44de15b, // x 2^-53 ~= 10^-16 - 0x901d7cf73ab0acd9, // x 2^-49 ~= 10^-15 - 0xb424dc35095cd80f, // x 2^-46 ~= 10^-14 - 0xe12e13424bb40e13, // x 2^-43 ~= 10^-13 - 0x8cbccc096f5088cb, // x 2^-39 ~= 10^-12 - 0xafebff0bcb24aafe, // x 2^-36 ~= 10^-11 - 0xdbe6fecebdedd5be, // x 2^-33 ~= 10^-10 - 0x89705f4136b4a597, // x 2^-29 ~= 10^-9 - 0xabcc77118461cefc, // x 2^-26 ~= 10^-8 - 0xd6bf94d5e57a42bc, // x 2^-23 ~= 10^-7 - 0x8637bd05af6c69b5, // x 2^-19 ~= 10^-6 - 0xa7c5ac471b478423, // x 2^-16 ~= 10^-5 - 0xd1b71758e219652b, // x 2^-13 ~= 10^-4 - 0x83126e978d4fdf3b, // x 2^-9 ~= 10^-3 - 0xa3d70a3d70a3d70a, // x 2^-6 ~= 10^-2 - 0xcccccccccccccccc, // x 2^-3 ~= 10^-1 + 0x8b61313bbabce2c6, // x 2^-132 ~= 10^-40 + 0xae397d8aa96c1b77, // x 2^-129 ~= 10^-39 + 0xd9c7dced53c72255, // x 2^-126 ~= 10^-38 + 0x881cea14545c7575, // 
x 2^-122 ~= 10^-37 + 0xaa242499697392d2, // x 2^-119 ~= 10^-36 + 0xd4ad2dbfc3d07787, // x 2^-116 ~= 10^-35 + 0x84ec3c97da624ab4, // x 2^-112 ~= 10^-34 + 0xa6274bbdd0fadd61, // x 2^-109 ~= 10^-33 + 0xcfb11ead453994ba, // x 2^-106 ~= 10^-32 + 0x81ceb32c4b43fcf4, // x 2^-102 ~= 10^-31 + 0xa2425ff75e14fc31, // x 2^-99 ~= 10^-30 + 0xcad2f7f5359a3b3e, // x 2^-96 ~= 10^-29 + 0xfd87b5f28300ca0d, // x 2^-93 ~= 10^-28 + 0x9e74d1b791e07e48, // x 2^-89 ~= 10^-27 + 0xc612062576589dda, // x 2^-86 ~= 10^-26 + 0xf79687aed3eec551, // x 2^-83 ~= 10^-25 + 0x9abe14cd44753b52, // x 2^-79 ~= 10^-24 + 0xc16d9a0095928a27, // x 2^-76 ~= 10^-23 + 0xf1c90080baf72cb1, // x 2^-73 ~= 10^-22 + 0x971da05074da7bee, // x 2^-69 ~= 10^-21 + 0xbce5086492111aea, // x 2^-66 ~= 10^-20 + 0xec1e4a7db69561a5, // x 2^-63 ~= 10^-19 + 0x9392ee8e921d5d07, // x 2^-59 ~= 10^-18 + 0xb877aa3236a4b449, // x 2^-56 ~= 10^-17 + 0xe69594bec44de15b, // x 2^-53 ~= 10^-16 + 0x901d7cf73ab0acd9, // x 2^-49 ~= 10^-15 + 0xb424dc35095cd80f, // x 2^-46 ~= 10^-14 + 0xe12e13424bb40e13, // x 2^-43 ~= 10^-13 + 0x8cbccc096f5088cb, // x 2^-39 ~= 10^-12 + 0xafebff0bcb24aafe, // x 2^-36 ~= 10^-11 + 0xdbe6fecebdedd5be, // x 2^-33 ~= 10^-10 + 0x89705f4136b4a597, // x 2^-29 ~= 10^-9 + 0xabcc77118461cefc, // x 2^-26 ~= 10^-8 + 0xd6bf94d5e57a42bc, // x 2^-23 ~= 10^-7 + 0x8637bd05af6c69b5, // x 2^-19 ~= 10^-6 + 0xa7c5ac471b478423, // x 2^-16 ~= 10^-5 + 0xd1b71758e219652b, // x 2^-13 ~= 10^-4 + 0x83126e978d4fdf3b, // x 2^-9 ~= 10^-3 + 0xa3d70a3d70a3d70a, // x 2^-6 ~= 10^-2 + 0xcccccccccccccccc, // x 2^-3 ~= 10^-1 ] // All the powers of 10 that can be represented exactly @@ -2087,63 +2288,63 @@ fileprivate let powersOf10_negativeBinary32: InlineArray<_, UInt64> = [ // Table size: 896 bytes @available(SwiftStdlib 6.2, *) fileprivate let powersOf10_Exact128: InlineArray<_, UInt64> = [ - // Low order ... 
high order - 0x0000000000000000, 0x8000000000000000, // x 2^1 == 10^0 exactly - 0x0000000000000000, 0xa000000000000000, // x 2^4 == 10^1 exactly - 0x0000000000000000, 0xc800000000000000, // x 2^7 == 10^2 exactly - 0x0000000000000000, 0xfa00000000000000, // x 2^10 == 10^3 exactly - 0x0000000000000000, 0x9c40000000000000, // x 2^14 == 10^4 exactly - 0x0000000000000000, 0xc350000000000000, // x 2^17 == 10^5 exactly - 0x0000000000000000, 0xf424000000000000, // x 2^20 == 10^6 exactly - 0x0000000000000000, 0x9896800000000000, // x 2^24 == 10^7 exactly - 0x0000000000000000, 0xbebc200000000000, // x 2^27 == 10^8 exactly - 0x0000000000000000, 0xee6b280000000000, // x 2^30 == 10^9 exactly - 0x0000000000000000, 0x9502f90000000000, // x 2^34 == 10^10 exactly - 0x0000000000000000, 0xba43b74000000000, // x 2^37 == 10^11 exactly - 0x0000000000000000, 0xe8d4a51000000000, // x 2^40 == 10^12 exactly - 0x0000000000000000, 0x9184e72a00000000, // x 2^44 == 10^13 exactly - 0x0000000000000000, 0xb5e620f480000000, // x 2^47 == 10^14 exactly - 0x0000000000000000, 0xe35fa931a0000000, // x 2^50 == 10^15 exactly - 0x0000000000000000, 0x8e1bc9bf04000000, // x 2^54 == 10^16 exactly - 0x0000000000000000, 0xb1a2bc2ec5000000, // x 2^57 == 10^17 exactly - 0x0000000000000000, 0xde0b6b3a76400000, // x 2^60 == 10^18 exactly - 0x0000000000000000, 0x8ac7230489e80000, // x 2^64 == 10^19 exactly - 0x0000000000000000, 0xad78ebc5ac620000, // x 2^67 == 10^20 exactly - 0x0000000000000000, 0xd8d726b7177a8000, // x 2^70 == 10^21 exactly - 0x0000000000000000, 0x878678326eac9000, // x 2^74 == 10^22 exactly - 0x0000000000000000, 0xa968163f0a57b400, // x 2^77 == 10^23 exactly - 0x0000000000000000, 0xd3c21bcecceda100, // x 2^80 == 10^24 exactly - 0x0000000000000000, 0x84595161401484a0, // x 2^84 == 10^25 exactly - 0x0000000000000000, 0xa56fa5b99019a5c8, // x 2^87 == 10^26 exactly - 0x0000000000000000, 0xcecb8f27f4200f3a, // x 2^90 == 10^27 exactly - 0x4000000000000000, 0x813f3978f8940984, // x 2^94 == 10^28 exactly 
- 0x5000000000000000, 0xa18f07d736b90be5, // x 2^97 == 10^29 exactly - 0xa400000000000000, 0xc9f2c9cd04674ede, // x 2^100 == 10^30 exactly - 0x4d00000000000000, 0xfc6f7c4045812296, // x 2^103 == 10^31 exactly - 0xf020000000000000, 0x9dc5ada82b70b59d, // x 2^107 == 10^32 exactly - 0x6c28000000000000, 0xc5371912364ce305, // x 2^110 == 10^33 exactly - 0xc732000000000000, 0xf684df56c3e01bc6, // x 2^113 == 10^34 exactly - 0x3c7f400000000000, 0x9a130b963a6c115c, // x 2^117 == 10^35 exactly - 0x4b9f100000000000, 0xc097ce7bc90715b3, // x 2^120 == 10^36 exactly - 0x1e86d40000000000, 0xf0bdc21abb48db20, // x 2^123 == 10^37 exactly - 0x1314448000000000, 0x96769950b50d88f4, // x 2^127 == 10^38 exactly - 0x17d955a000000000, 0xbc143fa4e250eb31, // x 2^130 == 10^39 exactly - 0x5dcfab0800000000, 0xeb194f8e1ae525fd, // x 2^133 == 10^40 exactly - 0x5aa1cae500000000, 0x92efd1b8d0cf37be, // x 2^137 == 10^41 exactly - 0xf14a3d9e40000000, 0xb7abc627050305ad, // x 2^140 == 10^42 exactly - 0x6d9ccd05d0000000, 0xe596b7b0c643c719, // x 2^143 == 10^43 exactly - 0xe4820023a2000000, 0x8f7e32ce7bea5c6f, // x 2^147 == 10^44 exactly - 0xdda2802c8a800000, 0xb35dbf821ae4f38b, // x 2^150 == 10^45 exactly - 0xd50b2037ad200000, 0xe0352f62a19e306e, // x 2^153 == 10^46 exactly - 0x4526f422cc340000, 0x8c213d9da502de45, // x 2^157 == 10^47 exactly - 0x9670b12b7f410000, 0xaf298d050e4395d6, // x 2^160 == 10^48 exactly - 0x3c0cdd765f114000, 0xdaf3f04651d47b4c, // x 2^163 == 10^49 exactly - 0xa5880a69fb6ac800, 0x88d8762bf324cd0f, // x 2^167 == 10^50 exactly - 0x8eea0d047a457a00, 0xab0e93b6efee0053, // x 2^170 == 10^51 exactly - 0x72a4904598d6d880, 0xd5d238a4abe98068, // x 2^173 == 10^52 exactly - 0x47a6da2b7f864750, 0x85a36366eb71f041, // x 2^177 == 10^53 exactly - 0x999090b65f67d924, 0xa70c3c40a64e6c51, // x 2^180 == 10^54 exactly - 0xfff4b4e3f741cf6d, 0xd0cf4b50cfe20765, // x 2^183 == 10^55 exactly + // Low order ... 
high order + 0x0000000000000000, 0x8000000000000000, // x 2^1 == 10^0 exactly + 0x0000000000000000, 0xa000000000000000, // x 2^4 == 10^1 exactly + 0x0000000000000000, 0xc800000000000000, // x 2^7 == 10^2 exactly + 0x0000000000000000, 0xfa00000000000000, // x 2^10 == 10^3 exactly + 0x0000000000000000, 0x9c40000000000000, // x 2^14 == 10^4 exactly + 0x0000000000000000, 0xc350000000000000, // x 2^17 == 10^5 exactly + 0x0000000000000000, 0xf424000000000000, // x 2^20 == 10^6 exactly + 0x0000000000000000, 0x9896800000000000, // x 2^24 == 10^7 exactly + 0x0000000000000000, 0xbebc200000000000, // x 2^27 == 10^8 exactly + 0x0000000000000000, 0xee6b280000000000, // x 2^30 == 10^9 exactly + 0x0000000000000000, 0x9502f90000000000, // x 2^34 == 10^10 exactly + 0x0000000000000000, 0xba43b74000000000, // x 2^37 == 10^11 exactly + 0x0000000000000000, 0xe8d4a51000000000, // x 2^40 == 10^12 exactly + 0x0000000000000000, 0x9184e72a00000000, // x 2^44 == 10^13 exactly + 0x0000000000000000, 0xb5e620f480000000, // x 2^47 == 10^14 exactly + 0x0000000000000000, 0xe35fa931a0000000, // x 2^50 == 10^15 exactly + 0x0000000000000000, 0x8e1bc9bf04000000, // x 2^54 == 10^16 exactly + 0x0000000000000000, 0xb1a2bc2ec5000000, // x 2^57 == 10^17 exactly + 0x0000000000000000, 0xde0b6b3a76400000, // x 2^60 == 10^18 exactly + 0x0000000000000000, 0x8ac7230489e80000, // x 2^64 == 10^19 exactly + 0x0000000000000000, 0xad78ebc5ac620000, // x 2^67 == 10^20 exactly + 0x0000000000000000, 0xd8d726b7177a8000, // x 2^70 == 10^21 exactly + 0x0000000000000000, 0x878678326eac9000, // x 2^74 == 10^22 exactly + 0x0000000000000000, 0xa968163f0a57b400, // x 2^77 == 10^23 exactly + 0x0000000000000000, 0xd3c21bcecceda100, // x 2^80 == 10^24 exactly + 0x0000000000000000, 0x84595161401484a0, // x 2^84 == 10^25 exactly + 0x0000000000000000, 0xa56fa5b99019a5c8, // x 2^87 == 10^26 exactly + 0x0000000000000000, 0xcecb8f27f4200f3a, // x 2^90 == 10^27 exactly + 0x4000000000000000, 0x813f3978f8940984, // x 2^94 == 10^28 exactly 
+ 0x5000000000000000, 0xa18f07d736b90be5, // x 2^97 == 10^29 exactly + 0xa400000000000000, 0xc9f2c9cd04674ede, // x 2^100 == 10^30 exactly + 0x4d00000000000000, 0xfc6f7c4045812296, // x 2^103 == 10^31 exactly + 0xf020000000000000, 0x9dc5ada82b70b59d, // x 2^107 == 10^32 exactly + 0x6c28000000000000, 0xc5371912364ce305, // x 2^110 == 10^33 exactly + 0xc732000000000000, 0xf684df56c3e01bc6, // x 2^113 == 10^34 exactly + 0x3c7f400000000000, 0x9a130b963a6c115c, // x 2^117 == 10^35 exactly + 0x4b9f100000000000, 0xc097ce7bc90715b3, // x 2^120 == 10^36 exactly + 0x1e86d40000000000, 0xf0bdc21abb48db20, // x 2^123 == 10^37 exactly + 0x1314448000000000, 0x96769950b50d88f4, // x 2^127 == 10^38 exactly + 0x17d955a000000000, 0xbc143fa4e250eb31, // x 2^130 == 10^39 exactly + 0x5dcfab0800000000, 0xeb194f8e1ae525fd, // x 2^133 == 10^40 exactly + 0x5aa1cae500000000, 0x92efd1b8d0cf37be, // x 2^137 == 10^41 exactly + 0xf14a3d9e40000000, 0xb7abc627050305ad, // x 2^140 == 10^42 exactly + 0x6d9ccd05d0000000, 0xe596b7b0c643c719, // x 2^143 == 10^43 exactly + 0xe4820023a2000000, 0x8f7e32ce7bea5c6f, // x 2^147 == 10^44 exactly + 0xdda2802c8a800000, 0xb35dbf821ae4f38b, // x 2^150 == 10^45 exactly + 0xd50b2037ad200000, 0xe0352f62a19e306e, // x 2^153 == 10^46 exactly + 0x4526f422cc340000, 0x8c213d9da502de45, // x 2^157 == 10^47 exactly + 0x9670b12b7f410000, 0xaf298d050e4395d6, // x 2^160 == 10^48 exactly + 0x3c0cdd765f114000, 0xdaf3f04651d47b4c, // x 2^163 == 10^49 exactly + 0xa5880a69fb6ac800, 0x88d8762bf324cd0f, // x 2^167 == 10^50 exactly + 0x8eea0d047a457a00, 0xab0e93b6efee0053, // x 2^170 == 10^51 exactly + 0x72a4904598d6d880, 0xd5d238a4abe98068, // x 2^173 == 10^52 exactly + 0x47a6da2b7f864750, 0x85a36366eb71f041, // x 2^177 == 10^53 exactly + 0x999090b65f67d924, 0xa70c3c40a64e6c51, // x 2^180 == 10^54 exactly + 0xfff4b4e3f741cf6d, 0xd0cf4b50cfe20765, // x 2^183 == 10^55 exactly ] // Every 28th power of 10 across the full range of Double. 
@@ -2160,36 +2361,36 @@ fileprivate let powersOf10_Exact128: InlineArray<_, UInt64> = [ // Table size: 464 bytes @available(SwiftStdlib 6.2, *) fileprivate let powersOf10_Binary64: InlineArray<_, UInt64> = [ - // low-order half, high-order half - 0x3931b850df08e738, 0x95fe7e07c91efafa, // x 2^-1328 ~= 10^-400 - 0xba954f8e758fecb3, 0x9774919ef68662a3, // x 2^-1235 ~= 10^-372 - 0x9028bed2939a635c, 0x98ee4a22ecf3188b, // x 2^-1142 ~= 10^-344 - 0x47b233c92125366e, 0x9a6bb0aa55653b2d, // x 2^-1049 ~= 10^-316 - 0x4ee367f9430aec32, 0x9becce62836ac577, // x 2^-956 ~= 10^-288 - 0x6f773fc3603db4a9, 0x9d71ac8fada6c9b5, // x 2^-863 ~= 10^-260 - 0xc47bc5014a1a6daf, 0x9efa548d26e5a6e1, // x 2^-770 ~= 10^-232 - 0x80e8a40eccd228a4, 0xa086cfcd97bf97f3, // x 2^-677 ~= 10^-204 - 0xb8ada00e5a506a7c, 0xa21727db38cb002f, // x 2^-584 ~= 10^-176 - 0xc13e60d0d2e0ebba, 0xa3ab66580d5fdaf5, // x 2^-491 ~= 10^-148 - 0xc2974eb4ee658828, 0xa54394fe1eedb8fe, // x 2^-398 ~= 10^-120 - 0xcb4ccd500f6bb952, 0xa6dfbd9fb8e5b88e, // x 2^-305 ~= 10^-92 - 0x3f2398d747b36224, 0xa87fea27a539e9a5, // x 2^-212 ~= 10^-64 - 0xdde50bd1d5d0b9e9, 0xaa242499697392d2, // x 2^-119 ~= 10^-36 - 0xfdc20d2b36ba7c3d, 0xabcc77118461cefc, // x 2^-26 ~= 10^-8 - 0x0000000000000000, 0xad78ebc5ac620000, // x 2^67 == 10^20 exactly - 0x9670b12b7f410000, 0xaf298d050e4395d6, // x 2^160 == 10^48 exactly - 0x3b25a55f43294bcb, 0xb0de65388cc8ada8, // x 2^253 ~= 10^76 - 0x58edec91ec2cb657, 0xb2977ee300c50fe7, // x 2^346 ~= 10^104 - 0x29babe4598c311fb, 0xb454e4a179dd1877, // x 2^439 ~= 10^132 - 0x577b986b314d6009, 0xb616a12b7fe617aa, // x 2^532 ~= 10^160 - 0x0c11ed6d538aeb2f, 0xb7dcbf5354e9bece, // x 2^625 ~= 10^188 - 0x6d953e2bd7173692, 0xb9a74a0637ce2ee1, // x 2^718 ~= 10^216 - 0x9d6d1ad41abe37f1, 0xbb764c4ca7a4440f, // x 2^811 ~= 10^244 - 0x4b2d8644d8a74e18, 0xbd49d14aa79dbc82, // x 2^904 ~= 10^272 - 0xe0470a63e6bd56c3, 0xbf21e44003acdd2c, // x 2^997 ~= 10^300 - 0x505f522e53053ff2, 0xc0fe908895cf3b44, // x 2^1090 ~= 10^328 - 
0xcca845ab2beafa9a, 0xc2dfe19c8c055535, // x 2^1183 ~= 10^356 - 0x1027fff56784f444, 0xc4c5e310aef8aa17, // x 2^1276 ~= 10^384 + // low-order half, high-order half + 0x3931b850df08e738, 0x95fe7e07c91efafa, // x 2^-1328 ~= 10^-400 + 0xba954f8e758fecb3, 0x9774919ef68662a3, // x 2^-1235 ~= 10^-372 + 0x9028bed2939a635c, 0x98ee4a22ecf3188b, // x 2^-1142 ~= 10^-344 + 0x47b233c92125366e, 0x9a6bb0aa55653b2d, // x 2^-1049 ~= 10^-316 + 0x4ee367f9430aec32, 0x9becce62836ac577, // x 2^-956 ~= 10^-288 + 0x6f773fc3603db4a9, 0x9d71ac8fada6c9b5, // x 2^-863 ~= 10^-260 + 0xc47bc5014a1a6daf, 0x9efa548d26e5a6e1, // x 2^-770 ~= 10^-232 + 0x80e8a40eccd228a4, 0xa086cfcd97bf97f3, // x 2^-677 ~= 10^-204 + 0xb8ada00e5a506a7c, 0xa21727db38cb002f, // x 2^-584 ~= 10^-176 + 0xc13e60d0d2e0ebba, 0xa3ab66580d5fdaf5, // x 2^-491 ~= 10^-148 + 0xc2974eb4ee658828, 0xa54394fe1eedb8fe, // x 2^-398 ~= 10^-120 + 0xcb4ccd500f6bb952, 0xa6dfbd9fb8e5b88e, // x 2^-305 ~= 10^-92 + 0x3f2398d747b36224, 0xa87fea27a539e9a5, // x 2^-212 ~= 10^-64 + 0xdde50bd1d5d0b9e9, 0xaa242499697392d2, // x 2^-119 ~= 10^-36 + 0xfdc20d2b36ba7c3d, 0xabcc77118461cefc, // x 2^-26 ~= 10^-8 + 0x0000000000000000, 0xad78ebc5ac620000, // x 2^67 == 10^20 exactly + 0x9670b12b7f410000, 0xaf298d050e4395d6, // x 2^160 == 10^48 exactly + 0x3b25a55f43294bcb, 0xb0de65388cc8ada8, // x 2^253 ~= 10^76 + 0x58edec91ec2cb657, 0xb2977ee300c50fe7, // x 2^346 ~= 10^104 + 0x29babe4598c311fb, 0xb454e4a179dd1877, // x 2^439 ~= 10^132 + 0x577b986b314d6009, 0xb616a12b7fe617aa, // x 2^532 ~= 10^160 + 0x0c11ed6d538aeb2f, 0xb7dcbf5354e9bece, // x 2^625 ~= 10^188 + 0x6d953e2bd7173692, 0xb9a74a0637ce2ee1, // x 2^718 ~= 10^216 + 0x9d6d1ad41abe37f1, 0xbb764c4ca7a4440f, // x 2^811 ~= 10^244 + 0x4b2d8644d8a74e18, 0xbd49d14aa79dbc82, // x 2^904 ~= 10^272 + 0xe0470a63e6bd56c3, 0xbf21e44003acdd2c, // x 2^997 ~= 10^300 + 0x505f522e53053ff2, 0xc0fe908895cf3b44, // x 2^1090 ~= 10^328 + 0xcca845ab2beafa9a, 0xc2dfe19c8c055535, // x 2^1183 ~= 10^356 + 0x1027fff56784f444, 
0xc4c5e310aef8aa17, // x 2^1276 ~= 10^384 ] // Needed by 80- and 128-bit formatters above @@ -2200,214 +2401,220 @@ fileprivate let powersOf10_Binary64: InlineArray<_, UInt64> = [ // Table size: 5728 bytes @available(SwiftStdlib 6.2, *) fileprivate let powersOf10_Binary128: InlineArray<_, UInt64> = [ - // Low-order ... high-order - 0xaec2e6aff96b46ae, 0xf91044c2eff84750, 0x2b55c9e70e00c557, 0xb6536903bf8f2bda, // x 2^-16556 ~= 10^-4984 - 0xda1b3c3dd3889587, 0x73a7380aba84a6b1, 0xbddb2dfde3f8a6e3, 0xb9e5428330737362, // x 2^-16370 ~= 10^-4928 - 0xa2d23c57cfebb9ec, 0x9f165c039ead6d77, 0x88227fdfc13ab53d, 0xbd89006346a9a34d, // x 2^-16184 ~= 10^-4872 - 0x0333d510cf27e5a5, 0x4e3cc383eaa17b7b, 0xe05fe4207ca3d508, 0xc13efc51ade7df64, // x 2^-15998 ~= 10^-4816 - 0xff242c569bc1f539, 0x5c67ba58680c4cce, 0x3c55f3f947fef0e9, 0xc50791bd8dd72edb, // x 2^-15812 ~= 10^-4760 - 0xe4b75ae27bec50bf, 0x25b0419765fdfcdb, 0x0915564d8ab057ee, 0xc8e31de056f89c19, // x 2^-15626 ~= 10^-4704 - 0x548b1e80a94f3434, 0xe418e9217ce83755, 0x801e38463183fc88, 0xccd1ffc6bba63e21, // x 2^-15440 ~= 10^-4648 - 0x541950a0fdc2b4d9, 0xeea173da1f0eb7b4, 0xcfadf6b2aa7c4f43, 0xd0d49859d60d40a3, // x 2^-15254 ~= 10^-4592 - 0x7e64501be95ad76b, 0x451e855d8acef835, 0x9e601e707a2c3488, 0xd4eb4a687c0253e8, // x 2^-15068 ~= 10^-4536 - 0xdadd9645f360cb51, 0xf290163350ecb3eb, 0xa8edffdccfe4db4b, 0xd9167ab0c1965798, // x 2^-14882 ~= 10^-4480 - 0x7e447db3018ffbdf, 0x4fa1860c08a85923, 0xb17cd86e7fcece75, 0xdd568fe9ab559344, // x 2^-14696 ~= 10^-4424 - 0x61cd4655bf64d265, 0xb19fd88fe285b3bc, 0x1151250681d59705, 0xe1abf2cd11206610, // x 2^-14510 ~= 10^-4368 - 0xa5703f5ce7a619ec, 0x361243a84b55574d, 0x025a8e1e5dbb41d6, 0xe6170e21b2910457, // x 2^-14324 ~= 10^-4312 - 0xb93897a6cf5d3e61, 0x18746fcc6a190db9, 0x66e849253e5da0c2, 0xea984ec57de69f13, // x 2^-14138 ~= 10^-4256 - 0x309043d12ab5b0ac, 0x79c93cff11f09319, 0xf5a7800f23ef67b8, 0xef3023b80a732d93, // x 2^-13952 ~= 10^-4200 - 0xa3baa84c049b52b9, 0xbec466ee1b586342, 
0x0e85fc7f4edbd3ca, 0xf3defe25478e074a, // x 2^-13766 ~= 10^-4144 - 0xd1f4628316b15c7a, 0xae16192410d3135e, 0x4268a54f70bd28c4, 0xf8a551706112897c, // x 2^-13580 ~= 10^-4088 - 0x9eb9296cc5749dba, 0x48324e275376dfdd, 0x5052e9289f0f2333, 0xfd83933eda772c0b, // x 2^-13394 ~= 10^-4032 - 0xff6aae669a5a0d8a, 0x24fed95087b9006e, 0x01b02378a405b421, 0x813d1dc1f0c754d6, // x 2^-13207 ~= 10^-3976 - 0xf993f18de00dc89b, 0x15617da021b89f92, 0xb782db1fc6aba49b, 0x83c4e245ed051dc1, // x 2^-13021 ~= 10^-3920 - 0xc6a0d64a712172b1, 0x2217669197ac1504, 0x4250be2eeba87d15, 0x86595584116caf3c, // x 2^-12835 ~= 10^-3864 - 0x0bdc0c67a220687b, 0x44a66a6d6fd6537b, 0x3f1f93f1943ca9b6, 0x88fab70d8b44952a, // x 2^-12649 ~= 10^-3808 - 0xb60b57164ad28122, 0xde5bd4572c25a830, 0x2c87f18b39478aa2, 0x8ba947b223e5783e, // x 2^-12463 ~= 10^-3752 - 0xbd59568efdb9bfee, 0x292f8f2c98d7f44c, 0x4054f5360249ebd1, 0x8e6549867da7d11a, // x 2^-12277 ~= 10^-3696 - 0x9fa0721e66791acc, 0x1789061d717d454c, 0xc1187fa0c18adbbe, 0x912effea7015b2c5, // x 2^-12091 ~= 10^-3640 - 0x982b64e953ac4e27, 0x45efb05f20cf48b3, 0x4b4de34e0ebc3e06, 0x9406af8f83fd6265, // x 2^-11905 ~= 10^-3584 - 0xa53f5950eec21dca, 0x3bd8754763bdbca1, 0xac73f0226eff5ea1, 0x96ec9e7f9004839b, // x 2^-11719 ~= 10^-3528 - 0x320e19f88f1161b7, 0x72e93fe0cce7cfd9, 0x2184706ea46a4c38, 0x99e11423765ec1d0, // x 2^-11533 ~= 10^-3472 - 0x491aba48dfc0e36e, 0xd3de560ee34022b2, 0xddadb80577b906bd, 0x9ce4594a044e0f1b, // x 2^-11347 ~= 10^-3416 - 0x06789d038697142f, 0x7a466a75be73db21, 0x60dbd8aa443b560f, 0x9ff6b82ef415d222, // x 2^-11161 ~= 10^-3360 - 0x40ed8056af76ac43, 0x08251c601e346456, 0x7401c6f091f87727, 0xa3187c82120dace6, // x 2^-10975 ~= 10^-3304 - 0x8c643ee307bffec6, 0xf369a11c6f66c05a, 0x4d5b32f713d7f476, 0xa649f36e8583e81a, // x 2^-10789 ~= 10^-3248 - 0xe32f5e080e36b4be, 0x3adf30ff2eb163d4, 0xb4b39dd9ddb8d317, 0xa98b6ba23e2300c7, // x 2^-10603 ~= 10^-3192 - 0x6b9d538c192cfb1b, 0x1c5af3bd4d2c60b5, 0xec41c1793d69d0d1, 0xacdd3555869159d1, // x 2^-10417 
~= 10^-3136 - 0x1adadaeedf7d699c, 0x71043692494aa743, 0x3ca5a7540d9d56c9, 0xb03fa252bd05a815, // x 2^-10231 ~= 10^-3080 - 0xec3e4e5fc6b03617, 0x47c9b16afe8fdf74, 0x92e1bc1fbb33f18d, 0xb3b305fe328e571f, // x 2^-10045 ~= 10^-3024 - 0x1d42fa68b12bdb23, 0xac46a7b3f2b4b34e, 0xa908fd4a88728b6a, 0xb737b55e31cdde04, // x 2^-9859 ~= 10^-2968 - 0x887dede507f2b618, 0x359a8fa0d014b9a7, 0x7c4c65d15c614c56, 0xbace07232df1c802, // x 2^-9673 ~= 10^-2912 - 0x504708e718b4b669, 0xfb4d9440822af452, 0xef84cc99cb4c5d17, 0xbe7653b01aae13e5, // x 2^-9487 ~= 10^-2856 - 0x5b7977525516bff0, 0x75913092420c9b35, 0xcfc147ade4843a24, 0xc230f522ee0a7fc2, // x 2^-9301 ~= 10^-2800 - 0xad5d11883cc1302b, 0x860a754894b9a0bc, 0x4668677d5f46c29b, 0xc5fe475d4cd35cff, // x 2^-9115 ~= 10^-2744 - 0x42032f9f971bfc07, 0x9fb576046ab35018, 0x474b3cb1fe1d6a7f, 0xc9dea80d6283a34c, // x 2^-8929 ~= 10^-2688 - 0xd3e7fbb72403a4dd, 0x8ca223055819af54, 0xd6ea3b733029ef0b, 0xcdd276b6e582284f, // x 2^-8743 ~= 10^-2632 - 0xba2431d885f2b7d9, 0xc9879fc42869f610, 0x3736730a9e47fef8, 0xd1da14bc489025ea, // x 2^-8557 ~= 10^-2576 - 0xa11edbcd65dd1844, 0xcb8edae81a295887, 0x3d24e68dc1027246, 0xd5f5e5681a4b9285, // x 2^-8371 ~= 10^-2520 - 0xa0f076652f69ad08, 0x9d19c341f5f42f2a, 0x742ab8f3864562c8, 0xda264df693ac3e30, // x 2^-8185 ~= 10^-2464 - 0x29f760ef115f2824, 0xe0ee47c041c9de0f, 0x8c119f3680212413, 0xde6bb59f56672cda, // x 2^-7999 ~= 10^-2408 - 0x8b90230b3409c9d3, 0x9d76eef2c1543e65, 0x43190b523f872b9c, 0xe2c6859f5c284230, // x 2^-7813 ~= 10^-2352 - 0xd44ce9993bc6611e, 0x777c9b2dfbede079, 0x2a0969bf88679396, 0xe7372943179706fc, // x 2^-7627 ~= 10^-2296 - 0xe8c5f5a63fd0fbd1, 0x0ccc12293f1d7a58, 0x131565be33dda91a, 0xebbe0df0c8201ac5, // x 2^-7441 ~= 10^-2240 - 0xdb97988dd6b776f4, 0xeb2106f435f7e1d5, 0xccfb1cc2ef1f44de, 0xf05ba3330181c750, // x 2^-7255 ~= 10^-2184 - 0x2fcbc8df94a1d54b, 0x796d0a8120801513, 0x5f8385b3a882ff4c, 0xf5105ac3681f2716, // x 2^-7069 ~= 10^-2128 - 0xc8700c11071a40f5, 0x23cb9e9df9331fe4, 
0x166c15f456786c27, 0xf9dca895a3226409, // x 2^-6883 ~= 10^-2072 - 0x9589f4637a50cbb5, 0xea8242b0030e4a51, 0x6c656c3b1f2c9d91, 0xfec102e2857bc1f9, // x 2^-6697 ~= 10^-2016 - 0xc4be56c83349136c, 0x6188db81ac8e775d, 0xfa70b9a2ca60b004, 0x81def119b76837c8, // x 2^-6510 ~= 10^-1960 - 0xb85d39054658b363, 0xe7df06bc613fda21, 0x6a22490e8e9ec98b, 0x8469e0b6f2b8bd9b, // x 2^-6324 ~= 10^-1904 - 0x800b1e1349fef248, 0x469cfd2e6ca32a77, 0x69138459b0fa72d4, 0x87018eefb53c6325, // x 2^-6138 ~= 10^-1848 - 0xb62593291c768919, 0xc098e6ed0bfbd6f6, 0x6c83ad1260ff20f4, 0x89a63ba4c497b50e, // x 2^-5952 ~= 10^-1792 - 0x92ee7fce474479d3, 0xe02017175bf040c6, 0xd82ef2860273de8d, 0x8c5827f711735b46, // x 2^-5766 ~= 10^-1736 - 0x7b0e6375ca8c77d9, 0x5f07e1e10097d47f, 0x416d7f9ab1e67580, 0x8f17964dfc3961f2, // x 2^-5580 ~= 10^-1680 - 0xc8d869ed561af1ce, 0x8b6648e941de779b, 0x56700866b85d57fe, 0x91e4ca5db93dbfec, // x 2^-5394 ~= 10^-1624 - 0xfc04df783488a410, 0x64d1f15da2c146b1, 0x43cf71d5c4fd7868, 0x94c0092dd4ef9511, // x 2^-5208 ~= 10^-1568 - 0xfbaf03b48a965a64, 0x9b6122aa2b72a13c, 0x387898a6e22f821b, 0x97a9991fd8b3afc0, // x 2^-5022 ~= 10^-1512 - 0x50f7f7c13119aadd, 0xe415d8b25694250a, 0x8f8857e875e7774e, 0x9aa1c1f6110c0dd0, // x 2^-4836 ~= 10^-1456 - 0xce214403545fd685, 0xf36d1ad779b90e09, 0xa5c58d5f91a476d7, 0x9da8ccda75b341b5, // x 2^-4650 ~= 10^-1400 - 0x63ddfb68f971b0c5, 0x2822e38faf74b26e, 0x6e1f7f1642ebaac8, 0xa0bf0465b455e921, // x 2^-4464 ~= 10^-1344 - 0xf0d00cec9daf7444, 0x6bf3eea6f661a32a, 0xfad2be1679765f27, 0xa3e4b4a65e97b76a, // x 2^-4278 ~= 10^-1288 - 0x463b4ab4bd478f57, 0x6f6583b5b36d5426, 0x800cfab80c4e2eb1, 0xa71a2b283c14fba6, // x 2^-4092 ~= 10^-1232 - 0xef163df2fa96e983, 0xa825f32bc8f6b080, 0x850b0c5976b21027, 0xaa5fb6fbc115010b, // x 2^-3906 ~= 10^-1176 - 0x7db1b3f8e100eb43, 0x2862b1f61d64ddc3, 0x61363686961a41e5, 0xadb5a8bdaaa53051, // x 2^-3720 ~= 10^-1120 - 0xfd349cf00ba1e09a, 0x6d282fe1b7112879, 0xc6f075c4b81fc72d, 0xb11c529ec0d87268, // x 2^-3534 ~= 10^-1064 - 
0xf7221741b221cf6f, 0x3739f15b06ac3c76, 0xb4e4be5b6455ef96, 0xb494086bbfea00c3, // x 2^-3348 ~= 10^-1008 - 0xc4e5a2f864c403bb, 0x6e33cdcda4367276, 0x24d256c540a50309, 0xb81d1f9569068d8e, // x 2^-3162 ~= 10^-952 - 0x276e3f0f67f0553b, 0x00de73d9d5be6974, 0x6d4aa5b50bb5dc0d, 0xbbb7ef38bb827f2d, // x 2^-2976 ~= 10^-896 - 0x51a34a3e674484ed, 0x1fb6069f8b26f840, 0x925624c0d7d93317, 0xbf64d0275747de70, // x 2^-2790 ~= 10^-840 - 0xcc775c8cb6de1dbc, 0x6d60d02eac6309ee, 0x8e5a2e5116baf191, 0xc3241cf0094a8e70, // x 2^-2604 ~= 10^-784 - 0x6023c8fa17d7b105, 0x069cf8f51d2e5e65, 0xb0560c246f90e9e8, 0xc6f631e782d57096, // x 2^-2418 ~= 10^-728 - 0x92c17acb2d08d5fd, 0xc26ffb8e81532725, 0x2ffff1289a804c5a, 0xcadb6d313c8736fc, // x 2^-2232 ~= 10^-672 - 0x47df78ab9e92897a, 0xc02b302a892b81dc, 0xa855e127113c887b, 0xced42ec885d9dbbe, // x 2^-2046 ~= 10^-616 - 0xdaf2dec03ec0c322, 0x72db3bc15b0c7014, 0xe00bad8dfc0d8c8e, 0xd2e0d889c213fd60, // x 2^-1860 ~= 10^-560 - 0xd3a04799e4473ac8, 0xa116409a2fdf1e9e, 0xc654d07271e6c39f, 0xd701ce3bd387bf47, // x 2^-1674 ~= 10^-504 - 0x5c8a5dc65d745a24, 0x2726c48a85389fa7, 0x84c663cee6b86e7c, 0xdb377599b6074244, // x 2^-1488 ~= 10^-448 - 0xd7ebc61ba77a9e66, 0x8bf77d4bc59b35b1, 0xcb285ceb2fed040d, 0xdf82365c497b5453, // x 2^-1302 ~= 10^-392 - 0x744ce999bfed213a, 0x363b1f2c568dc3e2, 0xfd1b1b2308169b25, 0xe3e27a444d8d98b7, // x 2^-1116 ~= 10^-336 - 0x6a40608fe10de7e7, 0xf910f9f648232f14, 0xd1b3400f8f9cff68, 0xe858ad248f5c22c9, // x 2^-930 ~= 10^-280 - 0x9bdbfc21260dd1ad, 0x4609ac5c7899ca36, 0xa4f8bf5635246428, 0xece53cec4a314ebd, // x 2^-744 ~= 10^-224 - 0xd88181aad19d7454, 0xf80f36174730ca34, 0xdc44e6c3cb279ac1, 0xf18899b1bc3f8ca1, // x 2^-558 ~= 10^-168 - 0xee19bfa6947f8e02, 0xaa09501d5954a559, 0x4d4617b5ff4a16d5, 0xf64335bcf065d37d, // x 2^-372 ~= 10^-112 - 0xebbc75a03b4d60e6, 0xac2e4f162cfad40a, 0xeed6e2f0f0d56712, 0xfb158592be068d2e, // x 2^-186 ~= 10^-56 - 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x8000000000000000, // x 2^1 == 10^0 
exactly - 0x0000000000000000, 0x2000000000000000, 0xbff8f10e7a8921a4, 0x82818f1281ed449f, // x 2^187 == 10^56 exactly - 0x51775f71e92bf2f2, 0x74a7ef0198791097, 0x03e2cf6bc604ddb0, 0x850fadc09923329e, // x 2^373 ~= 10^112 - 0xb204b3d9686f55b5, 0xfb118fc9c217a1d2, 0x90fb44d2f05d0842, 0x87aa9aff79042286, // x 2^559 ~= 10^168 - 0xd7924bff833149fa, 0xbc10c5c5cda97c8d, 0x82bd6b70d99aaa6f, 0x8a5296ffe33cc92f, // x 2^745 ~= 10^224 - 0xa67d072d3c7fa14b, 0x7ec63730f500b406, 0xdb0b487b6423e1e8, 0x8d07e33455637eb2, // x 2^931 ~= 10^280 - 0x546f2a35dc367e47, 0x949063d8a46f0c0e, 0x213a4f0aa5e8a7b1, 0x8fcac257558ee4e6, // x 2^1117 ~= 10^336 - 0x50611a621c0ee3ae, 0x202d895116aa96be, 0x1c306f5d1b0b5fdf, 0x929b7871de7f22b9, // x 2^1303 ~= 10^392 - 0xffa6738a27dcf7a3, 0x3c11d8430d5c4802, 0xa7ea9c8838ce9437, 0x957a4ae1ebf7f3d3, // x 2^1489 ~= 10^448 - 0x5bf36c0f40bde99d, 0x284ba600ee9f6303, 0xbf1d49cacccd5e68, 0x9867806127ece4f4, // x 2^1675 ~= 10^504 - 0xa6e937834ed12e58, 0x73f26eb82f6b8066, 0x655494c5c95d77f2, 0x9b63610bb9243e46, // x 2^1861 ~= 10^560 - 0x0cd4b7660adc6930, 0x8f868688f8eb79eb, 0x02e008393fd60b55, 0x9e6e366733f85561, // x 2^2047 ~= 10^616 - 0x3efb9807d86d3c6a, 0x84c10a1d22f5adc5, 0x55e04dba4b3bd4dd, 0xa1884b69ade24964, // x 2^2233 ~= 10^672 - 0xf065089401df33b4, 0x1fc02370c451a755, 0x44b222741eb1ebbf, 0xa4b1ec80f47c84ad, // x 2^2419 ~= 10^728 - 0xa62d0da836fce7d5, 0x75933380ceb5048c, 0x1cf4a5c3bc09fa6f, 0xa7eb6799e8aec999, // x 2^2605 ~= 10^784 - 0x7a400df820f096c2, 0x802c4085068d2dd5, 0x3c4a575151b294dc, 0xab350c27feb90acc, // x 2^2791 ~= 10^840 - 0xf48b51375df06e86, 0x412fe9e72afd355e, 0x870a8d87239d8f35, 0xae8f2b2ce3d5dbe9, // x 2^2977 ~= 10^896 - 0x881883521930127c, 0xe53fd3fcb5b4df25, 0xdd929f09c3eff5ac, 0xb1fa17404a30e5e8, // x 2^3163 ~= 10^952 - 0x270cd9f1348eb326, 0x37ed82fe9c75fccf, 0x1931b583a9431d7e, 0xb5762497dbf17a9e, // x 2^3349 ~= 10^1008 - 0x8919b01a5b3d9ec1, 0x6a7669bdfc6f699c, 0xe30db03e0f8dd286, 0xb903a90f561d25e2, // x 2^3535 ~= 10^1064 - 
0xf0461526b4201aa5, 0x7fe40defe17e55f5, 0x9eb5cb19647508c5, 0xbca2fc30cc19f090, // x 2^3721 ~= 10^1120 - 0xd67bf35422978bbf, 0x0dbb1c416ebe661f, 0x24bd4c00042ad125, 0xc054773d149bf26b, // x 2^3907 ~= 10^1176 - 0xdd093192ef5508d0, 0x6eac3085943ccc0f, 0x7ea30dbd7ea479e3, 0xc418753460cdcca9, // x 2^4093 ~= 10^1232 - 0xfe4ff20db6d25dc2, 0x5d5d5a9519e34a42, 0x764f4cf916b4dece, 0xc7ef52defe87b751, // x 2^4279 ~= 10^1288 - 0xd8adfb2e00494c5e, 0x72435286baf0e84e, 0xbeb7fbdc1cbe8b37, 0xcbd96ed6466cf081, // x 2^4465 ~= 10^1344 - 0xe07c1e4384f594af, 0x0c6b90b8874d5189, 0xdce472c619aa3f63, 0xcfd7298db6cb9672, // x 2^4651 ~= 10^1400 - 0x5dd902c68fa448cf, 0xea8d16bd9544e48e, 0xe47defc14a406e4f, 0xd3e8e55c3c1f43d0, // x 2^4837 ~= 10^1456 - 0x1223d79357bedca8, 0xeae6c2843752ac35, 0xb7157c60a24a0569, 0xd80f0685a81b2a81, // x 2^5023 ~= 10^1512 - 0xcff72d64bc79e429, 0xccc52c236decd778, 0xfb0b98f6bbc4f0cb, 0xdc49f3445824e360, // x 2^5209 ~= 10^1568 - 0x3731f76b905dffbb, 0x5e2bddd7d12a9e42, 0xc6c6c1764e047e15, 0xe09a13d30c2dba62, // x 2^5395 ~= 10^1624 - 0xeb58d8ef2ada7c09, 0xbc1a3b726b789947, 0x87e8dcfc09dbc33a, 0xe4ffd276eedce658, // x 2^5581 ~= 10^1680 - 0x249a5c06dc5d5db7, 0xa8f09440be97bfe6, 0xb1a3642a8da3cf4f, 0xe97b9b89d001dab3, // x 2^5767 ~= 10^1736 - 0xbf34ff7963028cd9, 0xc20578fa3851488b, 0x2d4070f33b21ab7b, 0xee0ddd84924ab88c, // x 2^5953 ~= 10^1792 - 0x002d0511317361d5, 0xd6919e041129a1a7, 0xa2bf0c63a814e04e, 0xf2b70909cd3fd35c, // x 2^6139 ~= 10^1848 - 0x1fa87f28acf1dcd2, 0xe7a0a88981d1a0f9, 0x08f13995cf9c2747, 0xf77790f0a48a45ce, // x 2^6325 ~= 10^1904 - 0x1b6ff8afbe589b72, 0xc851bb3f9aeb1211, 0x7a37993eb21444fa, 0xfc4fea4fd590b40a, // x 2^6511 ~= 10^1960 - 0xef23a4cbc039f0c2, 0xbb3f8498a972f18e, 0xb7b1ada9cdeba84d, 0x80a046447e3d49f1, // x 2^6698 ~= 10^2016 - 0x2cc44f2b602b6231, 0xf231f4b7996b7278, 0x0cc6866c5d69b2cb, 0x8324f8aa08d7d411, // x 2^6884 ~= 10^2072 - 0x822c97629a3a4c69, 0x8a9afcdbc940e6f9, 0x7fe2b4308dcbf1a3, 0x85b64a659077660e, // x 2^7070 ~= 10^2128 - 
0xf66cfcf42d4896b0, 0x1f11852a20ed33c5, 0x1d73ef3eaac3c964, 0x88547abb1d8e5bd9, // x 2^7256 ~= 10^2184 - 0x63093ad0caadb06c, 0x31be1482014cdaf0, 0x1e34291b1ef566c7, 0x8affca2bd1f88549, // x 2^7442 ~= 10^2240 - 0xab50f69048738e9a, 0xa126c32ff4882be8, 0x9e9383d73d486881, 0x8db87a7c1e56d873, // x 2^7628 ~= 10^2296 - 0xe57e659432b0a73e, 0x47a0e15dfc7986b8, 0x9cc5ee51962c011a, 0x907eceba168949b3, // x 2^7814 ~= 10^2352 - 0x8a6ff950599f8ae5, 0xd1cbbb7d005a76d3, 0x413407cfeeac9743, 0x93530b43e5e2c129, // x 2^8000 ~= 10^2408 - 0xd4e6b6e847550caa, 0x56a3106227b87706, 0x7efa7d29c44e11b7, 0x963575ce63b6332d, // x 2^8186 ~= 10^2464 - 0xd835c90b09842263, 0xb69f01a641da2a42, 0x5a848859645d1c6f, 0x9926556bc8defe43, // x 2^8372 ~= 10^2520 - 0x9b0ae73c204ecd61, 0x0794fd5e5a51ac2f, 0x51edea897b34601f, 0x9c25f29286e9ddb6, // x 2^8558 ~= 10^2576 - 0x3130484fb0a61d89, 0x32b7105223a27365, 0xb50008d92529e91f, 0x9f3497244186fca4, // x 2^8744 ~= 10^2632 - 0x8cd036553f38a1e8, 0x5e997e9f45d7897d, 0xf09e780bcc8238d9, 0xa2528e74eaf101fc, // x 2^8930 ~= 10^2688 - 0xe1f8b43b08b5d0ef, 0xa0eaf3f62dc1777c, 0x3a5828869701a165, 0xa580255203f84b47, // x 2^9116 ~= 10^2744 - 0x3c7f62e3154fa708, 0x5786f3927eb15bd5, 0x8b231a70eb5444ce, 0xa8bdaa0a0064fa44, // x 2^9302 ~= 10^2800 - 0x1ebc24a19cd70a2a, 0x843fddd10c7006b8, 0xfa1bde1f473556a4, 0xac0b6c73d065f8cc, // x 2^9488 ~= 10^2856 - 0x46b6aae34cfd26fc, 0x00db7d919b136c68, 0x7730e00421da4d55, 0xaf69bdf68fc6a740, // x 2^9674 ~= 10^2912 - 0x1c4edcb83fc4c49d, 0x61c0edd56bbcb3e8, 0x7f959cb702329d14, 0xb2d8f1915ba88ca5, // x 2^9860 ~= 10^2968 - 0x428c840d247382fe, 0x9cc3b1569b1325a4, 0x40c3a071220f5567, 0xb6595be34f821493, // x 2^10046 ~= 10^3024 - 0xbeb82e734787ec63, 0xbeff12280d5a1676, 0x11c48d02b8326bd3, 0xb9eb5333aa272e9b, // x 2^10232 ~= 10^3080 - 0x302349e12f45c73f, 0xb494bcc96d53e49c, 0x566765461bd2f61b, 0xbd8f2f7a1ba47d6d, // x 2^10418 ~= 10^3136 - 0x5704ebf5f16946ce, 0x431388ec68ac7a26, 0xb889018e4f6e9a52, 0xc1454a673cb9b1ce, // x 2^10604 ~= 10^3192 - 
0x5a30431166af9b23, 0x132d031fc1d1fec0, 0xf85333a94848659f, 0xc50dff6d30c3aefc, // x 2^10790 ~= 10^3248 - 0x7573d4b3ffe4ba3b, 0xf888498a40220657, 0x1a1aeae7cf8a9d3d, 0xc8e9abc872eb2bc1, // x 2^10976 ~= 10^3304 - 0xb5eaef7441511eb9, 0xc9cf998035a91664, 0x12e29f09d9061609, 0xccd8ae88cf70ad84, // x 2^11162 ~= 10^3360 - 0x73aed4f1908f4d01, 0x8c53e7beeca4578f, 0xdf7601457ca20b35, 0xd0db689a89f2f9b1, // x 2^11348 ~= 10^3416 - 0x5adbd55696e1cdd9, 0x4949d09424b87626, 0xcbdcd02f23cc7690, 0xd4f23ccfb1916df5, // x 2^11534 ~= 10^3472 - 0x3f500ccf4ea03593, 0x9b80aac81b50762a, 0x44289dd21b589d7a, 0xd91d8fe9a3d019cc, // x 2^11720 ~= 10^3528 - 0x134ca67a679b84ae, 0x8909e424a112a3cd, 0x95aa118ec1d08317, 0xdd5dc8a2bf27f3f7, // x 2^11906 ~= 10^3584 - 0xe89e3cf733d9ff40, 0x014344660a175c36, 0x72c4d2cad73b0a7b, 0xe1b34fb846321d04, // x 2^12092 ~= 10^3640 - 0x68c0a2c6c02dae9a, 0x0b11160a6edb5f57, 0xe20a88f1134f906d, 0xe61e8ff47461cda9, // x 2^12278 ~= 10^3696 - 0x47fa54906741561a, 0xaa13acba1e5511f5, 0xc7c91d5c341ed39d, 0xea9ff638c54554e1, // x 2^12464 ~= 10^3752 - 0x365460ed91271c24, 0xabe33496aff629b4, 0xf659ede2159a45ec, 0xef37f1886f4b6690, // x 2^12650 ~= 10^3808 - 0xe4cbf4acc7fba37f, 0x350e915f7055b1b8, 0x78d946bab954b82f, 0xf3e6f313130ef0ef, // x 2^12836 ~= 10^3864 - 0xe692accdfa5bd859, 0xf4d4d3202379829e, 0xc9b1474d8f89c269, 0xf8ad6e3fa030bd15, // x 2^13022 ~= 10^3920 - 0xeca0018ea3b8d1b4, 0xe878edb67072c26d, 0x6b1d2745340e7b14, 0xfd8bd8b770cb469e, // x 2^13208 ~= 10^3976 - 0xce5fec949ab87cf7, 0x0151dcd7a53488c3, 0xf22e502fcdd4bca2, 0x81415538ce493bd5, // x 2^13395 ~= 10^4032 - 0x5e1731fbff8c032e, 0xe752f53c2f8fa6c1, 0x7c1735fc3b813c8c, 0x83c92edf425b292d, // x 2^13581 ~= 10^4088 - 0xb552102ea83f47e6, 0xdf0fd2002ff6b3a3, 0x0367500a8e9a178f, 0x865db7a9ccd2839e, // x 2^13767 ~= 10^4144 - 0x76507bafe00ec873, 0x71b256ecd954434c, 0xc9ac50475e25293a, 0x88ff2f2bade74531, // x 2^13953 ~= 10^4200 - 0x5e2075ba289a360b, 0xac376f28b45e5acc, 0x0879b2e5f6ee8b1c, 0x8badd636cc48b341, // x 
2^14139 ~= 10^4256 - 0xab87d85e6311e801, 0xb7f786d14d58173d, 0x2f33c652bd12fab7, 0x8e69eee1f23f2be5, // x 2^14325 ~= 10^4312 - 0x7fed9b68d77255be, 0x35dc241819de7182, 0xad6a6308a8e8b557, 0x9133bc8f2a130fe5, // x 2^14511 ~= 10^4368 - 0x728ae72899d4bd12, 0xe5413d9414142a55, 0x9dbaa465efe141a0, 0x940b83f23a55842a, // x 2^14697 ~= 10^4424 - 0x0f7740145246fb8f, 0x186ef2c39acb4103, 0x888c9ab2fc5b3437, 0x96f18b1742aad751, // x 2^14883 ~= 10^4480 - 0xd8bb0fba2183c6ef, 0xbf66d66cc34f0197, 0xba00864671d1053f, 0x99e6196979b978f1, // x 2^15069 ~= 10^4536 - 0x9b71ed2ceb790e49, 0x6faac32d59cc1f5d, 0x61d59d402aae4fea, 0x9ce977ba0ce3a0bd, // x 2^15255 ~= 10^4592 - 0xa0aa6d5e63991cfb, 0x19482fa0ac45669c, 0x803c1cd864033781, 0x9ffbf04722750449, // x 2^15441 ~= 10^4648 - 0x95a9949e04b8bff3, 0x900aa3c2f02ac9d4, 0xa28a151725a55e10, 0xa31dcec2fef14b30, // x 2^15627 ~= 10^4704 - 0x3acf9496dade0ce9, 0xbd8ecf923d23bec0, 0x5b8452af2302fe13, 0xa64f605b4e3352cd, // x 2^15813 ~= 10^4760 - 0x6204425d2b58e822, 0xdee162a8a1248550, 0x82b84cabc828bf93, 0xa990f3c09110c544, // x 2^15999 ~= 10^4816 - 0x091a2658e0639f32, 0x66fa2184cee0b861, 0x8d29dd5122e4278d, 0xace2d92db0390b59, // x 2^16185 ~= 10^4872 - 0x80acda113324758a, 0xded179c26d9ab828, 0x58f8fde02c03a6c6, 0xb045626fb50a35e7, // x 2^16371 ~= 10^4928 - 0x7128a8aad239ce8f, 0x8737bd250290cd5b, 0xd950102978dbd0ff, 0xb3b8e2eda91a232d, // x 2^16557 ~= 10^4984 + // Low-order ... 
high-order + 0xaec2e6aff96b46ae, 0xf91044c2eff84750, 0x2b55c9e70e00c557, 0xb6536903bf8f2bda, // x 2^-16556 ~= 10^-4984 + 0xda1b3c3dd3889587, 0x73a7380aba84a6b1, 0xbddb2dfde3f8a6e3, 0xb9e5428330737362, // x 2^-16370 ~= 10^-4928 + 0xa2d23c57cfebb9ec, 0x9f165c039ead6d77, 0x88227fdfc13ab53d, 0xbd89006346a9a34d, // x 2^-16184 ~= 10^-4872 + 0x0333d510cf27e5a5, 0x4e3cc383eaa17b7b, 0xe05fe4207ca3d508, 0xc13efc51ade7df64, // x 2^-15998 ~= 10^-4816 + 0xff242c569bc1f539, 0x5c67ba58680c4cce, 0x3c55f3f947fef0e9, 0xc50791bd8dd72edb, // x 2^-15812 ~= 10^-4760 + 0xe4b75ae27bec50bf, 0x25b0419765fdfcdb, 0x0915564d8ab057ee, 0xc8e31de056f89c19, // x 2^-15626 ~= 10^-4704 + 0x548b1e80a94f3434, 0xe418e9217ce83755, 0x801e38463183fc88, 0xccd1ffc6bba63e21, // x 2^-15440 ~= 10^-4648 + 0x541950a0fdc2b4d9, 0xeea173da1f0eb7b4, 0xcfadf6b2aa7c4f43, 0xd0d49859d60d40a3, // x 2^-15254 ~= 10^-4592 + 0x7e64501be95ad76b, 0x451e855d8acef835, 0x9e601e707a2c3488, 0xd4eb4a687c0253e8, // x 2^-15068 ~= 10^-4536 + 0xdadd9645f360cb51, 0xf290163350ecb3eb, 0xa8edffdccfe4db4b, 0xd9167ab0c1965798, // x 2^-14882 ~= 10^-4480 + 0x7e447db3018ffbdf, 0x4fa1860c08a85923, 0xb17cd86e7fcece75, 0xdd568fe9ab559344, // x 2^-14696 ~= 10^-4424 + 0x61cd4655bf64d265, 0xb19fd88fe285b3bc, 0x1151250681d59705, 0xe1abf2cd11206610, // x 2^-14510 ~= 10^-4368 + 0xa5703f5ce7a619ec, 0x361243a84b55574d, 0x025a8e1e5dbb41d6, 0xe6170e21b2910457, // x 2^-14324 ~= 10^-4312 + 0xb93897a6cf5d3e61, 0x18746fcc6a190db9, 0x66e849253e5da0c2, 0xea984ec57de69f13, // x 2^-14138 ~= 10^-4256 + 0x309043d12ab5b0ac, 0x79c93cff11f09319, 0xf5a7800f23ef67b8, 0xef3023b80a732d93, // x 2^-13952 ~= 10^-4200 + 0xa3baa84c049b52b9, 0xbec466ee1b586342, 0x0e85fc7f4edbd3ca, 0xf3defe25478e074a, // x 2^-13766 ~= 10^-4144 + 0xd1f4628316b15c7a, 0xae16192410d3135e, 0x4268a54f70bd28c4, 0xf8a551706112897c, // x 2^-13580 ~= 10^-4088 + 0x9eb9296cc5749dba, 0x48324e275376dfdd, 0x5052e9289f0f2333, 0xfd83933eda772c0b, // x 2^-13394 ~= 10^-4032 + 0xff6aae669a5a0d8a, 0x24fed95087b9006e, 
0x01b02378a405b421, 0x813d1dc1f0c754d6, // x 2^-13207 ~= 10^-3976 + 0xf993f18de00dc89b, 0x15617da021b89f92, 0xb782db1fc6aba49b, 0x83c4e245ed051dc1, // x 2^-13021 ~= 10^-3920 + 0xc6a0d64a712172b1, 0x2217669197ac1504, 0x4250be2eeba87d15, 0x86595584116caf3c, // x 2^-12835 ~= 10^-3864 + 0x0bdc0c67a220687b, 0x44a66a6d6fd6537b, 0x3f1f93f1943ca9b6, 0x88fab70d8b44952a, // x 2^-12649 ~= 10^-3808 + 0xb60b57164ad28122, 0xde5bd4572c25a830, 0x2c87f18b39478aa2, 0x8ba947b223e5783e, // x 2^-12463 ~= 10^-3752 + 0xbd59568efdb9bfee, 0x292f8f2c98d7f44c, 0x4054f5360249ebd1, 0x8e6549867da7d11a, // x 2^-12277 ~= 10^-3696 + 0x9fa0721e66791acc, 0x1789061d717d454c, 0xc1187fa0c18adbbe, 0x912effea7015b2c5, // x 2^-12091 ~= 10^-3640 + 0x982b64e953ac4e27, 0x45efb05f20cf48b3, 0x4b4de34e0ebc3e06, 0x9406af8f83fd6265, // x 2^-11905 ~= 10^-3584 + 0xa53f5950eec21dca, 0x3bd8754763bdbca1, 0xac73f0226eff5ea1, 0x96ec9e7f9004839b, // x 2^-11719 ~= 10^-3528 + 0x320e19f88f1161b7, 0x72e93fe0cce7cfd9, 0x2184706ea46a4c38, 0x99e11423765ec1d0, // x 2^-11533 ~= 10^-3472 + 0x491aba48dfc0e36e, 0xd3de560ee34022b2, 0xddadb80577b906bd, 0x9ce4594a044e0f1b, // x 2^-11347 ~= 10^-3416 + 0x06789d038697142f, 0x7a466a75be73db21, 0x60dbd8aa443b560f, 0x9ff6b82ef415d222, // x 2^-11161 ~= 10^-3360 + 0x40ed8056af76ac43, 0x08251c601e346456, 0x7401c6f091f87727, 0xa3187c82120dace6, // x 2^-10975 ~= 10^-3304 + 0x8c643ee307bffec6, 0xf369a11c6f66c05a, 0x4d5b32f713d7f476, 0xa649f36e8583e81a, // x 2^-10789 ~= 10^-3248 + 0xe32f5e080e36b4be, 0x3adf30ff2eb163d4, 0xb4b39dd9ddb8d317, 0xa98b6ba23e2300c7, // x 2^-10603 ~= 10^-3192 + 0x6b9d538c192cfb1b, 0x1c5af3bd4d2c60b5, 0xec41c1793d69d0d1, 0xacdd3555869159d1, // x 2^-10417 ~= 10^-3136 + 0x1adadaeedf7d699c, 0x71043692494aa743, 0x3ca5a7540d9d56c9, 0xb03fa252bd05a815, // x 2^-10231 ~= 10^-3080 + 0xec3e4e5fc6b03617, 0x47c9b16afe8fdf74, 0x92e1bc1fbb33f18d, 0xb3b305fe328e571f, // x 2^-10045 ~= 10^-3024 + 0x1d42fa68b12bdb23, 0xac46a7b3f2b4b34e, 0xa908fd4a88728b6a, 0xb737b55e31cdde04, // x 2^-9859 ~= 
10^-2968 + 0x887dede507f2b618, 0x359a8fa0d014b9a7, 0x7c4c65d15c614c56, 0xbace07232df1c802, // x 2^-9673 ~= 10^-2912 + 0x504708e718b4b669, 0xfb4d9440822af452, 0xef84cc99cb4c5d17, 0xbe7653b01aae13e5, // x 2^-9487 ~= 10^-2856 + 0x5b7977525516bff0, 0x75913092420c9b35, 0xcfc147ade4843a24, 0xc230f522ee0a7fc2, // x 2^-9301 ~= 10^-2800 + 0xad5d11883cc1302b, 0x860a754894b9a0bc, 0x4668677d5f46c29b, 0xc5fe475d4cd35cff, // x 2^-9115 ~= 10^-2744 + 0x42032f9f971bfc07, 0x9fb576046ab35018, 0x474b3cb1fe1d6a7f, 0xc9dea80d6283a34c, // x 2^-8929 ~= 10^-2688 + 0xd3e7fbb72403a4dd, 0x8ca223055819af54, 0xd6ea3b733029ef0b, 0xcdd276b6e582284f, // x 2^-8743 ~= 10^-2632 + 0xba2431d885f2b7d9, 0xc9879fc42869f610, 0x3736730a9e47fef8, 0xd1da14bc489025ea, // x 2^-8557 ~= 10^-2576 + 0xa11edbcd65dd1844, 0xcb8edae81a295887, 0x3d24e68dc1027246, 0xd5f5e5681a4b9285, // x 2^-8371 ~= 10^-2520 + 0xa0f076652f69ad08, 0x9d19c341f5f42f2a, 0x742ab8f3864562c8, 0xda264df693ac3e30, // x 2^-8185 ~= 10^-2464 + 0x29f760ef115f2824, 0xe0ee47c041c9de0f, 0x8c119f3680212413, 0xde6bb59f56672cda, // x 2^-7999 ~= 10^-2408 + 0x8b90230b3409c9d3, 0x9d76eef2c1543e65, 0x43190b523f872b9c, 0xe2c6859f5c284230, // x 2^-7813 ~= 10^-2352 + 0xd44ce9993bc6611e, 0x777c9b2dfbede079, 0x2a0969bf88679396, 0xe7372943179706fc, // x 2^-7627 ~= 10^-2296 + 0xe8c5f5a63fd0fbd1, 0x0ccc12293f1d7a58, 0x131565be33dda91a, 0xebbe0df0c8201ac5, // x 2^-7441 ~= 10^-2240 + 0xdb97988dd6b776f4, 0xeb2106f435f7e1d5, 0xccfb1cc2ef1f44de, 0xf05ba3330181c750, // x 2^-7255 ~= 10^-2184 + 0x2fcbc8df94a1d54b, 0x796d0a8120801513, 0x5f8385b3a882ff4c, 0xf5105ac3681f2716, // x 2^-7069 ~= 10^-2128 + 0xc8700c11071a40f5, 0x23cb9e9df9331fe4, 0x166c15f456786c27, 0xf9dca895a3226409, // x 2^-6883 ~= 10^-2072 + 0x9589f4637a50cbb5, 0xea8242b0030e4a51, 0x6c656c3b1f2c9d91, 0xfec102e2857bc1f9, // x 2^-6697 ~= 10^-2016 + 0xc4be56c83349136c, 0x6188db81ac8e775d, 0xfa70b9a2ca60b004, 0x81def119b76837c8, // x 2^-6510 ~= 10^-1960 + 0xb85d39054658b363, 0xe7df06bc613fda21, 0x6a22490e8e9ec98b, 
0x8469e0b6f2b8bd9b, // x 2^-6324 ~= 10^-1904 + 0x800b1e1349fef248, 0x469cfd2e6ca32a77, 0x69138459b0fa72d4, 0x87018eefb53c6325, // x 2^-6138 ~= 10^-1848 + 0xb62593291c768919, 0xc098e6ed0bfbd6f6, 0x6c83ad1260ff20f4, 0x89a63ba4c497b50e, // x 2^-5952 ~= 10^-1792 + 0x92ee7fce474479d3, 0xe02017175bf040c6, 0xd82ef2860273de8d, 0x8c5827f711735b46, // x 2^-5766 ~= 10^-1736 + 0x7b0e6375ca8c77d9, 0x5f07e1e10097d47f, 0x416d7f9ab1e67580, 0x8f17964dfc3961f2, // x 2^-5580 ~= 10^-1680 + 0xc8d869ed561af1ce, 0x8b6648e941de779b, 0x56700866b85d57fe, 0x91e4ca5db93dbfec, // x 2^-5394 ~= 10^-1624 + 0xfc04df783488a410, 0x64d1f15da2c146b1, 0x43cf71d5c4fd7868, 0x94c0092dd4ef9511, // x 2^-5208 ~= 10^-1568 + 0xfbaf03b48a965a64, 0x9b6122aa2b72a13c, 0x387898a6e22f821b, 0x97a9991fd8b3afc0, // x 2^-5022 ~= 10^-1512 + 0x50f7f7c13119aadd, 0xe415d8b25694250a, 0x8f8857e875e7774e, 0x9aa1c1f6110c0dd0, // x 2^-4836 ~= 10^-1456 + 0xce214403545fd685, 0xf36d1ad779b90e09, 0xa5c58d5f91a476d7, 0x9da8ccda75b341b5, // x 2^-4650 ~= 10^-1400 + 0x63ddfb68f971b0c5, 0x2822e38faf74b26e, 0x6e1f7f1642ebaac8, 0xa0bf0465b455e921, // x 2^-4464 ~= 10^-1344 + 0xf0d00cec9daf7444, 0x6bf3eea6f661a32a, 0xfad2be1679765f27, 0xa3e4b4a65e97b76a, // x 2^-4278 ~= 10^-1288 + 0x463b4ab4bd478f57, 0x6f6583b5b36d5426, 0x800cfab80c4e2eb1, 0xa71a2b283c14fba6, // x 2^-4092 ~= 10^-1232 + 0xef163df2fa96e983, 0xa825f32bc8f6b080, 0x850b0c5976b21027, 0xaa5fb6fbc115010b, // x 2^-3906 ~= 10^-1176 + 0x7db1b3f8e100eb43, 0x2862b1f61d64ddc3, 0x61363686961a41e5, 0xadb5a8bdaaa53051, // x 2^-3720 ~= 10^-1120 + 0xfd349cf00ba1e09a, 0x6d282fe1b7112879, 0xc6f075c4b81fc72d, 0xb11c529ec0d87268, // x 2^-3534 ~= 10^-1064 + 0xf7221741b221cf6f, 0x3739f15b06ac3c76, 0xb4e4be5b6455ef96, 0xb494086bbfea00c3, // x 2^-3348 ~= 10^-1008 + 0xc4e5a2f864c403bb, 0x6e33cdcda4367276, 0x24d256c540a50309, 0xb81d1f9569068d8e, // x 2^-3162 ~= 10^-952 + 0x276e3f0f67f0553b, 0x00de73d9d5be6974, 0x6d4aa5b50bb5dc0d, 0xbbb7ef38bb827f2d, // x 2^-2976 ~= 10^-896 + 0x51a34a3e674484ed, 
0x1fb6069f8b26f840, 0x925624c0d7d93317, 0xbf64d0275747de70, // x 2^-2790 ~= 10^-840 + 0xcc775c8cb6de1dbc, 0x6d60d02eac6309ee, 0x8e5a2e5116baf191, 0xc3241cf0094a8e70, // x 2^-2604 ~= 10^-784 + 0x6023c8fa17d7b105, 0x069cf8f51d2e5e65, 0xb0560c246f90e9e8, 0xc6f631e782d57096, // x 2^-2418 ~= 10^-728 + 0x92c17acb2d08d5fd, 0xc26ffb8e81532725, 0x2ffff1289a804c5a, 0xcadb6d313c8736fc, // x 2^-2232 ~= 10^-672 + 0x47df78ab9e92897a, 0xc02b302a892b81dc, 0xa855e127113c887b, 0xced42ec885d9dbbe, // x 2^-2046 ~= 10^-616 + 0xdaf2dec03ec0c322, 0x72db3bc15b0c7014, 0xe00bad8dfc0d8c8e, 0xd2e0d889c213fd60, // x 2^-1860 ~= 10^-560 + 0xd3a04799e4473ac8, 0xa116409a2fdf1e9e, 0xc654d07271e6c39f, 0xd701ce3bd387bf47, // x 2^-1674 ~= 10^-504 + 0x5c8a5dc65d745a24, 0x2726c48a85389fa7, 0x84c663cee6b86e7c, 0xdb377599b6074244, // x 2^-1488 ~= 10^-448 + 0xd7ebc61ba77a9e66, 0x8bf77d4bc59b35b1, 0xcb285ceb2fed040d, 0xdf82365c497b5453, // x 2^-1302 ~= 10^-392 + 0x744ce999bfed213a, 0x363b1f2c568dc3e2, 0xfd1b1b2308169b25, 0xe3e27a444d8d98b7, // x 2^-1116 ~= 10^-336 + 0x6a40608fe10de7e7, 0xf910f9f648232f14, 0xd1b3400f8f9cff68, 0xe858ad248f5c22c9, // x 2^-930 ~= 10^-280 + 0x9bdbfc21260dd1ad, 0x4609ac5c7899ca36, 0xa4f8bf5635246428, 0xece53cec4a314ebd, // x 2^-744 ~= 10^-224 + 0xd88181aad19d7454, 0xf80f36174730ca34, 0xdc44e6c3cb279ac1, 0xf18899b1bc3f8ca1, // x 2^-558 ~= 10^-168 + 0xee19bfa6947f8e02, 0xaa09501d5954a559, 0x4d4617b5ff4a16d5, 0xf64335bcf065d37d, // x 2^-372 ~= 10^-112 + 0xebbc75a03b4d60e6, 0xac2e4f162cfad40a, 0xeed6e2f0f0d56712, 0xfb158592be068d2e, // x 2^-186 ~= 10^-56 + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x8000000000000000, // x 2^1 == 10^0 exactly + 0x0000000000000000, 0x2000000000000000, 0xbff8f10e7a8921a4, 0x82818f1281ed449f, // x 2^187 == 10^56 exactly + 0x51775f71e92bf2f2, 0x74a7ef0198791097, 0x03e2cf6bc604ddb0, 0x850fadc09923329e, // x 2^373 ~= 10^112 + 0xb204b3d9686f55b5, 0xfb118fc9c217a1d2, 0x90fb44d2f05d0842, 0x87aa9aff79042286, // x 2^559 ~= 10^168 + 
0xd7924bff833149fa, 0xbc10c5c5cda97c8d, 0x82bd6b70d99aaa6f, 0x8a5296ffe33cc92f, // x 2^745 ~= 10^224 + 0xa67d072d3c7fa14b, 0x7ec63730f500b406, 0xdb0b487b6423e1e8, 0x8d07e33455637eb2, // x 2^931 ~= 10^280 + 0x546f2a35dc367e47, 0x949063d8a46f0c0e, 0x213a4f0aa5e8a7b1, 0x8fcac257558ee4e6, // x 2^1117 ~= 10^336 + 0x50611a621c0ee3ae, 0x202d895116aa96be, 0x1c306f5d1b0b5fdf, 0x929b7871de7f22b9, // x 2^1303 ~= 10^392 + 0xffa6738a27dcf7a3, 0x3c11d8430d5c4802, 0xa7ea9c8838ce9437, 0x957a4ae1ebf7f3d3, // x 2^1489 ~= 10^448 + 0x5bf36c0f40bde99d, 0x284ba600ee9f6303, 0xbf1d49cacccd5e68, 0x9867806127ece4f4, // x 2^1675 ~= 10^504 + 0xa6e937834ed12e58, 0x73f26eb82f6b8066, 0x655494c5c95d77f2, 0x9b63610bb9243e46, // x 2^1861 ~= 10^560 + 0x0cd4b7660adc6930, 0x8f868688f8eb79eb, 0x02e008393fd60b55, 0x9e6e366733f85561, // x 2^2047 ~= 10^616 + 0x3efb9807d86d3c6a, 0x84c10a1d22f5adc5, 0x55e04dba4b3bd4dd, 0xa1884b69ade24964, // x 2^2233 ~= 10^672 + 0xf065089401df33b4, 0x1fc02370c451a755, 0x44b222741eb1ebbf, 0xa4b1ec80f47c84ad, // x 2^2419 ~= 10^728 + 0xa62d0da836fce7d5, 0x75933380ceb5048c, 0x1cf4a5c3bc09fa6f, 0xa7eb6799e8aec999, // x 2^2605 ~= 10^784 + 0x7a400df820f096c2, 0x802c4085068d2dd5, 0x3c4a575151b294dc, 0xab350c27feb90acc, // x 2^2791 ~= 10^840 + 0xf48b51375df06e86, 0x412fe9e72afd355e, 0x870a8d87239d8f35, 0xae8f2b2ce3d5dbe9, // x 2^2977 ~= 10^896 + 0x881883521930127c, 0xe53fd3fcb5b4df25, 0xdd929f09c3eff5ac, 0xb1fa17404a30e5e8, // x 2^3163 ~= 10^952 + 0x270cd9f1348eb326, 0x37ed82fe9c75fccf, 0x1931b583a9431d7e, 0xb5762497dbf17a9e, // x 2^3349 ~= 10^1008 + 0x8919b01a5b3d9ec1, 0x6a7669bdfc6f699c, 0xe30db03e0f8dd286, 0xb903a90f561d25e2, // x 2^3535 ~= 10^1064 + 0xf0461526b4201aa5, 0x7fe40defe17e55f5, 0x9eb5cb19647508c5, 0xbca2fc30cc19f090, // x 2^3721 ~= 10^1120 + 0xd67bf35422978bbf, 0x0dbb1c416ebe661f, 0x24bd4c00042ad125, 0xc054773d149bf26b, // x 2^3907 ~= 10^1176 + 0xdd093192ef5508d0, 0x6eac3085943ccc0f, 0x7ea30dbd7ea479e3, 0xc418753460cdcca9, // x 2^4093 ~= 10^1232 + 0xfe4ff20db6d25dc2, 
0x5d5d5a9519e34a42, 0x764f4cf916b4dece, 0xc7ef52defe87b751, // x 2^4279 ~= 10^1288 + 0xd8adfb2e00494c5e, 0x72435286baf0e84e, 0xbeb7fbdc1cbe8b37, 0xcbd96ed6466cf081, // x 2^4465 ~= 10^1344 + 0xe07c1e4384f594af, 0x0c6b90b8874d5189, 0xdce472c619aa3f63, 0xcfd7298db6cb9672, // x 2^4651 ~= 10^1400 + 0x5dd902c68fa448cf, 0xea8d16bd9544e48e, 0xe47defc14a406e4f, 0xd3e8e55c3c1f43d0, // x 2^4837 ~= 10^1456 + 0x1223d79357bedca8, 0xeae6c2843752ac35, 0xb7157c60a24a0569, 0xd80f0685a81b2a81, // x 2^5023 ~= 10^1512 + 0xcff72d64bc79e429, 0xccc52c236decd778, 0xfb0b98f6bbc4f0cb, 0xdc49f3445824e360, // x 2^5209 ~= 10^1568 + 0x3731f76b905dffbb, 0x5e2bddd7d12a9e42, 0xc6c6c1764e047e15, 0xe09a13d30c2dba62, // x 2^5395 ~= 10^1624 + 0xeb58d8ef2ada7c09, 0xbc1a3b726b789947, 0x87e8dcfc09dbc33a, 0xe4ffd276eedce658, // x 2^5581 ~= 10^1680 + 0x249a5c06dc5d5db7, 0xa8f09440be97bfe6, 0xb1a3642a8da3cf4f, 0xe97b9b89d001dab3, // x 2^5767 ~= 10^1736 + 0xbf34ff7963028cd9, 0xc20578fa3851488b, 0x2d4070f33b21ab7b, 0xee0ddd84924ab88c, // x 2^5953 ~= 10^1792 + 0x002d0511317361d5, 0xd6919e041129a1a7, 0xa2bf0c63a814e04e, 0xf2b70909cd3fd35c, // x 2^6139 ~= 10^1848 + 0x1fa87f28acf1dcd2, 0xe7a0a88981d1a0f9, 0x08f13995cf9c2747, 0xf77790f0a48a45ce, // x 2^6325 ~= 10^1904 + 0x1b6ff8afbe589b72, 0xc851bb3f9aeb1211, 0x7a37993eb21444fa, 0xfc4fea4fd590b40a, // x 2^6511 ~= 10^1960 + 0xef23a4cbc039f0c2, 0xbb3f8498a972f18e, 0xb7b1ada9cdeba84d, 0x80a046447e3d49f1, // x 2^6698 ~= 10^2016 + 0x2cc44f2b602b6231, 0xf231f4b7996b7278, 0x0cc6866c5d69b2cb, 0x8324f8aa08d7d411, // x 2^6884 ~= 10^2072 + 0x822c97629a3a4c69, 0x8a9afcdbc940e6f9, 0x7fe2b4308dcbf1a3, 0x85b64a659077660e, // x 2^7070 ~= 10^2128 + 0xf66cfcf42d4896b0, 0x1f11852a20ed33c5, 0x1d73ef3eaac3c964, 0x88547abb1d8e5bd9, // x 2^7256 ~= 10^2184 + 0x63093ad0caadb06c, 0x31be1482014cdaf0, 0x1e34291b1ef566c7, 0x8affca2bd1f88549, // x 2^7442 ~= 10^2240 + 0xab50f69048738e9a, 0xa126c32ff4882be8, 0x9e9383d73d486881, 0x8db87a7c1e56d873, // x 2^7628 ~= 10^2296 + 0xe57e659432b0a73e, 
0x47a0e15dfc7986b8, 0x9cc5ee51962c011a, 0x907eceba168949b3, // x 2^7814 ~= 10^2352 + 0x8a6ff950599f8ae5, 0xd1cbbb7d005a76d3, 0x413407cfeeac9743, 0x93530b43e5e2c129, // x 2^8000 ~= 10^2408 + 0xd4e6b6e847550caa, 0x56a3106227b87706, 0x7efa7d29c44e11b7, 0x963575ce63b6332d, // x 2^8186 ~= 10^2464 + 0xd835c90b09842263, 0xb69f01a641da2a42, 0x5a848859645d1c6f, 0x9926556bc8defe43, // x 2^8372 ~= 10^2520 + 0x9b0ae73c204ecd61, 0x0794fd5e5a51ac2f, 0x51edea897b34601f, 0x9c25f29286e9ddb6, // x 2^8558 ~= 10^2576 + 0x3130484fb0a61d89, 0x32b7105223a27365, 0xb50008d92529e91f, 0x9f3497244186fca4, // x 2^8744 ~= 10^2632 + 0x8cd036553f38a1e8, 0x5e997e9f45d7897d, 0xf09e780bcc8238d9, 0xa2528e74eaf101fc, // x 2^8930 ~= 10^2688 + 0xe1f8b43b08b5d0ef, 0xa0eaf3f62dc1777c, 0x3a5828869701a165, 0xa580255203f84b47, // x 2^9116 ~= 10^2744 + 0x3c7f62e3154fa708, 0x5786f3927eb15bd5, 0x8b231a70eb5444ce, 0xa8bdaa0a0064fa44, // x 2^9302 ~= 10^2800 + 0x1ebc24a19cd70a2a, 0x843fddd10c7006b8, 0xfa1bde1f473556a4, 0xac0b6c73d065f8cc, // x 2^9488 ~= 10^2856 + 0x46b6aae34cfd26fc, 0x00db7d919b136c68, 0x7730e00421da4d55, 0xaf69bdf68fc6a740, // x 2^9674 ~= 10^2912 + 0x1c4edcb83fc4c49d, 0x61c0edd56bbcb3e8, 0x7f959cb702329d14, 0xb2d8f1915ba88ca5, // x 2^9860 ~= 10^2968 + 0x428c840d247382fe, 0x9cc3b1569b1325a4, 0x40c3a071220f5567, 0xb6595be34f821493, // x 2^10046 ~= 10^3024 + 0xbeb82e734787ec63, 0xbeff12280d5a1676, 0x11c48d02b8326bd3, 0xb9eb5333aa272e9b, // x 2^10232 ~= 10^3080 + 0x302349e12f45c73f, 0xb494bcc96d53e49c, 0x566765461bd2f61b, 0xbd8f2f7a1ba47d6d, // x 2^10418 ~= 10^3136 + 0x5704ebf5f16946ce, 0x431388ec68ac7a26, 0xb889018e4f6e9a52, 0xc1454a673cb9b1ce, // x 2^10604 ~= 10^3192 + 0x5a30431166af9b23, 0x132d031fc1d1fec0, 0xf85333a94848659f, 0xc50dff6d30c3aefc, // x 2^10790 ~= 10^3248 + 0x7573d4b3ffe4ba3b, 0xf888498a40220657, 0x1a1aeae7cf8a9d3d, 0xc8e9abc872eb2bc1, // x 2^10976 ~= 10^3304 + 0xb5eaef7441511eb9, 0xc9cf998035a91664, 0x12e29f09d9061609, 0xccd8ae88cf70ad84, // x 2^11162 ~= 10^3360 + 
0x73aed4f1908f4d01, 0x8c53e7beeca4578f, 0xdf7601457ca20b35, 0xd0db689a89f2f9b1, // x 2^11348 ~= 10^3416 + 0x5adbd55696e1cdd9, 0x4949d09424b87626, 0xcbdcd02f23cc7690, 0xd4f23ccfb1916df5, // x 2^11534 ~= 10^3472 + 0x3f500ccf4ea03593, 0x9b80aac81b50762a, 0x44289dd21b589d7a, 0xd91d8fe9a3d019cc, // x 2^11720 ~= 10^3528 + 0x134ca67a679b84ae, 0x8909e424a112a3cd, 0x95aa118ec1d08317, 0xdd5dc8a2bf27f3f7, // x 2^11906 ~= 10^3584 + 0xe89e3cf733d9ff40, 0x014344660a175c36, 0x72c4d2cad73b0a7b, 0xe1b34fb846321d04, // x 2^12092 ~= 10^3640 + 0x68c0a2c6c02dae9a, 0x0b11160a6edb5f57, 0xe20a88f1134f906d, 0xe61e8ff47461cda9, // x 2^12278 ~= 10^3696 + 0x47fa54906741561a, 0xaa13acba1e5511f5, 0xc7c91d5c341ed39d, 0xea9ff638c54554e1, // x 2^12464 ~= 10^3752 + 0x365460ed91271c24, 0xabe33496aff629b4, 0xf659ede2159a45ec, 0xef37f1886f4b6690, // x 2^12650 ~= 10^3808 + 0xe4cbf4acc7fba37f, 0x350e915f7055b1b8, 0x78d946bab954b82f, 0xf3e6f313130ef0ef, // x 2^12836 ~= 10^3864 + 0xe692accdfa5bd859, 0xf4d4d3202379829e, 0xc9b1474d8f89c269, 0xf8ad6e3fa030bd15, // x 2^13022 ~= 10^3920 + 0xeca0018ea3b8d1b4, 0xe878edb67072c26d, 0x6b1d2745340e7b14, 0xfd8bd8b770cb469e, // x 2^13208 ~= 10^3976 + 0xce5fec949ab87cf7, 0x0151dcd7a53488c3, 0xf22e502fcdd4bca2, 0x81415538ce493bd5, // x 2^13395 ~= 10^4032 + 0x5e1731fbff8c032e, 0xe752f53c2f8fa6c1, 0x7c1735fc3b813c8c, 0x83c92edf425b292d, // x 2^13581 ~= 10^4088 + 0xb552102ea83f47e6, 0xdf0fd2002ff6b3a3, 0x0367500a8e9a178f, 0x865db7a9ccd2839e, // x 2^13767 ~= 10^4144 + 0x76507bafe00ec873, 0x71b256ecd954434c, 0xc9ac50475e25293a, 0x88ff2f2bade74531, // x 2^13953 ~= 10^4200 + 0x5e2075ba289a360b, 0xac376f28b45e5acc, 0x0879b2e5f6ee8b1c, 0x8badd636cc48b341, // x 2^14139 ~= 10^4256 + 0xab87d85e6311e801, 0xb7f786d14d58173d, 0x2f33c652bd12fab7, 0x8e69eee1f23f2be5, // x 2^14325 ~= 10^4312 + 0x7fed9b68d77255be, 0x35dc241819de7182, 0xad6a6308a8e8b557, 0x9133bc8f2a130fe5, // x 2^14511 ~= 10^4368 + 0x728ae72899d4bd12, 0xe5413d9414142a55, 0x9dbaa465efe141a0, 0x940b83f23a55842a, // x 
2^14697 ~= 10^4424 + 0x0f7740145246fb8f, 0x186ef2c39acb4103, 0x888c9ab2fc5b3437, 0x96f18b1742aad751, // x 2^14883 ~= 10^4480 + 0xd8bb0fba2183c6ef, 0xbf66d66cc34f0197, 0xba00864671d1053f, 0x99e6196979b978f1, // x 2^15069 ~= 10^4536 + 0x9b71ed2ceb790e49, 0x6faac32d59cc1f5d, 0x61d59d402aae4fea, 0x9ce977ba0ce3a0bd, // x 2^15255 ~= 10^4592 + 0xa0aa6d5e63991cfb, 0x19482fa0ac45669c, 0x803c1cd864033781, 0x9ffbf04722750449, // x 2^15441 ~= 10^4648 + 0x95a9949e04b8bff3, 0x900aa3c2f02ac9d4, 0xa28a151725a55e10, 0xa31dcec2fef14b30, // x 2^15627 ~= 10^4704 + 0x3acf9496dade0ce9, 0xbd8ecf923d23bec0, 0x5b8452af2302fe13, 0xa64f605b4e3352cd, // x 2^15813 ~= 10^4760 + 0x6204425d2b58e822, 0xdee162a8a1248550, 0x82b84cabc828bf93, 0xa990f3c09110c544, // x 2^15999 ~= 10^4816 + 0x091a2658e0639f32, 0x66fa2184cee0b861, 0x8d29dd5122e4278d, 0xace2d92db0390b59, // x 2^16185 ~= 10^4872 + 0x80acda113324758a, 0xded179c26d9ab828, 0x58f8fde02c03a6c6, 0xb045626fb50a35e7, // x 2^16371 ~= 10^4928 + 0x7128a8aad239ce8f, 0x8737bd250290cd5b, 0xd950102978dbd0ff, 0xb3b8e2eda91a232d, // x 2^16557 ~= 10^4984 ] @available(SwiftStdlib 6.2, *) -fileprivate func intervalContainingPowerOf10_Binary128(p: Int, lower: inout UInt256, upper: inout UInt256) -> Int { - if p >= 0 && p <= 55 { - let exactLow = powersOf10_Exact128[p * 2] - let exactHigh = powersOf10_Exact128[p * 2 + 1] - lower = UInt256(high: exactHigh, exactLow, 0, low: 0) - upper = lower - return binaryExponentFor10ToThe(p) - } - - let index = p + 4984 - let offset = (index / 56) * 4 - lower = UInt256(high: powersOf10_Binary128[offset + 3], - powersOf10_Binary128[offset + 2], - powersOf10_Binary128[offset + 1], - low: powersOf10_Binary128[offset + 0]) - let extraPower = index % 56 - var e = binaryExponentFor10ToThe(p - extraPower) - - if extraPower > 0 { - let extra = UInt128(_low: powersOf10_Exact128[extraPower * 2], - _high: powersOf10_Exact128[extraPower * 2 + 1]) - lower.multiplyRoundingDown(by: extra) - e += binaryExponentFor10ToThe(extraPower) - } 
+fileprivate func _intervalContainingPowerOf10_Binary128( + p: Int, + lower: inout _UInt256, + upper: inout _UInt256 +) -> Int { + if p >= 0 && p <= 55 { + let exactLow = powersOf10_Exact128[p * 2] + let exactHigh = powersOf10_Exact128[p * 2 + 1] + lower = _UInt256(high: exactHigh, exactLow, 0, low: 0) upper = lower - upper.low += 2 - return e + return binaryExponentFor10ToThe(p) + } + + let index = p + 4984 + let offset = (index / 56) * 4 + lower = _UInt256( + high: powersOf10_Binary128[offset + 3], + powersOf10_Binary128[offset + 2], + powersOf10_Binary128[offset + 1], + low: powersOf10_Binary128[offset + 0]) + let extraPower = index % 56 + var e = binaryExponentFor10ToThe(p - extraPower) + + if extraPower > 0 { + let extra = UInt128( + _low: powersOf10_Exact128[extraPower * 2], + _high: powersOf10_Exact128[extraPower * 2 + 1]) + lower.multiplyRoundingDown(by: extra) + e += binaryExponentFor10ToThe(extraPower) + } + upper = lower + upper.low += 2 + return e } diff --git a/stdlib/public/core/FloatingPointTypes.swift.gyb b/stdlib/public/core/FloatingPointTypes.swift.gyb index 3fb80cfd6c828..01092344be7bb 100644 --- a/stdlib/public/core/FloatingPointTypes.swift.gyb +++ b/stdlib/public/core/FloatingPointTypes.swift.gyb @@ -119,7 +119,7 @@ extension ${Self}: CustomDebugStringConvertible { if #available(SwiftStdlib 6.2, *) { var buffer = InlineArray<64, UTF8.CodeUnit>(repeating: 0) var span = buffer.mutableSpan - let textRange = Float${bits}ToASCII(value: self, buffer: &span) + let textRange = _Float${bits}ToASCII(value: self, buffer: &span) let textStart = unsafe span._start().assumingMemoryBound(to: UTF8.CodeUnit.self) + textRange.lowerBound let textLength = textRange.upperBound - textRange.lowerBound @@ -138,7 +138,7 @@ public func write(to target: inout Target) where Target: TextOutputStrea if #available(SwiftStdlib 6.2, *) { var buffer = InlineArray<64, UTF8.CodeUnit>(repeating: 0) var span = buffer.mutableSpan - let textRange = Float${bits}ToASCII(value: self, 
buffer: &span) + let textRange = _Float${bits}ToASCII(value: self, buffer: &span) let textStart = unsafe span._start().assumingMemoryBound(to: UTF8.CodeUnit.self) + textRange.lowerBound let textLength = textRange.upperBound - textRange.lowerBound From 2bbab2b01ce613b609747b3087817c4cf6108530 Mon Sep 17 00:00:00 2001 From: Tim Kientzle Date: Tue, 5 Aug 2025 15:52:23 -0700 Subject: [PATCH 13/19] Remove a layer of indirection --- .../public/core/FloatingPointToString.swift | 149 +++++++----------- 1 file changed, 53 insertions(+), 96 deletions(-) diff --git a/stdlib/public/core/FloatingPointToString.swift b/stdlib/public/core/FloatingPointToString.swift index 5a804dd023100..92aab0631cde1 100644 --- a/stdlib/public/core/FloatingPointToString.swift +++ b/stdlib/public/core/FloatingPointToString.swift @@ -155,19 +155,8 @@ public func _float16ToStringImpl( return UInt64(truncatingIfNeeded: textLength) } -internal func _Float16ToASCII( - value f: Float16, - buffer utf8Buffer: inout MutableSpan -) -> Range { - if #available(SwiftStdlib 6.2, *) { - return _Float16ToASCIIImpl(value: f, buffer: &utf8Buffer) - } else { - return 0..<0 - } -} - @available(SwiftStdlib 6.2, *) -fileprivate func _Float16ToASCIIImpl( +internal func _Float16ToASCII( value f: Float16, buffer utf8Buffer: inout MutableSpan ) -> Range { @@ -429,36 +418,29 @@ public func _float32ToStringImpl( _ value: Float32, _ debug: Bool ) -> UInt64 { - // Code below works with raw memory. 
- var buffer = unsafe MutableSpan( - _unchecked: textBuffer, - count: Int(bufferLength)) - let textRange = _Float32ToASCII(value: value, buffer: &buffer) - let textLength = textRange.upperBound - textRange.lowerBound - - // Move the text to the start of the buffer - if textRange.lowerBound != 0 { - unsafe _memmove( - dest: textBuffer, - src: textBuffer + textRange.lowerBound, - size: UInt(truncatingIfNeeded: textLength)) - } - return UInt64(truncatingIfNeeded: textLength) -} - -internal func _Float32ToASCII( - value f: Float32, - buffer utf8Buffer: inout MutableSpan -) -> Range { if #available(SwiftStdlib 6.2, *) { - return _Float32ToASCIIImpl(value: f, buffer: &utf8Buffer) + // Code below works with raw memory. + var buffer = unsafe MutableSpan( + _unchecked: textBuffer, + count: Int(bufferLength)) + let textRange = _Float32ToASCII(value: value, buffer: &buffer) + let textLength = textRange.upperBound - textRange.lowerBound + + // Move the text to the start of the buffer + if textRange.lowerBound != 0 { + unsafe _memmove( + dest: textBuffer, + src: textBuffer + textRange.lowerBound, + size: UInt(truncatingIfNeeded: textLength)) + } + return UInt64(truncatingIfNeeded: textLength) } else { - return 0..<0 + fatalError() } } @available(SwiftStdlib 6.2, *) -fileprivate func _Float32ToASCIIImpl( +internal func _Float32ToASCII( value f: Float32, buffer utf8Buffer: inout MutableSpan ) -> Range { @@ -681,36 +663,29 @@ public func _float64ToStringImpl( _ value: Float64, _ debug: Bool ) -> UInt64 { - // Code below works with raw memory. 
- var buffer = unsafe MutableSpan( - _unchecked: textBuffer, - count: Int(bufferLength)) - let textRange = _Float64ToASCII(value: value, buffer: &buffer) - let textLength = textRange.upperBound - textRange.lowerBound - - // Move the text to the start of the buffer - if textRange.lowerBound != 0 { - unsafe _memmove( - dest: textBuffer, - src: textBuffer + textRange.lowerBound, - size: UInt(truncatingIfNeeded: textLength)) - } - return UInt64(truncatingIfNeeded: textLength) -} - -internal func _Float64ToASCII( - value d: Float64, - buffer utf8Buffer: inout MutableSpan -) -> Range { if #available(SwiftStdlib 6.2, *) { - return _Float64ToASCIIImpl(value: d, buffer: &utf8Buffer) + // Code below works with raw memory. + var buffer = unsafe MutableSpan( + _unchecked: textBuffer, + count: Int(bufferLength)) + let textRange = _Float64ToASCII(value: value, buffer: &buffer) + let textLength = textRange.upperBound - textRange.lowerBound + + // Move the text to the start of the buffer + if textRange.lowerBound != 0 { + unsafe _memmove( + dest: textBuffer, + src: textBuffer + textRange.lowerBound, + size: UInt(truncatingIfNeeded: textLength)) + } + return UInt64(truncatingIfNeeded: textLength) } else { - return 0..<0 + fatalError() } } @available(SwiftStdlib 6.2, *) -fileprivate func _Float64ToASCIIImpl( +internal func _Float64ToASCII( value d: Float64, buffer utf8Buffer: inout MutableSpan ) -> Range { @@ -1186,36 +1161,29 @@ internal func _float80ToStringImpl( _ value: Float80, _ debug: Bool ) -> UInt64 { - // Code below works with raw memory. 
- var buffer = unsafe MutableSpan( - _unchecked: textBuffer, - count: Int(bufferLength)) - let textRange = _Float80ToASCII(value: value, buffer: &buffer) - let textLength = textRange.upperBound - textRange.lowerBound - - // Move the text to the start of the buffer - if textRange.lowerBound != 0 { - unsafe _memmove( - dest: textBuffer, - src: textBuffer + textRange.lowerBound, - size: UInt(truncatingIfNeeded: textLength)) - } - return UInt64(truncatingIfNeeded: textLength) -} - -internal func _Float80ToASCII( - value d: Float80, - buffer utf8Buffer: inout MutableSpan -) -> Range { if #available(SwiftStdlib 6.2, *) { - return _Float80ToASCIIImpl(value: d, buffer: &utf8Buffer) + // Code below works with raw memory. + var buffer = unsafe MutableSpan( + _unchecked: textBuffer, + count: Int(bufferLength)) + let textRange = _Float80ToASCII(value: value, buffer: &buffer) + let textLength = textRange.upperBound - textRange.lowerBound + + // Move the text to the start of the buffer + if textRange.lowerBound != 0 { + unsafe _memmove( + dest: textBuffer, + src: textBuffer + textRange.lowerBound, + size: UInt(truncatingIfNeeded: textLength)) + } + return UInt64(truncatingIfNeeded: textLength) } else { - return 0..<0 + fatalError() } } @available(SwiftStdlib 6.2, *) -fileprivate func _Float80ToASCIIImpl( +internal func _Float80ToASCII( value f: Float80, buffer utf8Buffer: inout MutableSpan ) -> Range { @@ -1336,19 +1304,8 @@ fileprivate func _Float80ToASCIIImpl( // backwards compatibility, and the legacy ABI never supported // Float128. 
-internal func _Float128ToASCII( - value d: Float128, - buffer utf8Buffer: inout MutableSpan -) -> Range { - if #available(SwiftStdlib 6.2, *) { - return _Float128ToASCIIImpl(value: d, buffer: &utf8Buffer) - } else { - return 0..<0 - } -} - @available(SwiftStdlib 6.2, *) -fileprivate func _Float128ToASCIIImpl( +internal func _Float128ToASCII( value d: Float128, buffer utf8Buffer: inout MutableSpan ) -> Range { From 09feb404be512e0d1b9ddc509a102c0aa0d22bf1 Mon Sep 17 00:00:00 2001 From: Tim Kientzle Date: Tue, 5 Aug 2025 16:03:27 -0700 Subject: [PATCH 14/19] Fix build when Int is 16 bits --- stdlib/public/core/FloatingPointToString.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stdlib/public/core/FloatingPointToString.swift b/stdlib/public/core/FloatingPointToString.swift index 92aab0631cde1..79cf4c27c6cdd 100644 --- a/stdlib/public/core/FloatingPointToString.swift +++ b/stdlib/public/core/FloatingPointToString.swift @@ -789,7 +789,7 @@ internal func _Float64ToASCII( // will only need 32-bit division in that case.) 
let bulkFirstDigits = 7 - let bulkFirstDigitFactor = 1000000 // 10^(bulkFirstDigits - 1) + let bulkFirstDigitFactor: UInt32 = 1000000 // 10^(bulkFirstDigits - 1) let powerOfTenExponent = _intervalContainingPowerOf10_Binary64( p: -base10Exponent &+ bulkFirstDigits &- 1, From c5dcaa737453d343bacdd0496287b466e19795c9 Mon Sep 17 00:00:00 2001 From: Tim Kientzle Date: Wed, 6 Aug 2025 13:47:41 -0700 Subject: [PATCH 15/19] Fix Float32 formatting on 32-bit architectures --- stdlib/public/core/FloatingPointToString.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stdlib/public/core/FloatingPointToString.swift b/stdlib/public/core/FloatingPointToString.swift index 79cf4c27c6cdd..19eb374873dfe 100644 --- a/stdlib/public/core/FloatingPointToString.swift +++ b/stdlib/public/core/FloatingPointToString.swift @@ -549,7 +549,7 @@ internal func _Float32ToASCII( // Step 6: Align first digit, adjust exponent - while u < (1 &<< fractionBits) { + while u < (UInt64(1) &<< fractionBits) { base10Exponent &-= 1 l &*= 10 u &*= 10 From 0d135876a915b0be4a57516962572d4074a0ee5e Mon Sep 17 00:00:00 2001 From: Tim Kientzle Date: Wed, 6 Aug 2025 14:26:28 -0700 Subject: [PATCH 16/19] Remove extra semicolons --- stdlib/public/core/FloatingPointToString.swift | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/stdlib/public/core/FloatingPointToString.swift b/stdlib/public/core/FloatingPointToString.swift index 19eb374873dfe..5c91fc94cce28 100644 --- a/stdlib/public/core/FloatingPointToString.swift +++ b/stdlib/public/core/FloatingPointToString.swift @@ -167,13 +167,13 @@ internal func _Float16ToASCII( // Step 1: Handle various input cases: let binaryExponent: Int let significand: Float16.RawSignificand - let exponentBias = (1 << (Float16.exponentBitCount - 1)) - 2; // 14 + let exponentBias = (1 << (Float16.exponentBitCount - 1)) - 2 // 14 if (f.exponentBitPattern == 0x1f) { // NaN or Infinity if (f.isInfinite) { return _infinity(buffer: &buffer, 
sign: f.sign) } else { // f.isNaN let quietBit = - (f.significandBitPattern >> (Float16.significandBitCount - 1)) & 1; + (f.significandBitPattern >> (Float16.significandBitCount - 1)) & 1 let payloadMask = UInt16(1 &<< (Float16.significandBitCount - 2)) - 1 let payload16 = f.significandBitPattern & payloadMask return nan_details( @@ -456,7 +456,7 @@ internal func _Float32ToASCII( let binaryExponent: Int let significand: Float.RawSignificand - let exponentBias = (1 << (Float.exponentBitCount - 1)) - 2; // 126 + let exponentBias = (1 << (Float.exponentBitCount - 1)) - 2 // 126 if (f.exponentBitPattern == 0xff) { if (f.isInfinite) { return _infinity(buffer: &buffer, sign: f.sign) @@ -615,7 +615,7 @@ internal func _Float32ToASCII( } let one = UInt64(1) << (64 - integerBits) let lastAccurateBit = UInt64(1) << 24 - let fractionMask = (one - 1) & ~(lastAccurateBit - 1); + let fractionMask = (one - 1) & ~(lastAccurateBit - 1) let oneHalf = one >> 1 var lastDigit = unsafe buffer.unsafeLoad( fromUncheckedByteOffset: nextDigit &- 1, @@ -698,7 +698,7 @@ internal func _Float64ToASCII( // let binaryExponent: Int let significand: Double.RawSignificand - let exponentBias = (1 << (Double.exponentBitCount - 1)) - 2; // 1022 + let exponentBias = (1 << (Double.exponentBitCount - 1)) - 2 // 1022 if (d.exponentBitPattern == 0x7ff) { if (d.isInfinite) { @@ -1040,7 +1040,7 @@ internal func _Float64ToASCII( // >90% of random binary64 values need at least 15 digits. // We have seven so there's probably at least 8 more, which // we can grab all at once. 
- let TenToTheEighth = 100000000 as UInt128; // 10^(15-bulkFirstDigits) + let TenToTheEighth = 100000000 as UInt128 // 10^(15-bulkFirstDigits) let d0 = delta * TenToTheEighth var t0 = t * TenToTheEighth // The integer part of t0 is the next 8 digits @@ -1091,8 +1091,8 @@ internal func _Float64ToASCII( truncatingIfNeeded: t >> (64 - integerBits + adjustIntegerBits)) let one = UInt64(1) << (64 - adjustIntegerBits) - let adjustFractionMask = one - 1; - let oneHalf = one >> 1; + let adjustFractionMask = one - 1 + let oneHalf = one >> 1 if deltaHigh64 >= tHigh64 &+ one { // The `skew` is the difference between our // computed digits and the original exact value. @@ -1203,7 +1203,7 @@ internal func _Float80ToASCII( let rawSignificand = f._representation.explicitSignificand let binaryExponent: Int let significand: Float80.RawSignificand - let exponentBias = (1 << (Float80.exponentBitCount - 1)) - 2; // 16382 + let exponentBias = (1 << (Float80.exponentBitCount - 1)) - 2 // 16382 let isBoundary = f.significandBitPattern == 0 if f.exponentBitPattern == 0x7fff { // NaN or Infinity // 80387 semantics and 80287 semantics differ somewhat; From 97c52c9f25c69cf515782330cff334977ee0e40e Mon Sep 17 00:00:00 2001 From: Tim Kientzle Date: Mon, 11 Aug 2025 14:59:57 -0700 Subject: [PATCH 17/19] Try to fix a crash only seen so far on WASI32 --- stdlib/public/core/FloatingPointToString.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stdlib/public/core/FloatingPointToString.swift b/stdlib/public/core/FloatingPointToString.swift index 5c91fc94cce28..a451c8e9165f2 100644 --- a/stdlib/public/core/FloatingPointToString.swift +++ b/stdlib/public/core/FloatingPointToString.swift @@ -706,7 +706,7 @@ internal func _Float64ToASCII( } else { // d.isNaN let quietBit = (d.significandBitPattern >> (Double.significandBitCount - 1)) & 1 - let payloadMask = UInt64(1 << (Double.significandBitCount - 2)) - 1 + let payloadMask = (UInt64(1) << (Double.significandBitCount - 2)) - 1 let 
payload64 = d.significandBitPattern & payloadMask return nan_details( buffer: &buffer, From 742d7abf843dec7096227d25f9a323e03238ed22 Mon Sep 17 00:00:00 2001 From: Tim Kientzle Date: Wed, 13 Aug 2025 07:04:38 -0700 Subject: [PATCH 18/19] Minor safety and performance tweaks --- stdlib/public/core/FloatingPointToString.swift | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/stdlib/public/core/FloatingPointToString.swift b/stdlib/public/core/FloatingPointToString.swift index a451c8e9165f2..4f4b21e9af8c8 100644 --- a/stdlib/public/core/FloatingPointToString.swift +++ b/stdlib/public/core/FloatingPointToString.swift @@ -264,7 +264,7 @@ internal func _Float16ToASCII( nextDigit &+= 1 } } - let digit = 0x30 &+ (t &+ (1 &<< 27)) >> 28 + let digit = 0x30 &+ (t &+ (1 << 27)) >> 28 unsafe buffer.storeBytes( of: UInt8(truncatingIfNeeded: digit), toUncheckedByteOffset: nextDigit, @@ -606,7 +606,7 @@ internal func _Float32ToASCII( // Adjust the final digit to be closer to the original value let isBoundary = (f.significandBitPattern == 0) - if delta > t &+ (1 &<< fractionBits) { + if delta > t &+ (1 << fractionBits) { let skew: UInt64 if isBoundary { skew = delta &- delta / 3 &- t @@ -698,7 +698,7 @@ internal func _Float64ToASCII( // let binaryExponent: Int let significand: Double.RawSignificand - let exponentBias = (1 << (Double.exponentBitCount - 1)) - 2 // 1022 + let exponentBias = 1022 // (1 << (Double.exponentBitCount - 1)) - 2 if (d.exponentBitPattern == 0x7ff) { if (d.isInfinite) { @@ -706,7 +706,7 @@ internal func _Float64ToASCII( } else { // d.isNaN let quietBit = (d.significandBitPattern >> (Double.significandBitCount - 1)) & 1 - let payloadMask = (UInt64(1) << (Double.significandBitCount - 2)) - 1 + let payloadMask = (UInt64(1) &<< (Double.significandBitCount - 2)) - 1 let payload64 = d.significandBitPattern & payloadMask return nan_details( buffer: &buffer, @@ -1649,9 +1649,9 @@ fileprivate func _finishFormatting( let zeroEnd = firstDigit 
&+ base10Exponent &+ 3 // TODO: Find out how to use C memset() here: // Blast 8 "0" digits into the buffer - unsafe buffer.storeBytes( + buffer.storeBytes( of: 0x3030303030303030 as UInt64, - toUncheckedByteOffset: nextDigit, + toByteOffset: nextDigit, as: UInt64.self) // Add more "0" digits if needed... // (Note: Can't use a standard range loop because nextDigit+8 From d57edb761dc2d2a4334d0be541818d0629fc4f93 Mon Sep 17 00:00:00 2001 From: Tim Kientzle Date: Fri, 22 Aug 2025 13:05:52 -0700 Subject: [PATCH 19/19] Prefill the work buffer with '0' characters For example, this avoids the need to do any explicit byte-by-byte writes when expanding "123" out to "123000000.0". This also required reworking the "back out extra digits" process for Float64 to ensure the unused digits get written as '0' characters instead of null bytes. --- .../public/core/FloatingPointToString.swift | 102 ++++++++++-------- .../public/core/FloatingPointTypes.swift.gyb | 4 +- 2 files changed, 59 insertions(+), 47 deletions(-) diff --git a/stdlib/public/core/FloatingPointToString.swift b/stdlib/public/core/FloatingPointToString.swift index 4f4b21e9af8c8..3f09d416df57e 100644 --- a/stdlib/public/core/FloatingPointToString.swift +++ b/stdlib/public/core/FloatingPointToString.swift @@ -155,12 +155,24 @@ public func _float16ToStringImpl( return UInt64(truncatingIfNeeded: textLength) } +// Convert a Float16 to an optimal ASCII representation. +// See notes above for comments on the output format here. 
+// Inputs: +// * `value`: Float16 input +// * `buffer`: Buffer to place the result +// Returns: Range of bytes within `buffer` that contain the result +// +// Buffer must be at least 32 bytes long and must be pre-filled +// with "0" characters, e.g., via +// `InlineArray<32,UTF8.CodeUnit>(repeating:0x30)` + @available(SwiftStdlib 6.2, *) internal func _Float16ToASCII( value f: Float16, buffer utf8Buffer: inout MutableSpan ) -> Range { // We need a MutableRawSpan in order to use wide store/load operations + // TODO: Tune this value down to the actual minimum for Float16 precondition(utf8Buffer.count >= 32) var buffer = utf8Buffer.mutableBytes @@ -338,11 +350,7 @@ internal func _Float16ToASCII( if fractionPart == 0 { // Step 6: No fraction, so ".0" and we're done - // Last write on this branch, so use a checked store - buffer.storeBytes( - of: 0x30, - toByteOffset: nextDigit, - as: UInt8.self) + // "0" write is free since buffer is pre-initialized nextDigit &+= 1 } else { // Step 7: Emit the fractional part by repeatedly @@ -439,6 +447,17 @@ public func _float32ToStringImpl( } } +// Convert a Float32 to an optimal ASCII representation. +// See notes above for comments on the output format here. +// See _Float64ToASCII for comments on the algorithm. +// Inputs: +// * `value`: Float32 input +// * `buffer`: Buffer to place the result +// Returns: Range of bytes within `buffer` that contain the result +// +// Buffer must be at least 32 bytes long and must be pre-filled +// with "0" characters, e.g., via +// `InlineArray<32,UTF8.CodeUnit>(repeating:0x30)` @available(SwiftStdlib 6.2, *) internal func _Float32ToASCII( value f: Float32, @@ -449,6 +468,8 @@ internal func _Float32ToASCII( // more detailed comments and explanation. 
// We need a MutableRawSpan in order to use wide store/load operations + // TODO: Tune this limit down to the actual minimum we need here + // TODO: `assert` that the buffer is filled with 0x30 bytes (in debug builds) precondition(utf8Buffer.count >= 32) var buffer = utf8Buffer.mutableBytes @@ -561,12 +582,6 @@ internal func _Float32ToASCII( var delta = u &- l let fractionMask: UInt64 = (1 << fractionBits) - 1 - // Write 8 leading zeros to the beginning of the buffer: - unsafe buffer.storeBytes( - of: 0x3030303030303030, - toUncheckedByteOffset: 0, - as: UInt64.self) - // Overwrite the first digit at index 7: let firstDigit = 7 let digit = (t >> fractionBits) &+ 0x30 @@ -684,6 +699,18 @@ public func _float64ToStringImpl( } } +// Convert a Float64 to an optimal ASCII representation. +// See notes above for comments on the output format here. +// The algorithm is extensively commented inline; the comments +// at the top of this source file give additional context. +// Inputs: +// * `value`: Float64 input +// * `buffer`: Buffer to place the result +// Returns: Range of bytes within `buffer` that contain the result +// +// Buffer must be at least 32 bytes long and must be pre-filled +// with "0" characters, e.g., via +// `InlineArray<32,UTF8.CodeUnit>(repeating:0x30)` @available(SwiftStdlib 6.2, *) internal func _Float64ToASCII( value d: Float64, @@ -937,10 +964,6 @@ internal func _Float64ToASCII( var nextDigit = 5 var firstDigit = nextDigit - unsafe buffer.storeBytes( - of: 0x3030303030303030 as UInt64, - toUncheckedByteOffset: 0, - as: UInt64.self) // Our initial scaling gave us the first 7 digits already: let d12345678 = UInt32(truncatingIfNeeded: t._high >> 32) @@ -1015,13 +1038,14 @@ internal func _Float64ToASCII( t0 &= ~1 } // t0 has t0digits digits. 
Write them out - let text = _intToEightDigits(t0) >> ((8 - t0digits) * 8) + let text = _intToEightDigits(t0) buffer.storeBytes( of: text, toByteOffset: nextDigit, as: UInt64.self) - nextDigit &+= t0digits - firstDigit &+= 1 + nextDigit &+= 8 + // Skip the leading zeros + firstDigit &+= 9 - t0digits } else { // Our initial scaling did not produce too many digits. The // `d12345678` value holds the first 7 digits (plus a leading @@ -1182,6 +1206,17 @@ internal func _float80ToStringImpl( } } +// Convert a Float80 to an optimal ASCII representation. +// See notes above for comments on the output format here. +// See _Float64ToASCII for comments on the algorithm. +// Inputs: +// * `value`: Float80 input +// * `buffer`: Buffer to place the result +// Returns: Range of bytes within `buffer` that contain the result +// +// Buffer must be at least 32 bytes long and must be pre-filled +// with "0" characters, e.g., via +// `InlineArray<32,UTF8.CodeUnit>(repeating:0x30)` @available(SwiftStdlib 6.2, *) internal func _Float80ToASCII( value f: Float80, @@ -1408,13 +1443,7 @@ fileprivate func _backend_256bit( // Step 7: Generate digits - // Include 8 "0" characters at the beginning of the buffer - // for finishFormatting to use - buffer.storeBytes( - of: 0x3030303030303030, - toByteOffset: 0, - as: UInt64.self) - // Start writing digits just after that + // Leave 8 bytes at the beginning for finishFormatting to use let firstDigit = 8 var nextDigit = firstDigit buffer.storeBytes( @@ -1526,7 +1555,7 @@ fileprivate func _backend_256bit( // inserting decimal points, minus signs, exponents, etc, as // necessary. To minimize the work here, this assumes that there are // at least 5 unused bytes at the beginning of `buffer` before -// `firstDigit` and that those bytes are filled with `"0"` (0x30) +// `firstDigit` and that all unused bytes are filled with `"0"` (0x30) // characters. 
@available(SwiftStdlib 6.2, *) @@ -1646,25 +1675,8 @@ fileprivate func _finishFormatting( // "12345678900.0" // Fill trailing zeros, put ".0" at the end // so the result is obviously floating-point. - let zeroEnd = firstDigit &+ base10Exponent &+ 3 - // TODO: Find out how to use C memset() here: - // Blast 8 "0" digits into the buffer - buffer.storeBytes( - of: 0x3030303030303030 as UInt64, - toByteOffset: nextDigit, - as: UInt64.self) - // Add more "0" digits if needed... - // (Note: Can't use a standard range loop because nextDigit+8 - // can legitimately be larger than zeroEnd here.) - var i = nextDigit + 8 - while i < zeroEnd { - unsafe buffer.storeBytes( - of: 0x30, - toUncheckedByteOffset: i, - as: UInt8.self) - i &+= 1 - } - nextDigit = zeroEnd + // Remember buffer was initialized with "0" + nextDigit = firstDigit &+ base10Exponent &+ 3 buffer.storeBytes( of: 0x2e, toByteOffset: nextDigit &- 2, diff --git a/stdlib/public/core/FloatingPointTypes.swift.gyb b/stdlib/public/core/FloatingPointTypes.swift.gyb index 01092344be7bb..21d1ebc7b2f0f 100644 --- a/stdlib/public/core/FloatingPointTypes.swift.gyb +++ b/stdlib/public/core/FloatingPointTypes.swift.gyb @@ -117,7 +117,7 @@ extension ${Self}: CustomDebugStringConvertible { /// that NaN values are printed in an extended format. 
public var debugDescription: String { if #available(SwiftStdlib 6.2, *) { - var buffer = InlineArray<64, UTF8.CodeUnit>(repeating: 0) + var buffer = InlineArray<64, UTF8.CodeUnit>(repeating: 0x30) var span = buffer.mutableSpan let textRange = _Float${bits}ToASCII(value: self, buffer: &span) let textStart = unsafe span._start().assumingMemoryBound(to: UTF8.CodeUnit.self) + textRange.lowerBound @@ -136,7 +136,7 @@ ${Availability(bits)} extension ${Self}: TextOutputStreamable { public func write(to target: inout Target) where Target: TextOutputStream { if #available(SwiftStdlib 6.2, *) { - var buffer = InlineArray<64, UTF8.CodeUnit>(repeating: 0) + var buffer = InlineArray<64, UTF8.CodeUnit>(repeating: 0x30) var span = buffer.mutableSpan let textRange = _Float${bits}ToASCII(value: self, buffer: &span) let textStart = unsafe span._start().assumingMemoryBound(to: UTF8.CodeUnit.self) + textRange.lowerBound