1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
//! Estimate the error in an 80-bit approximation of a float.
//!
//! This estimates the error in a floating-point representation.
//!
//! This implementation is loosely based off the Golang implementation,
//! found here:
//!     https://golang.org/src/strconv/atof.go

use crate::float::*;
use crate::util::*;

pub trait FloatErrors: Mantissa {
    /// Get the full error scale.
    fn error_scale() -> u32;
    /// Get the half error scale.
    fn error_halfscale() -> u32;
    /// Determine if the number of errors is tolerable for float precision.
    fn error_is_accurate<F: Float>(count: u32, fp: &ExtendedFloat<Self>, kind: RoundingKind) -> bool;
}

perftools_inline!{
/// Check if the error is accurate with a round-nearest rounding scheme.
fn nearest_error_is_accurate(errors: u64, fp: &ExtendedFloat<u64>, extrabits: u64)
    -> bool
{
    // Round-to-nearest, need to use the halfway point.
    if extrabits == 65 {
        // Underflow, we have a shift larger than the mantissa.
        // Representation is valid **only** if the value is close enough
        // overflow to the next bit within errors. If it overflows,
        // the representation is **not** valid.
        !fp.mant.overflowing_add(errors).1
    } else {
        let mask: u64 = lower_n_mask(extrabits);
        let extra: u64 = fp.mant & mask;

        // Round-to-nearest, need to check if we're close to halfway.
        // IE, b10100 | 100000, where `|` signifies the truncation point.
        let halfway: u64 = lower_n_halfway(extrabits);
        let cmp1 = halfway.wrapping_sub(errors) < extra;
        let cmp2 = extra < halfway.wrapping_add(errors);

        // If both comparisons are true, we have significant rounding error,
        // and the value cannot be exactly represented. Otherwise, the
        // representation is valid.
        !(cmp1 && cmp2)
    }
}}

perftools_inline!{
/// Check if the error is accurate with a round-toward rounding scheme.
#[cfg(feature = "rounding")]
fn toward_error_is_accurate(errors: u64, fp: &ExtendedFloat<u64>, extrabits: u64)
    -> bool
{
    if extrabits == 65 {
        // Underflow, we have a literal 0.
        true
    } else {
        let mask: u64 = lower_n_mask(extrabits);
        let extra: u64 = fp.mant & mask;

        // Round-towards, need to use `1 << extrabits`.
        if extrabits == 64 {
            // Round toward something, we need to check if either operation can overflow,
            // since we cannot exactly represent the comparison point as the type
            // in question.
            let cmp1 = extra.checked_sub(errors).is_none();
            let cmp2 = extra.checked_add(errors).is_none();
            // If either comparison is true, we have significant rounding error,
            // since we cannot distinguish the value (1 << 64).
            cmp1 || cmp2
        } else {
            // Round toward something, need to check if we're close to
            // IE, b10101 | 000000, where `|` signifies the truncation point.
            // If the extract bits +/- the error can overflow, then  we have
            // an issue.
            let fullway: u64 = nth_bit(extrabits);
            let cmp1 = fullway.wrapping_sub(errors) < extra;
            let cmp2 = extra < fullway.wrapping_add(errors);

            // If both comparisons are true, we have significant rounding error,
            // and the value cannot be exactly represented. Otherwise, the
            // representation is valid.
            !(cmp1 && cmp2)
        }
    }
}}

impl FloatErrors for u64 {
    perftools_inline!{
    fn error_scale() -> u32 {
        8
    }}

    perftools_inline!{
    fn error_halfscale() -> u32 {
        u64::error_scale() / 2
    }}

    perftools_inline!{
    #[allow(unused_variables)]
    fn error_is_accurate<F: Float>(count: u32, fp: &ExtendedFloat<u64>, kind: RoundingKind)
        -> bool
    {
        // Determine if extended-precision float is a good approximation.
        // If the error has affected too many units, the float will be
        // inaccurate, or if the representation is too close to halfway
        // that any operations could affect this halfway representation.
        // See the documentation for dtoa for more information.
        let bias = -(F::EXPONENT_BIAS - F::MANTISSA_SIZE);
        let denormal_exp = bias - 63;
        // This is always a valid u32, since (denormal_exp - fp.exp)
        // will always be positive and the significand size is {23, 52}.
        let extrabits = match fp.exp <= denormal_exp {
            true  => 64 - F::MANTISSA_SIZE + denormal_exp - fp.exp,
            false => 63 - F::MANTISSA_SIZE,
        };

        // Our logic is as follows: we want to determine if the actual
        // mantissa and the errors during calculation differ significantly
        // from the rounding point. The rounding point for round-nearest
        // is the halfway point, IE, this when the truncated bits start
        // with b1000..., while the rounding point for the round-toward
        // is when the truncated bits are equal to 0.
        // To do so, we can check whether the rounding point +/- the error
        // are >/< the actual lower n bits.
        //
        // For whether we need to use signed or unsigned types for this
        // analysis, see this example, using u8 rather than u64 to simplify
        // things.
        //
        // # Comparisons
        //      cmp1 = (halfway - errors) < extra
        //      cmp1 = extra < (halfway + errors)
        //
        // # Large Extrabits, Low Errors
        //
        //      extrabits = 8
        //      halfway          =  0b10000000
        //      extra            =  0b10000010
        //      errors           =  0b00000100
        //      halfway - errors =  0b01111100
        //      halfway + errors =  0b10000100
        //
        //      Unsigned:
        //          halfway - errors = 124
        //          halfway + errors = 132
        //          extra            = 130
        //          cmp1             = true
        //          cmp2             = true
        //      Signed:
        //          halfway - errors = 124
        //          halfway + errors = -124
        //          extra            = -126
        //          cmp1             = false
        //          cmp2             = true
        //
        // # Conclusion
        //
        // Since errors will always be small, and since we want to detect
        // if the representation is accurate, we need to use an **unsigned**
        // type for comparisons.

        let extrabits = extrabits.as_u64();
        let errors = count.as_u64();
        if extrabits > 65 {
            // Underflow, we have a literal 0.
            return true;
        }

        #[cfg(not(feature = "rounding"))] {
            nearest_error_is_accurate(errors, fp, extrabits)
        }

        #[cfg(feature = "rounding")] {
            if is_nearest(kind) {
                nearest_error_is_accurate(errors, fp, extrabits)
            } else {
                toward_error_is_accurate(errors, fp, extrabits)
            }
        }
    }}
}

// 128-bit representation is always accurate, ignore this.
impl FloatErrors for u128 {
    perftools_inline!{
    fn error_scale() -> u32 {
        0
    }}

    perftools_inline!{
    fn error_halfscale() -> u32 {
        0
    }}

    perftools_inline!{
    fn error_is_accurate<F: Float>(_: u32, _: &ExtendedFloat<u128>, _: RoundingKind) -> bool {
        // Ignore the halfway problem, use more bits to aim for accuracy,
        // but short-circuit to avoid extremely slow operations.
        true
    }}
}