float.go
7.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
// Copyright (c) 2012-2018 Ugorji Nwoke. All rights reserved.
// Use of this source code is governed by a MIT license found in the LICENSE file.
package codec
import "strconv"
// func parseFloat(b []byte, bitsize int) (f float64, err error) {
// if bitsize == 32 {
// return parseFloat32(b)
// } else {
// return parseFloat64(b)
// }
// }
// parseFloat32 decodes the decimal text in b into a float32.
//
// It is the single switch point for the float32 parser: it currently routes to
// parseFloat32_custom; parseFloat32_strconv is the drop-in alternative.
func parseFloat32(b []byte) (f float32, err error) {
	f, err = parseFloat32_custom(b)
	return f, err
}
// parseFloat64 decodes the decimal text in b into a float64.
//
// It is the single switch point for the float64 parser: it currently routes to
// parseFloat64_custom; parseFloat64_strconv is the drop-in alternative.
func parseFloat64(b []byte) (f float64, err error) {
	f, err = parseFloat64_custom(b)
	return f, err
}
// parseFloat32_strconv parses b with strconv.ParseFloat at 32-bit precision and
// narrows the result to float32. The error from strconv is returned unchanged,
// together with whatever value strconv produced alongside it.
//
// NOTE(review): stringView is defined elsewhere in the package; presumably it
// exposes b as a string without copying - confirm before retaining the string.
func parseFloat32_strconv(b []byte) (f float32, err error) {
	var wide float64
	wide, err = strconv.ParseFloat(stringView(b), 32)
	return float32(wide), err
}
// parseFloat64_strconv parses b with strconv.ParseFloat at 64-bit precision,
// returning strconv's value and error unchanged.
//
// NOTE(review): stringView is defined elsewhere in the package; presumably it
// exposes b as a string without copying - confirm before retaining the string.
func parseFloat64_strconv(b []byte) (f float64, err error) {
	f, err = strconv.ParseFloat(stringView(b), 64)
	return
}
// ------ parseFloat custom below --------
// We assume that a lot of floating point numbers in json files will be
// those that are handwritten, and with defined precision (in terms of number
// of digits after decimal point), etc.
//
// We further assume that these numbers can be represented exactly.
//
// strconv.ParseFloat has some unnecessary overhead which we can do without
// for the common case:
//
// - expensive char-by-char check to see if underscores are in right place
// - testing for and skipping underscores
// - check if the string matches ignorecase +/- inf, +/- infinity, nan
// - support for base 16 (0xFFFF...)
//
// The functions below will try a fast-path for floats which can be decoded
// without any loss of precision, meaning that:
//
// - the significand fits within the significand bits of a 32-bit or 64-bit float
// - the decimal exponent is within the range the fast path can scale exactly
// - there is no truncation (any extra digits are all trailing zeros)
//
// To figure out what the values are for maxMantDigits, use this idea below:
//
// 2^23 =                838 8608 (between 10^ 6 and 10^ 7) (significand bits of float32)
// 2^32 =             42 9496 7296 (between 10^ 9 and 10^10) (full uint32)
// 2^52 =      4503 5996 2737 0496 (between 10^15 and 10^16) (significand bits of float64)
// 2^64 = 1844 6744 0737 0955 1616 (between 10^19 and 10^20) (full uint64)
//
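// Since we only allow for up to what can comfortably fit into the significand
// ignoring the exponent, and we only try to parse iff the significand fits into
// the significand bits of the target float type, maxMantDigits is set to the
// number of decimal digits that always fit in the integer type: 9 digits for
// uint32 (used for float32) and 19 digits for uint64 (used for float64).

// Exact powers of 10.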
var (
	// float64pow10[k] holds 10^k as a float64, for k = 0..22.
	float64pow10 = [...]float64{
		1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7,
		1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15,
		1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22,
	}

	// float32pow10[k] holds 10^k as a float32, for k = 0..10.
	float32pow10 = [...]float32{
		1e0, 1e1, 1e2, 1e3, 1e4, 1e5,
		1e6, 1e7, 1e8, 1e9, 1e10,
	}
)
// floatinfo bundles the per-width limits used by the custom float parser.
//
// Field order is significant: fi32 and fi64 are built with positional
// composite literals.
type floatinfo struct {
	// mantbits is the bit position above which a parsed mantissa is considered
	// too large for the fast path (readFloat gives up when mantissa>>mantbits
	// is non-zero). expbits is the exponent field width; it is not read by the
	// parsing code in this file.
	mantbits, expbits uint8

	// bias is the exponent bias; it is not read by the parsing code in this file.
	bias int16

	// exactPow10:    exact powers of ten are <= 10^N (32: 10, 64: 22).
	// exactInts:     exact integers are <= 10^N (32: 7, 64: 15).
	// maxMantDigits: maximum number of decimal digits readFloat accumulates
	//                into the mantissa (10^19 fits in uint64, 10^9 in uint32).
	exactPow10, exactInts, maxMantDigits int8
}
// fi32 and fi64 are the parser limits for float32 and float64 respectively.
var (
	fi32 = floatinfo{
		mantbits:      23,
		expbits:       8,
		bias:          -127,
		exactPow10:    10,
		exactInts:     7,
		maxMantDigits: 9,
	}
	fi64 = floatinfo{
		mantbits:      52,
		expbits:       11,
		bias:          -1023,
		exactPow10:    22,
		exactInts:     15,
		maxMantDigits: 19,
	}
)
const (
	// fBase is the radix used when accumulating mantissa and exponent digits.
	fBase = 10

	// fMax32 and fMax64 bound the magnitude of the partially scaled value in
	// parseFloat32_custom / parseFloat64_custom: beyond them the custom path
	// hands the input over to strconv.
	fMax32 = 1e7
	fMax64 = 1e15
)
// parseFloatErr builds the syntax error reported for malformed float text. It
// has the same shape as the error strconv.ParseFloat returns for bad syntax: a
// *strconv.NumError wrapping strconv.ErrSyntax. The offending input is copied
// into the error, so b may be reused afterwards.
func parseFloatErr(b []byte) error {
	numErr := new(strconv.NumError)
	numErr.Func = "ParseFloat"
	numErr.Num = string(b)
	numErr.Err = strconv.ErrSyntax
	return numErr
}
// parseFloat32_custom parses b as a float32, using an exact fast path when
// readFloat reports one and deferring to parseFloat32_strconv otherwise.
//
// Outcomes, in order of precedence:
//   - readFloat flags the text as bad: returns 0 and parseFloatErr(b).
//   - readFloat cannot offer an exact decomposition: returns the strconv result.
//   - otherwise the value is mantissa scaled by 10^exp with at most two float
//     operations on table entries from float32pow10.
func parseFloat32_custom(b []byte) (f float32, err error) {
	mant, exp, neg, _, bad, ok := readFloat(b, fi32)
	switch {
	case bad:
		return 0, parseFloatErr(b)
	case !ok:
		return parseFloat32_strconv(b)
	}

	f = float32(mant)
	if neg {
		f = -f
	}

	switch {
	case exp < 0:
		// integer / 10^k
		f /= float32pow10[uint8(-exp)]
	case exp > fi32.exactPow10:
		// Exponent exceeds the table: first fold the excess into the integer
		// part, and only continue if that intermediate stays within fMax32.
		f *= float32pow10[exp-fi32.exactPow10]
		if f < -fMax32 || f > fMax32 {
			return parseFloat32_strconv(b)
		}
		f *= float32pow10[fi32.exactPow10]
	case exp > 0:
		f *= float32pow10[exp]
	}
	return f, nil
}
// parseFloat64_custom parses b as a float64, using an exact fast path when
// readFloat reports one and deferring to parseFloat64_strconv otherwise.
//
// Outcomes, in order of precedence:
//   - readFloat flags the text as bad: returns 0 and parseFloatErr(b).
//   - readFloat cannot offer an exact decomposition: returns the strconv result.
//   - otherwise the value is mantissa scaled by 10^exp with at most two float
//     operations on table entries from float64pow10.
func parseFloat64_custom(b []byte) (f float64, err error) {
	mant, exp, neg, _, bad, ok := readFloat(b, fi64)
	switch {
	case bad:
		return 0, parseFloatErr(b)
	case !ok:
		return parseFloat64_strconv(b)
	}

	f = float64(mant)
	if neg {
		f = -f
	}

	switch {
	case exp < 0:
		// integer / 10^k
		f /= float64pow10[uint8(-exp)]
	case exp > fi64.exactPow10:
		// Exponent exceeds the table: first fold the excess into the integer
		// part, and only continue if that intermediate stays within fMax64.
		f *= float64pow10[exp-fi64.exactPow10]
		if f < -fMax64 || f > fMax64 {
			return parseFloat64_strconv(b)
		}
		f *= float64pow10[fi64.exactPow10]
	case exp > 0:
		f *= float64pow10[exp]
	}
	return f, nil
}
// fExpIndx returns the magnitude of v as a uint8, suitable for indexing the
// power-of-ten tables with either a positive or a negative exponent.
func fExpIndx(v int8) uint8 {
	idx := uint8(v)
	if v < 0 {
		idx = uint8(-v)
	}
	return idx
}
// readFloat scans the decimal number text in s and, when possible, reduces it
// to an integer mantissa and a base-10 exponent so that the caller can produce
// the float with a single exact multiplication or division.
//
// y supplies the limits of the target float type (see fi32 and fi64).
//
// Results:
//   - bad: s is not a well-formed decimal number: it is empty, contains no
//     digit before the exponent, has a second '.', has an exponent marker
//     with no digits after it, or contains any other character.
//   - ok: the fast path applies; the value is mantissa * 10^exp, negated when
//     neg is set. mantissa has no bits at or above y.mantbits and exp lies in
//     [-y.exactPow10, y.exactInts+y.exactPow10].
//   - trunc: a non-zero digit was seen after y.maxMantDigits digits had
//     already been accumulated. Scanning stops immediately, so the rest of s
//     is NOT validated and ok is false.
//   - exp is only meaningful when ok is true.
//
// When neither bad nor ok is set the text may still be a valid number (too many
// digits, exponent too large, ...); the caller is expected to fall back to a
// general-purpose parser, which also performs any remaining validation.
func readFloat(s []byte, y floatinfo) (mantissa uint64, exp int8, neg, trunc, bad, ok bool) {
	var i uint // unsigned index, kept to help the compiler drop bounds checks
	slen := uint(len(s))
	if slen == 0 {
		bad = true
		return
	}
	switch s[0] {
	case '+':
		i++
	case '-':
		neg = true
		i++
	}

	var sawdot, sawdigits, sawexp bool
	// nd counts significant digits (after leading zeros) and dp tracks the
	// decimal point position relative to them. Both are plain ints: they grow
	// with the input length, and a narrow type would wrap around on long runs
	// of zeros and yield an in-range but wrong exponent.
	var nd, dp int
	var ndMant int8 // digits folded into mantissa; never exceeds y.maxMantDigits
L:
	for ; i < slen; i++ {
		switch c := s[i]; c {
		case '.':
			if sawdot {
				bad = true
				return
			}
			sawdot = true
			dp = nd
		case '0':
			sawdigits = true
			if nd == 0 { // leading zero: contributes no digit, only shifts the point
				dp--
				continue
			}
			nd++
			if ndMant < y.maxMantDigits {
				mantissa *= fBase
				ndMant++
			}
			// A zero beyond maxMantDigits is dropped without setting trunc:
			// it only scales the value, which exp accounts for via dp.
		case '1', '2', '3', '4', '5', '6', '7', '8', '9':
			sawdigits = true
			nd++
			if ndMant >= y.maxMantDigits {
				// A significant digit would be lost: not exactly representable here.
				trunc = true
				return
			}
			mantissa = mantissa*fBase + uint64(c-'0')
			ndMant++
		case 'e', 'E':
			sawexp = true
			break L
		default:
			bad = true
			return
		}
	}
	if !sawdigits {
		// Inputs such as "-", "." or "e5" carry no digits at all.
		bad = true
		return
	}
	if !sawdot {
		dp = nd
	}

	if sawexp {
		i++ // step over the 'e' / 'E'
		var eneg bool
		if i < slen {
			switch s[i] {
			case '+':
				i++
			case '-':
				eneg = true
				i++
			}
		}
		if i >= slen {
			// Exponent marker (and optional sign) with no digits, e.g. "1e" or "1e-".
			bad = true
			return
		}
		// For an exact match the exponent has 1 or 2 digits (float64: -22 to 37,
		// float32: -10 to 17). With more characters than that, give up early and
		// let the fallback parse and validate the text.
		if i+2 < slen {
			return
		}
		var e int
		for ; i < slen; i++ {
			c := s[i]
			if c < '0' || c > '9' {
				bad = true
				return
			}
			e = e*fBase + int(c-'0')
		}
		if eneg {
			dp -= e
		} else {
			dp += e
		}
	}

	if mantissa != 0 {
		if mantissa>>y.mantbits != 0 { // mantissa too wide to be exact
			return
		}
		// Range-check in int before narrowing, so an out-of-range exponent can
		// never alias into the accepted window.
		e10 := dp - int(ndMant)
		if e10 < -int(y.exactPow10) || e10 > int(y.exactInts)+int(y.exactPow10) {
			return
		}
		exp = int8(e10)
	}
	ok = true // trunc is necessarily false here: truncation returns early
	return
}
// fMul10ShiftU64
// func parseFloatDebug(b []byte, bitsize int, strconv bool, exp int8, trunc, ok bool) {
// if strconv {
// xdebugf("parseFloat%d: delegating: %s, exp: %d, trunc: %v, ok: %v", bitsize, b, exp, trunc, ok)
// } else {
// xdebug2f("parseFloat%d: attempting: %s, exp: %d, trunc: %v, ok: %v", bitsize, b, exp, trunc, ok)
// }
// }