float.go 7.4 KB

原文件审查历史永久链接

// Copyright (c) 2012-2018 Ugorji Nwoke. All rights reserved.
// Use of this source code is governed by a MIT license found in the LICENSE file.

package codec

import "strconv"

// func parseFloat(b []byte, bitsize int) (f float64, err error) {
// 	if bitsize == 32 {
// 		return parseFloat32(b)
// 	} else {
// 		return parseFloat64(b)
// 	}
// }

func parseFloat32(b []byte) (f float32, err error) {
	return parseFloat32_custom(b)
	// return parseFloat32_strconv(b)
}

func parseFloat64(b []byte) (f float64, err error) {
	return parseFloat64_custom(b)
	// return parseFloat64_strconv(b)
}

func parseFloat32_strconv(b []byte) (f float32, err error) {
	f64, err := strconv.ParseFloat(stringView(b), 32)
	f = float32(f64)
	return
}

func parseFloat64_strconv(b []byte) (f float64, err error) {
	return strconv.ParseFloat(stringView(b), 64)
}

// ------ parseFloat custom below --------

// We assume that a lot of floating point numbers in json files will be
// those that are handwritten, and with defined precision (in terms of number
// of digits after decimal point), etc.
//
// We further assume that this ones can be written in exact format.
//
// strconv.ParseFloat has some unnecessary overhead which we can do without
// for the common case:
//
//    - expensive char-by-char check to see if underscores are in right place
//    - testing for and skipping underscores
//    - check if the string matches ignorecase +/- inf, +/- infinity, nan
//    - support for base 16 (0xFFFF...)
//
// The functions below will try a fast-path for floats which can be decoded
// without any loss of precision, meaning they:
//
//    - fits within the significand bits of the 32-bits or 64-bits
//    - exponent fits within the exponent value
//    - there is no truncation (any extra numbers are all trailing zeros)
//
// To figure out what the values are for maxMantDigits, use this idea below:
//
// 2^23 =                 838 8608 (between 10^ 6 and 10^ 7) (significand bits of uint32)
// 2^32 =             42 9496 7296 (between 10^ 9 and 10^10) (full uint32)
// 2^52 =      4503 5996 2737 0496 (between 10^15 and 10^16) (significand bits of uint64)
// 2^64 = 1844 6744 0737 0955 1616 (between 10^19 and 10^20) (full uint64)
//
// Since we only allow for up to what can comfortably fit into the significand
// ignoring the exponent, and we only try to parse iff significand fits into the

// Exact powers of 10.
var float64pow10 = [...]float64{
	1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
	1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
	1e20, 1e21, 1e22,
}
var float32pow10 = [...]float32{1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10}

type floatinfo struct {
	mantbits uint8
	expbits  uint8
	bias     int16

	exactPow10 int8 // Exact powers of ten are <= 10^N (32: 10, 64: 22)
	exactInts  int8 // Exact integers are <= 10^N

	maxMantDigits int8 // 10^19 fits in uint64, while 10^9 fits in uint32
}

var fi32 = floatinfo{23, 8, -127, 10, 7, 9}     // maxMantDigits = 9
var fi64 = floatinfo{52, 11, -1023, 22, 15, 19} // maxMantDigits = 19

const fMax64 = 1e15
const fMax32 = 1e7

const fBase = 10

func parseFloatErr(b []byte) error {
	return &strconv.NumError{
		Func: "ParseFloat",
		Err:  strconv.ErrSyntax,
		Num:  string(b),
	}
}

func parseFloat32_custom(b []byte) (f float32, err error) {
	mantissa, exp, neg, trunc, bad, ok := readFloat(b, fi32)
	_ = trunc
	if bad {
		return 0, parseFloatErr(b)
	}
	if ok {
		// parseFloatDebug(b, 32, false, exp, trunc, ok)
		f = float32(mantissa)
		if neg {
			f = -f
		}
		if exp != 0 {
			indx := fExpIndx(exp)
			if exp < 0 { // int / 10^k
				f /= float32pow10[indx]
			} else { // exp > 0
				if exp > fi32.exactPow10 {
					f *= float32pow10[exp-fi32.exactPow10]
					if f < -fMax32 || f > fMax32 { // exponent too large - outside range
						goto FALLBACK
					}
					indx = uint8(fi32.exactPow10)
				}
				f *= float32pow10[indx]
			}
		}
		return
	}
FALLBACK:
	// parseFloatDebug(b, 32, true, exp, trunc, ok)
	return parseFloat32_strconv(b)
}

func parseFloat64_custom(b []byte) (f float64, err error) {
	mantissa, exp, neg, trunc, bad, ok := readFloat(b, fi64)
	_ = trunc
	if bad {
		return 0, parseFloatErr(b)
	}
	if ok {
		f = float64(mantissa)
		if neg {
			f = -f
		}
		if exp != 0 {
			indx := fExpIndx(exp)
			if exp < 0 { // int / 10^k
				f /= float64pow10[indx]
			} else { // exp > 0
				if exp > fi64.exactPow10 {
					f *= float64pow10[exp-fi64.exactPow10]
					if f < -fMax64 || f > fMax64 { // exponent too large - outside range
						goto FALLBACK
					}
					indx = uint8(fi64.exactPow10)
				}
				f *= float64pow10[indx]
			}
		}
		return
	}
FALLBACK:
	return parseFloat64_strconv(b)
}

func fExpIndx(v int8) uint8 {
	if v < 0 {
		return uint8(-v)
	}
	return uint8(v)
}

func readFloat(s []byte, y floatinfo) (mantissa uint64, exp int8, neg, trunc, bad, ok bool) {
	var i uint // make it uint, so that we eliminate bounds checking
	var slen = uint(len(s))
	if slen == 0 {
		bad = true
		return
	}
	switch s[0] {
	case '+':
		i++
	case '-':
		neg = true
		i++
	}

	// we considered punting early if string has length > maxMantDigits, but this doesn't account
	// for trailing 0's e.g. 700000000000000000000 can be encoded exactly as it is 7e20

	// var sawdot, sawdigits, sawexp bool
	var sawdot, sawexp bool
	var nd, ndMant, dp int8
L:
	for ; i < slen; i++ {
		switch s[i] {
		case '.':
			if sawdot {
				bad = true
				return
			}
			sawdot = true
			dp = nd
		case '0':
			if nd == 0 { // ignore leading zeros
				dp--
				continue
			}
			nd++
			if ndMant < y.maxMantDigits {
				// mantissa = (mantissa << 1) + (mantissa << 3)
				mantissa *= fBase
				ndMant++
			}
		case '1', '2', '3', '4', '5', '6', '7', '8', '9':
			// sawdigits = true
			nd++
			if ndMant < y.maxMantDigits {
				// mantissa = (mantissa << 1) + (mantissa << 3) + uint64(s[i]-'0')
				mantissa = mantissa*fBase + uint64(s[i]-'0')
				// mantissa *= fBase
				// mantissa += uint64(s[i] - '0')
				ndMant++
			} else {
				trunc = true
				return // break L
			}
		case 'e', 'E':
			sawexp = true
			break L
		default:
			bad = true
			return
		}
	}
	// if !sawdigits {
	// 	bad = true
	// 	return
	// }
	if !sawdot {
		dp = nd
	}

	if sawexp {
		i++
		if i < slen {
			var eneg bool
			if s[i] == '+' {
				i++
			} else if s[i] == '-' {
				i++
				eneg = true
			}
			if i < slen {
				// for exact match, exponent is 1 or 2 digits (float64: -22 to 37, float32: -1 to 17).
				// exit quick if exponent is more than 2 digits.
				if i+2 < slen {
					return
				}

				var e int8

				if s[i] < '0' || s[i] > '9' {
					bad = true
					return
				}
				e = e*fBase + int8(s[i]-'0') // (e << 1) + (e << 3) + int8(s[i]-'0')
				i++

				if i < slen {
					if s[i] < '0' || s[i] > '9' {
						bad = true
						return
					}
					e = e*fBase + int8(s[i]-'0') // (e << 1) + (e << 3) + int8(s[i]-'0')
					i++
				}

				if eneg {
					dp -= e
				} else {
					dp += e
				}
			}
		}
	}

	if mantissa != 0 {
		if mantissa>>y.mantbits != 0 {
			return
		}
		exp = dp - ndMant
		if exp < -y.exactPow10 || exp > y.exactInts+y.exactPow10 { // cannot handle it
			return
		}
	}
	ok = true // && !trunc // if trunc=true, we return early (so here trunc=false)
	return
}

// fMul10ShiftU64

// func parseFloatDebug(b []byte, bitsize int, strconv bool, exp int8, trunc, ok bool) {
// 	if strconv {
// 		xdebugf("parseFloat%d: delegating: %s, exp: %d, trunc: %v, ok: %v", bitsize, b, exp, trunc, ok)
// 	} else {
// 		xdebug2f("parseFloat%d: attempting: %s, exp: %d, trunc: %v, ok: %v", bitsize, b, exp, trunc, ok)
// 	}
// }