iter.go
7.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
package jsoniter
import (
"encoding/json"
"fmt"
"io"
)
// ValueType the type for JSON element
type ValueType int
const (
// InvalidValue invalid JSON element
InvalidValue ValueType = iota
// StringValue JSON element "string"
StringValue
// NumberValue JSON element 100 or 0.10
NumberValue
// NilValue JSON element null
NilValue
// BoolValue JSON element true or false
BoolValue
// ArrayValue JSON element []
ArrayValue
// ObjectValue JSON element {}
ObjectValue
)
var hexDigits []byte
var valueTypes []ValueType
func init() {
hexDigits = make([]byte, 256)
for i := 0; i < len(hexDigits); i++ {
hexDigits[i] = 255
}
for i := '0'; i <= '9'; i++ {
hexDigits[i] = byte(i - '0')
}
for i := 'a'; i <= 'f'; i++ {
hexDigits[i] = byte((i - 'a') + 10)
}
for i := 'A'; i <= 'F'; i++ {
hexDigits[i] = byte((i - 'A') + 10)
}
valueTypes = make([]ValueType, 256)
for i := 0; i < len(valueTypes); i++ {
valueTypes[i] = InvalidValue
}
valueTypes['"'] = StringValue
valueTypes['-'] = NumberValue
valueTypes['0'] = NumberValue
valueTypes['1'] = NumberValue
valueTypes['2'] = NumberValue
valueTypes['3'] = NumberValue
valueTypes['4'] = NumberValue
valueTypes['5'] = NumberValue
valueTypes['6'] = NumberValue
valueTypes['7'] = NumberValue
valueTypes['8'] = NumberValue
valueTypes['9'] = NumberValue
valueTypes['t'] = BoolValue
valueTypes['f'] = BoolValue
valueTypes['n'] = NilValue
valueTypes['['] = ArrayValue
valueTypes['{'] = ObjectValue
}
// Iterator is a io.Reader like object, with JSON specific read functions.
// Error is not returned as return value, but stored as Error member on this iterator instance.
type Iterator struct {
cfg *frozenConfig
reader io.Reader
buf []byte
head int
tail int
captureStartedAt int
captured []byte
Error error
Attachment interface{} // open for customized decoder
}
// NewIterator creates an empty Iterator instance
func NewIterator(cfg API) *Iterator {
return &Iterator{
cfg: cfg.(*frozenConfig),
reader: nil,
buf: nil,
head: 0,
tail: 0,
}
}
// Parse creates an Iterator instance from io.Reader
func Parse(cfg API, reader io.Reader, bufSize int) *Iterator {
return &Iterator{
cfg: cfg.(*frozenConfig),
reader: reader,
buf: make([]byte, bufSize),
head: 0,
tail: 0,
}
}
// ParseBytes creates an Iterator instance from byte array
func ParseBytes(cfg API, input []byte) *Iterator {
return &Iterator{
cfg: cfg.(*frozenConfig),
reader: nil,
buf: input,
head: 0,
tail: len(input),
}
}
// ParseString creates an Iterator instance from string
func ParseString(cfg API, input string) *Iterator {
return ParseBytes(cfg, []byte(input))
}
// Pool returns a pool can provide more iterator with same configuration
func (iter *Iterator) Pool() IteratorPool {
return iter.cfg
}
// Reset reuse iterator instance by specifying another reader
func (iter *Iterator) Reset(reader io.Reader) *Iterator {
iter.reader = reader
iter.head = 0
iter.tail = 0
return iter
}
// ResetBytes reuse iterator instance by specifying another byte array as input
func (iter *Iterator) ResetBytes(input []byte) *Iterator {
iter.reader = nil
iter.buf = input
iter.head = 0
iter.tail = len(input)
return iter
}
// WhatIsNext gets ValueType of relatively next json element
func (iter *Iterator) WhatIsNext() ValueType {
valueType := valueTypes[iter.nextToken()]
iter.unreadByte()
return valueType
}
func (iter *Iterator) skipWhitespacesWithoutLoadMore() bool {
for i := iter.head; i < iter.tail; i++ {
c := iter.buf[i]
switch c {
case ' ', '\n', '\t', '\r':
continue
}
iter.head = i
return false
}
return true
}
func (iter *Iterator) isObjectEnd() bool {
c := iter.nextToken()
if c == ',' {
return false
}
if c == '}' {
return true
}
iter.ReportError("isObjectEnd", "object ended prematurely, unexpected char "+string([]byte{c}))
return true
}
func (iter *Iterator) nextToken() byte {
// a variation of skip whitespaces, returning the next non-whitespace token
for {
for i := iter.head; i < iter.tail; i++ {
c := iter.buf[i]
switch c {
case ' ', '\n', '\t', '\r':
continue
}
iter.head = i + 1
return c
}
if !iter.loadMore() {
return 0
}
}
}
// ReportError record a error in iterator instance with current position.
func (iter *Iterator) ReportError(operation string, msg string) {
if iter.Error != nil {
if iter.Error != io.EOF {
return
}
}
peekStart := iter.head - 10
if peekStart < 0 {
peekStart = 0
}
peekEnd := iter.head + 10
if peekEnd > iter.tail {
peekEnd = iter.tail
}
parsing := string(iter.buf[peekStart:peekEnd])
contextStart := iter.head - 50
if contextStart < 0 {
contextStart = 0
}
contextEnd := iter.head + 50
if contextEnd > iter.tail {
contextEnd = iter.tail
}
context := string(iter.buf[contextStart:contextEnd])
iter.Error = fmt.Errorf("%s: %s, error found in #%v byte of ...|%s|..., bigger context ...|%s|...",
operation, msg, iter.head-peekStart, parsing, context)
}
// CurrentBuffer gets current buffer as string for debugging purpose
func (iter *Iterator) CurrentBuffer() string {
peekStart := iter.head - 10
if peekStart < 0 {
peekStart = 0
}
return fmt.Sprintf("parsing #%v byte, around ...|%s|..., whole buffer ...|%s|...", iter.head,
string(iter.buf[peekStart:iter.head]), string(iter.buf[0:iter.tail]))
}
func (iter *Iterator) readByte() (ret byte) {
if iter.head == iter.tail {
if iter.loadMore() {
ret = iter.buf[iter.head]
iter.head++
return ret
}
return 0
}
ret = iter.buf[iter.head]
iter.head++
return ret
}
func (iter *Iterator) loadMore() bool {
if iter.reader == nil {
if iter.Error == nil {
iter.head = iter.tail
iter.Error = io.EOF
}
return false
}
if iter.captured != nil {
iter.captured = append(iter.captured,
iter.buf[iter.captureStartedAt:iter.tail]...)
iter.captureStartedAt = 0
}
for {
n, err := iter.reader.Read(iter.buf)
if n == 0 {
if err != nil {
if iter.Error == nil {
iter.Error = err
}
return false
}
} else {
iter.head = 0
iter.tail = n
return true
}
}
}
func (iter *Iterator) unreadByte() {
if iter.Error != nil {
return
}
iter.head--
return
}
// Read read the next JSON element as generic interface{}.
func (iter *Iterator) Read() interface{} {
valueType := iter.WhatIsNext()
switch valueType {
case StringValue:
return iter.ReadString()
case NumberValue:
if iter.cfg.configBeforeFrozen.UseNumber {
return json.Number(iter.readNumberAsString())
}
return iter.ReadFloat64()
case NilValue:
iter.skipFourBytes('n', 'u', 'l', 'l')
return nil
case BoolValue:
return iter.ReadBool()
case ArrayValue:
arr := []interface{}{}
iter.ReadArrayCB(func(iter *Iterator) bool {
var elem interface{}
iter.ReadVal(&elem)
arr = append(arr, elem)
return true
})
return arr
case ObjectValue:
obj := map[string]interface{}{}
iter.ReadMapCB(func(Iter *Iterator, field string) bool {
var elem interface{}
iter.ReadVal(&elem)
obj[field] = elem
return true
})
return obj
default:
iter.ReportError("Read", fmt.Sprintf("unexpected value type: %v", valueType))
return nil
}
}