parser.go 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191
  1. package jsonparser
  2. import (
  3. "bytes"
  4. "errors"
  5. "fmt"
  6. "math"
  7. "strconv"
  8. )
  9. // Errors
  10. var (
  11. KeyPathNotFoundError = errors.New("Key path not found")
  12. UnknownValueTypeError = errors.New("Unknown value type")
  13. MalformedJsonError = errors.New("Malformed JSON error")
  14. MalformedStringError = errors.New("Value is string, but can't find closing '\"' symbol")
  15. MalformedArrayError = errors.New("Value is array, but can't find closing ']' symbol")
  16. MalformedObjectError = errors.New("Value looks like object, but can't find closing '}' symbol")
  17. MalformedValueError = errors.New("Value looks like Number/Boolean/None, but can't find its end: ',' or '}' symbol")
  18. MalformedStringEscapeError = errors.New("Encountered an invalid escape sequence in a string")
  19. )
  20. // How much stack space to allocate for unescaping JSON strings; if a string longer
  21. // than this needs to be escaped, it will result in a heap allocation
  22. const unescapeStackBufSize = 64
  23. func tokenEnd(data []byte) int {
  24. for i, c := range data {
  25. switch c {
  26. case ' ', '\n', '\r', '\t', ',', '}', ']':
  27. return i
  28. }
  29. }
  30. return len(data)
  31. }
  32. func findTokenStart(data []byte, token byte) int {
  33. for i := len(data) - 1; i >= 0; i-- {
  34. switch data[i] {
  35. case token:
  36. return i
  37. case '[', '{':
  38. return 0
  39. }
  40. }
  41. return 0
  42. }
  43. func findKeyStart(data []byte, key string) (int, error) {
  44. i := 0
  45. ln := len(data)
  46. if ln > 0 && (data[0] == '{' || data[0] == '[') {
  47. i = 1
  48. }
  49. var stackbuf [unescapeStackBufSize]byte // stack-allocated array for allocation-free unescaping of small strings
  50. if ku, err := Unescape(StringToBytes(key), stackbuf[:]); err == nil {
  51. key = bytesToString(&ku)
  52. }
  53. for i < ln {
  54. switch data[i] {
  55. case '"':
  56. i++
  57. keyBegin := i
  58. strEnd, keyEscaped := stringEnd(data[i:])
  59. if strEnd == -1 {
  60. break
  61. }
  62. i += strEnd
  63. keyEnd := i - 1
  64. valueOffset := nextToken(data[i:])
  65. if valueOffset == -1 {
  66. break
  67. }
  68. i += valueOffset
  69. // if string is a key, and key level match
  70. k := data[keyBegin:keyEnd]
  71. // for unescape: if there are no escape sequences, this is cheap; if there are, it is a
  72. // bit more expensive, but causes no allocations unless len(key) > unescapeStackBufSize
  73. if keyEscaped {
  74. if ku, err := Unescape(k, stackbuf[:]); err != nil {
  75. break
  76. } else {
  77. k = ku
  78. }
  79. }
  80. if data[i] == ':' && len(key) == len(k) && bytesToString(&k) == key {
  81. return keyBegin - 1, nil
  82. }
  83. case '[':
  84. i = blockEnd(data[i:], data[i], ']') + i
  85. case '{':
  86. i = blockEnd(data[i:], data[i], '}') + i
  87. }
  88. i++
  89. }
  90. return -1, KeyPathNotFoundError
  91. }
  92. func tokenStart(data []byte) int {
  93. for i := len(data) - 1; i >= 0; i-- {
  94. switch data[i] {
  95. case '\n', '\r', '\t', ',', '{', '[':
  96. return i
  97. }
  98. }
  99. return 0
  100. }
  101. // Find position of next character which is not whitespace
  102. func nextToken(data []byte) int {
  103. for i, c := range data {
  104. switch c {
  105. case ' ', '\n', '\r', '\t':
  106. continue
  107. default:
  108. return i
  109. }
  110. }
  111. return -1
  112. }
  113. // Find position of last character which is not whitespace
  114. func lastToken(data []byte) int {
  115. for i := len(data) - 1; i >= 0; i-- {
  116. switch data[i] {
  117. case ' ', '\n', '\r', '\t':
  118. continue
  119. default:
  120. return i
  121. }
  122. }
  123. return -1
  124. }
  125. // Tries to find the end of string
  126. // Support if string contains escaped quote symbols.
  127. func stringEnd(data []byte) (int, bool) {
  128. escaped := false
  129. for i, c := range data {
  130. if c == '"' {
  131. if !escaped {
  132. return i + 1, false
  133. } else {
  134. j := i - 1
  135. for {
  136. if j < 0 || data[j] != '\\' {
  137. return i + 1, true // even number of backslashes
  138. }
  139. j--
  140. if j < 0 || data[j] != '\\' {
  141. break // odd number of backslashes
  142. }
  143. j--
  144. }
  145. }
  146. } else if c == '\\' {
  147. escaped = true
  148. }
  149. }
  150. return -1, escaped
  151. }
  152. // Find end of the data structure, array or object.
  153. // For array openSym and closeSym will be '[' and ']', for object '{' and '}'
  154. func blockEnd(data []byte, openSym byte, closeSym byte) int {
  155. level := 0
  156. i := 0
  157. ln := len(data)
  158. for i < ln {
  159. switch data[i] {
  160. case '"': // If inside string, skip it
  161. se, _ := stringEnd(data[i+1:])
  162. if se == -1 {
  163. return -1
  164. }
  165. i += se
  166. case openSym: // If open symbol, increase level
  167. level++
  168. case closeSym: // If close symbol, increase level
  169. level--
  170. // If we have returned to the original level, we're done
  171. if level == 0 {
  172. return i + 1
  173. }
  174. }
  175. i++
  176. }
  177. return -1
  178. }
  179. func searchKeys(data []byte, keys ...string) int {
  180. keyLevel := 0
  181. level := 0
  182. i := 0
  183. ln := len(data)
  184. lk := len(keys)
  185. if lk == 0 {
  186. return 0
  187. }
  188. var stackbuf [unescapeStackBufSize]byte // stack-allocated array for allocation-free unescaping of small strings
  189. for i < ln {
  190. switch data[i] {
  191. case '"':
  192. i++
  193. keyBegin := i
  194. strEnd, keyEscaped := stringEnd(data[i:])
  195. if strEnd == -1 {
  196. return -1
  197. }
  198. i += strEnd
  199. keyEnd := i - 1
  200. valueOffset := nextToken(data[i:])
  201. if valueOffset == -1 {
  202. return -1
  203. }
  204. i += valueOffset
  205. // if string is a key, and key level match
  206. if data[i] == ':' && keyLevel == level-1 {
  207. if level < 1 {
  208. return -1
  209. }
  210. key := data[keyBegin:keyEnd]
  211. // for unescape: if there are no escape sequences, this is cheap; if there are, it is a
  212. // bit more expensive, but causes no allocations unless len(key) > unescapeStackBufSize
  213. var keyUnesc []byte
  214. if !keyEscaped {
  215. keyUnesc = key
  216. } else if ku, err := Unescape(key, stackbuf[:]); err != nil {
  217. return -1
  218. } else {
  219. keyUnesc = ku
  220. }
  221. if equalStr(&keyUnesc, keys[level-1]) {
  222. keyLevel++
  223. // If we found all keys in path
  224. if keyLevel == lk {
  225. return i + 1
  226. }
  227. }
  228. } else {
  229. i--
  230. }
  231. case '{':
  232. level++
  233. case '}':
  234. level--
  235. if level == keyLevel {
  236. keyLevel--
  237. }
  238. case '[':
  239. // If we want to get array element by index
  240. if keyLevel == level && keys[level][0] == '[' {
  241. aIdx, err := strconv.Atoi(keys[level][1 : len(keys[level])-1])
  242. if err != nil {
  243. return -1
  244. }
  245. var curIdx int
  246. var valueFound []byte
  247. var valueOffset int
  248. var curI = i
  249. ArrayEach(data[i:], func(value []byte, dataType ValueType, offset int, err error) {
  250. if curIdx == aIdx {
  251. valueFound = value
  252. valueOffset = offset
  253. if dataType == String {
  254. valueOffset = valueOffset - 2
  255. valueFound = data[curI+valueOffset : curI+valueOffset+len(value)+2]
  256. }
  257. }
  258. curIdx += 1
  259. })
  260. if valueFound == nil {
  261. return -1
  262. } else {
  263. subIndex := searchKeys(valueFound, keys[level+1:]...)
  264. if subIndex < 0 {
  265. return -1
  266. }
  267. return i + valueOffset + subIndex
  268. }
  269. } else {
  270. // Do not search for keys inside arrays
  271. if arraySkip := blockEnd(data[i:], '[', ']'); arraySkip == -1 {
  272. return -1
  273. } else {
  274. i += arraySkip - 1
  275. }
  276. }
  277. }
  278. i++
  279. }
  280. return -1
  281. }
  282. var bitwiseFlags []int64
  283. func init() {
  284. for i := 0; i < 63; i++ {
  285. bitwiseFlags = append(bitwiseFlags, int64(math.Pow(2, float64(i))))
  286. }
  287. }
  288. func sameTree(p1, p2 []string) bool {
  289. minLen := len(p1)
  290. if len(p2) < minLen {
  291. minLen = len(p2)
  292. }
  293. for pi_1, p_1 := range p1[:minLen] {
  294. if p2[pi_1] != p_1 {
  295. return false
  296. }
  297. }
  298. return true
  299. }
  300. func EachKey(data []byte, cb func(int, []byte, ValueType, error), paths ...[]string) int {
  301. var pathFlags int64
  302. var level, pathsMatched, i int
  303. ln := len(data)
  304. var maxPath int
  305. for _, p := range paths {
  306. if len(p) > maxPath {
  307. maxPath = len(p)
  308. }
  309. }
  310. var stackbuf [unescapeStackBufSize]byte // stack-allocated array for allocation-free unescaping of small strings
  311. pathsBuf := make([]string, maxPath)
  312. for i < ln {
  313. switch data[i] {
  314. case '"':
  315. i++
  316. keyBegin := i
  317. strEnd, keyEscaped := stringEnd(data[i:])
  318. if strEnd == -1 {
  319. return -1
  320. }
  321. i += strEnd
  322. keyEnd := i - 1
  323. valueOffset := nextToken(data[i:])
  324. if valueOffset == -1 {
  325. return -1
  326. }
  327. i += valueOffset
  328. // if string is a key, and key level match
  329. if data[i] == ':' {
  330. match := -1
  331. key := data[keyBegin:keyEnd]
  332. // for unescape: if there are no escape sequences, this is cheap; if there are, it is a
  333. // bit more expensive, but causes no allocations unless len(key) > unescapeStackBufSize
  334. var keyUnesc []byte
  335. if !keyEscaped {
  336. keyUnesc = key
  337. } else if ku, err := Unescape(key, stackbuf[:]); err != nil {
  338. return -1
  339. } else {
  340. keyUnesc = ku
  341. }
  342. if maxPath >= level {
  343. if level < 1 {
  344. cb(-1, nil, Unknown, MalformedJsonError)
  345. return -1
  346. }
  347. pathsBuf[level-1] = bytesToString(&keyUnesc)
  348. for pi, p := range paths {
  349. if len(p) != level || pathFlags&bitwiseFlags[pi+1] != 0 || !equalStr(&keyUnesc, p[level-1]) || !sameTree(p, pathsBuf[:level]) {
  350. continue
  351. }
  352. match = pi
  353. i++
  354. pathsMatched++
  355. pathFlags |= bitwiseFlags[pi+1]
  356. v, dt, of, e := Get(data[i:])
  357. cb(pi, v, dt, e)
  358. if of != -1 {
  359. i += of
  360. }
  361. if pathsMatched == len(paths) {
  362. break
  363. }
  364. }
  365. if pathsMatched == len(paths) {
  366. return i
  367. }
  368. }
  369. if match == -1 {
  370. tokenOffset := nextToken(data[i+1:])
  371. i += tokenOffset
  372. if data[i] == '{' {
  373. blockSkip := blockEnd(data[i:], '{', '}')
  374. i += blockSkip + 1
  375. }
  376. }
  377. if i < ln {
  378. switch data[i] {
  379. case '{', '}', '[', '"':
  380. i--
  381. }
  382. }
  383. } else {
  384. i--
  385. }
  386. case '{':
  387. level++
  388. case '}':
  389. level--
  390. case '[':
  391. var arrIdxFlags int64
  392. var pIdxFlags int64
  393. if level < 0 {
  394. cb(-1, nil, Unknown, MalformedJsonError)
  395. return -1
  396. }
  397. for pi, p := range paths {
  398. if len(p) < level+1 || pathFlags&bitwiseFlags[pi+1] != 0 || p[level][0] != '[' || !sameTree(p, pathsBuf[:level]) {
  399. continue
  400. }
  401. aIdx, _ := strconv.Atoi(p[level][1 : len(p[level])-1])
  402. arrIdxFlags |= bitwiseFlags[aIdx+1]
  403. pIdxFlags |= bitwiseFlags[pi+1]
  404. }
  405. if arrIdxFlags > 0 {
  406. level++
  407. var curIdx int
  408. arrOff, _ := ArrayEach(data[i:], func(value []byte, dataType ValueType, offset int, err error) {
  409. if arrIdxFlags&bitwiseFlags[curIdx+1] != 0 {
  410. for pi, p := range paths {
  411. if pIdxFlags&bitwiseFlags[pi+1] != 0 {
  412. aIdx, _ := strconv.Atoi(p[level-1][1 : len(p[level-1])-1])
  413. if curIdx == aIdx {
  414. of := searchKeys(value, p[level:]...)
  415. pathsMatched++
  416. pathFlags |= bitwiseFlags[pi+1]
  417. if of != -1 {
  418. v, dt, _, e := Get(value[of:])
  419. cb(pi, v, dt, e)
  420. }
  421. }
  422. }
  423. }
  424. }
  425. curIdx += 1
  426. })
  427. if pathsMatched == len(paths) {
  428. return i
  429. }
  430. i += arrOff - 1
  431. } else {
  432. // Do not search for keys inside arrays
  433. if arraySkip := blockEnd(data[i:], '[', ']'); arraySkip == -1 {
  434. return -1
  435. } else {
  436. i += arraySkip - 1
  437. }
  438. }
  439. case ']':
  440. level--
  441. }
  442. i++
  443. }
  444. return -1
  445. }
  446. // Data types available in valid JSON data.
  447. type ValueType int
  448. const (
  449. NotExist = ValueType(iota)
  450. String
  451. Number
  452. Object
  453. Array
  454. Boolean
  455. Null
  456. Unknown
  457. )
  458. func (vt ValueType) String() string {
  459. switch vt {
  460. case NotExist:
  461. return "non-existent"
  462. case String:
  463. return "string"
  464. case Number:
  465. return "number"
  466. case Object:
  467. return "object"
  468. case Array:
  469. return "array"
  470. case Boolean:
  471. return "boolean"
  472. case Null:
  473. return "null"
  474. default:
  475. return "unknown"
  476. }
  477. }
  478. var (
  479. trueLiteral = []byte("true")
  480. falseLiteral = []byte("false")
  481. nullLiteral = []byte("null")
  482. )
  483. func createInsertComponent(keys []string, setValue []byte, comma, object bool) []byte {
  484. var buffer bytes.Buffer
  485. isIndex := string(keys[0][0]) == "["
  486. if comma {
  487. buffer.WriteString(",")
  488. }
  489. if isIndex {
  490. buffer.WriteString("[")
  491. } else {
  492. if object {
  493. buffer.WriteString("{")
  494. }
  495. buffer.WriteString("\"")
  496. buffer.WriteString(keys[0])
  497. buffer.WriteString("\":")
  498. }
  499. for i := 1; i < len(keys); i++ {
  500. if string(keys[i][0]) == "[" {
  501. buffer.WriteString("[")
  502. } else {
  503. buffer.WriteString("{\"")
  504. buffer.WriteString(keys[i])
  505. buffer.WriteString("\":")
  506. }
  507. }
  508. buffer.Write(setValue)
  509. for i := len(keys) - 1; i > 0; i-- {
  510. if string(keys[i][0]) == "[" {
  511. buffer.WriteString("]")
  512. } else {
  513. buffer.WriteString("}")
  514. }
  515. }
  516. if isIndex {
  517. buffer.WriteString("]")
  518. }
  519. if object && !isIndex {
  520. buffer.WriteString("}")
  521. }
  522. return buffer.Bytes()
  523. }
  524. /*
  525. Del - Receives existing data structure, path to delete.
  526. Returns:
  527. `data` - return modified data
  528. */
  529. func Delete(data []byte, keys ...string) []byte {
  530. lk := len(keys)
  531. if lk == 0 {
  532. return data[:0]
  533. }
  534. array := false
  535. if len(keys[lk-1]) > 0 && string(keys[lk-1][0]) == "[" {
  536. array = true
  537. }
  538. var startOffset, keyOffset int
  539. endOffset := len(data)
  540. var err error
  541. if !array {
  542. if len(keys) > 1 {
  543. _, _, startOffset, endOffset, err = internalGet(data, keys[:lk-1]...)
  544. if err == KeyPathNotFoundError {
  545. // problem parsing the data
  546. return data
  547. }
  548. }
  549. keyOffset, err = findKeyStart(data[startOffset:endOffset], keys[lk-1])
  550. if err == KeyPathNotFoundError {
  551. // problem parsing the data
  552. return data
  553. }
  554. keyOffset += startOffset
  555. _, _, _, subEndOffset, _ := internalGet(data[startOffset:endOffset], keys[lk-1])
  556. endOffset = startOffset + subEndOffset
  557. tokEnd := tokenEnd(data[endOffset:])
  558. tokStart := findTokenStart(data[:keyOffset], ","[0])
  559. if data[endOffset+tokEnd] == ","[0] {
  560. endOffset += tokEnd + 1
  561. } else if data[endOffset+tokEnd] == " "[0] && len(data) > endOffset+tokEnd+1 && data[endOffset+tokEnd+1] == ","[0] {
  562. endOffset += tokEnd + 2
  563. } else if data[endOffset+tokEnd] == "}"[0] && data[tokStart] == ","[0] {
  564. keyOffset = tokStart
  565. }
  566. } else {
  567. _, _, keyOffset, endOffset, err = internalGet(data, keys...)
  568. if err == KeyPathNotFoundError {
  569. // problem parsing the data
  570. return data
  571. }
  572. tokEnd := tokenEnd(data[endOffset:])
  573. tokStart := findTokenStart(data[:keyOffset], ","[0])
  574. if data[endOffset+tokEnd] == ","[0] {
  575. endOffset += tokEnd + 1
  576. } else if data[endOffset+tokEnd] == "]"[0] && data[tokStart] == ","[0] {
  577. keyOffset = tokStart
  578. }
  579. }
  580. // We need to remove remaining trailing comma if we delete las element in the object
  581. prevTok := lastToken(data[:keyOffset])
  582. remainedValue := data[endOffset:]
  583. var newOffset int
  584. if nextToken(remainedValue) > -1 && remainedValue[nextToken(remainedValue)] == '}' && data[prevTok] == ',' {
  585. newOffset = prevTok
  586. } else {
  587. newOffset = prevTok + 1
  588. }
  589. data = append(data[:newOffset], data[endOffset:]...)
  590. return data
  591. }
  592. /*
  593. Set - Receives existing data structure, path to set, and data to set at that key.
  594. Returns:
  595. `value` - modified byte array
  596. `err` - On any parsing error
  597. */
  598. func Set(data []byte, setValue []byte, keys ...string) (value []byte, err error) {
  599. // ensure keys are set
  600. if len(keys) == 0 {
  601. return nil, KeyPathNotFoundError
  602. }
  603. _, _, startOffset, endOffset, err := internalGet(data, keys...)
  604. if err != nil {
  605. if err != KeyPathNotFoundError {
  606. // problem parsing the data
  607. return nil, err
  608. }
  609. // full path doesnt exist
  610. // does any subpath exist?
  611. var depth int
  612. for i := range keys {
  613. _, _, start, end, sErr := internalGet(data, keys[:i+1]...)
  614. if sErr != nil {
  615. break
  616. } else {
  617. endOffset = end
  618. startOffset = start
  619. depth++
  620. }
  621. }
  622. comma := true
  623. object := false
  624. if endOffset == -1 {
  625. firstToken := nextToken(data)
  626. // We can't set a top-level key if data isn't an object
  627. if len(data) == 0 || data[firstToken] != '{' {
  628. return nil, KeyPathNotFoundError
  629. }
  630. // Don't need a comma if the input is an empty object
  631. secondToken := firstToken + 1 + nextToken(data[firstToken+1:])
  632. if data[secondToken] == '}' {
  633. comma = false
  634. }
  635. // Set the top level key at the end (accounting for any trailing whitespace)
  636. // This assumes last token is valid like '}', could check and return error
  637. endOffset = lastToken(data)
  638. }
  639. depthOffset := endOffset
  640. if depth != 0 {
  641. // if subpath is a non-empty object, add to it
  642. if data[startOffset] == '{' && data[startOffset+1+nextToken(data[startOffset+1:])] != '}' {
  643. depthOffset--
  644. startOffset = depthOffset
  645. // otherwise, over-write it with a new object
  646. } else {
  647. comma = false
  648. object = true
  649. }
  650. } else {
  651. startOffset = depthOffset
  652. }
  653. value = append(data[:startOffset], append(createInsertComponent(keys[depth:], setValue, comma, object), data[depthOffset:]...)...)
  654. } else {
  655. // path currently exists
  656. startComponent := data[:startOffset]
  657. endComponent := data[endOffset:]
  658. value = make([]byte, len(startComponent)+len(endComponent)+len(setValue))
  659. newEndOffset := startOffset + len(setValue)
  660. copy(value[0:startOffset], startComponent)
  661. copy(value[startOffset:newEndOffset], setValue)
  662. copy(value[newEndOffset:], endComponent)
  663. }
  664. return value, nil
  665. }
  666. func getType(data []byte, offset int) ([]byte, ValueType, int, error) {
  667. var dataType ValueType
  668. endOffset := offset
  669. // if string value
  670. if data[offset] == '"' {
  671. dataType = String
  672. if idx, _ := stringEnd(data[offset+1:]); idx != -1 {
  673. endOffset += idx + 1
  674. } else {
  675. return nil, dataType, offset, MalformedStringError
  676. }
  677. } else if data[offset] == '[' { // if array value
  678. dataType = Array
  679. // break label, for stopping nested loops
  680. endOffset = blockEnd(data[offset:], '[', ']')
  681. if endOffset == -1 {
  682. return nil, dataType, offset, MalformedArrayError
  683. }
  684. endOffset += offset
  685. } else if data[offset] == '{' { // if object value
  686. dataType = Object
  687. // break label, for stopping nested loops
  688. endOffset = blockEnd(data[offset:], '{', '}')
  689. if endOffset == -1 {
  690. return nil, dataType, offset, MalformedObjectError
  691. }
  692. endOffset += offset
  693. } else {
  694. // Number, Boolean or None
  695. end := tokenEnd(data[endOffset:])
  696. if end == -1 {
  697. return nil, dataType, offset, MalformedValueError
  698. }
  699. value := data[offset : endOffset+end]
  700. switch data[offset] {
  701. case 't', 'f': // true or false
  702. if bytes.Equal(value, trueLiteral) || bytes.Equal(value, falseLiteral) {
  703. dataType = Boolean
  704. } else {
  705. return nil, Unknown, offset, UnknownValueTypeError
  706. }
  707. case 'u', 'n': // undefined or null
  708. if bytes.Equal(value, nullLiteral) {
  709. dataType = Null
  710. } else {
  711. return nil, Unknown, offset, UnknownValueTypeError
  712. }
  713. case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-':
  714. dataType = Number
  715. default:
  716. return nil, Unknown, offset, UnknownValueTypeError
  717. }
  718. endOffset += end
  719. }
  720. return data[offset:endOffset], dataType, endOffset, nil
  721. }
  722. /*
  723. Get - Receives data structure, and key path to extract value from.
  724. Returns:
  725. `value` - Pointer to original data structure containing key value, or just empty slice if nothing found or error
  726. `dataType` - Can be: `NotExist`, `String`, `Number`, `Object`, `Array`, `Boolean` or `Null`
  727. `offset` - Offset from provided data structure where key value ends. Used mostly internally, for example for `ArrayEach` helper.
  728. `err` - If key not found or any other parsing issue it should return error. If key not found it also sets `dataType` to `NotExist`
  729. Accept multiple keys to specify path to JSON value (in case of quering nested structures).
  730. If no keys provided it will try to extract closest JSON value (simple ones or object/array), useful for reading streams or arrays, see `ArrayEach` implementation.
  731. */
  732. func Get(data []byte, keys ...string) (value []byte, dataType ValueType, offset int, err error) {
  733. a, b, _, d, e := internalGet(data, keys...)
  734. return a, b, d, e
  735. }
  736. func internalGet(data []byte, keys ...string) (value []byte, dataType ValueType, offset, endOffset int, err error) {
  737. if len(keys) > 0 {
  738. if offset = searchKeys(data, keys...); offset == -1 {
  739. return nil, NotExist, -1, -1, KeyPathNotFoundError
  740. }
  741. }
  742. // Go to closest value
  743. nO := nextToken(data[offset:])
  744. if nO == -1 {
  745. return nil, NotExist, offset, -1, MalformedJsonError
  746. }
  747. offset += nO
  748. value, dataType, endOffset, err = getType(data, offset)
  749. if err != nil {
  750. return value, dataType, offset, endOffset, err
  751. }
  752. // Strip quotes from string values
  753. if dataType == String {
  754. value = value[1 : len(value)-1]
  755. }
  756. return value, dataType, offset, endOffset, nil
  757. }
  758. // ArrayEach is used when iterating arrays, accepts a callback function with the same return arguments as `Get`.
  759. func ArrayEach(data []byte, cb func(value []byte, dataType ValueType, offset int, err error), keys ...string) (offset int, err error) {
  760. if len(data) == 0 {
  761. return -1, MalformedObjectError
  762. }
  763. offset = 1
  764. if len(keys) > 0 {
  765. if offset = searchKeys(data, keys...); offset == -1 {
  766. return offset, KeyPathNotFoundError
  767. }
  768. // Go to closest value
  769. nO := nextToken(data[offset:])
  770. if nO == -1 {
  771. return offset, MalformedJsonError
  772. }
  773. offset += nO
  774. if data[offset] != '[' {
  775. return offset, MalformedArrayError
  776. }
  777. offset++
  778. }
  779. nO := nextToken(data[offset:])
  780. if nO == -1 {
  781. return offset, MalformedJsonError
  782. }
  783. offset += nO
  784. if data[offset] == ']' {
  785. return offset, nil
  786. }
  787. for true {
  788. v, t, o, e := Get(data[offset:])
  789. if e != nil {
  790. return offset, e
  791. }
  792. if o == 0 {
  793. break
  794. }
  795. if t != NotExist {
  796. cb(v, t, offset+o-len(v), e)
  797. }
  798. if e != nil {
  799. break
  800. }
  801. offset += o
  802. skipToToken := nextToken(data[offset:])
  803. if skipToToken == -1 {
  804. return offset, MalformedArrayError
  805. }
  806. offset += skipToToken
  807. if data[offset] == ']' {
  808. break
  809. }
  810. if data[offset] != ',' {
  811. return offset, MalformedArrayError
  812. }
  813. offset++
  814. }
  815. return offset, nil
  816. }
  817. // ObjectEach iterates over the key-value pairs of a JSON object, invoking a given callback for each such entry
  818. func ObjectEach(data []byte, callback func(key []byte, value []byte, dataType ValueType, offset int) error, keys ...string) (err error) {
  819. var stackbuf [unescapeStackBufSize]byte // stack-allocated array for allocation-free unescaping of small strings
  820. offset := 0
  821. // Descend to the desired key, if requested
  822. if len(keys) > 0 {
  823. if off := searchKeys(data, keys...); off == -1 {
  824. return KeyPathNotFoundError
  825. } else {
  826. offset = off
  827. }
  828. }
  829. // Validate and skip past opening brace
  830. if off := nextToken(data[offset:]); off == -1 {
  831. return MalformedObjectError
  832. } else if offset += off; data[offset] != '{' {
  833. return MalformedObjectError
  834. } else {
  835. offset++
  836. }
  837. // Skip to the first token inside the object, or stop if we find the ending brace
  838. if off := nextToken(data[offset:]); off == -1 {
  839. return MalformedJsonError
  840. } else if offset += off; data[offset] == '}' {
  841. return nil
  842. }
  843. // Loop pre-condition: data[offset] points to what should be either the next entry's key, or the closing brace (if it's anything else, the JSON is malformed)
  844. for offset < len(data) {
  845. // Step 1: find the next key
  846. var key []byte
  847. // Check what the the next token is: start of string, end of object, or something else (error)
  848. switch data[offset] {
  849. case '"':
  850. offset++ // accept as string and skip opening quote
  851. case '}':
  852. return nil // we found the end of the object; stop and return success
  853. default:
  854. return MalformedObjectError
  855. }
  856. // Find the end of the key string
  857. var keyEscaped bool
  858. if off, esc := stringEnd(data[offset:]); off == -1 {
  859. return MalformedJsonError
  860. } else {
  861. key, keyEscaped = data[offset:offset+off-1], esc
  862. offset += off
  863. }
  864. // Unescape the string if needed
  865. if keyEscaped {
  866. if keyUnescaped, err := Unescape(key, stackbuf[:]); err != nil {
  867. return MalformedStringEscapeError
  868. } else {
  869. key = keyUnescaped
  870. }
  871. }
  872. // Step 2: skip the colon
  873. if off := nextToken(data[offset:]); off == -1 {
  874. return MalformedJsonError
  875. } else if offset += off; data[offset] != ':' {
  876. return MalformedJsonError
  877. } else {
  878. offset++
  879. }
  880. // Step 3: find the associated value, then invoke the callback
  881. if value, valueType, off, err := Get(data[offset:]); err != nil {
  882. return err
  883. } else if err := callback(key, value, valueType, offset+off); err != nil { // Invoke the callback here!
  884. return err
  885. } else {
  886. offset += off
  887. }
  888. // Step 4: skip over the next comma to the following token, or stop if we hit the ending brace
  889. if off := nextToken(data[offset:]); off == -1 {
  890. return MalformedArrayError
  891. } else {
  892. offset += off
  893. switch data[offset] {
  894. case '}':
  895. return nil // Stop if we hit the close brace
  896. case ',':
  897. offset++ // Ignore the comma
  898. default:
  899. return MalformedObjectError
  900. }
  901. }
  902. // Skip to the next token after the comma
  903. if off := nextToken(data[offset:]); off == -1 {
  904. return MalformedArrayError
  905. } else {
  906. offset += off
  907. }
  908. }
  909. return MalformedObjectError // we shouldn't get here; it's expected that we will return via finding the ending brace
  910. }
  911. // GetUnsafeString returns the value retrieved by `Get`, use creates string without memory allocation by mapping string to slice memory. It does not handle escape symbols.
  912. func GetUnsafeString(data []byte, keys ...string) (val string, err error) {
  913. v, _, _, e := Get(data, keys...)
  914. if e != nil {
  915. return "", e
  916. }
  917. return bytesToString(&v), nil
  918. }
  919. // GetString returns the value retrieved by `Get`, cast to a string if possible, trying to properly handle escape and utf8 symbols
  920. // If key data type do not match, it will return an error.
  921. func GetString(data []byte, keys ...string) (val string, err error) {
  922. v, t, _, e := Get(data, keys...)
  923. if e != nil {
  924. return "", e
  925. }
  926. if t != String {
  927. return "", fmt.Errorf("Value is not a string: %s", string(v))
  928. }
  929. // If no escapes return raw conten
  930. if bytes.IndexByte(v, '\\') == -1 {
  931. return string(v), nil
  932. }
  933. return ParseString(v)
  934. }
  935. // GetFloat returns the value retrieved by `Get`, cast to a float64 if possible.
  936. // The offset is the same as in `Get`.
  937. // If key data type do not match, it will return an error.
  938. func GetFloat(data []byte, keys ...string) (val float64, err error) {
  939. v, t, _, e := Get(data, keys...)
  940. if e != nil {
  941. return 0, e
  942. }
  943. if t != Number {
  944. return 0, fmt.Errorf("Value is not a number: %s", string(v))
  945. }
  946. return ParseFloat(v)
  947. }
  948. // GetInt returns the value retrieved by `Get`, cast to a int64 if possible.
  949. // If key data type do not match, it will return an error.
  950. func GetInt(data []byte, keys ...string) (val int64, err error) {
  951. v, t, _, e := Get(data, keys...)
  952. if e != nil {
  953. return 0, e
  954. }
  955. if t != Number {
  956. return 0, fmt.Errorf("Value is not a number: %s", string(v))
  957. }
  958. return ParseInt(v)
  959. }
  960. // GetBoolean returns the value retrieved by `Get`, cast to a bool if possible.
  961. // The offset is the same as in `Get`.
  962. // If key data type do not match, it will return error.
  963. func GetBoolean(data []byte, keys ...string) (val bool, err error) {
  964. v, t, _, e := Get(data, keys...)
  965. if e != nil {
  966. return false, e
  967. }
  968. if t != Boolean {
  969. return false, fmt.Errorf("Value is not a boolean: %s", string(v))
  970. }
  971. return ParseBoolean(v)
  972. }
  973. // ParseBoolean parses a Boolean ValueType into a Go bool (not particularly useful, but here for completeness)
  974. func ParseBoolean(b []byte) (bool, error) {
  975. switch {
  976. case bytes.Equal(b, trueLiteral):
  977. return true, nil
  978. case bytes.Equal(b, falseLiteral):
  979. return false, nil
  980. default:
  981. return false, MalformedValueError
  982. }
  983. }
  984. // ParseString parses a String ValueType into a Go string (the main parsing work is unescaping the JSON string)
  985. func ParseString(b []byte) (string, error) {
  986. var stackbuf [unescapeStackBufSize]byte // stack-allocated array for allocation-free unescaping of small strings
  987. if bU, err := Unescape(b, stackbuf[:]); err != nil {
  988. return "", MalformedValueError
  989. } else {
  990. return string(bU), nil
  991. }
  992. }
  993. // ParseNumber parses a Number ValueType into a Go float64
  994. func ParseFloat(b []byte) (float64, error) {
  995. if v, err := parseFloat(&b); err != nil {
  996. return 0, MalformedValueError
  997. } else {
  998. return v, nil
  999. }
  1000. }
  1001. // ParseInt parses a Number ValueType into a Go int64
  1002. func ParseInt(b []byte) (int64, error) {
  1003. if v, ok := parseInt(b); !ok {
  1004. return 0, MalformedValueError
  1005. } else {
  1006. return v, nil
  1007. }
  1008. }