parser.go 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427
  1. /*
  2. Copyright 2015 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package jsonpath
  14. import (
  15. "fmt"
  16. "regexp"
  17. "strconv"
  18. "strings"
  19. "unicode"
  20. "unicode/utf8"
  21. )
  22. const eof = -1
  23. const (
  24. leftDelim = "{"
  25. rightDelim = "}"
  26. )
  27. type Parser struct {
  28. Name string
  29. Root *ListNode
  30. input string
  31. cur *ListNode
  32. pos int
  33. start int
  34. width int
  35. }
  36. // Parse parsed the given text and return a node Parser.
  37. // If an error is encountered, parsing stops and an empty
  38. // Parser is returned with the error
  39. func Parse(name, text string) (*Parser, error) {
  40. p := NewParser(name)
  41. err := p.Parse(text)
  42. if err != nil {
  43. p = nil
  44. }
  45. return p, err
  46. }
  47. func NewParser(name string) *Parser {
  48. return &Parser{
  49. Name: name,
  50. }
  51. }
  52. // parseAction parsed the expression inside delimiter
  53. func parseAction(name, text string) (*Parser, error) {
  54. p, err := Parse(name, fmt.Sprintf("%s%s%s", leftDelim, text, rightDelim))
  55. // when error happens, p will be nil, so we need to return here
  56. if err != nil {
  57. return p, err
  58. }
  59. p.Root = p.Root.Nodes[0].(*ListNode)
  60. return p, nil
  61. }
  62. func (p *Parser) Parse(text string) error {
  63. p.input = text
  64. p.Root = newList()
  65. p.pos = 0
  66. return p.parseText(p.Root)
  67. }
  68. // consumeText return the parsed text since last cosumeText
  69. func (p *Parser) consumeText() string {
  70. value := p.input[p.start:p.pos]
  71. p.start = p.pos
  72. return value
  73. }
  74. // next returns the next rune in the input.
  75. func (p *Parser) next() rune {
  76. if int(p.pos) >= len(p.input) {
  77. p.width = 0
  78. return eof
  79. }
  80. r, w := utf8.DecodeRuneInString(p.input[p.pos:])
  81. p.width = w
  82. p.pos += p.width
  83. return r
  84. }
  85. // peek returns but does not consume the next rune in the input.
  86. func (p *Parser) peek() rune {
  87. r := p.next()
  88. p.backup()
  89. return r
  90. }
  91. // backup steps back one rune. Can only be called once per call of next.
  92. func (p *Parser) backup() {
  93. p.pos -= p.width
  94. }
  95. func (p *Parser) parseText(cur *ListNode) error {
  96. for {
  97. if strings.HasPrefix(p.input[p.pos:], leftDelim) {
  98. if p.pos > p.start {
  99. cur.append(newText(p.consumeText()))
  100. }
  101. return p.parseLeftDelim(cur)
  102. }
  103. if p.next() == eof {
  104. break
  105. }
  106. }
  107. // Correctly reached EOF.
  108. if p.pos > p.start {
  109. cur.append(newText(p.consumeText()))
  110. }
  111. return nil
  112. }
  113. // parseLeftDelim scans the left delimiter, which is known to be present.
  114. func (p *Parser) parseLeftDelim(cur *ListNode) error {
  115. p.pos += len(leftDelim)
  116. p.consumeText()
  117. newNode := newList()
  118. cur.append(newNode)
  119. cur = newNode
  120. return p.parseInsideAction(cur)
  121. }
  122. func (p *Parser) parseInsideAction(cur *ListNode) error {
  123. prefixMap := map[string]func(*ListNode) error{
  124. rightDelim: p.parseRightDelim,
  125. "[?(": p.parseFilter,
  126. "..": p.parseRecursive,
  127. }
  128. for prefix, parseFunc := range prefixMap {
  129. if strings.HasPrefix(p.input[p.pos:], prefix) {
  130. return parseFunc(cur)
  131. }
  132. }
  133. switch r := p.next(); {
  134. case r == eof || isEndOfLine(r):
  135. return fmt.Errorf("unclosed action")
  136. case r == ' ':
  137. p.consumeText()
  138. case r == '@' || r == '$': //the current object, just pass it
  139. p.consumeText()
  140. case r == '[':
  141. return p.parseArray(cur)
  142. case r == '"':
  143. return p.parseQuote(cur)
  144. case r == '.':
  145. return p.parseField(cur)
  146. case r == '+' || r == '-' || unicode.IsDigit(r):
  147. p.backup()
  148. return p.parseNumber(cur)
  149. case isAlphaNumeric(r):
  150. p.backup()
  151. return p.parseIdentifier(cur)
  152. default:
  153. return fmt.Errorf("unrecognized character in action: %#U", r)
  154. }
  155. return p.parseInsideAction(cur)
  156. }
  157. // parseRightDelim scans the right delimiter, which is known to be present.
  158. func (p *Parser) parseRightDelim(cur *ListNode) error {
  159. p.pos += len(rightDelim)
  160. p.consumeText()
  161. cur = p.Root
  162. return p.parseText(cur)
  163. }
  164. // parseIdentifier scans build-in keywords, like "range" "end"
  165. func (p *Parser) parseIdentifier(cur *ListNode) error {
  166. var r rune
  167. for {
  168. r = p.next()
  169. if isTerminator(r) {
  170. p.backup()
  171. break
  172. }
  173. }
  174. value := p.consumeText()
  175. cur.append(newIdentifier(value))
  176. return p.parseInsideAction(cur)
  177. }
  178. // parseRecursive scans the recursive desent operator ..
  179. func (p *Parser) parseRecursive(cur *ListNode) error {
  180. p.pos += len("..")
  181. p.consumeText()
  182. cur.append(newRecursive())
  183. if r := p.peek(); isAlphaNumeric(r) {
  184. return p.parseField(cur)
  185. }
  186. return p.parseInsideAction(cur)
  187. }
  188. // parseNumber scans number
  189. func (p *Parser) parseNumber(cur *ListNode) error {
  190. r := p.peek()
  191. if r == '+' || r == '-' {
  192. r = p.next()
  193. }
  194. for {
  195. r = p.next()
  196. if r != '.' && !unicode.IsDigit(r) {
  197. p.backup()
  198. break
  199. }
  200. }
  201. value := p.consumeText()
  202. i, err := strconv.Atoi(value)
  203. if err == nil {
  204. cur.append(newInt(i))
  205. return p.parseInsideAction(cur)
  206. }
  207. d, err := strconv.ParseFloat(value, 64)
  208. if err == nil {
  209. cur.append(newFloat(d))
  210. return p.parseInsideAction(cur)
  211. }
  212. return fmt.Errorf("cannot parse number %s", value)
  213. }
  214. // parseArray scans array index selection
  215. func (p *Parser) parseArray(cur *ListNode) error {
  216. Loop:
  217. for {
  218. switch p.next() {
  219. case eof, '\n':
  220. return fmt.Errorf("unterminated array")
  221. case ']':
  222. break Loop
  223. }
  224. }
  225. text := p.consumeText()
  226. text = string(text[1 : len(text)-1])
  227. if text == "*" {
  228. text = ":"
  229. }
  230. //union operator
  231. strs := strings.Split(text, ",")
  232. if len(strs) > 1 {
  233. union := []*ListNode{}
  234. for _, str := range strs {
  235. parser, err := parseAction("union", fmt.Sprintf("[%s]", strings.Trim(str, " ")))
  236. if err != nil {
  237. return err
  238. }
  239. union = append(union, parser.Root)
  240. }
  241. cur.append(newUnion(union))
  242. return p.parseInsideAction(cur)
  243. }
  244. // dict key
  245. reg := regexp.MustCompile(`^'([^']*)'$`)
  246. value := reg.FindStringSubmatch(text)
  247. if value != nil {
  248. parser, err := parseAction("arraydict", fmt.Sprintf(".%s", value[1]))
  249. if err != nil {
  250. return err
  251. }
  252. for _, node := range parser.Root.Nodes {
  253. cur.append(node)
  254. }
  255. return p.parseInsideAction(cur)
  256. }
  257. //slice operator
  258. reg = regexp.MustCompile(`^(-?[\d]*)(:-?[\d]*)?(:[\d]*)?$`)
  259. value = reg.FindStringSubmatch(text)
  260. if value == nil {
  261. return fmt.Errorf("invalid array index %s", text)
  262. }
  263. value = value[1:]
  264. params := [3]ParamsEntry{}
  265. for i := 0; i < 3; i++ {
  266. if value[i] != "" {
  267. if i > 0 {
  268. value[i] = value[i][1:]
  269. }
  270. if i > 0 && value[i] == "" {
  271. params[i].Known = false
  272. } else {
  273. var err error
  274. params[i].Known = true
  275. params[i].Value, err = strconv.Atoi(value[i])
  276. if err != nil {
  277. return fmt.Errorf("array index %s is not a number", value[i])
  278. }
  279. }
  280. } else {
  281. if i == 1 {
  282. params[i].Known = true
  283. params[i].Value = params[0].Value + 1
  284. } else {
  285. params[i].Known = false
  286. params[i].Value = 0
  287. }
  288. }
  289. }
  290. cur.append(newArray(params))
  291. return p.parseInsideAction(cur)
  292. }
  293. // parseFilter scans filter inside array selection
  294. func (p *Parser) parseFilter(cur *ListNode) error {
  295. p.pos += len("[?(")
  296. p.consumeText()
  297. Loop:
  298. for {
  299. switch p.next() {
  300. case eof, '\n':
  301. return fmt.Errorf("unterminated filter")
  302. case ')':
  303. break Loop
  304. }
  305. }
  306. if p.next() != ']' {
  307. return fmt.Errorf("unclosed array expect ]")
  308. }
  309. reg := regexp.MustCompile(`^([^!<>=]+)([!<>=]+)(.+?)$`)
  310. text := p.consumeText()
  311. text = string(text[:len(text)-2])
  312. value := reg.FindStringSubmatch(text)
  313. if value == nil {
  314. parser, err := parseAction("text", text)
  315. if err != nil {
  316. return err
  317. }
  318. cur.append(newFilter(parser.Root, newList(), "exists"))
  319. } else {
  320. leftParser, err := parseAction("left", value[1])
  321. if err != nil {
  322. return err
  323. }
  324. rightParser, err := parseAction("right", value[3])
  325. if err != nil {
  326. return err
  327. }
  328. cur.append(newFilter(leftParser.Root, rightParser.Root, value[2]))
  329. }
  330. return p.parseInsideAction(cur)
  331. }
  332. // parseQuote unquotes string inside double quote
  333. func (p *Parser) parseQuote(cur *ListNode) error {
  334. Loop:
  335. for {
  336. switch p.next() {
  337. case eof, '\n':
  338. return fmt.Errorf("unterminated quoted string")
  339. case '"':
  340. break Loop
  341. }
  342. }
  343. value := p.consumeText()
  344. s, err := strconv.Unquote(value)
  345. if err != nil {
  346. return fmt.Errorf("unquote string %s error %v", value, err)
  347. }
  348. cur.append(newText(s))
  349. return p.parseInsideAction(cur)
  350. }
  351. // parseField scans a field until a terminator
  352. func (p *Parser) parseField(cur *ListNode) error {
  353. p.consumeText()
  354. var r rune
  355. for {
  356. r = p.next()
  357. if isTerminator(r) {
  358. p.backup()
  359. break
  360. }
  361. }
  362. value := p.consumeText()
  363. if value == "*" {
  364. cur.append(newWildcard())
  365. } else {
  366. cur.append(newField(value))
  367. }
  368. return p.parseInsideAction(cur)
  369. }
  370. // isTerminator reports whether the input is at valid termination character to appear after an identifier.
  371. func isTerminator(r rune) bool {
  372. if isSpace(r) || isEndOfLine(r) {
  373. return true
  374. }
  375. switch r {
  376. case eof, '.', ',', '[', ']', '$', '@', '{', '}':
  377. return true
  378. }
  379. return false
  380. }
  381. // isSpace reports whether r is a space character.
  382. func isSpace(r rune) bool {
  383. return r == ' ' || r == '\t'
  384. }
  385. // isEndOfLine reports whether r is an end-of-line character.
  386. func isEndOfLine(r rune) bool {
  387. return r == '\r' || r == '\n'
  388. }
  389. // isAlphaNumeric reports whether r is an alphabetic, digit, or underscore.
  390. func isAlphaNumeric(r rune) bool {
  391. return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
  392. }