123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427 |
- /*
- Copyright 2015 The Kubernetes Authors.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- */
- package jsonpath
- import (
- "fmt"
- "regexp"
- "strconv"
- "strings"
- "unicode"
- "unicode/utf8"
- )
// Scanner sentinel and action delimiters.
const (
	// eof is the pseudo-rune that signals the end of the input.
	eof = -1

	// leftDelim is the string that opens an action.
	leftDelim = "{"
	// rightDelim is the string that closes an action.
	rightDelim = "}"
)
- type Parser struct {
- Name string
- Root *ListNode
- input string
- cur *ListNode
- pos int
- start int
- width int
- }
- // Parse parsed the given text and return a node Parser.
- // If an error is encountered, parsing stops and an empty
- // Parser is returned with the error
- func Parse(name, text string) (*Parser, error) {
- p := NewParser(name)
- err := p.Parse(text)
- if err != nil {
- p = nil
- }
- return p, err
- }
- func NewParser(name string) *Parser {
- return &Parser{
- Name: name,
- }
- }
- // parseAction parsed the expression inside delimiter
- func parseAction(name, text string) (*Parser, error) {
- p, err := Parse(name, fmt.Sprintf("%s%s%s", leftDelim, text, rightDelim))
- // when error happens, p will be nil, so we need to return here
- if err != nil {
- return p, err
- }
- p.Root = p.Root.Nodes[0].(*ListNode)
- return p, nil
- }
- func (p *Parser) Parse(text string) error {
- p.input = text
- p.Root = newList()
- p.pos = 0
- return p.parseText(p.Root)
- }
- // consumeText return the parsed text since last cosumeText
- func (p *Parser) consumeText() string {
- value := p.input[p.start:p.pos]
- p.start = p.pos
- return value
- }
- // next returns the next rune in the input.
- func (p *Parser) next() rune {
- if int(p.pos) >= len(p.input) {
- p.width = 0
- return eof
- }
- r, w := utf8.DecodeRuneInString(p.input[p.pos:])
- p.width = w
- p.pos += p.width
- return r
- }
- // peek returns but does not consume the next rune in the input.
- func (p *Parser) peek() rune {
- r := p.next()
- p.backup()
- return r
- }
- // backup steps back one rune. Can only be called once per call of next.
- func (p *Parser) backup() {
- p.pos -= p.width
- }
- func (p *Parser) parseText(cur *ListNode) error {
- for {
- if strings.HasPrefix(p.input[p.pos:], leftDelim) {
- if p.pos > p.start {
- cur.append(newText(p.consumeText()))
- }
- return p.parseLeftDelim(cur)
- }
- if p.next() == eof {
- break
- }
- }
- // Correctly reached EOF.
- if p.pos > p.start {
- cur.append(newText(p.consumeText()))
- }
- return nil
- }
- // parseLeftDelim scans the left delimiter, which is known to be present.
- func (p *Parser) parseLeftDelim(cur *ListNode) error {
- p.pos += len(leftDelim)
- p.consumeText()
- newNode := newList()
- cur.append(newNode)
- cur = newNode
- return p.parseInsideAction(cur)
- }
- func (p *Parser) parseInsideAction(cur *ListNode) error {
- prefixMap := map[string]func(*ListNode) error{
- rightDelim: p.parseRightDelim,
- "[?(": p.parseFilter,
- "..": p.parseRecursive,
- }
- for prefix, parseFunc := range prefixMap {
- if strings.HasPrefix(p.input[p.pos:], prefix) {
- return parseFunc(cur)
- }
- }
- switch r := p.next(); {
- case r == eof || isEndOfLine(r):
- return fmt.Errorf("unclosed action")
- case r == ' ':
- p.consumeText()
- case r == '@' || r == '$': //the current object, just pass it
- p.consumeText()
- case r == '[':
- return p.parseArray(cur)
- case r == '"':
- return p.parseQuote(cur)
- case r == '.':
- return p.parseField(cur)
- case r == '+' || r == '-' || unicode.IsDigit(r):
- p.backup()
- return p.parseNumber(cur)
- case isAlphaNumeric(r):
- p.backup()
- return p.parseIdentifier(cur)
- default:
- return fmt.Errorf("unrecognized character in action: %#U", r)
- }
- return p.parseInsideAction(cur)
- }
- // parseRightDelim scans the right delimiter, which is known to be present.
- func (p *Parser) parseRightDelim(cur *ListNode) error {
- p.pos += len(rightDelim)
- p.consumeText()
- cur = p.Root
- return p.parseText(cur)
- }
- // parseIdentifier scans build-in keywords, like "range" "end"
- func (p *Parser) parseIdentifier(cur *ListNode) error {
- var r rune
- for {
- r = p.next()
- if isTerminator(r) {
- p.backup()
- break
- }
- }
- value := p.consumeText()
- cur.append(newIdentifier(value))
- return p.parseInsideAction(cur)
- }
- // parseRecursive scans the recursive desent operator ..
- func (p *Parser) parseRecursive(cur *ListNode) error {
- p.pos += len("..")
- p.consumeText()
- cur.append(newRecursive())
- if r := p.peek(); isAlphaNumeric(r) {
- return p.parseField(cur)
- }
- return p.parseInsideAction(cur)
- }
- // parseNumber scans number
- func (p *Parser) parseNumber(cur *ListNode) error {
- r := p.peek()
- if r == '+' || r == '-' {
- r = p.next()
- }
- for {
- r = p.next()
- if r != '.' && !unicode.IsDigit(r) {
- p.backup()
- break
- }
- }
- value := p.consumeText()
- i, err := strconv.Atoi(value)
- if err == nil {
- cur.append(newInt(i))
- return p.parseInsideAction(cur)
- }
- d, err := strconv.ParseFloat(value, 64)
- if err == nil {
- cur.append(newFloat(d))
- return p.parseInsideAction(cur)
- }
- return fmt.Errorf("cannot parse number %s", value)
- }
- // parseArray scans array index selection
- func (p *Parser) parseArray(cur *ListNode) error {
- Loop:
- for {
- switch p.next() {
- case eof, '\n':
- return fmt.Errorf("unterminated array")
- case ']':
- break Loop
- }
- }
- text := p.consumeText()
- text = string(text[1 : len(text)-1])
- if text == "*" {
- text = ":"
- }
- //union operator
- strs := strings.Split(text, ",")
- if len(strs) > 1 {
- union := []*ListNode{}
- for _, str := range strs {
- parser, err := parseAction("union", fmt.Sprintf("[%s]", strings.Trim(str, " ")))
- if err != nil {
- return err
- }
- union = append(union, parser.Root)
- }
- cur.append(newUnion(union))
- return p.parseInsideAction(cur)
- }
- // dict key
- reg := regexp.MustCompile(`^'([^']*)'$`)
- value := reg.FindStringSubmatch(text)
- if value != nil {
- parser, err := parseAction("arraydict", fmt.Sprintf(".%s", value[1]))
- if err != nil {
- return err
- }
- for _, node := range parser.Root.Nodes {
- cur.append(node)
- }
- return p.parseInsideAction(cur)
- }
- //slice operator
- reg = regexp.MustCompile(`^(-?[\d]*)(:-?[\d]*)?(:[\d]*)?$`)
- value = reg.FindStringSubmatch(text)
- if value == nil {
- return fmt.Errorf("invalid array index %s", text)
- }
- value = value[1:]
- params := [3]ParamsEntry{}
- for i := 0; i < 3; i++ {
- if value[i] != "" {
- if i > 0 {
- value[i] = value[i][1:]
- }
- if i > 0 && value[i] == "" {
- params[i].Known = false
- } else {
- var err error
- params[i].Known = true
- params[i].Value, err = strconv.Atoi(value[i])
- if err != nil {
- return fmt.Errorf("array index %s is not a number", value[i])
- }
- }
- } else {
- if i == 1 {
- params[i].Known = true
- params[i].Value = params[0].Value + 1
- } else {
- params[i].Known = false
- params[i].Value = 0
- }
- }
- }
- cur.append(newArray(params))
- return p.parseInsideAction(cur)
- }
- // parseFilter scans filter inside array selection
- func (p *Parser) parseFilter(cur *ListNode) error {
- p.pos += len("[?(")
- p.consumeText()
- Loop:
- for {
- switch p.next() {
- case eof, '\n':
- return fmt.Errorf("unterminated filter")
- case ')':
- break Loop
- }
- }
- if p.next() != ']' {
- return fmt.Errorf("unclosed array expect ]")
- }
- reg := regexp.MustCompile(`^([^!<>=]+)([!<>=]+)(.+?)$`)
- text := p.consumeText()
- text = string(text[:len(text)-2])
- value := reg.FindStringSubmatch(text)
- if value == nil {
- parser, err := parseAction("text", text)
- if err != nil {
- return err
- }
- cur.append(newFilter(parser.Root, newList(), "exists"))
- } else {
- leftParser, err := parseAction("left", value[1])
- if err != nil {
- return err
- }
- rightParser, err := parseAction("right", value[3])
- if err != nil {
- return err
- }
- cur.append(newFilter(leftParser.Root, rightParser.Root, value[2]))
- }
- return p.parseInsideAction(cur)
- }
- // parseQuote unquotes string inside double quote
- func (p *Parser) parseQuote(cur *ListNode) error {
- Loop:
- for {
- switch p.next() {
- case eof, '\n':
- return fmt.Errorf("unterminated quoted string")
- case '"':
- break Loop
- }
- }
- value := p.consumeText()
- s, err := strconv.Unquote(value)
- if err != nil {
- return fmt.Errorf("unquote string %s error %v", value, err)
- }
- cur.append(newText(s))
- return p.parseInsideAction(cur)
- }
- // parseField scans a field until a terminator
- func (p *Parser) parseField(cur *ListNode) error {
- p.consumeText()
- var r rune
- for {
- r = p.next()
- if isTerminator(r) {
- p.backup()
- break
- }
- }
- value := p.consumeText()
- if value == "*" {
- cur.append(newWildcard())
- } else {
- cur.append(newField(value))
- }
- return p.parseInsideAction(cur)
- }
- // isTerminator reports whether the input is at valid termination character to appear after an identifier.
- func isTerminator(r rune) bool {
- if isSpace(r) || isEndOfLine(r) {
- return true
- }
- switch r {
- case eof, '.', ',', '[', ']', '$', '@', '{', '}':
- return true
- }
- return false
- }
// isSpace reports whether r is a blank, i.e. a space or a horizontal tab.
func isSpace(r rune) bool {
	switch r {
	case ' ', '\t':
		return true
	}
	return false
}
// isEndOfLine reports whether r is a carriage return or a line feed.
func isEndOfLine(r rune) bool {
	switch r {
	case '\r', '\n':
		return true
	}
	return false
}
// isAlphaNumeric reports whether r is an underscore, a Unicode letter or
// a Unicode digit.
func isAlphaNumeric(r rune) bool {
	switch {
	case r == '_', unicode.IsLetter(r), unicode.IsDigit(r):
		return true
	default:
		return false
	}
}
|