text_parser.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515
  1. package plist
  2. import (
  3. "encoding/binary"
  4. "errors"
  5. "fmt"
  6. "io"
  7. "io/ioutil"
  8. "runtime"
  9. "strings"
  10. "time"
  11. "unicode/utf16"
  12. "unicode/utf8"
  13. )
  14. type textPlistParser struct {
  15. reader io.Reader
  16. format int
  17. input string
  18. start int
  19. pos int
  20. width int
  21. }
  22. func convertU16(buffer []byte, bo binary.ByteOrder) (string, error) {
  23. if len(buffer)%2 != 0 {
  24. return "", errors.New("truncated utf16")
  25. }
  26. tmp := make([]uint16, len(buffer)/2)
  27. for i := 0; i < len(buffer); i += 2 {
  28. tmp[i/2] = bo.Uint16(buffer[i : i+2])
  29. }
  30. return string(utf16.Decode(tmp)), nil
  31. }
  32. func guessEncodingAndConvert(buffer []byte) (string, error) {
  33. if len(buffer) >= 3 && buffer[0] == 0xEF && buffer[1] == 0xBB && buffer[2] == 0xBF {
  34. // UTF-8 BOM
  35. return zeroCopy8BitString(buffer, 3, len(buffer)-3), nil
  36. } else if len(buffer) >= 2 {
  37. // UTF-16 guesses
  38. switch {
  39. // stream is big-endian (BOM is FE FF or head is 00 XX)
  40. case (buffer[0] == 0xFE && buffer[1] == 0xFF):
  41. return convertU16(buffer[2:], binary.BigEndian)
  42. case (buffer[0] == 0 && buffer[1] != 0):
  43. return convertU16(buffer, binary.BigEndian)
  44. // stream is little-endian (BOM is FE FF or head is XX 00)
  45. case (buffer[0] == 0xFF && buffer[1] == 0xFE):
  46. return convertU16(buffer[2:], binary.LittleEndian)
  47. case (buffer[0] != 0 && buffer[1] == 0):
  48. return convertU16(buffer, binary.LittleEndian)
  49. }
  50. }
  51. // fallback: assume ASCII (not great!)
  52. return zeroCopy8BitString(buffer, 0, len(buffer)), nil
  53. }
  54. func (p *textPlistParser) parseDocument() (pval cfValue, parseError error) {
  55. defer func() {
  56. if r := recover(); r != nil {
  57. if _, ok := r.(runtime.Error); ok {
  58. panic(r)
  59. }
  60. // Wrap all non-invalid-plist errors.
  61. parseError = plistParseError{"text", r.(error)}
  62. }
  63. }()
  64. buffer, err := ioutil.ReadAll(p.reader)
  65. if err != nil {
  66. panic(err)
  67. }
  68. p.input, err = guessEncodingAndConvert(buffer)
  69. if err != nil {
  70. panic(err)
  71. }
  72. val := p.parsePlistValue()
  73. p.skipWhitespaceAndComments()
  74. if p.peek() != eof {
  75. if _, ok := val.(cfString); !ok {
  76. p.error("garbage after end of document")
  77. }
  78. p.start = 0
  79. p.pos = 0
  80. val = p.parseDictionary(true)
  81. }
  82. pval = val
  83. return
  84. }
  85. const eof rune = -1
  86. func (p *textPlistParser) error(e string, args ...interface{}) {
  87. line := strings.Count(p.input[:p.pos], "\n")
  88. char := p.pos - strings.LastIndex(p.input[:p.pos], "\n") - 1
  89. panic(fmt.Errorf("%s at line %d character %d", fmt.Sprintf(e, args...), line, char))
  90. }
  91. func (p *textPlistParser) next() rune {
  92. if int(p.pos) >= len(p.input) {
  93. p.width = 0
  94. return eof
  95. }
  96. r, w := utf8.DecodeRuneInString(p.input[p.pos:])
  97. p.width = w
  98. p.pos += p.width
  99. return r
  100. }
  101. func (p *textPlistParser) backup() {
  102. p.pos -= p.width
  103. }
  104. func (p *textPlistParser) peek() rune {
  105. r := p.next()
  106. p.backup()
  107. return r
  108. }
  109. func (p *textPlistParser) emit() string {
  110. s := p.input[p.start:p.pos]
  111. p.start = p.pos
  112. return s
  113. }
  114. func (p *textPlistParser) ignore() {
  115. p.start = p.pos
  116. }
  117. func (p *textPlistParser) empty() bool {
  118. return p.start == p.pos
  119. }
  120. func (p *textPlistParser) scanUntil(ch rune) {
  121. if x := strings.IndexRune(p.input[p.pos:], ch); x >= 0 {
  122. p.pos += x
  123. return
  124. }
  125. p.pos = len(p.input)
  126. }
  127. func (p *textPlistParser) scanUntilAny(chs string) {
  128. if x := strings.IndexAny(p.input[p.pos:], chs); x >= 0 {
  129. p.pos += x
  130. return
  131. }
  132. p.pos = len(p.input)
  133. }
  134. func (p *textPlistParser) scanCharactersInSet(ch *characterSet) {
  135. for ch.Contains(p.next()) {
  136. }
  137. p.backup()
  138. }
  139. func (p *textPlistParser) scanCharactersNotInSet(ch *characterSet) {
  140. var r rune
  141. for {
  142. r = p.next()
  143. if r == eof || ch.Contains(r) {
  144. break
  145. }
  146. }
  147. p.backup()
  148. }
  149. func (p *textPlistParser) skipWhitespaceAndComments() {
  150. for {
  151. p.scanCharactersInSet(&whitespace)
  152. if strings.HasPrefix(p.input[p.pos:], "//") {
  153. p.scanCharactersNotInSet(&newlineCharacterSet)
  154. } else if strings.HasPrefix(p.input[p.pos:], "/*") {
  155. if x := strings.Index(p.input[p.pos:], "*/"); x >= 0 {
  156. p.pos += x + 2 // skip the */ as well
  157. continue // consume more whitespace
  158. } else {
  159. p.error("unexpected eof in block comment")
  160. }
  161. } else {
  162. break
  163. }
  164. }
  165. p.ignore()
  166. }
  167. func (p *textPlistParser) parseOctalDigits(max int) uint64 {
  168. var val uint64
  169. for i := 0; i < max; i++ {
  170. r := p.next()
  171. if r >= '0' && r <= '7' {
  172. val <<= 3
  173. val |= uint64((r - '0'))
  174. } else {
  175. p.backup()
  176. break
  177. }
  178. }
  179. return val
  180. }
  181. func (p *textPlistParser) parseHexDigits(max int) uint64 {
  182. var val uint64
  183. for i := 0; i < max; i++ {
  184. r := p.next()
  185. if r >= 'a' && r <= 'f' {
  186. val <<= 4
  187. val |= 10 + uint64((r - 'a'))
  188. } else if r >= 'A' && r <= 'F' {
  189. val <<= 4
  190. val |= 10 + uint64((r - 'A'))
  191. } else if r >= '0' && r <= '9' {
  192. val <<= 4
  193. val |= uint64((r - '0'))
  194. } else {
  195. p.backup()
  196. break
  197. }
  198. }
  199. return val
  200. }
  201. // the \ has already been consumed
  202. func (p *textPlistParser) parseEscape() string {
  203. var s string
  204. switch p.next() {
  205. case 'a':
  206. s = "\a"
  207. case 'b':
  208. s = "\b"
  209. case 'v':
  210. s = "\v"
  211. case 'f':
  212. s = "\f"
  213. case 't':
  214. s = "\t"
  215. case 'r':
  216. s = "\r"
  217. case 'n':
  218. s = "\n"
  219. case '\\':
  220. s = `\`
  221. case '"':
  222. s = `"`
  223. case 'x':
  224. s = string(rune(p.parseHexDigits(2)))
  225. case 'u', 'U':
  226. s = string(rune(p.parseHexDigits(4)))
  227. case '0', '1', '2', '3', '4', '5', '6', '7':
  228. p.backup() // we've already consumed one of the digits
  229. s = string(rune(p.parseOctalDigits(3)))
  230. default:
  231. p.backup() // everything else should be accepted
  232. }
  233. p.ignore() // skip the entire escape sequence
  234. return s
  235. }
  236. // the " has already been consumed
  237. func (p *textPlistParser) parseQuotedString() cfString {
  238. p.ignore() // ignore the "
  239. slowPath := false
  240. s := ""
  241. for {
  242. p.scanUntilAny(`"\`)
  243. switch p.peek() {
  244. case eof:
  245. p.error("unexpected eof in quoted string")
  246. case '"':
  247. section := p.emit()
  248. p.pos++ // skip "
  249. if !slowPath {
  250. return cfString(section)
  251. } else {
  252. s += section
  253. return cfString(s)
  254. }
  255. case '\\':
  256. slowPath = true
  257. s += p.emit()
  258. p.next() // consume \
  259. s += p.parseEscape()
  260. }
  261. }
  262. }
  263. func (p *textPlistParser) parseUnquotedString() cfString {
  264. p.scanCharactersNotInSet(&gsQuotable)
  265. s := p.emit()
  266. if s == "" {
  267. p.error("invalid unquoted string (found an unquoted character that should be quoted?)")
  268. }
  269. return cfString(s)
  270. }
  271. // the { has already been consumed
  272. func (p *textPlistParser) parseDictionary(ignoreEof bool) *cfDictionary {
  273. //p.ignore() // ignore the {
  274. var keypv cfValue
  275. keys := make([]string, 0, 32)
  276. values := make([]cfValue, 0, 32)
  277. outer:
  278. for {
  279. p.skipWhitespaceAndComments()
  280. switch p.next() {
  281. case eof:
  282. if !ignoreEof {
  283. p.error("unexpected eof in dictionary")
  284. }
  285. fallthrough
  286. case '}':
  287. break outer
  288. case '"':
  289. keypv = p.parseQuotedString()
  290. default:
  291. p.backup()
  292. keypv = p.parseUnquotedString()
  293. }
  294. // INVARIANT: key can't be nil; parseQuoted and parseUnquoted
  295. // will panic out before they return nil.
  296. p.skipWhitespaceAndComments()
  297. var val cfValue
  298. n := p.next()
  299. if n == ';' {
  300. val = keypv
  301. } else if n == '=' {
  302. // whitespace is consumed within
  303. val = p.parsePlistValue()
  304. p.skipWhitespaceAndComments()
  305. if p.next() != ';' {
  306. p.error("missing ; in dictionary")
  307. }
  308. } else {
  309. p.error("missing = in dictionary")
  310. }
  311. keys = append(keys, string(keypv.(cfString)))
  312. values = append(values, val)
  313. }
  314. return &cfDictionary{keys: keys, values: values}
  315. }
  316. // the ( has already been consumed
  317. func (p *textPlistParser) parseArray() *cfArray {
  318. //p.ignore() // ignore the (
  319. values := make([]cfValue, 0, 32)
  320. outer:
  321. for {
  322. p.skipWhitespaceAndComments()
  323. switch p.next() {
  324. case eof:
  325. p.error("unexpected eof in array")
  326. case ')':
  327. break outer // done here
  328. case ',':
  329. continue // restart; ,) is valid and we don't want to blow it
  330. default:
  331. p.backup()
  332. }
  333. pval := p.parsePlistValue() // whitespace is consumed within
  334. if str, ok := pval.(cfString); ok && string(str) == "" {
  335. // Empty strings in arrays are apparently skipped?
  336. // TODO: Figure out why this was implemented.
  337. continue
  338. }
  339. values = append(values, pval)
  340. }
  341. return &cfArray{values}
  342. }
  343. // the <* have already been consumed
  344. func (p *textPlistParser) parseGNUStepValue() cfValue {
  345. typ := p.next()
  346. p.ignore()
  347. p.scanUntil('>')
  348. if typ == eof || typ == '>' || p.empty() || p.peek() == eof {
  349. p.error("invalid GNUStep extended value")
  350. }
  351. v := p.emit()
  352. p.next() // consume the >
  353. switch typ {
  354. case 'I':
  355. if v[0] == '-' {
  356. n := mustParseInt(v, 10, 64)
  357. return &cfNumber{signed: true, value: uint64(n)}
  358. } else {
  359. n := mustParseUint(v, 10, 64)
  360. return &cfNumber{signed: false, value: n}
  361. }
  362. case 'R':
  363. n := mustParseFloat(v, 64)
  364. return &cfReal{wide: true, value: n} // TODO(DH) 32/64
  365. case 'B':
  366. b := v[0] == 'Y'
  367. return cfBoolean(b)
  368. case 'D':
  369. t, err := time.Parse(textPlistTimeLayout, v)
  370. if err != nil {
  371. p.error(err.Error())
  372. }
  373. return cfDate(t.In(time.UTC))
  374. }
  375. p.error("invalid GNUStep type " + string(typ))
  376. return nil
  377. }
  378. // The < has already been consumed
  379. func (p *textPlistParser) parseHexData() cfData {
  380. buf := make([]byte, 256)
  381. i := 0
  382. c := 0
  383. for {
  384. r := p.next()
  385. switch r {
  386. case eof:
  387. p.error("unexpected eof in data")
  388. case '>':
  389. if c&1 == 1 {
  390. p.error("uneven number of hex digits in data")
  391. }
  392. p.ignore()
  393. return cfData(buf[:i])
  394. case ' ', '\t', '\n', '\r', '\u2028', '\u2029': // more lax than apple here: skip spaces
  395. continue
  396. }
  397. buf[i] <<= 4
  398. if r >= 'a' && r <= 'f' {
  399. buf[i] |= 10 + byte((r - 'a'))
  400. } else if r >= 'A' && r <= 'F' {
  401. buf[i] |= 10 + byte((r - 'A'))
  402. } else if r >= '0' && r <= '9' {
  403. buf[i] |= byte((r - '0'))
  404. } else {
  405. p.error("unexpected hex digit `%c'", r)
  406. }
  407. c++
  408. if c&1 == 0 {
  409. i++
  410. if i >= len(buf) {
  411. realloc := make([]byte, len(buf)*2)
  412. copy(realloc, buf)
  413. buf = realloc
  414. }
  415. }
  416. }
  417. }
  418. func (p *textPlistParser) parsePlistValue() cfValue {
  419. for {
  420. p.skipWhitespaceAndComments()
  421. switch p.next() {
  422. case eof:
  423. return &cfDictionary{}
  424. case '<':
  425. if p.next() == '*' {
  426. p.format = GNUStepFormat
  427. return p.parseGNUStepValue()
  428. }
  429. p.backup()
  430. return p.parseHexData()
  431. case '"':
  432. return p.parseQuotedString()
  433. case '{':
  434. return p.parseDictionary(false)
  435. case '(':
  436. return p.parseArray()
  437. default:
  438. p.backup()
  439. return p.parseUnquotedString()
  440. }
  441. }
  442. }
  443. func newTextPlistParser(r io.Reader) *textPlistParser {
  444. return &textPlistParser{
  445. reader: r,
  446. format: OpenStepFormat,
  447. }
  448. }