escape_test.go 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192
  1. package jsonparser
  2. import (
  3. "bytes"
  4. "testing"
  5. )
  6. func TestH2I(t *testing.T) {
  7. hexChars := []byte{'0', '9', 'A', 'F', 'a', 'f', 'x', '\000'}
  8. hexValues := []int{0, 9, 10, 15, 10, 15, -1, -1}
  9. for i, c := range hexChars {
  10. if v := h2I(c); v != hexValues[i] {
  11. t.Errorf("h2I('%c') returned wrong value (obtained %d, expected %d)", c, v, hexValues[i])
  12. }
  13. }
  14. }
  15. type escapedUnicodeRuneTest struct {
  16. in string
  17. isErr bool
  18. out rune
  19. len int
  20. }
  21. var commonUnicodeEscapeTests = []escapedUnicodeRuneTest{
  22. {in: `\u0041`, out: 'A', len: 6},
  23. {in: `\u0000`, out: 0, len: 6},
  24. {in: `\u00b0`, out: '°', len: 6},
  25. {in: `\u00B0`, out: '°', len: 6},
  26. {in: `\x1234`, out: 0x1234, len: 6}, // These functions do not check the \u prefix
  27. {in: ``, isErr: true},
  28. {in: `\`, isErr: true},
  29. {in: `\u`, isErr: true},
  30. {in: `\u1`, isErr: true},
  31. {in: `\u11`, isErr: true},
  32. {in: `\u111`, isErr: true},
  33. {in: `\u123X`, isErr: true},
  34. }
  35. var singleUnicodeEscapeTests = append([]escapedUnicodeRuneTest{
  36. {in: `\uD83D`, out: 0xD83D, len: 6},
  37. {in: `\uDE03`, out: 0xDE03, len: 6},
  38. {in: `\uFFFF`, out: 0xFFFF, len: 6},
  39. {in: `\uFF11`, out: '1', len: 6},
  40. }, commonUnicodeEscapeTests...)
  41. var multiUnicodeEscapeTests = append([]escapedUnicodeRuneTest{
  42. {in: `\uD83D`, isErr: true},
  43. {in: `\uDE03`, isErr: true},
  44. {in: `\uFFFF`, out: '\uFFFF', len: 6},
  45. {in: `\uFF11`, out: '1', len: 6},
  46. {in: `\uD83D\uDE03`, out: '\U0001F603', len: 12},
  47. {in: `\uD800\uDC00`, out: '\U00010000', len: 12},
  48. {in: `\uD800\`, isErr: true},
  49. {in: `\uD800\u`, isErr: true},
  50. {in: `\uD800\uD`, isErr: true},
  51. {in: `\uD800\uDC`, isErr: true},
  52. {in: `\uD800\uDC0`, isErr: true},
  53. {in: `\uD800\uDBFF`, isErr: true}, // invalid low surrogate
  54. }, commonUnicodeEscapeTests...)
  55. func TestDecodeSingleUnicodeEscape(t *testing.T) {
  56. for _, test := range singleUnicodeEscapeTests {
  57. r, ok := decodeSingleUnicodeEscape([]byte(test.in))
  58. isErr := !ok
  59. if isErr != test.isErr {
  60. t.Errorf("decodeSingleUnicodeEscape(%s) returned isErr mismatch: expected %t, obtained %t", test.in, test.isErr, isErr)
  61. } else if isErr {
  62. continue
  63. } else if r != test.out {
  64. t.Errorf("decodeSingleUnicodeEscape(%s) returned rune mismatch: expected %x (%c), obtained %x (%c)", test.in, test.out, test.out, r, r)
  65. }
  66. }
  67. }
  68. func TestDecodeUnicodeEscape(t *testing.T) {
  69. for _, test := range multiUnicodeEscapeTests {
  70. r, len := decodeUnicodeEscape([]byte(test.in))
  71. isErr := (len == -1)
  72. if isErr != test.isErr {
  73. t.Errorf("decodeUnicodeEscape(%s) returned isErr mismatch: expected %t, obtained %t", test.in, test.isErr, isErr)
  74. } else if isErr {
  75. continue
  76. } else if len != test.len {
  77. t.Errorf("decodeUnicodeEscape(%s) returned length mismatch: expected %d, obtained %d", test.in, test.len, len)
  78. } else if r != test.out {
  79. t.Errorf("decodeUnicodeEscape(%s) returned rune mismatch: expected %x (%c), obtained %x (%c)", test.in, test.out, test.out, r, r)
  80. }
  81. }
  82. }
  83. type unescapeTest struct {
  84. in string // escaped string
  85. out string // expected unescaped string
  86. canAlloc bool // can unescape cause an allocation (depending on buffer size)? true iff 'in' contains escape sequence(s)
  87. isErr bool // should this operation result in an error
  88. }
  89. var unescapeTests = []unescapeTest{
  90. {in: ``, out: ``, canAlloc: false},
  91. {in: `a`, out: `a`, canAlloc: false},
  92. {in: `abcde`, out: `abcde`, canAlloc: false},
  93. {in: `ab\\de`, out: `ab\de`, canAlloc: true},
  94. {in: `ab\"de`, out: `ab"de`, canAlloc: true},
  95. {in: `ab \u00B0 de`, out: `ab ° de`, canAlloc: true},
  96. {in: `ab \uFF11 de`, out: `ab 1 de`, canAlloc: true},
  97. {in: `\uFFFF`, out: "\uFFFF", canAlloc: true},
  98. {in: `ab \uD83D\uDE03 de`, out: "ab \U0001F603 de", canAlloc: true},
  99. {in: `\u0000\u0000\u0000\u0000\u0000`, out: "\u0000\u0000\u0000\u0000\u0000", canAlloc: true},
  100. {in: `\u0000 \u0000 \u0000 \u0000 \u0000`, out: "\u0000 \u0000 \u0000 \u0000 \u0000", canAlloc: true},
  101. {in: ` \u0000 \u0000 \u0000 \u0000 \u0000 `, out: " \u0000 \u0000 \u0000 \u0000 \u0000 ", canAlloc: true},
  102. {in: `\uD800`, isErr: true},
  103. {in: `abcde\`, isErr: true},
  104. {in: `abcde\x`, isErr: true},
  105. {in: `abcde\u`, isErr: true},
  106. {in: `abcde\u1`, isErr: true},
  107. {in: `abcde\u12`, isErr: true},
  108. {in: `abcde\u123`, isErr: true},
  109. {in: `abcde\uD800`, isErr: true},
  110. {in: `ab\uD800de`, isErr: true},
  111. {in: `\uD800abcde`, isErr: true},
  112. }
  113. // isSameMemory checks if two slices contain the same memory pointer (meaning one is a
  114. // subslice of the other, with possibly differing lengths/capacities).
  115. func isSameMemory(a, b []byte) bool {
  116. if cap(a) == 0 || cap(b) == 0 {
  117. return cap(a) == cap(b)
  118. } else if a, b = a[:1], b[:1]; a[0] != b[0] {
  119. return false
  120. } else {
  121. a[0]++
  122. same := (a[0] == b[0])
  123. a[0]--
  124. return same
  125. }
  126. }
  127. func TestUnescape(t *testing.T) {
  128. for _, test := range unescapeTests {
  129. type bufferTestCase struct {
  130. buf []byte
  131. isTooSmall bool
  132. }
  133. var bufs []bufferTestCase
  134. if len(test.in) == 0 {
  135. // If the input string is length 0, only a buffer of size 0 is a meaningful test
  136. bufs = []bufferTestCase{{nil, false}}
  137. } else {
  138. // For non-empty input strings, we can try several buffer sizes (0, len-1, len)
  139. bufs = []bufferTestCase{
  140. {nil, true},
  141. {make([]byte, 0, len(test.in)-1), true},
  142. {make([]byte, 0, len(test.in)), false},
  143. }
  144. }
  145. for _, buftest := range bufs {
  146. in := []byte(test.in)
  147. buf := buftest.buf
  148. out, err := Unescape(in, buf)
  149. isErr := (err != nil)
  150. isAlloc := !isSameMemory(out, in) && !isSameMemory(out, buf)
  151. if isErr != test.isErr {
  152. t.Errorf("Unescape(`%s`, bufsize=%d) returned isErr mismatch: expected %t, obtained %t", test.in, cap(buf), test.isErr, isErr)
  153. break
  154. } else if isErr {
  155. continue
  156. } else if !bytes.Equal(out, []byte(test.out)) {
  157. t.Errorf("Unescape(`%s`, bufsize=%d) returned unescaped mismatch: expected `%s` (%v, len %d), obtained `%s` (%v, len %d)", test.in, cap(buf), test.out, []byte(test.out), len(test.out), string(out), out, len(out))
  158. break
  159. } else if isAlloc != (test.canAlloc && buftest.isTooSmall) {
  160. t.Errorf("Unescape(`%s`, bufsize=%d) returned isAlloc mismatch: expected %t, obtained %t", test.in, cap(buf), buftest.isTooSmall, isAlloc)
  161. break
  162. }
  163. }
  164. }
  165. }