convert.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590
  1. // Copyright 2015 Huan Du. All rights reserved.
  2. // Licensed under the MIT license that can be found in the LICENSE file.
  3. package xstrings
  4. import (
  5. "math/rand"
  6. "unicode"
  7. "unicode/utf8"
  8. )
  9. // ToCamelCase is to convert words separated by space, underscore and hyphen to camel case.
  10. //
  11. // Some samples.
  12. // "some_words" => "SomeWords"
  13. // "http_server" => "HttpServer"
  14. // "no_https" => "NoHttps"
  15. // "_complex__case_" => "_Complex_Case_"
  16. // "some words" => "SomeWords"
  17. func ToCamelCase(str string) string {
  18. if len(str) == 0 {
  19. return ""
  20. }
  21. buf := &stringBuilder{}
  22. var r0, r1 rune
  23. var size int
  24. // leading connector will appear in output.
  25. for len(str) > 0 {
  26. r0, size = utf8.DecodeRuneInString(str)
  27. str = str[size:]
  28. if !isConnector(r0) {
  29. r0 = unicode.ToUpper(r0)
  30. break
  31. }
  32. buf.WriteRune(r0)
  33. }
  34. if len(str) == 0 {
  35. // A special case for a string contains only 1 rune.
  36. if size != 0 {
  37. buf.WriteRune(r0)
  38. }
  39. return buf.String()
  40. }
  41. for len(str) > 0 {
  42. r1 = r0
  43. r0, size = utf8.DecodeRuneInString(str)
  44. str = str[size:]
  45. if isConnector(r0) && isConnector(r1) {
  46. buf.WriteRune(r1)
  47. continue
  48. }
  49. if isConnector(r1) {
  50. r0 = unicode.ToUpper(r0)
  51. } else {
  52. r0 = unicode.ToLower(r0)
  53. buf.WriteRune(r1)
  54. }
  55. }
  56. buf.WriteRune(r0)
  57. return buf.String()
  58. }
  59. // ToSnakeCase can convert all upper case characters in a string to
  60. // snake case format.
  61. //
  62. // Some samples.
  63. // "FirstName" => "first_name"
  64. // "HTTPServer" => "http_server"
  65. // "NoHTTPS" => "no_https"
  66. // "GO_PATH" => "go_path"
  67. // "GO PATH" => "go_path" // space is converted to underscore.
  68. // "GO-PATH" => "go_path" // hyphen is converted to underscore.
  69. // "http2xx" => "http_2xx" // insert an underscore before a number and after an alphabet.
  70. // "HTTP20xOK" => "http_20x_ok"
  71. // "Duration2m3s" => "duration_2m3s"
  72. // "Bld4Floor3rd" => "bld4_floor_3rd"
  73. func ToSnakeCase(str string) string {
  74. return camelCaseToLowerCase(str, '_')
  75. }
  76. // ToKebabCase can convert all upper case characters in a string to
  77. // kebab case format.
  78. //
  79. // Some samples.
  80. // "FirstName" => "first-name"
  81. // "HTTPServer" => "http-server"
  82. // "NoHTTPS" => "no-https"
  83. // "GO_PATH" => "go-path"
  84. // "GO PATH" => "go-path" // space is converted to '-'.
  85. // "GO-PATH" => "go-path" // hyphen is converted to '-'.
  86. // "http2xx" => "http-2xx" // insert an underscore before a number and after an alphabet.
  87. // "HTTP20xOK" => "http-20x-ok"
  88. // "Duration2m3s" => "duration-2m3s"
  89. // "Bld4Floor3rd" => "bld4-floor-3rd"
  90. func ToKebabCase(str string) string {
  91. return camelCaseToLowerCase(str, '-')
  92. }
  93. func camelCaseToLowerCase(str string, connector rune) string {
  94. if len(str) == 0 {
  95. return ""
  96. }
  97. buf := &stringBuilder{}
  98. wt, word, remaining := nextWord(str)
  99. for len(remaining) > 0 {
  100. if wt != connectorWord {
  101. toLower(buf, wt, word, connector)
  102. }
  103. prev := wt
  104. last := word
  105. wt, word, remaining = nextWord(remaining)
  106. switch prev {
  107. case numberWord:
  108. for wt == alphabetWord || wt == numberWord {
  109. toLower(buf, wt, word, connector)
  110. wt, word, remaining = nextWord(remaining)
  111. }
  112. if wt != invalidWord && wt != punctWord {
  113. buf.WriteRune(connector)
  114. }
  115. case connectorWord:
  116. toLower(buf, prev, last, connector)
  117. case punctWord:
  118. // nothing.
  119. default:
  120. if wt != numberWord {
  121. if wt != connectorWord && wt != punctWord {
  122. buf.WriteRune(connector)
  123. }
  124. break
  125. }
  126. if len(remaining) == 0 {
  127. break
  128. }
  129. last := word
  130. wt, word, remaining = nextWord(remaining)
  131. // consider number as a part of previous word.
  132. // e.g. "Bld4Floor" => "bld4_floor"
  133. if wt != alphabetWord {
  134. toLower(buf, numberWord, last, connector)
  135. if wt != connectorWord && wt != punctWord {
  136. buf.WriteRune(connector)
  137. }
  138. break
  139. }
  140. // if there are some lower case letters following a number,
  141. // add connector before the number.
  142. // e.g. "HTTP2xx" => "http_2xx"
  143. buf.WriteRune(connector)
  144. toLower(buf, numberWord, last, connector)
  145. for wt == alphabetWord || wt == numberWord {
  146. toLower(buf, wt, word, connector)
  147. wt, word, remaining = nextWord(remaining)
  148. }
  149. if wt != invalidWord && wt != connectorWord && wt != punctWord {
  150. buf.WriteRune(connector)
  151. }
  152. }
  153. }
  154. toLower(buf, wt, word, connector)
  155. return buf.String()
  156. }
  // isConnector reports whether r separates words: a hyphen, an underscore
  // or any rune for which unicode.IsSpace is true.
  func isConnector(r rune) bool {
  	switch r {
  	case '-', '_':
  		return true
  	}

  	return unicode.IsSpace(r)
  }
  // wordType classifies the words produced by nextWord.
  type wordType int

  const (
  	// invalidWord is the zero value; nextWord returns it for empty input
  	// and for input that contains no valid rune.
  	invalidWord wordType = iota

  	// numberWord is a run of runes for which unicode.IsNumber is true.
  	numberWord

  	// upperCaseWord is a word whose first rune is upper case.
  	upperCaseWord

  	// alphabetWord is a run of alphabet runes whose first rune is not
  	// upper case.
  	alphabetWord

  	// connectorWord is a run of connector runes (see isConnector).
  	connectorWord

  	// punctWord is a word that starts with a punctuation rune which is
  	// not a connector.
  	punctWord

  	// otherWord is a word that fits none of the categories above.
  	otherWord
  )
  170. func nextWord(str string) (wt wordType, word, remaining string) {
  171. if len(str) == 0 {
  172. return
  173. }
  174. var offset int
  175. remaining = str
  176. r, size := nextValidRune(remaining, utf8.RuneError)
  177. offset += size
  178. if r == utf8.RuneError {
  179. wt = invalidWord
  180. word = str[:offset]
  181. remaining = str[offset:]
  182. return
  183. }
  184. switch {
  185. case isConnector(r):
  186. wt = connectorWord
  187. remaining = remaining[size:]
  188. for len(remaining) > 0 {
  189. r, size = nextValidRune(remaining, r)
  190. if !isConnector(r) {
  191. break
  192. }
  193. offset += size
  194. remaining = remaining[size:]
  195. }
  196. case unicode.IsPunct(r):
  197. wt = punctWord
  198. remaining = remaining[size:]
  199. for len(remaining) > 0 {
  200. r, size = nextValidRune(remaining, r)
  201. if !unicode.IsPunct(r) {
  202. break
  203. }
  204. offset += size
  205. remaining = remaining[size:]
  206. }
  207. case unicode.IsUpper(r):
  208. wt = upperCaseWord
  209. remaining = remaining[size:]
  210. if len(remaining) == 0 {
  211. break
  212. }
  213. r, size = nextValidRune(remaining, r)
  214. switch {
  215. case unicode.IsUpper(r):
  216. prevSize := size
  217. offset += size
  218. remaining = remaining[size:]
  219. for len(remaining) > 0 {
  220. r, size = nextValidRune(remaining, r)
  221. if !unicode.IsUpper(r) {
  222. break
  223. }
  224. prevSize = size
  225. offset += size
  226. remaining = remaining[size:]
  227. }
  228. // it's a bit complex when dealing with a case like "HTTPStatus".
  229. // it's expected to be splitted into "HTTP" and "Status".
  230. // Therefore "S" should be in remaining instead of word.
  231. if len(remaining) > 0 && isAlphabet(r) {
  232. offset -= prevSize
  233. remaining = str[offset:]
  234. }
  235. case isAlphabet(r):
  236. offset += size
  237. remaining = remaining[size:]
  238. for len(remaining) > 0 {
  239. r, size = nextValidRune(remaining, r)
  240. if !isAlphabet(r) || unicode.IsUpper(r) {
  241. break
  242. }
  243. offset += size
  244. remaining = remaining[size:]
  245. }
  246. }
  247. case isAlphabet(r):
  248. wt = alphabetWord
  249. remaining = remaining[size:]
  250. for len(remaining) > 0 {
  251. r, size = nextValidRune(remaining, r)
  252. if !isAlphabet(r) || unicode.IsUpper(r) {
  253. break
  254. }
  255. offset += size
  256. remaining = remaining[size:]
  257. }
  258. case unicode.IsNumber(r):
  259. wt = numberWord
  260. remaining = remaining[size:]
  261. for len(remaining) > 0 {
  262. r, size = nextValidRune(remaining, r)
  263. if !unicode.IsNumber(r) {
  264. break
  265. }
  266. offset += size
  267. remaining = remaining[size:]
  268. }
  269. default:
  270. wt = otherWord
  271. remaining = remaining[size:]
  272. for len(remaining) > 0 {
  273. r, size = nextValidRune(remaining, r)
  274. if size == 0 || isConnector(r) || isAlphabet(r) || unicode.IsNumber(r) || unicode.IsPunct(r) {
  275. break
  276. }
  277. offset += size
  278. remaining = remaining[size:]
  279. }
  280. }
  281. word = str[:offset]
  282. return
  283. }
  // nextValidRune decodes runes from the start of str until it finds one
  // that is not utf8.RuneError.
  //
  // It returns that rune and the number of bytes consumed up to and including
  // it, so skipped bytes are counted in size. If str holds no such rune, prev
  // is returned together with the number of bytes examined, which is len(str).
  func nextValidRune(str string, prev rune) (r rune, size int) {
  	for size < len(str) {
  		c, width := utf8.DecodeRuneInString(str[size:])
  		size += width

  		if c != utf8.RuneError {
  			return c, size
  		}
  	}

  	return prev, size
  }
  297. func toLower(buf *stringBuilder, wt wordType, str string, connector rune) {
  298. buf.Grow(buf.Len() + len(str))
  299. if wt != upperCaseWord && wt != connectorWord {
  300. buf.WriteString(str)
  301. return
  302. }
  303. for len(str) > 0 {
  304. r, size := utf8.DecodeRuneInString(str)
  305. str = str[size:]
  306. if isConnector(r) {
  307. buf.WriteRune(connector)
  308. } else if unicode.IsUpper(r) {
  309. buf.WriteRune(unicode.ToLower(r))
  310. } else {
  311. buf.WriteRune(r)
  312. }
  313. }
  314. }
  315. // SwapCase will swap characters case from upper to lower or lower to upper.
  316. func SwapCase(str string) string {
  317. var r rune
  318. var size int
  319. buf := &stringBuilder{}
  320. for len(str) > 0 {
  321. r, size = utf8.DecodeRuneInString(str)
  322. switch {
  323. case unicode.IsUpper(r):
  324. buf.WriteRune(unicode.ToLower(r))
  325. case unicode.IsLower(r):
  326. buf.WriteRune(unicode.ToUpper(r))
  327. default:
  328. buf.WriteRune(r)
  329. }
  330. str = str[size:]
  331. }
  332. return buf.String()
  333. }
  // FirstRuneToUpper returns str with its first rune converted to upper
  // case. str is returned as is when it is empty or when its first rune is
  // not a lower case letter.
  func FirstRuneToUpper(str string) string {
  	first, width := utf8.DecodeRuneInString(str)

  	if unicode.IsLower(first) {
  		return string(unicode.ToUpper(first)) + str[width:]
  	}

  	return str
  }
  // FirstRuneToLower returns str with its first rune converted to lower
  // case. str is returned as is when it is empty or when its first rune is
  // not an upper case letter.
  func FirstRuneToLower(str string) string {
  	first, width := utf8.DecodeRuneInString(str)

  	if unicode.IsUpper(first) {
  		return string(unicode.ToLower(first)) + str[width:]
  	}

  	return str
  }
  // Shuffle returns a copy of str with its runes in random order.
  // Randomness comes from the default source of `math/rand`.
  func Shuffle(str string) string {
  	if len(str) == 0 {
  		return ""
  	}

  	runes := []rune(str)

  	// Walk from the back and swap each position with a randomly chosen
  	// position at or before it.
  	for i := len(runes) - 1; i > 0; i-- {
  		j := rand.Intn(i + 1)
  		runes[i], runes[j] = runes[j], runes[i]
  	}

  	return string(runes)
  }
  // ShuffleSource returns a copy of str with its runes in random order,
  // drawing randomness from src instead of the default source.
  func ShuffleSource(str string, src rand.Source) string {
  	if len(str) == 0 {
  		return ""
  	}

  	runes := []rune(str)
  	rng := rand.New(src)

  	// Walk from the back and swap each position with a randomly chosen
  	// position at or before it.
  	for i := len(runes) - 1; i > 0; i-- {
  		j := rng.Intn(i + 1)
  		runes[i], runes[j] = runes[j], runes[i]
  	}

  	return string(runes)
  }
  // Successor returns the successor to string.
  //
  // If there is one alphanumeric rune is found in string, increase the rune by 1.
  // If increment generates a "carry", the next alphanumeric rune to the left of
  // it is incremented. This process repeats until there is no carry, adding an
  // additional rune if necessary. The additional rune is inserted directly in
  // front of the leftmost alphanumeric rune: 'a' or 'A' when a letter
  // overflowed, '1' when a digit overflowed.
  //
  // If there is no alphanumeric rune, the rightmost rune will be increased by 1
  // regardless whether the result is a valid rune or not.
  //
  // Only following characters are alphanumeric.
  //     * a - z
  //     * A - Z
  //     * 0 - 9
  //
  // Samples (borrowed from ruby's String#succ document):
  //     "abcd"      => "abce"
  //     "THX1138"   => "THX1139"
  //     "<<koala>>" => "<<koalb>>"
  //     "1999zzz"   => "2000aaa"
  //     "ZZZ9999"   => "AAAA0000"
  //     "Zz"        => "AAa"
  //     "99"        => "100"
  //     "***"       => "**+"
  func Successor(str string) string {
  	if str == "" {
  		return str
  	}

  	runes := []rune(str)
  	l := len(runes)

  	// carry is the rune to insert if the increment overflows past the
  	// leftmost alphanumeric rune; 0 means no carry is pending.
  	var carry rune

  	// lastAlphanumeric is the index (in runes) of the leftmost alphanumeric
  	// rune touched so far; l means none has been seen.
  	lastAlphanumeric := l
  	i := l - 1

  	for ; i >= 0; i-- {
  		r := runes[i]

  		// No overflow: bump the rune and stop.
  		if ('a' <= r && r <= 'y') ||
  			('A' <= r && r <= 'Y') ||
  			('0' <= r && r <= '8') {
  			runes[i]++
  			carry = 0
  			lastAlphanumeric = i
  			break
  		}

  		// Overflow: wrap around and keep looking to the left.
  		switch r {
  		case 'z':
  			runes[i], carry = 'a', 'a'
  		case 'Z':
  			runes[i], carry = 'A', 'A'
  		case '9':
  			runes[i], carry = '0', '1'
  		default:
  			// Not alphanumeric; skipped.
  			continue
  		}

  		lastAlphanumeric = i
  	}

  	// Needs to add one character for carry. lastAlphanumeric indexes runes,
  	// not bytes, so the result is assembled from the rune slice.
  	if i < 0 && carry != 0 {
  		out := make([]rune, 0, l+1)
  		out = append(out, runes[:lastAlphanumeric]...)
  		out = append(out, carry)
  		out = append(out, runes[lastAlphanumeric:]...)
  		return string(out)
  	}

  	// No alphanumeric character. Simply increase last rune's value.
  	if lastAlphanumeric == l {
  		runes[l-1]++
  	}

  	return string(runes)
  }
  468. }