// Source file: inflector.go

// Copyright 2013 Akeda Bagus <admin@gedex.web.id>. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

/*
Package inflector pluralizes and singularizes English nouns.

The two core exported functions are `Pluralize` and `Singularize`; the
package also exports the identifier helpers `Camelize`, `Camel2id` and
`Camel2words`.

	s := "People"
	fmt.Println(inflector.Singularize(s)) // will print "Person"

	s2 := "octopus"
	fmt.Println(inflector.Pluralize(s2)) // will print "octopuses"
*/
  13. package inflector
  14. import (
  15. "bytes"
  16. "fmt"
  17. "regexp"
  18. "strings"
  19. "sync"
  20. )
  21. // Rule represents name of the inflector rule, can be
  22. // Plural or Singular
  23. type Rule int
  24. const (
  25. Plural = iota
  26. Singular
  27. )
  28. // InflectorRule represents inflector rule
  29. type InflectorRule struct {
  30. Rules []*ruleItem
  31. Irregular []*irregularItem
  32. Uninflected []string
  33. compiledIrregular *regexp.Regexp
  34. compiledUninflected *regexp.Regexp
  35. compiledRules []*compiledRule
  36. }
  37. type ruleItem struct {
  38. pattern string
  39. replacement string
  40. }
  41. type irregularItem struct {
  42. word string
  43. replacement string
  44. }
// compiledRule is the compiled form of one entry of InflectorRule.Rules:
// the pattern compiled to a regular expression, paired with the
// replacement template of the same entry.
type compiledRule struct {
	// replacement is the template handed to ReplaceAllString when the
	// pattern matches.
	replacement string
	// Embedded so the regexp methods (MatchString, ReplaceAllString, ...)
	// can be called on the rule directly.
	*regexp.Regexp
}
  50. // threadsafe access to rules and caches
  51. var mutex sync.Mutex
  52. var rules = make(map[Rule]*InflectorRule)
  53. // Words that should not be inflected
  54. var uninflected = []string{
  55. `Amoyese`, `bison`, `Borghese`, `bream`, `breeches`, `britches`, `buffalo`,
  56. `cantus`, `carp`, `chassis`, `clippers`, `cod`, `coitus`, `Congoese`,
  57. `contretemps`, `corps`, `debris`, `diabetes`, `djinn`, `eland`, `elk`,
  58. `equipment`, `Faroese`, `flounder`, `Foochowese`, `gallows`, `Genevese`,
  59. `Genoese`, `Gilbertese`, `graffiti`, `headquarters`, `herpes`, `hijinks`,
  60. `Hottentotese`, `information`, `innings`, `jackanapes`, `Kiplingese`,
  61. `Kongoese`, `Lucchese`, `mackerel`, `Maltese`, `.*?media`, `mews`, `moose`,
  62. `mumps`, `Nankingese`, `news`, `nexus`, `Niasese`, `Pekingese`,
  63. `Piedmontese`, `pincers`, `Pistoiese`, `pliers`, `Portuguese`, `proceedings`,
  64. `rabies`, `rice`, `rhinoceros`, `salmon`, `Sarawakese`, `scissors`,
  65. `sea[- ]bass`, `series`, `Shavese`, `shears`, `siemens`, `species`, `swine`,
  66. `testes`, `trousers`, `trout`, `tuna`, `Vermontese`, `Wenchowese`, `whiting`,
  67. `wildebeest`, `Yengeese`,
  68. }
  69. // Plural words that should not be inflected
  70. var uninflectedPlurals = []string{
  71. `.*[nrlm]ese`, `.*deer`, `.*fish`, `.*measles`, `.*ois`, `.*pox`, `.*sheep`,
  72. `people`,
  73. }
  74. // Singular words that should not be inflected
  75. var uninflectedSingulars = []string{
  76. `.*[nrlm]ese`, `.*deer`, `.*fish`, `.*measles`, `.*ois`, `.*pox`, `.*sheep`,
  77. `.*ss`,
  78. }
  79. type cache map[string]string
  80. // Inflected words that already cached for immediate retrieval from a given Rule
  81. var caches = make(map[Rule]cache)
  82. // map of irregular words where its key is a word and its value is the replacement
  83. var irregularMaps = make(map[Rule]cache)
  84. var (
  85. // https://github.com/golang/lint/blob/master/lint.go#L770
  86. commonInitialisms = []string{"API", "ASCII", "CPU", "CSS", "DNS", "EOF", "GUID", "HTML", "HTTP", "HTTPS", "ID", "IP", "JSON", "LHS", "QPS", "RAM", "RHS", "RPC", "SLA", "SMTP", "SSH", "TLS", "TTL", "UID", "UI", "UUID", "URI", "URL", "UTF8", "VM", "XML", "XSRF", "XSS"}
  87. commonInitialismsReplacer *strings.Replacer
  88. )
  89. func init() {
  90. rules[Plural] = &InflectorRule{
  91. Rules: []*ruleItem{
  92. {`(?i)(s)tatus$`, `${1}${2}tatuses`},
  93. {`(?i)(quiz)$`, `${1}zes`},
  94. {`(?i)^(ox)$`, `${1}${2}en`},
  95. {`(?i)([m|l])ouse$`, `${1}ice`},
  96. {`(?i)(matr|vert|ind)(ix|ex)$`, `${1}ices`},
  97. {`(?i)(x|ch|ss|sh)$`, `${1}es`},
  98. {`(?i)([^aeiouy]|qu)y$`, `${1}ies`},
  99. {`(?i)(hive)$`, `$1s`},
  100. {`(?i)(?:([^f])fe|([lre])f)$`, `${1}${2}ves`},
  101. {`(?i)sis$`, `ses`},
  102. {`(?i)([ti])um$`, `${1}a`},
  103. {`(?i)(p)erson$`, `${1}eople`},
  104. {`(?i)(m)an$`, `${1}en`},
  105. {`(?i)(c)hild$`, `${1}hildren`},
  106. {`(?i)(buffal|tomat)o$`, `${1}${2}oes`},
  107. {`(?i)(alumn|bacill|cact|foc|fung|nucle|radi|stimul|syllab|termin|vir)us$`, `${1}i`},
  108. {`(?i)us$`, `uses`},
  109. {`(?i)(alias)$`, `${1}es`},
  110. {`(?i)(ax|cris|test)is$`, `${1}es`},
  111. {`s$`, `s`},
  112. {`^$`, ``},
  113. {`$`, `s`},
  114. },
  115. Irregular: []*irregularItem{
  116. {`atlas`, `atlases`},
  117. {`beef`, `beefs`},
  118. {`brother`, `brothers`},
  119. {`cafe`, `cafes`},
  120. {`child`, `children`},
  121. {`cookie`, `cookies`},
  122. {`corpus`, `corpuses`},
  123. {`cow`, `cows`},
  124. {`ganglion`, `ganglions`},
  125. {`genie`, `genies`},
  126. {`genus`, `genera`},
  127. {`graffito`, `graffiti`},
  128. {`hoof`, `hoofs`},
  129. {`loaf`, `loaves`},
  130. {`man`, `men`},
  131. {`money`, `monies`},
  132. {`mongoose`, `mongooses`},
  133. {`move`, `moves`},
  134. {`mythos`, `mythoi`},
  135. {`niche`, `niches`},
  136. {`numen`, `numina`},
  137. {`occiput`, `occiputs`},
  138. {`octopus`, `octopuses`},
  139. {`opus`, `opuses`},
  140. {`ox`, `oxen`},
  141. {`penis`, `penises`},
  142. {`person`, `people`},
  143. {`sex`, `sexes`},
  144. {`soliloquy`, `soliloquies`},
  145. {`testis`, `testes`},
  146. {`trilby`, `trilbys`},
  147. {`turf`, `turfs`},
  148. {`potato`, `potatoes`},
  149. {`hero`, `heroes`},
  150. {`tooth`, `teeth`},
  151. {`goose`, `geese`},
  152. {`foot`, `feet`},
  153. },
  154. }
  155. prepare(Plural)
  156. rules[Singular] = &InflectorRule{
  157. Rules: []*ruleItem{
  158. {`(?i)(s)tatuses$`, `${1}${2}tatus`},
  159. {`(?i)^(.*)(menu)s$`, `${1}${2}`},
  160. {`(?i)(quiz)zes$`, `$1`},
  161. {`(?i)(matr)ices$`, `${1}ix`},
  162. {`(?i)(vert|ind)ices$`, `${1}ex`},
  163. {`(?i)^(ox)en`, `$1`},
  164. {`(?i)(alias)(es)*$`, `$1`},
  165. {`(?i)(alumn|bacill|cact|foc|fung|nucle|radi|stimul|syllab|termin|viri?)i$`, `${1}us`},
  166. {`(?i)([ftw]ax)es`, `$1`},
  167. {`(?i)(cris|ax|test)es$`, `${1}is`},
  168. {`(?i)(shoe|slave)s$`, `$1`},
  169. {`(?i)(o)es$`, `$1`},
  170. {`ouses$`, `ouse`},
  171. {`([^a])uses$`, `${1}us`},
  172. {`(?i)([m|l])ice$`, `${1}ouse`},
  173. {`(?i)(x|ch|ss|sh)es$`, `$1`},
  174. {`(?i)(m)ovies$`, `${1}${2}ovie`},
  175. {`(?i)(s)eries$`, `${1}${2}eries`},
  176. {`(?i)([^aeiouy]|qu)ies$`, `${1}y`},
  177. {`(?i)(tive)s$`, `$1`},
  178. {`(?i)([lre])ves$`, `${1}f`},
  179. {`(?i)([^fo])ves$`, `${1}fe`},
  180. {`(?i)(hive)s$`, `$1`},
  181. {`(?i)(drive)s$`, `$1`},
  182. {`(?i)(^analy)ses$`, `${1}sis`},
  183. {`(?i)(analy|diagno|^ba|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$`, `${1}${2}sis`},
  184. {`(?i)([ti])a$`, `${1}um`},
  185. {`(?i)(p)eople$`, `${1}${2}erson`},
  186. {`(?i)(m)en$`, `${1}an`},
  187. {`(?i)(c)hildren$`, `${1}${2}hild`},
  188. {`(?i)(n)ews$`, `${1}${2}ews`},
  189. {`eaus$`, `eau`},
  190. {`^(.*us)$`, `$1`},
  191. {`(?i)s$`, ``},
  192. },
  193. Irregular: []*irregularItem{
  194. {`foes`, `foe`},
  195. {`waves`, `wave`},
  196. {`curves`, `curve`},
  197. {`atlases`, `atlas`},
  198. {`beefs`, `beef`},
  199. {`brothers`, `brother`},
  200. {`cafes`, `cafe`},
  201. {`children`, `child`},
  202. {`cookies`, `cookie`},
  203. {`corpuses`, `corpus`},
  204. {`cows`, `cow`},
  205. {`ganglions`, `ganglion`},
  206. {`genies`, `genie`},
  207. {`genera`, `genus`},
  208. {`graffiti`, `graffito`},
  209. {`hoofs`, `hoof`},
  210. {`loaves`, `loaf`},
  211. {`men`, `man`},
  212. {`monies`, `money`},
  213. {`mongooses`, `mongoose`},
  214. {`moves`, `move`},
  215. {`mythoi`, `mythos`},
  216. {`niches`, `niche`},
  217. {`numina`, `numen`},
  218. {`occiputs`, `occiput`},
  219. {`octopuses`, `octopus`},
  220. {`opuses`, `opus`},
  221. {`oxen`, `ox`},
  222. {`penises`, `penis`},
  223. {`people`, `person`},
  224. {`sexes`, `sex`},
  225. {`soliloquies`, `soliloquy`},
  226. {`testes`, `testis`},
  227. {`trilbys`, `trilby`},
  228. {`turfs`, `turf`},
  229. {`potatoes`, `potato`},
  230. {`heroes`, `hero`},
  231. {`teeth`, `tooth`},
  232. {`geese`, `goose`},
  233. {`feet`, `foot`},
  234. },
  235. }
  236. prepare(Singular)
  237. commonInitialismsForReplacer := make([]string, 0, len(commonInitialisms))
  238. for _, initialism := range commonInitialisms {
  239. commonInitialismsForReplacer = append(commonInitialismsForReplacer, initialism, strings.Title(strings.ToLower(initialism)))
  240. }
  241. commonInitialismsReplacer = strings.NewReplacer(commonInitialismsForReplacer...)
  242. }
  243. // prepare rule, e.g., compile the pattern.
  244. func prepare(r Rule) error {
  245. var reString string
  246. switch r {
  247. case Plural:
  248. // Merge global uninflected with singularsUninflected
  249. rules[r].Uninflected = merge(uninflected, uninflectedPlurals)
  250. case Singular:
  251. // Merge global uninflected with singularsUninflected
  252. rules[r].Uninflected = merge(uninflected, uninflectedSingulars)
  253. }
  254. // Set InflectorRule.compiledUninflected by joining InflectorRule.Uninflected into
  255. // a single string then compile it.
  256. reString = fmt.Sprintf(`(?i)(^(?:%s))$`, strings.Join(rules[r].Uninflected, `|`))
  257. rules[r].compiledUninflected = regexp.MustCompile(reString)
  258. // Prepare irregularMaps
  259. irregularMaps[r] = make(cache, len(rules[r].Irregular))
  260. // Set InflectorRule.compiledIrregular by joining the irregularItem.word of Inflector.Irregular
  261. // into a single string then compile it.
  262. vIrregulars := make([]string, len(rules[r].Irregular))
  263. for i, item := range rules[r].Irregular {
  264. vIrregulars[i] = item.word
  265. irregularMaps[r][item.word] = item.replacement
  266. }
  267. reString = fmt.Sprintf(`(?i)(.*)\b((?:%s))$`, strings.Join(vIrregulars, `|`))
  268. rules[r].compiledIrregular = regexp.MustCompile(reString)
  269. // Compile all patterns in InflectorRule.Rules
  270. rules[r].compiledRules = make([]*compiledRule, len(rules[r].Rules))
  271. for i, item := range rules[r].Rules {
  272. rules[r].compiledRules[i] = &compiledRule{item.replacement, regexp.MustCompile(item.pattern)}
  273. }
  274. // Prepare caches
  275. caches[r] = make(cache)
  276. return nil
  277. }
  278. // merge slice a and slice b
  279. func merge(a []string, b []string) []string {
  280. result := make([]string, len(a)+len(b))
  281. copy(result, a)
  282. copy(result[len(a):], b)
  283. return result
  284. }
  285. func getInflected(r Rule, s string) string {
  286. mutex.Lock()
  287. defer mutex.Unlock()
  288. if v, ok := caches[r][s]; ok {
  289. return v
  290. }
  291. // Check for irregular words
  292. if res := rules[r].compiledIrregular.FindStringSubmatch(s); len(res) >= 3 {
  293. var buf bytes.Buffer
  294. buf.WriteString(res[1])
  295. buf.WriteString(s[0:1])
  296. buf.WriteString(irregularMaps[r][strings.ToLower(res[2])][1:])
  297. // Cache it then returns
  298. caches[r][s] = buf.String()
  299. return caches[r][s]
  300. }
  301. // Check for uninflected words
  302. if rules[r].compiledUninflected.MatchString(s) {
  303. caches[r][s] = s
  304. return caches[r][s]
  305. }
  306. // Check each rule
  307. for _, re := range rules[r].compiledRules {
  308. if re.MatchString(s) {
  309. caches[r][s] = re.ReplaceAllString(s, re.replacement)
  310. return caches[r][s]
  311. }
  312. }
  313. // Returns unaltered
  314. caches[r][s] = s
  315. return caches[r][s]
  316. }
// Pluralize returns string s in plural form.
//
// It delegates to getInflected with the Plural rule set.
func Pluralize(s string) string {
	return getInflected(Plural, s)
}
// Singularize returns string s in singular form.
//
// It delegates to getInflected with the Singular rule set.
func Singularize(s string) string {
	return getInflected(Singular, s)
}
  325. var (
  326. camelizeReg = regexp.MustCompile(`[^A-Za-z0-9]+`)
  327. )
  328. // Camelize Converts a word like "send_email" to "SendEmail"
  329. func Camelize(s string) string {
  330. s = camelizeReg.ReplaceAllString(s, " ")
  331. return strings.Replace(strings.Title(s), " ", "", -1)
  332. }
  333. // Camel2id Converts a word like "SendEmail" to "send_email"
  334. func Camel2id(name string) string {
  335. var (
  336. value = commonInitialismsReplacer.Replace(name)
  337. buf strings.Builder
  338. lastCase, nextCase, nextNumber bool // upper case == true
  339. curCase = value[0] <= 'Z' && value[0] >= 'A'
  340. )
  341. for i, v := range value[:len(value)-1] {
  342. nextCase = value[i+1] <= 'Z' && value[i+1] >= 'A'
  343. nextNumber = value[i+1] >= '0' && value[i+1] <= '9'
  344. if curCase {
  345. if lastCase && (nextCase || nextNumber) {
  346. buf.WriteRune(v + 32)
  347. } else {
  348. if i > 0 && value[i-1] != '_' && value[i+1] != '_' {
  349. buf.WriteByte('_')
  350. }
  351. buf.WriteRune(v + 32)
  352. }
  353. } else {
  354. buf.WriteRune(v)
  355. }
  356. lastCase = curCase
  357. curCase = nextCase
  358. }
  359. if curCase {
  360. if !lastCase && len(value) > 1 {
  361. buf.WriteByte('_')
  362. }
  363. buf.WriteByte(value[len(value)-1] + 32)
  364. } else {
  365. buf.WriteByte(value[len(value)-1])
  366. }
  367. ret := buf.String()
  368. return ret
  369. }
  370. // Camel2words Converts a CamelCase name into space-separated words.
  371. // For example, 'send_email' will be converted to 'Send Email'.
  372. func Camel2words(s string) string {
  373. s = camelizeReg.ReplaceAllString(s, " ")
  374. return strings.Title(s)
  375. }