inflector.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408
  1. package inflector
  2. import (
  3. "bytes"
  4. "fmt"
  5. "regexp"
  6. "strings"
  7. "sync"
  8. )
  9. // Rule represents name of the inflector rule, can be
  10. // Plural or Singular
  11. type Rule int
  12. const (
  13. Plural = iota
  14. Singular
  15. )
  16. // InflectorRule represents inflector rule
  17. type InflectorRule struct {
  18. Rules []*ruleItem
  19. Irregular []*irregularItem
  20. Uninflected []string
  21. compiledIrregular *regexp.Regexp
  22. compiledUninflected *regexp.Regexp
  23. compiledRules []*compiledRule
  24. }
  25. type ruleItem struct {
  26. pattern string
  27. replacement string
  28. }
  29. type irregularItem struct {
  30. word string
  31. replacement string
  32. }
  33. // compiledRule represents compiled version of Inflector.Rules.
  34. type compiledRule struct {
  35. replacement string
  36. *regexp.Regexp
  37. }
  38. // threadsafe access to rules and caches
  39. var mutex sync.Mutex
  40. var rules = make(map[Rule]*InflectorRule)
  41. // Words that should not be inflected
  42. var uninflected = []string{
  43. `Amoyese`, `bison`, `Borghese`, `bream`, `breeches`, `britches`, `buffalo`,
  44. `cantus`, `carp`, `chassis`, `clippers`, `cod`, `coitus`, `Congoese`,
  45. `contretemps`, `corps`, `debris`, `diabetes`, `djinn`, `eland`, `elk`,
  46. `equipment`, `Faroese`, `flounder`, `Foochowese`, `gallows`, `Genevese`,
  47. `Genoese`, `Gilbertese`, `graffiti`, `headquarters`, `herpes`, `hijinks`,
  48. `Hottentotese`, `information`, `innings`, `jackanapes`, `Kiplingese`,
  49. `Kongoese`, `Lucchese`, `mackerel`, `Maltese`, `.*?media`, `mews`, `moose`,
  50. `mumps`, `Nankingese`, `news`, `nexus`, `Niasese`, `Pekingese`,
  51. `Piedmontese`, `pincers`, `Pistoiese`, `pliers`, `Portuguese`, `proceedings`,
  52. `rabies`, `rice`, `rhinoceros`, `salmon`, `Sarawakese`, `scissors`,
  53. `sea[- ]bass`, `series`, `Shavese`, `shears`, `siemens`, `species`, `swine`,
  54. `testes`, `trousers`, `trout`, `tuna`, `Vermontese`, `Wenchowese`, `whiting`,
  55. `wildebeest`, `Yengeese`,
  56. }
  57. // Plural words that should not be inflected
  58. var uninflectedPlurals = []string{
  59. `.*[nrlm]ese`, `.*deer`, `.*fish`, `.*measles`, `.*ois`, `.*pox`, `.*sheep`,
  60. `people`,
  61. }
  62. // Singular words that should not be inflected
  63. var uninflectedSingulars = []string{
  64. `.*[nrlm]ese`, `.*deer`, `.*fish`, `.*measles`, `.*ois`, `.*pox`, `.*sheep`,
  65. `.*ss`,
  66. }
  67. type cache map[string]string
  68. // Inflected words that already cached for immediate retrieval from a given Rule
  69. var caches = make(map[Rule]cache)
  70. // map of irregular words where its key is a word and its value is the replacement
  71. var irregularMaps = make(map[Rule]cache)
  72. var (
  73. // https://github.com/golang/lint/blob/master/lint.go#L770
  74. commonInitialisms = []string{"API", "ASCII", "CPU", "CSS", "DNS", "EOF", "GUID", "HTML", "HTTP", "HTTPS", "ID", "IP", "JSON", "LHS", "QPS", "RAM", "RHS", "RPC", "SLA", "SMTP", "SSH", "TLS", "TTL", "UID", "UI", "UUID", "URI", "URL", "UTF8", "VM", "XML", "XSRF", "XSS"}
  75. commonInitialismsReplacer *strings.Replacer
  76. )
  77. func init() {
  78. rules[Plural] = &InflectorRule{
  79. Rules: []*ruleItem{
  80. {`(?i)(s)tatus$`, `${1}${2}tatuses`},
  81. {`(?i)(quiz)$`, `${1}zes`},
  82. {`(?i)^(ox)$`, `${1}${2}en`},
  83. {`(?i)([m|l])ouse$`, `${1}ice`},
  84. {`(?i)(matr|vert|ind)(ix|ex)$`, `${1}ices`},
  85. {`(?i)(x|ch|ss|sh)$`, `${1}es`},
  86. {`(?i)([^aeiouy]|qu)y$`, `${1}ies`},
  87. {`(?i)(hive)$`, `$1s`},
  88. {`(?i)(?:([^f])fe|([lre])f)$`, `${1}${2}ves`},
  89. {`(?i)sis$`, `ses`},
  90. {`(?i)([ti])um$`, `${1}a`},
  91. {`(?i)(p)erson$`, `${1}eople`},
  92. {`(?i)(m)an$`, `${1}en`},
  93. {`(?i)(c)hild$`, `${1}hildren`},
  94. {`(?i)(buffal|tomat)o$`, `${1}${2}oes`},
  95. {`(?i)(alumn|bacill|cact|foc|fung|nucle|radi|stimul|syllab|termin|vir)us$`, `${1}i`},
  96. {`(?i)us$`, `uses`},
  97. {`(?i)(alias)$`, `${1}es`},
  98. {`(?i)(ax|cris|test)is$`, `${1}es`},
  99. {`s$`, `s`},
  100. {`^$`, ``},
  101. {`$`, `s`},
  102. },
  103. Irregular: []*irregularItem{
  104. {`atlas`, `atlases`},
  105. {`beef`, `beefs`},
  106. {`brother`, `brothers`},
  107. {`cafe`, `cafes`},
  108. {`child`, `children`},
  109. {`cookie`, `cookies`},
  110. {`corpus`, `corpuses`},
  111. {`cow`, `cows`},
  112. {`ganglion`, `ganglions`},
  113. {`genie`, `genies`},
  114. {`genus`, `genera`},
  115. {`graffito`, `graffiti`},
  116. {`hoof`, `hoofs`},
  117. {`loaf`, `loaves`},
  118. {`man`, `men`},
  119. {`money`, `monies`},
  120. {`mongoose`, `mongooses`},
  121. {`move`, `moves`},
  122. {`mythos`, `mythoi`},
  123. {`niche`, `niches`},
  124. {`numen`, `numina`},
  125. {`occiput`, `occiputs`},
  126. {`octopus`, `octopuses`},
  127. {`opus`, `opuses`},
  128. {`ox`, `oxen`},
  129. {`penis`, `penises`},
  130. {`person`, `people`},
  131. {`sex`, `sexes`},
  132. {`soliloquy`, `soliloquies`},
  133. {`testis`, `testes`},
  134. {`trilby`, `trilbys`},
  135. {`turf`, `turfs`},
  136. {`potato`, `potatoes`},
  137. {`hero`, `heroes`},
  138. {`tooth`, `teeth`},
  139. {`goose`, `geese`},
  140. {`foot`, `feet`},
  141. },
  142. }
  143. prepare(Plural)
  144. rules[Singular] = &InflectorRule{
  145. Rules: []*ruleItem{
  146. {`(?i)(s)tatuses$`, `${1}${2}tatus`},
  147. {`(?i)^(.*)(menu)s$`, `${1}${2}`},
  148. {`(?i)(quiz)zes$`, `$1`},
  149. {`(?i)(matr)ices$`, `${1}ix`},
  150. {`(?i)(vert|ind)ices$`, `${1}ex`},
  151. {`(?i)^(ox)en`, `$1`},
  152. {`(?i)(alias)(es)*$`, `$1`},
  153. {`(?i)(alumn|bacill|cact|foc|fung|nucle|radi|stimul|syllab|termin|viri?)i$`, `${1}us`},
  154. {`(?i)([ftw]ax)es`, `$1`},
  155. {`(?i)(cris|ax|test)es$`, `${1}is`},
  156. {`(?i)(shoe|slave)s$`, `$1`},
  157. {`(?i)(o)es$`, `$1`},
  158. {`ouses$`, `ouse`},
  159. {`([^a])uses$`, `${1}us`},
  160. {`(?i)([m|l])ice$`, `${1}ouse`},
  161. {`(?i)(x|ch|ss|sh)es$`, `$1`},
  162. {`(?i)(m)ovies$`, `${1}${2}ovie`},
  163. {`(?i)(s)eries$`, `${1}${2}eries`},
  164. {`(?i)([^aeiouy]|qu)ies$`, `${1}y`},
  165. {`(?i)(tive)s$`, `$1`},
  166. {`(?i)([lre])ves$`, `${1}f`},
  167. {`(?i)([^fo])ves$`, `${1}fe`},
  168. {`(?i)(hive)s$`, `$1`},
  169. {`(?i)(drive)s$`, `$1`},
  170. {`(?i)(^analy)ses$`, `${1}sis`},
  171. {`(?i)(analy|diagno|^ba|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$`, `${1}${2}sis`},
  172. {`(?i)([ti])a$`, `${1}um`},
  173. {`(?i)(p)eople$`, `${1}${2}erson`},
  174. {`(?i)(m)en$`, `${1}an`},
  175. {`(?i)(c)hildren$`, `${1}${2}hild`},
  176. {`(?i)(n)ews$`, `${1}${2}ews`},
  177. {`eaus$`, `eau`},
  178. {`^(.*us)$`, `$1`},
  179. {`(?i)s$`, ``},
  180. },
  181. Irregular: []*irregularItem{
  182. {`foes`, `foe`},
  183. {`waves`, `wave`},
  184. {`curves`, `curve`},
  185. {`atlases`, `atlas`},
  186. {`beefs`, `beef`},
  187. {`brothers`, `brother`},
  188. {`cafes`, `cafe`},
  189. {`children`, `child`},
  190. {`cookies`, `cookie`},
  191. {`corpuses`, `corpus`},
  192. {`cows`, `cow`},
  193. {`ganglions`, `ganglion`},
  194. {`genies`, `genie`},
  195. {`genera`, `genus`},
  196. {`graffiti`, `graffito`},
  197. {`hoofs`, `hoof`},
  198. {`loaves`, `loaf`},
  199. {`men`, `man`},
  200. {`monies`, `money`},
  201. {`mongooses`, `mongoose`},
  202. {`moves`, `move`},
  203. {`mythoi`, `mythos`},
  204. {`niches`, `niche`},
  205. {`numina`, `numen`},
  206. {`occiputs`, `occiput`},
  207. {`octopuses`, `octopus`},
  208. {`opuses`, `opus`},
  209. {`oxen`, `ox`},
  210. {`penises`, `penis`},
  211. {`people`, `person`},
  212. {`sexes`, `sex`},
  213. {`soliloquies`, `soliloquy`},
  214. {`testes`, `testis`},
  215. {`trilbys`, `trilby`},
  216. {`turfs`, `turf`},
  217. {`potatoes`, `potato`},
  218. {`heroes`, `hero`},
  219. {`teeth`, `tooth`},
  220. {`geese`, `goose`},
  221. {`feet`, `foot`},
  222. },
  223. }
  224. prepare(Singular)
  225. commonInitialismsForReplacer := make([]string, 0, len(commonInitialisms))
  226. for _, initialism := range commonInitialisms {
  227. commonInitialismsForReplacer = append(commonInitialismsForReplacer, initialism, strings.Title(strings.ToLower(initialism)))
  228. }
  229. commonInitialismsReplacer = strings.NewReplacer(commonInitialismsForReplacer...)
  230. }
  231. // prepare rule, e.g., compile the pattern.
  232. func prepare(r Rule) error {
  233. var reString string
  234. switch r {
  235. case Plural:
  236. // Merge global uninflected with singularsUninflected
  237. rules[r].Uninflected = merge(uninflected, uninflectedPlurals)
  238. case Singular:
  239. // Merge global uninflected with singularsUninflected
  240. rules[r].Uninflected = merge(uninflected, uninflectedSingulars)
  241. }
  242. // Set InflectorRule.compiledUninflected by joining InflectorRule.Uninflected into
  243. // a single string then compile it.
  244. reString = fmt.Sprintf(`(?i)(^(?:%s))$`, strings.Join(rules[r].Uninflected, `|`))
  245. rules[r].compiledUninflected = regexp.MustCompile(reString)
  246. // Prepare irregularMaps
  247. irregularMaps[r] = make(cache, len(rules[r].Irregular))
  248. // Set InflectorRule.compiledIrregular by joining the irregularItem.word of Inflector.Irregular
  249. // into a single string then compile it.
  250. vIrregulars := make([]string, len(rules[r].Irregular))
  251. for i, item := range rules[r].Irregular {
  252. vIrregulars[i] = item.word
  253. irregularMaps[r][item.word] = item.replacement
  254. }
  255. reString = fmt.Sprintf(`(?i)(.*)\b((?:%s))$`, strings.Join(vIrregulars, `|`))
  256. rules[r].compiledIrregular = regexp.MustCompile(reString)
  257. // Compile all patterns in InflectorRule.Rules
  258. rules[r].compiledRules = make([]*compiledRule, len(rules[r].Rules))
  259. for i, item := range rules[r].Rules {
  260. rules[r].compiledRules[i] = &compiledRule{item.replacement, regexp.MustCompile(item.pattern)}
  261. }
  262. // Prepare caches
  263. caches[r] = make(cache)
  264. return nil
  265. }
  266. // merge slice a and slice b
  267. func merge(a []string, b []string) []string {
  268. result := make([]string, len(a)+len(b))
  269. copy(result, a)
  270. copy(result[len(a):], b)
  271. return result
  272. }
  273. func getInflected(r Rule, s string) string {
  274. mutex.Lock()
  275. defer mutex.Unlock()
  276. if v, ok := caches[r][s]; ok {
  277. return v
  278. }
  279. // Check for irregular words
  280. if res := rules[r].compiledIrregular.FindStringSubmatch(s); len(res) >= 3 {
  281. var buf bytes.Buffer
  282. buf.WriteString(res[1])
  283. buf.WriteString(s[0:1])
  284. buf.WriteString(irregularMaps[r][strings.ToLower(res[2])][1:])
  285. // Cache it then returns
  286. caches[r][s] = buf.String()
  287. return caches[r][s]
  288. }
  289. // Check for uninflected words
  290. if rules[r].compiledUninflected.MatchString(s) {
  291. caches[r][s] = s
  292. return caches[r][s]
  293. }
  294. // Check each rule
  295. for _, re := range rules[r].compiledRules {
  296. if re.MatchString(s) {
  297. caches[r][s] = re.ReplaceAllString(s, re.replacement)
  298. return caches[r][s]
  299. }
  300. }
  301. // Returns unaltered
  302. caches[r][s] = s
  303. return caches[r][s]
  304. }
  305. // Pluralize returns string s in plural form.
  306. func Pluralize(s string) string {
  307. return getInflected(Plural, s)
  308. }
  309. // Singularize returns string s in singular form.
  310. func Singularize(s string) string {
  311. return getInflected(Singular, s)
  312. }
  313. var (
  314. camelizeReg = regexp.MustCompile(`[^A-Za-z0-9]+`)
  315. )
  316. // Camelize Converts a word like "send_email" to "SendEmail"
  317. func Camelize(s string) string {
  318. s = camelizeReg.ReplaceAllString(s, " ")
  319. return strings.Replace(strings.Title(s), " ", "", -1)
  320. }
  321. // Camel2id Converts a word like "SendEmail" to "send_email"
  322. func Camel2id(name string) string {
  323. var (
  324. value = commonInitialismsReplacer.Replace(name)
  325. buf strings.Builder
  326. lastCase, nextCase, nextNumber bool // upper case == true
  327. curCase = value[0] <= 'Z' && value[0] >= 'A'
  328. )
  329. for i, v := range value[:len(value)-1] {
  330. nextCase = value[i+1] <= 'Z' && value[i+1] >= 'A'
  331. nextNumber = value[i+1] >= '0' && value[i+1] <= '9'
  332. if curCase {
  333. if lastCase && (nextCase || nextNumber) {
  334. buf.WriteRune(v + 32)
  335. } else {
  336. if i > 0 && value[i-1] != '_' && value[i+1] != '_' {
  337. buf.WriteByte('_')
  338. }
  339. buf.WriteRune(v + 32)
  340. }
  341. } else {
  342. buf.WriteRune(v)
  343. }
  344. lastCase = curCase
  345. curCase = nextCase
  346. }
  347. if curCase {
  348. if !lastCase && len(value) > 1 {
  349. buf.WriteByte('_')
  350. }
  351. buf.WriteByte(value[len(value)-1] + 32)
  352. } else {
  353. buf.WriteByte(value[len(value)-1])
  354. }
  355. ret := buf.String()
  356. return ret
  357. }
  358. // Camel2words Converts a CamelCase name into space-separated words.
  359. // For example, 'send_email' will be converted to 'Send Email'.
  360. func Camel2words(s string) string {
  361. s = camelizeReg.ReplaceAllString(s, " ")
  362. return strings.Title(s)
  363. }