// Package inflector pluralizes and singularizes English words and converts
// identifiers to CamelCase, snake_case, or space-separated title case.
- package inflector
- import (
- "bytes"
- "fmt"
- "regexp"
- "strings"
- "sync"
- )
// Rule identifies which set of inflection rules to apply: Plural or Singular.
type Rule int

// Rule sets known to this package. The constants carry the Rule type so they
// cannot be mixed up with unrelated integers.
const (
	// Plural selects the rules used to pluralize a word.
	Plural Rule = iota
	// Singular selects the rules used to singularize a word.
	Singular
)
type (
	// InflectorRule holds the data for one inflection direction: the ordered
	// pattern rules, the irregular word pairs and the uninflected patterns,
	// together with their compiled forms.
	InflectorRule struct {
		// Rules are regular-expression/replacement pairs, tried in order.
		Rules []*ruleItem
		// Irregular lists words whose inflected form is looked up directly.
		Irregular []*irregularItem
		// Uninflected lists patterns for words that are returned unchanged.
		Uninflected []string

		// Compiled counterparts of the fields above, filled in by prepare.
		compiledIrregular   *regexp.Regexp
		compiledUninflected *regexp.Regexp
		compiledRules       []*compiledRule
	}

	// ruleItem is a single regular-expression pattern and its replacement
	// template.
	ruleItem struct {
		pattern     string
		replacement string
	}

	// irregularItem maps one irregular word to its inflected replacement.
	irregularItem struct {
		word        string
		replacement string
	}

	// compiledRule is the compiled form of a ruleItem. The embedded
	// *regexp.Regexp lets callers invoke the matching methods directly.
	compiledRule struct {
		replacement string
		*regexp.Regexp
	}
)
- // threadsafe access to rules and caches
- var mutex sync.Mutex
- var rules = make(map[Rule]*InflectorRule)
// uninflected lists words that are the same in singular and plural form.
// Entries are regular-expression fragments (see `.*?media`, `sea[- ]bass`):
// prepare joins them into a single case-insensitive alternation.
var uninflected = []string{
	`Amoyese`,
	`bison`, `Borghese`, `bream`, `breeches`, `britches`, `buffalo`,
	`cantus`, `carp`, `chassis`, `clippers`, `cod`, `coitus`, `Congoese`, `contretemps`, `corps`,
	`debris`, `diabetes`, `djinn`,
	`eland`, `elk`, `equipment`,
	`Faroese`, `flounder`, `Foochowese`,
	`gallows`, `Genevese`, `Genoese`, `Gilbertese`, `graffiti`,
	`headquarters`, `herpes`, `hijinks`, `Hottentotese`,
	`information`, `innings`,
	`jackanapes`,
	`Kiplingese`, `Kongoese`,
	`Lucchese`,
	`mackerel`, `Maltese`, `.*?media`, `mews`, `moose`, `mumps`,
	`Nankingese`, `news`, `nexus`, `Niasese`,
	`Pekingese`, `Piedmontese`, `pincers`, `Pistoiese`, `pliers`, `Portuguese`, `proceedings`,
	`rabies`, `rice`, `rhinoceros`,
	`salmon`, `Sarawakese`, `scissors`, `sea[- ]bass`, `series`, `Shavese`, `shears`, `siemens`, `species`, `swine`,
	`testes`, `trousers`, `trout`, `tuna`,
	`Vermontese`,
	`Wenchowese`, `whiting`, `wildebeest`,
	`Yengeese`,
}
// uninflectedPlurals holds the additional patterns that prepare merges with
// uninflected for the Plural rule set; matching words are not pluralized.
var uninflectedPlurals = []string{
	`.*[nrlm]ese`,
	`.*deer`,
	`.*fish`,
	`.*measles`,
	`.*ois`,
	`.*pox`,
	`.*sheep`,
	`people`,
}
// uninflectedSingulars holds the additional patterns that prepare merges with
// uninflected for the Singular rule set; matching words are not singularized.
var uninflectedSingulars = []string{
	`.*[nrlm]ese`,
	`.*deer`,
	`.*fish`,
	`.*measles`,
	`.*ois`,
	`.*pox`,
	`.*sheep`,
	`.*ss`,
}
- type cache map[string]string
- // Inflected words that already cached for immediate retrieval from a given Rule
- var caches = make(map[Rule]cache)
- // map of irregular words where its key is a word and its value is the replacement
- var irregularMaps = make(map[Rule]cache)
var (
	// commonInitialisms lists initialisms that are treated as one word when
	// converting identifiers. Based on
	// https://github.com/golang/lint/blob/master/lint.go#L770
	//
	// Order matters: the list feeds strings.NewReplacer, which compares old
	// strings in argument order, so an initialism must come before any other
	// initialism that is a prefix of it ("HTTPS" before "HTTP"). Otherwise the
	// longer one is never matched.
	commonInitialisms = []string{
		"API", "ASCII", "CPU", "CSS", "DNS", "EOF", "GUID", "HTML",
		"HTTPS", "HTTP", "ID", "IP", "JSON", "LHS", "QPS", "RAM", "RHS",
		"RPC", "SLA", "SMTP", "SSH", "TLS", "TTL", "UID", "UI", "UUID",
		"URI", "URL", "UTF8", "VM", "XML", "XSRF", "XSS",
	}

	// commonInitialismsReplacer rewrites each initialism to its capitalized
	// form (e.g. "HTTP" to "Http"); it is built in init.
	commonInitialismsReplacer *strings.Replacer
)
- func init() {
- rules[Plural] = &InflectorRule{
- Rules: []*ruleItem{
- {`(?i)(s)tatus$`, `${1}${2}tatuses`},
- {`(?i)(quiz)$`, `${1}zes`},
- {`(?i)^(ox)$`, `${1}${2}en`},
- {`(?i)([m|l])ouse$`, `${1}ice`},
- {`(?i)(matr|vert|ind)(ix|ex)$`, `${1}ices`},
- {`(?i)(x|ch|ss|sh)$`, `${1}es`},
- {`(?i)([^aeiouy]|qu)y$`, `${1}ies`},
- {`(?i)(hive)$`, `$1s`},
- {`(?i)(?:([^f])fe|([lre])f)$`, `${1}${2}ves`},
- {`(?i)sis$`, `ses`},
- {`(?i)([ti])um$`, `${1}a`},
- {`(?i)(p)erson$`, `${1}eople`},
- {`(?i)(m)an$`, `${1}en`},
- {`(?i)(c)hild$`, `${1}hildren`},
- {`(?i)(buffal|tomat)o$`, `${1}${2}oes`},
- {`(?i)(alumn|bacill|cact|foc|fung|nucle|radi|stimul|syllab|termin|vir)us$`, `${1}i`},
- {`(?i)us$`, `uses`},
- {`(?i)(alias)$`, `${1}es`},
- {`(?i)(ax|cris|test)is$`, `${1}es`},
- {`s$`, `s`},
- {`^$`, ``},
- {`$`, `s`},
- },
- Irregular: []*irregularItem{
- {`atlas`, `atlases`},
- {`beef`, `beefs`},
- {`brother`, `brothers`},
- {`cafe`, `cafes`},
- {`child`, `children`},
- {`cookie`, `cookies`},
- {`corpus`, `corpuses`},
- {`cow`, `cows`},
- {`ganglion`, `ganglions`},
- {`genie`, `genies`},
- {`genus`, `genera`},
- {`graffito`, `graffiti`},
- {`hoof`, `hoofs`},
- {`loaf`, `loaves`},
- {`man`, `men`},
- {`money`, `monies`},
- {`mongoose`, `mongooses`},
- {`move`, `moves`},
- {`mythos`, `mythoi`},
- {`niche`, `niches`},
- {`numen`, `numina`},
- {`occiput`, `occiputs`},
- {`octopus`, `octopuses`},
- {`opus`, `opuses`},
- {`ox`, `oxen`},
- {`penis`, `penises`},
- {`person`, `people`},
- {`sex`, `sexes`},
- {`soliloquy`, `soliloquies`},
- {`testis`, `testes`},
- {`trilby`, `trilbys`},
- {`turf`, `turfs`},
- {`potato`, `potatoes`},
- {`hero`, `heroes`},
- {`tooth`, `teeth`},
- {`goose`, `geese`},
- {`foot`, `feet`},
- },
- }
- prepare(Plural)
- rules[Singular] = &InflectorRule{
- Rules: []*ruleItem{
- {`(?i)(s)tatuses$`, `${1}${2}tatus`},
- {`(?i)^(.*)(menu)s$`, `${1}${2}`},
- {`(?i)(quiz)zes$`, `$1`},
- {`(?i)(matr)ices$`, `${1}ix`},
- {`(?i)(vert|ind)ices$`, `${1}ex`},
- {`(?i)^(ox)en`, `$1`},
- {`(?i)(alias)(es)*$`, `$1`},
- {`(?i)(alumn|bacill|cact|foc|fung|nucle|radi|stimul|syllab|termin|viri?)i$`, `${1}us`},
- {`(?i)([ftw]ax)es`, `$1`},
- {`(?i)(cris|ax|test)es$`, `${1}is`},
- {`(?i)(shoe|slave)s$`, `$1`},
- {`(?i)(o)es$`, `$1`},
- {`ouses$`, `ouse`},
- {`([^a])uses$`, `${1}us`},
- {`(?i)([m|l])ice$`, `${1}ouse`},
- {`(?i)(x|ch|ss|sh)es$`, `$1`},
- {`(?i)(m)ovies$`, `${1}${2}ovie`},
- {`(?i)(s)eries$`, `${1}${2}eries`},
- {`(?i)([^aeiouy]|qu)ies$`, `${1}y`},
- {`(?i)(tive)s$`, `$1`},
- {`(?i)([lre])ves$`, `${1}f`},
- {`(?i)([^fo])ves$`, `${1}fe`},
- {`(?i)(hive)s$`, `$1`},
- {`(?i)(drive)s$`, `$1`},
- {`(?i)(^analy)ses$`, `${1}sis`},
- {`(?i)(analy|diagno|^ba|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$`, `${1}${2}sis`},
- {`(?i)([ti])a$`, `${1}um`},
- {`(?i)(p)eople$`, `${1}${2}erson`},
- {`(?i)(m)en$`, `${1}an`},
- {`(?i)(c)hildren$`, `${1}${2}hild`},
- {`(?i)(n)ews$`, `${1}${2}ews`},
- {`eaus$`, `eau`},
- {`^(.*us)$`, `$1`},
- {`(?i)s$`, ``},
- },
- Irregular: []*irregularItem{
- {`foes`, `foe`},
- {`waves`, `wave`},
- {`curves`, `curve`},
- {`atlases`, `atlas`},
- {`beefs`, `beef`},
- {`brothers`, `brother`},
- {`cafes`, `cafe`},
- {`children`, `child`},
- {`cookies`, `cookie`},
- {`corpuses`, `corpus`},
- {`cows`, `cow`},
- {`ganglions`, `ganglion`},
- {`genies`, `genie`},
- {`genera`, `genus`},
- {`graffiti`, `graffito`},
- {`hoofs`, `hoof`},
- {`loaves`, `loaf`},
- {`men`, `man`},
- {`monies`, `money`},
- {`mongooses`, `mongoose`},
- {`moves`, `move`},
- {`mythoi`, `mythos`},
- {`niches`, `niche`},
- {`numina`, `numen`},
- {`occiputs`, `occiput`},
- {`octopuses`, `octopus`},
- {`opuses`, `opus`},
- {`oxen`, `ox`},
- {`penises`, `penis`},
- {`people`, `person`},
- {`sexes`, `sex`},
- {`soliloquies`, `soliloquy`},
- {`testes`, `testis`},
- {`trilbys`, `trilby`},
- {`turfs`, `turf`},
- {`potatoes`, `potato`},
- {`heroes`, `hero`},
- {`teeth`, `tooth`},
- {`geese`, `goose`},
- {`feet`, `foot`},
- },
- }
- prepare(Singular)
- commonInitialismsForReplacer := make([]string, 0, len(commonInitialisms))
- for _, initialism := range commonInitialisms {
- commonInitialismsForReplacer = append(commonInitialismsForReplacer, initialism, strings.Title(strings.ToLower(initialism)))
- }
- commonInitialismsReplacer = strings.NewReplacer(commonInitialismsForReplacer...)
- }
- // prepare rule, e.g., compile the pattern.
- func prepare(r Rule) error {
- var reString string
- switch r {
- case Plural:
- // Merge global uninflected with singularsUninflected
- rules[r].Uninflected = merge(uninflected, uninflectedPlurals)
- case Singular:
- // Merge global uninflected with singularsUninflected
- rules[r].Uninflected = merge(uninflected, uninflectedSingulars)
- }
- // Set InflectorRule.compiledUninflected by joining InflectorRule.Uninflected into
- // a single string then compile it.
- reString = fmt.Sprintf(`(?i)(^(?:%s))$`, strings.Join(rules[r].Uninflected, `|`))
- rules[r].compiledUninflected = regexp.MustCompile(reString)
- // Prepare irregularMaps
- irregularMaps[r] = make(cache, len(rules[r].Irregular))
- // Set InflectorRule.compiledIrregular by joining the irregularItem.word of Inflector.Irregular
- // into a single string then compile it.
- vIrregulars := make([]string, len(rules[r].Irregular))
- for i, item := range rules[r].Irregular {
- vIrregulars[i] = item.word
- irregularMaps[r][item.word] = item.replacement
- }
- reString = fmt.Sprintf(`(?i)(.*)\b((?:%s))$`, strings.Join(vIrregulars, `|`))
- rules[r].compiledIrregular = regexp.MustCompile(reString)
- // Compile all patterns in InflectorRule.Rules
- rules[r].compiledRules = make([]*compiledRule, len(rules[r].Rules))
- for i, item := range rules[r].Rules {
- rules[r].compiledRules[i] = &compiledRule{item.replacement, regexp.MustCompile(item.pattern)}
- }
- // Prepare caches
- caches[r] = make(cache)
- return nil
- }
// merge returns a newly allocated slice containing the elements of a followed
// by the elements of b. Neither argument is modified.
func merge(a []string, b []string) []string {
	merged := make([]string, 0, len(a)+len(b))
	merged = append(merged, a...)
	return append(merged, b...)
}
- func getInflected(r Rule, s string) string {
- mutex.Lock()
- defer mutex.Unlock()
- if v, ok := caches[r][s]; ok {
- return v
- }
- // Check for irregular words
- if res := rules[r].compiledIrregular.FindStringSubmatch(s); len(res) >= 3 {
- var buf bytes.Buffer
- buf.WriteString(res[1])
- buf.WriteString(s[0:1])
- buf.WriteString(irregularMaps[r][strings.ToLower(res[2])][1:])
- // Cache it then returns
- caches[r][s] = buf.String()
- return caches[r][s]
- }
- // Check for uninflected words
- if rules[r].compiledUninflected.MatchString(s) {
- caches[r][s] = s
- return caches[r][s]
- }
- // Check each rule
- for _, re := range rules[r].compiledRules {
- if re.MatchString(s) {
- caches[r][s] = re.ReplaceAllString(s, re.replacement)
- return caches[r][s]
- }
- }
- // Returns unaltered
- caches[r][s] = s
- return caches[r][s]
- }
- // Pluralize returns string s in plural form.
- func Pluralize(s string) string {
- return getInflected(Plural, s)
- }
- // Singularize returns string s in singular form.
- func Singularize(s string) string {
- return getInflected(Singular, s)
- }
// camelizeReg matches every run of characters that are not ASCII letters or
// digits; Camelize and Camel2words treat such runs as word separators.
var camelizeReg = regexp.MustCompile(`[^A-Za-z0-9]+`)
- // Camelize Converts a word like "send_email" to "SendEmail"
- func Camelize(s string) string {
- s = camelizeReg.ReplaceAllString(s, " ")
- return strings.Replace(strings.Title(s), " ", "", -1)
- }
- // Camel2id Converts a word like "SendEmail" to "send_email"
- func Camel2id(name string) string {
- var (
- value = commonInitialismsReplacer.Replace(name)
- buf strings.Builder
- lastCase, nextCase, nextNumber bool // upper case == true
- curCase = value[0] <= 'Z' && value[0] >= 'A'
- )
- for i, v := range value[:len(value)-1] {
- nextCase = value[i+1] <= 'Z' && value[i+1] >= 'A'
- nextNumber = value[i+1] >= '0' && value[i+1] <= '9'
- if curCase {
- if lastCase && (nextCase || nextNumber) {
- buf.WriteRune(v + 32)
- } else {
- if i > 0 && value[i-1] != '_' && value[i+1] != '_' {
- buf.WriteByte('_')
- }
- buf.WriteRune(v + 32)
- }
- } else {
- buf.WriteRune(v)
- }
- lastCase = curCase
- curCase = nextCase
- }
- if curCase {
- if !lastCase && len(value) > 1 {
- buf.WriteByte('_')
- }
- buf.WriteByte(value[len(value)-1] + 32)
- } else {
- buf.WriteByte(value[len(value)-1])
- }
- ret := buf.String()
- return ret
- }
- // Camel2words Converts a CamelCase name into space-separated words.
- // For example, 'send_email' will be converted to 'Send Email'.
- func Camel2words(s string) string {
- s = camelizeReg.ReplaceAllString(s, " ")
- return strings.Title(s)
- }
|