// Copyright 2013 Akeda Bagus . All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

/*
Package inflector pluralizes and singularizes English nouns.

The core functions are `Pluralize` and `Singularize`:

	s := "People"
	fmt.Println(inflector.Singularize(s)) // will print "Person"

	s2 := "octopus"
	fmt.Println(inflector.Pluralize(s2)) // will print "octopuses"

The package also exports `Camelize`, `Camel2id` and `Camel2words` for
converting identifiers between naming styles.
*/
package inflector

import (
	"bytes"
	"fmt"
	"regexp"
	"strings"
	"sync"
)

// Rule identifies an inflector rule set; it can be Plural or Singular.
type Rule int

// Plural and Singular are the two rule-set identifiers. They are declared
// as untyped constants (0 and 1) and are used as keys of the Rule-keyed
// maps in this package.
const (
	Plural = iota
	Singular
)

// InflectorRule holds everything needed to inflect words in one direction
// (singular to plural, or plural to singular).
type InflectorRule struct {
	// Rules are pattern/replacement pairs tried in order; the first
	// pattern that matches the word is applied.
	Rules []*ruleItem
	// Irregular lists words whose inflected form is given explicitly.
	Irregular []*irregularItem
	// Uninflected lists patterns for words that are returned unchanged.
	Uninflected []string

	// Compiled forms of the fields above, built by prepare.
	compiledIrregular   *regexp.Regexp
	compiledUninflected *regexp.Regexp
	compiledRules       []*compiledRule
}

// ruleItem is one regular-expression rule: a pattern and the replacement
// template applied when the pattern matches.
type ruleItem struct {
	pattern     string
	replacement string
}

// irregularItem maps one irregular word to its inflected form.
type irregularItem struct {
	word        string
	replacement string
}

// compiledRule is the compiled version of a ruleItem from
// InflectorRule.Rules: the replacement template together with the
// compiled pattern (embedded, so regexp methods can be called directly).
type compiledRule struct { replacement string *regexp.Regexp } // threadsafe access to rules and caches var mutex sync.Mutex var rules = make(map[Rule]*InflectorRule) // Words that should not be inflected var uninflected = []string{ `Amoyese`, `bison`, `Borghese`, `bream`, `breeches`, `britches`, `buffalo`, `cantus`, `carp`, `chassis`, `clippers`, `cod`, `coitus`, `Congoese`, `contretemps`, `corps`, `debris`, `diabetes`, `djinn`, `eland`, `elk`, `equipment`, `Faroese`, `flounder`, `Foochowese`, `gallows`, `Genevese`, `Genoese`, `Gilbertese`, `graffiti`, `headquarters`, `herpes`, `hijinks`, `Hottentotese`, `information`, `innings`, `jackanapes`, `Kiplingese`, `Kongoese`, `Lucchese`, `mackerel`, `Maltese`, `.*?media`, `mews`, `moose`, `mumps`, `Nankingese`, `news`, `nexus`, `Niasese`, `Pekingese`, `Piedmontese`, `pincers`, `Pistoiese`, `pliers`, `Portuguese`, `proceedings`, `rabies`, `rice`, `rhinoceros`, `salmon`, `Sarawakese`, `scissors`, `sea[- ]bass`, `series`, `Shavese`, `shears`, `siemens`, `species`, `swine`, `testes`, `trousers`, `trout`, `tuna`, `Vermontese`, `Wenchowese`, `whiting`, `wildebeest`, `Yengeese`, } // Plural words that should not be inflected var uninflectedPlurals = []string{ `.*[nrlm]ese`, `.*deer`, `.*fish`, `.*measles`, `.*ois`, `.*pox`, `.*sheep`, `people`, } // Singular words that should not be inflected var uninflectedSingulars = []string{ `.*[nrlm]ese`, `.*deer`, `.*fish`, `.*measles`, `.*ois`, `.*pox`, `.*sheep`, `.*ss`, } type cache map[string]string // Inflected words that already cached for immediate retrieval from a given Rule var caches = make(map[Rule]cache) // map of irregular words where its key is a word and its value is the replacement var irregularMaps = make(map[Rule]cache) var ( // https://github.com/golang/lint/blob/master/lint.go#L770 commonInitialisms = []string{"API", "ASCII", "CPU", "CSS", "DNS", "EOF", "GUID", "HTML", "HTTP", "HTTPS", "ID", "IP", "JSON", "LHS", "QPS", "RAM", "RHS", "RPC", "SLA", "SMTP", "SSH", "TLS", 
"TTL", "UID", "UI", "UUID", "URI", "URL", "UTF8", "VM", "XML", "XSRF", "XSS"} commonInitialismsReplacer *strings.Replacer ) func init() { rules[Plural] = &InflectorRule{ Rules: []*ruleItem{ {`(?i)(s)tatus$`, `${1}${2}tatuses`}, {`(?i)(quiz)$`, `${1}zes`}, {`(?i)^(ox)$`, `${1}${2}en`}, {`(?i)([m|l])ouse$`, `${1}ice`}, {`(?i)(matr|vert|ind)(ix|ex)$`, `${1}ices`}, {`(?i)(x|ch|ss|sh)$`, `${1}es`}, {`(?i)([^aeiouy]|qu)y$`, `${1}ies`}, {`(?i)(hive)$`, `$1s`}, {`(?i)(?:([^f])fe|([lre])f)$`, `${1}${2}ves`}, {`(?i)sis$`, `ses`}, {`(?i)([ti])um$`, `${1}a`}, {`(?i)(p)erson$`, `${1}eople`}, {`(?i)(m)an$`, `${1}en`}, {`(?i)(c)hild$`, `${1}hildren`}, {`(?i)(buffal|tomat)o$`, `${1}${2}oes`}, {`(?i)(alumn|bacill|cact|foc|fung|nucle|radi|stimul|syllab|termin|vir)us$`, `${1}i`}, {`(?i)us$`, `uses`}, {`(?i)(alias)$`, `${1}es`}, {`(?i)(ax|cris|test)is$`, `${1}es`}, {`s$`, `s`}, {`^$`, ``}, {`$`, `s`}, }, Irregular: []*irregularItem{ {`atlas`, `atlases`}, {`beef`, `beefs`}, {`brother`, `brothers`}, {`cafe`, `cafes`}, {`child`, `children`}, {`cookie`, `cookies`}, {`corpus`, `corpuses`}, {`cow`, `cows`}, {`ganglion`, `ganglions`}, {`genie`, `genies`}, {`genus`, `genera`}, {`graffito`, `graffiti`}, {`hoof`, `hoofs`}, {`loaf`, `loaves`}, {`man`, `men`}, {`money`, `monies`}, {`mongoose`, `mongooses`}, {`move`, `moves`}, {`mythos`, `mythoi`}, {`niche`, `niches`}, {`numen`, `numina`}, {`occiput`, `occiputs`}, {`octopus`, `octopuses`}, {`opus`, `opuses`}, {`ox`, `oxen`}, {`penis`, `penises`}, {`person`, `people`}, {`sex`, `sexes`}, {`soliloquy`, `soliloquies`}, {`testis`, `testes`}, {`trilby`, `trilbys`}, {`turf`, `turfs`}, {`potato`, `potatoes`}, {`hero`, `heroes`}, {`tooth`, `teeth`}, {`goose`, `geese`}, {`foot`, `feet`}, }, } prepare(Plural) rules[Singular] = &InflectorRule{ Rules: []*ruleItem{ {`(?i)(s)tatuses$`, `${1}${2}tatus`}, {`(?i)^(.*)(menu)s$`, `${1}${2}`}, {`(?i)(quiz)zes$`, `$1`}, {`(?i)(matr)ices$`, `${1}ix`}, {`(?i)(vert|ind)ices$`, `${1}ex`}, {`(?i)^(ox)en`, `$1`}, 
{`(?i)(alias)(es)*$`, `$1`}, {`(?i)(alumn|bacill|cact|foc|fung|nucle|radi|stimul|syllab|termin|viri?)i$`, `${1}us`}, {`(?i)([ftw]ax)es`, `$1`}, {`(?i)(cris|ax|test)es$`, `${1}is`}, {`(?i)(shoe|slave)s$`, `$1`}, {`(?i)(o)es$`, `$1`}, {`ouses$`, `ouse`}, {`([^a])uses$`, `${1}us`}, {`(?i)([m|l])ice$`, `${1}ouse`}, {`(?i)(x|ch|ss|sh)es$`, `$1`}, {`(?i)(m)ovies$`, `${1}${2}ovie`}, {`(?i)(s)eries$`, `${1}${2}eries`}, {`(?i)([^aeiouy]|qu)ies$`, `${1}y`}, {`(?i)(tive)s$`, `$1`}, {`(?i)([lre])ves$`, `${1}f`}, {`(?i)([^fo])ves$`, `${1}fe`}, {`(?i)(hive)s$`, `$1`}, {`(?i)(drive)s$`, `$1`}, {`(?i)(^analy)ses$`, `${1}sis`}, {`(?i)(analy|diagno|^ba|(p)arenthe|(p)rogno|(s)ynop|(t)he)ses$`, `${1}${2}sis`}, {`(?i)([ti])a$`, `${1}um`}, {`(?i)(p)eople$`, `${1}${2}erson`}, {`(?i)(m)en$`, `${1}an`}, {`(?i)(c)hildren$`, `${1}${2}hild`}, {`(?i)(n)ews$`, `${1}${2}ews`}, {`eaus$`, `eau`}, {`^(.*us)$`, `$1`}, {`(?i)s$`, ``}, }, Irregular: []*irregularItem{ {`foes`, `foe`}, {`waves`, `wave`}, {`curves`, `curve`}, {`atlases`, `atlas`}, {`beefs`, `beef`}, {`brothers`, `brother`}, {`cafes`, `cafe`}, {`children`, `child`}, {`cookies`, `cookie`}, {`corpuses`, `corpus`}, {`cows`, `cow`}, {`ganglions`, `ganglion`}, {`genies`, `genie`}, {`genera`, `genus`}, {`graffiti`, `graffito`}, {`hoofs`, `hoof`}, {`loaves`, `loaf`}, {`men`, `man`}, {`monies`, `money`}, {`mongooses`, `mongoose`}, {`moves`, `move`}, {`mythoi`, `mythos`}, {`niches`, `niche`}, {`numina`, `numen`}, {`occiputs`, `occiput`}, {`octopuses`, `octopus`}, {`opuses`, `opus`}, {`oxen`, `ox`}, {`penises`, `penis`}, {`people`, `person`}, {`sexes`, `sex`}, {`soliloquies`, `soliloquy`}, {`testes`, `testis`}, {`trilbys`, `trilby`}, {`turfs`, `turf`}, {`potatoes`, `potato`}, {`heroes`, `hero`}, {`teeth`, `tooth`}, {`geese`, `goose`}, {`feet`, `foot`}, }, } prepare(Singular) commonInitialismsForReplacer := make([]string, 0, len(commonInitialisms)) for _, initialism := range commonInitialisms { commonInitialismsForReplacer = 
append(commonInitialismsForReplacer, initialism, strings.Title(strings.ToLower(initialism))) } commonInitialismsReplacer = strings.NewReplacer(commonInitialismsForReplacer...) } // prepare rule, e.g., compile the pattern. func prepare(r Rule) error { var reString string switch r { case Plural: // Merge global uninflected with singularsUninflected rules[r].Uninflected = merge(uninflected, uninflectedPlurals) case Singular: // Merge global uninflected with singularsUninflected rules[r].Uninflected = merge(uninflected, uninflectedSingulars) } // Set InflectorRule.compiledUninflected by joining InflectorRule.Uninflected into // a single string then compile it. reString = fmt.Sprintf(`(?i)(^(?:%s))$`, strings.Join(rules[r].Uninflected, `|`)) rules[r].compiledUninflected = regexp.MustCompile(reString) // Prepare irregularMaps irregularMaps[r] = make(cache, len(rules[r].Irregular)) // Set InflectorRule.compiledIrregular by joining the irregularItem.word of Inflector.Irregular // into a single string then compile it. 
vIrregulars := make([]string, len(rules[r].Irregular)) for i, item := range rules[r].Irregular { vIrregulars[i] = item.word irregularMaps[r][item.word] = item.replacement } reString = fmt.Sprintf(`(?i)(.*)\b((?:%s))$`, strings.Join(vIrregulars, `|`)) rules[r].compiledIrregular = regexp.MustCompile(reString) // Compile all patterns in InflectorRule.Rules rules[r].compiledRules = make([]*compiledRule, len(rules[r].Rules)) for i, item := range rules[r].Rules { rules[r].compiledRules[i] = &compiledRule{item.replacement, regexp.MustCompile(item.pattern)} } // Prepare caches caches[r] = make(cache) return nil } // merge slice a and slice b func merge(a []string, b []string) []string { result := make([]string, len(a)+len(b)) copy(result, a) copy(result[len(a):], b) return result } func getInflected(r Rule, s string) string { mutex.Lock() defer mutex.Unlock() if v, ok := caches[r][s]; ok { return v } // Check for irregular words if res := rules[r].compiledIrregular.FindStringSubmatch(s); len(res) >= 3 { var buf bytes.Buffer buf.WriteString(res[1]) buf.WriteString(s[0:1]) buf.WriteString(irregularMaps[r][strings.ToLower(res[2])][1:]) // Cache it then returns caches[r][s] = buf.String() return caches[r][s] } // Check for uninflected words if rules[r].compiledUninflected.MatchString(s) { caches[r][s] = s return caches[r][s] } // Check each rule for _, re := range rules[r].compiledRules { if re.MatchString(s) { caches[r][s] = re.ReplaceAllString(s, re.replacement) return caches[r][s] } } // Returns unaltered caches[r][s] = s return caches[r][s] } // Pluralize returns string s in plural form. func Pluralize(s string) string { return getInflected(Plural, s) } // Singularize returns string s in singular form. 
func Singularize(s string) string { return getInflected(Singular, s) } var ( camelizeReg = regexp.MustCompile(`[^A-Za-z0-9]+`) ) // Camelize Converts a word like "send_email" to "SendEmail" func Camelize(s string) string { s = camelizeReg.ReplaceAllString(s, " ") return strings.Replace(strings.Title(s), " ", "", -1) } // Camel2id Converts a word like "SendEmail" to "send_email" func Camel2id(name string) string { var ( value = commonInitialismsReplacer.Replace(name) buf strings.Builder lastCase, nextCase, nextNumber bool // upper case == true curCase = value[0] <= 'Z' && value[0] >= 'A' ) for i, v := range value[:len(value)-1] { nextCase = value[i+1] <= 'Z' && value[i+1] >= 'A' nextNumber = value[i+1] >= '0' && value[i+1] <= '9' if curCase { if lastCase && (nextCase || nextNumber) { buf.WriteRune(v + 32) } else { if i > 0 && value[i-1] != '_' && value[i+1] != '_' { buf.WriteByte('_') } buf.WriteRune(v + 32) } } else { buf.WriteRune(v) } lastCase = curCase curCase = nextCase } if curCase { if !lastCase && len(value) > 1 { buf.WriteByte('_') } buf.WriteByte(value[len(value)-1] + 32) } else { buf.WriteByte(value[len(value)-1]) } ret := buf.String() return ret } // Camel2words Converts a CamelCase name into space-separated words. // For example, 'send_email' will be converted to 'Send Email'. func Camel2words(s string) string { s = camelizeReg.ReplaceAllString(s, " ") return strings.Title(s) }