123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216 |
- // Copyright 2015 Huan Du. All rights reserved.
- // Licensed under the MIT license that can be found in the LICENSE file.
- package xstrings
- import (
- "strings"
- "unicode/utf8"
- )
// Reverse returns str with its runes in reverse order.
//
// str is decoded as UTF-8 one rune at a time. The bytes that make up a rune
// keep their relative order; only the order of the runes changes. The result
// always has the same byte length as str.
func Reverse(str string) string {
	out := make([]byte, len(str))

	// Fill out from the back: the first rune of str lands at the very end,
	// the second one right before it, and so on.
	for w := len(out); len(str) > 0; {
		_, n := utf8.DecodeRuneInString(str)
		w -= n
		copy(out[w:], str[:n])
		str = str[n:]
	}

	return string(out)
}
// Slice returns the part of str between the rune indexes start (inclusive)
// and end (exclusive). Indexes count runes, not bytes.
//
// Start must satisfy 0 <= start <= rune length.
//
// End can be positive, zero or negative.
// If end >= 0, start and end must satisfy start <= end <= rune length.
// If end < 0, the slice runs to the end of str.
//
// Otherwise, Slice panics with "out of range". This also applies to a start
// beyond the rune length when end is negative.
func Slice(str string, start, end int) string {
	var size, startPos, endPos int

	origin := str

	// len(str) is the byte length, which is never smaller than the rune
	// length, so an end beyond it can be rejected without decoding anything.
	if start < 0 || end > len(str) || (end >= 0 && start > end) {
		panic("out of range")
	}

	// From here on a non-negative end holds the number of runes to keep.
	if end >= 0 {
		end -= start
	}

	// Skip start runes to find the byte offset of the first kept rune.
	for start > 0 && len(str) > 0 {
		_, size = utf8.DecodeRuneInString(str)
		start--
		startPos += size
		str = str[size:]
	}

	// The input ran out before start runes were skipped. This must be
	// checked before the end < 0 shortcut below, otherwise an out-of-range
	// start would silently yield an empty string.
	if start > 0 {
		panic("out of range")
	}

	if end < 0 {
		return origin[startPos:]
	}

	endPos = startPos

	// Walk over the runes to keep to find the byte offset just past them.
	for end > 0 && len(str) > 0 {
		_, size = utf8.DecodeRuneInString(str)
		end--
		endPos += size
		str = str[size:]
	}

	// The input ran out before end runes were consumed.
	if end > 0 {
		panic("out of range")
	}

	return origin[startPos:endPos]
}
// Partition splits str around the first occurrence of sep and returns the
// three pieces: the text before the match, the match itself and the text
// after it.
//
// If str contains sep, for example "hello" and "l", Partition returns
// "he", "l", "lo"
//
// If str doesn't contain sep, for example "hello" and "x", Partition returns
// "hello", "", ""
func Partition(str, sep string) (head, match, tail string) {
	at := strings.Index(str, sep)
	if at < 0 {
		// No match: everything belongs to head.
		return str, "", ""
	}

	end := at + len(sep)
	return str[:at], str[at:end], str[end:]
}
// LastPartition splits str around the last occurrence of sep and returns the
// three pieces: the text before the match, the match itself and the text
// after it.
//
// If str contains sep, for example "hello" and "l", LastPartition returns
// "hel", "l", "o"
//
// If str doesn't contain sep, for example "hello" and "x", LastPartition returns
// "", "", "hello"
func LastPartition(str, sep string) (head, match, tail string) {
	at := strings.LastIndex(str, sep)
	if at < 0 {
		// No match: everything belongs to tail.
		return "", "", str
	}

	end := at + len(sep)
	return str[:at], str[at:end], str[end:]
}
- // Insert src into dst at given rune index.
- // Index is counted by runes instead of bytes.
- //
- // If index is out of range of dst, panic with out of range.
- func Insert(dst, src string, index int) string {
- return Slice(dst, 0, index) + src + Slice(dst, index, -1)
- }
- // Scrub scrubs invalid utf8 bytes with repl string.
- // Adjacent invalid bytes are replaced only once.
- func Scrub(str, repl string) string {
- var buf *stringBuilder
- var r rune
- var size, pos int
- var hasError bool
- origin := str
- for len(str) > 0 {
- r, size = utf8.DecodeRuneInString(str)
- if r == utf8.RuneError {
- if !hasError {
- if buf == nil {
- buf = &stringBuilder{}
- }
- buf.WriteString(origin[:pos])
- hasError = true
- }
- } else if hasError {
- hasError = false
- buf.WriteString(repl)
- origin = origin[pos:]
- pos = 0
- }
- pos += size
- str = str[size:]
- }
- if buf != nil {
- buf.WriteString(origin)
- return buf.String()
- }
- // No invalid byte.
- return origin
- }
- // WordSplit splits a string into words. Returns a slice of words.
- // If there is no word in a string, return nil.
- //
- // Word is defined as a locale dependent string containing alphabetic characters,
- // which may also contain but not start with `'` and `-` characters.
- func WordSplit(str string) []string {
- var word string
- var words []string
- var r rune
- var size, pos int
- inWord := false
- for len(str) > 0 {
- r, size = utf8.DecodeRuneInString(str)
- switch {
- case isAlphabet(r):
- if !inWord {
- inWord = true
- word = str
- pos = 0
- }
- case inWord && (r == '\'' || r == '-'):
- // Still in word.
- default:
- if inWord {
- inWord = false
- words = append(words, word[:pos])
- }
- }
- pos += size
- str = str[size:]
- }
- if inWord {
- words = append(words, word[:pos])
- }
- return words
- }
|