links.go 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238
  1. /*
  2. Copyright 2015 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package main
  14. import (
  15. "errors"
  16. "fmt"
  17. "net/url"
  18. "os"
  19. "path"
  20. "regexp"
  21. "strings"
  22. )
  // linkRE matches a markdown link such as [foo](bar "alt-text"). Group 1 is
  // the visible text; group 2 is everything between the parentheses.
  var linkRE = regexp.MustCompile(`\[([^]]*)\]\(([^)]*)\)`)

  // badLinkRE matches a markdown link that was typed backwards: (foo)[bar].
  var badLinkRE = regexp.MustCompile(`\([^]()]*\)\[[^]()]*\]`)

  // altTextRE separates a link target (group 1) from a trailing quoted
  // alt-text (group 2, which keeps its leading space and the quotes).
  var altTextRE = regexp.MustCompile(`([^)]*)( ".*")`)
  31. func processLink(in string, filePath string) (string, error) {
  32. var errs []string
  33. out := linkRE.ReplaceAllStringFunc(in, func(in string) string {
  34. var err error
  35. match := linkRE.FindStringSubmatch(in)
  36. if match == nil {
  37. errs = append(errs, fmt.Sprintf("Detected this line had a link, but unable to parse, %v", in))
  38. return ""
  39. }
  40. // match[0] is the entire expression;
  41. visibleText := match[1]
  42. linkText := match[2]
  43. altText := ""
  44. if parts := altTextRE.FindStringSubmatch(linkText); parts != nil {
  45. linkText = parts[1]
  46. altText = parts[2]
  47. }
  48. // clean up some random garbage I found in our docs.
  49. linkText = strings.Trim(linkText, " ")
  50. linkText = strings.Trim(linkText, "\n")
  51. linkText = strings.Trim(linkText, " ")
  52. u, terr := url.Parse(linkText)
  53. if terr != nil {
  54. errs = append(errs, fmt.Sprintf("link %q is unparsable: %v", linkText, terr))
  55. return in
  56. }
  57. if u.Host != "" && u.Host != "github.com" {
  58. // We only care about relative links and links within github.
  59. return in
  60. }
  61. suggestedVisibleText := visibleText
  62. if u.Path != "" && !strings.HasPrefix(linkText, "TODO:") {
  63. newPath, targetExists := checkPath(filePath, path.Clean(u.Path))
  64. if !targetExists {
  65. errs = append(errs, fmt.Sprintf("%q: target not found", linkText))
  66. return in
  67. }
  68. u.Path = newPath
  69. if strings.HasPrefix(u.Path, "/") {
  70. u.Host = "github.com"
  71. u.Scheme = "https"
  72. } else {
  73. // Remove host and scheme from relative paths
  74. u.Host = ""
  75. u.Scheme = ""
  76. }
  77. // Make the visible text show the absolute path if it's
  78. // not nested in or beneath the current directory.
  79. if strings.HasPrefix(u.Path, "..") {
  80. dir := path.Dir(filePath)
  81. suggestedVisibleText, err = makeRepoRelative(path.Join(dir, u.Path), filePath)
  82. if err != nil {
  83. errs = append(errs, fmt.Sprintf("%q: unable to make path relative", filePath))
  84. return in
  85. }
  86. } else {
  87. suggestedVisibleText = u.Path
  88. }
  89. var unescaped string
  90. if unescaped, err = url.QueryUnescape(u.String()); err != nil {
  91. // Remove %28 type stuff, be nice to humans.
  92. // And don't fight with the toc generator.
  93. linkText = unescaped
  94. } else {
  95. linkText = u.String()
  96. }
  97. }
  98. // If the current visible text is trying to be a file name, use
  99. // the correct file name.
  100. if strings.HasSuffix(visibleText, ".md") && !strings.ContainsAny(visibleText, ` '"`+"`") {
  101. visibleText = suggestedVisibleText
  102. }
  103. return fmt.Sprintf("[%s](%s)", visibleText, linkText+altText)
  104. })
  105. if len(errs) != 0 {
  106. return "", errors.New(strings.Join(errs, ","))
  107. }
  108. return out, nil
  109. }
  110. // updateLinks assumes lines has links in markdown syntax, and verifies that
  111. // any relative links actually point to files that exist.
  112. func updateLinks(filePath string, mlines mungeLines) (mungeLines, error) {
  113. var out mungeLines
  114. allErrs := []string{}
  115. for lineNum, mline := range mlines {
  116. if mline.preformatted {
  117. out = append(out, mline)
  118. continue
  119. }
  120. if badMatch := badLinkRE.FindString(mline.data); badMatch != "" {
  121. allErrs = append(allErrs,
  122. fmt.Sprintf("On line %d: found backwards markdown link %q", lineNum, badMatch))
  123. }
  124. if !mline.link {
  125. out = append(out, mline)
  126. continue
  127. }
  128. line, err := processLink(mline.data, filePath)
  129. if err != nil {
  130. var s = fmt.Sprintf("On line %d: %s", lineNum, err.Error())
  131. err := errors.New(s)
  132. allErrs = append(allErrs, err.Error())
  133. }
  134. ml := newMungeLine(line)
  135. out = append(out, ml)
  136. }
  137. err := error(nil)
  138. if len(allErrs) != 0 {
  139. err = fmt.Errorf("%s", strings.Join(allErrs, "\n"))
  140. }
  141. return out, err
  142. }
  // cleanPath simplifies linkPath as seen from the directory dirPath. The two
  // are joined before cleaning because only then can path.Clean tell that a
  // detour such as ../docs (from inside docs) is unnecessary.
  //
  // If the cleaned result still lies beneath dirPath, the part below dirPath is
  // returned, and the rewrite is printed to stdout when it differs from
  // linkPath. Otherwise linkPath is returned unchanged.
  func cleanPath(dirPath, linkPath string) string {
  	prefix := dirPath + "/"
  	joined := path.Clean(path.Join(dirPath, linkPath))
  	if !strings.HasPrefix(joined, prefix) {
  		return linkPath
  	}
  	simplified := joined[len(prefix):]
  	if simplified != linkPath {
  		fmt.Printf("%s -> %s\n", linkPath, simplified)
  	}
  	return simplified
  }
  156. func checkPath(filePath, linkPath string) (newPath string, ok bool) {
  157. dir := path.Dir(filePath)
  158. absFilePrefixes := []string{
  159. "/kubernetes/kubernetes/blob/master/",
  160. "/kubernetes/kubernetes/tree/master/",
  161. }
  162. for _, prefix := range absFilePrefixes {
  163. if strings.HasPrefix(linkPath, prefix) {
  164. linkPath = strings.TrimPrefix(linkPath, prefix)
  165. // Now linkPath is relative to the root of the repo. The below
  166. // loop that adds ../ at the beginning of the path should find
  167. // the right path.
  168. break
  169. }
  170. }
  171. if strings.HasPrefix(linkPath, "/") {
  172. // These links might go to e.g. the github issues page, or a
  173. // file at a particular revision, or another github project
  174. // entirely.
  175. return linkPath, true
  176. }
  177. linkPath = cleanPath(dir, linkPath)
  178. // Fast exit if the link is already correct.
  179. if info, err := os.Stat(path.Join(dir, linkPath)); err == nil {
  180. if info.IsDir() {
  181. return linkPath + "/", true
  182. }
  183. return linkPath, true
  184. }
  185. for strings.HasPrefix(linkPath, "../") {
  186. linkPath = strings.TrimPrefix(linkPath, "../")
  187. }
  188. // Fix - vs _ automatically
  189. nameMungers := []func(string) string{
  190. func(s string) string { return s },
  191. func(s string) string { return strings.Replace(s, "-", "_", -1) },
  192. func(s string) string { return strings.Replace(s, "_", "-", -1) },
  193. }
  194. // Fix being moved into/out of admin (replace "admin" with directory
  195. // you're doing mass movements to/from).
  196. pathMungers := []func(string) string{
  197. func(s string) string { return s },
  198. func(s string) string { return path.Join("admin", s) },
  199. func(s string) string { return strings.TrimPrefix(s, "admin/") },
  200. }
  201. for _, namer := range nameMungers {
  202. for _, pather := range pathMungers {
  203. newPath = pather(namer(linkPath))
  204. for i := 0; i < 7; i++ {
  205. // The file must exist.
  206. target := path.Join(dir, newPath)
  207. if info, err := os.Stat(target); err == nil {
  208. if info.IsDir() {
  209. return newPath + "/", true
  210. }
  211. return cleanPath(dir, newPath), true
  212. }
  213. newPath = path.Join("..", newPath)
  214. }
  215. }
  216. }
  217. return linkPath, false
  218. }