transport.go 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241
  1. /*
  2. Copyright 2014 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package proxy
  14. import (
  15. "bytes"
  16. "compress/gzip"
  17. "fmt"
  18. "io"
  19. "io/ioutil"
  20. "net/http"
  21. "net/url"
  22. "path"
  23. "strings"
  24. "github.com/golang/glog"
  25. "golang.org/x/net/html"
  26. "golang.org/x/net/html/atom"
  27. "k8s.io/kubernetes/pkg/util/net"
  28. "k8s.io/kubernetes/pkg/util/sets"
  29. )
  30. // atomsToAttrs states which attributes of which tags require URL substitution.
  31. // Sources: http://www.w3.org/TR/REC-html40/index/attributes.html
  32. // http://www.w3.org/html/wg/drafts/html/master/index.html#attributes-1
  33. var atomsToAttrs = map[atom.Atom]sets.String{
  34. atom.A: sets.NewString("href"),
  35. atom.Applet: sets.NewString("codebase"),
  36. atom.Area: sets.NewString("href"),
  37. atom.Audio: sets.NewString("src"),
  38. atom.Base: sets.NewString("href"),
  39. atom.Blockquote: sets.NewString("cite"),
  40. atom.Body: sets.NewString("background"),
  41. atom.Button: sets.NewString("formaction"),
  42. atom.Command: sets.NewString("icon"),
  43. atom.Del: sets.NewString("cite"),
  44. atom.Embed: sets.NewString("src"),
  45. atom.Form: sets.NewString("action"),
  46. atom.Frame: sets.NewString("longdesc", "src"),
  47. atom.Head: sets.NewString("profile"),
  48. atom.Html: sets.NewString("manifest"),
  49. atom.Iframe: sets.NewString("longdesc", "src"),
  50. atom.Img: sets.NewString("longdesc", "src", "usemap"),
  51. atom.Input: sets.NewString("src", "usemap", "formaction"),
  52. atom.Ins: sets.NewString("cite"),
  53. atom.Link: sets.NewString("href"),
  54. atom.Object: sets.NewString("classid", "codebase", "data", "usemap"),
  55. atom.Q: sets.NewString("cite"),
  56. atom.Script: sets.NewString("src"),
  57. atom.Source: sets.NewString("src"),
  58. atom.Video: sets.NewString("poster", "src"),
  59. // TODO: css URLs hidden in style elements.
  60. }
  // Transport is an http.RoundTripper for text/html content: it forwards
// requests to a wrapped RoundTripper and rewrites the URLs found in HTML
// responses (and Location headers) so that they point back at the proxy
// server.
type Transport struct {
	// Scheme is sent as X-Forwarded-Proto (when non-empty) and becomes the
	// scheme of every rewritten URL.
	Scheme string
	// Host is sent as X-Forwarded-Host (when non-empty) and becomes the host
	// of every rewritten URL.
	Host string
	// PathPrepend is the path prefix put in front of rewritten URL paths and
	// of the request path reported in X-Forwarded-Uri.
	PathPrepend string

	// RoundTripper performs the actual request. When it is nil,
	// http.DefaultTransport is used.
	http.RoundTripper
}
  69. // RoundTrip implements the http.RoundTripper interface
  70. func (t *Transport) RoundTrip(req *http.Request) (*http.Response, error) {
  71. // Add reverse proxy headers.
  72. forwardedURI := path.Join(t.PathPrepend, req.URL.Path)
  73. if strings.HasSuffix(req.URL.Path, "/") {
  74. forwardedURI = forwardedURI + "/"
  75. }
  76. req.Header.Set("X-Forwarded-Uri", forwardedURI)
  77. if len(t.Host) > 0 {
  78. req.Header.Set("X-Forwarded-Host", t.Host)
  79. }
  80. if len(t.Scheme) > 0 {
  81. req.Header.Set("X-Forwarded-Proto", t.Scheme)
  82. }
  83. rt := t.RoundTripper
  84. if rt == nil {
  85. rt = http.DefaultTransport
  86. }
  87. resp, err := rt.RoundTrip(req)
  88. if err != nil {
  89. message := fmt.Sprintf("Error: '%s'\nTrying to reach: '%v'", err.Error(), req.URL.String())
  90. resp = &http.Response{
  91. StatusCode: http.StatusServiceUnavailable,
  92. Body: ioutil.NopCloser(strings.NewReader(message)),
  93. }
  94. return resp, nil
  95. }
  96. if redirect := resp.Header.Get("Location"); redirect != "" {
  97. resp.Header.Set("Location", t.rewriteURL(redirect, req.URL))
  98. return resp, nil
  99. }
  100. cType := resp.Header.Get("Content-Type")
  101. cType = strings.TrimSpace(strings.SplitN(cType, ";", 2)[0])
  102. if cType != "text/html" {
  103. // Do nothing, simply pass through
  104. return resp, nil
  105. }
  106. return t.rewriteResponse(req, resp)
  107. }
  108. var _ = net.RoundTripperWrapper(&Transport{})
  109. func (rt *Transport) WrappedRoundTripper() http.RoundTripper {
  110. return rt.RoundTripper
  111. }
  112. // rewriteURL rewrites a single URL to go through the proxy, if the URL refers
  113. // to the same host as sourceURL, which is the page on which the target URL
  114. // occurred. If any error occurs (e.g. parsing), it returns targetURL.
  115. func (t *Transport) rewriteURL(targetURL string, sourceURL *url.URL) string {
  116. url, err := url.Parse(targetURL)
  117. if err != nil {
  118. return targetURL
  119. }
  120. isDifferentHost := url.Host != "" && url.Host != sourceURL.Host
  121. isRelative := !strings.HasPrefix(url.Path, "/")
  122. if isDifferentHost || isRelative {
  123. return targetURL
  124. }
  125. url.Scheme = t.Scheme
  126. url.Host = t.Host
  127. origPath := url.Path
  128. // Do not rewrite URL if the sourceURL already contains the necessary prefix.
  129. if strings.HasPrefix(url.Path, t.PathPrepend) {
  130. return url.String()
  131. }
  132. url.Path = path.Join(t.PathPrepend, url.Path)
  133. if strings.HasSuffix(origPath, "/") {
  134. // Add back the trailing slash, which was stripped by path.Join().
  135. url.Path += "/"
  136. }
  137. return url.String()
  138. }
  139. // rewriteHTML scans the HTML for tags with url-valued attributes, and updates
  140. // those values with the urlRewriter function. The updated HTML is output to the
  141. // writer.
  142. func rewriteHTML(reader io.Reader, writer io.Writer, urlRewriter func(string) string) error {
  143. // Note: This assumes the content is UTF-8.
  144. tokenizer := html.NewTokenizer(reader)
  145. var err error
  146. for err == nil {
  147. tokenType := tokenizer.Next()
  148. switch tokenType {
  149. case html.ErrorToken:
  150. err = tokenizer.Err()
  151. case html.StartTagToken, html.SelfClosingTagToken:
  152. token := tokenizer.Token()
  153. if urlAttrs, ok := atomsToAttrs[token.DataAtom]; ok {
  154. for i, attr := range token.Attr {
  155. if urlAttrs.Has(attr.Key) {
  156. token.Attr[i].Val = urlRewriter(attr.Val)
  157. }
  158. }
  159. }
  160. _, err = writer.Write([]byte(token.String()))
  161. default:
  162. _, err = writer.Write(tokenizer.Raw())
  163. }
  164. }
  165. if err != io.EOF {
  166. return err
  167. }
  168. return nil
  169. }
  170. // rewriteResponse modifies an HTML response by updating absolute links referring
  171. // to the original host to instead refer to the proxy transport.
  172. func (t *Transport) rewriteResponse(req *http.Request, resp *http.Response) (*http.Response, error) {
  173. origBody := resp.Body
  174. defer origBody.Close()
  175. newContent := &bytes.Buffer{}
  176. var reader io.Reader = origBody
  177. var writer io.Writer = newContent
  178. encoding := resp.Header.Get("Content-Encoding")
  179. switch encoding {
  180. case "gzip":
  181. var err error
  182. reader, err = gzip.NewReader(reader)
  183. if err != nil {
  184. return nil, fmt.Errorf("errorf making gzip reader: %v", err)
  185. }
  186. gzw := gzip.NewWriter(writer)
  187. defer gzw.Close()
  188. writer = gzw
  189. // TODO: support flate, other encodings.
  190. case "":
  191. // This is fine
  192. default:
  193. // Some encoding we don't understand-- don't try to parse this
  194. glog.Errorf("Proxy encountered encoding %v for text/html; can't understand this so not fixing links.", encoding)
  195. return resp, nil
  196. }
  197. urlRewriter := func(targetUrl string) string {
  198. return t.rewriteURL(targetUrl, req.URL)
  199. }
  200. err := rewriteHTML(reader, writer, urlRewriter)
  201. if err != nil {
  202. glog.Errorf("Failed to rewrite URLs: %v", err)
  203. return resp, err
  204. }
  205. resp.Body = ioutil.NopCloser(newContent)
  206. // Update header node with new content-length
  207. // TODO: Remove any hash/signature headers here?
  208. resp.Header.Del("Content-Length")
  209. resp.ContentLength = int64(newContent.Len())
  210. return resp, err
  211. }