kubelet_network.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408
  1. /*
  2. Copyright 2016 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package kubelet
  14. import (
  15. "fmt"
  16. "io"
  17. "io/ioutil"
  18. "net"
  19. "strings"
  20. "github.com/golang/glog"
  21. "k8s.io/kubernetes/pkg/api"
  22. "k8s.io/kubernetes/pkg/apis/componentconfig"
  23. "k8s.io/kubernetes/pkg/kubelet/network"
  24. "k8s.io/kubernetes/pkg/util/bandwidth"
  25. utiliptables "k8s.io/kubernetes/pkg/util/iptables"
  26. "k8s.io/kubernetes/pkg/util/sets"
  27. )
  28. const (
  29. // the mark-for-masquerade chain
  30. // TODO: clean up this logic in kube-proxy
  31. KubeMarkMasqChain utiliptables.Chain = "KUBE-MARK-MASQ"
  32. // the mark-for-drop chain
  33. KubeMarkDropChain utiliptables.Chain = "KUBE-MARK-DROP"
  34. // kubernetes postrouting rules
  35. KubePostroutingChain utiliptables.Chain = "KUBE-POSTROUTING"
  36. // kubernetes firewall rules
  37. KubeFirewallChain utiliptables.Chain = "KUBE-FIREWALL"
  38. )
  39. // effectiveHairpinMode determines the effective hairpin mode given the
  40. // configured mode, container runtime, and whether cbr0 should be configured.
  41. func effectiveHairpinMode(hairpinMode componentconfig.HairpinMode, containerRuntime string, configureCBR0 bool, networkPlugin string) (componentconfig.HairpinMode, error) {
  42. // The hairpin mode setting doesn't matter if:
  43. // - We're not using a bridge network. This is hard to check because we might
  44. // be using a plugin. It matters if --configure-cbr0=true, and we currently
  45. // don't pipe it down to any plugins.
  46. // - It's set to hairpin-veth for a container runtime that doesn't know how
  47. // to set the hairpin flag on the veth's of containers. Currently the
  48. // docker runtime is the only one that understands this.
  49. // - It's set to "none".
  50. if hairpinMode == componentconfig.PromiscuousBridge || hairpinMode == componentconfig.HairpinVeth {
  51. // Only on docker.
  52. if containerRuntime != "docker" {
  53. glog.Warningf("Hairpin mode set to %q but container runtime is %q, ignoring", hairpinMode, containerRuntime)
  54. return componentconfig.HairpinNone, nil
  55. }
  56. if hairpinMode == componentconfig.PromiscuousBridge && !configureCBR0 && networkPlugin != "kubenet" {
  57. // This is not a valid combination. Users might be using the
  58. // default values (from before the hairpin-mode flag existed) and we
  59. // should keep the old behavior.
  60. glog.Warningf("Hairpin mode set to %q but configureCBR0 is false, falling back to %q", hairpinMode, componentconfig.HairpinVeth)
  61. return componentconfig.HairpinVeth, nil
  62. }
  63. } else if hairpinMode == componentconfig.HairpinNone {
  64. if configureCBR0 {
  65. glog.Warningf("Hairpin mode set to %q and configureCBR0 is true, this might result in loss of hairpin packets", hairpinMode)
  66. }
  67. } else {
  68. return "", fmt.Errorf("unknown value: %q", hairpinMode)
  69. }
  70. return hairpinMode, nil
  71. }
  72. // Validate given node IP belongs to the current host
  73. func (kl *Kubelet) validateNodeIP() error {
  74. if kl.nodeIP == nil {
  75. return nil
  76. }
  77. // Honor IP limitations set in setNodeStatus()
  78. if kl.nodeIP.IsLoopback() {
  79. return fmt.Errorf("nodeIP can't be loopback address")
  80. }
  81. if kl.nodeIP.To4() == nil {
  82. return fmt.Errorf("nodeIP must be IPv4 address")
  83. }
  84. addrs, err := net.InterfaceAddrs()
  85. if err != nil {
  86. return err
  87. }
  88. for _, addr := range addrs {
  89. var ip net.IP
  90. switch v := addr.(type) {
  91. case *net.IPNet:
  92. ip = v.IP
  93. case *net.IPAddr:
  94. ip = v.IP
  95. }
  96. if ip != nil && ip.Equal(kl.nodeIP) {
  97. return nil
  98. }
  99. }
  100. return fmt.Errorf("Node IP: %q not found in the host's network interfaces", kl.nodeIP.String())
  101. }
  102. // providerRequiresNetworkingConfiguration returns whether the cloud provider
  103. // requires special networking configuration.
  104. func (kl *Kubelet) providerRequiresNetworkingConfiguration() bool {
  105. // TODO: We should have a mechanism to say whether native cloud provider
  106. // is used or whether we are using overlay networking. We should return
  107. // true for cloud providers if they implement Routes() interface and
  108. // we are not using overlay networking.
  109. if kl.cloud == nil || kl.cloud.ProviderName() != "gce" || kl.flannelExperimentalOverlay {
  110. return false
  111. }
  112. _, supported := kl.cloud.Routes()
  113. return supported
  114. }
  115. // Returns the list of DNS servers and DNS search domains.
  116. func (kl *Kubelet) parseResolvConf(reader io.Reader) (nameservers []string, searches []string, err error) {
  117. var scrubber dnsScrubber
  118. if kl.cloud != nil {
  119. scrubber = kl.cloud
  120. }
  121. return parseResolvConf(reader, scrubber)
  122. }
  // dnsScrubber post-processes DNS settings parsed from a resolv.conf file.
  // It is declared as a small interface of its own to help with testing.
  type dnsScrubber interface {
  	// ScrubDNS receives the parsed nameservers and search domains and returns
  	// the nameserver and search lists that should be used in their place.
  	ScrubDNS(nameservers []string, searches []string) (nsOut []string, srchOut []string)
  }
  127. // parseResolveConf reads a resolv.conf file from the given reader, and parses
  128. // it into nameservers and searches, possibly returning an error. The given
  129. // dnsScrubber allows cloud providers to post-process dns names.
  130. // TODO: move to utility package
  131. func parseResolvConf(reader io.Reader, dnsScrubber dnsScrubber) (nameservers []string, searches []string, err error) {
  132. file, err := ioutil.ReadAll(reader)
  133. if err != nil {
  134. return nil, nil, err
  135. }
  136. // Lines of the form "nameserver 1.2.3.4" accumulate.
  137. nameservers = []string{}
  138. // Lines of the form "search example.com" overrule - last one wins.
  139. searches = []string{}
  140. lines := strings.Split(string(file), "\n")
  141. for l := range lines {
  142. trimmed := strings.TrimSpace(lines[l])
  143. if strings.HasPrefix(trimmed, "#") {
  144. continue
  145. }
  146. fields := strings.Fields(trimmed)
  147. if len(fields) == 0 {
  148. continue
  149. }
  150. if fields[0] == "nameserver" {
  151. nameservers = append(nameservers, fields[1:]...)
  152. }
  153. if fields[0] == "search" {
  154. searches = fields[1:]
  155. }
  156. }
  157. // Give the cloud-provider a chance to post-process DNS settings.
  158. if dnsScrubber != nil {
  159. nameservers, searches = dnsScrubber.ScrubDNS(nameservers, searches)
  160. }
  161. return nameservers, searches, nil
  162. }
  163. // cleanupBandwidthLimits updates the status of bandwidth-limited containers
  164. // and ensures that only the appropriate CIDRs are active on the node.
  165. func (kl *Kubelet) cleanupBandwidthLimits(allPods []*api.Pod) error {
  166. if kl.shaper == nil {
  167. return nil
  168. }
  169. currentCIDRs, err := kl.shaper.GetCIDRs()
  170. if err != nil {
  171. return err
  172. }
  173. possibleCIDRs := sets.String{}
  174. for ix := range allPods {
  175. pod := allPods[ix]
  176. ingress, egress, err := bandwidth.ExtractPodBandwidthResources(pod.Annotations)
  177. if err != nil {
  178. return err
  179. }
  180. if ingress == nil && egress == nil {
  181. glog.V(8).Infof("Not a bandwidth limited container...")
  182. continue
  183. }
  184. status, found := kl.statusManager.GetPodStatus(pod.UID)
  185. if !found {
  186. // TODO(random-liu): Cleanup status get functions. (issue #20477)
  187. s, err := kl.containerRuntime.GetPodStatus(pod.UID, pod.Name, pod.Namespace)
  188. if err != nil {
  189. return err
  190. }
  191. status = kl.generateAPIPodStatus(pod, s)
  192. }
  193. if status.Phase == api.PodRunning {
  194. possibleCIDRs.Insert(fmt.Sprintf("%s/32", status.PodIP))
  195. }
  196. }
  197. for _, cidr := range currentCIDRs {
  198. if !possibleCIDRs.Has(cidr) {
  199. glog.V(2).Infof("Removing CIDR: %s (%v)", cidr, possibleCIDRs)
  200. if err := kl.shaper.Reset(cidr); err != nil {
  201. return err
  202. }
  203. }
  204. }
  205. return nil
  206. }
  207. // TODO: remove when kubenet plugin is ready
  208. // NOTE!!! if you make changes here, also make them to kubenet
  209. func (kl *Kubelet) reconcileCBR0(podCIDR string) error {
  210. if podCIDR == "" {
  211. glog.V(5).Info("PodCIDR not set. Will not configure cbr0.")
  212. return nil
  213. }
  214. glog.V(5).Infof("PodCIDR is set to %q", podCIDR)
  215. _, cidr, err := net.ParseCIDR(podCIDR)
  216. if err != nil {
  217. return err
  218. }
  219. // Set cbr0 interface address to first address in IPNet
  220. cidr.IP.To4()[3] += 1
  221. if err := ensureCbr0(cidr, kl.hairpinMode == componentconfig.PromiscuousBridge, kl.babysitDaemons); err != nil {
  222. return err
  223. }
  224. if kl.shapingEnabled() {
  225. if kl.shaper == nil {
  226. glog.V(5).Info("Shaper is nil, creating")
  227. kl.shaper = bandwidth.NewTCShaper("cbr0")
  228. }
  229. return kl.shaper.ReconcileInterface()
  230. }
  231. return nil
  232. }
  233. // syncNetworkStatus updates the network state, ensuring that the network is
  234. // configured correctly if the kubelet is set to configure cbr0:
  235. // * handshake flannel helper if the flannel experimental overlay is being used.
  236. // * ensure that iptables masq rules are setup
  237. // * reconcile cbr0 with the pod CIDR
  238. func (kl *Kubelet) syncNetworkStatus() {
  239. var err error
  240. if kl.configureCBR0 {
  241. if kl.flannelExperimentalOverlay {
  242. podCIDR, err := kl.flannelHelper.Handshake()
  243. if err != nil {
  244. glog.Infof("Flannel server handshake failed %v", err)
  245. return
  246. }
  247. kl.updatePodCIDR(podCIDR)
  248. }
  249. if err := ensureIPTablesMasqRule(kl.iptClient, kl.nonMasqueradeCIDR); err != nil {
  250. err = fmt.Errorf("Error on adding ip table rules: %v", err)
  251. glog.Error(err)
  252. kl.runtimeState.setNetworkState(err)
  253. return
  254. }
  255. podCIDR := kl.runtimeState.podCIDR()
  256. if len(podCIDR) == 0 {
  257. err = fmt.Errorf("ConfigureCBR0 requested, but PodCIDR not set. Will not configure CBR0 right now")
  258. glog.Warning(err)
  259. } else if err = kl.reconcileCBR0(podCIDR); err != nil {
  260. err = fmt.Errorf("Error configuring cbr0: %v", err)
  261. glog.Error(err)
  262. }
  263. if err != nil {
  264. kl.runtimeState.setNetworkState(err)
  265. return
  266. }
  267. }
  268. kl.runtimeState.setNetworkState(kl.networkPlugin.Status())
  269. }
  270. // updatePodCIDR updates the pod CIDR in the runtime state if it is different
  271. // from the current CIDR.
  272. func (kl *Kubelet) updatePodCIDR(cidr string) {
  273. podCIDR := kl.runtimeState.podCIDR()
  274. if podCIDR == cidr {
  275. return
  276. }
  277. glog.Infof("Setting Pod CIDR: %v -> %v", podCIDR, cidr)
  278. kl.runtimeState.setPodCIDR(cidr)
  279. if kl.networkPlugin != nil {
  280. details := make(map[string]interface{})
  281. details[network.NET_PLUGIN_EVENT_POD_CIDR_CHANGE_DETAIL_CIDR] = cidr
  282. kl.networkPlugin.Event(network.NET_PLUGIN_EVENT_POD_CIDR_CHANGE, details)
  283. }
  284. }
  285. // shapingEnabled returns whether traffic shaping is enabled.
  286. func (kl *Kubelet) shapingEnabled() bool {
  287. // Disable shaping if a network plugin is defined and supports shaping
  288. if kl.networkPlugin != nil && kl.networkPlugin.Capabilities().Has(network.NET_PLUGIN_CAPABILITY_SHAPING) {
  289. return false
  290. }
  291. return true
  292. }
  293. // syncNetworkUtil ensures the network utility are present on host.
  294. // Network util includes:
  295. // 1. In nat table, KUBE-MARK-DROP rule to mark connections for dropping
  296. // Marked connection will be drop on INPUT/OUTPUT Chain in filter table
  297. // 2. In nat table, KUBE-MARK-MASQ rule to mark connections for SNAT
  298. // Marked connection will get SNAT on POSTROUTING Chain in nat table
  299. func (kl *Kubelet) syncNetworkUtil() {
  300. if kl.iptablesMasqueradeBit < 0 || kl.iptablesMasqueradeBit > 31 {
  301. glog.Errorf("invalid iptables-masquerade-bit %v not in [0, 31]", kl.iptablesMasqueradeBit)
  302. return
  303. }
  304. if kl.iptablesDropBit < 0 || kl.iptablesDropBit > 31 {
  305. glog.Errorf("invalid iptables-drop-bit %v not in [0, 31]", kl.iptablesDropBit)
  306. return
  307. }
  308. if kl.iptablesDropBit == kl.iptablesMasqueradeBit {
  309. glog.Errorf("iptables-masquerade-bit %v and iptables-drop-bit %v must be different", kl.iptablesMasqueradeBit, kl.iptablesDropBit)
  310. return
  311. }
  312. // Setup KUBE-MARK-DROP rules
  313. dropMark := getIPTablesMark(kl.iptablesDropBit)
  314. if _, err := kl.iptClient.EnsureChain(utiliptables.TableNAT, KubeMarkDropChain); err != nil {
  315. glog.Errorf("Failed to ensure that %s chain %s exists: %v", utiliptables.TableNAT, KubeMarkDropChain, err)
  316. return
  317. }
  318. if _, err := kl.iptClient.EnsureRule(utiliptables.Append, utiliptables.TableNAT, KubeMarkDropChain, "-j", "MARK", "--set-xmark", dropMark); err != nil {
  319. glog.Errorf("Failed to ensure marking rule for %v: %v", KubeMarkDropChain, err)
  320. return
  321. }
  322. if _, err := kl.iptClient.EnsureChain(utiliptables.TableFilter, KubeFirewallChain); err != nil {
  323. glog.Errorf("Failed to ensure that %s chain %s exists: %v", utiliptables.TableFilter, KubeFirewallChain, err)
  324. return
  325. }
  326. if _, err := kl.iptClient.EnsureRule(utiliptables.Append, utiliptables.TableFilter, KubeFirewallChain,
  327. "-m", "comment", "--comment", "kubernetes firewall for dropping marked packets",
  328. "-m", "mark", "--mark", dropMark,
  329. "-j", "DROP"); err != nil {
  330. glog.Errorf("Failed to ensure rule to drop packet marked by %v in %v chain %v: %v", KubeMarkDropChain, utiliptables.TableFilter, KubeFirewallChain, err)
  331. return
  332. }
  333. if _, err := kl.iptClient.EnsureRule(utiliptables.Prepend, utiliptables.TableFilter, utiliptables.ChainOutput, "-j", string(KubeFirewallChain)); err != nil {
  334. glog.Errorf("Failed to ensure that %s chain %s jumps to %s: %v", utiliptables.TableFilter, utiliptables.ChainOutput, KubeFirewallChain, err)
  335. return
  336. }
  337. if _, err := kl.iptClient.EnsureRule(utiliptables.Prepend, utiliptables.TableFilter, utiliptables.ChainInput, "-j", string(KubeFirewallChain)); err != nil {
  338. glog.Errorf("Failed to ensure that %s chain %s jumps to %s: %v", utiliptables.TableFilter, utiliptables.ChainInput, KubeFirewallChain, err)
  339. return
  340. }
  341. // Setup KUBE-MARK-MASQ rules
  342. masqueradeMark := getIPTablesMark(kl.iptablesMasqueradeBit)
  343. if _, err := kl.iptClient.EnsureChain(utiliptables.TableNAT, KubeMarkMasqChain); err != nil {
  344. glog.Errorf("Failed to ensure that %s chain %s exists: %v", utiliptables.TableNAT, KubeMarkMasqChain, err)
  345. return
  346. }
  347. if _, err := kl.iptClient.EnsureChain(utiliptables.TableNAT, KubePostroutingChain); err != nil {
  348. glog.Errorf("Failed to ensure that %s chain %s exists: %v", utiliptables.TableNAT, KubePostroutingChain, err)
  349. return
  350. }
  351. if _, err := kl.iptClient.EnsureRule(utiliptables.Append, utiliptables.TableNAT, KubeMarkMasqChain, "-j", "MARK", "--set-xmark", masqueradeMark); err != nil {
  352. glog.Errorf("Failed to ensure marking rule for %v: %v", KubeMarkMasqChain, err)
  353. return
  354. }
  355. if _, err := kl.iptClient.EnsureRule(utiliptables.Prepend, utiliptables.TableNAT, utiliptables.ChainPostrouting,
  356. "-m", "comment", "--comment", "kubernetes postrouting rules", "-j", string(KubePostroutingChain)); err != nil {
  357. glog.Errorf("Failed to ensure that %s chain %s jumps to %s: %v", utiliptables.TableNAT, utiliptables.ChainPostrouting, KubePostroutingChain, err)
  358. return
  359. }
  360. if _, err := kl.iptClient.EnsureRule(utiliptables.Append, utiliptables.TableNAT, KubePostroutingChain,
  361. "-m", "comment", "--comment", "kubernetes service traffic requiring SNAT",
  362. "-m", "mark", "--mark", masqueradeMark, "-j", "MASQUERADE"); err != nil {
  363. glog.Errorf("Failed to ensure SNAT rule for packets marked by %v in %v chain %v: %v", KubeMarkMasqChain, utiliptables.TableNAT, KubePostroutingChain, err)
  364. return
  365. }
  366. }
  // getIPTablesMark returns the fwmark for the given bit in "value/mask" form,
  // where value and mask are both the single bit rendered as a zero-padded
  // hexadecimal number; for example bit 14 yields "0x00004000/0x00004000".
  //
  // bit is expected to be in [0, 31]; a bit outside that range produces an
  // all-zero mark.
  func getIPTablesMark(bit int) string {
  	// Use an explicit 32-bit unsigned value. With a plain int, 1<<31 overflows
  	// to a negative number on platforms where int is 32 bits wide, and the
  	// mark would then be formatted with a leading minus sign.
  	value := uint32(1) << uint(bit)
  	return fmt.Sprintf("%#08x/%#08x", value, value)
  }