nodeoutofdisk.go 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266
  1. /*
  2. Copyright 2015 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package e2e
  14. import (
  15. "encoding/json"
  16. "fmt"
  17. "time"
  18. cadvisorapi "github.com/google/cadvisor/info/v1"
  19. "k8s.io/kubernetes/pkg/api"
  20. "k8s.io/kubernetes/pkg/api/resource"
  21. client "k8s.io/kubernetes/pkg/client/unversioned"
  22. "k8s.io/kubernetes/pkg/fields"
  23. "k8s.io/kubernetes/pkg/util/wait"
  24. "k8s.io/kubernetes/test/e2e/framework"
  25. . "github.com/onsi/ginkgo"
  26. . "github.com/onsi/gomega"
  27. )
  28. const (
  29. mb = 1024 * 1024
  30. gb = 1024 * mb
  31. // TODO(madhusudancs): find a way to query kubelet's disk space manager to obtain this value. 256MB
  32. // is the default that is set today. This test might break if the default value changes. This value
  33. // can be configured by setting the "low-diskspace-threshold-mb" flag while starting a kubelet.
  34. // However, kubelets are started as part of the cluster start up, once, before any e2e test is run,
  35. // and remain unchanged until all the tests are run and the cluster is brought down. Changing the
  36. // flag value affects all the e2e tests. So we are hard-coding this value for now.
  37. lowDiskSpaceThreshold uint64 = 256 * mb
  38. nodeOODTimeOut = 5 * time.Minute
  39. numNodeOODPods = 3
  40. )
  41. // Plan:
  42. // 1. Fill disk space on all nodes except one. One node is left out so that we can schedule pods
  43. // on that node. Arbitrarily choose that node to be node with index 0. This makes this a disruptive test.
  44. // 2. Get the CPU capacity on unfilled node.
  45. // 3. Divide the available CPU into one less than the number of pods we want to schedule. We want
  46. // to schedule 3 pods, so divide CPU capacity by 2.
  47. // 4. Request the divided CPU for each pod.
  48. // 5. Observe that 2 of the pods schedule onto the node whose disk is not full, and the remaining
  49. // pod stays pending and does not schedule onto the nodes whose disks are full nor the node
  50. // with the other two pods, since there is not enough free CPU capacity there.
  51. // 6. Recover disk space from one of the nodes whose disk space was previously filled. Arbritrarily
  52. // choose that node to be node with index 1.
  53. // 7. Observe that the pod in pending status schedules on that node.
  54. //
  55. // Flaky issue #20015. We have no clear path for how to test this functionality in a non-flaky way.
  56. var _ = framework.KubeDescribe("NodeOutOfDisk [Serial] [Flaky] [Disruptive]", func() {
  57. var c *client.Client
  58. var unfilledNodeName, recoveredNodeName string
  59. f := framework.NewDefaultFramework("node-outofdisk")
  60. BeforeEach(func() {
  61. c = f.Client
  62. nodelist := framework.GetReadySchedulableNodesOrDie(c)
  63. // Skip this test on small clusters. No need to fail since it is not a use
  64. // case that any cluster of small size needs to support.
  65. framework.SkipUnlessNodeCountIsAtLeast(2)
  66. unfilledNodeName = nodelist.Items[0].Name
  67. for _, node := range nodelist.Items[1:] {
  68. fillDiskSpace(c, &node)
  69. }
  70. })
  71. AfterEach(func() {
  72. nodelist := framework.GetReadySchedulableNodesOrDie(c)
  73. Expect(len(nodelist.Items)).ToNot(BeZero())
  74. for _, node := range nodelist.Items {
  75. if unfilledNodeName == node.Name || recoveredNodeName == node.Name {
  76. continue
  77. }
  78. recoverDiskSpace(c, &node)
  79. }
  80. })
  81. It("runs out of disk space", func() {
  82. unfilledNode, err := c.Nodes().Get(unfilledNodeName)
  83. framework.ExpectNoError(err)
  84. By(fmt.Sprintf("Calculating CPU availability on node %s", unfilledNode.Name))
  85. milliCpu, err := availCpu(c, unfilledNode)
  86. framework.ExpectNoError(err)
  87. // Per pod CPU should be just enough to fit only (numNodeOODPods - 1) pods on the given
  88. // node. We compute this value by dividing the available CPU capacity on the node by
  89. // (numNodeOODPods - 1) and subtracting ϵ from it. We arbitrarily choose ϵ to be 1%
  90. // of the available CPU per pod, i.e. 0.01 * milliCpu/(numNodeOODPods-1). Instead of
  91. // subtracting 1% from the value, we directly use 0.99 as the multiplier.
  92. podCPU := int64(float64(milliCpu/(numNodeOODPods-1)) * 0.99)
  93. ns := f.Namespace.Name
  94. podClient := c.Pods(ns)
  95. By("Creating pods and waiting for all but one pods to be scheduled")
  96. for i := 0; i < numNodeOODPods-1; i++ {
  97. name := fmt.Sprintf("pod-node-outofdisk-%d", i)
  98. createOutOfDiskPod(c, ns, name, podCPU)
  99. framework.ExpectNoError(f.WaitForPodRunning(name))
  100. pod, err := podClient.Get(name)
  101. framework.ExpectNoError(err)
  102. Expect(pod.Spec.NodeName).To(Equal(unfilledNodeName))
  103. }
  104. pendingPodName := fmt.Sprintf("pod-node-outofdisk-%d", numNodeOODPods-1)
  105. createOutOfDiskPod(c, ns, pendingPodName, podCPU)
  106. By(fmt.Sprintf("Finding a failed scheduler event for pod %s", pendingPodName))
  107. wait.Poll(2*time.Second, 5*time.Minute, func() (bool, error) {
  108. selector := fields.Set{
  109. "involvedObject.kind": "Pod",
  110. "involvedObject.name": pendingPodName,
  111. "involvedObject.namespace": ns,
  112. "source": api.DefaultSchedulerName,
  113. "reason": "FailedScheduling",
  114. }.AsSelector()
  115. options := api.ListOptions{FieldSelector: selector}
  116. schedEvents, err := c.Events(ns).List(options)
  117. framework.ExpectNoError(err)
  118. if len(schedEvents.Items) > 0 {
  119. return true, nil
  120. } else {
  121. return false, nil
  122. }
  123. })
  124. nodelist := framework.GetReadySchedulableNodesOrDie(c)
  125. Expect(len(nodelist.Items)).To(BeNumerically(">", 1))
  126. nodeToRecover := nodelist.Items[1]
  127. Expect(nodeToRecover.Name).ToNot(Equal(unfilledNodeName))
  128. recoverDiskSpace(c, &nodeToRecover)
  129. recoveredNodeName = nodeToRecover.Name
  130. By(fmt.Sprintf("Verifying that pod %s schedules on node %s", pendingPodName, recoveredNodeName))
  131. framework.ExpectNoError(f.WaitForPodRunning(pendingPodName))
  132. pendingPod, err := podClient.Get(pendingPodName)
  133. framework.ExpectNoError(err)
  134. Expect(pendingPod.Spec.NodeName).To(Equal(recoveredNodeName))
  135. })
  136. })
  137. // createOutOfDiskPod creates a pod in the given namespace with the requested amount of CPU.
  138. func createOutOfDiskPod(c *client.Client, ns, name string, milliCPU int64) {
  139. podClient := c.Pods(ns)
  140. pod := &api.Pod{
  141. ObjectMeta: api.ObjectMeta{
  142. Name: name,
  143. },
  144. Spec: api.PodSpec{
  145. Containers: []api.Container{
  146. {
  147. Name: "pause",
  148. Image: framework.GetPauseImageName(c),
  149. Resources: api.ResourceRequirements{
  150. Requests: api.ResourceList{
  151. // Request enough CPU to fit only two pods on a given node.
  152. api.ResourceCPU: *resource.NewMilliQuantity(milliCPU, resource.DecimalSI),
  153. },
  154. },
  155. },
  156. },
  157. },
  158. }
  159. _, err := podClient.Create(pod)
  160. framework.ExpectNoError(err)
  161. }
  162. // availCpu calculates the available CPU on a given node by subtracting the CPU requested by
  163. // all the pods from the total available CPU capacity on the node.
  164. func availCpu(c *client.Client, node *api.Node) (int64, error) {
  165. podClient := c.Pods(api.NamespaceAll)
  166. selector := fields.Set{"spec.nodeName": node.Name}.AsSelector()
  167. options := api.ListOptions{FieldSelector: selector}
  168. pods, err := podClient.List(options)
  169. if err != nil {
  170. return 0, fmt.Errorf("failed to retrieve all the pods on node %s: %v", node.Name, err)
  171. }
  172. avail := node.Status.Capacity.Cpu().MilliValue()
  173. for _, pod := range pods.Items {
  174. for _, cont := range pod.Spec.Containers {
  175. avail -= cont.Resources.Requests.Cpu().MilliValue()
  176. }
  177. }
  178. return avail, nil
  179. }
  180. // availSize returns the available disk space on a given node by querying node stats which
  181. // is in turn obtained internally from cadvisor.
  182. func availSize(c *client.Client, node *api.Node) (uint64, error) {
  183. statsResource := fmt.Sprintf("api/v1/proxy/nodes/%s/stats/", node.Name)
  184. framework.Logf("Querying stats for node %s using url %s", node.Name, statsResource)
  185. res, err := c.Get().AbsPath(statsResource).Timeout(time.Minute).Do().Raw()
  186. if err != nil {
  187. return 0, fmt.Errorf("error querying cAdvisor API: %v", err)
  188. }
  189. ci := cadvisorapi.ContainerInfo{}
  190. err = json.Unmarshal(res, &ci)
  191. if err != nil {
  192. return 0, fmt.Errorf("couldn't unmarshal container info: %v", err)
  193. }
  194. return ci.Stats[len(ci.Stats)-1].Filesystem[0].Available, nil
  195. }
  196. // fillDiskSpace fills the available disk space on a given node by creating a large file. The disk
  197. // space on the node is filled in such a way that the available space after filling the disk is just
  198. // below the lowDiskSpaceThreshold mark.
  199. func fillDiskSpace(c *client.Client, node *api.Node) {
  200. avail, err := availSize(c, node)
  201. framework.ExpectNoError(err, "Node %s: couldn't obtain available disk size %v", node.Name, err)
  202. fillSize := (avail - lowDiskSpaceThreshold + (100 * mb))
  203. framework.Logf("Node %s: disk space available %d bytes", node.Name, avail)
  204. By(fmt.Sprintf("Node %s: creating a file of size %d bytes to fill the available disk space", node.Name, fillSize))
  205. cmd := fmt.Sprintf("fallocate -l %d test.img", fillSize)
  206. framework.ExpectNoError(framework.IssueSSHCommand(cmd, framework.TestContext.Provider, node))
  207. ood := framework.WaitForNodeToBe(c, node.Name, api.NodeOutOfDisk, true, nodeOODTimeOut)
  208. Expect(ood).To(BeTrue(), "Node %s did not run out of disk within %v", node.Name, nodeOODTimeOut)
  209. avail, err = availSize(c, node)
  210. framework.Logf("Node %s: disk space available %d bytes", node.Name, avail)
  211. Expect(avail < lowDiskSpaceThreshold).To(BeTrue())
  212. }
  213. // recoverDiskSpace recovers disk space, filled by creating a large file, on a given node.
  214. func recoverDiskSpace(c *client.Client, node *api.Node) {
  215. By(fmt.Sprintf("Recovering disk space on node %s", node.Name))
  216. cmd := "rm -f test.img"
  217. framework.ExpectNoError(framework.IssueSSHCommand(cmd, framework.TestContext.Provider, node))
  218. ood := framework.WaitForNodeToBe(c, node.Name, api.NodeOutOfDisk, false, nodeOODTimeOut)
  219. Expect(ood).To(BeTrue(), "Node %s's out of disk condition status did not change to false within %v", node.Name, nodeOODTimeOut)
  220. }