load.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348
  1. /*
  2. Copyright 2015 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package e2e
  14. import (
  15. "fmt"
  16. "math"
  17. "math/rand"
  18. "os"
  19. "strconv"
  20. "sync"
  21. "time"
  22. "k8s.io/kubernetes/pkg/api"
  23. client "k8s.io/kubernetes/pkg/client/unversioned"
  24. "k8s.io/kubernetes/pkg/labels"
  25. "k8s.io/kubernetes/pkg/util/intstr"
  26. "k8s.io/kubernetes/test/e2e/framework"
  27. . "github.com/onsi/ginkgo"
  28. . "github.com/onsi/gomega"
  29. )
  30. const (
  31. smallRCSize = 5
  32. mediumRCSize = 30
  33. bigRCSize = 250
  34. smallRCGroupName = "load-small-rc"
  35. mediumRCGroupName = "load-medium-rc"
  36. bigRCGroupName = "load-big-rc"
  37. smallRCBatchSize = 30
  38. mediumRCBatchSize = 5
  39. bigRCBatchSize = 1
  40. // We start RCs/Services/pods/... in different namespace in this test.
  41. // nodeCountPerNamespace determines how many namespaces we will be using
  42. // depending on the number of nodes in the underlying cluster.
  43. nodeCountPerNamespace = 100
  44. )
  45. // This test suite can take a long time to run, so by default it is added to
  46. // the ginkgo.skip list (see driver.go).
  47. // To run this suite you must explicitly ask for it by setting the
  48. // -t/--test flag or ginkgo.focus flag.
  49. var _ = framework.KubeDescribe("Load capacity", func() {
  50. var c *client.Client
  51. var nodeCount int
  52. var ns string
  53. var configs []*framework.RCConfig
  54. var namespaces []*api.Namespace
  55. // Gathers metrics before teardown
  56. // TODO add flag that allows to skip cleanup on failure
  57. AfterEach(func() {
  58. // Verify latency metrics
  59. highLatencyRequests, err := framework.HighLatencyRequests(c)
  60. framework.ExpectNoError(err, "Too many instances metrics above the threshold")
  61. Expect(highLatencyRequests).NotTo(BeNumerically(">", 0))
  62. })
  63. // We assume a default throughput of 10 pods/second throughput.
  64. // We may want to revisit it in the future.
  65. // However, this can be overriden by LOAD_TEST_THROUGHPUT env var.
  66. throughput := 10
  67. if throughputEnv := os.Getenv("LOAD_TEST_THROUGHPUT"); throughputEnv != "" {
  68. if newThroughput, err := strconv.Atoi(throughputEnv); err == nil {
  69. throughput = newThroughput
  70. }
  71. }
  72. // Explicitly put here, to delete namespace at the end of the test
  73. // (after measuring latency metrics, etc.).
  74. options := framework.FrameworkOptions{
  75. ClientQPS: float32(math.Max(50.0, float64(2*throughput))),
  76. ClientBurst: int(math.Max(100.0, float64(4*throughput))),
  77. }
  78. f := framework.NewFramework("load", options, nil)
  79. f.NamespaceDeletionTimeout = time.Hour
  80. BeforeEach(func() {
  81. c = f.Client
  82. // In large clusters we may get to this point but still have a bunch
  83. // of nodes without Routes created. Since this would make a node
  84. // unschedulable, we need to wait until all of them are schedulable.
  85. framework.ExpectNoError(framework.WaitForAllNodesSchedulable(c))
  86. ns = f.Namespace.Name
  87. nodes := framework.GetReadySchedulableNodesOrDie(c)
  88. nodeCount = len(nodes.Items)
  89. Expect(nodeCount).NotTo(BeZero())
  90. // Terminating a namespace (deleting the remaining objects from it - which
  91. // generally means events) can affect the current run. Thus we wait for all
  92. // terminating namespace to be finally deleted before starting this test.
  93. err := framework.CheckTestingNSDeletedExcept(c, ns)
  94. framework.ExpectNoError(err)
  95. framework.ExpectNoError(framework.ResetMetrics(c))
  96. })
  97. type Load struct {
  98. podsPerNode int
  99. image string
  100. command []string
  101. }
  102. loadTests := []Load{
  103. // The container will consume 1 cpu and 512mb of memory.
  104. {podsPerNode: 3, image: "jess/stress", command: []string{"stress", "-c", "1", "-m", "2"}},
  105. {podsPerNode: 30, image: "gcr.io/google_containers/serve_hostname:v1.4"},
  106. }
  107. for _, testArg := range loadTests {
  108. name := fmt.Sprintf("should be able to handle %v pods per node", testArg.podsPerNode)
  109. if testArg.podsPerNode == 30 {
  110. name = "[Feature:Performance] " + name
  111. } else {
  112. name = "[Feature:ManualPerformance] " + name
  113. }
  114. itArg := testArg
  115. It(name, func() {
  116. // Create a number of namespaces.
  117. namespaces = createNamespaces(f, nodeCount, itArg.podsPerNode)
  118. totalPods := itArg.podsPerNode * nodeCount
  119. configs = generateRCConfigs(totalPods, itArg.image, itArg.command, c, namespaces)
  120. var services []*api.Service
  121. // Read the environment variable to see if we want to create services
  122. createServices := os.Getenv("CREATE_SERVICES")
  123. if createServices == "true" {
  124. framework.Logf("Creating services")
  125. services := generateServicesForConfigs(configs)
  126. for _, service := range services {
  127. _, err := c.Services(service.Namespace).Create(service)
  128. framework.ExpectNoError(err)
  129. }
  130. } else {
  131. framework.Logf("Skipping service creation")
  132. }
  133. // Simulate lifetime of RC:
  134. // * create with initial size
  135. // * scale RC to a random size and list all pods
  136. // * scale RC to a random size and list all pods
  137. // * delete it
  138. //
  139. // This will generate ~5 creations/deletions per second assuming:
  140. // - X small RCs each 5 pods [ 5 * X = totalPods / 2 ]
  141. // - Y medium RCs each 30 pods [ 30 * Y = totalPods / 4 ]
  142. // - Z big RCs each 250 pods [ 250 * Z = totalPods / 4]
  143. // We would like to spread creating replication controllers over time
  144. // to make it possible to create/schedule them in the meantime.
  145. // Currently we assume <throughput> pods/second average throughput.
  146. // We may want to revisit it in the future.
  147. creatingTime := time.Duration(totalPods/throughput) * time.Second
  148. createAllRC(configs, creatingTime)
  149. By("============================================================================")
  150. // We would like to spread scaling replication controllers over time
  151. // to make it possible to create/schedule & delete them in the meantime.
  152. // Currently we assume that <throughput> pods/second average throughput.
  153. // The expected number of created/deleted pods is less than totalPods/3.
  154. scalingTime := time.Duration(totalPods/(3*throughput)) * time.Second
  155. scaleAllRC(configs, scalingTime)
  156. By("============================================================================")
  157. scaleAllRC(configs, scalingTime)
  158. By("============================================================================")
  159. // Cleanup all created replication controllers.
  160. // Currently we assume <throughput> pods/second average deletion throughput.
  161. // We may want to revisit it in the future.
  162. deletingTime := time.Duration(totalPods/throughput) * time.Second
  163. deleteAllRC(configs, deletingTime)
  164. if createServices == "true" {
  165. for _, service := range services {
  166. err := c.Services(ns).Delete(service.Name)
  167. framework.ExpectNoError(err)
  168. }
  169. framework.Logf("%v Services created.", len(services))
  170. }
  171. })
  172. }
  173. })
  174. func createNamespaces(f *framework.Framework, nodeCount, podsPerNode int) []*api.Namespace {
  175. namespaceCount := (nodeCount + nodeCountPerNamespace - 1) / nodeCountPerNamespace
  176. namespaces := []*api.Namespace{}
  177. for i := 1; i <= namespaceCount; i++ {
  178. namespace, err := f.CreateNamespace(fmt.Sprintf("load-%d-nodepods-%d", podsPerNode, i), nil)
  179. framework.ExpectNoError(err)
  180. namespaces = append(namespaces, namespace)
  181. }
  182. return namespaces
  183. }
  184. func computeRCCounts(total int) (int, int, int) {
  185. // Small RCs owns ~0.5 of total number of pods, medium and big RCs ~0.25 each.
  186. // For example for 3000 pods (100 nodes, 30 pods per node) there are:
  187. // - 300 small RCs each 5 pods
  188. // - 25 medium RCs each 30 pods
  189. // - 3 big RCs each 250 pods
  190. bigRCCount := total / 4 / bigRCSize
  191. total -= bigRCCount * bigRCSize
  192. mediumRCCount := total / 3 / mediumRCSize
  193. total -= mediumRCCount * mediumRCSize
  194. smallRCCount := total / smallRCSize
  195. return smallRCCount, mediumRCCount, bigRCCount
  196. }
  197. func generateRCConfigs(totalPods int, image string, command []string, c *client.Client, nss []*api.Namespace) []*framework.RCConfig {
  198. configs := make([]*framework.RCConfig, 0)
  199. smallRCCount, mediumRCCount, bigRCCount := computeRCCounts(totalPods)
  200. configs = append(configs, generateRCConfigsForGroup(c, nss, smallRCGroupName, smallRCSize, smallRCCount, image, command)...)
  201. configs = append(configs, generateRCConfigsForGroup(c, nss, mediumRCGroupName, mediumRCSize, mediumRCCount, image, command)...)
  202. configs = append(configs, generateRCConfigsForGroup(c, nss, bigRCGroupName, bigRCSize, bigRCCount, image, command)...)
  203. return configs
  204. }
  205. func generateRCConfigsForGroup(c *client.Client, nss []*api.Namespace, groupName string, size, count int, image string, command []string) []*framework.RCConfig {
  206. configs := make([]*framework.RCConfig, 0, count)
  207. for i := 1; i <= count; i++ {
  208. config := &framework.RCConfig{
  209. Client: c,
  210. Name: groupName + "-" + strconv.Itoa(i),
  211. Namespace: nss[i%len(nss)].Name,
  212. Timeout: 10 * time.Minute,
  213. Image: image,
  214. Command: command,
  215. Replicas: size,
  216. CpuRequest: 10, // 0.01 core
  217. MemRequest: 26214400, // 25MB
  218. }
  219. configs = append(configs, config)
  220. }
  221. return configs
  222. }
  223. func generateServicesForConfigs(configs []*framework.RCConfig) []*api.Service {
  224. services := make([]*api.Service, 0, len(configs))
  225. for _, config := range configs {
  226. serviceName := config.Name + "-svc"
  227. labels := map[string]string{"name": config.Name}
  228. service := &api.Service{
  229. ObjectMeta: api.ObjectMeta{
  230. Name: serviceName,
  231. Namespace: config.Namespace,
  232. },
  233. Spec: api.ServiceSpec{
  234. Selector: labels,
  235. Ports: []api.ServicePort{{
  236. Port: 80,
  237. TargetPort: intstr.FromInt(80),
  238. }},
  239. },
  240. }
  241. services = append(services, service)
  242. }
  243. return services
  244. }
  245. func sleepUpTo(d time.Duration) {
  246. time.Sleep(time.Duration(rand.Int63n(d.Nanoseconds())))
  247. }
  248. func createAllRC(configs []*framework.RCConfig, creatingTime time.Duration) {
  249. var wg sync.WaitGroup
  250. wg.Add(len(configs))
  251. for _, config := range configs {
  252. go createRC(&wg, config, creatingTime)
  253. }
  254. wg.Wait()
  255. }
  256. func createRC(wg *sync.WaitGroup, config *framework.RCConfig, creatingTime time.Duration) {
  257. defer GinkgoRecover()
  258. defer wg.Done()
  259. sleepUpTo(creatingTime)
  260. framework.ExpectNoError(framework.RunRC(*config), fmt.Sprintf("creating rc %s", config.Name))
  261. }
  262. func scaleAllRC(configs []*framework.RCConfig, scalingTime time.Duration) {
  263. var wg sync.WaitGroup
  264. wg.Add(len(configs))
  265. for _, config := range configs {
  266. go scaleRC(&wg, config, scalingTime)
  267. }
  268. wg.Wait()
  269. }
  270. // Scales RC to a random size within [0.5*size, 1.5*size] and lists all the pods afterwards.
  271. // Scaling happens always based on original size, not the current size.
  272. func scaleRC(wg *sync.WaitGroup, config *framework.RCConfig, scalingTime time.Duration) {
  273. defer GinkgoRecover()
  274. defer wg.Done()
  275. sleepUpTo(scalingTime)
  276. newSize := uint(rand.Intn(config.Replicas) + config.Replicas/2)
  277. framework.ExpectNoError(framework.ScaleRC(config.Client, config.Namespace, config.Name, newSize, true),
  278. fmt.Sprintf("scaling rc %s for the first time", config.Name))
  279. selector := labels.SelectorFromSet(labels.Set(map[string]string{"name": config.Name}))
  280. options := api.ListOptions{
  281. LabelSelector: selector,
  282. ResourceVersion: "0",
  283. }
  284. _, err := config.Client.Pods(config.Namespace).List(options)
  285. framework.ExpectNoError(err, fmt.Sprintf("listing pods from rc %v", config.Name))
  286. }
  287. func deleteAllRC(configs []*framework.RCConfig, deletingTime time.Duration) {
  288. var wg sync.WaitGroup
  289. wg.Add(len(configs))
  290. for _, config := range configs {
  291. go deleteRC(&wg, config, deletingTime)
  292. }
  293. wg.Wait()
  294. }
  295. func deleteRC(wg *sync.WaitGroup, config *framework.RCConfig, deletingTime time.Duration) {
  296. defer GinkgoRecover()
  297. defer wg.Done()
  298. sleepUpTo(deletingTime)
  299. if framework.TestContext.GarbageCollectorEnabled {
  300. framework.ExpectNoError(framework.DeleteRCAndWaitForGC(config.Client, config.Namespace, config.Name), fmt.Sprintf("deleting rc %s", config.Name))
  301. } else {
  302. framework.ExpectNoError(framework.DeleteRCAndPods(config.Client, config.Namespace, config.Name), fmt.Sprintf("deleting rc %s", config.Name))
  303. }
  304. }