- /*
- Copyright 2015 The Kubernetes Authors.
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
- http://www.apache.org/licenses/LICENSE-2.0
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- */
- package e2e
- import (
- "fmt"
- "os/exec"
- "regexp"
- "strings"
- "time"
- "k8s.io/kubernetes/pkg/api"
- "k8s.io/kubernetes/pkg/api/unversioned"
- "k8s.io/kubernetes/pkg/apimachinery/registered"
- client "k8s.io/kubernetes/pkg/client/unversioned"
- "k8s.io/kubernetes/pkg/labels"
- "k8s.io/kubernetes/pkg/util/intstr"
- "k8s.io/kubernetes/test/e2e/framework"
- "github.com/aws/aws-sdk-go/aws/session"
- "github.com/aws/aws-sdk-go/service/autoscaling"
- . "github.com/onsi/ginkgo"
- . "github.com/onsi/gomega"
- "k8s.io/kubernetes/pkg/client/cache"
- awscloud "k8s.io/kubernetes/pkg/cloudprovider/providers/aws"
- controllerframework "k8s.io/kubernetes/pkg/controller/framework"
- "k8s.io/kubernetes/pkg/fields"
- "k8s.io/kubernetes/pkg/runtime"
- "k8s.io/kubernetes/pkg/watch"
- )
- const (
- serveHostnameImage = "gcr.io/google_containers/serve_hostname:v1.4"
- resizeNodeReadyTimeout = 2 * time.Minute
- resizeNodeNotReadyTimeout = 2 * time.Minute
- nodeReadinessTimeout = 3 * time.Minute
- podNotReadyTimeout = 1 * time.Minute
- podReadyTimeout = 2 * time.Minute
- testPort = 9376
- )
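- // ResizeGroup sets the node instance group to the given size. On GCE/GKE it shells
- // out to gcloud; on AWS it resizes the autoscaling group. When a report directory is
- // configured, cluster state is dumped before and after the resize for debugging.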
- func ResizeGroup(group string, size int32) error {
- if framework.TestContext.ReportDir != "" {
- framework.CoreDump(framework.TestContext.ReportDir)
- defer framework.CoreDump(framework.TestContext.ReportDir)
- }
- if framework.TestContext.Provider == "gce" || framework.TestContext.Provider == "gke" {
- // TODO: make this hit the compute API directly instead of shelling out to gcloud.
- // TODO: make gce/gke implement InstanceGroups, so we can eliminate the per-provider logic
- output, err := exec.Command("gcloud", "compute", "instance-groups", "managed", "resize",
- group, fmt.Sprintf("--size=%v", size),
- "--project="+framework.TestContext.CloudConfig.ProjectID, "--zone="+framework.TestContext.CloudConfig.Zone).CombinedOutput()
- if err != nil {
- framework.Logf("Failed to resize node instance group: %v", string(output))
- }
- return err
- } else if framework.TestContext.Provider == "aws" {
- client := autoscaling.New(session.New())
- return awscloud.ResizeInstanceGroup(client, group, int(size))
- } else {
- return fmt.Errorf("Provider does not support InstanceGroups")
- }
- }
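- // GetGroupNodes returns the names of the RUNNING instances in the given instance
- // group (GCE/GKE only).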
- func GetGroupNodes(group string) ([]string, error) {
- if framework.TestContext.Provider == "gce" || framework.TestContext.Provider == "gke" {
- // TODO: make this hit the compute API directly instead of shelling out to gcloud.
- // TODO: make gce/gke implement InstanceGroups, so we can eliminate the per-provider logic
- output, err := exec.Command("gcloud", "compute", "instance-groups", "managed",
- "list-instances", group, "--project="+framework.TestContext.CloudConfig.ProjectID,
- "--zone="+framework.TestContext.CloudConfig.Zone).CombinedOutput()
- if err != nil {
- return nil, err
- }
- re := regexp.MustCompile(".*RUNNING")
- lines := re.FindAllString(string(output), -1)
- for i, line := range lines {
- lines[i] = line[:strings.Index(line, " ")]
- }
- return lines, nil
- } else {
- return nil, fmt.Errorf("provider does not support InstanceGroups")
- }
- }
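- // GroupSize returns the current size of the given node instance group.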
- func GroupSize(group string) (int, error) {
- if framework.TestContext.Provider == "gce" || framework.TestContext.Provider == "gke" {
- // TODO: make this hit the compute API directly instead of shelling out to gcloud.
- // TODO: make gce/gke implement InstanceGroups, so we can eliminate the per-provider logic
- output, err := exec.Command("gcloud", "compute", "instance-groups", "managed",
- "list-instances", group, "--project="+framework.TestContext.CloudConfig.ProjectID,
- "--zone="+framework.TestContext.CloudConfig.Zone).CombinedOutput()
- if err != nil {
- return -1, err
- }
- re := regexp.MustCompile("RUNNING")
- return len(re.FindAllString(string(output), -1)), nil
- } else if framework.TestContext.Provider == "aws" {
- client := autoscaling.New(session.New())
- instanceGroup, err := awscloud.DescribeInstanceGroup(client, group)
- if err != nil {
- return -1, fmt.Errorf("error describing instance group: %v", err)
- }
- if instanceGroup == nil {
- return -1, fmt.Errorf("instance group not found: %s", group)
- }
- return instanceGroup.CurrentSize()
- } else {
- return -1, fmt.Errorf("provider does not support InstanceGroups")
- }
- }
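- // WaitForGroupSize polls every 5 seconds until the instance group reaches the given
- // size, failing after a 10-minute timeout.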
- func WaitForGroupSize(group string, size int32) error {
- timeout := 10 * time.Minute
- for start := time.Now(); time.Since(start) < timeout; time.Sleep(5 * time.Second) {
- currentSize, err := GroupSize(group)
- if err != nil {
- framework.Logf("Failed to get node instance group size: %v", err)
- continue
- }
- if currentSize != int(size) {
- framework.Logf("Waiting for node instance group size %d, current size %d", size, currentSize)
- continue
- }
- framework.Logf("Node instance group has reached the desired size %d", size)
- return nil
- }
- return fmt.Errorf("timeout waiting %v for node instance group size to be %d", timeout, size)
- }
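- // svcByName returns a NodePort service that selects pods labeled "name": name on the given port.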
- func svcByName(name string, port int) *api.Service {
- return &api.Service{
- ObjectMeta: api.ObjectMeta{
- Name: name,
- },
- Spec: api.ServiceSpec{
- Type: api.ServiceTypeNodePort,
- Selector: map[string]string{
- "name": name,
- },
- Ports: []api.ServicePort{{
- Port: int32(port),
- TargetPort: intstr.FromInt(port),
- }},
- },
- }
- }
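- // newSVCByName creates the service built by svcByName on testPort in the given namespace.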
- func newSVCByName(c *client.Client, ns, name string) error {
- _, err := c.Services(ns).Create(svcByName(name, testPort))
- return err
- }
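- // podOnNode returns a pod pinned to the given node via spec.nodeName, with RestartPolicy Never.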
- func podOnNode(podName, nodeName string, image string) *api.Pod {
- return &api.Pod{
- ObjectMeta: api.ObjectMeta{
- Name: podName,
- Labels: map[string]string{
- "name": podName,
- },
- },
- Spec: api.PodSpec{
- Containers: []api.Container{
- {
- Name: podName,
- Image: image,
- Ports: []api.ContainerPort{{ContainerPort: 9376}},
- },
- },
- NodeName: nodeName,
- RestartPolicy: api.RestartPolicyNever,
- },
- }
- }
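- // newPodOnNode creates a serve_hostname pod on the given node and logs the outcome.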
- func newPodOnNode(c *client.Client, namespace, podName, nodeName string) error {
- pod, err := c.Pods(namespace).Create(podOnNode(podName, nodeName, serveHostnameImage))
- if err == nil {
- framework.Logf("Created pod %s on node %s", pod.ObjectMeta.Name, nodeName)
- } else {
- framework.Logf("Failed to create pod %s on node %s: %v", podName, nodeName, err)
- }
- return err
- }
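- // rcByName returns a replication controller running the given image.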
- func rcByName(name string, replicas int32, image string, labels map[string]string) *api.ReplicationController {
- return rcByNameContainer(name, replicas, image, labels, api.Container{
- Name: name,
- Image: image,
- })
- }
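- // rcByNamePort returns a replication controller whose container exposes the given port and protocol.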
- func rcByNamePort(name string, replicas int32, image string, port int, protocol api.Protocol, labels map[string]string) *api.ReplicationController {
- return rcByNameContainer(name, replicas, image, labels, api.Container{
- Name: name,
- Image: image,
- Ports: []api.ContainerPort{{ContainerPort: int32(port), Protocol: protocol}},
- })
- }
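- // rcByNameContainer returns a replication controller wrapping the given container,
- // selecting pods by the "name" label and using a zero termination grace period.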
- func rcByNameContainer(name string, replicas int32, image string, labels map[string]string, c api.Container) *api.ReplicationController {
- // Add "name": name to the labels, overwriting if it exists.
- labels["name"] = name
- gracePeriod := int64(0)
- return &api.ReplicationController{
- TypeMeta: unversioned.TypeMeta{
- Kind: "ReplicationController",
- APIVersion: registered.GroupOrDie(api.GroupName).GroupVersion.String(),
- },
- ObjectMeta: api.ObjectMeta{
- Name: name,
- },
- Spec: api.ReplicationControllerSpec{
- Replicas: replicas,
- Selector: map[string]string{
- "name": name,
- },
- Template: &api.PodTemplateSpec{
- ObjectMeta: api.ObjectMeta{
- Labels: labels,
- },
- Spec: api.PodSpec{
- Containers: []api.Container{c},
- TerminationGracePeriodSeconds: &gracePeriod,
- },
- },
- },
- }
- }
- // newRCByName creates a replication controller with a selector matching the given name.
- func newRCByName(c *client.Client, ns, name string, replicas int32) (*api.ReplicationController, error) {
- By(fmt.Sprintf("creating replication controller %s", name))
- return c.ReplicationControllers(ns).Create(rcByNamePort(
- name, replicas, serveHostnameImage, 9376, api.ProtocolTCP, map[string]string{}))
- }
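- // resizeRC updates the replica count of the named replication controller.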
- func resizeRC(c *client.Client, ns, name string, replicas int32) error {
- rc, err := c.ReplicationControllers(ns).Get(name)
- if err != nil {
- return err
- }
- rc.Spec.Replicas = replicas
- _, err = c.ReplicationControllers(rc.Namespace).Update(rc)
- return err
- }
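- // getMaster returns the master address for the current provider (the kubernetes
- // endpoint IP on GCE, the API host on GKE, a hardcoded IP on AWS).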
- func getMaster(c *client.Client) string {
- master := ""
- switch framework.TestContext.Provider {
- case "gce":
- eps, err := c.Endpoints(api.NamespaceDefault).Get("kubernetes")
- if err != nil {
- framework.Failf("Fail to get kubernetes endpoinds: %v", err)
- }
- if len(eps.Subsets) != 1 || len(eps.Subsets[0].Addresses) != 1 {
- framework.Failf("There are more than 1 endpoints for kubernetes service: %+v", eps)
- }
- master = eps.Subsets[0].Addresses[0].IP
- case "gke":
- master = strings.TrimPrefix(framework.TestContext.Host, "https://")
- case "aws":
- // TODO(justinsb): Avoid hardcoding this.
- master = "172.20.0.9"
- default:
- framework.Failf("This test is not supported for provider %s and should be disabled", framework.TestContext.Provider)
- }
- return master
- }
- // getNodeExternalIP returns the node's external IP concatenated with port 22 for SSH,
- // e.g. 1.2.3.4:22.
- func getNodeExternalIP(node *api.Node) string {
- framework.Logf("Getting external IP address for %s", node.Name)
- host := ""
- for _, a := range node.Status.Addresses {
- if a.Type == api.NodeExternalIP {
- host = a.Address + ":22"
- break
- }
- }
- if host == "" {
- framework.Failf("Couldn't get the external IP of host %s with addresses %v", node.Name, node.Status.Addresses)
- }
- return host
- }
- // Blocks outgoing network traffic on 'node'. Then verifies that 'podNameToDisappear',
- // that belongs to replication controller 'rcName', really disappeared.
- // Finally, it checks that the replication controller recreates the
- // pods on another node and that the number of replicas is again equal to 'replicas'.
- // At the end (even in case of errors), the network traffic is brought back to normal.
- // This function executes commands on a node so it will work only for some
- // environments.
- func performTemporaryNetworkFailure(c *client.Client, ns, rcName string, replicas int32, podNameToDisappear string, node *api.Node) {
- host := getNodeExternalIP(node)
- master := getMaster(c)
- By(fmt.Sprintf("block network traffic from node %s to the master", node.Name))
- defer func() {
- // This code will execute even if setting the iptables rule failed.
- // It is on purpose because we may have an error even if the new rule
- // had been inserted. (yes, we could look at the error code and ssh error
- // separately, but I prefer to stay on the safe side).
- By(fmt.Sprintf("Unblock network traffic from node %s to the master", node.Name))
- framework.UnblockNetwork(host, master)
- }()
- framework.Logf("Waiting %v to ensure node %s is ready before beginning test...", resizeNodeReadyTimeout, node.Name)
- if !framework.WaitForNodeToBe(c, node.Name, api.NodeReady, true, resizeNodeReadyTimeout) {
- framework.Failf("Node %s did not become ready within %v", node.Name, resizeNodeReadyTimeout)
- }
- framework.BlockNetwork(host, master)
- framework.Logf("Waiting %v for node %s to be not ready after simulated network failure", resizeNodeNotReadyTimeout, node.Name)
- if !framework.WaitForNodeToBe(c, node.Name, api.NodeReady, false, resizeNodeNotReadyTimeout) {
- framework.Failf("Node %s did not become not-ready within %v", node.Name, resizeNodeNotReadyTimeout)
- }
- framework.Logf("Waiting for pod %s to be removed", podNameToDisappear)
- err := framework.WaitForRCPodToDisappear(c, ns, rcName, podNameToDisappear)
- Expect(err).NotTo(HaveOccurred())
- By("verifying whether the pod from the unreachable node is recreated")
- err = framework.VerifyPods(c, ns, rcName, true, replicas)
- Expect(err).NotTo(HaveOccurred())
- // network traffic is unblocked in a deferred function
- }
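- // expectNodeReadiness consumes node updates from the newNode channel and fails the
- // test if the node's Ready condition does not match isReady within nodeReadinessTimeout.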
- func expectNodeReadiness(isReady bool, newNode chan *api.Node) {
- timeout := false
- expected := false
- timer := time.After(nodeReadinessTimeout)
- for !expected && !timeout {
- select {
- case n := <-newNode:
- if framework.IsNodeConditionSetAsExpected(n, api.NodeReady, isReady) {
- expected = true
- } else {
- framework.Logf("Observed node ready status is NOT %v as expected", isReady)
- }
- case <-timer:
- timeout = true
- }
- }
- if !expected {
- framework.Failf("Failed to observe node ready status change to %v", isReady)
- }
- }
- var _ = framework.KubeDescribe("Nodes [Disruptive]", func() {
- f := framework.NewDefaultFramework("resize-nodes")
- var systemPodsNo int32
- var c *client.Client
- var ns string
- ignoreLabels := framework.ImagePullerLabels
- var group string
- BeforeEach(func() {
- c = f.Client
- ns = f.Namespace.Name
- systemPods, err := framework.GetPodsInNamespace(c, ns, ignoreLabels)
- Expect(err).NotTo(HaveOccurred())
- systemPodsNo = int32(len(systemPods))
- if strings.Contains(framework.TestContext.CloudConfig.NodeInstanceGroup, ",") {
- framework.Failf("Test does not support cluster setup with more than one MIG: %s", framework.TestContext.CloudConfig.NodeInstanceGroup)
- } else {
- group = framework.TestContext.CloudConfig.NodeInstanceGroup
- }
- })
- // Slow issue #13323 (8 min)
- framework.KubeDescribe("Resize [Slow]", func() {
- var skipped bool
- BeforeEach(func() {
- skipped = true
- framework.SkipUnlessProviderIs("gce", "gke", "aws")
- framework.SkipUnlessNodeCountIsAtLeast(2)
- skipped = false
- })
- AfterEach(func() {
- if skipped {
- return
- }
- By("restoring the original node instance group size")
- if err := ResizeGroup(group, int32(framework.TestContext.CloudConfig.NumNodes)); err != nil {
- framework.Failf("Couldn't restore the original node instance group size: %v", err)
- }
- // In GKE, our current tunneling setup has the potential to hold on to a broken tunnel (from a
- // rebooted/deleted node) for up to 5 minutes before all tunnels are dropped and recreated.
- // Most tests make use of some proxy feature to verify functionality. So, if a reboot test runs
- // right before a test that tries to get logs, for example, we may get unlucky and try to use a
- // closed tunnel to a node that was recently rebooted. There's no good way to framework.Poll for proxies
- // being closed, so we sleep.
- //
- // TODO(cjcullen) reduce this sleep (#19314)
- if framework.ProviderIs("gke") {
- By("waiting 5 minutes for all dead tunnels to be dropped")
- time.Sleep(5 * time.Minute)
- }
- if err := WaitForGroupSize(group, int32(framework.TestContext.CloudConfig.NumNodes)); err != nil {
- framework.Failf("Couldn't restore the original node instance group size: %v", err)
- }
- if err := framework.WaitForClusterSize(c, framework.TestContext.CloudConfig.NumNodes, 10*time.Minute); err != nil {
- framework.Failf("Couldn't restore the original cluster size: %v", err)
- }
- // Many e2e tests assume that the cluster is fully healthy before they start. Wait until
- // the cluster is restored to health.
- By("waiting for system pods to successfully restart")
- err := framework.WaitForPodsRunningReady(c, api.NamespaceSystem, systemPodsNo, framework.PodReadyBeforeTimeout, ignoreLabels)
- Expect(err).NotTo(HaveOccurred())
- By("waiting for image prepulling pods to complete")
- framework.WaitForPodsSuccess(c, api.NamespaceSystem, framework.ImagePullerLabels, imagePrePullingTimeout)
- })
- It("should be able to delete nodes", func() {
- // Create a replication controller for a service that serves its hostname.
- // The source for the Docker container kubernetes/serve_hostname is in contrib/for-demos/serve_hostname
- name := "my-hostname-delete-node"
- replicas := int32(framework.TestContext.CloudConfig.NumNodes)
- newRCByName(c, ns, name, replicas)
- err := framework.VerifyPods(c, ns, name, true, replicas)
- Expect(err).NotTo(HaveOccurred())
- By(fmt.Sprintf("decreasing cluster size to %d", replicas-1))
- err = ResizeGroup(group, replicas-1)
- Expect(err).NotTo(HaveOccurred())
- err = WaitForGroupSize(group, replicas-1)
- Expect(err).NotTo(HaveOccurred())
- err = framework.WaitForClusterSize(c, int(replicas-1), 10*time.Minute)
- Expect(err).NotTo(HaveOccurred())
- By("verifying whether the pods from the removed node are recreated")
- err = framework.VerifyPods(c, ns, name, true, replicas)
- Expect(err).NotTo(HaveOccurred())
- })
- // TODO: Bug here - testName is not correct
- It("should be able to add nodes", func() {
- // Create a replication controller for a service that serves its hostname.
- // The source for the Docker container kubernetes/serve_hostname is in contrib/for-demos/serve_hostname
- name := "my-hostname-add-node"
- newSVCByName(c, ns, name)
- replicas := int32(framework.TestContext.CloudConfig.NumNodes)
- newRCByName(c, ns, name, replicas)
- err := framework.VerifyPods(c, ns, name, true, replicas)
- Expect(err).NotTo(HaveOccurred())
- By(fmt.Sprintf("increasing cluster size to %d", replicas+1))
- err = ResizeGroup(group, replicas+1)
- Expect(err).NotTo(HaveOccurred())
- err = WaitForGroupSize(group, replicas+1)
- Expect(err).NotTo(HaveOccurred())
- err = framework.WaitForClusterSize(c, int(replicas+1), 10*time.Minute)
- Expect(err).NotTo(HaveOccurred())
- By(fmt.Sprintf("increasing size of the replication controller to %d and verifying all pods are running", replicas+1))
- err = resizeRC(c, ns, name, replicas+1)
- Expect(err).NotTo(HaveOccurred())
- err = framework.VerifyPods(c, ns, name, true, replicas+1)
- Expect(err).NotTo(HaveOccurred())
- })
- })
- framework.KubeDescribe("Network", func() {
- Context("when a node becomes unreachable", func() {
- BeforeEach(func() {
- framework.SkipUnlessProviderIs("gce", "gke", "aws")
- framework.SkipUnlessNodeCountIsAtLeast(2)
- })
- // TODO marekbiskup 2015-06-19 #10085
- // This test has nothing to do with resizing nodes so it should be moved elsewhere.
- // Two things are tested here:
- // 1. pods from an unreachable node are rescheduled
- // 2. when a node joins the cluster, it can host new pods.
- // Factor out the cases into two separate tests.
- It("[replication controller] recreates pods scheduled on the unreachable node "+
- "AND allows scheduling of pods on a node after it rejoins the cluster", func() {
- // Create a replication controller for a service that serves its hostname.
- // The source for the Docker container kubernetes/serve_hostname is in contrib/for-demos/serve_hostname
- name := "my-hostname-net"
- newSVCByName(c, ns, name)
- replicas := int32(framework.TestContext.CloudConfig.NumNodes)
- newRCByName(c, ns, name, replicas)
- err := framework.VerifyPods(c, ns, name, true, replicas)
- Expect(err).NotTo(HaveOccurred(), "Each pod should start running and responding")
- By("choose a node with at least one pod - we will block some network traffic on this node")
- label := labels.SelectorFromSet(labels.Set(map[string]string{"name": name}))
- options := api.ListOptions{LabelSelector: label}
- pods, err := c.Pods(ns).List(options) // list pods after all have been scheduled
- Expect(err).NotTo(HaveOccurred())
- nodeName := pods.Items[0].Spec.NodeName
- node, err := c.Nodes().Get(nodeName)
- Expect(err).NotTo(HaveOccurred())
- By(fmt.Sprintf("block network traffic from node %s", node.Name))
- performTemporaryNetworkFailure(c, ns, name, replicas, pods.Items[0].Name, node)
- framework.Logf("Waiting %v for node %s to be ready once temporary network failure ends", resizeNodeReadyTimeout, node.Name)
- if !framework.WaitForNodeToBeReady(c, node.Name, resizeNodeReadyTimeout) {
- framework.Failf("Node %s did not become ready within %v", node.Name, resizeNodeReadyTimeout)
- }
- // sleep a bit, to allow Watch in NodeController to catch up.
- time.Sleep(5 * time.Second)
- By("verify whether new pods can be created on the re-attached node")
- // increasing the RC size is not a valid way to test this
- // since we have no guarantees the pod will be scheduled on our node.
- additionalPod := "additionalpod"
- err = newPodOnNode(c, ns, additionalPod, node.Name)
- Expect(err).NotTo(HaveOccurred())
- err = framework.VerifyPods(c, ns, additionalPod, true, 1)
- Expect(err).NotTo(HaveOccurred())
- // verify that it is really on the requested node
- {
- pod, err := c.Pods(ns).Get(additionalPod)
- Expect(err).NotTo(HaveOccurred())
- if pod.Spec.NodeName != node.Name {
- framework.Logf("Pod %s found on invalid node: %s instead of %s", pod.Name, pod.Spec.NodeName, node.Name)
- }
- }
- })
- // What happens in this test:
- // Network traffic from a node to the master is cut off to simulate a network partition.
- // Expect to observe:
- // 1. Node is marked NotReady after timeout by nodecontroller (40 seconds)
- // 2. All pods on node are marked NotReady shortly after #1
- // 3. Node and pods return to Ready after connectivity recovers
- It("All pods on the unreachable node should be marked as NotReady upon the node turn NotReady "+
- "AND all pods should be mark back to Ready when the node get back to Ready before pod eviction timeout", func() {
- By("choose a node - we will block all network traffic on this node")
- var podOpts api.ListOptions
- nodeOpts := api.ListOptions{}
- nodes, err := c.Nodes().List(nodeOpts)
- Expect(err).NotTo(HaveOccurred())
- framework.FilterNodes(nodes, func(node api.Node) bool {
- if !framework.IsNodeConditionSetAsExpected(&node, api.NodeReady, true) {
- return false
- }
- podOpts = api.ListOptions{FieldSelector: fields.OneTermEqualSelector(api.PodHostField, node.Name)}
- pods, err := c.Pods(api.NamespaceAll).List(podOpts)
- if err != nil || len(pods.Items) <= 0 {
- return false
- }
- return true
- })
- if len(nodes.Items) <= 0 {
- framework.Failf("No eligible node were found: %d", len(nodes.Items))
- }
- node := nodes.Items[0]
- podOpts = api.ListOptions{FieldSelector: fields.OneTermEqualSelector(api.PodHostField, node.Name)}
- if err = framework.WaitForMatchPodsCondition(c, podOpts, "Running and Ready", podReadyTimeout, framework.PodRunningReady); err != nil {
- framework.Failf("Pods on node %s are not ready and running within %v: %v", node.Name, podReadyTimeout, err)
- }
- By("Set up watch on node status")
- nodeSelector := fields.OneTermEqualSelector("metadata.name", node.Name)
- stopCh := make(chan struct{})
- newNode := make(chan *api.Node)
- var controller *controllerframework.Controller
- _, controller = controllerframework.NewInformer(
- &cache.ListWatch{
- ListFunc: func(options api.ListOptions) (runtime.Object, error) {
- options.FieldSelector = nodeSelector
- return f.Client.Nodes().List(options)
- },
- WatchFunc: func(options api.ListOptions) (watch.Interface, error) {
- options.FieldSelector = nodeSelector
- return f.Client.Nodes().Watch(options)
- },
- },
- &api.Node{},
- 0,
- controllerframework.ResourceEventHandlerFuncs{
- UpdateFunc: func(oldObj, newObj interface{}) {
- n, ok := newObj.(*api.Node)
- Expect(ok).To(Equal(true))
- newNode <- n
- },
- },
- )
- defer func() {
- // Will not explicitly close newNode channel here due to a race condition
- // where stopCh and newNode are closed but the informer's UpdateFunc still executes.
- close(stopCh)
- }()
- go controller.Run(stopCh)
- By(fmt.Sprintf("Block traffic from node %s to the master", node.Name))
- host := getNodeExternalIP(&node)
- master := getMaster(c)
- defer func() {
- By(fmt.Sprintf("Unblock traffic from node %s to the master", node.Name))
- framework.UnblockNetwork(host, master)
- if CurrentGinkgoTestDescription().Failed {
- return
- }
- By("Expect to observe node and pod status change from NotReady to Ready after network connectivity recovers")
- expectNodeReadiness(true, newNode)
- if err = framework.WaitForMatchPodsCondition(c, podOpts, "Running and Ready", podReadyTimeout, framework.PodRunningReady); err != nil {
- framework.Failf("Pods on node %s did not become ready and running within %v: %v", node.Name, podReadyTimeout, err)
- }
- }()
- framework.BlockNetwork(host, master)
- By("Expect to observe node and pod status change from Ready to NotReady after network partition")
- expectNodeReadiness(false, newNode)
- if err = framework.WaitForMatchPodsCondition(c, podOpts, "NotReady", podNotReadyTimeout, framework.PodNotReady); err != nil {
- framework.Failf("Pods on node %s did not become NotReady within %v: %v", node.Name, podNotReadyTimeout, err)
- }
- })
- })
- })
- })