controller_utils.go 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774
  1. /*
  2. Copyright 2014 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package controller
  14. import (
  15. "fmt"
  16. "sync"
  17. "sync/atomic"
  18. "time"
  19. "github.com/golang/glog"
  20. "k8s.io/kubernetes/pkg/api"
  21. "k8s.io/kubernetes/pkg/api/meta"
  22. "k8s.io/kubernetes/pkg/api/unversioned"
  23. "k8s.io/kubernetes/pkg/api/validation"
  24. "k8s.io/kubernetes/pkg/apis/extensions"
  25. "k8s.io/kubernetes/pkg/client/cache"
  26. clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"
  27. "k8s.io/kubernetes/pkg/client/record"
  28. "k8s.io/kubernetes/pkg/controller/framework"
  29. "k8s.io/kubernetes/pkg/labels"
  30. "k8s.io/kubernetes/pkg/runtime"
  31. "k8s.io/kubernetes/pkg/util/clock"
  32. "k8s.io/kubernetes/pkg/util/integer"
  33. "k8s.io/kubernetes/pkg/util/sets"
  34. )
  // ExpectationsTimeout is how long a dormant controller waits for the watch
  // events it expects before it is woken up anyway. It covers the case where a
  // watch drops an event (for example a pod delete) or some other problem
  // prevents expectations from being updated; without it the controller could
  // stay asleep forever. The value should reflect the expected latency of watch
  // events.
  //
  // Currently a controller can service (create *and* observe the watch events
  // for said creation) about 10 pods a second, so it takes about 1 min to
  // service 500 pods. Just creation is limited to 20qps, and watching happens
  // with ~10-30s latency/pod at the scale of 3000 pods over 100 nodes.
  const ExpectationsTimeout = 5 * time.Minute
  48. var (
  49. KeyFunc = framework.DeletionHandlingMetaNamespaceKeyFunc
  50. )
  // ResyncPeriodFunc is a function that yields a resync period.
  type ResyncPeriodFunc func() time.Duration

  // NoResyncPeriodFunc always reports a zero resync period, for callers that do
  // not need resyncing.
  func NoResyncPeriodFunc() time.Duration {
  	var none time.Duration
  	return none
  }

  // StaticResyncPeriodFunc returns a ResyncPeriodFunc that always yields the
  // given resyncPeriod.
  func StaticResyncPeriodFunc(resyncPeriod time.Duration) ResyncPeriodFunc {
  	fixed := func() time.Duration { return resyncPeriod }
  	return fixed
  }
  62. // Expectations are a way for controllers to tell the controller manager what they expect. eg:
  63. // ControllerExpectations: {
  64. // controller1: expects 2 adds in 2 minutes
  65. // controller2: expects 2 dels in 2 minutes
  66. // controller3: expects -1 adds in 2 minutes => controller3's expectations have already been met
  67. // }
  68. //
  69. // Implementation:
  70. // ControlleeExpectation = pair of atomic counters to track controllee's creation/deletion
  71. // ControllerExpectationsStore = TTLStore + a ControlleeExpectation per controller
  72. //
  73. // * Once set expectations can only be lowered
  74. // * A controller isn't synced till its expectations are either fulfilled, or expire
  75. // * Controllers that don't set expectations will get woken up for every matching controllee
  76. // ExpKeyFunc to parse out the key from a ControlleeExpectation
  77. var ExpKeyFunc = func(obj interface{}) (string, error) {
  78. if e, ok := obj.(*ControlleeExpectations); ok {
  79. return e.key, nil
  80. }
  81. return "", fmt.Errorf("Could not find key for obj %#v", obj)
  82. }
  83. // ControllerExpectationsInterface is an interface that allows users to set and wait on expectations.
  84. // Only abstracted out for testing.
  85. // Warning: if using KeyFunc it is not safe to use a single ControllerExpectationsInterface with different
  86. // types of controllers, because the keys might conflict across types.
  87. type ControllerExpectationsInterface interface {
  88. GetExpectations(controllerKey string) (*ControlleeExpectations, bool, error)
  89. SatisfiedExpectations(controllerKey string) bool
  90. DeleteExpectations(controllerKey string)
  91. SetExpectations(controllerKey string, add, del int) error
  92. ExpectCreations(controllerKey string, adds int) error
  93. ExpectDeletions(controllerKey string, dels int) error
  94. CreationObserved(controllerKey string)
  95. DeletionObserved(controllerKey string)
  96. RaiseExpectations(controllerKey string, add, del int)
  97. LowerExpectations(controllerKey string, add, del int)
  98. }
  99. // ControllerExpectations is a cache mapping controllers to what they expect to see before being woken up for a sync.
  100. type ControllerExpectations struct {
  101. cache.Store
  102. }
  103. // GetExpectations returns the ControlleeExpectations of the given controller.
  104. func (r *ControllerExpectations) GetExpectations(controllerKey string) (*ControlleeExpectations, bool, error) {
  105. if exp, exists, err := r.GetByKey(controllerKey); err == nil && exists {
  106. return exp.(*ControlleeExpectations), true, nil
  107. } else {
  108. return nil, false, err
  109. }
  110. }
  111. // DeleteExpectations deletes the expectations of the given controller from the TTLStore.
  112. func (r *ControllerExpectations) DeleteExpectations(controllerKey string) {
  113. if exp, exists, err := r.GetByKey(controllerKey); err == nil && exists {
  114. if err := r.Delete(exp); err != nil {
  115. glog.V(2).Infof("Error deleting expectations for controller %v: %v", controllerKey, err)
  116. }
  117. }
  118. }
  119. // SatisfiedExpectations returns true if the required adds/dels for the given controller have been observed.
  120. // Add/del counts are established by the controller at sync time, and updated as controllees are observed by the controller
  121. // manager.
  122. func (r *ControllerExpectations) SatisfiedExpectations(controllerKey string) bool {
  123. if exp, exists, err := r.GetExpectations(controllerKey); exists {
  124. if exp.Fulfilled() {
  125. return true
  126. } else if exp.isExpired() {
  127. glog.V(4).Infof("Controller expectations expired %#v", exp)
  128. return true
  129. } else {
  130. glog.V(4).Infof("Controller still waiting on expectations %#v", exp)
  131. return false
  132. }
  133. } else if err != nil {
  134. glog.V(2).Infof("Error encountered while checking expectations %#v, forcing sync", err)
  135. } else {
  136. // When a new controller is created, it doesn't have expectations.
  137. // When it doesn't see expected watch events for > TTL, the expectations expire.
  138. // - In this case it wakes up, creates/deletes controllees, and sets expectations again.
  139. // When it has satisfied expectations and no controllees need to be created/destroyed > TTL, the expectations expire.
  140. // - In this case it continues without setting expectations till it needs to create/delete controllees.
  141. glog.V(4).Infof("Controller %v either never recorded expectations, or the ttl expired.", controllerKey)
  142. }
  143. // Trigger a sync if we either encountered and error (which shouldn't happen since we're
  144. // getting from local store) or this controller hasn't established expectations.
  145. return true
  146. }
  147. // TODO: Extend ExpirationCache to support explicit expiration.
  148. // TODO: Make this possible to disable in tests.
  149. // TODO: Support injection of clock.
  150. func (exp *ControlleeExpectations) isExpired() bool {
  151. return clock.RealClock{}.Since(exp.timestamp) > ExpectationsTimeout
  152. }
  153. // SetExpectations registers new expectations for the given controller. Forgets existing expectations.
  154. func (r *ControllerExpectations) SetExpectations(controllerKey string, add, del int) error {
  155. exp := &ControlleeExpectations{add: int64(add), del: int64(del), key: controllerKey, timestamp: clock.RealClock{}.Now()}
  156. glog.V(4).Infof("Setting expectations %#v", exp)
  157. return r.Add(exp)
  158. }
  159. func (r *ControllerExpectations) ExpectCreations(controllerKey string, adds int) error {
  160. return r.SetExpectations(controllerKey, adds, 0)
  161. }
  162. func (r *ControllerExpectations) ExpectDeletions(controllerKey string, dels int) error {
  163. return r.SetExpectations(controllerKey, 0, dels)
  164. }
  165. // Decrements the expectation counts of the given controller.
  166. func (r *ControllerExpectations) LowerExpectations(controllerKey string, add, del int) {
  167. if exp, exists, err := r.GetExpectations(controllerKey); err == nil && exists {
  168. exp.Add(int64(-add), int64(-del))
  169. // The expectations might've been modified since the update on the previous line.
  170. glog.V(4).Infof("Lowered expectations %#v", exp)
  171. }
  172. }
  173. // Increments the expectation counts of the given controller.
  174. func (r *ControllerExpectations) RaiseExpectations(controllerKey string, add, del int) {
  175. if exp, exists, err := r.GetExpectations(controllerKey); err == nil && exists {
  176. exp.Add(int64(add), int64(del))
  177. // The expectations might've been modified since the update on the previous line.
  178. glog.V(4).Infof("Raised expectations %#v", exp)
  179. }
  180. }
  181. // CreationObserved atomically decrements the `add` expectation count of the given controller.
  182. func (r *ControllerExpectations) CreationObserved(controllerKey string) {
  183. r.LowerExpectations(controllerKey, 1, 0)
  184. }
  185. // DeletionObserved atomically decrements the `del` expectation count of the given controller.
  186. func (r *ControllerExpectations) DeletionObserved(controllerKey string) {
  187. r.LowerExpectations(controllerKey, 0, 1)
  188. }
  // Expectations are either fulfilled, or expire naturally.
  type Expectations interface {
  	// Fulfilled reports whether the expectation has been met.
  	Fulfilled() bool
  }
  // ControlleeExpectations track controllee creates/deletes.
  type ControlleeExpectations struct {
  	// add and del are the outstanding creation and deletion counts; the
  	// methods below access them only through sync/atomic.
  	add int64
  	del int64
  	// key is the store key this expectation is filed under.
  	key string
  	// timestamp is the time against which expiry is measured.
  	timestamp time.Time
  }

  // Add adjusts the add and del counters by the given (possibly negative)
  // deltas. Each counter is updated atomically, but the pair is not updated as a
  // single atomic step.
  func (e *ControlleeExpectations) Add(add, del int64) {
  	atomic.AddInt64(&e.add, add)
  	atomic.AddInt64(&e.del, del)
  }

  // Fulfilled returns true if this expectation has been fulfilled, i.e. both
  // counters are zero or negative.
  func (e *ControlleeExpectations) Fulfilled() bool {
  	// TODO: think about why this line being atomic doesn't matter
  	if atomic.LoadInt64(&e.add) > 0 {
  		return false
  	}
  	return atomic.LoadInt64(&e.del) <= 0
  }

  // GetExpectations returns the add and del expectations of the controllee.
  func (e *ControlleeExpectations) GetExpectations() (int64, int64) {
  	pendingAdds := atomic.LoadInt64(&e.add)
  	pendingDels := atomic.LoadInt64(&e.del)
  	return pendingAdds, pendingDels
  }
  214. // NewControllerExpectations returns a store for ControllerExpectations.
  215. func NewControllerExpectations() *ControllerExpectations {
  216. return &ControllerExpectations{cache.NewStore(ExpKeyFunc)}
  217. }
  218. // UIDSetKeyFunc to parse out the key from a UIDSet.
  219. var UIDSetKeyFunc = func(obj interface{}) (string, error) {
  220. if u, ok := obj.(*UIDSet); ok {
  221. return u.key, nil
  222. }
  223. return "", fmt.Errorf("Could not find key for obj %#v", obj)
  224. }
  225. // UIDSet holds a key and a set of UIDs. Used by the
  226. // UIDTrackingControllerExpectations to remember which UID it has seen/still
  227. // waiting for.
  228. type UIDSet struct {
  229. sets.String
  230. key string
  231. }
  232. // UIDTrackingControllerExpectations tracks the UID of the pods it deletes.
  233. // This cache is needed over plain old expectations to safely handle graceful
  234. // deletion. The desired behavior is to treat an update that sets the
  235. // DeletionTimestamp on an object as a delete. To do so consistenly, one needs
  236. // to remember the expected deletes so they aren't double counted.
  237. // TODO: Track creates as well (#22599)
  238. type UIDTrackingControllerExpectations struct {
  239. ControllerExpectationsInterface
  240. // TODO: There is a much nicer way to do this that involves a single store,
  241. // a lock per entry, and a ControlleeExpectationsInterface type.
  242. uidStoreLock sync.Mutex
  243. // Store used for the UIDs associated with any expectation tracked via the
  244. // ControllerExpectationsInterface.
  245. uidStore cache.Store
  246. }
  247. // GetUIDs is a convenience method to avoid exposing the set of expected uids.
  248. // The returned set is not thread safe, all modifications must be made holding
  249. // the uidStoreLock.
  250. func (u *UIDTrackingControllerExpectations) GetUIDs(controllerKey string) sets.String {
  251. if uid, exists, err := u.uidStore.GetByKey(controllerKey); err == nil && exists {
  252. return uid.(*UIDSet).String
  253. }
  254. return nil
  255. }
  256. // ExpectDeletions records expectations for the given deleteKeys, against the given controller.
  257. func (u *UIDTrackingControllerExpectations) ExpectDeletions(rcKey string, deletedKeys []string) error {
  258. u.uidStoreLock.Lock()
  259. defer u.uidStoreLock.Unlock()
  260. if existing := u.GetUIDs(rcKey); existing != nil && existing.Len() != 0 {
  261. glog.Errorf("Clobbering existing delete keys: %+v", existing)
  262. }
  263. expectedUIDs := sets.NewString()
  264. for _, k := range deletedKeys {
  265. expectedUIDs.Insert(k)
  266. }
  267. glog.V(4).Infof("Controller %v waiting on deletions for: %+v", rcKey, deletedKeys)
  268. if err := u.uidStore.Add(&UIDSet{expectedUIDs, rcKey}); err != nil {
  269. return err
  270. }
  271. return u.ControllerExpectationsInterface.ExpectDeletions(rcKey, expectedUIDs.Len())
  272. }
  273. // DeletionObserved records the given deleteKey as a deletion, for the given rc.
  274. func (u *UIDTrackingControllerExpectations) DeletionObserved(rcKey, deleteKey string) {
  275. u.uidStoreLock.Lock()
  276. defer u.uidStoreLock.Unlock()
  277. uids := u.GetUIDs(rcKey)
  278. if uids != nil && uids.Has(deleteKey) {
  279. glog.V(4).Infof("Controller %v received delete for pod %v", rcKey, deleteKey)
  280. u.ControllerExpectationsInterface.DeletionObserved(rcKey)
  281. uids.Delete(deleteKey)
  282. }
  283. }
  284. // DeleteExpectations deletes the UID set and invokes DeleteExpectations on the
  285. // underlying ControllerExpectationsInterface.
  286. func (u *UIDTrackingControllerExpectations) DeleteExpectations(rcKey string) {
  287. u.uidStoreLock.Lock()
  288. defer u.uidStoreLock.Unlock()
  289. u.ControllerExpectationsInterface.DeleteExpectations(rcKey)
  290. if uidExp, exists, err := u.uidStore.GetByKey(rcKey); err == nil && exists {
  291. if err := u.uidStore.Delete(uidExp); err != nil {
  292. glog.V(2).Infof("Error deleting uid expectations for controller %v: %v", rcKey, err)
  293. }
  294. }
  295. }
  296. // NewUIDTrackingControllerExpectations returns a wrapper around
  297. // ControllerExpectations that is aware of deleteKeys.
  298. func NewUIDTrackingControllerExpectations(ce ControllerExpectationsInterface) *UIDTrackingControllerExpectations {
  299. return &UIDTrackingControllerExpectations{ControllerExpectationsInterface: ce, uidStore: cache.NewStore(UIDSetKeyFunc)}
  300. }
  301. // PodControlInterface is an interface that knows how to add or delete pods
  302. // created as an interface to allow testing.
  303. type PodControlInterface interface {
  304. // CreatePods creates new pods according to the spec.
  305. CreatePods(namespace string, template *api.PodTemplateSpec, object runtime.Object) error
  306. // CreatePodsOnNode creates a new pod accorting to the spec on the specified node.
  307. CreatePodsOnNode(nodeName, namespace string, template *api.PodTemplateSpec, object runtime.Object) error
  308. // CreatePodsWithControllerRef creates new pods according to the spec, and sets object as the pod's controller.
  309. CreatePodsWithControllerRef(namespace string, template *api.PodTemplateSpec, object runtime.Object, controllerRef *api.OwnerReference) error
  310. // DeletePod deletes the pod identified by podID.
  311. DeletePod(namespace string, podID string, object runtime.Object) error
  312. // PatchPod patches the pod.
  313. PatchPod(namespace, name string, data []byte) error
  314. }
  315. // RealPodControl is the default implementation of PodControlInterface.
  316. type RealPodControl struct {
  317. KubeClient clientset.Interface
  318. Recorder record.EventRecorder
  319. }
  320. var _ PodControlInterface = &RealPodControl{}
  321. func getPodsLabelSet(template *api.PodTemplateSpec) labels.Set {
  322. desiredLabels := make(labels.Set)
  323. for k, v := range template.Labels {
  324. desiredLabels[k] = v
  325. }
  326. return desiredLabels
  327. }
  328. func getPodsAnnotationSet(template *api.PodTemplateSpec, object runtime.Object) (labels.Set, error) {
  329. desiredAnnotations := make(labels.Set)
  330. for k, v := range template.Annotations {
  331. desiredAnnotations[k] = v
  332. }
  333. createdByRef, err := api.GetReference(object)
  334. if err != nil {
  335. return desiredAnnotations, fmt.Errorf("unable to get controller reference: %v", err)
  336. }
  337. // TODO: this code was not safe previously - as soon as new code came along that switched to v2, old clients
  338. // would be broken upon reading it. This is explicitly hardcoded to v1 to guarantee predictable deployment.
  339. // We need to consistently handle this case of annotation versioning.
  340. codec := api.Codecs.LegacyCodec(unversioned.GroupVersion{Group: api.GroupName, Version: "v1"})
  341. createdByRefJson, err := runtime.Encode(codec, &api.SerializedReference{
  342. Reference: *createdByRef,
  343. })
  344. if err != nil {
  345. return desiredAnnotations, fmt.Errorf("unable to serialize controller reference: %v", err)
  346. }
  347. desiredAnnotations[api.CreatedByAnnotation] = string(createdByRefJson)
  348. return desiredAnnotations, nil
  349. }
  350. func getPodsPrefix(controllerName string) string {
  351. // use the dash (if the name isn't too long) to make the pod name a bit prettier
  352. prefix := fmt.Sprintf("%s-", controllerName)
  353. if len(validation.ValidatePodName(prefix, true)) != 0 {
  354. prefix = controllerName
  355. }
  356. return prefix
  357. }
  358. func (r RealPodControl) CreatePods(namespace string, template *api.PodTemplateSpec, object runtime.Object) error {
  359. return r.createPods("", namespace, template, object, nil)
  360. }
  361. func (r RealPodControl) CreatePodsWithControllerRef(namespace string, template *api.PodTemplateSpec, controllerObject runtime.Object, controllerRef *api.OwnerReference) error {
  362. if controllerRef == nil {
  363. return fmt.Errorf("controllerRef is nil")
  364. }
  365. if len(controllerRef.APIVersion) == 0 {
  366. return fmt.Errorf("controllerRef has empty APIVersion")
  367. }
  368. if len(controllerRef.Kind) == 0 {
  369. return fmt.Errorf("controllerRef has empty Kind")
  370. }
  371. if controllerRef.Controller == nil || *controllerRef.Controller != true {
  372. return fmt.Errorf("controllerRef.Controller is not set")
  373. }
  374. return r.createPods("", namespace, template, controllerObject, controllerRef)
  375. }
  376. func (r RealPodControl) CreatePodsOnNode(nodeName, namespace string, template *api.PodTemplateSpec, object runtime.Object) error {
  377. return r.createPods(nodeName, namespace, template, object, nil)
  378. }
  379. func (r RealPodControl) PatchPod(namespace, name string, data []byte) error {
  380. _, err := r.KubeClient.Core().Pods(namespace).Patch(name, api.StrategicMergePatchType, data)
  381. return err
  382. }
  383. func GetPodFromTemplate(template *api.PodTemplateSpec, parentObject runtime.Object, controllerRef *api.OwnerReference) (*api.Pod, error) {
  384. desiredLabels := getPodsLabelSet(template)
  385. desiredAnnotations, err := getPodsAnnotationSet(template, parentObject)
  386. if err != nil {
  387. return nil, err
  388. }
  389. accessor, err := meta.Accessor(parentObject)
  390. if err != nil {
  391. return nil, fmt.Errorf("parentObject does not have ObjectMeta, %v", err)
  392. }
  393. prefix := getPodsPrefix(accessor.GetName())
  394. pod := &api.Pod{
  395. ObjectMeta: api.ObjectMeta{
  396. Labels: desiredLabels,
  397. Annotations: desiredAnnotations,
  398. GenerateName: prefix,
  399. },
  400. }
  401. if controllerRef != nil {
  402. pod.OwnerReferences = append(pod.OwnerReferences, *controllerRef)
  403. }
  404. if err := api.Scheme.Convert(&template.Spec, &pod.Spec, nil); err != nil {
  405. return nil, fmt.Errorf("unable to convert pod template: %v", err)
  406. }
  407. return pod, nil
  408. }
  409. func (r RealPodControl) createPods(nodeName, namespace string, template *api.PodTemplateSpec, object runtime.Object, controllerRef *api.OwnerReference) error {
  410. pod, err := GetPodFromTemplate(template, object, controllerRef)
  411. if err != nil {
  412. return err
  413. }
  414. if len(nodeName) != 0 {
  415. pod.Spec.NodeName = nodeName
  416. }
  417. if labels.Set(pod.Labels).AsSelectorPreValidated().Empty() {
  418. return fmt.Errorf("unable to create pods, no labels")
  419. }
  420. if newPod, err := r.KubeClient.Core().Pods(namespace).Create(pod); err != nil {
  421. r.Recorder.Eventf(object, api.EventTypeWarning, "FailedCreate", "Error creating: %v", err)
  422. return fmt.Errorf("unable to create pods: %v", err)
  423. } else {
  424. accessor, err := meta.Accessor(object)
  425. if err != nil {
  426. glog.Errorf("parentObject does not have ObjectMeta, %v", err)
  427. return nil
  428. }
  429. glog.V(4).Infof("Controller %v created pod %v", accessor.GetName(), newPod.Name)
  430. r.Recorder.Eventf(object, api.EventTypeNormal, "SuccessfulCreate", "Created pod: %v", newPod.Name)
  431. }
  432. return nil
  433. }
  434. func (r RealPodControl) DeletePod(namespace string, podID string, object runtime.Object) error {
  435. accessor, err := meta.Accessor(object)
  436. if err != nil {
  437. return fmt.Errorf("object does not have ObjectMeta, %v", err)
  438. }
  439. if err := r.KubeClient.Core().Pods(namespace).Delete(podID, nil); err != nil {
  440. r.Recorder.Eventf(object, api.EventTypeWarning, "FailedDelete", "Error deleting: %v", err)
  441. return fmt.Errorf("unable to delete pods: %v", err)
  442. } else {
  443. glog.V(4).Infof("Controller %v deleted pod %v", accessor.GetName(), podID)
  444. r.Recorder.Eventf(object, api.EventTypeNormal, "SuccessfulDelete", "Deleted pod: %v", podID)
  445. }
  446. return nil
  447. }
  448. type FakePodControl struct {
  449. sync.Mutex
  450. Templates []api.PodTemplateSpec
  451. ControllerRefs []api.OwnerReference
  452. DeletePodName []string
  453. Patches [][]byte
  454. Err error
  455. }
  456. var _ PodControlInterface = &FakePodControl{}
  457. func (f *FakePodControl) PatchPod(namespace, name string, data []byte) error {
  458. f.Lock()
  459. defer f.Unlock()
  460. f.Patches = append(f.Patches, data)
  461. if f.Err != nil {
  462. return f.Err
  463. }
  464. return nil
  465. }
  466. func (f *FakePodControl) CreatePods(namespace string, spec *api.PodTemplateSpec, object runtime.Object) error {
  467. f.Lock()
  468. defer f.Unlock()
  469. f.Templates = append(f.Templates, *spec)
  470. if f.Err != nil {
  471. return f.Err
  472. }
  473. return nil
  474. }
  475. func (f *FakePodControl) CreatePodsWithControllerRef(namespace string, spec *api.PodTemplateSpec, object runtime.Object, controllerRef *api.OwnerReference) error {
  476. f.Lock()
  477. defer f.Unlock()
  478. f.Templates = append(f.Templates, *spec)
  479. f.ControllerRefs = append(f.ControllerRefs, *controllerRef)
  480. if f.Err != nil {
  481. return f.Err
  482. }
  483. return nil
  484. }
  485. func (f *FakePodControl) CreatePodsOnNode(nodeName, namespace string, template *api.PodTemplateSpec, object runtime.Object) error {
  486. f.Lock()
  487. defer f.Unlock()
  488. f.Templates = append(f.Templates, *template)
  489. if f.Err != nil {
  490. return f.Err
  491. }
  492. return nil
  493. }
  494. func (f *FakePodControl) DeletePod(namespace string, podID string, object runtime.Object) error {
  495. f.Lock()
  496. defer f.Unlock()
  497. f.DeletePodName = append(f.DeletePodName, podID)
  498. if f.Err != nil {
  499. return f.Err
  500. }
  501. return nil
  502. }
  503. func (f *FakePodControl) Clear() {
  504. f.Lock()
  505. defer f.Unlock()
  506. f.DeletePodName = []string{}
  507. f.Templates = []api.PodTemplateSpec{}
  508. f.ControllerRefs = []api.OwnerReference{}
  509. f.Patches = [][]byte{}
  510. }
  511. // ByLogging allows custom sorting of pods so the best one can be picked for getting its logs.
  512. type ByLogging []*api.Pod
  513. func (s ByLogging) Len() int { return len(s) }
  514. func (s ByLogging) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
  515. func (s ByLogging) Less(i, j int) bool {
  516. // 1. assigned < unassigned
  517. if s[i].Spec.NodeName != s[j].Spec.NodeName && (len(s[i].Spec.NodeName) == 0 || len(s[j].Spec.NodeName) == 0) {
  518. return len(s[i].Spec.NodeName) > 0
  519. }
  520. // 2. PodRunning < PodUnknown < PodPending
  521. m := map[api.PodPhase]int{api.PodRunning: 0, api.PodUnknown: 1, api.PodPending: 2}
  522. if m[s[i].Status.Phase] != m[s[j].Status.Phase] {
  523. return m[s[i].Status.Phase] < m[s[j].Status.Phase]
  524. }
  525. // 3. ready < not ready
  526. if api.IsPodReady(s[i]) != api.IsPodReady(s[j]) {
  527. return api.IsPodReady(s[i])
  528. }
  529. // TODO: take availability into account when we push minReadySeconds information from deployment into pods,
  530. // see https://github.com/kubernetes/kubernetes/issues/22065
  531. // 4. Been ready for more time < less time < empty time
  532. if api.IsPodReady(s[i]) && api.IsPodReady(s[j]) && !podReadyTime(s[i]).Equal(podReadyTime(s[j])) {
  533. return afterOrZero(podReadyTime(s[j]), podReadyTime(s[i]))
  534. }
  535. // 5. Pods with containers with higher restart counts < lower restart counts
  536. if maxContainerRestarts(s[i]) != maxContainerRestarts(s[j]) {
  537. return maxContainerRestarts(s[i]) > maxContainerRestarts(s[j])
  538. }
  539. // 6. older pods < newer pods < empty timestamp pods
  540. if !s[i].CreationTimestamp.Equal(s[j].CreationTimestamp) {
  541. return afterOrZero(s[j].CreationTimestamp, s[i].CreationTimestamp)
  542. }
  543. return false
  544. }
  545. // ActivePods type allows custom sorting of pods so a controller can pick the best ones to delete.
  546. type ActivePods []*api.Pod
  547. func (s ActivePods) Len() int { return len(s) }
  548. func (s ActivePods) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
  549. func (s ActivePods) Less(i, j int) bool {
  550. // 1. Unassigned < assigned
  551. // If only one of the pods is unassigned, the unassigned one is smaller
  552. if s[i].Spec.NodeName != s[j].Spec.NodeName && (len(s[i].Spec.NodeName) == 0 || len(s[j].Spec.NodeName) == 0) {
  553. return len(s[i].Spec.NodeName) == 0
  554. }
  555. // 2. PodPending < PodUnknown < PodRunning
  556. m := map[api.PodPhase]int{api.PodPending: 0, api.PodUnknown: 1, api.PodRunning: 2}
  557. if m[s[i].Status.Phase] != m[s[j].Status.Phase] {
  558. return m[s[i].Status.Phase] < m[s[j].Status.Phase]
  559. }
  560. // 3. Not ready < ready
  561. // If only one of the pods is not ready, the not ready one is smaller
  562. if api.IsPodReady(s[i]) != api.IsPodReady(s[j]) {
  563. return !api.IsPodReady(s[i])
  564. }
  565. // TODO: take availability into account when we push minReadySeconds information from deployment into pods,
  566. // see https://github.com/kubernetes/kubernetes/issues/22065
  567. // 4. Been ready for empty time < less time < more time
  568. // If both pods are ready, the latest ready one is smaller
  569. if api.IsPodReady(s[i]) && api.IsPodReady(s[j]) && !podReadyTime(s[i]).Equal(podReadyTime(s[j])) {
  570. return afterOrZero(podReadyTime(s[i]), podReadyTime(s[j]))
  571. }
  572. // 5. Pods with containers with higher restart counts < lower restart counts
  573. if maxContainerRestarts(s[i]) != maxContainerRestarts(s[j]) {
  574. return maxContainerRestarts(s[i]) > maxContainerRestarts(s[j])
  575. }
  576. // 6. Empty creation time pods < newer pods < older pods
  577. if !s[i].CreationTimestamp.Equal(s[j].CreationTimestamp) {
  578. return afterOrZero(s[i].CreationTimestamp, s[j].CreationTimestamp)
  579. }
  580. return false
  581. }
  582. // afterOrZero checks if time t1 is after time t2; if one of them
  583. // is zero, the zero time is seen as after non-zero time.
  584. func afterOrZero(t1, t2 unversioned.Time) bool {
  585. if t1.Time.IsZero() || t2.Time.IsZero() {
  586. return t1.Time.IsZero()
  587. }
  588. return t1.After(t2.Time)
  589. }
  590. func podReadyTime(pod *api.Pod) unversioned.Time {
  591. if api.IsPodReady(pod) {
  592. for _, c := range pod.Status.Conditions {
  593. // we only care about pod ready conditions
  594. if c.Type == api.PodReady && c.Status == api.ConditionTrue {
  595. return c.LastTransitionTime
  596. }
  597. }
  598. }
  599. return unversioned.Time{}
  600. }
  601. func maxContainerRestarts(pod *api.Pod) int {
  602. maxRestarts := 0
  603. for _, c := range pod.Status.ContainerStatuses {
  604. maxRestarts = integer.IntMax(maxRestarts, int(c.RestartCount))
  605. }
  606. return maxRestarts
  607. }
  608. // FilterActivePods returns pods that have not terminated.
  609. func FilterActivePods(pods []*api.Pod) []*api.Pod {
  610. var result []*api.Pod
  611. for _, p := range pods {
  612. if IsPodActive(p) {
  613. result = append(result, p)
  614. } else {
  615. glog.V(4).Infof("Ignoring inactive pod %v/%v in state %v, deletion time %v",
  616. p.Namespace, p.Name, p.Status.Phase, p.DeletionTimestamp)
  617. }
  618. }
  619. return result
  620. }
  621. func IsPodActive(p *api.Pod) bool {
  622. return api.PodSucceeded != p.Status.Phase &&
  623. api.PodFailed != p.Status.Phase &&
  624. p.DeletionTimestamp == nil
  625. }
  626. // FilterActiveReplicaSets returns replica sets that have (or at least ought to have) pods.
  627. func FilterActiveReplicaSets(replicaSets []*extensions.ReplicaSet) []*extensions.ReplicaSet {
  628. active := []*extensions.ReplicaSet{}
  629. for i := range replicaSets {
  630. rs := replicaSets[i]
  631. if rs != nil && rs.Spec.Replicas > 0 {
  632. active = append(active, replicaSets[i])
  633. }
  634. }
  635. return active
  636. }
  637. // PodKey returns a key unique to the given pod within a cluster.
  638. // It's used so we consistently use the same key scheme in this module.
  639. // It does exactly what cache.MetaNamespaceKeyFunc would have done
  640. // except there's not possibility for error since we know the exact type.
  641. func PodKey(pod *api.Pod) string {
  642. return fmt.Sprintf("%v/%v", pod.Namespace, pod.Name)
  643. }
  644. // ControllersByCreationTimestamp sorts a list of ReplicationControllers by creation timestamp, using their names as a tie breaker.
  645. type ControllersByCreationTimestamp []*api.ReplicationController
  646. func (o ControllersByCreationTimestamp) Len() int { return len(o) }
  647. func (o ControllersByCreationTimestamp) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
  648. func (o ControllersByCreationTimestamp) Less(i, j int) bool {
  649. if o[i].CreationTimestamp.Equal(o[j].CreationTimestamp) {
  650. return o[i].Name < o[j].Name
  651. }
  652. return o[i].CreationTimestamp.Before(o[j].CreationTimestamp)
  653. }
  654. // ReplicaSetsByCreationTimestamp sorts a list of ReplicaSet by creation timestamp, using their names as a tie breaker.
  655. type ReplicaSetsByCreationTimestamp []*extensions.ReplicaSet
  656. func (o ReplicaSetsByCreationTimestamp) Len() int { return len(o) }
  657. func (o ReplicaSetsByCreationTimestamp) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
  658. func (o ReplicaSetsByCreationTimestamp) Less(i, j int) bool {
  659. if o[i].CreationTimestamp.Equal(o[j].CreationTimestamp) {
  660. return o[i].Name < o[j].Name
  661. }
  662. return o[i].CreationTimestamp.Before(o[j].CreationTimestamp)
  663. }
  664. // ReplicaSetsBySizeOlder sorts a list of ReplicaSet by size in descending order, using their creation timestamp or name as a tie breaker.
  665. // By using the creation timestamp, this sorts from old to new replica sets.
  666. type ReplicaSetsBySizeOlder []*extensions.ReplicaSet
  667. func (o ReplicaSetsBySizeOlder) Len() int { return len(o) }
  668. func (o ReplicaSetsBySizeOlder) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
  669. func (o ReplicaSetsBySizeOlder) Less(i, j int) bool {
  670. if o[i].Spec.Replicas == o[j].Spec.Replicas {
  671. return ReplicaSetsByCreationTimestamp(o).Less(i, j)
  672. }
  673. return o[i].Spec.Replicas > o[j].Spec.Replicas
  674. }
  675. // ReplicaSetsBySizeNewer sorts a list of ReplicaSet by size in descending order, using their creation timestamp or name as a tie breaker.
  676. // By using the creation timestamp, this sorts from new to old replica sets.
  677. type ReplicaSetsBySizeNewer []*extensions.ReplicaSet
  678. func (o ReplicaSetsBySizeNewer) Len() int { return len(o) }
  679. func (o ReplicaSetsBySizeNewer) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
  680. func (o ReplicaSetsBySizeNewer) Less(i, j int) bool {
  681. if o[i].Spec.Replicas == o[j].Spec.Replicas {
  682. return ReplicaSetsByCreationTimestamp(o).Less(j, i)
  683. }
  684. return o[i].Spec.Replicas > o[j].Spec.Replicas
  685. }