nodecontroller_test.go 65 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944
  1. /*
  2. Copyright 2014 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package node
  14. import (
  15. "testing"
  16. "time"
  17. "k8s.io/kubernetes/pkg/api"
  18. "k8s.io/kubernetes/pkg/api/resource"
  19. "k8s.io/kubernetes/pkg/api/unversioned"
  20. "k8s.io/kubernetes/pkg/apis/extensions"
  21. "k8s.io/kubernetes/pkg/client/cache"
  22. "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset/fake"
  23. fakecloud "k8s.io/kubernetes/pkg/cloudprovider/providers/fake"
  24. "k8s.io/kubernetes/pkg/util/diff"
  25. "k8s.io/kubernetes/pkg/util/wait"
  26. )
  // Settings shared by the node controller tests in this file. They are passed
  // to NewNodeControllerFromClient when a controller under test is built.
  const (
  	// Durations handed to the controller constructor. testNodeMonitorGracePeriod
  	// is additionally used to compute a "short" elapsed time in the eviction table.
  	testNodeMonitorGracePeriod time.Duration = 40 * time.Second
  	testNodeStartupGracePeriod time.Duration = 60 * time.Second
  	testNodeMonitorPeriod      time.Duration = 5 * time.Second

  	// Passed for both rate-limiter QPS arguments of the constructor.
  	testRateLimiterQPS float32 = 10000

  	// Deliberately left untyped so it adopts the constructor's parameter type.
  	testLargeClusterThreshold = 20

  	// NOTE(review): the identifier is misspelled ("Unhealty"); it is kept
  	// because other code in this file refers to it by this name.
  	testUnhealtyThreshold float32 = 0.55
  )
  35. func TestMonitorNodeStatusEvictPods(t *testing.T) {
  36. fakeNow := unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
  37. evictionTimeout := 10 * time.Minute
  38. // Because of the logic that prevents NC from evicting anything when all Nodes are NotReady
  39. // we need second healthy node in tests. Because of how the tests are written we need to update
  40. // the status of this Node.
  41. healthyNodeNewStatus := api.NodeStatus{
  42. Conditions: []api.NodeCondition{
  43. {
  44. Type: api.NodeReady,
  45. Status: api.ConditionTrue,
  46. // Node status has just been updated, and is NotReady for 10min.
  47. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 9, 0, 0, time.UTC),
  48. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  49. },
  50. },
  51. }
  52. table := []struct {
  53. fakeNodeHandler *FakeNodeHandler
  54. daemonSets []extensions.DaemonSet
  55. timeToPass time.Duration
  56. newNodeStatus api.NodeStatus
  57. secondNodeNewStatus api.NodeStatus
  58. expectedEvictPods bool
  59. description string
  60. }{
  61. // Node created recently, with no status (happens only at cluster startup).
  62. {
  63. fakeNodeHandler: &FakeNodeHandler{
  64. Existing: []*api.Node{
  65. {
  66. ObjectMeta: api.ObjectMeta{
  67. Name: "node0",
  68. CreationTimestamp: fakeNow,
  69. Labels: map[string]string{
  70. unversioned.LabelZoneRegion: "region1",
  71. unversioned.LabelZoneFailureDomain: "zone1",
  72. },
  73. },
  74. },
  75. {
  76. ObjectMeta: api.ObjectMeta{
  77. Name: "node1",
  78. CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  79. Labels: map[string]string{
  80. unversioned.LabelZoneRegion: "region1",
  81. unversioned.LabelZoneFailureDomain: "zone1",
  82. },
  83. },
  84. Status: api.NodeStatus{
  85. Conditions: []api.NodeCondition{
  86. {
  87. Type: api.NodeReady,
  88. Status: api.ConditionTrue,
  89. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  90. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  91. },
  92. },
  93. },
  94. },
  95. },
  96. Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
  97. },
  98. daemonSets: nil,
  99. timeToPass: 0,
  100. newNodeStatus: api.NodeStatus{},
  101. secondNodeNewStatus: healthyNodeNewStatus,
  102. expectedEvictPods: false,
  103. description: "Node created recently, with no status.",
  104. },
  105. // Node created long time ago, and kubelet posted NotReady for a short period of time.
  106. {
  107. fakeNodeHandler: &FakeNodeHandler{
  108. Existing: []*api.Node{
  109. {
  110. ObjectMeta: api.ObjectMeta{
  111. Name: "node0",
  112. CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  113. Labels: map[string]string{
  114. unversioned.LabelZoneRegion: "region1",
  115. unversioned.LabelZoneFailureDomain: "zone1",
  116. },
  117. },
  118. Status: api.NodeStatus{
  119. Conditions: []api.NodeCondition{
  120. {
  121. Type: api.NodeReady,
  122. Status: api.ConditionFalse,
  123. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  124. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  125. },
  126. },
  127. },
  128. },
  129. {
  130. ObjectMeta: api.ObjectMeta{
  131. Name: "node1",
  132. CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  133. Labels: map[string]string{
  134. unversioned.LabelZoneRegion: "region1",
  135. unversioned.LabelZoneFailureDomain: "zone1",
  136. },
  137. },
  138. Status: api.NodeStatus{
  139. Conditions: []api.NodeCondition{
  140. {
  141. Type: api.NodeReady,
  142. Status: api.ConditionTrue,
  143. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  144. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  145. },
  146. },
  147. },
  148. },
  149. },
  150. Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
  151. },
  152. daemonSets: nil,
  153. timeToPass: evictionTimeout,
  154. newNodeStatus: api.NodeStatus{
  155. Conditions: []api.NodeCondition{
  156. {
  157. Type: api.NodeReady,
  158. Status: api.ConditionFalse,
  159. // Node status has just been updated, and is NotReady for 10min.
  160. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 9, 0, 0, time.UTC),
  161. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  162. },
  163. },
  164. },
  165. secondNodeNewStatus: healthyNodeNewStatus,
  166. expectedEvictPods: false,
  167. description: "Node created long time ago, and kubelet posted NotReady for a short period of time.",
  168. },
  169. // Pod is ds-managed, and kubelet posted NotReady for a long period of time.
  170. {
  171. fakeNodeHandler: &FakeNodeHandler{
  172. Existing: []*api.Node{
  173. {
  174. ObjectMeta: api.ObjectMeta{
  175. Name: "node0",
  176. CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  177. Labels: map[string]string{
  178. unversioned.LabelZoneRegion: "region1",
  179. unversioned.LabelZoneFailureDomain: "zone1",
  180. },
  181. },
  182. Status: api.NodeStatus{
  183. Conditions: []api.NodeCondition{
  184. {
  185. Type: api.NodeReady,
  186. Status: api.ConditionFalse,
  187. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  188. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  189. },
  190. },
  191. },
  192. },
  193. {
  194. ObjectMeta: api.ObjectMeta{
  195. Name: "node1",
  196. CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  197. Labels: map[string]string{
  198. unversioned.LabelZoneRegion: "region1",
  199. unversioned.LabelZoneFailureDomain: "zone1",
  200. },
  201. },
  202. Status: api.NodeStatus{
  203. Conditions: []api.NodeCondition{
  204. {
  205. Type: api.NodeReady,
  206. Status: api.ConditionTrue,
  207. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  208. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  209. },
  210. },
  211. },
  212. },
  213. },
  214. Clientset: fake.NewSimpleClientset(
  215. &api.PodList{
  216. Items: []api.Pod{
  217. {
  218. ObjectMeta: api.ObjectMeta{
  219. Name: "pod0",
  220. Namespace: "default",
  221. Labels: map[string]string{"daemon": "yes"},
  222. },
  223. Spec: api.PodSpec{
  224. NodeName: "node0",
  225. },
  226. },
  227. },
  228. },
  229. ),
  230. },
  231. daemonSets: []extensions.DaemonSet{
  232. {
  233. ObjectMeta: api.ObjectMeta{
  234. Name: "ds0",
  235. Namespace: "default",
  236. },
  237. Spec: extensions.DaemonSetSpec{
  238. Selector: &unversioned.LabelSelector{
  239. MatchLabels: map[string]string{"daemon": "yes"},
  240. },
  241. },
  242. },
  243. },
  244. timeToPass: time.Hour,
  245. newNodeStatus: api.NodeStatus{
  246. Conditions: []api.NodeCondition{
  247. {
  248. Type: api.NodeReady,
  249. Status: api.ConditionFalse,
  250. // Node status has just been updated, and is NotReady for 1hr.
  251. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 59, 0, 0, time.UTC),
  252. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  253. },
  254. },
  255. },
  256. secondNodeNewStatus: healthyNodeNewStatus,
  257. expectedEvictPods: false,
  258. description: "Pod is ds-managed, and kubelet posted NotReady for a long period of time.",
  259. },
  260. // Node created long time ago, and kubelet posted NotReady for a long period of time.
  261. {
  262. fakeNodeHandler: &FakeNodeHandler{
  263. Existing: []*api.Node{
  264. {
  265. ObjectMeta: api.ObjectMeta{
  266. Name: "node0",
  267. CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  268. Labels: map[string]string{
  269. unversioned.LabelZoneRegion: "region1",
  270. unversioned.LabelZoneFailureDomain: "zone1",
  271. },
  272. },
  273. Status: api.NodeStatus{
  274. Conditions: []api.NodeCondition{
  275. {
  276. Type: api.NodeReady,
  277. Status: api.ConditionFalse,
  278. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  279. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  280. },
  281. },
  282. },
  283. },
  284. {
  285. ObjectMeta: api.ObjectMeta{
  286. Name: "node1",
  287. CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  288. Labels: map[string]string{
  289. unversioned.LabelZoneRegion: "region1",
  290. unversioned.LabelZoneFailureDomain: "zone1",
  291. },
  292. },
  293. Status: api.NodeStatus{
  294. Conditions: []api.NodeCondition{
  295. {
  296. Type: api.NodeReady,
  297. Status: api.ConditionTrue,
  298. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  299. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  300. },
  301. },
  302. },
  303. },
  304. },
  305. Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
  306. },
  307. daemonSets: nil,
  308. timeToPass: time.Hour,
  309. newNodeStatus: api.NodeStatus{
  310. Conditions: []api.NodeCondition{
  311. {
  312. Type: api.NodeReady,
  313. Status: api.ConditionFalse,
  314. // Node status has just been updated, and is NotReady for 1hr.
  315. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 59, 0, 0, time.UTC),
  316. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  317. },
  318. },
  319. },
  320. secondNodeNewStatus: healthyNodeNewStatus,
  321. expectedEvictPods: true,
  322. description: "Node created long time ago, and kubelet posted NotReady for a long period of time.",
  323. },
  324. // Node created long time ago, node controller posted Unknown for a short period of time.
  325. {
  326. fakeNodeHandler: &FakeNodeHandler{
  327. Existing: []*api.Node{
  328. {
  329. ObjectMeta: api.ObjectMeta{
  330. Name: "node0",
  331. CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  332. Labels: map[string]string{
  333. unversioned.LabelZoneRegion: "region1",
  334. unversioned.LabelZoneFailureDomain: "zone1",
  335. },
  336. },
  337. Status: api.NodeStatus{
  338. Conditions: []api.NodeCondition{
  339. {
  340. Type: api.NodeReady,
  341. Status: api.ConditionUnknown,
  342. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  343. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  344. },
  345. },
  346. },
  347. },
  348. {
  349. ObjectMeta: api.ObjectMeta{
  350. Name: "node1",
  351. CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  352. Labels: map[string]string{
  353. unversioned.LabelZoneRegion: "region1",
  354. unversioned.LabelZoneFailureDomain: "zone1",
  355. },
  356. },
  357. Status: api.NodeStatus{
  358. Conditions: []api.NodeCondition{
  359. {
  360. Type: api.NodeReady,
  361. Status: api.ConditionTrue,
  362. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  363. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  364. },
  365. },
  366. },
  367. },
  368. },
  369. Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
  370. },
  371. daemonSets: nil,
  372. timeToPass: evictionTimeout - testNodeMonitorGracePeriod,
  373. newNodeStatus: api.NodeStatus{
  374. Conditions: []api.NodeCondition{
  375. {
  376. Type: api.NodeReady,
  377. Status: api.ConditionUnknown,
  378. // Node status was updated by nodecontroller 10min ago
  379. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  380. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  381. },
  382. },
  383. },
  384. secondNodeNewStatus: healthyNodeNewStatus,
  385. expectedEvictPods: false,
  386. description: "Node created long time ago, node controller posted Unknown for a short period of time.",
  387. },
  388. // Node created long time ago, node controller posted Unknown for a long period of time.
  389. {
  390. fakeNodeHandler: &FakeNodeHandler{
  391. Existing: []*api.Node{
  392. {
  393. ObjectMeta: api.ObjectMeta{
  394. Name: "node0",
  395. CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  396. Labels: map[string]string{
  397. unversioned.LabelZoneRegion: "region1",
  398. unversioned.LabelZoneFailureDomain: "zone1",
  399. },
  400. },
  401. Status: api.NodeStatus{
  402. Conditions: []api.NodeCondition{
  403. {
  404. Type: api.NodeReady,
  405. Status: api.ConditionUnknown,
  406. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  407. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  408. },
  409. },
  410. },
  411. },
  412. {
  413. ObjectMeta: api.ObjectMeta{
  414. Name: "node1",
  415. CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  416. Labels: map[string]string{
  417. unversioned.LabelZoneRegion: "region1",
  418. unversioned.LabelZoneFailureDomain: "zone1",
  419. },
  420. },
  421. Status: api.NodeStatus{
  422. Conditions: []api.NodeCondition{
  423. {
  424. Type: api.NodeReady,
  425. Status: api.ConditionTrue,
  426. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  427. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  428. },
  429. },
  430. },
  431. },
  432. },
  433. Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
  434. },
  435. daemonSets: nil,
  436. timeToPass: 60 * time.Minute,
  437. newNodeStatus: api.NodeStatus{
  438. Conditions: []api.NodeCondition{
  439. {
  440. Type: api.NodeReady,
  441. Status: api.ConditionUnknown,
  442. // Node status was updated by nodecontroller 1hr ago
  443. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  444. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  445. },
  446. },
  447. },
  448. secondNodeNewStatus: healthyNodeNewStatus,
  449. expectedEvictPods: true,
  450. description: "Node created long time ago, node controller posted Unknown for a long period of time.",
  451. },
  452. }
  453. for _, item := range table {
  454. nodeController, _ := NewNodeControllerFromClient(nil, item.fakeNodeHandler,
  455. evictionTimeout, testRateLimiterQPS, testRateLimiterQPS, testLargeClusterThreshold, testUnhealtyThreshold, testNodeMonitorGracePeriod,
  456. testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, nil, 0, false)
  457. nodeController.now = func() unversioned.Time { return fakeNow }
  458. for _, ds := range item.daemonSets {
  459. nodeController.daemonSetStore.Add(&ds)
  460. }
  461. if err := nodeController.monitorNodeStatus(); err != nil {
  462. t.Errorf("unexpected error: %v", err)
  463. }
  464. if item.timeToPass > 0 {
  465. nodeController.now = func() unversioned.Time { return unversioned.Time{Time: fakeNow.Add(item.timeToPass)} }
  466. item.fakeNodeHandler.Existing[0].Status = item.newNodeStatus
  467. item.fakeNodeHandler.Existing[1].Status = item.secondNodeNewStatus
  468. }
  469. if err := nodeController.monitorNodeStatus(); err != nil {
  470. t.Errorf("unexpected error: %v", err)
  471. }
  472. zones := getZones(item.fakeNodeHandler)
  473. for _, zone := range zones {
  474. nodeController.zonePodEvictor[zone].Try(func(value TimedValue) (bool, time.Duration) {
  475. nodeUid, _ := value.UID.(string)
  476. remaining, _ := deletePods(item.fakeNodeHandler, nodeController.recorder, value.Value, nodeUid, nodeController.daemonSetStore)
  477. if remaining {
  478. nodeController.zoneTerminationEvictor[zone].Add(value.Value, nodeUid)
  479. }
  480. return true, 0
  481. })
  482. nodeController.zonePodEvictor[zone].Try(func(value TimedValue) (bool, time.Duration) {
  483. nodeUid, _ := value.UID.(string)
  484. terminatePods(item.fakeNodeHandler, nodeController.recorder, value.Value, nodeUid, value.AddedAt, nodeController.maximumGracePeriod)
  485. return true, 0
  486. })
  487. }
  488. podEvicted := false
  489. for _, action := range item.fakeNodeHandler.Actions() {
  490. if action.GetVerb() == "delete" && action.GetResource().Resource == "pods" {
  491. podEvicted = true
  492. }
  493. }
  494. if item.expectedEvictPods != podEvicted {
  495. t.Errorf("expected pod eviction: %+v, got %+v for %+v", item.expectedEvictPods,
  496. podEvicted, item.description)
  497. }
  498. }
  499. }
  500. func TestMonitorNodeStatusEvictPodsWithDisruption(t *testing.T) {
  501. fakeNow := unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
  502. evictionTimeout := 10 * time.Minute
  503. timeToPass := 60 * time.Minute
  504. // Because of the logic that prevents NC from evicting anything when all Nodes are NotReady
  505. // we need second healthy node in tests. Because of how the tests are written we need to update
  506. // the status of this Node.
  507. healthyNodeNewStatus := api.NodeStatus{
  508. Conditions: []api.NodeCondition{
  509. {
  510. Type: api.NodeReady,
  511. Status: api.ConditionTrue,
  512. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 13, 0, 0, 0, time.UTC),
  513. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  514. },
  515. },
  516. }
  517. unhealthyNodeNewStatus := api.NodeStatus{
  518. Conditions: []api.NodeCondition{
  519. {
  520. Type: api.NodeReady,
  521. Status: api.ConditionUnknown,
  522. // Node status was updated by nodecontroller 1hr ago
  523. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  524. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  525. },
  526. },
  527. }
  528. table := []struct {
  529. nodeList []*api.Node
  530. podList []api.Pod
  531. updatedNodeStatuses []api.NodeStatus
  532. expectedInitialStates map[string]zoneState
  533. expectedFollowingStates map[string]zoneState
  534. expectedEvictPods bool
  535. description string
  536. }{
  537. // NetworkDisruption: Node created long time ago, node controller posted Unknown for a long period of time on both Nodes.
  538. // Only zone is down - eviction shouldn't take place
  539. {
  540. nodeList: []*api.Node{
  541. {
  542. ObjectMeta: api.ObjectMeta{
  543. Name: "node0",
  544. CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  545. Labels: map[string]string{
  546. unversioned.LabelZoneRegion: "region1",
  547. unversioned.LabelZoneFailureDomain: "zone1",
  548. },
  549. },
  550. Status: api.NodeStatus{
  551. Conditions: []api.NodeCondition{
  552. {
  553. Type: api.NodeReady,
  554. Status: api.ConditionUnknown,
  555. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  556. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  557. },
  558. },
  559. },
  560. },
  561. {
  562. ObjectMeta: api.ObjectMeta{
  563. Name: "node1",
  564. CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  565. Labels: map[string]string{
  566. unversioned.LabelZoneRegion: "region1",
  567. unversioned.LabelZoneFailureDomain: "zone1",
  568. },
  569. },
  570. Status: api.NodeStatus{
  571. Conditions: []api.NodeCondition{
  572. {
  573. Type: api.NodeReady,
  574. Status: api.ConditionUnknown,
  575. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  576. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  577. },
  578. },
  579. },
  580. },
  581. },
  582. podList: []api.Pod{*newPod("pod0", "node0")},
  583. updatedNodeStatuses: []api.NodeStatus{
  584. unhealthyNodeNewStatus,
  585. unhealthyNodeNewStatus,
  586. },
  587. expectedInitialStates: map[string]zoneState{createZoneID("region1", "zone1"): stateFullDisruption},
  588. expectedFollowingStates: map[string]zoneState{createZoneID("region1", "zone1"): stateFullDisruption},
  589. expectedEvictPods: false,
  590. description: "Network Disruption: Only zone is down - eviction shouldn't take place.",
  591. },
  592. // NetworkDisruption: Node created long time ago, node controller posted Unknown for a long period of time on both Nodes.
  593. // Both zones down - eviction shouldn't take place
  594. {
  595. nodeList: []*api.Node{
  596. {
  597. ObjectMeta: api.ObjectMeta{
  598. Name: "node0",
  599. CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  600. Labels: map[string]string{
  601. unversioned.LabelZoneRegion: "region1",
  602. unversioned.LabelZoneFailureDomain: "zone1",
  603. },
  604. },
  605. Status: api.NodeStatus{
  606. Conditions: []api.NodeCondition{
  607. {
  608. Type: api.NodeReady,
  609. Status: api.ConditionUnknown,
  610. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  611. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  612. },
  613. },
  614. },
  615. },
  616. {
  617. ObjectMeta: api.ObjectMeta{
  618. Name: "node1",
  619. CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  620. Labels: map[string]string{
  621. unversioned.LabelZoneRegion: "region2",
  622. unversioned.LabelZoneFailureDomain: "zone2",
  623. },
  624. },
  625. Status: api.NodeStatus{
  626. Conditions: []api.NodeCondition{
  627. {
  628. Type: api.NodeReady,
  629. Status: api.ConditionUnknown,
  630. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  631. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  632. },
  633. },
  634. },
  635. },
  636. },
  637. podList: []api.Pod{*newPod("pod0", "node0")},
  638. updatedNodeStatuses: []api.NodeStatus{
  639. unhealthyNodeNewStatus,
  640. unhealthyNodeNewStatus,
  641. },
  642. expectedInitialStates: map[string]zoneState{
  643. createZoneID("region1", "zone1"): stateFullDisruption,
  644. createZoneID("region2", "zone2"): stateFullDisruption,
  645. },
  646. expectedFollowingStates: map[string]zoneState{
  647. createZoneID("region1", "zone1"): stateFullDisruption,
  648. createZoneID("region2", "zone2"): stateFullDisruption,
  649. },
  650. expectedEvictPods: false,
  651. description: "Network Disruption: Both zones down - eviction shouldn't take place.",
  652. },
  653. // NetworkDisruption: Node created long time ago, node controller posted Unknown for a long period of time on both Nodes.
  654. // One zone is down - eviction should take place
  655. {
  656. nodeList: []*api.Node{
  657. {
  658. ObjectMeta: api.ObjectMeta{
  659. Name: "node0",
  660. CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  661. Labels: map[string]string{
  662. unversioned.LabelZoneRegion: "region1",
  663. unversioned.LabelZoneFailureDomain: "zone1",
  664. },
  665. },
  666. Status: api.NodeStatus{
  667. Conditions: []api.NodeCondition{
  668. {
  669. Type: api.NodeReady,
  670. Status: api.ConditionUnknown,
  671. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  672. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  673. },
  674. },
  675. },
  676. },
  677. {
  678. ObjectMeta: api.ObjectMeta{
  679. Name: "node1",
  680. CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  681. Labels: map[string]string{
  682. unversioned.LabelZoneRegion: "region1",
  683. unversioned.LabelZoneFailureDomain: "zone2",
  684. },
  685. },
  686. Status: api.NodeStatus{
  687. Conditions: []api.NodeCondition{
  688. {
  689. Type: api.NodeReady,
  690. Status: api.ConditionTrue,
  691. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  692. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  693. },
  694. },
  695. },
  696. },
  697. },
  698. podList: []api.Pod{*newPod("pod0", "node0")},
  699. updatedNodeStatuses: []api.NodeStatus{
  700. unhealthyNodeNewStatus,
  701. healthyNodeNewStatus,
  702. },
  703. expectedInitialStates: map[string]zoneState{
  704. createZoneID("region1", "zone1"): stateFullDisruption,
  705. createZoneID("region1", "zone2"): stateNormal,
  706. },
  707. expectedFollowingStates: map[string]zoneState{
  708. createZoneID("region1", "zone1"): stateFullDisruption,
  709. createZoneID("region1", "zone2"): stateNormal,
  710. },
  711. expectedEvictPods: true,
  712. description: "Network Disruption: One zone is down - eviction should take place.",
  713. },
  714. // NetworkDisruption: Node created long time ago, node controller posted Unknown for a long period
  715. // of time on the first Node; eviction should stop even though the -master Node is healthy.
  716. {
  717. nodeList: []*api.Node{
  718. {
  719. ObjectMeta: api.ObjectMeta{
  720. Name: "node0",
  721. CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  722. Labels: map[string]string{
  723. unversioned.LabelZoneRegion: "region1",
  724. unversioned.LabelZoneFailureDomain: "zone1",
  725. },
  726. },
  727. Status: api.NodeStatus{
  728. Conditions: []api.NodeCondition{
  729. {
  730. Type: api.NodeReady,
  731. Status: api.ConditionUnknown,
  732. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  733. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  734. },
  735. },
  736. },
  737. },
  738. {
  739. ObjectMeta: api.ObjectMeta{
  740. Name: "node-master",
  741. CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  742. Labels: map[string]string{
  743. unversioned.LabelZoneRegion: "region1",
  744. unversioned.LabelZoneFailureDomain: "zone1",
  745. },
  746. },
  747. Status: api.NodeStatus{
  748. Conditions: []api.NodeCondition{
  749. {
  750. Type: api.NodeReady,
  751. Status: api.ConditionTrue,
  752. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  753. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  754. },
  755. },
  756. },
  757. },
  758. },
  759. podList: []api.Pod{*newPod("pod0", "node0")},
  760. updatedNodeStatuses: []api.NodeStatus{
  761. unhealthyNodeNewStatus,
  762. healthyNodeNewStatus,
  763. },
  764. expectedInitialStates: map[string]zoneState{
  765. createZoneID("region1", "zone1"): stateFullDisruption,
  766. },
  767. expectedFollowingStates: map[string]zoneState{
  768. createZoneID("region1", "zone1"): stateFullDisruption,
  769. },
  770. expectedEvictPods: false,
  771. description: "NetworkDisruption: eviction should stop, only -master Node is healthy",
  772. },
  773. // NetworkDisruption: Node created long time ago, node controller posted Unknown for a long period of time on both Nodes.
  774. // Initially both zones down, one comes back - eviction should take place
  775. {
  776. nodeList: []*api.Node{
  777. {
  778. ObjectMeta: api.ObjectMeta{
  779. Name: "node0",
  780. CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  781. Labels: map[string]string{
  782. unversioned.LabelZoneRegion: "region1",
  783. unversioned.LabelZoneFailureDomain: "zone1",
  784. },
  785. },
  786. Status: api.NodeStatus{
  787. Conditions: []api.NodeCondition{
  788. {
  789. Type: api.NodeReady,
  790. Status: api.ConditionUnknown,
  791. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  792. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  793. },
  794. },
  795. },
  796. },
  797. {
  798. ObjectMeta: api.ObjectMeta{
  799. Name: "node1",
  800. CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  801. Labels: map[string]string{
  802. unversioned.LabelZoneRegion: "region1",
  803. unversioned.LabelZoneFailureDomain: "zone2",
  804. },
  805. },
  806. Status: api.NodeStatus{
  807. Conditions: []api.NodeCondition{
  808. {
  809. Type: api.NodeReady,
  810. Status: api.ConditionUnknown,
  811. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  812. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  813. },
  814. },
  815. },
  816. },
  817. },
  818. podList: []api.Pod{*newPod("pod0", "node0")},
  819. updatedNodeStatuses: []api.NodeStatus{
  820. unhealthyNodeNewStatus,
  821. healthyNodeNewStatus,
  822. },
  823. expectedInitialStates: map[string]zoneState{
  824. createZoneID("region1", "zone1"): stateFullDisruption,
  825. createZoneID("region1", "zone2"): stateFullDisruption,
  826. },
  827. expectedFollowingStates: map[string]zoneState{
  828. createZoneID("region1", "zone1"): stateFullDisruption,
  829. createZoneID("region1", "zone2"): stateNormal,
  830. },
  831. expectedEvictPods: true,
  832. description: "Initially both zones down, one comes back - eviction should take place",
  833. },
  834. // NetworkDisruption: Node created long time ago, node controller posted Unknown for a long period of time on both Nodes.
  835. // Zone is partially disrupted - eviction should take place
  836. {
  837. nodeList: []*api.Node{
  838. {
  839. ObjectMeta: api.ObjectMeta{
  840. Name: "node0",
  841. CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  842. Labels: map[string]string{
  843. unversioned.LabelZoneRegion: "region1",
  844. unversioned.LabelZoneFailureDomain: "zone1",
  845. },
  846. },
  847. Status: api.NodeStatus{
  848. Conditions: []api.NodeCondition{
  849. {
  850. Type: api.NodeReady,
  851. Status: api.ConditionUnknown,
  852. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  853. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  854. },
  855. },
  856. },
  857. },
  858. {
  859. ObjectMeta: api.ObjectMeta{
  860. Name: "node1",
  861. CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  862. Labels: map[string]string{
  863. unversioned.LabelZoneRegion: "region1",
  864. unversioned.LabelZoneFailureDomain: "zone1",
  865. },
  866. },
  867. Status: api.NodeStatus{
  868. Conditions: []api.NodeCondition{
  869. {
  870. Type: api.NodeReady,
  871. Status: api.ConditionUnknown,
  872. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  873. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  874. },
  875. },
  876. },
  877. },
  878. {
  879. ObjectMeta: api.ObjectMeta{
  880. Name: "node2",
  881. CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  882. Labels: map[string]string{
  883. unversioned.LabelZoneRegion: "region1",
  884. unversioned.LabelZoneFailureDomain: "zone1",
  885. },
  886. },
  887. Status: api.NodeStatus{
  888. Conditions: []api.NodeCondition{
  889. {
  890. Type: api.NodeReady,
  891. Status: api.ConditionUnknown,
  892. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  893. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  894. },
  895. },
  896. },
  897. },
  898. {
  899. ObjectMeta: api.ObjectMeta{
  900. Name: "node3",
  901. CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  902. Labels: map[string]string{
  903. unversioned.LabelZoneRegion: "region1",
  904. unversioned.LabelZoneFailureDomain: "zone1",
  905. },
  906. },
  907. Status: api.NodeStatus{
  908. Conditions: []api.NodeCondition{
  909. {
  910. Type: api.NodeReady,
  911. Status: api.ConditionTrue,
  912. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  913. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  914. },
  915. },
  916. },
  917. },
  918. {
  919. ObjectMeta: api.ObjectMeta{
  920. Name: "node4",
  921. CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  922. Labels: map[string]string{
  923. unversioned.LabelZoneRegion: "region1",
  924. unversioned.LabelZoneFailureDomain: "zone1",
  925. },
  926. },
  927. Status: api.NodeStatus{
  928. Conditions: []api.NodeCondition{
  929. {
  930. Type: api.NodeReady,
  931. Status: api.ConditionTrue,
  932. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  933. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  934. },
  935. },
  936. },
  937. },
  938. },
  939. podList: []api.Pod{*newPod("pod0", "node0")},
  940. updatedNodeStatuses: []api.NodeStatus{
  941. unhealthyNodeNewStatus,
  942. unhealthyNodeNewStatus,
  943. unhealthyNodeNewStatus,
  944. healthyNodeNewStatus,
  945. healthyNodeNewStatus,
  946. },
  947. expectedInitialStates: map[string]zoneState{
  948. createZoneID("region1", "zone1"): statePartialDisruption,
  949. },
  950. expectedFollowingStates: map[string]zoneState{
  951. createZoneID("region1", "zone1"): statePartialDisruption,
  952. },
  953. expectedEvictPods: true,
  954. description: "Zone is partially disrupted - eviction should take place.",
  955. },
  956. }
  957. for _, item := range table {
  958. fakeNodeHandler := &FakeNodeHandler{
  959. Existing: item.nodeList,
  960. Clientset: fake.NewSimpleClientset(&api.PodList{Items: item.podList}),
  961. }
  962. nodeController, _ := NewNodeControllerFromClient(nil, fakeNodeHandler,
  963. evictionTimeout, testRateLimiterQPS, testRateLimiterQPS, testLargeClusterThreshold, testUnhealtyThreshold, testNodeMonitorGracePeriod,
  964. testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, nil, 0, false)
  965. nodeController.now = func() unversioned.Time { return fakeNow }
  966. nodeController.enterPartialDisruptionFunc = func(nodeNum int) float32 {
  967. return testRateLimiterQPS
  968. }
  969. nodeController.enterFullDisruptionFunc = func(nodeNum int) float32 {
  970. return testRateLimiterQPS
  971. }
  972. if err := nodeController.monitorNodeStatus(); err != nil {
  973. t.Errorf("%v: unexpected error: %v", item.description, err)
  974. }
  975. for zone, state := range item.expectedInitialStates {
  976. if state != nodeController.zoneStates[zone] {
  977. t.Errorf("%v: Unexpected zone state: %v: %v instead %v", item.description, zone, nodeController.zoneStates[zone], state)
  978. }
  979. }
  980. nodeController.now = func() unversioned.Time { return unversioned.Time{Time: fakeNow.Add(timeToPass)} }
  981. for i := range item.updatedNodeStatuses {
  982. fakeNodeHandler.Existing[i].Status = item.updatedNodeStatuses[i]
  983. }
  984. if err := nodeController.monitorNodeStatus(); err != nil {
  985. t.Errorf("%v: unexpected error: %v", item.description, err)
  986. }
  987. // Give some time for rate-limiter to reload
  988. time.Sleep(50 * time.Millisecond)
  989. for zone, state := range item.expectedFollowingStates {
  990. if state != nodeController.zoneStates[zone] {
  991. t.Errorf("%v: Unexpected zone state: %v: %v instead %v", item.description, zone, nodeController.zoneStates[zone], state)
  992. }
  993. }
  994. zones := getZones(fakeNodeHandler)
  995. for _, zone := range zones {
  996. nodeController.zonePodEvictor[zone].Try(func(value TimedValue) (bool, time.Duration) {
  997. uid, _ := value.UID.(string)
  998. remaining, _ := deletePods(fakeNodeHandler, nodeController.recorder, value.Value, uid, nodeController.daemonSetStore)
  999. if remaining {
  1000. nodeController.zoneTerminationEvictor[zone].Add(value.Value, value.UID)
  1001. }
  1002. return true, 0
  1003. })
  1004. nodeController.zonePodEvictor[zone].Try(func(value TimedValue) (bool, time.Duration) {
  1005. uid, _ := value.UID.(string)
  1006. terminatePods(fakeNodeHandler, nodeController.recorder, value.Value, uid, value.AddedAt, nodeController.maximumGracePeriod)
  1007. return true, 0
  1008. })
  1009. }
  1010. podEvicted := false
  1011. for _, action := range fakeNodeHandler.Actions() {
  1012. if action.GetVerb() == "delete" && action.GetResource().Resource == "pods" {
  1013. podEvicted = true
  1014. break
  1015. }
  1016. }
  1017. if item.expectedEvictPods != podEvicted {
  1018. t.Errorf("%v: expected pod eviction: %+v, got %+v", item.description, item.expectedEvictPods, podEvicted)
  1019. }
  1020. }
  1021. }
  1022. // TestCloudProviderNoRateLimit tests that monitorNodes() immediately deletes
  1023. // pods and the node when kubelet has not reported, and the cloudprovider says
  1024. // the node is gone.
  1025. func TestCloudProviderNoRateLimit(t *testing.T) {
  1026. fnh := &FakeNodeHandler{
  1027. Existing: []*api.Node{
  1028. {
  1029. ObjectMeta: api.ObjectMeta{
  1030. Name: "node0",
  1031. CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  1032. },
  1033. Status: api.NodeStatus{
  1034. Conditions: []api.NodeCondition{
  1035. {
  1036. Type: api.NodeReady,
  1037. Status: api.ConditionUnknown,
  1038. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  1039. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  1040. },
  1041. },
  1042. },
  1043. },
  1044. },
  1045. Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0"), *newPod("pod1", "node0")}}),
  1046. deleteWaitChan: make(chan struct{}),
  1047. }
  1048. nodeController, _ := NewNodeControllerFromClient(nil, fnh, 10*time.Minute,
  1049. testRateLimiterQPS, testRateLimiterQPS, testLargeClusterThreshold, testUnhealtyThreshold,
  1050. testNodeMonitorGracePeriod, testNodeStartupGracePeriod,
  1051. testNodeMonitorPeriod, nil, nil, 0, false)
  1052. nodeController.cloud = &fakecloud.FakeCloud{}
  1053. nodeController.now = func() unversioned.Time { return unversioned.Date(2016, 1, 1, 12, 0, 0, 0, time.UTC) }
  1054. nodeController.nodeExistsInCloudProvider = func(nodeName string) (bool, error) {
  1055. return false, nil
  1056. }
  1057. // monitorNodeStatus should allow this node to be immediately deleted
  1058. if err := nodeController.monitorNodeStatus(); err != nil {
  1059. t.Errorf("unexpected error: %v", err)
  1060. }
  1061. select {
  1062. case <-fnh.deleteWaitChan:
  1063. case <-time.After(wait.ForeverTestTimeout):
  1064. t.Errorf("Timed out waiting %v for node to be deleted", wait.ForeverTestTimeout)
  1065. }
  1066. if len(fnh.DeletedNodes) != 1 || fnh.DeletedNodes[0].Name != "node0" {
  1067. t.Errorf("Node was not deleted")
  1068. }
  1069. if nodeOnQueue := nodeController.zonePodEvictor[""].Remove("node0"); nodeOnQueue {
  1070. t.Errorf("Node was queued for eviction. Should have been immediately deleted.")
  1071. }
  1072. }
  1073. func TestMonitorNodeStatusUpdateStatus(t *testing.T) {
  1074. fakeNow := unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
  1075. table := []struct {
  1076. fakeNodeHandler *FakeNodeHandler
  1077. timeToPass time.Duration
  1078. newNodeStatus api.NodeStatus
  1079. expectedEvictPods bool
  1080. expectedRequestCount int
  1081. expectedNodes []*api.Node
  1082. }{
  1083. // Node created long time ago, without status:
  1084. // Expect Unknown status posted from node controller.
  1085. {
  1086. fakeNodeHandler: &FakeNodeHandler{
  1087. Existing: []*api.Node{
  1088. {
  1089. ObjectMeta: api.ObjectMeta{
  1090. Name: "node0",
  1091. CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  1092. },
  1093. },
  1094. },
  1095. Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
  1096. },
  1097. expectedRequestCount: 2, // List+Update
  1098. expectedNodes: []*api.Node{
  1099. {
  1100. ObjectMeta: api.ObjectMeta{
  1101. Name: "node0",
  1102. CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  1103. },
  1104. Status: api.NodeStatus{
  1105. Conditions: []api.NodeCondition{
  1106. {
  1107. Type: api.NodeReady,
  1108. Status: api.ConditionUnknown,
  1109. Reason: "NodeStatusNeverUpdated",
  1110. Message: "Kubelet never posted node status.",
  1111. LastHeartbeatTime: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  1112. LastTransitionTime: fakeNow,
  1113. },
  1114. {
  1115. Type: api.NodeOutOfDisk,
  1116. Status: api.ConditionUnknown,
  1117. Reason: "NodeStatusNeverUpdated",
  1118. Message: "Kubelet never posted node status.",
  1119. LastHeartbeatTime: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  1120. LastTransitionTime: fakeNow,
  1121. },
  1122. },
  1123. },
  1124. },
  1125. },
  1126. },
  1127. // Node created recently, without status.
  1128. // Expect no action from node controller (within startup grace period).
  1129. {
  1130. fakeNodeHandler: &FakeNodeHandler{
  1131. Existing: []*api.Node{
  1132. {
  1133. ObjectMeta: api.ObjectMeta{
  1134. Name: "node0",
  1135. CreationTimestamp: fakeNow,
  1136. },
  1137. },
  1138. },
  1139. Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
  1140. },
  1141. expectedRequestCount: 1, // List
  1142. expectedNodes: nil,
  1143. },
  1144. // Node created long time ago, with status updated by kubelet exceeds grace period.
  1145. // Expect Unknown status posted from node controller.
  1146. {
  1147. fakeNodeHandler: &FakeNodeHandler{
  1148. Existing: []*api.Node{
  1149. {
  1150. ObjectMeta: api.ObjectMeta{
  1151. Name: "node0",
  1152. CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  1153. },
  1154. Status: api.NodeStatus{
  1155. Conditions: []api.NodeCondition{
  1156. {
  1157. Type: api.NodeReady,
  1158. Status: api.ConditionTrue,
  1159. // Node status hasn't been updated for 1hr.
  1160. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  1161. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  1162. },
  1163. {
  1164. Type: api.NodeOutOfDisk,
  1165. Status: api.ConditionFalse,
  1166. // Node status hasn't been updated for 1hr.
  1167. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  1168. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  1169. },
  1170. },
  1171. Capacity: api.ResourceList{
  1172. api.ResourceName(api.ResourceCPU): resource.MustParse("10"),
  1173. api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
  1174. },
  1175. },
  1176. Spec: api.NodeSpec{
  1177. ExternalID: "node0",
  1178. },
  1179. },
  1180. },
  1181. Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
  1182. },
  1183. expectedRequestCount: 3, // (List+)List+Update
  1184. timeToPass: time.Hour,
  1185. newNodeStatus: api.NodeStatus{
  1186. Conditions: []api.NodeCondition{
  1187. {
  1188. Type: api.NodeReady,
  1189. Status: api.ConditionTrue,
  1190. // Node status hasn't been updated for 1hr.
  1191. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  1192. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  1193. },
  1194. {
  1195. Type: api.NodeOutOfDisk,
  1196. Status: api.ConditionFalse,
  1197. // Node status hasn't been updated for 1hr.
  1198. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  1199. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  1200. },
  1201. },
  1202. Capacity: api.ResourceList{
  1203. api.ResourceName(api.ResourceCPU): resource.MustParse("10"),
  1204. api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
  1205. },
  1206. },
  1207. expectedNodes: []*api.Node{
  1208. {
  1209. ObjectMeta: api.ObjectMeta{
  1210. Name: "node0",
  1211. CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  1212. },
  1213. Status: api.NodeStatus{
  1214. Conditions: []api.NodeCondition{
  1215. {
  1216. Type: api.NodeReady,
  1217. Status: api.ConditionUnknown,
  1218. Reason: "NodeStatusUnknown",
  1219. Message: "Kubelet stopped posting node status.",
  1220. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  1221. LastTransitionTime: unversioned.Time{Time: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC).Add(time.Hour)},
  1222. },
  1223. {
  1224. Type: api.NodeOutOfDisk,
  1225. Status: api.ConditionUnknown,
  1226. Reason: "NodeStatusUnknown",
  1227. Message: "Kubelet stopped posting node status.",
  1228. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  1229. LastTransitionTime: unversioned.Time{Time: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC).Add(time.Hour)},
  1230. },
  1231. },
  1232. Capacity: api.ResourceList{
  1233. api.ResourceName(api.ResourceCPU): resource.MustParse("10"),
  1234. api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
  1235. },
  1236. },
  1237. Spec: api.NodeSpec{
  1238. ExternalID: "node0",
  1239. },
  1240. },
  1241. },
  1242. },
  1243. // Node created long time ago, with status updated recently.
  1244. // Expect no action from node controller (within monitor grace period).
  1245. {
  1246. fakeNodeHandler: &FakeNodeHandler{
  1247. Existing: []*api.Node{
  1248. {
  1249. ObjectMeta: api.ObjectMeta{
  1250. Name: "node0",
  1251. CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  1252. },
  1253. Status: api.NodeStatus{
  1254. Conditions: []api.NodeCondition{
  1255. {
  1256. Type: api.NodeReady,
  1257. Status: api.ConditionTrue,
  1258. // Node status has just been updated.
  1259. LastHeartbeatTime: fakeNow,
  1260. LastTransitionTime: fakeNow,
  1261. },
  1262. },
  1263. Capacity: api.ResourceList{
  1264. api.ResourceName(api.ResourceCPU): resource.MustParse("10"),
  1265. api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
  1266. },
  1267. },
  1268. Spec: api.NodeSpec{
  1269. ExternalID: "node0",
  1270. },
  1271. },
  1272. },
  1273. Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
  1274. },
  1275. expectedRequestCount: 1, // List
  1276. expectedNodes: nil,
  1277. },
  1278. }
  1279. for i, item := range table {
  1280. nodeController, _ := NewNodeControllerFromClient(nil, item.fakeNodeHandler, 5*time.Minute,
  1281. testRateLimiterQPS, testRateLimiterQPS, testLargeClusterThreshold, testUnhealtyThreshold,
  1282. testNodeMonitorGracePeriod, testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, nil, 0, false)
  1283. nodeController.now = func() unversioned.Time { return fakeNow }
  1284. if err := nodeController.monitorNodeStatus(); err != nil {
  1285. t.Errorf("unexpected error: %v", err)
  1286. }
  1287. if item.timeToPass > 0 {
  1288. nodeController.now = func() unversioned.Time { return unversioned.Time{Time: fakeNow.Add(item.timeToPass)} }
  1289. item.fakeNodeHandler.Existing[0].Status = item.newNodeStatus
  1290. if err := nodeController.monitorNodeStatus(); err != nil {
  1291. t.Errorf("unexpected error: %v", err)
  1292. }
  1293. }
  1294. if item.expectedRequestCount != item.fakeNodeHandler.RequestCount {
  1295. t.Errorf("expected %v call, but got %v.", item.expectedRequestCount, item.fakeNodeHandler.RequestCount)
  1296. }
  1297. if len(item.fakeNodeHandler.UpdatedNodes) > 0 && !api.Semantic.DeepEqual(item.expectedNodes, item.fakeNodeHandler.UpdatedNodes) {
  1298. t.Errorf("Case[%d] unexpected nodes: %s", i, diff.ObjectDiff(item.expectedNodes[0], item.fakeNodeHandler.UpdatedNodes[0]))
  1299. }
  1300. if len(item.fakeNodeHandler.UpdatedNodeStatuses) > 0 && !api.Semantic.DeepEqual(item.expectedNodes, item.fakeNodeHandler.UpdatedNodeStatuses) {
  1301. t.Errorf("Case[%d] unexpected nodes: %s", i, diff.ObjectDiff(item.expectedNodes[0], item.fakeNodeHandler.UpdatedNodeStatuses[0]))
  1302. }
  1303. }
  1304. }
  1305. func TestMonitorNodeStatusMarkPodsNotReady(t *testing.T) {
  1306. fakeNow := unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
  1307. table := []struct {
  1308. fakeNodeHandler *FakeNodeHandler
  1309. timeToPass time.Duration
  1310. newNodeStatus api.NodeStatus
  1311. expectedPodStatusUpdate bool
  1312. }{
  1313. // Node created recently, without status.
  1314. // Expect no action from node controller (within startup grace period).
  1315. {
  1316. fakeNodeHandler: &FakeNodeHandler{
  1317. Existing: []*api.Node{
  1318. {
  1319. ObjectMeta: api.ObjectMeta{
  1320. Name: "node0",
  1321. CreationTimestamp: fakeNow,
  1322. },
  1323. },
  1324. },
  1325. Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
  1326. },
  1327. expectedPodStatusUpdate: false,
  1328. },
  1329. // Node created long time ago, with status updated recently.
  1330. // Expect no action from node controller (within monitor grace period).
  1331. {
  1332. fakeNodeHandler: &FakeNodeHandler{
  1333. Existing: []*api.Node{
  1334. {
  1335. ObjectMeta: api.ObjectMeta{
  1336. Name: "node0",
  1337. CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  1338. },
  1339. Status: api.NodeStatus{
  1340. Conditions: []api.NodeCondition{
  1341. {
  1342. Type: api.NodeReady,
  1343. Status: api.ConditionTrue,
  1344. // Node status has just been updated.
  1345. LastHeartbeatTime: fakeNow,
  1346. LastTransitionTime: fakeNow,
  1347. },
  1348. },
  1349. Capacity: api.ResourceList{
  1350. api.ResourceName(api.ResourceCPU): resource.MustParse("10"),
  1351. api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
  1352. },
  1353. },
  1354. Spec: api.NodeSpec{
  1355. ExternalID: "node0",
  1356. },
  1357. },
  1358. },
  1359. Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
  1360. },
  1361. expectedPodStatusUpdate: false,
  1362. },
  1363. // Node created long time ago, with status updated by kubelet exceeds grace period.
  1364. // Expect pods status updated and Unknown node status posted from node controller
  1365. {
  1366. fakeNodeHandler: &FakeNodeHandler{
  1367. Existing: []*api.Node{
  1368. {
  1369. ObjectMeta: api.ObjectMeta{
  1370. Name: "node0",
  1371. CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  1372. },
  1373. Status: api.NodeStatus{
  1374. NodeInfo: api.NodeSystemInfo{
  1375. KubeletVersion: "v1.2.0",
  1376. },
  1377. Conditions: []api.NodeCondition{
  1378. {
  1379. Type: api.NodeReady,
  1380. Status: api.ConditionTrue,
  1381. // Node status hasn't been updated for 1hr.
  1382. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  1383. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  1384. },
  1385. {
  1386. Type: api.NodeOutOfDisk,
  1387. Status: api.ConditionFalse,
  1388. // Node status hasn't been updated for 1hr.
  1389. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  1390. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  1391. },
  1392. },
  1393. Capacity: api.ResourceList{
  1394. api.ResourceName(api.ResourceCPU): resource.MustParse("10"),
  1395. api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
  1396. },
  1397. },
  1398. Spec: api.NodeSpec{
  1399. ExternalID: "node0",
  1400. },
  1401. },
  1402. },
  1403. Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
  1404. },
  1405. timeToPass: 1 * time.Minute,
  1406. newNodeStatus: api.NodeStatus{
  1407. NodeInfo: api.NodeSystemInfo{
  1408. KubeletVersion: "v1.2.0",
  1409. },
  1410. Conditions: []api.NodeCondition{
  1411. {
  1412. Type: api.NodeReady,
  1413. Status: api.ConditionTrue,
  1414. // Node status hasn't been updated for 1hr.
  1415. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  1416. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  1417. },
  1418. {
  1419. Type: api.NodeOutOfDisk,
  1420. Status: api.ConditionFalse,
  1421. // Node status hasn't been updated for 1hr.
  1422. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  1423. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  1424. },
  1425. },
  1426. Capacity: api.ResourceList{
  1427. api.ResourceName(api.ResourceCPU): resource.MustParse("10"),
  1428. api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
  1429. },
  1430. },
  1431. expectedPodStatusUpdate: true,
  1432. },
  1433. // Node created long time ago, with outdated kubelet version 1.1.0 and status
  1434. // updated by kubelet exceeds grace period. Expect no action from node controller.
  1435. {
  1436. fakeNodeHandler: &FakeNodeHandler{
  1437. Existing: []*api.Node{
  1438. {
  1439. ObjectMeta: api.ObjectMeta{
  1440. Name: "node0",
  1441. CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  1442. },
  1443. Status: api.NodeStatus{
  1444. NodeInfo: api.NodeSystemInfo{
  1445. KubeletVersion: "v1.1.0",
  1446. },
  1447. Conditions: []api.NodeCondition{
  1448. {
  1449. Type: api.NodeReady,
  1450. Status: api.ConditionTrue,
  1451. // Node status hasn't been updated for 1hr.
  1452. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  1453. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  1454. },
  1455. {
  1456. Type: api.NodeOutOfDisk,
  1457. Status: api.ConditionFalse,
  1458. // Node status hasn't been updated for 1hr.
  1459. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  1460. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  1461. },
  1462. },
  1463. Capacity: api.ResourceList{
  1464. api.ResourceName(api.ResourceCPU): resource.MustParse("10"),
  1465. api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
  1466. },
  1467. },
  1468. Spec: api.NodeSpec{
  1469. ExternalID: "node0",
  1470. },
  1471. },
  1472. },
  1473. Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
  1474. },
  1475. timeToPass: 1 * time.Minute,
  1476. newNodeStatus: api.NodeStatus{
  1477. NodeInfo: api.NodeSystemInfo{
  1478. KubeletVersion: "v1.1.0",
  1479. },
  1480. Conditions: []api.NodeCondition{
  1481. {
  1482. Type: api.NodeReady,
  1483. Status: api.ConditionTrue,
  1484. // Node status hasn't been updated for 1hr.
  1485. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  1486. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  1487. },
  1488. {
  1489. Type: api.NodeOutOfDisk,
  1490. Status: api.ConditionFalse,
  1491. // Node status hasn't been updated for 1hr.
  1492. LastHeartbeatTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  1493. LastTransitionTime: unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC),
  1494. },
  1495. },
  1496. Capacity: api.ResourceList{
  1497. api.ResourceName(api.ResourceCPU): resource.MustParse("10"),
  1498. api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
  1499. },
  1500. },
  1501. expectedPodStatusUpdate: false,
  1502. },
  1503. }
  1504. for i, item := range table {
  1505. nodeController, _ := NewNodeControllerFromClient(nil, item.fakeNodeHandler, 5*time.Minute,
  1506. testRateLimiterQPS, testRateLimiterQPS, testLargeClusterThreshold, testUnhealtyThreshold,
  1507. testNodeMonitorGracePeriod, testNodeStartupGracePeriod, testNodeMonitorPeriod, nil, nil, 0, false)
  1508. nodeController.now = func() unversioned.Time { return fakeNow }
  1509. if err := nodeController.monitorNodeStatus(); err != nil {
  1510. t.Errorf("Case[%d] unexpected error: %v", i, err)
  1511. }
  1512. if item.timeToPass > 0 {
  1513. nodeController.now = func() unversioned.Time { return unversioned.Time{Time: fakeNow.Add(item.timeToPass)} }
  1514. item.fakeNodeHandler.Existing[0].Status = item.newNodeStatus
  1515. if err := nodeController.monitorNodeStatus(); err != nil {
  1516. t.Errorf("Case[%d] unexpected error: %v", i, err)
  1517. }
  1518. }
  1519. podStatusUpdated := false
  1520. for _, action := range item.fakeNodeHandler.Actions() {
  1521. if action.GetVerb() == "update" && action.GetResource().Resource == "pods" && action.GetSubresource() == "status" {
  1522. podStatusUpdated = true
  1523. }
  1524. }
  1525. if podStatusUpdated != item.expectedPodStatusUpdate {
  1526. t.Errorf("Case[%d] expect pod status updated to be %v, but got %v", i, item.expectedPodStatusUpdate, podStatusUpdated)
  1527. }
  1528. }
  1529. }
  1530. func TestNodeDeletion(t *testing.T) {
  1531. fakeNow := unversioned.Date(2015, 1, 1, 12, 0, 0, 0, time.UTC)
  1532. fakeNodeHandler := &FakeNodeHandler{
  1533. Existing: []*api.Node{
  1534. {
  1535. ObjectMeta: api.ObjectMeta{
  1536. Name: "node0",
  1537. CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  1538. },
  1539. Status: api.NodeStatus{
  1540. Conditions: []api.NodeCondition{
  1541. {
  1542. Type: api.NodeReady,
  1543. Status: api.ConditionTrue,
  1544. // Node status has just been updated.
  1545. LastHeartbeatTime: fakeNow,
  1546. LastTransitionTime: fakeNow,
  1547. },
  1548. },
  1549. Capacity: api.ResourceList{
  1550. api.ResourceName(api.ResourceCPU): resource.MustParse("10"),
  1551. api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
  1552. },
  1553. },
  1554. Spec: api.NodeSpec{
  1555. ExternalID: "node0",
  1556. },
  1557. },
  1558. {
  1559. ObjectMeta: api.ObjectMeta{
  1560. Name: "node1",
  1561. CreationTimestamp: unversioned.Date(2012, 1, 1, 0, 0, 0, 0, time.UTC),
  1562. },
  1563. Status: api.NodeStatus{
  1564. Conditions: []api.NodeCondition{
  1565. {
  1566. Type: api.NodeReady,
  1567. Status: api.ConditionTrue,
  1568. // Node status has just been updated.
  1569. LastHeartbeatTime: fakeNow,
  1570. LastTransitionTime: fakeNow,
  1571. },
  1572. },
  1573. Capacity: api.ResourceList{
  1574. api.ResourceName(api.ResourceCPU): resource.MustParse("10"),
  1575. api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
  1576. },
  1577. },
  1578. Spec: api.NodeSpec{
  1579. ExternalID: "node0",
  1580. },
  1581. },
  1582. },
  1583. Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0"), *newPod("pod1", "node1")}}),
  1584. }
  1585. nodeController, _ := NewNodeControllerFromClient(nil, fakeNodeHandler, 5*time.Minute,
  1586. testRateLimiterQPS, testRateLimiterQPS, testLargeClusterThreshold, testUnhealtyThreshold,
  1587. testNodeMonitorGracePeriod, testNodeStartupGracePeriod,
  1588. testNodeMonitorPeriod, nil, nil, 0, false)
  1589. nodeController.now = func() unversioned.Time { return fakeNow }
  1590. if err := nodeController.monitorNodeStatus(); err != nil {
  1591. t.Errorf("unexpected error: %v", err)
  1592. }
  1593. fakeNodeHandler.Delete("node1", nil)
  1594. if err := nodeController.monitorNodeStatus(); err != nil {
  1595. t.Errorf("unexpected error: %v", err)
  1596. }
  1597. nodeController.zonePodEvictor[""].Try(func(value TimedValue) (bool, time.Duration) {
  1598. uid, _ := value.UID.(string)
  1599. deletePods(fakeNodeHandler, nodeController.recorder, value.Value, uid, nodeController.daemonSetStore)
  1600. return true, 0
  1601. })
  1602. podEvicted := false
  1603. for _, action := range fakeNodeHandler.Actions() {
  1604. if action.GetVerb() == "delete" && action.GetResource().Resource == "pods" {
  1605. podEvicted = true
  1606. }
  1607. }
  1608. if !podEvicted {
  1609. t.Error("expected pods to be evicted from the deleted node")
  1610. }
  1611. }
  1612. func TestNodeEventGeneration(t *testing.T) {
  1613. fakeNow := unversioned.Date(2016, 8, 10, 12, 0, 0, 0, time.UTC)
  1614. fakeNodeHandler := &FakeNodeHandler{
  1615. Existing: []*api.Node{
  1616. {
  1617. ObjectMeta: api.ObjectMeta{
  1618. Name: "node0",
  1619. UID: "1234567890",
  1620. CreationTimestamp: unversioned.Date(2016, 8, 10, 0, 0, 0, 0, time.UTC),
  1621. },
  1622. Spec: api.NodeSpec{
  1623. ExternalID: "node0",
  1624. },
  1625. Status: api.NodeStatus{
  1626. Conditions: []api.NodeCondition{
  1627. {
  1628. Type: api.NodeReady,
  1629. Status: api.ConditionTrue,
  1630. // Node status has just been updated.
  1631. LastHeartbeatTime: fakeNow,
  1632. LastTransitionTime: fakeNow,
  1633. },
  1634. },
  1635. Capacity: api.ResourceList{
  1636. api.ResourceName(api.ResourceRequestsCPU): resource.MustParse("10"),
  1637. api.ResourceName(api.ResourceMemory): resource.MustParse("20G"),
  1638. },
  1639. },
  1640. },
  1641. },
  1642. Clientset: fake.NewSimpleClientset(&api.PodList{Items: []api.Pod{*newPod("pod0", "node0")}}),
  1643. }
  1644. nodeController, _ := NewNodeControllerFromClient(nil, fakeNodeHandler, 5*time.Minute,
  1645. testRateLimiterQPS, testRateLimiterQPS, testLargeClusterThreshold, testUnhealtyThreshold,
  1646. testNodeMonitorGracePeriod, testNodeStartupGracePeriod,
  1647. testNodeMonitorPeriod, nil, nil, 0, false)
  1648. nodeController.now = func() unversioned.Time { return fakeNow }
  1649. fakeRecorder := NewFakeRecorder()
  1650. nodeController.recorder = fakeRecorder
  1651. if err := nodeController.monitorNodeStatus(); err != nil {
  1652. t.Errorf("unexpected error: %v", err)
  1653. }
  1654. fakeNodeHandler.Delete("node0", nil)
  1655. if err := nodeController.monitorNodeStatus(); err != nil {
  1656. t.Errorf("unexpected error: %v", err)
  1657. }
  1658. nodeController.zonePodEvictor[""].Try(func(value TimedValue) (bool, time.Duration) {
  1659. nodeUid, _ := value.UID.(string)
  1660. deletePods(fakeNodeHandler, nodeController.recorder, value.Value, nodeUid, nodeController.daemonSetStore)
  1661. return true, 0
  1662. })
  1663. if len(fakeRecorder.events) != 3 {
  1664. t.Fatalf("unexpected events: %v", fakeRecorder.events)
  1665. }
  1666. if fakeRecorder.events[0].Reason != "RegisteredNode" || fakeRecorder.events[1].Reason != "RemovingNode" || fakeRecorder.events[2].Reason != "DeletingAllPods" {
  1667. t.Fatalf("unexpected events generation: %v", fakeRecorder.events)
  1668. }
  1669. for _, event := range fakeRecorder.events {
  1670. involvedObject := event.InvolvedObject
  1671. actualUID := string(involvedObject.UID)
  1672. if actualUID != "1234567890" {
  1673. t.Fatalf("unexpected event uid: %v", actualUID)
  1674. }
  1675. }
  1676. }
  1677. func TestCheckPod(t *testing.T) {
  1678. tcs := []struct {
  1679. pod api.Pod
  1680. prune bool
  1681. }{
  1682. {
  1683. pod: api.Pod{
  1684. ObjectMeta: api.ObjectMeta{DeletionTimestamp: nil},
  1685. Spec: api.PodSpec{NodeName: "new"},
  1686. },
  1687. prune: false,
  1688. },
  1689. {
  1690. pod: api.Pod{
  1691. ObjectMeta: api.ObjectMeta{DeletionTimestamp: nil},
  1692. Spec: api.PodSpec{NodeName: "old"},
  1693. },
  1694. prune: false,
  1695. },
  1696. {
  1697. pod: api.Pod{
  1698. ObjectMeta: api.ObjectMeta{DeletionTimestamp: nil},
  1699. Spec: api.PodSpec{NodeName: ""},
  1700. },
  1701. prune: false,
  1702. },
  1703. {
  1704. pod: api.Pod{
  1705. ObjectMeta: api.ObjectMeta{DeletionTimestamp: nil},
  1706. Spec: api.PodSpec{NodeName: "nonexistant"},
  1707. },
  1708. prune: false,
  1709. },
  1710. {
  1711. pod: api.Pod{
  1712. ObjectMeta: api.ObjectMeta{DeletionTimestamp: &unversioned.Time{}},
  1713. Spec: api.PodSpec{NodeName: "new"},
  1714. },
  1715. prune: false,
  1716. },
  1717. {
  1718. pod: api.Pod{
  1719. ObjectMeta: api.ObjectMeta{DeletionTimestamp: &unversioned.Time{}},
  1720. Spec: api.PodSpec{NodeName: "old"},
  1721. },
  1722. prune: true,
  1723. },
  1724. {
  1725. pod: api.Pod{
  1726. ObjectMeta: api.ObjectMeta{DeletionTimestamp: &unversioned.Time{}},
  1727. Spec: api.PodSpec{NodeName: "older"},
  1728. },
  1729. prune: true,
  1730. },
  1731. {
  1732. pod: api.Pod{
  1733. ObjectMeta: api.ObjectMeta{DeletionTimestamp: &unversioned.Time{}},
  1734. Spec: api.PodSpec{NodeName: "oldest"},
  1735. },
  1736. prune: true,
  1737. },
  1738. {
  1739. pod: api.Pod{
  1740. ObjectMeta: api.ObjectMeta{DeletionTimestamp: &unversioned.Time{}},
  1741. Spec: api.PodSpec{NodeName: ""},
  1742. },
  1743. prune: true,
  1744. },
  1745. {
  1746. pod: api.Pod{
  1747. ObjectMeta: api.ObjectMeta{DeletionTimestamp: &unversioned.Time{}},
  1748. Spec: api.PodSpec{NodeName: "nonexistant"},
  1749. },
  1750. prune: true,
  1751. },
  1752. }
  1753. nc, _ := NewNodeControllerFromClient(nil, nil, 0, 0, 0, 0, 0, 0, 0, 0, nil, nil, 0, false)
  1754. nc.nodeStore.Store = cache.NewStore(cache.MetaNamespaceKeyFunc)
  1755. nc.nodeStore.Store.Add(&api.Node{
  1756. ObjectMeta: api.ObjectMeta{
  1757. Name: "new",
  1758. },
  1759. Status: api.NodeStatus{
  1760. NodeInfo: api.NodeSystemInfo{
  1761. KubeletVersion: "v1.1.0",
  1762. },
  1763. },
  1764. })
  1765. nc.nodeStore.Store.Add(&api.Node{
  1766. ObjectMeta: api.ObjectMeta{
  1767. Name: "old",
  1768. },
  1769. Status: api.NodeStatus{
  1770. NodeInfo: api.NodeSystemInfo{
  1771. KubeletVersion: "v1.0.0",
  1772. },
  1773. },
  1774. })
  1775. nc.nodeStore.Store.Add(&api.Node{
  1776. ObjectMeta: api.ObjectMeta{
  1777. Name: "older",
  1778. },
  1779. Status: api.NodeStatus{
  1780. NodeInfo: api.NodeSystemInfo{
  1781. KubeletVersion: "v0.21.4",
  1782. },
  1783. },
  1784. })
  1785. nc.nodeStore.Store.Add(&api.Node{
  1786. ObjectMeta: api.ObjectMeta{
  1787. Name: "oldest",
  1788. },
  1789. Status: api.NodeStatus{
  1790. NodeInfo: api.NodeSystemInfo{
  1791. KubeletVersion: "v0.19.3",
  1792. },
  1793. },
  1794. })
  1795. for i, tc := range tcs {
  1796. var deleteCalls int
  1797. nc.forcefullyDeletePod = func(_ *api.Pod) error {
  1798. deleteCalls++
  1799. return nil
  1800. }
  1801. nc.maybeDeleteTerminatingPod(&tc.pod)
  1802. if tc.prune && deleteCalls != 1 {
  1803. t.Errorf("[%v] expected number of delete calls to be 1 but got %v", i, deleteCalls)
  1804. }
  1805. if !tc.prune && deleteCalls != 0 {
  1806. t.Errorf("[%v] expected number of delete calls to be 0 but got %v", i, deleteCalls)
  1807. }
  1808. }
  1809. }
  1810. func TestCleanupOrphanedPods(t *testing.T) {
  1811. pods := []*api.Pod{
  1812. newPod("a", "foo"),
  1813. newPod("b", "bar"),
  1814. newPod("c", "gone"),
  1815. }
  1816. nc, _ := NewNodeControllerFromClient(nil, nil, 0, 0, 0, 0, 0, 0, 0, 0, nil, nil, 0, false)
  1817. nc.nodeStore.Store.Add(newNode("foo"))
  1818. nc.nodeStore.Store.Add(newNode("bar"))
  1819. for _, pod := range pods {
  1820. p := pod
  1821. nc.podStore.Indexer.Add(&p)
  1822. }
  1823. var deleteCalls int
  1824. var deletedPodName string
  1825. forcefullyDeletePodFunc := func(p *api.Pod) error {
  1826. deleteCalls++
  1827. deletedPodName = p.ObjectMeta.Name
  1828. return nil
  1829. }
  1830. cleanupOrphanedPods(pods, nc.nodeStore.Store, forcefullyDeletePodFunc)
  1831. if deleteCalls != 1 {
  1832. t.Fatalf("expected one delete, got: %v", deleteCalls)
  1833. }
  1834. if deletedPodName != "c" {
  1835. t.Fatalf("expected deleted pod name to be 'c', but got: %q", deletedPodName)
  1836. }
  1837. }
  1838. func TestCheckNodeKubeletVersionParsing(t *testing.T) {
  1839. tests := []struct {
  1840. version string
  1841. outdated bool
  1842. }{
  1843. {
  1844. version: "",
  1845. outdated: true,
  1846. },
  1847. {
  1848. version: "v0.21.4",
  1849. outdated: true,
  1850. },
  1851. {
  1852. version: "v1.0.0",
  1853. outdated: true,
  1854. },
  1855. {
  1856. version: "v1.1.0",
  1857. outdated: true,
  1858. },
  1859. {
  1860. version: "v1.1.0-alpha.2.961+9d4c6846fc03b9-dirty",
  1861. outdated: true,
  1862. },
  1863. {
  1864. version: "v1.2.0",
  1865. outdated: false,
  1866. },
  1867. {
  1868. version: "v1.3.3",
  1869. outdated: false,
  1870. },
  1871. {
  1872. version: "v1.4.0-alpha.2.961+9d4c6846fc03b9-dirty",
  1873. outdated: false,
  1874. },
  1875. {
  1876. version: "v2.0.0",
  1877. outdated: false,
  1878. },
  1879. }
  1880. for _, ov := range tests {
  1881. n := &api.Node{
  1882. Status: api.NodeStatus{
  1883. NodeInfo: api.NodeSystemInfo{
  1884. KubeletVersion: ov.version,
  1885. },
  1886. },
  1887. }
  1888. isOutdated := nodeRunningOutdatedKubelet(n)
  1889. if ov.outdated != isOutdated {
  1890. t.Errorf("Version %v doesn't match test expectation. Expected outdated %v got %v", n.Status.NodeInfo.KubeletVersion, ov.outdated, isOutdated)
  1891. } else {
  1892. t.Logf("Version %v outdated %v", ov.version, isOutdated)
  1893. }
  1894. }
  1895. }