#!/bin/bash

# Copyright 2016 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -o errexit
set -o nounset
set -o pipefail

# A library of helper functions that each provider hosting Kubernetes
# must implement to use cluster/kube-*.sh scripts.

KUBE_ROOT=$(dirname "${BASH_SOURCE[0]}")/../..

# shellcheck source=./config-common.sh
source "${KUBE_ROOT}/cluster/photon-controller/config-common.sh"
# shellcheck source=./config-default.sh
source "${KUBE_ROOT}/cluster/photon-controller/${KUBE_CONFIG_FILE-"config-default.sh"}"
# shellcheck source=../common.sh
source "${KUBE_ROOT}/cluster/common.sh"

readonly PHOTON="photon -n"

# Naming scheme for VMs (masters & nodes)
readonly MASTER_NAME="${INSTANCE_PREFIX}-master"

# shellcheck claims this doesn't work because you can't use a variable in a
# brace range. It does work because we're calling eval.
# shellcheck disable=SC2051
readonly NODE_NAMES=($(eval echo "${INSTANCE_PREFIX}"-node-{1.."${NUM_NODES}"}))
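# For example, with INSTANCE_PREFIX=kubernetes and NUM_NODES=3, the eval above
# expands to: kubernetes-node-1 kubernetes-node-2 kubernetes-node-3
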
#####################################################################
#
# Public API
#
#####################################################################

#
# detect-master will query Photon Controller for the Kubernetes master.
# It assumes that the VM name for the master is unique.
# It will set KUBE_MASTER_ID to be the VM ID of the master
# It will set KUBE_MASTER_IP to be the IP address of the master
# If the silent parameter is passed, it will not print when the master
# is found: this is used internally just to find the MASTER
#
function detect-master {
  local silent=${1:-""}
  local tenant_args="--tenant ${PHOTON_TENANT} --project ${PHOTON_PROJECT}"

  KUBE_MASTER=${MASTER_NAME}
  KUBE_MASTER_ID=${KUBE_MASTER_ID:-""}
  KUBE_MASTER_IP=${KUBE_MASTER_IP:-""}

  # We don't want silent failure: we check for failure
  set +o pipefail

  if [[ -z ${KUBE_MASTER_ID} ]]; then
    KUBE_MASTER_ID=$(${PHOTON} vm list ${tenant_args} | grep $'\t'"${MASTER_NAME}"$'\t' | awk '{print $1}')
  fi
  if [[ -z ${KUBE_MASTER_ID} ]]; then
    kube::log::error "Could not find Kubernetes master node ID. Make sure you've launched a cluster with kube-up.sh"
    exit 1
  fi

  if [[ -z "${KUBE_MASTER_IP-}" ]]; then
    # Pick out the NICs that have a MAC address owned by VMware (with OUI 00:0C:29)
    # Make sure to ignore lines that have a network interface but no address
    KUBE_MASTER_IP=$(${PHOTON} vm networks "${KUBE_MASTER_ID}" | grep -i $'\t'"00:0C:29" | grep -E '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | head -1 | awk -F'\t' '{print $3}')
  fi
  if [[ -z "${KUBE_MASTER_IP-}" ]]; then
    kube::log::error "Could not find Kubernetes master node IP. Make sure you've launched a cluster with kube-up.sh" >&2
    exit 1
  fi

  if [[ -z ${silent} ]]; then
    kube::log::status "Master: $KUBE_MASTER ($KUBE_MASTER_IP)"
  fi

  # Reset default set in common.sh
  set -o pipefail
}

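# Example usage (assumes the photon CLI is configured and a cluster is up):
#   detect-master          # logs, e.g., "Master: kubernetes-master (10.0.0.5)"
#   detect-master silent   # sets KUBE_MASTER_ID/KUBE_MASTER_IP without logging
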
#
# detect-nodes will query Photon Controller for the Kubernetes nodes
# It assumes that the VM names for the nodes are unique.
# It assumes that NODE_NAMES has been set
# It will set KUBE_NODE_IP_ADDRESSES to be the VM IPs of the nodes
# It will set KUBE_NODE_IDS to be the VM IDs of the nodes
# If the silent parameter is passed, it will not print when the nodes
# are found: this is used internally just to find the nodes
#
function detect-nodes {
  local silent=${1:-""}
  local failure=0
  local tenant_args="--tenant ${PHOTON_TENANT} --project ${PHOTON_PROJECT}"

  KUBE_NODE_IP_ADDRESSES=()
  KUBE_NODE_IDS=()

  # We don't want silent failure: we check for failure
  set +o pipefail

  local i
  for (( i=0; i<${#NODE_NAMES[@]}; i++)); do
    local node_id
    node_id=$(${PHOTON} vm list ${tenant_args} | grep $'\t'"${NODE_NAMES[${i}]}"$'\t' | awk '{print $1}')
    if [[ -z ${node_id} ]]; then
      kube::log::error "Could not find ${NODE_NAMES[${i}]}"
      failure=1
    fi
    KUBE_NODE_IDS+=("${node_id}")

    # Pick out the NICs that have a MAC address owned by VMware (with OUI 00:0C:29)
    # Make sure to ignore lines that have a network interface but no address
    local node_ip
    node_ip=$(${PHOTON} vm networks "${node_id}" | grep -i $'\t'"00:0C:29" | grep -E '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | head -1 | awk -F'\t' '{print $3}')
    KUBE_NODE_IP_ADDRESSES+=("${node_ip}")

    if [[ -z ${silent} ]]; then
      kube::log::status "Node: ${NODE_NAMES[${i}]} (${KUBE_NODE_IP_ADDRESSES[${i}]})"
    fi
  done

  if [[ ${failure} -ne 0 ]]; then
    exit 1
  fi

  # Reset default set in common.sh
  set -o pipefail
}

# Get node names if they are not static.
function detect-node-names {
  echo "TODO: detect-node-names" 1>&2
}

#
# Verifies that this computer has sufficient software installed
# so that it can run the rest of the script.
#
function verify-prereqs {
  verify-cmd-in-path photon
  verify-cmd-in-path ssh
  verify-cmd-in-path scp
  verify-cmd-in-path ssh-add
  verify-cmd-in-path openssl
  verify-cmd-in-path mkisofs
}

#
# The entry point for bringing up a Kubernetes cluster
#
function kube-up {
  verify-prereqs
  verify-ssh-prereqs
  verify-photon-config
  ensure-temp-dir

  find-release-tars
  find-image-id

  load-or-gen-kube-basicauth
  gen-cloud-init-iso
  gen-master-start
  create-master-vm
  install-salt-on-master

  gen-node-start
  install-salt-on-nodes

  detect-nodes -s

  install-kubernetes-on-master
  install-kubernetes-on-nodes

  wait-master-api
  wait-node-apis

  setup-pod-routes

  copy-kube-certs
  kube::log::status "Creating kubeconfig..."
  create-kubeconfig
}

# Delete a kubernetes cluster
function kube-down {
  detect-master
  detect-nodes

  pc-delete-vm "${KUBE_MASTER}" "${KUBE_MASTER_ID}"
  for (( node=0; node<${#KUBE_NODE_IDS[@]}; node++)); do
    pc-delete-vm "${NODE_NAMES[${node}]}" "${KUBE_NODE_IDS[${node}]}"
  done
}

# Update a kubernetes cluster
function kube-push {
  echo "TODO: kube-push" 1>&2
}

# Prepare to update a kubernetes component
function prepare-push {
  echo "TODO: prepare-push" 1>&2
}

# Update a kubernetes master
function push-master {
  echo "TODO: push-master" 1>&2
}

# Update a kubernetes node
function push-node {
  echo "TODO: push-node" 1>&2
}

# Execute prior to running tests to build a release if required for env
function test-build-release {
  echo "TODO: test-build-release" 1>&2
}

# Execute prior to running tests to initialize required structure
function test-setup {
  echo "TODO: test-setup" 1>&2
}

# Execute after running tests to perform any required clean-up
function test-teardown {
  echo "TODO: test-teardown" 1>&2
}

#####################################################################
#
# Internal functions
#
#####################################################################

#
# Uses Photon Controller to make a VM
# Takes two parameters:
# - The name of the VM (assumed to be unique)
# - The name of the flavor with which to create the VM (assumed to be unique)
#
# It assumes that the variables in config-common.sh (PHOTON_TENANT, etc)
# are set correctly.
#
# It also assumes the cloud-init ISO has been generated
#
# When it completes, it sets two environment variables for use by the
# caller: _VM_ID (the ID of the created VM) and _VM_IP (the IP address
# of the created VM)
#
function pc-create-vm {
  local vm_name="${1}"
  local vm_flavor="${2}"
  local rc=0
  local i=0

  # Create the VM
  local tenant_args="--tenant ${PHOTON_TENANT} --project ${PHOTON_PROJECT}"
  local vm_args="--name ${vm_name} --image ${PHOTON_IMAGE_ID} --flavor ${vm_flavor}"
  local disk_args="disk-1 ${PHOTON_DISK_FLAVOR} boot=true"

  _VM_ID=$(${PHOTON} vm create ${tenant_args} ${vm_args} --disks "${disk_args}" 2>&1) || rc=$?
  if [[ ${rc} -ne 0 ]]; then
    kube::log::error "Failed to create VM. Error output:"
    echo "${_VM_ID}"
    exit 1
  fi
  kube::log::status "Created VM ${vm_name}: ${_VM_ID}"

  # Start the VM
  # Note that the VM has cloud-init in it, and we attach an ISO that
  # contains a user-data.txt file for cloud-init. When the VM starts,
  # cloud-init will temporarily mount the ISO and configure the VM.
  # Our user-data will configure the 'kube' user and set up the ssh
  # authorized keys to allow us to ssh to the VM and do further work.
  run-cmd "${PHOTON} vm attach-iso -p ${KUBE_TEMP}/cloud-init.iso ${_VM_ID}"
  run-cmd "${PHOTON} vm start ${_VM_ID}"
  kube::log::status "Started VM ${vm_name}, waiting for network address..."

  # Wait for the VM to be started and connected to the network
  local have_network=0
  local networks
  for i in {1..120}; do
    # 'photon -n vm networks' prints several fields:
    # NETWORK MAC IP GATEWAY CONNECTED?
    # We wait until CONNECTED? is True
    rc=0
    networks=$(${PHOTON} vm networks "${_VM_ID}") || rc=$?
    if [[ ${rc} -ne 0 ]]; then
      kube::log::error "'${PHOTON} vm networks ${_VM_ID}' failed. Error output:"
      echo "${networks}"
    fi
    networks=$(echo "${networks}" | grep True) || rc=$?
    if [[ ${rc} -eq 0 ]]; then
      have_network=1
      break;
    fi
    sleep 1
  done

  # Fail if the VM didn't come up
  if [[ ${have_network} -eq 0 ]]; then
    kube::log::error "VM ${vm_name} failed to start up: no IP was found"
    exit 1
  fi

  # Find the IP address of the VM
  _VM_IP=$(${PHOTON} vm networks "${_VM_ID}" | head -1 | awk -F'\t' '{print $3}')
  kube::log::status "VM ${vm_name} has IP: ${_VM_IP}"
}

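# Example usage (hypothetical values; assumes PHOTON_IMAGE_ID has been set by
# find-image-id and the cloud-init ISO exists in ${KUBE_TEMP}):
#   pc-create-vm "kubernetes-node-1" "${PHOTON_NODE_FLAVOR}"
#   echo "created VM ${_VM_ID} with IP ${_VM_IP}"
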
#
# Delete one of our VMs
# If it is STARTED, it will be stopped first.
#
function pc-delete-vm {
  local vm_name="${1}"
  local vm_id="${2}"
  local rc=0

  kube::log::status "Deleting VM ${vm_name}"

  # In some cases, head exits before photon, so the pipeline exits with
  # SIGPIPE. We disable the pipefail option to hide that failure.
  set +o pipefail
  ${PHOTON} vm show "${vm_id}" | head -1 | grep STARTED > /dev/null 2>&1 || rc=$?
  set -o pipefail

  if [[ ${rc} -eq 0 ]]; then
    ${PHOTON} vm stop "${vm_id}" > /dev/null 2>&1 || rc=$?
    if [[ ${rc} -ne 0 ]]; then
      kube::log::error "Error: could not stop ${vm_name} ($vm_id)"
      kube::log::error "Please investigate and stop manually"
      return
    fi
  fi

  rc=0
  ${PHOTON} vm delete "${vm_id}" > /dev/null 2>&1 || rc=$?
  if [[ ${rc} -ne 0 ]]; then
    kube::log::error "Error: could not delete ${vm_name} ($vm_id)"
    kube::log::error "Please investigate and delete manually"
  fi
}

#
# Looks for the image named PHOTON_IMAGE
# Sets PHOTON_IMAGE_ID to be the ID of that image.
# We currently assume there is exactly one image with that name.
#
function find-image-id {
  local rc=0
  PHOTON_IMAGE_ID=$(${PHOTON} image list | grep $'\t'"${PHOTON_IMAGE}"$'\t' | head -1 | grep READY | awk -F'\t' '{print $1}') || rc=$?
  if [[ -z ${PHOTON_IMAGE_ID} ]]; then
    kube::log::error "Cannot find image \"${PHOTON_IMAGE}\""
    exit 1
  fi
}

#
# Generate an ISO with a single file called user-data.txt
# This ISO will be used to configure cloud-init (which is already
# on the VM). We will tell cloud-init to create the kube user/group
# and give ourselves the ability to ssh to the VM with our ssh key.
# We also allow people to ssh with the same password that was randomly
# generated for access to Kubernetes as a backup method.
#
# Assumes environment variables:
# - VM_USER
# - KUBE_PASSWORD (randomly generated password)
#
function gen-cloud-init-iso {
  local password_hash
  password_hash=$(openssl passwd -1 "${KUBE_PASSWORD}")

  local ssh_key
  ssh_key=$(ssh-add -L | head -1)

  # Make the user-data file that will be used by cloud-init
  (
    echo "#cloud-config"
    echo ""
    echo "groups:"
    echo "  - ${VM_USER}"
    echo ""
    echo "users:"
    echo "  - name: ${VM_USER}"
    echo "    gecos: Kubernetes"
    echo "    primary-group: ${VM_USER}"
    echo "    lock-passwd: false"
    echo "    passwd: ${password_hash}"
    echo "    ssh-authorized-keys: "
    echo "      - ${ssh_key}"
    echo "    sudo: ALL=(ALL) NOPASSWD:ALL"
    echo "    shell: /bin/bash"
    echo ""
    echo "hostname: kube"
  ) > "${KUBE_TEMP}/user-data.txt"

  # Make the ISO that will contain the user-data
  # The -rock option means that we'll generate real filenames (long and with case)
  run-cmd "mkisofs -rock -o ${KUBE_TEMP}/cloud-init.iso ${KUBE_TEMP}/user-data.txt"
}

#
# Generate a script used to install salt on the master
# It is placed into $KUBE_TEMP/master-start.sh
#
function gen-master-start {
  python "${KUBE_ROOT}/third_party/htpasswd/htpasswd.py" \
    -b -c "${KUBE_TEMP}/htpasswd" "${KUBE_USER}" "${KUBE_PASSWORD}"
  local htpasswd
  htpasswd=$(cat "${KUBE_TEMP}/htpasswd")

  # This calculation of the service IP should work, but if you choose an
  # alternate subnet, there's a small chance you'd need to modify the
  # service_ip, below. We'll choose an IP like 10.244.240.1 by taking
  # the first three octets of the SERVICE_CLUSTER_IP_RANGE and tacking
  # on a .1
  local octets
  local service_ip
  octets=($(echo "${SERVICE_CLUSTER_IP_RANGE}" | sed -e 's|/.*||' -e 's/\./ /g'))
  ((octets[3]+=1))
  service_ip=$(echo "${octets[*]}" | sed 's/ /./g')
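  # For example, SERVICE_CLUSTER_IP_RANGE=10.244.240.0/20 gives
  # octets=(10 244 240 0) and service_ip=10.244.240.1
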
  MASTER_EXTRA_SANS="IP:${service_ip},DNS:${MASTER_NAME},${MASTER_EXTRA_SANS}"

  (
    echo "#! /bin/bash"
    echo "readonly MY_NAME=${MASTER_NAME}"
    grep -v "^#" "${KUBE_ROOT}/cluster/photon-controller/templates/hostname.sh"
    echo "cd /home/kube/cache/kubernetes-install"
    echo "readonly MASTER_NAME='${MASTER_NAME}'"
    echo "readonly MASTER_IP_RANGE='${MASTER_IP_RANGE}'"
    echo "readonly INSTANCE_PREFIX='${INSTANCE_PREFIX}'"
    echo "readonly NODE_INSTANCE_PREFIX='${INSTANCE_PREFIX}-node'"
    echo "readonly NODE_IP_RANGES='${NODE_IP_RANGES}'"
    echo "readonly SERVICE_CLUSTER_IP_RANGE='${SERVICE_CLUSTER_IP_RANGE}'"
    echo "readonly ENABLE_NODE_LOGGING='${ENABLE_NODE_LOGGING:-false}'"
    echo "readonly LOGGING_DESTINATION='${LOGGING_DESTINATION:-}'"
    echo "readonly ENABLE_CLUSTER_DNS='${ENABLE_CLUSTER_DNS:-false}'"
    echo "readonly ENABLE_CLUSTER_UI='${ENABLE_CLUSTER_UI:-false}'"
    echo "readonly DNS_SERVER_IP='${DNS_SERVER_IP:-}'"
    echo "readonly DNS_DOMAIN='${DNS_DOMAIN:-}'"
    echo "readonly KUBE_USER='${KUBE_USER:-}'"
    echo "readonly KUBE_PASSWORD='${KUBE_PASSWORD:-}'"
    echo "readonly SERVER_BINARY_TAR='${SERVER_BINARY_TAR##*/}'"
    echo "readonly SALT_TAR='${SALT_TAR##*/}'"
    echo "readonly MASTER_HTPASSWD='${htpasswd}'"
    echo "readonly E2E_STORAGE_TEST_ENVIRONMENT='${E2E_STORAGE_TEST_ENVIRONMENT:-}'"
    echo "readonly MASTER_EXTRA_SANS='${MASTER_EXTRA_SANS:-}'"
    grep -v "^#" "${KUBE_ROOT}/cluster/photon-controller/templates/create-dynamic-salt-files.sh"
    grep -v "^#" "${KUBE_ROOT}/cluster/photon-controller/templates/install-release.sh"
    grep -v "^#" "${KUBE_ROOT}/cluster/photon-controller/templates/salt-master.sh"
  ) > "${KUBE_TEMP}/master-start.sh"
}

#
# Generate the scripts for each node to install salt
#
function gen-node-start {
  local i
  for (( i=0; i<${#NODE_NAMES[@]}; i++)); do
    (
      echo "#! /bin/bash"
      echo "readonly MY_NAME=${NODE_NAMES[${i}]}"
      grep -v "^#" "${KUBE_ROOT}/cluster/photon-controller/templates/hostname.sh"
      echo "KUBE_MASTER=${KUBE_MASTER}"
      echo "KUBE_MASTER_IP=${KUBE_MASTER_IP}"
      echo "NODE_IP_RANGE=$NODE_IP_RANGES"
      grep -v "^#" "${KUBE_ROOT}/cluster/photon-controller/templates/salt-minion.sh"
    ) > "${KUBE_TEMP}/node-start-${i}.sh"
  done
}

#
# Create a script that will run on the Kubernetes master and will run salt
# to configure the master. We make it a script instead of just running a
# single ssh command so that we can get logging.
#
function gen-master-salt {
  gen-salt "kubernetes-master"
}

#
# Create scripts that will be run on the Kubernetes master. Each of these
# will invoke salt to configure one of the nodes
#
function gen-node-salt {
  local i
  for (( i=0; i<${#NODE_NAMES[@]}; i++)); do
    gen-salt "${NODE_NAMES[${i}]}"
  done
}

#
# Shared implementation for gen-master-salt and gen-node-salt
# Writes a script that installs Kubernetes with salt
# The core of the script is simple (run 'salt ... state.highstate')
# We also do a bit of logging so we can debug problems
#
# There is also a funky workaround for an issue with docker 1.9
# (elsewhere we peg ourselves to docker 1.9). It's fixed in 1.10,
# so we should be able to remove it in the future
# https://github.com/docker/docker/issues/18113
# The problem is that sometimes the install (with apt-get) of
# docker fails. Deleting a file and retrying fixes it.
#
# Tell shellcheck to ignore our variables within single quotes:
# We're writing a script, not executing it, so this is normal
# shellcheck disable=SC2016
function gen-salt {
  local node_name=${1}
  (
    echo '#!/bin/bash'
    echo ''
    echo "node=${node_name}"
    echo 'out=/tmp/${node}-salt.out'
    echo 'log=/tmp/${node}-salt.log'
    echo ''
    echo 'echo $(date) >> $log'
    echo 'salt ${node} state.highstate -t 30 --no-color > ${out}'
    echo 'grep -E "Failed:[[:space:]]+0" ${out}'
    echo 'success=$?'
    echo 'cat ${out} >> ${log}'
    echo ''
    echo 'if [[ ${success} -ne 0 ]]; then'
    echo '  # Did we try to install docker-engine?'
    echo '  attempted=$(grep docker-engine ${out} | wc -l)'
    echo '  # Is docker-engine installed?'
    echo '  installed=$(salt --output=txt ${node} pkg.version docker-engine | wc -l)'
    echo '  if [[ ${attempted} -ne 0 && ${installed} -eq 0 ]]; then'
    echo '    echo "Unwedging docker-engine install" >> ${log}'
    echo '    salt ${node} cmd.run "rm -f /var/lib/docker/network/files/local-kv.db"'
    echo '  fi'
    echo 'fi'
    echo 'exit ${success}'
  ) > "${KUBE_TEMP}/${node_name}-salt.sh"
}

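# For example, gen-salt "kubernetes-master" writes
# ${KUBE_TEMP}/kubernetes-master-salt.sh, the script that
# install-kubernetes-on-master later copies to the master and runs.
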
#
# Generate a script to add a route to a host (master or node)
# The script will do two things:
# 1. Add the route immediately with the route command
# 2. Persist the route by saving it in /etc/network/interfaces
# This was done with a script because it was easier to get the quoting right
# and make it clear.
#
function gen-add-route {
  local route=${1}
  local gateway=${2}
  (
    echo '#!/bin/bash'
    echo ''
    echo '# Immediately add route'
    echo "sudo route add -net ${route} gw ${gateway}"
    echo ''
    echo '# Persist route so it lasts over restarts'
    echo 'sed -in "s|^iface eth0.*|&\n post-up route add -net' "${route} gw ${gateway}|"'" /etc/network/interfaces'
  ) > "${KUBE_TEMP}/add-route.sh"
}

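# For example, gen-add-route "10.244.1.0/24" "10.0.0.7" (hypothetical
# addresses) writes a script that runs
# 'sudo route add -net 10.244.1.0/24 gw 10.0.0.7' and appends a matching
# post-up line to /etc/network/interfaces.
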
#
# Create the Kubernetes master VM
# Sets global variables:
# - KUBE_MASTER    (name)
# - KUBE_MASTER_ID (Photon VM ID)
# - KUBE_MASTER_IP (IP address)
#
function create-master-vm {
  kube::log::status "Starting master VM..."

  pc-create-vm "${MASTER_NAME}" "${PHOTON_MASTER_FLAVOR}"
  KUBE_MASTER=${MASTER_NAME}
  KUBE_MASTER_ID=${_VM_ID}
  KUBE_MASTER_IP=${_VM_IP}
}

#
# Install salt on the Kubernetes master
# Relies on the master-start.sh script created in gen-master-start
#
function install-salt-on-master {
  kube::log::status "Installing salt on master..."

  upload-server-tars "${MASTER_NAME}" "${KUBE_MASTER_IP}"
  run-script-remotely "${KUBE_MASTER_IP}" "${KUBE_TEMP}/master-start.sh"
}

#
# Installs salt on the Kubernetes nodes in parallel
# Relies on the node-start scripts created in gen-node-start
#
function install-salt-on-nodes {
  kube::log::status "Creating nodes and installing salt on them..."

  # Start each of the VMs in parallel
  # In the future, we'll batch this because it doesn't scale well
  # past 10 or 20 nodes
  local node
  for (( node=0; node<${#NODE_NAMES[@]}; node++)); do
    (
      pc-create-vm "${NODE_NAMES[${node}]}" "${PHOTON_NODE_FLAVOR}"
      run-script-remotely "${_VM_IP}" "${KUBE_TEMP}/node-start-${node}.sh"
    ) &
  done

  # Wait for the node VM startups to complete
  local fail=0
  local job
  for job in $(jobs -p); do
    wait "${job}" || fail=$((fail + 1))
  done
  if (( fail != 0 )); then
    kube::log::error "Failed to start ${fail}/${NUM_NODES} nodes"
    exit 1
  fi
}

#
# Install Kubernetes on the master.
# This uses the kubernetes-master-salt.sh script created by gen-master-salt
# That script uses salt to install Kubernetes
#
function install-kubernetes-on-master {
  # Wait until salt-master is running: it may take a bit
  try-until-success-ssh "${KUBE_MASTER_IP}" \
    "Waiting for salt-master to start on ${KUBE_MASTER}" \
    "pgrep salt-master"

  gen-master-salt
  copy-file-to-vm "${KUBE_MASTER_IP}" "${KUBE_TEMP}/kubernetes-master-salt.sh" "/tmp/kubernetes-master-salt.sh"
  try-until-success-ssh "${KUBE_MASTER_IP}" \
    "Installing Kubernetes on ${KUBE_MASTER} via salt" \
    "sudo /bin/bash /tmp/kubernetes-master-salt.sh"
}

#
# Install Kubernetes on the nodes in parallel
# This uses the per-node salt scripts created by gen-node-salt
# Those scripts use salt to install Kubernetes
#
function install-kubernetes-on-nodes {
  gen-node-salt

  # Run in parallel to bring up the cluster faster
  # TODO: Batch this so that we run up to N in parallel, so
  # we don't overload this machine or the salt master
  local node
  for (( node=0; node<${#NODE_NAMES[@]}; node++)); do
    (
      copy-file-to-vm "${KUBE_MASTER_IP}" "${KUBE_TEMP}/${NODE_NAMES[${node}]}-salt.sh" "/tmp/${NODE_NAMES[${node}]}-salt.sh"
      try-until-success-ssh "${KUBE_NODE_IP_ADDRESSES[${node}]}" \
        "Waiting for salt-minion to start on ${NODE_NAMES[${node}]}" \
        "pgrep salt-minion"
      try-until-success-ssh "${KUBE_MASTER_IP}" \
        "Installing Kubernetes on ${NODE_NAMES[${node}]} via salt" \
        "sudo /bin/bash /tmp/${NODE_NAMES[${node}]}-salt.sh"
    ) &
  done

  # Wait for the Kubernetes installations to complete
  local fail=0
  local job
  for job in $(jobs -p); do
    wait "${job}" || fail=$((fail + 1))
  done
  if (( fail != 0 )); then
    kube::log::error "Failed to install Kubernetes on ${fail} out of ${NUM_NODES} nodes"
    exit 1
  fi
}

#
# Upload the Kubernetes tarballs to the master
#
function upload-server-tars {
  local vm_name=${1}
  local vm_ip=${2}

  run-ssh-cmd "${vm_ip}" "mkdir -p /home/kube/cache/kubernetes-install"

  local tar
  for tar in "${SERVER_BINARY_TAR}" "${SALT_TAR}"; do
    local base_tar
    base_tar=$(basename "${tar}")
    kube::log::status "Uploading ${base_tar} to ${vm_name}..."
    copy-file-to-vm "${vm_ip}" "${tar}" "/home/kube/cache/kubernetes-install/${base_tar}"
  done
}

#
# Wait for the Kubernetes healthz API to be responsive on the master
#
function wait-master-api {
  local curl_creds="--insecure --user ${KUBE_USER}:${KUBE_PASSWORD}"
  local curl_output="--fail --output /dev/null --silent"
  local curl_net="--max-time 1"

  try-until-success "Waiting for Kubernetes API on ${KUBE_MASTER}" \
    "curl ${curl_creds} ${curl_output} ${curl_net} https://${KUBE_MASTER_IP}/healthz"
}

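# The resulting command looks like this (hypothetical IP and credentials):
#   curl --insecure --user admin:passw0rd --fail --output /dev/null --silent \
#     --max-time 1 https://10.0.0.5/healthz
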
#
# Wait for the Kubernetes healthz API to be responsive on each node
#
function wait-node-apis {
  local curl_output="--fail --output /dev/null --silent"
  local curl_net="--max-time 1"

  local i
  for (( i=0; i<${#NODE_NAMES[@]}; i++)); do
    try-until-success "Waiting for Kubernetes API on ${NODE_NAMES[${i}]}..." \
      "curl ${curl_output} ${curl_net} http://${KUBE_NODE_IP_ADDRESSES[${i}]}:10250/healthz"
  done
}

#
# Configure the nodes so the pods can communicate
# Each node will have a bridge named cbr0 for the NODE_IP_RANGES
# defined in config-default.sh. This finds the IP subnet assigned
# by Kubernetes to each node and configures routes so the nodes
# can communicate
#
# Also configure the master to be able to talk to the nodes. This is
# useful so that you can get to the UI from the master.
#
function setup-pod-routes {
  local node

  KUBE_NODE_BRIDGE_NETWORK=()
  for (( node=0; node<${#NODE_NAMES[@]}; node++)); do
    # This happens in two steps (wait for an address, wait for a non 172.x.x.x address)
    # because it's both simpler and clearer what's happening.
    try-until-success-ssh "${KUBE_NODE_IP_ADDRESSES[${node}]}" \
      "Waiting for cbr0 bridge on ${NODE_NAMES[${node}]} to have an address" \
      'sudo ifconfig cbr0 | grep -oP "inet addr:\K\S+"'
    try-until-success-ssh "${KUBE_NODE_IP_ADDRESSES[${node}]}" \
      "Waiting for cbr0 bridge on ${NODE_NAMES[${node}]} to have the correct address" \
      'sudo ifconfig cbr0 | grep -oP "inet addr:\K\S+" | grep -v "^172."'
    run-ssh-cmd "${KUBE_NODE_IP_ADDRESSES[${node}]}" 'sudo ip route show | grep -E "dev cbr0" | cut -d " " -f1'
    KUBE_NODE_BRIDGE_NETWORK+=(${_OUTPUT})
    kube::log::status "cbr0 on ${NODE_NAMES[${node}]} is ${_OUTPUT}"
  done

  local i
  local j
  for (( i=0; i<${#NODE_NAMES[@]}; i++)); do
    kube::log::status "Configuring pod routes on ${NODE_NAMES[${i}]}..."

    gen-add-route "${KUBE_NODE_BRIDGE_NETWORK[${i}]}" "${KUBE_NODE_IP_ADDRESSES[${i}]}"
    run-script-remotely "${KUBE_MASTER_IP}" "${KUBE_TEMP}/add-route.sh"

    for (( j=0; j<${#NODE_NAMES[@]}; j++)); do
      if [[ "${i}" != "${j}" ]]; then
        gen-add-route "${KUBE_NODE_BRIDGE_NETWORK[${j}]}" "${KUBE_NODE_IP_ADDRESSES[${j}]}"
        run-script-remotely "${KUBE_NODE_IP_ADDRESSES[${i}]}" "${KUBE_TEMP}/add-route.sh"
      fi
    done
  done
}

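# For example, with two nodes whose cbr0 subnets are 10.244.1.0/24 and
# 10.244.2.0/24 (hypothetical), the master gets routes to both subnets via
# the nodes' IPs, and each node gets a route to the other node's subnet.
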
#
# Copy the certificate/key from the Kubernetes master
# These are used to create the kubeconfig file, which allows
# users to use kubectl easily
#
# We also set KUBE_CERT, KUBE_KEY, CA_CERT, and CONTEXT because they
# are needed by create-kubeconfig from common.sh to generate
# the kubeconfig file.
#
function copy-kube-certs {
  local cert="kubecfg.crt"
  local key="kubecfg.key"
  local ca="ca.crt"
  local cert_dir="/srv/kubernetes"

  kube::log::status "Copying credentials from ${KUBE_MASTER}"

  # Set global environment variables: needed by create-kubeconfig
  # in common.sh
  export KUBE_CERT="${KUBE_TEMP}/${cert}"
  export KUBE_KEY="${KUBE_TEMP}/${key}"
  export CA_CERT="${KUBE_TEMP}/${ca}"
  export CONTEXT="photon-${INSTANCE_PREFIX}"

  # Make the certs world-readable so we can scp them down, then
  # restore the tighter permissions afterwards
  run-ssh-cmd "${KUBE_MASTER_IP}" "sudo chmod 644 ${cert_dir}/${cert}"
  run-ssh-cmd "${KUBE_MASTER_IP}" "sudo chmod 644 ${cert_dir}/${key}"
  run-ssh-cmd "${KUBE_MASTER_IP}" "sudo chmod 644 ${cert_dir}/${ca}"

  copy-file-from-vm "${KUBE_MASTER_IP}" "${cert_dir}/${cert}" "${KUBE_CERT}"
  copy-file-from-vm "${KUBE_MASTER_IP}" "${cert_dir}/${key}" "${KUBE_KEY}"
  copy-file-from-vm "${KUBE_MASTER_IP}" "${cert_dir}/${ca}" "${CA_CERT}"

  run-ssh-cmd "${KUBE_MASTER_IP}" "sudo chmod 600 ${cert_dir}/${cert}"
  run-ssh-cmd "${KUBE_MASTER_IP}" "sudo chmod 600 ${cert_dir}/${key}"
  run-ssh-cmd "${KUBE_MASTER_IP}" "sudo chmod 600 ${cert_dir}/${ca}"
}

#
# Copies a script to a VM and runs it in the background with nohup;
# the script's output is left in <remote_file>.out on the VM
# Parameters:
# - IP of VM
# - Path to local file
#
function run-script-remotely {
  local vm_ip=${1}
  local local_file="${2}"
  local base_file
  local remote_file

  base_file=$(basename "${local_file}")
  remote_file="/tmp/${base_file}"

  copy-file-to-vm "${vm_ip}" "${local_file}" "${remote_file}"
  run-ssh-cmd "${vm_ip}" "chmod 700 ${remote_file}"
  run-ssh-cmd "${vm_ip}" "nohup sudo ${remote_file} < /dev/null 1> ${remote_file}.out 2>&1 &"
}

#
# Runs a command on a VM using ssh
# Parameters:
# - (optional) -i to ignore failure
# - IP address of the VM
# - Command to run
# Assumes environment variables:
# - VM_USER
# - SSH_OPTS
#
function run-ssh-cmd {
  local ignore_failure=""
  if [[ "${1}" = "-i" ]]; then
    ignore_failure="-i"
    shift
  fi

  local vm_ip=${1}
  shift
  local cmd=${1}

  run-cmd ${ignore_failure} "ssh ${SSH_OPTS} ${VM_USER}@${vm_ip} ${cmd}"
}

#
# Uses scp to copy file to VM
# Parameters:
# - IP address of the VM
# - Path to local file
# - Path to remote file
# Assumes environment variables:
# - VM_USER
# - SSH_OPTS
#
function copy-file-to-vm {
  local vm_ip=${1}
  local local_file=${2}
  local remote_file=${3}

  run-cmd "scp ${SSH_OPTS} ${local_file} ${VM_USER}@${vm_ip}:${remote_file}"
}

function copy-file-from-vm {
  local vm_ip=${1}
  local remote_file=${2}
  local local_file=${3}

  run-cmd "scp ${SSH_OPTS} ${VM_USER}@${vm_ip}:${remote_file} ${local_file}"
}

#
# Run a command, print nice error output
# Used by copy-file-to-vm and run-ssh-cmd
#
function run-cmd {
  local rc=0
  local ignore_failure=""
  if [[ "${1}" = "-i" ]]; then
    ignore_failure=${1}
    shift
  fi

  local cmd=$1
  local output
  output=$(${cmd} 2>&1) || rc=$?
  if [[ ${rc} -ne 0 ]]; then
    if [[ -z "${ignore_failure}" ]]; then
      kube::log::error "Failed to run command: ${cmd} Output:"
      echo "${output}"
      exit 1
    fi
  fi
  _OUTPUT=${output}
  return ${rc}
}

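# Example usage:
#   run-cmd "uname -r"            # on failure, logs the output and exits
#   run-cmd -i "ls /nonexistent"  # failure tolerated: rc is returned and
#                                 # _OUTPUT holds the command's output
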
#
# After the initial VM setup, we use SSH with keys to access the VMs
# This requires an SSH agent, so we verify that it's running
#
function verify-ssh-prereqs {
  kube::log::status "Validating SSH configuration..."
  local rc

  rc=0
  ssh-add -L 1> /dev/null 2> /dev/null || rc=$?
  # "Could not open a connection to your authentication agent."
  if [[ "${rc}" -eq 2 ]]; then
    # ssh agent wasn't running, so start it and ensure we stop it
    eval "$(ssh-agent)" > /dev/null
    trap-add "kill ${SSH_AGENT_PID}" EXIT
  fi

  rc=0
  ssh-add -L 1> /dev/null 2> /dev/null || rc=$?
  # "The agent has no identities."
  if [[ "${rc}" -eq 1 ]]; then
    # Try adding one of the default identities, with or without passphrase.
    ssh-add || true
  fi

  # Expect at least one identity to be available.
  if ! ssh-add -L 1> /dev/null 2> /dev/null; then
    kube::log::error "Could not find or add an SSH identity."
    kube::log::error "Please start ssh-agent, add your identity, and retry."
    exit 1
  fi
}

#
# Verify that Photon Controller has been configured in the way we expect. Specifically:
# - Have the flavors been created?
# - Has the image been uploaded?
# TODO: Check the tenant and project as well.
#
function verify-photon-config {
  kube::log::status "Validating Photon configuration..."

  # We don't want silent failure: we check for failure
  set +o pipefail

  verify-photon-flavors
  verify-photon-image
  verify-photon-tenant

  # Reset default set in common.sh
  set -o pipefail
}

#
# Verify that the VM and disk flavors have been created
#
function verify-photon-flavors {
  local rc=0

  ${PHOTON} flavor list | awk -F'\t' '{print $2}' | grep -q "^${PHOTON_MASTER_FLAVOR}$" > /dev/null 2>&1 || rc=$?
  if [[ ${rc} -ne 0 ]]; then
    kube::log::error "ERROR: Cannot find VM flavor named ${PHOTON_MASTER_FLAVOR}"
    exit 1
  fi

  if [[ "${PHOTON_MASTER_FLAVOR}" != "${PHOTON_NODE_FLAVOR}" ]]; then
    rc=0
    ${PHOTON} flavor list | awk -F'\t' '{print $2}' | grep -q "^${PHOTON_NODE_FLAVOR}$" > /dev/null 2>&1 || rc=$?
    if [[ ${rc} -ne 0 ]]; then
      kube::log::error "ERROR: Cannot find VM flavor named ${PHOTON_NODE_FLAVOR}"
      exit 1
    fi
  fi

  ${PHOTON} flavor list | awk -F'\t' '{print $2}' | grep -q "^${PHOTON_DISK_FLAVOR}$" > /dev/null 2>&1 || rc=$?
  if [[ ${rc} -ne 0 ]]; then
    kube::log::error "ERROR: Cannot find disk flavor named ${PHOTON_DISK_FLAVOR}"
    exit 1
  fi
}

#
# Verify that we have the image we need, and that it's not in the ERROR
# state or duplicated
#
function verify-photon-image {
  local rc

  rc=0
  ${PHOTON} image list | grep -q $'\t'"${PHOTON_IMAGE}"$'\t' > /dev/null 2>&1 || rc=$?
  if [[ ${rc} -ne 0 ]]; then
    kube::log::error "ERROR: Cannot find image \"${PHOTON_IMAGE}\""
    exit 1
  fi

  rc=0
  ${PHOTON} image list | grep $'\t'"${PHOTON_IMAGE}"$'\t' | grep ERROR > /dev/null 2>&1 || rc=$?
  if [[ ${rc} -eq 0 ]]; then
    echo "Warning: You have at least one ${PHOTON_IMAGE} image in the ERROR state. You may want to investigate."
    echo "Images in the ERROR state will be ignored."
  fi

  rc=0
  local num_images
  num_images=$(${PHOTON} image list | grep $'\t'"${PHOTON_IMAGE}"$'\t' | grep -c READY) || rc=$?
  if [[ "${num_images}" -gt 1 ]]; then
    echo "ERROR: You have more than one READY ${PHOTON_IMAGE} image. Ensure there is only one."
    exit 1
  fi
}

function verify-photon-tenant {
  local rc

  rc=0
  ${PHOTON} tenant list | grep -q $'\t'"${PHOTON_TENANT}" > /dev/null 2>&1 || rc=$?
  if [[ ${rc} -ne 0 ]]; then
    echo "ERROR: Cannot find tenant \"${PHOTON_TENANT}\""
    exit 1
  fi

  ${PHOTON} project list --tenant "${PHOTON_TENANT}" | grep -q $'\t'"${PHOTON_PROJECT}"$'\t' > /dev/null 2>&1 || rc=$?
  if [[ ${rc} -ne 0 ]]; then
    echo "ERROR: Cannot find project \"${PHOTON_PROJECT}\""
    exit 1
  fi
}

#
# Verifies that a given command is in the PATH
#
function verify-cmd-in-path {
  local cmd=${1}
  which "${cmd}" >/dev/null || {
    kube::log::error "Can't find ${cmd} in PATH, please install it and retry."
    exit 1
  }
}

#
# Checks that KUBE_TEMP is set, or sets it
# If it sets it, it also creates the temporary directory
# and sets up a trap so that we delete it when we exit
#
function ensure-temp-dir {
  if [[ -z ${KUBE_TEMP-} ]]; then
    KUBE_TEMP=$(mktemp -d -t kubernetes.XXXXXX)
    trap-add "rm -rf '${KUBE_TEMP}'" EXIT
  fi
}

#
# Repeatedly try a command over ssh until it succeeds or until ten minutes have passed
# The timeout isn't exact, since we assume the command runs instantaneously, and
# it doesn't.
#
function try-until-success-ssh {
  local vm_ip=${1}
  local cmd_description=${2}
  local cmd=${3}
  local timeout=600
  local sleep_time=5
  local max_attempts

  ((max_attempts=timeout/sleep_time))

  kube::log::status "${cmd_description} for up to 10 minutes..."
  local attempt=0
  while true; do
    local rc=0
    run-ssh-cmd -i "${vm_ip}" "${cmd}" || rc=1
    if [[ ${rc} != 0 ]]; then
      if (( attempt == max_attempts )); then
        kube::log::error "Failed, cannot proceed: you may need to log into the VM to debug"
        exit 1
      fi
    else
      break
    fi
    attempt=$((attempt+1))
    sleep ${sleep_time}
  done
}

function try-until-success {
  local cmd_description=${1}
  local cmd=${2}
  local timeout=600
  local sleep_time=5
  local max_attempts

  ((max_attempts=timeout/sleep_time))

  kube::log::status "${cmd_description} for up to 10 minutes..."
  local attempt=0
  while true; do
    local rc=0
    run-cmd -i "${cmd}" || rc=1
    if [[ ${rc} != 0 ]]; then
      if (( attempt == max_attempts )); then
        kube::log::error "Failed, cannot proceed"
        exit 1
      fi
    else
      break
    fi
    attempt=$((attempt+1))
    sleep ${sleep_time}
  done
}

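# Example usage (a simplified version of what wait-master-api does, with a
# hypothetical IP):
#   try-until-success "Waiting for Kubernetes API on ${KUBE_MASTER}" \
#     "curl --fail --max-time 1 https://10.0.0.5/healthz"
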
#
# Sets up a trap handler
#
function trap-add {
  local handler="${1}"
  local signal="${2-EXIT}"
  local cur

  cur="$(eval "sh -c 'echo \$3' -- $(trap -p ${signal})")"
  if [[ -n "${cur}" ]]; then
    handler="${cur}; ${handler}"
  fi

  # We want ${handler} to expand now, so tell shellcheck
  # shellcheck disable=SC2064
  trap "${handler}" ${signal}
}

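# Example usage (appends to any existing trap rather than replacing it, as
# ensure-temp-dir and verify-ssh-prereqs do above):
#   trap-add "rm -rf '${KUBE_TEMP}'" EXIT
#   trap-add "kill ${SSH_AGENT_PID}" EXIT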