upgrade.sh 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337
  1. #!/bin/bash
  2. # Copyright 2015 The Kubernetes Authors.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. # !!!EXPERIMENTAL !!! Upgrade script for GCE. Expect this to get
  16. # rewritten in Go in relatively short order, but it allows us to start
  17. # testing the concepts.
  18. set -o errexit
  19. set -o nounset
  20. set -o pipefail
  21. if [[ "${KUBERNETES_PROVIDER:-gce}" != "gce" ]]; then
  22. echo "!!! ${1} only works on GCE" >&2
  23. exit 1
  24. fi
  25. KUBE_ROOT=$(dirname "${BASH_SOURCE}")/../..
  26. source "${KUBE_ROOT}/cluster/kube-util.sh"
  27. function usage() {
  28. echo "!!! EXPERIMENTAL !!!"
  29. echo ""
  30. echo "${0} [-M|-N|-P] -l | <version number or publication>"
  31. echo " Upgrades master and nodes by default"
  32. echo " -M: Upgrade master only"
  33. echo " -N: Upgrade nodes only"
  34. echo " -P: Node upgrade prerequisites only (create a new instance template)"
  35. echo " -l: Use local(dev) binaries"
  36. echo ""
  37. echo ' Version number or publication is either a proper version number'
  38. echo ' (e.g. "v1.0.6", "v1.2.0-alpha.1.881+376438b69c7612") or a version'
  39. echo ' publication of the form <bucket>/<version> (e.g. "release/stable",'
  40. echo ' "ci/latest-1"). Some common ones are:'
  41. echo ' - "release/stable"'
  42. echo ' - "release/latest"'
  43. echo ' - "ci/latest"'
  44. echo ' See the docs on getting builds for more information about version publication.'
  45. echo ""
  46. echo "(... Fetching current release versions ...)"
  47. echo ""
  48. # NOTE: IF YOU CHANGE THE FOLLOWING LIST, ALSO UPDATE test/e2e/cluster_upgrade.go
  49. local release_stable
  50. local release_latest
  51. local ci_latest
  52. release_stable=$(gsutil cat gs://kubernetes-release/release/stable.txt)
  53. release_latest=$(gsutil cat gs://kubernetes-release/release/latest.txt)
  54. ci_latest=$(gsutil cat gs://kubernetes-release-dev/ci/latest.txt)
  55. echo "Right now, versions are as follows:"
  56. echo " release/stable: ${0} ${release_stable}"
  57. echo " release/latest: ${0} ${release_latest}"
  58. echo " ci/latest: ${0} ${ci_latest}"
  59. }
  60. function upgrade-master() {
  61. echo "== Upgrading master to '${SERVER_BINARY_TAR_URL}'. Do not interrupt, deleting master instance. =="
  62. get-kubeconfig-basicauth
  63. get-kubeconfig-bearertoken
  64. detect-master
  65. parse-master-env
  66. # Delete the master instance. Note that the master-pd is created
  67. # with auto-delete=no, so it should not be deleted.
  68. gcloud compute instances delete \
  69. --project "${PROJECT}" \
  70. --quiet \
  71. --zone "${ZONE}" \
  72. "${MASTER_NAME}"
  73. create-master-instance "${MASTER_NAME}-ip"
  74. wait-for-master
  75. }
  76. function wait-for-master() {
  77. echo "== Waiting for new master to respond to API requests =="
  78. local curl_auth_arg
  79. if [[ -n ${KUBE_BEARER_TOKEN:-} ]]; then
  80. curl_auth_arg=(-H "Authorization: Bearer ${KUBE_BEARER_TOKEN}")
  81. elif [[ -n ${KUBE_PASSWORD:-} ]]; then
  82. curl_auth_arg=(--user "${KUBE_USER}:${KUBE_PASSWORD}")
  83. else
  84. echo "can't get auth credentials for the current master"
  85. exit 1
  86. fi
  87. until curl --insecure "${curl_auth_arg[@]}" --max-time 5 \
  88. --fail --output /dev/null --silent "https://${KUBE_MASTER_IP}/healthz"; do
  89. printf "."
  90. sleep 2
  91. done
  92. echo "== Done =="
  93. }
  94. # Perform common upgrade setup tasks
  95. #
  96. # Assumed vars
  97. # KUBE_VERSION
  98. function prepare-upgrade() {
  99. ensure-temp-dir
  100. detect-project
  101. write-cluster-name
  102. tars_from_version
  103. }
  104. # Reads kube-env metadata from first node in NODE_NAMES.
  105. #
  106. # Assumed vars:
  107. # NODE_NAMES
  108. # PROJECT
  109. # ZONE
  110. function get-node-env() {
  111. # TODO(zmerlynn): Make this more reliable with retries.
  112. gcloud compute --project ${PROJECT} ssh --zone ${ZONE} ${NODE_NAMES[0]} --command \
  113. "curl --fail --silent -H 'Metadata-Flavor: Google' \
  114. 'http://metadata/computeMetadata/v1/instance/attributes/kube-env'" 2>/dev/null
  115. }
  116. # Assumed vars:
  117. # KUBE_VERSION
  118. # NODE_SCOPES
  119. # NODE_INSTANCE_PREFIX
  120. # PROJECT
  121. # ZONE
  122. #
  123. # Vars set:
  124. # KUBELET_TOKEN
  125. # KUBE_PROXY_TOKEN
  126. # CA_CERT_BASE64
  127. # EXTRA_DOCKER_OPTS
  128. # KUBELET_CERT_BASE64
  129. # KUBELET_KEY_BASE64
  130. function upgrade-nodes() {
  131. prepare-node-upgrade
  132. do-node-upgrade
  133. }
  134. # prepare-node-upgrade creates a new instance template suitable for upgrading
  135. # to KUBE_VERSION and echos a single line with the name of the new template.
  136. #
  137. # Assumed vars:
  138. # KUBE_VERSION
  139. # NODE_SCOPES
  140. # NODE_INSTANCE_PREFIX
  141. # PROJECT
  142. # ZONE
  143. #
  144. # Vars set:
  145. # SANITIZED_VERSION
  146. # INSTANCE_GROUPS
  147. # KUBELET_TOKEN
  148. # KUBE_PROXY_TOKEN
  149. # CA_CERT_BASE64
  150. # EXTRA_DOCKER_OPTS
  151. # KUBELET_CERT_BASE64
  152. # KUBELET_KEY_BASE64
  153. function prepare-node-upgrade() {
  154. echo "== Preparing node upgrade (to ${KUBE_VERSION}). ==" >&2
  155. SANITIZED_VERSION=$(echo ${KUBE_VERSION} | sed 's/[\.\+]/-/g')
  156. detect-node-names # sets INSTANCE_GROUPS
  157. # TODO(zmerlynn): Refactor setting scope flags.
  158. local scope_flags=
  159. if [ -n "${NODE_SCOPES}" ]; then
  160. scope_flags="--scopes ${NODE_SCOPES}"
  161. else
  162. scope_flags="--no-scopes"
  163. fi
  164. # Get required node env vars from exiting template.
  165. local node_env=$(get-node-env)
  166. KUBELET_TOKEN=$(get-env-val "${node_env}" "KUBELET_TOKEN")
  167. KUBE_PROXY_TOKEN=$(get-env-val "${node_env}" "KUBE_PROXY_TOKEN")
  168. CA_CERT_BASE64=$(get-env-val "${node_env}" "CA_CERT")
  169. EXTRA_DOCKER_OPTS=$(get-env-val "${node_env}" "EXTRA_DOCKER_OPTS")
  170. KUBELET_CERT_BASE64=$(get-env-val "${node_env}" "KUBELET_CERT")
  171. KUBELET_KEY_BASE64=$(get-env-val "${node_env}" "KUBELET_KEY")
  172. # TODO(zmerlynn): How do we ensure kube-env is written in a ${version}-
  173. # compatible way?
  174. write-node-env
  175. # TODO(zmerlynn): Get configure-vm script from ${version}. (Must plumb this
  176. # through all create-node-instance-template implementations).
  177. local template_name=$(get-template-name-from-version ${SANITIZED_VERSION})
  178. create-node-instance-template "${template_name}"
  179. # The following is echo'd so that callers can get the template name.
  180. echo "Instance template name: ${template_name}"
  181. echo "== Finished preparing node upgrade (to ${KUBE_VERSION}). ==" >&2
  182. }
  183. # Prereqs:
  184. # - prepare-node-upgrade should have been called successfully
  185. function do-node-upgrade() {
  186. echo "== Upgrading nodes to ${KUBE_VERSION}. ==" >&2
  187. # Do the actual upgrade.
  188. # NOTE(zmerlynn): If you are changing this gcloud command, update
  189. # test/e2e/cluster_upgrade.go to match this EXACTLY.
  190. local template_name=$(get-template-name-from-version ${SANITIZED_VERSION})
  191. local old_templates=()
  192. local updates=()
  193. for group in ${INSTANCE_GROUPS[@]}; do
  194. old_templates+=($(gcloud compute instance-groups managed list \
  195. --project="${PROJECT}" \
  196. --zones="${ZONE}" \
  197. --regexp="${group}" \
  198. --format='value(instanceTemplate)' || true))
  199. update=$(gcloud alpha compute rolling-updates \
  200. --project="${PROJECT}" \
  201. --zone="${ZONE}" \
  202. start \
  203. --group="${group}" \
  204. --template="${template_name}" \
  205. --instance-startup-timeout=300s \
  206. --max-num-concurrent-instances=1 \
  207. --max-num-failed-instances=0 \
  208. --min-instance-update-time=0s 2>&1)
  209. id=$(echo "${update}" | grep "Started" | cut -d '/' -f 11 | cut -d ']' -f 1)
  210. updates+=("${id}")
  211. done
  212. # Wait until rolling updates are finished.
  213. for update in ${updates[@]}; do
  214. while true; do
  215. result=$(gcloud alpha compute rolling-updates \
  216. --project="${PROJECT}" \
  217. --zone="${ZONE}" \
  218. describe \
  219. ${update} \
  220. --format='value(status)' || true)
  221. if [ $result = "ROLLED_OUT" ]; then
  222. echo "Rolling update ${update} is ${result} state and finished."
  223. break
  224. fi
  225. echo "Rolling update ${update} is still in ${result} state."
  226. sleep 10
  227. done
  228. done
  229. # Remove the old templates.
  230. for tmpl in ${old_templates[@]}; do
  231. gcloud compute instance-templates delete \
  232. --quiet \
  233. --project="${PROJECT}" \
  234. "${tmpl}" || true
  235. done
  236. echo "== Finished upgrading nodes to ${KUBE_VERSION}. ==" >&2
  237. }
  238. master_upgrade=true
  239. node_upgrade=true
  240. node_prereqs=false
  241. local_binaries=false
  242. while getopts ":MNPlh" opt; do
  243. case ${opt} in
  244. M)
  245. node_upgrade=false
  246. ;;
  247. N)
  248. master_upgrade=false
  249. ;;
  250. P)
  251. node_prereqs=true
  252. ;;
  253. l)
  254. local_binaries=true
  255. ;;
  256. h)
  257. usage
  258. exit 0
  259. ;;
  260. \?)
  261. echo "Invalid option: -$OPTARG" >&2
  262. usage
  263. exit 1
  264. ;;
  265. esac
  266. done
  267. shift $((OPTIND-1))
  268. if [[ $# -lt 1 ]] && [[ "${local_binaries}" == "false" ]]; then
  269. usage
  270. exit 1
  271. fi
  272. if [[ "${master_upgrade}" == "false" ]] && [[ "${node_upgrade}" == "false" ]]; then
  273. echo "Can't specify both -M and -N" >&2
  274. exit 1
  275. fi
  276. if [[ "${local_binaries}" == "false" ]]; then
  277. set_binary_version ${1}
  278. fi
  279. prepare-upgrade
  280. if [[ "${node_prereqs}" == "true" ]]; then
  281. prepare-node-upgrade
  282. exit 0
  283. fi
  284. if [[ "${master_upgrade}" == "true" ]]; then
  285. upgrade-master
  286. fi
  287. if [[ "${node_upgrade}" == "true" ]]; then
  288. if [[ "${local_binaries}" == "true" ]]; then
  289. echo "Upgrading nodes to local binaries is not yet supported." >&2
  290. exit 1
  291. else
  292. upgrade-nodes
  293. fi
  294. fi
  295. echo "== Validating cluster post-upgrade =="
  296. "${KUBE_ROOT}/cluster/validate-cluster.sh"