  1. #!/bin/bash
  2. # Copyright 2014 The Kubernetes Authors.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. # A library of helper functions and constants for the local config.
  16. # Use the config file specified in $KUBE_CONFIG_FILE, or default to
  17. # config-default.sh.
  18. KUBE_ROOT=$(dirname "${BASH_SOURCE}")/../..
  19. source "${KUBE_ROOT}/cluster/gce/${KUBE_CONFIG_FILE-"config-default.sh"}"
  20. source "${KUBE_ROOT}/cluster/common.sh"
  21. source "${KUBE_ROOT}/cluster/lib/util.sh"
  22. if [[ "${NODE_OS_DISTRIBUTION}" == "debian" || "${NODE_OS_DISTRIBUTION}" == "coreos" || "${NODE_OS_DISTRIBUTION}" == "trusty" || "${NODE_OS_DISTRIBUTION}" == "gci" ]]; then
  23. source "${KUBE_ROOT}/cluster/gce/${NODE_OS_DISTRIBUTION}/node-helper.sh"
  24. else
  25. echo "Cannot operate on cluster using node os distro: ${NODE_OS_DISTRIBUTION}" >&2
  26. exit 1
  27. fi
  28. if [[ "${MASTER_OS_DISTRIBUTION}" == "debian" || "${MASTER_OS_DISTRIBUTION}" == "coreos" || "${MASTER_OS_DISTRIBUTION}" == "trusty" || "${MASTER_OS_DISTRIBUTION}" == "gci" ]]; then
  29. source "${KUBE_ROOT}/cluster/gce/${MASTER_OS_DISTRIBUTION}/master-helper.sh"
  30. else
  31. echo "Cannot operate on cluster using master os distro: ${MASTER_OS_DISTRIBUTION}" >&2
  32. exit 1
  33. fi
  34. if [[ "${MASTER_OS_DISTRIBUTION}" == "gci" ]]; then
  35. # If the master image is not set, we use the latest GCI image.
  36. # Otherwise, we respect whatever is set by the user.
  37. MASTER_IMAGE=${KUBE_GCE_MASTER_IMAGE:-${GCI_VERSION}}
  38. MASTER_IMAGE_PROJECT=${KUBE_GCE_MASTER_PROJECT:-google-containers}
  39. elif [[ "${MASTER_OS_DISTRIBUTION}" == "debian" ]]; then
  40. MASTER_IMAGE=${KUBE_GCE_MASTER_IMAGE:-${CVM_VERSION}}
  41. MASTER_IMAGE_PROJECT=${KUBE_GCE_MASTER_PROJECT:-google-containers}
  42. fi
  43. if [[ "${NODE_OS_DISTRIBUTION}" == "gci" ]]; then
  44. # If the node image is not set, we use the latest GCI image.
  45. # Otherwise, we respect whatever is set by the user.
  46. NODE_IMAGE=${KUBE_GCE_NODE_IMAGE:-${GCI_VERSION}}
  47. NODE_IMAGE_PROJECT=${KUBE_GCE_NODE_PROJECT:-google-containers}
  48. elif [[ "${NODE_OS_DISTRIBUTION}" == "debian" ]]; then
  49. NODE_IMAGE=${KUBE_GCE_NODE_IMAGE:-${CVM_VERSION}}
  50. NODE_IMAGE_PROJECT=${KUBE_GCE_NODE_PROJECT:-google-containers}
  51. fi
  52. # Verify cluster autoscaler configuration.
  53. if [[ "${ENABLE_CLUSTER_AUTOSCALER}" == "true" ]]; then
  54. if [[ -z "${AUTOSCALER_MIN_NODES:-}" ]]; then
  55. echo "AUTOSCALER_MIN_NODES not set."
  56. exit 1
  57. fi
  58. if [[ -z "${AUTOSCALER_MAX_NODES:-}" ]]; then
  59. echo "AUTOSCALER_MAX_NODES not set."
  60. exit 1
  61. fi
  62. fi
  63. NODE_INSTANCE_PREFIX="${INSTANCE_PREFIX}-minion"
  64. NODE_TAGS="${NODE_TAG}"
  65. ALLOCATE_NODE_CIDRS=true
  66. KUBE_PROMPT_FOR_UPDATE=y
  67. KUBE_SKIP_UPDATE=${KUBE_SKIP_UPDATE-"n"}
  68. # How long (in seconds) to wait for cluster initialization.
  69. KUBE_CLUSTER_INITIALIZATION_TIMEOUT=${KUBE_CLUSTER_INITIALIZATION_TIMEOUT:-300}
  70. function join_csv() {
  71. local IFS=','; echo "$*";
  72. }
  73. # This function returns the first string before the comma
  74. function split_csv() {
  75. echo "$*" | cut -d',' -f1
  76. }
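# Illustrative examples (not executed here; values are hypothetical):
#   join_csv "a" "b" "c"    # prints "a,b,c"
#   split_csv "a,b,c"       # prints "a"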
  77. # Verify prereqs
  78. function verify-prereqs() {
  79. local cmd
  80. for cmd in gcloud gsutil; do
  81. if ! which "${cmd}" >/dev/null; then
  82. local resp
  83. if [[ "${KUBE_PROMPT_FOR_UPDATE}" == "y" ]]; then
  84. echo "Can't find ${cmd} in PATH. Do you wish to install the Google Cloud SDK? [Y/n]"
  85. read resp
  86. else
  87. resp="y"
  88. fi
  89. if [[ "${resp}" != "n" && "${resp}" != "N" ]]; then
  90. curl https://sdk.cloud.google.com | bash
  91. fi
  92. if ! which "${cmd}" >/dev/null; then
  93. echo "Can't find ${cmd} in PATH, please fix and retry. The Google Cloud " >&2
  94. echo "SDK can be downloaded from https://cloud.google.com/sdk/." >&2
  95. exit 1
  96. fi
  97. fi
  98. done
  99. if [[ "${KUBE_SKIP_UPDATE}" == "y" ]]; then
  100. return
  101. fi
  102. # update and install components as needed
  103. if [[ "${KUBE_PROMPT_FOR_UPDATE}" != "y" ]]; then
  104. gcloud_prompt="-q"
  105. fi
  106. local sudo_prefix=""
  107. if [[ ! -w "$(dirname "$(which gcloud)")" ]]; then
  108. sudo_prefix="sudo"
  109. fi
  110. ${sudo_prefix} gcloud ${gcloud_prompt:-} components install alpha || true
  111. ${sudo_prefix} gcloud ${gcloud_prompt:-} components install beta || true
  112. ${sudo_prefix} gcloud ${gcloud_prompt:-} components update || true
  113. }
  114. # Create a temp dir that'll be deleted at the end of this bash session.
  115. #
  116. # Vars set:
  117. # KUBE_TEMP
  118. function ensure-temp-dir() {
  119. if [[ -z ${KUBE_TEMP-} ]]; then
  120. KUBE_TEMP=$(mktemp -d -t kubernetes.XXXXXX)
  121. trap 'rm -rf "${KUBE_TEMP}"' EXIT
  122. fi
  123. }
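# Example usage (illustrative): callers typically do
#   ensure-temp-dir
#   echo "something" > "${KUBE_TEMP}/some-file"
# and rely on the EXIT trap above to clean the directory up.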
  124. # Use the gcloud defaults to find the project. If it is already set in the
  125. # environment then go with that.
  126. #
  127. # Vars set:
  128. # PROJECT
  129. # PROJECT_REPORTED
  130. function detect-project() {
  131. if [[ -z "${PROJECT-}" ]]; then
  132. PROJECT=$(gcloud config list project --format 'value(core.project)')
  133. fi
  134. if [[ -z "${PROJECT-}" ]]; then
  135. echo "Could not detect Google Cloud Platform project. Set the default project using " >&2
  136. echo "'gcloud config set project <PROJECT>'" >&2
  137. exit 1
  138. fi
  139. if [[ -z "${PROJECT_REPORTED-}" ]]; then
  140. echo "Project: ${PROJECT}" >&2
  141. echo "Zone: ${ZONE}" >&2
  142. PROJECT_REPORTED=true
  143. fi
  144. }
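# Example (illustrative, hypothetical project id): with PROJECT unset,
#   detect-project
# falls back to `gcloud config list project` and then prints e.g.
#   Project: my-gcp-project
#   Zone: us-central1-b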
  145. # Copy a release tar and its accompanying hash.
  146. function copy-to-staging() {
  147. local -r staging_path=$1
  148. local -r gs_url=$2
  149. local -r tar=$3
  150. local -r hash=$4
  151. echo "${hash}" > "${tar}.sha1"
  152. gsutil -m -q -h "Cache-Control:private, max-age=0" cp "${tar}" "${tar}.sha1" "${staging_path}"
  153. gsutil -m acl ch -g all:R "${gs_url}" "${gs_url}.sha1" >/dev/null 2>&1
  154. echo "+++ $(basename ${tar}) uploaded (sha1 = ${hash})"
  155. }
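# Example (illustrative, hypothetical paths/hash):
#   copy-to-staging "gs://kubernetes-staging-0123456789/my-cluster-devel" \
#     "gs://kubernetes-staging-0123456789/my-cluster-devel/kubernetes-server-linux-amd64.tar.gz" \
#     "_output/release-tars/kubernetes-server-linux-amd64.tar.gz" \
#     "<sha1 of that tar>"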
  156. # Given the cluster zone, return the list of regional GCS release
  157. # bucket suffixes for the release in preference order. GCS doesn't
  158. # give us an API for this, so we hardcode it.
  159. #
  160. # Assumed vars:
  161. # RELEASE_REGION_FALLBACK
  162. # REGIONAL_KUBE_ADDONS
  163. # ZONE
  164. # Vars set:
  165. # PREFERRED_REGION
  166. # KUBE_ADDON_REGISTRY
  167. function set-preferred-region() {
  168. case ${ZONE} in
  169. asia-*)
  170. PREFERRED_REGION=("asia" "us" "eu")
  171. ;;
  172. europe-*)
  173. PREFERRED_REGION=("eu" "us" "asia")
  174. ;;
  175. *)
  176. PREFERRED_REGION=("us" "eu" "asia")
  177. ;;
  178. esac
  179. local -r preferred="${PREFERRED_REGION[0]}"
  180. if [[ "${RELEASE_REGION_FALLBACK}" != "true" ]]; then
  181. PREFERRED_REGION=( "${preferred}" )
  182. fi
  183. # If we're using regional GCR, and we're outside the US, go to the
  184. # regional registry. The gcr.io/google_containers registry is
  185. # appropriate for US (for now).
  186. if [[ "${REGIONAL_KUBE_ADDONS}" == "true" ]] && [[ "${preferred}" != "us" ]]; then
  187. KUBE_ADDON_REGISTRY="${preferred}.gcr.io/google_containers"
  188. else
  189. KUBE_ADDON_REGISTRY="gcr.io/google_containers"
  190. fi
  191. if [[ "${ENABLE_DOCKER_REGISTRY_CACHE:-}" == "true" ]]; then
  192. DOCKER_REGISTRY_MIRROR_URL="https://${preferred}-mirror.gcr.io"
  193. fi
  194. }
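# Example (illustrative): with ZONE=europe-west1-b and RELEASE_REGION_FALLBACK=true,
# PREFERRED_REGION becomes ("eu" "us" "asia"); if REGIONAL_KUBE_ADDONS=true this also
# sets KUBE_ADDON_REGISTRY="eu.gcr.io/google_containers".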
  195. # Take the local tar files and upload them to Google Storage. They will then be
  196. # downloaded by the master as part of the start up script for the master.
  197. #
  198. # Assumed vars:
  199. # PROJECT
  200. # SERVER_BINARY_TAR
  201. # SALT_TAR
  202. # KUBE_MANIFESTS_TAR
  203. # ZONE
  204. # Vars set:
  205. # SERVER_BINARY_TAR_URL
  206. # SERVER_BINARY_TAR_HASH
  207. # SALT_TAR_URL
  208. # SALT_TAR_HASH
  209. # KUBE_MANIFESTS_TAR_URL
  210. # KUBE_MANIFESTS_TAR_HASH
  211. function upload-server-tars() {
  212. SERVER_BINARY_TAR_URL=
  213. SERVER_BINARY_TAR_HASH=
  214. SALT_TAR_URL=
  215. SALT_TAR_HASH=
  216. KUBE_MANIFESTS_TAR_URL=
  217. KUBE_MANIFESTS_TAR_HASH=
  218. local project_hash
  219. if which md5 > /dev/null 2>&1; then
  220. project_hash=$(md5 -q -s "$PROJECT")
  221. else
  222. project_hash=$(echo -n "$PROJECT" | md5sum | awk '{ print $1 }')
  223. fi
  224. # This requires 1 million projects before the probability of collision is 50%
  225. # that's probably good enough for now :P
  226. project_hash=${project_hash:0:10}
  227. set-preferred-region
  228. SERVER_BINARY_TAR_HASH=$(sha1sum-file "${SERVER_BINARY_TAR}")
  229. SALT_TAR_HASH=$(sha1sum-file "${SALT_TAR}")
  230. if [[ -n "${KUBE_MANIFESTS_TAR:-}" ]]; then
  231. KUBE_MANIFESTS_TAR_HASH=$(sha1sum-file "${KUBE_MANIFESTS_TAR}")
  232. fi
  233. local server_binary_tar_urls=()
  234. local salt_tar_urls=()
  235. local kube_manifest_tar_urls=()
  236. for region in "${PREFERRED_REGION[@]}"; do
  237. suffix="-${region}"
  238. if [[ "${suffix}" == "-us" ]]; then
  239. suffix=""
  240. fi
  241. local staging_bucket="gs://kubernetes-staging-${project_hash}${suffix}"
  242. # Ensure the buckets are created
  243. if ! gsutil ls "${staging_bucket}" ; then
  244. echo "Creating ${staging_bucket}"
  245. gsutil mb -l "${region}" "${staging_bucket}"
  246. fi
  247. local staging_path="${staging_bucket}/${INSTANCE_PREFIX}-devel"
  248. echo "+++ Staging server tars to Google Storage: ${staging_path}"
  249. local server_binary_gs_url="${staging_path}/${SERVER_BINARY_TAR##*/}"
  250. local salt_gs_url="${staging_path}/${SALT_TAR##*/}"
  251. copy-to-staging "${staging_path}" "${server_binary_gs_url}" "${SERVER_BINARY_TAR}" "${SERVER_BINARY_TAR_HASH}"
  252. copy-to-staging "${staging_path}" "${salt_gs_url}" "${SALT_TAR}" "${SALT_TAR_HASH}"
  253. # Convert from gs:// URL to an https:// URL
  254. server_binary_tar_urls+=("${server_binary_gs_url/gs:\/\//https://storage.googleapis.com/}")
  255. salt_tar_urls+=("${salt_gs_url/gs:\/\//https://storage.googleapis.com/}")
  256. if [[ -n "${KUBE_MANIFESTS_TAR:-}" ]]; then
  257. local kube_manifests_gs_url="${staging_path}/${KUBE_MANIFESTS_TAR##*/}"
  258. copy-to-staging "${staging_path}" "${kube_manifests_gs_url}" "${KUBE_MANIFESTS_TAR}" "${KUBE_MANIFESTS_TAR_HASH}"
  259. # Convert from gs:// URL to an https:// URL
  260. kube_manifests_tar_urls+=("${kube_manifests_gs_url/gs:\/\//https://storage.googleapis.com/}")
  261. fi
  262. done
  263. SERVER_BINARY_TAR_URL=$(join_csv "${server_binary_tar_urls[@]}")
  264. SALT_TAR_URL=$(join_csv "${salt_tar_urls[@]}")
  265. if [[ -n "${KUBE_MANIFESTS_TAR:-}" ]]; then
  266. KUBE_MANIFESTS_TAR_URL=$(join_csv "${kube_manifests_tar_urls[@]}")
  267. fi
  268. }
  269. # Detect minions created in the minion group
  270. #
  271. # Assumed vars:
  272. # NODE_INSTANCE_PREFIX
  273. # Vars set:
  274. # NODE_NAMES
  275. # INSTANCE_GROUPS
  276. function detect-node-names() {
  277. detect-project
  278. INSTANCE_GROUPS=()
  279. INSTANCE_GROUPS+=($(gcloud compute instance-groups managed list \
  280. --zones "${ZONE}" --project "${PROJECT}" \
  281. --regexp "${NODE_INSTANCE_PREFIX}-.+" \
  282. --format='value(instanceGroup)' || true))
  283. NODE_NAMES=()
  284. if [[ -n "${INSTANCE_GROUPS[@]:-}" ]]; then
  285. for group in "${INSTANCE_GROUPS[@]}"; do
  286. NODE_NAMES+=($(gcloud compute instance-groups managed list-instances \
  287. "${group}" --zone "${ZONE}" --project "${PROJECT}" \
  288. --format='value(instance)'))
  289. done
  290. fi
  291. echo "INSTANCE_GROUPS=${INSTANCE_GROUPS[*]:-}" >&2
  292. echo "NODE_NAMES=${NODE_NAMES[*]:-}" >&2
  293. }
  294. # Detect the information about the minions
  295. #
  296. # Assumed vars:
  297. # ZONE
  298. # Vars set:
  299. # NODE_NAMES
  300. # KUBE_NODE_IP_ADDRESSES (array)
  301. function detect-nodes() {
  302. detect-project
  303. detect-node-names
  304. KUBE_NODE_IP_ADDRESSES=()
  305. for (( i=0; i<${#NODE_NAMES[@]}; i++)); do
  306. local node_ip=$(gcloud compute instances describe --project "${PROJECT}" --zone "${ZONE}" \
  307. "${NODE_NAMES[$i]}" --format='value(networkInterfaces[0].accessConfigs[0].natIP)')
  308. if [[ -z "${node_ip-}" ]] ; then
  309. echo "Did not find ${NODE_NAMES[$i]}" >&2
  310. else
  311. echo "Found ${NODE_NAMES[$i]} at ${node_ip}"
  312. KUBE_NODE_IP_ADDRESSES+=("${node_ip}")
  313. fi
  314. done
  315. if [[ -z "${KUBE_NODE_IP_ADDRESSES-}" ]]; then
  316. echo "Could not detect Kubernetes minion nodes. Make sure you've launched a cluster with 'kube-up.sh'" >&2
  317. exit 1
  318. fi
  319. }
  320. # Detect the IP for the master
  321. #
  322. # Assumed vars:
  323. # MASTER_NAME
  324. # ZONE
  325. # REGION
  326. # Vars set:
  327. # KUBE_MASTER
  328. # KUBE_MASTER_IP
  329. function detect-master() {
  330. detect-project
  331. KUBE_MASTER=${MASTER_NAME}
  332. if [[ -z "${KUBE_MASTER_IP-}" ]]; then
  333. KUBE_MASTER_IP=$(gcloud compute addresses describe "${MASTER_NAME}-ip" \
  334. --project "${PROJECT}" --region "${REGION}" -q --format='value(address)')
  335. fi
  336. if [[ -z "${KUBE_MASTER_IP-}" ]]; then
  337. echo "Could not detect Kubernetes master node. Make sure you've launched a cluster with 'kube-up.sh'" >&2
  338. exit 1
  339. fi
  340. echo "Using master: $KUBE_MASTER (external IP: $KUBE_MASTER_IP)"
  341. }
  342. # Reads kube-env metadata from master
  343. #
  344. # Assumed vars:
  345. # KUBE_MASTER
  346. # PROJECT
  347. # ZONE
  348. function get-master-env() {
  349. # TODO(zmerlynn): Make this more reliable with retries.
  350. gcloud compute --project ${PROJECT} ssh --zone ${ZONE} ${KUBE_MASTER} --command \
  351. "curl --fail --silent -H 'Metadata-Flavor: Google' \
  352. 'http://metadata/computeMetadata/v1/instance/attributes/kube-env'" 2>/dev/null
  353. }
  354. # Robustly try to create a static ip.
  355. # $1: The name of the ip to create
  356. # $2: The name of the region to create the ip in.
  357. function create-static-ip() {
  358. detect-project
  359. local attempt=0
  360. local REGION="$2"
  361. while true; do
  362. if gcloud compute addresses create "$1" \
  363. --project "${PROJECT}" \
  364. --region "${REGION}" -q > /dev/null; then
  365. # successful operation
  366. break
  367. fi
  368. if gcloud compute addresses describe "$1" \
  369. --project "${PROJECT}" \
  370. --region "${REGION}" >/dev/null 2>&1; then
  371. # it exists - postcondition satisfied
  372. break
  373. fi
  374. if (( attempt > 4 )); then
  375. echo -e "${color_red}Failed to create static ip $1 ${color_norm}" >&2
  376. exit 2
  377. fi
  378. attempt=$(($attempt+1))
  379. echo -e "${color_yellow}Attempt $attempt failed to create static ip $1. Retrying.${color_norm}" >&2
  380. sleep $(($attempt * 5))
  381. done
  382. }
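# Example (illustrative, hypothetical names):
#   create-static-ip "my-cluster-master-ip" "us-central1"
# The loop retries a handful of times with an increasing sleep before giving up.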
  383. # Robustly try to create a firewall rule.
  384. # $1: The name of firewall rule.
  385. # $2: IP ranges.
  386. # $3: Target tags for this firewall rule.
  387. function create-firewall-rule() {
  388. detect-project
  389. local attempt=0
  390. while true; do
  391. if ! gcloud compute firewall-rules create "$1" \
  392. --project "${PROJECT}" \
  393. --network "${NETWORK}" \
  394. --source-ranges "$2" \
  395. --target-tags "$3" \
  396. --allow tcp,udp,icmp,esp,ah,sctp; then
  397. if (( attempt > 4 )); then
  398. echo -e "${color_red}Failed to create firewall rule $1 ${color_norm}" >&2
  399. exit 2
  400. fi
  401. echo -e "${color_yellow}Attempt $(($attempt+1)) failed to create firewall rule $1. Retrying.${color_norm}" >&2
  402. attempt=$(($attempt+1))
  403. sleep $(($attempt * 5))
  404. else
  405. break
  406. fi
  407. done
  408. }
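# Example (illustrative, hypothetical values):
#   create-firewall-rule "my-cluster-minion-all" "10.244.0.0/14" "my-cluster-minion"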
  409. # $1: version (required)
  410. function get-template-name-from-version() {
  411. # trim template name to pass gce name validation
  412. echo "${NODE_INSTANCE_PREFIX}-template-${1}" | cut -c 1-63 | sed 's/[\.\+]/-/g;s/-*$//g'
  413. }
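# Example (illustrative): with NODE_INSTANCE_PREFIX=my-cluster-minion,
#   get-template-name-from-version "v1.3.0-beta.1"
# prints "my-cluster-minion-template-v1-3-0-beta-1" (dots/pluses become dashes and
# the name is truncated to GCE's 63-character limit).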
  414. # Robustly try to create an instance template.
  415. # $1: The name of the instance template.
  416. # $2: The scopes flag.
  417. # $3 and others: Metadata entries (must all be from a file).
  418. function create-node-template() {
  419. detect-project
  420. local template_name="$1"
  421. # First, ensure the template doesn't exist.
  422. # TODO(zmerlynn): To make this really robust, we need to parse the output and
  423. # add retries. Just relying on a non-zero exit code doesn't
  424. # distinguish an ephemeral failed call from a "not-exists".
  425. if gcloud compute instance-templates describe "$template_name" --project "${PROJECT}" &>/dev/null; then
  426. echo "Instance template ${1} already exists; deleting." >&2
  427. if ! gcloud compute instance-templates delete "$template_name" --project "${PROJECT}" &>/dev/null; then
  428. echo -e "${color_yellow}Failed to delete existing instance template${color_norm}" >&2
  429. exit 2
  430. fi
  431. fi
  432. local attempt=1
  433. local preemptible_minions=""
  434. if [[ "${PREEMPTIBLE_NODE}" == "true" ]]; then
  435. preemptible_minions="--preemptible --maintenance-policy TERMINATE"
  436. fi
  437. while true; do
  438. echo "Attempt ${attempt} to create ${1}" >&2
  439. if ! gcloud compute instance-templates create "$template_name" \
  440. --project "${PROJECT}" \
  441. --machine-type "${NODE_SIZE}" \
  442. --boot-disk-type "${NODE_DISK_TYPE}" \
  443. --boot-disk-size "${NODE_DISK_SIZE}" \
  444. --image-project="${NODE_IMAGE_PROJECT}" \
  445. --image "${NODE_IMAGE}" \
  446. --tags "${NODE_TAG}" \
  447. --network "${NETWORK}" \
  448. ${preemptible_minions} \
  449. $2 \
  450. --can-ip-forward \
  451. --metadata-from-file $(echo ${@:3} | tr ' ' ',') >&2; then
  452. if (( attempt > 5 )); then
  453. echo -e "${color_red}Failed to create instance template $template_name ${color_norm}" >&2
  454. exit 2
  455. fi
  456. echo -e "${color_yellow}Attempt ${attempt} failed to create instance template $template_name. Retrying.${color_norm}" >&2
  457. attempt=$(($attempt+1))
  458. sleep $(($attempt * 5))
  459. # In case the previous attempt failed with something like a
  460. # Backend Error and left the entry laying around, delete it
  461. # before we try again.
  462. gcloud compute instance-templates delete "$template_name" --project "${PROJECT}" &>/dev/null || true
  463. else
  464. break
  465. fi
  466. done
  467. }
  468. # Robustly try to add metadata on an instance.
  469. # $1: The name of the instance.
  470. # $2...$n: The metadata key=value pairs to add.
  471. function add-instance-metadata() {
  472. local -r instance=$1
  473. shift 1
  474. local -r kvs=( "$@" )
  475. detect-project
  476. local attempt=0
  477. while true; do
  478. if ! gcloud compute instances add-metadata "${instance}" \
  479. --project "${PROJECT}" \
  480. --zone "${ZONE}" \
  481. --metadata "${kvs[@]}"; then
  482. if (( attempt > 5 )); then
  483. echo -e "${color_red}Failed to add instance metadata in ${instance} ${color_norm}" >&2
  484. exit 2
  485. fi
  486. echo -e "${color_yellow}Attempt $(($attempt+1)) failed to add metadata in ${instance}. Retrying.${color_norm}" >&2
  487. attempt=$(($attempt+1))
  488. sleep $((5 * $attempt))
  489. else
  490. break
  491. fi
  492. done
  493. }
  494. # Robustly try to add metadata on an instance, from a file.
  495. # $1: The name of the instance.
  496. # $2...$n: The metadata key=file pairs to add.
  497. function add-instance-metadata-from-file() {
  498. local -r instance=$1
  499. shift 1
  500. local -r kvs=( "$@" )
  501. detect-project
  502. local attempt=0
  503. while true; do
  504. echo "${kvs[@]}"
  505. if ! gcloud compute instances add-metadata "${instance}" \
  506. --project "${PROJECT}" \
  507. --zone "${ZONE}" \
  508. --metadata-from-file "$(join_csv ${kvs[@]})"; then
  509. if (( attempt > 5 )); then
  510. echo -e "${color_red}Failed to add instance metadata in ${instance} ${color_norm}" >&2
  511. exit 2
  512. fi
  513. echo -e "${color_yellow}Attempt $(($attempt+1)) failed to add metadata in ${instance}. Retrying.${color_norm}" >&2
  514. attempt=$(($attempt+1))
  515. sleep $(($attempt * 5))
  516. else
  517. break
  518. fi
  519. done
  520. }
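# Example (illustrative, hypothetical key/file pair):
#   add-instance-metadata-from-file "my-cluster-master" "kube-env=${KUBE_TEMP}/kube-env.yaml"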
  521. # Instantiate a kubernetes cluster
  522. #
  523. # Assumed vars
  524. # KUBE_ROOT
  525. # <Various vars set in config file>
  526. function kube-up() {
  527. ensure-temp-dir
  528. detect-project
  529. load-or-gen-kube-basicauth
  530. load-or-gen-kube-bearertoken
  531. # Make sure we have the tar files staged on Google Storage
  532. find-release-tars
  533. upload-server-tars
  534. # Ensure that the environment variables specifying the number of MIGs to create are set.
  535. set_num_migs
  536. if [[ ${KUBE_USE_EXISTING_MASTER:-} == "true" ]]; then
  537. parse-master-env
  538. create-nodes
  539. elif [[ ${KUBE_EXPERIMENTAL_REPLICATE_EXISTING_MASTER:-} == "true" ]]; then
  540. # TODO(jsz): implement adding replica for other distributions.
  541. if [[ "${MASTER_OS_DISTRIBUTION}" != "gci" ]]; then
  542. echo "Master replication supported only for gci"
  543. return 1
  544. fi
  545. create-loadbalancer
  546. # If replication of master fails, we need to ensure that the replica is removed from etcd clusters.
  547. if ! replicate-master; then
  548. remove-replica-from-etcd 4001 || true
  549. remove-replica-from-etcd 4002 || true
  550. fi
  551. else
  552. check-existing
  553. create-network
  554. write-cluster-name
  555. create-autoscaler-config
  556. create-master
  557. create-nodes-firewall
  558. create-nodes-template
  559. create-nodes
  560. check-cluster
  561. fi
  562. }
  563. function check-existing() {
  564. local running_in_terminal=false
  565. # May be false if tty is not allocated (for example with ssh -T).
  566. if [ -t 1 ]; then
  567. running_in_terminal=true
  568. fi
  569. if [[ ${running_in_terminal} == "true" || ${KUBE_UP_AUTOMATIC_CLEANUP} == "true" ]]; then
  570. if ! check-resources; then
  571. local run_kube_down="n"
  572. echo "${KUBE_RESOURCE_FOUND} found." >&2
  573. # Get user input only if running in terminal.
  574. if [[ ${running_in_terminal} == "true" && ${KUBE_UP_AUTOMATIC_CLEANUP} == "false" ]]; then
  575. read -p "Would you like to shut down the old cluster (call kube-down)? [y/N] " run_kube_down
  576. fi
  577. if [[ ${run_kube_down} == "y" || ${run_kube_down} == "Y" || ${KUBE_UP_AUTOMATIC_CLEANUP} == "true" ]]; then
  578. echo "... calling kube-down" >&2
  579. kube-down
  580. fi
  581. fi
  582. fi
  583. }
  584. function create-network() {
  585. if ! gcloud compute networks --project "${PROJECT}" describe "${NETWORK}" &>/dev/null; then
  586. echo "Creating new network: ${NETWORK}"
  587. # The network needs to be created synchronously or we have a race. The
  588. # firewalls can be added concurrent with instance creation.
  589. gcloud compute networks create --project "${PROJECT}" "${NETWORK}" --range "10.240.0.0/16"
  590. fi
  591. if ! gcloud compute firewall-rules --project "${PROJECT}" describe "${NETWORK}-default-internal" &>/dev/null; then
  592. gcloud compute firewall-rules create "${NETWORK}-default-internal" \
  593. --project "${PROJECT}" \
  594. --network "${NETWORK}" \
  595. --source-ranges "10.0.0.0/8" \
  596. --allow "tcp:1-65535,udp:1-65535,icmp" &
  597. fi
  598. if ! gcloud compute firewall-rules describe --project "${PROJECT}" "${NETWORK}-default-ssh" &>/dev/null; then
  599. gcloud compute firewall-rules create "${NETWORK}-default-ssh" \
  600. --project "${PROJECT}" \
  601. --network "${NETWORK}" \
  602. --source-ranges "0.0.0.0/0" \
  603. --allow "tcp:22" &
  604. fi
  605. }
  606. # Assumes:
  607. # NUM_NODES
  608. # Sets:
  609. # MASTER_ROOT_DISK_SIZE
  610. function get-master-root-disk-size() {
  611. if [[ "${NUM_NODES}" -le "1000" ]]; then
  612. export MASTER_ROOT_DISK_SIZE="10"
  613. else
  614. export MASTER_ROOT_DISK_SIZE="50"
  615. fi
  616. }
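# Example (illustrative): NUM_NODES=500 gives MASTER_ROOT_DISK_SIZE=10,
# while NUM_NODES=2000 gives MASTER_ROOT_DISK_SIZE=50.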
  617. function create-master() {
  618. echo "Starting master and configuring firewalls"
  619. gcloud compute firewall-rules create "${MASTER_NAME}-https" \
  620. --project "${PROJECT}" \
  621. --network "${NETWORK}" \
  622. --target-tags "${MASTER_TAG}" \
  623. --allow tcp:443 &
  624. # We have to make sure the disk is created before creating the master VM, so
  625. # run this in the foreground.
  626. gcloud compute disks create "${MASTER_NAME}-pd" \
  627. --project "${PROJECT}" \
  628. --zone "${ZONE}" \
  629. --type "${MASTER_DISK_TYPE}" \
  630. --size "${MASTER_DISK_SIZE}"
  631. # Create disk for cluster registry if enabled
  632. if [[ "${ENABLE_CLUSTER_REGISTRY}" == true && -n "${CLUSTER_REGISTRY_DISK}" ]]; then
  633. gcloud compute disks create "${CLUSTER_REGISTRY_DISK}" \
  634. --project "${PROJECT}" \
  635. --zone "${ZONE}" \
  636. --type "${CLUSTER_REGISTRY_DISK_TYPE_GCE}" \
  637. --size "${CLUSTER_REGISTRY_DISK_SIZE}" &
  638. fi
  639. # Generate a bearer token for this cluster. We push this separately
  640. # from the other cluster variables so that the client (this
  641. # computer) can forget it later. This should disappear with
  642. # http://issue.k8s.io/3168
  643. KUBELET_TOKEN=$(dd if=/dev/urandom bs=128 count=1 2>/dev/null | base64 | tr -d "=+/" | dd bs=32 count=1 2>/dev/null)
  644. KUBE_PROXY_TOKEN=$(dd if=/dev/urandom bs=128 count=1 2>/dev/null | base64 | tr -d "=+/" | dd bs=32 count=1 2>/dev/null)
  645. # Reserve the master's IP so that it can later be transferred to another VM
  646. # without disrupting the kubelets.
  647. create-static-ip "${MASTER_NAME}-ip" "${REGION}"
  648. MASTER_RESERVED_IP=$(gcloud compute addresses describe "${MASTER_NAME}-ip" \
  649. --project "${PROJECT}" --region "${REGION}" -q --format='value(address)')
  650. create-certs "${MASTER_RESERVED_IP}"
  651. # Sets MASTER_ROOT_DISK_SIZE that is used by create-master-instance
  652. get-master-root-disk-size
  653. create-master-instance "${MASTER_RESERVED_IP}" &
  654. }
  655. # Adds master replica to etcd cluster.
  656. #
  657. # Assumed vars:
  658. # REPLICA_NAME
  659. # PROJECT
  660. # EXISTING_MASTER_NAME
  661. # EXISTING_MASTER_ZONE
  662. #
  663. # $1: etcd client port
  664. # $2: etcd internal port
  665. # Returns the result of the ssh command that adds the replica.
  666. function add-replica-to-etcd() {
  667. local -r client_port="${1}"
  668. local -r internal_port="${2}"
  669. gcloud compute ssh "${EXISTING_MASTER_NAME}" \
  670. --project "${PROJECT}" \
  671. --zone "${EXISTING_MASTER_ZONE}" \
  672. --command \
  673. "curl localhost:${client_port}/v2/members -XPOST -H \"Content-Type: application/json\" -d '{\"peerURLs\":[\"http://${REPLICA_NAME}:${internal_port}\"]}'"
  674. return $?
  675. }
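# Example (illustrative, assuming PROJECT, REPLICA_NAME, EXISTING_MASTER_NAME and
# EXISTING_MASTER_ZONE are already set): register the replica in the main etcd
# cluster (client port 4001, peer port 2380):
#   add-replica-to-etcd 4001 2380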
  676. # Sets EXISTING_MASTER_NAME and EXISTING_MASTER_ZONE variables.
  677. #
  678. # Assumed vars:
  679. # PROJECT
  680. #
  681. # NOTE: Must be in sync with get-replica-name-regexp
  682. function set-existing-master() {
  683. local existing_master=$(gcloud compute instances list \
  684. --project "${PROJECT}" \
  685. --regexp "$(get-replica-name-regexp)" \
  686. --format "value(name,zone)" | head -n1)
  687. EXISTING_MASTER_NAME="$(echo "${existing_master}" | cut -f1)"
  688. EXISTING_MASTER_ZONE="$(echo "${existing_master}" | cut -f2)"
  689. }
  690. function replicate-master() {
  691. set-replica-name
  692. set-existing-master
  693. echo "Experimental: replicating existing master ${EXISTING_MASTER_ZONE}/${EXISTING_MASTER_NAME} as ${ZONE}/${REPLICA_NAME}"
  694. # Before we do anything else, we should configure etcd to expect more replicas.
  695. if ! add-replica-to-etcd 4001 2380; then
  696. echo "Failed to add master replica to etcd cluster."
  697. return 1
  698. fi
  699. if ! add-replica-to-etcd 4002 2381; then
  700. echo "Failed to add master replica to etcd events cluster."
  701. return 1
  702. fi
  703. # We have to make sure the disk is created before creating the master VM, so
  704. # run this in the foreground.
  705. gcloud compute disks create "${REPLICA_NAME}-pd" \
  706. --project "${PROJECT}" \
  707. --zone "${ZONE}" \
  708. --type "${MASTER_DISK_TYPE}" \
  709. --size "${MASTER_DISK_SIZE}"
  710. # Sets MASTER_ROOT_DISK_SIZE that is used by create-master-instance
  711. get-master-root-disk-size
  712. local existing_master_replicas="$(get-all-replica-names)"
  713. replicate-master-instance "${EXISTING_MASTER_ZONE}" "${EXISTING_MASTER_NAME}" "${existing_master_replicas}"
  714. # Add new replica to the load balancer.
  715. gcloud compute target-pools add-instances "${MASTER_NAME}" \
  716. --project "${PROJECT}" \
  717. --zone "${ZONE}" \
  718. --instances "${REPLICA_NAME}"
  719. }
  720. # Detaches the old external IP from a VM and attaches a new one.
  721. #
  722. # Arguments:
  723. # $1 - VM name
  724. # $2 - VM zone
  725. # $3 - external static IP; if empty will use an ephemeral IP address.
  726. function attach-external-ip() {
  727. local NAME=${1}
  728. local ZONE=${2}
  729. local IP_ADDR=${3:-}
  730. local ACCESS_CONFIG_NAME=$(gcloud compute instances describe "${NAME}" \
  731. --project "${PROJECT}" --zone "${ZONE}" \
  732. --format="value(networkInterfaces[0].accessConfigs[0].name)")
  733. gcloud compute instances delete-access-config "${NAME}" \
  734. --project "${PROJECT}" --zone "${ZONE}" \
  735. --access-config-name "${ACCESS_CONFIG_NAME}"
  736. if [[ -z ${IP_ADDR} ]]; then
  737. gcloud compute instances add-access-config "${NAME}" \
  738. --project "${PROJECT}" --zone "${ZONE}" \
  739. --access-config-name "${ACCESS_CONFIG_NAME}"
  740. else
  741. gcloud compute instances add-access-config "${NAME}" \
  742. --project "${PROJECT}" --zone "${ZONE}" \
  743. --access-config-name "${ACCESS_CONFIG_NAME}" \
  744. --address "${IP_ADDR}"
  745. fi
  746. }
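# Example (illustrative, hypothetical values): move a reserved IP onto a VM:
#   attach-external-ip "my-cluster-master" "us-central1-b" "104.198.0.1"
# Omitting the third argument re-attaches an ephemeral external IP instead.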
  747. # Creates a load balancer in front of the apiserver if it doesn't already exist. Assumes there's only one
  748. # existing master replica.
  749. #
  750. # Assumes:
  751. # PROJECT
  752. # MASTER_NAME
  753. # ZONE
  754. # REGION
  755. function create-loadbalancer() {
  756. detect-master
  757. # Step 0: Return early if LB is already configured.
  758. if gcloud compute forwarding-rules describe ${MASTER_NAME} \
  759. --project "${PROJECT}" --region ${REGION} > /dev/null 2>&1; then
  760. echo "Load balancer already exists"
  761. return
  762. fi
  763. local EXISTING_MASTER_ZONE=$(gcloud compute instances list "${MASTER_NAME}" \
  764. --project "${PROJECT}" --format="value(zone)")
  765. echo "Creating load balancer in front of an already existing master in ${EXISTING_MASTER_ZONE}"
  766. # Step 1: Detach master IP address and attach ephemeral address to the existing master
  767. attach-external-ip ${MASTER_NAME} ${EXISTING_MASTER_ZONE}
  768. # Step 2: Create target pool.
  769. gcloud compute target-pools create "${MASTER_NAME}" --region "${REGION}"
  770. # TODO: We should also add master instances with suffixes
  771. gcloud compute target-pools add-instances ${MASTER_NAME} --instances ${MASTER_NAME} --zone ${EXISTING_MASTER_ZONE}
  772. # Step 3: Create forwarding rule.
  773. # TODO: This step can take up to 20 min. We need to speed this up...
  774. gcloud compute forwarding-rules create ${MASTER_NAME} \
  775. --project "${PROJECT}" --region ${REGION} \
  776. --target-pool ${MASTER_NAME} --address=${KUBE_MASTER_IP} --ports=443
  777. echo -n "Waiting for the load balancer configuration to propagate..."
  778. local counter=0
  779. until curl -k -m1 "https://${KUBE_MASTER_IP}" &> /dev/null; do
  780. counter=$((counter+1))
  781. echo -n .
  782. if [[ ${counter} -ge 1800 ]]; then
  783. echo -e "${color_red}TIMEOUT${color_norm}" >&2
  784. echo -e "${color_red}Load balancer failed to initialize within ${counter} seconds.${color_norm}" >&2
  785. exit 2
  786. fi
  787. done
  788. echo "DONE"
  789. }
  790. function create-nodes-firewall() {
  791. # Create a single firewall rule for all minions.
  792. create-firewall-rule "${NODE_TAG}-all" "${CLUSTER_IP_RANGE}" "${NODE_TAG}" &
  793. # Report logging choice (if any).
  794. if [[ "${ENABLE_NODE_LOGGING-}" == "true" ]]; then
  795. echo "+++ Logging using Fluentd to ${LOGGING_DESTINATION:-unknown}"
  796. fi
  797. # Wait for last batch of jobs
  798. kube::util::wait-for-jobs || {
  799. echo -e "${color_red}Some commands failed.${color_norm}" >&2
  800. }
  801. }
  802. function create-nodes-template() {
  803. echo "Creating minions."
  804. # TODO(zmerlynn): Refactor setting scope flags.
  805. local scope_flags=
  806. if [ -n "${NODE_SCOPES}" ]; then
  807. scope_flags="--scopes ${NODE_SCOPES}"
  808. else
  809. scope_flags="--no-scopes"
  810. fi
  811. write-node-env
  812. local template_name="${NODE_INSTANCE_PREFIX}-template"
  813. create-node-instance-template $template_name
  814. }
  815. # Assumes:
  816. # - MAX_INSTANCES_PER_MIG
  817. # - NUM_NODES
  818. # exports:
  819. # - NUM_MIGS
  820. function set_num_migs() {
  821. local defaulted_max_instances_per_mig=${MAX_INSTANCES_PER_MIG:-1000}
  822. if [[ ${defaulted_max_instances_per_mig} -le "0" ]]; then
  823. echo "MAX_INSTANCES_PER_MIG cannot be negative. Assuming default 1000"
  824. defaulted_max_instances_per_mig=1000
  825. fi
  826. export NUM_MIGS=$(((${NUM_NODES} + ${defaulted_max_instances_per_mig} - 1) / ${defaulted_max_instances_per_mig}))
  827. }
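# Example (illustrative): NUM_NODES=2500 with the default MAX_INSTANCES_PER_MIG=1000
# yields NUM_MIGS=3, i.e. ceiling division (2500 + 1000 - 1) / 1000.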
  828. # Assumes:
  829. # - NUM_MIGS
  830. # - NODE_INSTANCE_PREFIX
  831. # - NUM_NODES
  832. # - PROJECT
  833. # - ZONE
  834. function create-nodes() {
  835. local template_name="${NODE_INSTANCE_PREFIX}-template"
  836. local instances_left=${NUM_NODES}
  837. #TODO: parallelize this loop to speed up the process
  838. for ((i=1; i<=${NUM_MIGS}; i++)); do
  839. local group_name="${NODE_INSTANCE_PREFIX}-group-$i"
  840. if [[ $i == ${NUM_MIGS} ]]; then
  841. # TODO: We don't add a suffix for the last group to keep backward compatibility when there's only one MIG.
  842. # We should change it at some point, but note #18545 when changing this.
  843. group_name="${NODE_INSTANCE_PREFIX}-group"
  844. fi
  845. # Spread the remaining number of nodes evenly
  846. this_mig_size=$((${instances_left} / (${NUM_MIGS}-${i}+1)))
  847. instances_left=$((instances_left-${this_mig_size}))
  848. gcloud compute instance-groups managed \
  849. create "${group_name}" \
  850. --project "${PROJECT}" \
  851. --zone "${ZONE}" \
  852. --base-instance-name "${group_name}" \
  853. --size "${this_mig_size}" \
  854. --template "$template_name" || true;
  855. gcloud compute instance-groups managed wait-until-stable \
  856. "${group_name}" \
  857. --zone "${ZONE}" \
  858. --project "${PROJECT}" || true;
  859. done
  860. }
  861. # Assumes:
  862. # - NUM_MIGS
  863. # - NODE_INSTANCE_PREFIX
  864. # - PROJECT
  865. # - ZONE
  866. # - AUTOSCALER_MAX_NODES
  867. # - AUTOSCALER_MIN_NODES
  868. # Exports
  869. # - AUTOSCALER_MIG_CONFIG
  870. function create-cluster-autoscaler-mig-config() {
  871. # Each MIG must have at least one node, so the min number of nodes
  872. # must be greater or equal to the number of migs.
  873. if [[ ${AUTOSCALER_MIN_NODES} -lt ${NUM_MIGS} ]]; then
  874. echo "AUTOSCALER_MIN_NODES must be greater than or equal to ${NUM_MIGS}"
  875. exit 2
  876. fi
  877. # Similarly, the max number of nodes must be greater than or equal to
  878. # the number of migs, since every MIG needs a non-zero maximum.
  879. if [[ ${AUTOSCALER_MAX_NODES} -lt ${NUM_MIGS} ]]; then
  880. echo "AUTOSCALER_MAX_NODES must be greater than or equal to ${NUM_MIGS}"
  881. exit 2
  882. fi
  883. # The code assumes that the migs were created with create-nodes
  884. # function which tries to evenly spread nodes across the migs.
  885. AUTOSCALER_MIG_CONFIG=""
  886. local left_min=${AUTOSCALER_MIN_NODES}
  887. local left_max=${AUTOSCALER_MAX_NODES}
  888. for ((i=1; i<=${NUM_MIGS}; i++)); do
  889. local group_name="${NODE_INSTANCE_PREFIX}-group-$i"
  890. if [[ $i == ${NUM_MIGS} ]]; then
  891. # TODO: We don't add a suffix for the last group to keep backward compatibility when there's only one MIG.
  892. # We should change it at some point, but note #18545 when changing this.
  893. group_name="${NODE_INSTANCE_PREFIX}-group"
  894. fi
  895. this_mig_min=$((${left_min}/(${NUM_MIGS}-${i}+1)))
  896. this_mig_max=$((${left_max}/(${NUM_MIGS}-${i}+1)))
  897. left_min=$((left_min-$this_mig_min))
  898. left_max=$((left_max-$this_mig_max))
  899. local mig_url="https://www.googleapis.com/compute/v1/projects/${PROJECT}/zones/${ZONE}/instanceGroups/${group_name}"
  900. AUTOSCALER_MIG_CONFIG="${AUTOSCALER_MIG_CONFIG} --nodes=${this_mig_min}:${this_mig_max}:${mig_url}"
  901. done
  902. AUTOSCALER_MIG_CONFIG="${AUTOSCALER_MIG_CONFIG} --scale-down-enabled=${AUTOSCALER_ENABLE_SCALE_DOWN}"
  903. }
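# Example (illustrative): NUM_MIGS=2, AUTOSCALER_MIN_NODES=3, AUTOSCALER_MAX_NODES=10
# and NODE_INSTANCE_PREFIX=my-cluster-minion produce roughly
#   --nodes=1:5:.../instanceGroups/my-cluster-minion-group-1
#   --nodes=2:5:.../instanceGroups/my-cluster-minion-group
# plus the trailing --scale-down-enabled flag.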
  904. # Assumes:
  905. # - NUM_MIGS
  906. # - NODE_INSTANCE_PREFIX
  907. # - PROJECT
  908. # - ZONE
  909. # - ENABLE_CLUSTER_AUTOSCALER
  910. # - AUTOSCALER_MAX_NODES
  911. # - AUTOSCALER_MIN_NODES
  912. function create-autoscaler-config() {
  913. # Create autoscaler for nodes configuration if requested
  914. if [[ "${ENABLE_CLUSTER_AUTOSCALER}" == "true" ]]; then
  915. create-cluster-autoscaler-mig-config
  916. echo "Using autoscaler config: ${AUTOSCALER_MIG_CONFIG}"
  917. fi
  918. }
  919. function check-cluster() {
  920. detect-node-names
  921. detect-master
  922. echo "Waiting up to ${KUBE_CLUSTER_INITIALIZATION_TIMEOUT} seconds for cluster initialization."
  923. echo
  924. echo " This will continually check to see if the API for kubernetes is reachable."
  925. echo " This may time out if there was some uncaught error during start up."
  926. echo
  927. # curl in mavericks is borked.
  928. secure=""
  929. if which sw_vers >& /dev/null; then
  930. if [[ $(sw_vers | grep ProductVersion | awk '{print $2}') = "10.9."* ]]; then
  931. secure="--insecure"
  932. fi
  933. fi
  934. local start_time=$(date +%s)
  935. until curl --cacert "${CERT_DIR}/pki/ca.crt" \
  936. -H "Authorization: Bearer ${KUBE_BEARER_TOKEN}" \
  937. ${secure} \
  938. --max-time 5 --fail --output /dev/null --silent \
  939. "https://${KUBE_MASTER_IP}/api/v1/pods"; do
  940. local elapsed=$(($(date +%s) - ${start_time}))
  941. if [[ ${elapsed} -gt ${KUBE_CLUSTER_INITIALIZATION_TIMEOUT} ]]; then
  942. echo -e "${color_red}Cluster failed to initialize within ${KUBE_CLUSTER_INITIALIZATION_TIMEOUT} seconds.${color_norm}" >&2
  943. exit 2
  944. fi
  945. printf "."
  946. sleep 2
  947. done
  948. echo "Kubernetes cluster created."
  949. export KUBE_CERT="${CERT_DIR}/pki/issued/kubecfg.crt"
  950. export KUBE_KEY="${CERT_DIR}/pki/private/kubecfg.key"
  951. export CA_CERT="${CERT_DIR}/pki/ca.crt"
  952. export CONTEXT="${PROJECT}_${INSTANCE_PREFIX}"
  953. (
  954. umask 077
  955. # Update the user's kubeconfig to include credentials for this apiserver.
  956. create-kubeconfig
  957. create-kubeconfig-for-federation
  958. )
  959. # ensures KUBECONFIG is set
  960. get-kubeconfig-basicauth
  961. echo
  962. echo -e "${color_green}Kubernetes cluster is running. The master is running at:"
  963. echo
  964. echo -e "${color_yellow} https://${KUBE_MASTER_IP}"
  965. echo
  966. echo -e "${color_green}The user name and password to use is located in ${KUBECONFIG}.${color_norm}"
  967. echo
  968. }
  969. # Removes master replica from etcd cluster.
  970. #
  971. # Assumed vars:
  972. # REPLICA_NAME
  973. # PROJECT
  974. # EXISTING_MASTER_NAME
  975. # EXISTING_MASTER_ZONE
  976. #
  977. # $1: etcd client port
  978. # Returns the result of the ssh command that removes the replica.
  979. function remove-replica-from-etcd() {
  980. local -r port="${1}"
  981. gcloud compute ssh "${EXISTING_MASTER_NAME}" \
  982. --project "${PROJECT}" \
  983. --zone "${EXISTING_MASTER_ZONE}" \
  984. --command \
  985. "curl -s localhost:${port}/v2/members/\$(curl -s localhost:${port}/v2/members -XGET | sed 's/{\\\"id/\n/g' | grep ${REPLICA_NAME} | cut -f 3 -d \\\") -XDELETE -L 2>/dev/null"
  986. return $?
  987. }
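# Example (illustrative): drop the current replica from the main etcd cluster:
#   remove-replica-from-etcd 4001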
  988. # Delete a kubernetes cluster. This is called from test-teardown.
  989. #
  990. # Assumed vars:
  991. # MASTER_NAME
  992. # NODE_INSTANCE_PREFIX
  993. # ZONE
  994. # This function tears down cluster resources in batches to avoid issuing too many
  995. # API calls and exceeding API quota. It is important to bring down the instances before bringing
  996. # down the firewall rules and routes.
  997. function kube-down() {
  998. local -r batch=200
  999. detect-project
  1000. detect-node-names # For INSTANCE_GROUPS
  1001. echo "Bringing down cluster"
  1002. set +e # Do not stop on error
  1003. # Get the name of the managed instance group template before we delete the
  1004. # managed instance group. (The name of the managed instance group template may
  1005. # change during a cluster upgrade.)
  1006. local templates=$(get-template "${PROJECT}")
  1007. for group in ${INSTANCE_GROUPS[@]:-}; do
  1008. if gcloud compute instance-groups managed describe "${group}" --project "${PROJECT}" --zone "${ZONE}" &>/dev/null; then
  1009. gcloud compute instance-groups managed delete \
  1010. --project "${PROJECT}" \
  1011. --quiet \
  1012. --zone "${ZONE}" \
  1013. "${group}" &
  1014. fi
  1015. done
  1016. # Wait for last batch of jobs
  1017. kube::util::wait-for-jobs || {
  1018. echo -e "Failed to delete instance group(s)." >&2
  1019. }
  1020. for template in ${templates[@]:-}; do
  1021. if gcloud compute instance-templates describe --project "${PROJECT}" "${template}" &>/dev/null; then
  1022. gcloud compute instance-templates delete \
  1023. --project "${PROJECT}" \
  1024. --quiet \
  1025. "${template}"
  1026. fi
  1027. done
  1028. local -r REPLICA_NAME="$(get-replica-name)"
  1029. set-existing-master
  1030. # Un-register the master replica from etcd and events etcd.
  1031. remove-replica-from-etcd 4001
  1032. remove-replica-from-etcd 4002
  1033. # Delete the master replica (if it exists).
  1034. if gcloud compute instances describe "${REPLICA_NAME}" --zone "${ZONE}" --project "${PROJECT}" &>/dev/null; then
  1035. # If there is a load balancer in front of apiservers we need to first update its configuration.
  1036. if gcloud compute target-pools describe "${MASTER_NAME}" --region "${REGION}" --project "${PROJECT}" &>/dev/null; then
  1037. gcloud compute target-pools remove-instances "${MASTER_NAME}" \
  1038. --project "${PROJECT}" \
  1039. --zone "${ZONE}" \
  1040. --instances "${REPLICA_NAME}"
  1041. fi
  1042. # Now we can safely delete the VM.
  1043. gcloud compute instances delete \
  1044. --project "${PROJECT}" \
  1045. --quiet \
  1046. --delete-disks all \
  1047. --zone "${ZONE}" \
  1048. "${REPLICA_NAME}"
  1049. fi
  1050. # Delete the master replica pd (possibly leaked by kube-up if master create failed).
  1051. if gcloud compute disks describe "${REPLICA_NAME}"-pd --zone "${ZONE}" --project "${PROJECT}" &>/dev/null; then
  1052. gcloud compute disks delete \
  1053. --project "${PROJECT}" \
  1054. --quiet \
  1055. --zone "${ZONE}" \
  1056. "${REPLICA_NAME}"-pd
  1057. fi
  1058. # Delete disk for cluster registry if enabled
  1059. if [[ "${ENABLE_CLUSTER_REGISTRY}" == true && -n "${CLUSTER_REGISTRY_DISK}" ]]; then
  1060. if gcloud compute disks describe "${CLUSTER_REGISTRY_DISK}" --zone "${ZONE}" --project "${PROJECT}" &>/dev/null; then
  1061. gcloud compute disks delete \
  1062. --project "${PROJECT}" \
  1063. --quiet \
  1064. --zone "${ZONE}" \
  1065. "${CLUSTER_REGISTRY_DISK}"
  1066. fi
  1067. fi
  1068. # Check if there are any remaining master replicas.
  1069. local REMAINING_MASTER_COUNT=$(gcloud compute instances list \
  1070. --project "${PROJECT}" \
  1071. --regexp "$(get-replica-name-regexp)" \
  1072. --format "value(zone)" | wc -l)
  1073. # In the replicated scenario, if there's only a single master left, we should also delete load balancer in front of it.
  1074. if [[ "${REMAINING_MASTER_COUNT}" == "1" ]]; then
  1075. if gcloud compute forwarding-rules describe "${MASTER_NAME}" --region "${REGION}" --project "${PROJECT}" &>/dev/null; then
  1076. detect-master
  1077. local EXISTING_MASTER_ZONE=$(gcloud compute instances list "${MASTER_NAME}" \
  1078. --project "${PROJECT}" --format="value(zone)")
  1079. gcloud compute forwarding-rules delete \
  1080. --project "${PROJECT}" \
  1081. --region "${REGION}" \
  1082. --quiet \
  1083. "${MASTER_NAME}"
  1084. attach-external-ip "${MASTER_NAME}" "${EXISTING_MASTER_ZONE}" "${KUBE_MASTER_IP}"
  1085. gcloud compute target-pools delete \
  1086. --project "${PROJECT}" \
  1087. --region "${REGION}" \
  1088. --quiet \
  1089. "${MASTER_NAME}"
  1090. fi
  1091. fi
  1092. # If there are no more remaining master replicas, we should delete all remaining network resources.
  1093. if [[ "${REMAINING_MASTER_COUNT}" == "0" ]]; then
  1094. # Delete firewall rule for the master.
  1095. if gcloud compute firewall-rules describe --project "${PROJECT}" "${MASTER_NAME}-https" &>/dev/null; then
  1096. gcloud compute firewall-rules delete \
  1097. --project "${PROJECT}" \
  1098. --quiet \
  1099. "${MASTER_NAME}-https"
  1100. fi
  1101. # Delete the master's reserved IP
  1102. if gcloud compute addresses describe "${MASTER_NAME}-ip" --region "${REGION}" --project "${PROJECT}" &>/dev/null; then
  1103. gcloud compute addresses delete \
  1104. --project "${PROJECT}" \
  1105. --region "${REGION}" \
  1106. --quiet \
  1107. "${MASTER_NAME}-ip"
  1108. fi
  1109. # Delete firewall rule for minions.
  1110. if gcloud compute firewall-rules describe --project "${PROJECT}" "${NODE_TAG}-all" &>/dev/null; then
  1111. gcloud compute firewall-rules delete \
  1112. --project "${PROJECT}" \
  1113. --quiet \
  1114. "${NODE_TAG}-all"
  1115. fi
  1116. fi
  1117. # Find out what minions are running.
  1118. local -a minions
  1119. minions=( $(gcloud compute instances list \
  1120. --project "${PROJECT}" --zones "${ZONE}" \
  1121. --regexp "${NODE_INSTANCE_PREFIX}-.+" \
  1122. --format='value(name)') )
  1123. # If any minions are running, delete them in batches.
  1124. while (( "${#minions[@]}" > 0 )); do
  1125. echo Deleting nodes "${minions[*]::${batch}}"
  1126. gcloud compute instances delete \
  1127. --project "${PROJECT}" \
  1128. --quiet \
  1129. --delete-disks boot \
  1130. --zone "${ZONE}" \
  1131. "${minions[@]::${batch}}"
  1132. minions=( "${minions[@]:${batch}}" )
  1133. done
  1134. # Delete routes.
  1135. local -a routes
  1136. # Clean up all routes w/ names like "<cluster-name>-<node-GUID>"
  1137. # e.g. "kubernetes-12345678-90ab-cdef-1234-567890abcdef". The name is
  1138. # determined by the node controller on the master.
  1139. # Note that this is currently a noop, as synchronously deleting the node MIG
  1140. # first allows the master to cleanup routes itself.
  1141. local TRUNCATED_PREFIX="${INSTANCE_PREFIX:0:26}"
  1142. routes=( $(gcloud compute routes list --project "${PROJECT}" \
  1143. --regexp "${TRUNCATED_PREFIX}-.{8}-.{4}-.{4}-.{4}-.{12}" \
  1144. --format='value(name)') )
  1145. while (( "${#routes[@]}" > 0 )); do
  1146. echo Deleting routes "${routes[*]::${batch}}"
  1147. gcloud compute routes delete \
  1148. --project "${PROJECT}" \
  1149. --quiet \
  1150. "${routes[@]::${batch}}"
  1151. routes=( "${routes[@]:${batch}}" )
  1152. done
  1153. # Delete persistent disk for influx-db.
  1154. if gcloud compute disks describe "${INSTANCE_PREFIX}"-influxdb-pd --zone "${ZONE}" --project "${PROJECT}" &>/dev/null; then
  1155. gcloud compute disks delete \
  1156. --project "${PROJECT}" \
  1157. --quiet \
  1158. --zone "${ZONE}" \
  1159. "${INSTANCE_PREFIX}"-influxdb-pd
  1160. fi
  1161. # If there are no more remaining master replicas, we should update kubeconfig.
  1162. if [[ "${REMAINING_MASTER_COUNT}" == "0" ]]; then
  1163. export CONTEXT="${PROJECT}_${INSTANCE_PREFIX}"
  1164. clear-kubeconfig
  1165. fi
  1166. set -e
  1167. }
  1168. # Prints name of one of the master replicas in the current zone. It will be either
  1169. # just MASTER_NAME or MASTER_NAME with a suffix for a replica (see get-replica-name-regexp).
  1170. #
  1171. # Assumed vars:
  1172. # PROJECT
  1173. # ZONE
  1174. # MASTER_NAME
  1175. #
  1176. # NOTE: Must be in sync with get-replica-name-regexp and set-replica-name.
  1177. function get-replica-name() {
  1178. echo $(gcloud compute instances list \
  1179. --project "${PROJECT}" \
  1180. --zone "${ZONE}" \
  1181. --regexp "$(get-replica-name-regexp)" \
  1182. --format "value(name)" | head -n1)
  1183. }
  1184. # Prints comma-separated names of all of the master replicas in all zones.
  1185. #
  1186. # Assumed vars:
  1187. # PROJECT
  1188. # MASTER_NAME
  1189. #
  1190. # NOTE: Must be in sync with get-replica-name-regexp and set-replica-name.
  1191. function get-all-replica-names() {
  1192. echo $(gcloud compute instances list \
  1193. --project "${PROJECT}" \
  1194. --regexp "$(get-replica-name-regexp)" \
  1195. --format "value(name)" | tr "\n" "," | sed 's/,$//')
  1196. }
  1197. # Prints regexp for full master machine name. In a cluster with replicated master,
  1198. # VM names may either be MASTER_NAME or MASTER_NAME with a suffix for a replica.
  1199. function get-replica-name-regexp() {
  1200. echo "${MASTER_NAME}(-...)?"
  1201. }
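# Example (illustrative): with MASTER_NAME=my-cluster-master this matches both
# "my-cluster-master" and replica names such as "my-cluster-master-a1b"
# (a 3-character suffix generated by set-replica-name).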
  1202. # Sets REPLICA_NAME to a unique name for a master replica that will match
  1203. # expected regexp (see get-replica-name-regexp).
  1204. #
  1205. # Assumed vars:
  1206. # PROJECT
  1207. # ZONE
  1208. # MASTER_NAME
  1209. #
  1210. # Sets:
  1211. # REPLICA_NAME
  1212. function set-replica-name() {
  1213. local instances=$(gcloud compute instances list \
  1214. --project "${PROJECT}" \
  1215. --regexp "$(get-replica-name-regexp)" \
  1216. --format "value(name)")
  1217. suffix=""
  1218. while echo "${instances}" | grep "${suffix}" &>/dev/null; do
  1219. suffix="$(date | md5sum | head -c3)"
  1220. done
  1221. REPLICA_NAME="${MASTER_NAME}-${suffix}"
  1222. }
  1223. # Gets the instance template for the given NODE_INSTANCE_PREFIX. It echoes the template name so that the function
  1224. # output can be used.
  1225. # Assumed vars:
  1226. # NODE_INSTANCE_PREFIX
  1227. #
  1228. # $1: project
  1229. function get-template() {
  1230. gcloud compute instance-templates list -r "${NODE_INSTANCE_PREFIX}-template(-(${KUBE_RELEASE_VERSION_DASHED_REGEX}|${KUBE_CI_VERSION_DASHED_REGEX}))?" \
  1231. --project="${1}" --format='value(name)'
  1232. }
  1233. # Checks if there are any existing resources related to the Kubernetes cluster.
  1234. #
  1235. # Assumed vars:
  1236. # MASTER_NAME
  1237. # NODE_INSTANCE_PREFIX
  1238. # ZONE
  1239. # REGION
  1240. # Vars set:
  1241. # KUBE_RESOURCE_FOUND
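#
# Example caller pattern (a sketch; not necessarily how callers use it):
#   if ! check-resources; then
#     echo "Found existing resources: ${KUBE_RESOURCE_FOUND}"
#   fi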
function check-resources() {
  detect-project
  detect-node-names

  echo "Looking for already existing resources"
  KUBE_RESOURCE_FOUND=""

  if [[ -n "${INSTANCE_GROUPS[@]:-}" ]]; then
    KUBE_RESOURCE_FOUND="Managed instance groups ${INSTANCE_GROUPS[@]}"
    return 1
  fi

  if gcloud compute instance-templates describe --project "${PROJECT}" "${NODE_INSTANCE_PREFIX}-template" &>/dev/null; then
    KUBE_RESOURCE_FOUND="Instance template ${NODE_INSTANCE_PREFIX}-template"
    return 1
  fi

  if gcloud compute instances describe --project "${PROJECT}" "${MASTER_NAME}" --zone "${ZONE}" &>/dev/null; then
    KUBE_RESOURCE_FOUND="Kubernetes master ${MASTER_NAME}"
    return 1
  fi

  if gcloud compute disks describe --project "${PROJECT}" "${MASTER_NAME}"-pd --zone "${ZONE}" &>/dev/null; then
    KUBE_RESOURCE_FOUND="Persistent disk ${MASTER_NAME}-pd"
    return 1
  fi

  if gcloud compute disks describe --project "${PROJECT}" "${CLUSTER_REGISTRY_DISK}" --zone "${ZONE}" &>/dev/null; then
    KUBE_RESOURCE_FOUND="Persistent disk ${CLUSTER_REGISTRY_DISK}"
    return 1
  fi

  # Find out what minions are running.
  local -a minions
  minions=( $(gcloud compute instances list \
    --project "${PROJECT}" --zones "${ZONE}" \
    --regexp "${NODE_INSTANCE_PREFIX}-.+" \
    --format='value(name)') )
  if (( "${#minions[@]}" > 0 )); then
    KUBE_RESOURCE_FOUND="${#minions[@]} matching ${NODE_INSTANCE_PREFIX}-.+"
    return 1
  fi

  if gcloud compute firewall-rules describe --project "${PROJECT}" "${MASTER_NAME}-https" &>/dev/null; then
    KUBE_RESOURCE_FOUND="Firewall rules for ${MASTER_NAME}-https"
    return 1
  fi

  if gcloud compute firewall-rules describe --project "${PROJECT}" "${NODE_TAG}-all" &>/dev/null; then
    KUBE_RESOURCE_FOUND="Firewall rules for ${NODE_TAG}-all"
    return 1
  fi

  local -a routes
  routes=( $(gcloud compute routes list --project "${PROJECT}" \
    --regexp "${INSTANCE_PREFIX}-minion-.{4}" --format='value(name)') )
  if (( "${#routes[@]}" > 0 )); then
    KUBE_RESOURCE_FOUND="${#routes[@]} routes matching ${INSTANCE_PREFIX}-minion-.{4}"
    return 1
  fi

  if gcloud compute addresses describe --project "${PROJECT}" "${MASTER_NAME}-ip" --region "${REGION}" &>/dev/null; then
    KUBE_RESOURCE_FOUND="Master's reserved IP"
    return 1
  fi

  # No resources found.
  return 0
}

# Prepare to push new binaries to kubernetes cluster
# $1 - whether to prepare the push for nodes as well
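#
# Example usage (sketch): "prepare-push true" also rebuilds the node instance
# template used by the managed instance groups, while "prepare-push" without
# an argument only performs the common preparation needed before a master push.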
function prepare-push() {
  local node="${1-}"

  # TODO(dawnchen): figure out how to upgrade a CoreOS node
  if [[ "${node}" == "true" && "${NODE_OS_DISTRIBUTION}" != "debian" ]]; then
    echo "Updating nodes in a kubernetes cluster with ${NODE_OS_DISTRIBUTION} is not supported yet." >&2
    exit 1
  fi
  if [[ "${node}" != "true" && "${MASTER_OS_DISTRIBUTION}" != "debian" ]]; then
    echo "Updating the master in a kubernetes cluster with ${MASTER_OS_DISTRIBUTION} is not supported yet." >&2
    exit 1
  fi

  OUTPUT=${KUBE_ROOT}/_output/logs
  mkdir -p ${OUTPUT}

  ensure-temp-dir
  detect-project
  detect-master
  detect-node-names
  get-kubeconfig-basicauth
  get-kubeconfig-bearertoken

  # Make sure we have the tar files staged on Google Storage
  tars_from_version

  # Prepare node env vars and update MIG template
  if [[ "${node}" == "true" ]]; then
    write-node-env

    # TODO(zmerlynn): Refactor setting scope flags.
    local scope_flags=
    if [ -n "${NODE_SCOPES}" ]; then
      scope_flags="--scopes ${NODE_SCOPES}"
    else
      scope_flags="--no-scopes"
    fi

    # Ugly hack: since it is not possible to delete an instance template that
    # is currently in use, create a temporary one, switch the groups to it,
    # delete and recreate the old template, switch back, and finally delete
    # the temporary template.
    local tmp_template_name="${NODE_INSTANCE_PREFIX}-template-tmp"
    create-node-instance-template "$tmp_template_name"

    local template_name="${NODE_INSTANCE_PREFIX}-template"
    for group in ${INSTANCE_GROUPS[@]:-}; do
      gcloud compute instance-groups managed \
        set-instance-template "${group}" \
        --template "$tmp_template_name" \
        --zone "${ZONE}" \
        --project "${PROJECT}" || true;
    done

    gcloud compute instance-templates delete \
      --project "${PROJECT}" \
      --quiet \
      "$template_name" || true

    create-node-instance-template "$template_name"

    for group in ${INSTANCE_GROUPS[@]:-}; do
      gcloud compute instance-groups managed \
        set-instance-template "${group}" \
        --template "$template_name" \
        --zone "${ZONE}" \
        --project "${PROJECT}" || true;
    done

    gcloud compute instance-templates delete \
      --project "${PROJECT}" \
      --quiet \
      "$tmp_template_name" || true
  fi
}

# Push binaries to kubernetes master
function push-master() {
  echo "Updating master metadata ..."
  write-master-env
  prepare-startup-script
  add-instance-metadata-from-file "${KUBE_MASTER}" "kube-env=${KUBE_TEMP}/master-kube-env.yaml" "startup-script=${KUBE_TEMP}/configure-vm.sh"

  echo "Pushing to master (log at ${OUTPUT}/push-${KUBE_MASTER}.log) ..."
  cat ${KUBE_TEMP}/configure-vm.sh | gcloud compute ssh --ssh-flag="-o LogLevel=quiet" --project "${PROJECT}" --zone "${ZONE}" "${KUBE_MASTER}" --command "sudo bash -s -- --push" &> ${OUTPUT}/push-"${KUBE_MASTER}".log
}

# Push binaries to kubernetes node
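#
# Example usage (hypothetical node name):
#   push-node "e2e-test-minion-group-abcd"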
function push-node() {
  node=${1}

  echo "Updating node ${node} metadata... "
  prepare-startup-script
  add-instance-metadata-from-file "${node}" "kube-env=${KUBE_TEMP}/node-kube-env.yaml" "startup-script=${KUBE_TEMP}/configure-vm.sh"

  echo "Start upgrading node ${node} (log at ${OUTPUT}/push-${node}.log) ..."
  cat ${KUBE_TEMP}/configure-vm.sh | gcloud compute ssh --ssh-flag="-o LogLevel=quiet" --project "${PROJECT}" --zone "${ZONE}" "${node}" --command "sudo bash -s -- --push" &> ${OUTPUT}/push-"${node}".log
}

# Push binaries to kubernetes cluster
function kube-push() {
  # Disable this until it's fixed.
  # See https://github.com/kubernetes/kubernetes/issues/17397
  echo "./cluster/kube-push.sh is currently not supported in GCE."
  echo "Please use ./cluster/gce/upgrade.sh."
  exit 1

  prepare-push true
  push-master

  for (( i=0; i<${#NODE_NAMES[@]}; i++)); do
    push-node "${NODE_NAMES[$i]}" &
  done

  kube::util::wait-for-jobs || {
    echo -e "${color_red}Some commands failed.${color_norm}" >&2
  }

  # TODO(zmerlynn): Re-create instance-template with the new
  # node-kube-env. This isn't important until the node-ip-range issue
  # is solved (because that's blocking automatic dynamic nodes from
  # working). The node-kube-env has to be composed with the KUBELET_TOKEN
  # and KUBE_PROXY_TOKEN. Ideally we would have
  # http://issue.k8s.io/3168
  # implemented before then, though, so avoiding this mess until then.

  echo
  echo "Kubernetes cluster is running. The master is running at:"
  echo
  echo "  https://${KUBE_MASTER_IP}"
  echo
  echo "The user name and password to use are located in ~/.kube/config"
  echo
}

# -----------------------------------------------------------------------------
# Cluster specific test helpers used from hack/e2e.go

# Execute prior to running tests to build a release if required for env.
#
# Assumed Vars:
#   KUBE_ROOT
function test-build-release() {
  # Make a release
  "${KUBE_ROOT}/build/release.sh"
}

# Execute prior to running tests to initialize required structure. This is
# called from hack/e2e.go only when running -up.
#
# Assumed vars:
#   Variables from config.sh
function test-setup() {
  # Detect the project into $PROJECT if it isn't set
  detect-project

  if [[ ${MULTIZONE:-} == "true" ]]; then
    for KUBE_GCE_ZONE in ${E2E_ZONES}
    do
      KUBE_GCE_ZONE="${KUBE_GCE_ZONE}" KUBE_USE_EXISTING_MASTER="${KUBE_USE_EXISTING_MASTER:-}" "${KUBE_ROOT}/cluster/kube-up.sh"
      KUBE_USE_EXISTING_MASTER="true" # For subsequent zones we use the existing master
    done
  else
    "${KUBE_ROOT}/cluster/kube-up.sh"
  fi

  # Open up ports 80 and 8080 so common containers on minions can be reached.
  # TODO(roberthbailey): Remove this once we are no longer relying on hostPorts.
  local start=`date +%s`
  gcloud compute firewall-rules create \
    --project "${PROJECT}" \
    --target-tags "${NODE_TAG}" \
    --allow tcp:80,tcp:8080 \
    --network "${NETWORK}" \
    "${NODE_TAG}-${INSTANCE_PREFIX}-http-alt" 2> /dev/null || true
  # As there is no simple way to wait longer for this operation, we need to
  # manually wait some additional time (20 minutes altogether).
  while ! gcloud compute firewall-rules describe --project "${PROJECT}" "${NODE_TAG}-${INSTANCE_PREFIX}-http-alt" 2> /dev/null; do
    if [[ $(($start + 1200)) -lt `date +%s` ]]; then
      echo -e "${color_red}Failed to create firewall ${NODE_TAG}-${INSTANCE_PREFIX}-http-alt in ${PROJECT}" >&2
      exit 1
    fi
    sleep 5
  done

  # Open up the NodePort range.
  # TODO(justinsb): Move to main setup, if we decide we want to do this by default.
  start=`date +%s`
  gcloud compute firewall-rules create \
    --project "${PROJECT}" \
    --target-tags "${NODE_TAG}" \
    --allow tcp:30000-32767,udp:30000-32767 \
    --network "${NETWORK}" \
    "${NODE_TAG}-${INSTANCE_PREFIX}-nodeports" 2> /dev/null || true
  # As there is no simple way to wait longer for this operation, we need to
  # manually wait some additional time (20 minutes altogether).
  while ! gcloud compute firewall-rules describe --project "${PROJECT}" "${NODE_TAG}-${INSTANCE_PREFIX}-nodeports" 2> /dev/null; do
    if [[ $(($start + 1200)) -lt `date +%s` ]]; then
      echo -e "${color_red}Failed to create firewall ${NODE_TAG}-${INSTANCE_PREFIX}-nodeports in ${PROJECT}" >&2
      exit 1
    fi
    sleep 5
  done
}

# Execute after running tests to perform any required clean-up. This is called
# from hack/e2e.go
function test-teardown() {
  detect-project
  echo "Shutting down test cluster in background."
  gcloud compute firewall-rules delete \
    --project "${PROJECT}" \
    --quiet \
    "${NODE_TAG}-${INSTANCE_PREFIX}-http-alt" || true
  gcloud compute firewall-rules delete \
    --project "${PROJECT}" \
    --quiet \
    "${NODE_TAG}-${INSTANCE_PREFIX}-nodeports" || true
  if [[ ${MULTIZONE:-} == "true" ]]; then
    local zones=( ${E2E_ZONES} )
    # Tear them down in reverse order, finally tearing down the master too.
    for ((zone_num=${#zones[@]}-1; zone_num>0; zone_num--))
    do
      KUBE_GCE_ZONE="${zones[zone_num]}" KUBE_USE_EXISTING_MASTER="true" "${KUBE_ROOT}/cluster/kube-down.sh"
    done
    KUBE_GCE_ZONE="${zones[0]}" KUBE_USE_EXISTING_MASTER="false" "${KUBE_ROOT}/cluster/kube-down.sh"
  else
    "${KUBE_ROOT}/cluster/kube-down.sh"
  fi
}

# SSH to a node by name ($1) and run a command ($2).
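#
# Example usage (hypothetical node name):
#   ssh-to-node "e2e-test-minion-group-abcd" "uptime"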
function ssh-to-node() {
  local node="$1"
  local cmd="$2"
  # Loop until we can successfully ssh into the box
  for try in {1..5}; do
    if gcloud compute ssh --ssh-flag="-o LogLevel=quiet" --ssh-flag="-o ConnectTimeout=30" --project "${PROJECT}" --zone="${ZONE}" "${node}" --command "echo test > /dev/null"; then
      break
    fi
    sleep 5
  done
  # Then actually try the command.
  gcloud compute ssh --ssh-flag="-o LogLevel=quiet" --ssh-flag="-o ConnectTimeout=30" --project "${PROJECT}" --zone="${ZONE}" "${node}" --command "${cmd}"
}

# Perform preparations required to run e2e tests
function prepare-e2e() {
  detect-project
}

# Writes configure-vm.sh to a temporary location with comments stripped. GCE
# limits the size of metadata fields to 32K, and stripping comments is the
# easiest way to buy us a little more room.
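#
# For example, a line consisting only of "# some comment" is removed, while
# "#!/bin/bash" and code lines with trailing comments are kept, because the
# sed expression below only deletes lines whose first non-blank character is a
# "#" not followed by "!".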
function prepare-startup-script() {
  sed '/^\s*#\([^!].*\)*$/ d' ${KUBE_ROOT}/cluster/gce/configure-vm.sh > ${KUBE_TEMP}/configure-vm.sh
}