#!/bin/bash
- # Copyright 2014 The Kubernetes Authors.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
# A library of helper functions and constants for the local config.
# Use the config file specified in $KUBE_CONFIG_FILE, or default to
# config-default.sh.
# Locate the repository root relative to this file and pull in the cluster
# configuration plus the shared helper libraries.
KUBE_ROOT="$(dirname "${BASH_SOURCE}")/../.."
source "${KUBE_ROOT}/cluster/gce/${KUBE_CONFIG_FILE-config-default.sh}"
source "${KUBE_ROOT}/cluster/common.sh"
source "${KUBE_ROOT}/cluster/lib/util.sh"

# Load the node helper for the configured node OS distribution; any other
# distribution is rejected.
case "${NODE_OS_DISTRIBUTION}" in
  debian|coreos|trusty|gci)
    source "${KUBE_ROOT}/cluster/gce/${NODE_OS_DISTRIBUTION}/node-helper.sh"
    ;;
  *)
    echo "Cannot operate on cluster using node os distro: ${NODE_OS_DISTRIBUTION}" >&2
    exit 1
    ;;
esac

# Same for the master OS distribution.
case "${MASTER_OS_DISTRIBUTION}" in
  debian|coreos|trusty|gci)
    source "${KUBE_ROOT}/cluster/gce/${MASTER_OS_DISTRIBUTION}/master-helper.sh"
    ;;
  *)
    echo "Cannot operate on cluster using master os distro: ${MASTER_OS_DISTRIBUTION}" >&2
    exit 1
    ;;
esac
# Pick the master image. A user-provided KUBE_GCE_MASTER_IMAGE always wins;
# otherwise gci falls back to GCI_VERSION and debian to CVM_VERSION. Other
# distributions leave MASTER_IMAGE / MASTER_IMAGE_PROJECT untouched here.
case "${MASTER_OS_DISTRIBUTION}" in
  gci)
    MASTER_IMAGE=${KUBE_GCE_MASTER_IMAGE:-${GCI_VERSION}}
    MASTER_IMAGE_PROJECT=${KUBE_GCE_MASTER_PROJECT:-google-containers}
    ;;
  debian)
    MASTER_IMAGE=${KUBE_GCE_MASTER_IMAGE:-${CVM_VERSION}}
    MASTER_IMAGE_PROJECT=${KUBE_GCE_MASTER_PROJECT:-google-containers}
    ;;
esac

# Pick the node image using the same rules, driven by KUBE_GCE_NODE_IMAGE.
case "${NODE_OS_DISTRIBUTION}" in
  gci)
    NODE_IMAGE=${KUBE_GCE_NODE_IMAGE:-${GCI_VERSION}}
    NODE_IMAGE_PROJECT=${KUBE_GCE_NODE_PROJECT:-google-containers}
    ;;
  debian)
    NODE_IMAGE=${KUBE_GCE_NODE_IMAGE:-${CVM_VERSION}}
    NODE_IMAGE_PROJECT=${KUBE_GCE_NODE_PROJECT:-google-containers}
    ;;
esac
# Verify cluster autoscaler configuration: when the autoscaler is enabled,
# both node-count bounds must be provided. Problems are reported on stderr
# and abort the script with status 1.
if [[ "${ENABLE_CLUSTER_AUTOSCALER}" == "true" ]]; then
  # Quoted tests: an unquoted operand makes `[ -z ... ]` fail with a syntax
  # error (and skip the check) when the value contains whitespace.
  if [[ -z "${AUTOSCALER_MIN_NODES:-}" ]]; then
    echo "AUTOSCALER_MIN_NODES not set." >&2
    exit 1
  fi
  if [[ -z "${AUTOSCALER_MAX_NODES:-}" ]]; then
    echo "AUTOSCALER_MAX_NODES not set." >&2
    exit 1
  fi
fi
# Values derived from the loaded configuration.
NODE_INSTANCE_PREFIX="${INSTANCE_PREFIX}-minion"
NODE_TAGS="${NODE_TAG}"
ALLOCATE_NODE_CIDRS=true

# Prompt before updating gcloud components; skip the update step only when
# KUBE_SKIP_UPDATE was already set by the caller (defaults to "n" if unset).
KUBE_PROMPT_FOR_UPDATE=y
: "${KUBE_SKIP_UPDATE=n}"

# How long (in seconds) to wait for cluster initialization.
: "${KUBE_CLUSTER_INITIALIZATION_TIMEOUT:=300}"
# Prints all arguments joined by commas on a single line.
function join_csv() {
  # "$*" joins the positional parameters with the first character of IFS.
  local IFS=','
  echo "$*"
}
# Prints the part of the arguments that precedes the first comma (the whole
# text when there is no comma).
function split_csv() {
  local -r joined="$*"
  echo "${joined}" | cut -d',' -f1
}
# Verify prereqs: make sure gcloud and gsutil are on PATH (offering to install
# the Google Cloud SDK when they are not), then install/update the gcloud
# components unless KUBE_SKIP_UPDATE is "y".
#
# Assumed vars:
#   KUBE_PROMPT_FOR_UPDATE
#   KUBE_SKIP_UPDATE
# Vars set:
#   gcloud_prompt (only when KUBE_PROMPT_FOR_UPDATE is not "y")
function verify-prereqs() {
  local cmd
  local resp
  for cmd in gcloud gsutil; do
    if ! command -v "${cmd}" >/dev/null; then
      if [[ "${KUBE_PROMPT_FOR_UPDATE}" == "y" ]]; then
        echo "Can't find ${cmd} in PATH. Do you wish to install the Google Cloud SDK? [Y/n]"
        read -r resp
      else
        resp="y"
      fi
      # Anything other than an explicit "n"/"N" means "install".
      if [[ "${resp}" != "n" && "${resp}" != "N" ]]; then
        curl https://sdk.cloud.google.com | bash
      fi
      if ! command -v "${cmd}" >/dev/null; then
        echo "Can't find ${cmd} in PATH, please fix and retry. The Google Cloud " >&2
        echo "SDK can be downloaded from https://cloud.google.com/sdk/." >&2
        exit 1
      fi
    fi
  done
  if [[ "${KUBE_SKIP_UPDATE}" == "y" ]]; then
    return
  fi
  # update and install components as needed
  if [[ "${KUBE_PROMPT_FOR_UPDATE}" != "y" ]]; then
    gcloud_prompt="-q"
  fi
  # Use sudo when the directory holding gcloud is not writable by this user.
  # The path is quoted so that install locations containing spaces work.
  local sudo_prefix=""
  local gcloud_dir
  gcloud_dir="$(dirname "$(command -v gcloud)")"
  if [[ ! -w "${gcloud_dir}" ]]; then
    sudo_prefix="sudo"
  fi
  # ${sudo_prefix} and ${gcloud_prompt:-} are deliberately unquoted so that
  # they vanish from the command line when empty.
  ${sudo_prefix} gcloud ${gcloud_prompt:-} components install alpha || true
  ${sudo_prefix} gcloud ${gcloud_prompt:-} components install beta || true
  ${sudo_prefix} gcloud ${gcloud_prompt:-} components update || true
}
# Make sure a scratch directory exists; it is removed when this bash session
# exits. Does nothing if KUBE_TEMP is already set.
#
# Vars set:
#   KUBE_TEMP
function ensure-temp-dir() {
  if [[ -n ${KUBE_TEMP-} ]]; then
    return 0
  fi
  KUBE_TEMP=$(mktemp -d -t kubernetes.XXXXXX)
  trap 'rm -rf "${KUBE_TEMP}"' EXIT
}
# Resolve the GCP project: an already-set PROJECT wins, otherwise the gcloud
# default project is used. Exits with status 1 if neither yields a project.
# The project and zone are printed to stderr once per session.
#
# Vars set:
#   PROJECT
#   PROJECT_REPORTED
function detect-project() {
  if [[ -z "${PROJECT-}" ]]; then
    PROJECT=$(gcloud config list project --format 'value(core.project)')
    if [[ -z "${PROJECT-}" ]]; then
      echo "Could not detect Google Cloud Platform project. Set the default project using " >&2
      echo "'gcloud config set project <PROJECT>'" >&2
      exit 1
    fi
  fi

  # Only announce the project/zone the first time through.
  if [[ -n "${PROJECT_REPORTED-}" ]]; then
    return 0
  fi
  echo "Project: ${PROJECT}" >&2
  echo "Zone: ${ZONE}" >&2
  PROJECT_REPORTED=true
}
# Copy a release tar and its accompanying hash to the staging location.
#
# $1: staging path (gs:// destination passed to `gsutil cp`)
# $2: gs:// URL of the uploaded tar; read access is granted on it and on its
#     ".sha1" companion
# $3: local path of the tar
# $4: hash of the tar, written next to it as "<tar>.sha1"
function copy-to-staging() {
  local -r staging_path=$1
  local -r gs_url=$2
  local -r tar=$3
  local -r hash=$4

  echo "${hash}" > "${tar}.sha1"
  gsutil -m -q -h "Cache-Control:private, max-age=0" cp "${tar}" "${tar}.sha1" "${staging_path}"
  # Output and failures of the ACL change are intentionally discarded.
  gsutil -m acl ch -g all:R "${gs_url}" "${gs_url}.sha1" >/dev/null 2>&1
  # Quote the path: unquoted, a tar path containing a space is split and
  # basename treats the second word as a suffix to strip.
  echo "+++ $(basename "${tar}") uploaded (sha1 = ${hash})"
}
# Given the cluster zone, work out the regional GCS release bucket suffixes
# in preference order, and the addon registry to use. The ordering is
# hardcoded per zone prefix.
#
# Assumed vars:
#   RELEASE_REGION_FALLBACK
#   REGIONAL_KUBE_ADDONS
#   ZONE
# Vars set:
#   PREFERRED_REGION
#   KUBE_ADDON_REGISTRY
#   DOCKER_REGISTRY_MIRROR_URL (only when ENABLE_DOCKER_REGISTRY_CACHE is "true")
function set-preferred-region() {
  if [[ "${ZONE}" == asia-* ]]; then
    PREFERRED_REGION=("asia" "us" "eu")
  elif [[ "${ZONE}" == europe-* ]]; then
    PREFERRED_REGION=("eu" "us" "asia")
  else
    PREFERRED_REGION=("us" "eu" "asia")
  fi

  local -r first_choice="${PREFERRED_REGION[0]}"

  # Without fallback, only the closest region is kept.
  if [[ "${RELEASE_REGION_FALLBACK}" != "true" ]]; then
    PREFERRED_REGION=( "${first_choice}" )
  fi

  # The regional registry is used only when regional addons are requested and
  # the closest region is not "us"; everything else uses gcr.io.
  KUBE_ADDON_REGISTRY="gcr.io/google_containers"
  if [[ "${REGIONAL_KUBE_ADDONS}" == "true" && "${first_choice}" != "us" ]]; then
    KUBE_ADDON_REGISTRY="${first_choice}.gcr.io/google_containers"
  fi

  if [[ "${ENABLE_DOCKER_REGISTRY_CACHE:-}" == "true" ]]; then
    DOCKER_REGISTRY_MIRROR_URL="https://${first_choice}-mirror.gcr.io"
  fi
}
# Take the local tar files and upload them to Google Storage. They will then be
# downloaded by the master as part of the start up script for the master.
# One staging bucket per preferred region is used (created when `gsutil ls`
# fails on it), and the resulting URLs are exported as comma-separated lists.
#
# Assumed vars:
#   PROJECT
#   SERVER_BINARY_TAR
#   SALT_TAR
#   KUBE_MANIFESTS_TAR (optional)
#   INSTANCE_PREFIX
#   ZONE
# Vars set:
#   SERVER_BINARY_TAR_URL
#   SERVER_BINARY_TAR_HASH
#   SALT_TAR_URL
#   SALT_TAR_HASH
#   KUBE_MANIFESTS_TAR_URL
#   KUBE_MANIFESTS_TAR_HASH
function upload-server-tars() {
  SERVER_BINARY_TAR_URL=
  SERVER_BINARY_TAR_HASH=
  SALT_TAR_URL=
  SALT_TAR_HASH=
  KUBE_MANIFESTS_TAR_URL=
  KUBE_MANIFESTS_TAR_HASH=

  local project_hash
  if command -v md5 > /dev/null 2>&1; then
    project_hash=$(md5 -q -s "$PROJECT")
  else
    project_hash=$(printf '%s' "$PROJECT" | md5sum | awk '{ print $1 }')
  fi

  # This requires 1 million projects before the probability of collision is 50%
  # that's probably good enough for now :P
  project_hash=${project_hash:0:10}

  set-preferred-region

  SERVER_BINARY_TAR_HASH=$(sha1sum-file "${SERVER_BINARY_TAR}")
  SALT_TAR_HASH=$(sha1sum-file "${SALT_TAR}")
  if [[ -n "${KUBE_MANIFESTS_TAR:-}" ]]; then
    KUBE_MANIFESTS_TAR_HASH=$(sha1sum-file "${KUBE_MANIFESTS_TAR}")
  fi

  local server_binary_tar_urls=()
  local salt_tar_urls=()
  # The declared name must match the one appended to below; otherwise the
  # array is created as a global and survives across calls.
  local kube_manifests_tar_urls=()
  local region suffix
  local staging_bucket staging_path
  local server_binary_gs_url salt_gs_url kube_manifests_gs_url

  for region in "${PREFERRED_REGION[@]}"; do
    # The "us" bucket carries no region suffix.
    suffix="-${region}"
    if [[ "${suffix}" == "-us" ]]; then
      suffix=""
    fi
    staging_bucket="gs://kubernetes-staging-${project_hash}${suffix}"

    # Ensure the buckets are created
    if ! gsutil ls "${staging_bucket}" ; then
      echo "Creating ${staging_bucket}"
      gsutil mb -l "${region}" "${staging_bucket}"
    fi

    staging_path="${staging_bucket}/${INSTANCE_PREFIX}-devel"

    echo "+++ Staging server tars to Google Storage: ${staging_path}"
    server_binary_gs_url="${staging_path}/${SERVER_BINARY_TAR##*/}"
    salt_gs_url="${staging_path}/${SALT_TAR##*/}"
    copy-to-staging "${staging_path}" "${server_binary_gs_url}" "${SERVER_BINARY_TAR}" "${SERVER_BINARY_TAR_HASH}"
    copy-to-staging "${staging_path}" "${salt_gs_url}" "${SALT_TAR}" "${SALT_TAR_HASH}"

    # Convert from gs:// URL to an https:// URL
    server_binary_tar_urls+=("${server_binary_gs_url/gs:\/\//https://storage.googleapis.com/}")
    salt_tar_urls+=("${salt_gs_url/gs:\/\//https://storage.googleapis.com/}")

    if [[ -n "${KUBE_MANIFESTS_TAR:-}" ]]; then
      kube_manifests_gs_url="${staging_path}/${KUBE_MANIFESTS_TAR##*/}"
      copy-to-staging "${staging_path}" "${kube_manifests_gs_url}" "${KUBE_MANIFESTS_TAR}" "${KUBE_MANIFESTS_TAR_HASH}"
      # Convert from gs:// URL to an https:// URL
      kube_manifests_tar_urls+=("${kube_manifests_gs_url/gs:\/\//https://storage.googleapis.com/}")
    fi
  done

  SERVER_BINARY_TAR_URL=$(join_csv "${server_binary_tar_urls[@]}")
  SALT_TAR_URL=$(join_csv "${salt_tar_urls[@]}")
  if [[ -n "${KUBE_MANIFESTS_TAR:-}" ]]; then
    KUBE_MANIFESTS_TAR_URL=$(join_csv "${kube_manifests_tar_urls[@]}")
  fi
}
# Detect minions created in the minion group: list the managed instance groups
# whose name matches "${NODE_INSTANCE_PREFIX}-.+" and collect the instances of
# each one. Both lists are echoed to stderr.
#
# Assumed vars:
#   NODE_INSTANCE_PREFIX
#   ZONE
# Vars set:
#   NODE_NAMES
#   INSTANCE_GROUPS
function detect-node-names() {
  detect-project
  INSTANCE_GROUPS=()
  # A failing listing is tolerated and simply yields no groups.
  INSTANCE_GROUPS+=($(gcloud compute instance-groups managed list \
    --zones "${ZONE}" --project "${PROJECT}" \
    --regexp "${NODE_INSTANCE_PREFIX}-.+" \
    --format='value(instanceGroup)' || true))
  NODE_NAMES=()
  # Keep the loop variable out of the caller's namespace.
  local group
  # Guarded by an element count so the array is never expanded while empty.
  if (( ${#INSTANCE_GROUPS[@]} > 0 )); then
    for group in "${INSTANCE_GROUPS[@]}"; do
      NODE_NAMES+=($(gcloud compute instance-groups managed list-instances \
        "${group}" --zone "${ZONE}" --project "${PROJECT}" \
        --format='value(instance)'))
    done
  fi
  echo "INSTANCE_GROUPS=${INSTANCE_GROUPS[*]:-}" >&2
  echo "NODE_NAMES=${NODE_NAMES[*]:-}" >&2
}
# Detect the information about the minions: look up the external (NAT) IP of
# every detected node. Exits with status 1 when no node IP could be found.
#
# Assumed vars:
#   ZONE
# Vars set:
#   NODE_NAMES
#   KUBE_NODE_IP_ADDRESSES (array)
function detect-nodes() {
  detect-project
  detect-node-names
  KUBE_NODE_IP_ADDRESSES=()
  # Loop counter and scratch value stay local so callers' variables of the
  # same name are not clobbered.
  local i
  local node_ip
  for (( i = 0; i < ${#NODE_NAMES[@]}; i++ )); do
    # A failed lookup is treated the same as "no external IP".
    node_ip=$(gcloud compute instances describe --project "${PROJECT}" --zone "${ZONE}" \
      "${NODE_NAMES[$i]}" --format='value(networkInterfaces[0].accessConfigs[0].natIP)') || node_ip=""
    if [[ -z "${node_ip}" ]]; then
      echo "Did not find ${NODE_NAMES[$i]}" >&2
    else
      echo "Found ${NODE_NAMES[$i]} at ${node_ip}"
      KUBE_NODE_IP_ADDRESSES+=("${node_ip}")
    fi
  done
  if (( ${#KUBE_NODE_IP_ADDRESSES[@]} == 0 )); then
    echo "Could not detect Kubernetes minion nodes. Make sure you've launched a cluster with 'kube-up.sh'" >&2
    exit 1
  fi
}
# Detect the IP for the master. A preset KUBE_MASTER_IP is kept; otherwise the
# reserved address "${MASTER_NAME}-ip" is looked up. Exits with status 1 when
# no IP can be determined.
#
# Assumed vars:
#   MASTER_NAME
#   ZONE
#   REGION
# Vars set:
#   KUBE_MASTER
#   KUBE_MASTER_IP
function detect-master() {
  detect-project
  KUBE_MASTER=${MASTER_NAME}
  if [[ -z "${KUBE_MASTER_IP-}" ]]; then
    KUBE_MASTER_IP=$(gcloud compute addresses describe "${MASTER_NAME}-ip" \
      --project "${PROJECT}" --region "${REGION}" -q --format='value(address)')
    if [[ -z "${KUBE_MASTER_IP-}" ]]; then
      echo "Could not detect Kubernetes master node. Make sure you've launched a cluster with 'kube-up.sh'" >&2
      exit 1
    fi
  fi
  echo "Using master: $KUBE_MASTER (external IP: $KUBE_MASTER_IP)"
}
# Reads kube-env metadata from master: runs curl against the metadata server
# over `gcloud compute ssh` and prints the result on stdout. stderr of the
# ssh call is discarded.
#
# Assumed vars:
#   KUBE_MASTER
#   PROJECT
#   ZONE
function get-master-env() {
  # TODO(zmerlynn): Make this more reliable with retries.
  # Expansions are quoted so each value reaches gcloud as exactly one argument.
  gcloud compute --project "${PROJECT}" ssh --zone "${ZONE}" "${KUBE_MASTER}" --command \
    "curl --fail --silent -H 'Metadata-Flavor: Google' 'http://metadata/computeMetadata/v1/instance/attributes/kube-env'" 2>/dev/null
}
# Robustly try to create a static ip. Succeeds as soon as either the create
# call works or the address is found to exist already; otherwise retries with
# a growing delay and finally exits with status 2.
# $1: The name of the ip to create
# $2: The name of the region to create the ip in.
function create-static-ip() {
  detect-project
  local attempt=0
  local REGION="$2"
  while true; do
    if gcloud compute addresses create "$1" \
      --project "${PROJECT}" \
      --region "${REGION}" -q > /dev/null; then
      # successful operation
      break
    fi

    # The create call may have failed because the address is already there.
    # This check must go through gcloud; a misspelled command name would make
    # it fail every time and turn an existing address into a hard error.
    if gcloud compute addresses describe "$1" \
      --project "${PROJECT}" \
      --region "${REGION}" >/dev/null 2>&1; then
      # it exists - postcondition satisfied
      break
    fi

    if (( attempt > 4 )); then
      echo -e "${color_red}Failed to create static ip $1 ${color_norm}" >&2
      exit 2
    fi
    attempt=$((attempt + 1))
    echo -e "${color_yellow}Attempt $attempt failed to create static ip $1. Retrying.${color_norm}" >&2
    sleep $((attempt * 5))
  done
}
# Robustly try to create a firewall rule, retrying with a growing delay and
# exiting with status 2 once the retries are used up.
# $1: The name of firewall rule.
# $2: IP ranges.
# $3: Target tags for this firewall rule.
function create-firewall-rule() {
  detect-project
  local attempt=0
  until gcloud compute firewall-rules create "$1" \
    --project "${PROJECT}" \
    --network "${NETWORK}" \
    --source-ranges "$2" \
    --target-tags "$3" \
    --allow tcp,udp,icmp,esp,ah,sctp; do
    if (( attempt > 4 )); then
      echo -e "${color_red}Failed to create firewall rule $1 ${color_norm}" >&2
      exit 2
    fi
    attempt=$(($attempt+1))
    echo -e "${color_yellow}Attempt ${attempt} failed to create firewall rule $1. Retrying.${color_norm}" >&2
    sleep $(($attempt * 5))
  done
}
# Prints the instance template name for a version: the name is cut to 63
# characters, '.' and '+' become '-', and trailing dashes are dropped so the
# result passes gce name validation.
# $1: version (required)
function get-template-name-from-version() {
  local -r raw_name="${NODE_INSTANCE_PREFIX}-template-${1}"
  printf '%s\n' "${raw_name}" | cut -c 1-63 | sed 's/[\.\+]/-/g;s/-*$//g'
}
# Robustly try to create an instance template. An existing template of the
# same name is deleted first; creation is retried with a growing delay and the
# script exits with status 2 when deletion fails or the retries run out.
# $1: The name of the instance template.
# $2: The scopes flag.
# $3 and others: Metadata entries (must all be from a file).
function create-node-template() {
  detect-project
  local template_name="$1"

  # First, ensure the template doesn't exist.
  # TODO(zmerlynn): To make this really robust, we need to parse the output and
  #                 add retries. Just relying on a non-zero exit code doesn't
  #                 distinguish an ephemeral failed call from a "not-exists".
  if gcloud compute instance-templates describe "$template_name" --project "${PROJECT}" &>/dev/null; then
    echo "Instance template ${1} already exists; deleting." >&2
    if ! gcloud compute instance-templates delete "$template_name" --project "${PROJECT}" &>/dev/null; then
      echo -e "${color_yellow}Failed to delete existing instance template${color_norm}" >&2
      exit 2
    fi
  fi

  local preemptible_minions=""
  if [[ "${PREEMPTIBLE_NODE}" == "true" ]]; then
    preemptible_minions="--preemptible --maintenance-policy TERMINATE"
  fi

  local attempt=1
  while true; do
    echo "Attempt ${attempt} to create ${1}" >&2
    # ${preemptible_minions} and $2 are left unquoted on purpose: each may
    # hold several words (or nothing) that must become separate arguments.
    if gcloud compute instance-templates create "$template_name" \
      --project "${PROJECT}" \
      --machine-type "${NODE_SIZE}" \
      --boot-disk-type "${NODE_DISK_TYPE}" \
      --boot-disk-size "${NODE_DISK_SIZE}" \
      --image-project="${NODE_IMAGE_PROJECT}" \
      --image "${NODE_IMAGE}" \
      --tags "${NODE_TAG}" \
      --network "${NETWORK}" \
      ${preemptible_minions} \
      $2 \
      --can-ip-forward \
      --metadata-from-file $(echo ${@:3} | tr ' ' ',') >&2; then
      break
    fi

    if (( attempt > 5 )); then
      echo -e "${color_red}Failed to create instance template $template_name ${color_norm}" >&2
      exit 2
    fi
    echo -e "${color_yellow}Attempt ${attempt} failed to create instance template $template_name. Retrying.${color_norm}" >&2
    attempt=$(($attempt+1))
    sleep $(($attempt * 5))

    # In case the previous attempt failed with something like a
    # Backend Error and left the entry laying around, delete it
    # before we try again.
    gcloud compute instance-templates delete "$template_name" --project "${PROJECT}" &>/dev/null || true
  done
}
# Robustly try to add metadata on an instance, retrying with a growing delay
# and exiting with status 2 once the retries are used up.
# $1: The name of the instance.
# $2...$n: The metadata key=value pairs to add.
function add-instance-metadata() {
  local -r instance=$1
  shift 1
  local -r kvs=( "$@" )
  detect-project
  local attempt=0
  until gcloud compute instances add-metadata "${instance}" \
    --project "${PROJECT}" \
    --zone "${ZONE}" \
    --metadata "${kvs[@]}"; do
    if (( attempt > 5 )); then
      echo -e "${color_red}Failed to add instance metadata in ${instance} ${color_norm}" >&2
      exit 2
    fi
    attempt=$(($attempt+1))
    echo -e "${color_yellow}Attempt ${attempt} failed to add metadata in ${instance}. Retrying.${color_norm}" >&2
    sleep $((5 * $attempt))
  done
}
# Robustly try to add metadata on an instance, from a file. The key=file pairs
# are echoed to stdout on every attempt and handed to gcloud as one
# comma-separated value. Retries with a growing delay and exits with status 2
# once the retries are used up.
# $1: The name of the instance.
# $2...$n: The metadata key=file pairs to add.
function add-instance-metadata-from-file() {
  local -r instance=$1
  shift 1
  local -r kvs=( "$@" )
  detect-project
  local attempt=0
  while true; do
    echo "${kvs[@]}"
    # The array is expanded quoted so that a pair whose file path contains
    # whitespace stays one element instead of being split and re-joined
    # with commas.
    if ! gcloud compute instances add-metadata "${instance}" \
      --project "${PROJECT}" \
      --zone "${ZONE}" \
      --metadata-from-file "$(join_csv "${kvs[@]}")"; then
      if (( attempt > 5 )); then
        echo -e "${color_red}Failed to add instance metadata in ${instance} ${color_norm}" >&2
        exit 2
      fi
      echo -e "${color_yellow}Attempt $((attempt + 1)) failed to add metadata in ${instance}. Retrying.${color_norm}" >&2
      attempt=$((attempt + 1))
      sleep $((attempt * 5))
    else
      break
    fi
  done
}
# Instantiate a kubernetes cluster. Depending on the environment this either
# adds nodes to an existing master, replicates an existing master, or brings
# up a complete new cluster.
#
# Assumed vars
#   KUBE_ROOT
#   <Various vars set in config file>
function kube-up() {
  ensure-temp-dir
  detect-project

  load-or-gen-kube-basicauth
  load-or-gen-kube-bearertoken

  # Make sure we have the tar files staged on Google Storage
  find-release-tars
  upload-server-tars

  # NOTE(review): presumably settles how many managed instance groups get
  # created; defined outside this view.
  set_num_migs

  # Mode 1: reuse an existing master and only create nodes.
  if [[ ${KUBE_USE_EXISTING_MASTER:-} == "true" ]]; then
    parse-master-env
    create-nodes
    return
  fi

  # Mode 2: add a replica of an existing master.
  if [[ ${KUBE_EXPERIMENTAL_REPLICATE_EXISTING_MASTER:-} == "true" ]]; then
    # TODO(jsz): implement adding replica for other distributions.
    if [[ "${MASTER_OS_DISTRIBUTION}" != "gci" ]]; then
      echo "Master replication supported only for gci"
      return 1
    fi
    create-loadbalancer
    # If replication of master fails, we need to ensure that the replica is removed from etcd clusters.
    if ! replicate-master; then
      remove-replica-from-etcd 4001 || true
      remove-replica-from-etcd 4002 || true
    fi
    return
  fi

  # Mode 3: full cluster bring-up.
  check-existing
  create-network
  write-cluster-name
  create-autoscaler-config
  create-master
  create-nodes-firewall
  create-nodes-template
  create-nodes
  check-cluster
}
# Looks for leftover cluster resources and, when some are found, tears the old
# cluster down via kube-down: automatically when KUBE_UP_AUTOMATIC_CLEANUP is
# "true", or after asking the user when stdout is a terminal. Does nothing
# when neither condition holds.
function check-existing() {
  # May be false if tty is not allocated (for example with ssh -T).
  local interactive=false
  if [ -t 1 ]; then
    interactive=true
  fi

  if [[ ${interactive} != "true" && ${KUBE_UP_AUTOMATIC_CLEANUP} != "true" ]]; then
    return 0
  fi
  if check-resources; then
    return 0
  fi

  local answer="n"
  echo "${KUBE_RESOURCE_FOUND} found." >&2
  # Get user input only if running in terminal.
  if [[ ${interactive} == "true" && ${KUBE_UP_AUTOMATIC_CLEANUP} == "false" ]]; then
    read -p "Would you like to shut down the old cluster (call kube-down)? [y/N] " answer
  fi
  if [[ ${answer} == "y" || ${answer} == "Y" || ${KUBE_UP_AUTOMATIC_CLEANUP} == "true" ]]; then
    echo "... calling kube-down" >&2
    kube-down
  fi
}
# Creates the cluster network and its two default firewall rules (internal
# traffic and ssh) when they do not exist yet. The firewall rules are created
# in the background.
#
# Assumed vars:
#   PROJECT
#   NETWORK
function create-network() {
  local -r internal_rule="${NETWORK}-default-internal"
  local -r ssh_rule="${NETWORK}-default-ssh"

  if ! gcloud compute networks --project "${PROJECT}" describe "${NETWORK}" &>/dev/null; then
    echo "Creating new network: ${NETWORK}"
    # The network needs to be created synchronously or we have a race. The
    # firewalls can be added concurrent with instance creation.
    gcloud compute networks create --project "${PROJECT}" "${NETWORK}" --range "10.240.0.0/16"
  fi

  if ! gcloud compute firewall-rules --project "${PROJECT}" describe "${internal_rule}" &>/dev/null; then
    gcloud compute firewall-rules create "${internal_rule}" \
      --project "${PROJECT}" \
      --network "${NETWORK}" \
      --source-ranges "10.0.0.0/8" \
      --allow "tcp:1-65535,udp:1-65535,icmp" &
  fi

  if ! gcloud compute firewall-rules describe --project "${PROJECT}" "${ssh_rule}" &>/dev/null; then
    gcloud compute firewall-rules create "${ssh_rule}" \
      --project "${PROJECT}" \
      --network "${NETWORK}" \
      --source-ranges "0.0.0.0/0" \
      --allow "tcp:22" &
  fi
}
# Chooses the master root disk size from the cluster size: "10" for up to
# 1000 nodes, "50" above that. The value is exported.
#
# Assumes:
#   NUM_NODES
# Sets:
#   MASTER_ROOT_DISK_SIZE
function get-master-root-disk-size() {
  local size="50"
  if [[ "${NUM_NODES}" -le "1000" ]]; then
    size="10"
  fi
  export MASTER_ROOT_DISK_SIZE="${size}"
}
# Brings up the master: https firewall rule, persistent disk(s), tokens,
# reserved IP, certificates, and finally the master instance itself (started
# in the background).
#
# Vars set:
#   KUBELET_TOKEN
#   KUBE_PROXY_TOKEN
#   MASTER_RESERVED_IP
function create-master() {
  local -r master_ip_name="${MASTER_NAME}-ip"

  echo "Starting master and configuring firewalls"
  gcloud compute firewall-rules create "${MASTER_NAME}-https" \
    --project "${PROJECT}" \
    --network "${NETWORK}" \
    --target-tags "${MASTER_TAG}" \
    --allow tcp:443 &

  # We have to make sure the disk is created before creating the master VM, so
  # run this in the foreground.
  gcloud compute disks create "${MASTER_NAME}-pd" \
    --project "${PROJECT}" \
    --zone "${ZONE}" \
    --type "${MASTER_DISK_TYPE}" \
    --size "${MASTER_DISK_SIZE}"

  # Create disk for cluster registry if enabled
  if [[ "${ENABLE_CLUSTER_REGISTRY}" == true && -n "${CLUSTER_REGISTRY_DISK}" ]]; then
    gcloud compute disks create "${CLUSTER_REGISTRY_DISK}" \
      --project "${PROJECT}" \
      --zone "${ZONE}" \
      --type "${CLUSTER_REGISTRY_DISK_TYPE_GCE}" \
      --size "${CLUSTER_REGISTRY_DISK_SIZE}" &
  fi

  # Generate a bearer token for this cluster. We push this separately
  # from the other cluster variables so that the client (this
  # computer) can forget it later. This should disappear with
  # http://issue.k8s.io/3168
  # Each token: random bytes, base64-encoded, stripped of "=+/", cut to 32 bytes.
  KUBELET_TOKEN=$(dd if=/dev/urandom bs=128 count=1 2>/dev/null | base64 | tr -d "=+/" | dd bs=32 count=1 2>/dev/null)
  KUBE_PROXY_TOKEN=$(dd if=/dev/urandom bs=128 count=1 2>/dev/null | base64 | tr -d "=+/" | dd bs=32 count=1 2>/dev/null)

  # Reserve the master's IP so that it can later be transferred to another VM
  # without disrupting the kubelets.
  create-static-ip "${master_ip_name}" "${REGION}"
  MASTER_RESERVED_IP=$(gcloud compute addresses describe "${master_ip_name}" \
    --project "${PROJECT}" --region "${REGION}" -q --format='value(address)')

  create-certs "${MASTER_RESERVED_IP}"

  # Sets MASTER_ROOT_DISK_SIZE that is used by create-master-instance
  get-master-root-disk-size

  create-master-instance "${MASTER_RESERVED_IP}" &
}
# Adds master replica to etcd cluster by POSTing the replica's peer URL to the
# members endpoint, run over ssh on the existing master.
#
# Assumed vars:
#   REPLICA_NAME
#   PROJECT
#   EXISTING_MASTER_NAME
#   EXISTING_MASTER_ZONE
#
# $1: etcd client port
# $2: etcd internal port
# returns the result of ssh command which adds replica
function add-replica-to-etcd() {
  local -r client_port="${1}"
  local -r internal_port="${2}"
  local -r add_member_cmd="curl localhost:${client_port}/v2/members -XPOST -H \"Content-Type: application/json\" -d '{\"peerURLs\":[\"http://${REPLICA_NAME}:${internal_port}\"]}'"

  # The ssh call is the last command, so its status is the function's status.
  gcloud compute ssh "${EXISTING_MASTER_NAME}" \
    --project "${PROJECT}" \
    --zone "${EXISTING_MASTER_ZONE}" \
    --command \
    "${add_member_cmd}"
}
# Sets EXISTING_MASTER_NAME and EXISTING_MASTER_ZONE variables from the first
# instance whose name matches the replica name regexp.
#
# Assumed vars:
#   PROJECT
#
# NOTE: Must be in sync with get-replica-name-regexp
function set-existing-master() {
  # Only the first listed instance is used; a failing listing is not treated
  # as an error here and simply yields empty values.
  local first_match
  first_match=$(gcloud compute instances list \
    --project "${PROJECT}" \
    --regexp "$(get-replica-name-regexp)" \
    --format "value(name,zone)" | head -n1) || true
  # The name and zone columns are tab-separated (cut's default delimiter).
  EXISTING_MASTER_NAME="$(echo "${first_match}" | cut -f1)"
  EXISTING_MASTER_ZONE="$(echo "${first_match}" | cut -f2)"
}
# Replicates an existing master (experimental): registers the new replica with
# both etcd clusters, creates its disk, starts the replica instance and adds
# it to the master target pool. Returns 1 if either etcd registration fails.
function replicate-master() {
  set-replica-name
  set-existing-master

  echo "Experimental: replicating existing master ${EXISTING_MASTER_ZONE}/${EXISTING_MASTER_NAME} as ${ZONE}/${REPLICA_NAME}"

  # Before we do anything else, we should configure etcd to expect more replicas.
  add-replica-to-etcd 4001 2380 || {
    echo "Failed to add master replica to etcd cluster."
    return 1
  }
  add-replica-to-etcd 4002 2381 || {
    echo "Failed to add master replica to etcd events cluster."
    return 1
  }

  # We have to make sure the disk is created before creating the master VM, so
  # run this in the foreground.
  gcloud compute disks create "${REPLICA_NAME}-pd" \
    --project "${PROJECT}" \
    --zone "${ZONE}" \
    --type "${MASTER_DISK_TYPE}" \
    --size "${MASTER_DISK_SIZE}"

  # Sets MASTER_ROOT_DISK_SIZE that is used by create-master-instance
  get-master-root-disk-size

  local replica_names
  replica_names="$(get-all-replica-names)" || true
  replicate-master-instance "${EXISTING_MASTER_ZONE}" "${EXISTING_MASTER_NAME}" "${replica_names}"

  # Add new replica to the load balancer.
  gcloud compute target-pools add-instances "${MASTER_NAME}" \
    --project "${PROJECT}" \
    --zone "${ZONE}" \
    --instances "${REPLICA_NAME}"
}
# Detaches old and attaches new external IP to a VM. The VM's first access
# config is deleted and re-added under the same name.
#
# Arguments:
#   $1 - VM name
#   $2 - VM zone
#   $3 - external static IP; if empty will use an ephemeral IP address.
function attach-external-ip() {
  local -r vm_name=${1}
  local -r vm_zone=${2}
  local -r static_ip=${3:-}

  local access_config
  access_config=$(gcloud compute instances describe "${vm_name}" \
    --project "${PROJECT}" --zone "${vm_zone}" \
    --format="value(networkInterfaces[0].accessConfigs[0].name)") || true

  gcloud compute instances delete-access-config "${vm_name}" \
    --project "${PROJECT}" --zone "${vm_zone}" \
    --access-config-name "${access_config}"

  # The --address flag is only passed when a static IP was requested.
  local -a add_args=(
    "${vm_name}"
    --project "${PROJECT}" --zone "${vm_zone}"
    --access-config-name "${access_config}"
  )
  if [[ -n ${static_ip} ]]; then
    add_args+=(--address "${static_ip}")
  fi
  gcloud compute instances add-access-config "${add_args[@]}"
}
# Creates load balancer in front of apiserver if it doesn't exist already.
# Assumes there's only one existing master replica.
#
# Steps: detach the master's external IP (replacing it with an ephemeral one),
# create a target pool containing the master, create a forwarding rule on
# ${KUBE_MASTER_IP}:443, then poll https://${KUBE_MASTER_IP} until it answers.
#
# Assumes:
#   PROJECT
#   MASTER_NAME
#   ZONE
#   REGION
#   KUBE_MASTER_IP (read after calling detect-master)
#
# Exits with status 2 if the load balancer does not answer within 1800 seconds.
function create-loadbalancer() {
  detect-master

  # Step 0: Return early if LB is already configured.
  if gcloud compute forwarding-rules describe "${MASTER_NAME}" \
    --project "${PROJECT}" --region "${REGION}" > /dev/null 2>&1; then
    echo "Load balancer already exists"
    return
  fi

  local EXISTING_MASTER_ZONE=$(gcloud compute instances list "${MASTER_NAME}" \
    --project "${PROJECT}" --format="value(zone)")

  echo "Creating load balancer in front of an already existing master in ${EXISTING_MASTER_ZONE}"

  # Step 1: Detach master IP address and attach ephemeral address to the existing master
  attach-external-ip "${MASTER_NAME}" "${EXISTING_MASTER_ZONE}"

  # Step 2: Create target pool.
  # --project is passed explicitly, like everywhere else in this function, so
  # that the pool is not created in gcloud's default project.
  gcloud compute target-pools create "${MASTER_NAME}" \
    --project "${PROJECT}" --region "${REGION}"
  # TODO: We should also add master instances with suffixes
  gcloud compute target-pools add-instances "${MASTER_NAME}" \
    --project "${PROJECT}" \
    --instances "${MASTER_NAME}" --zone "${EXISTING_MASTER_ZONE}"

  # Step 3: Create forwarding rule.
  # TODO: This step can take up to 20 min. We need to speed this up...
  gcloud compute forwarding-rules create "${MASTER_NAME}" \
    --project "${PROJECT}" --region "${REGION}" \
    --target-pool "${MASTER_NAME}" --address="${KUBE_MASTER_IP}" --ports=443

  echo -n "Waiting for the load balancer configuration to propagate..."
  # The deadline is measured on the wall clock: counting loop iterations only
  # equals seconds when every curl attempt runs into its 1s timeout, which is
  # not the case when the connection is refused immediately.
  local lb_timeout=1800
  local lb_wait_start=$(date +%s)
  local lb_elapsed=0
  until curl -k -m1 "https://${KUBE_MASTER_IP}" &> /dev/null; do
    lb_elapsed=$(( $(date +%s) - lb_wait_start ))
    if [[ ${lb_elapsed} -ge ${lb_timeout} ]]; then
      echo -e "${color_red}TIMEOUT${color_norm}" >&2
      echo -e "${color_red}Load balancer failed to initialize within ${lb_timeout} seconds.${color_norm}" >&2
      exit 2
    fi
    echo -n .
    # Keeps fast-failing attempts from turning into a busy loop.
    sleep 1
  done
  echo "DONE"
}
# Creates the single firewall rule shared by all minions and waits for the
# background job to finish.
#
# Assumed vars:
#   NODE_TAG
#   CLUSTER_IP_RANGE
#   ENABLE_NODE_LOGGING (optional)
#   LOGGING_DESTINATION (optional)
function create-nodes-firewall() {
  # Create a single firewall rule for all minions (runs in the background).
  create-firewall-rule "${NODE_TAG}-all" "${CLUSTER_IP_RANGE}" "${NODE_TAG}" &

  # Report logging choice (if any).
  if [[ "${ENABLE_NODE_LOGGING-}" == "true" ]]; then
    echo "+++ Logging using Fluentd to ${LOGGING_DESTINATION:-unknown}"
  fi

  # Wait for last batch of jobs; a failure is reported but not propagated.
  if ! kube::util::wait-for-jobs; then
    echo -e "${color_red}Some commands failed.${color_norm}" >&2
  fi
}
# Writes the node environment and creates the node instance template named
# "${NODE_INSTANCE_PREFIX}-template".
#
# Assumed vars:
#   NODE_SCOPES
#   NODE_INSTANCE_PREFIX
function create-nodes-template() {
  echo "Creating minions."

  # TODO(zmerlynn): Refactor setting scope flags.
  # scope_flags is not read in this function. Being a local, it is still
  # visible to the functions called below (bash dynamic scoping) - presumably
  # create-node-instance-template consumes it; confirm before removing.
  local scope_flags="--no-scopes"
  if [[ -n "${NODE_SCOPES}" ]]; then
    scope_flags="--scopes ${NODE_SCOPES}"
  fi

  write-node-env

  local template_name="${NODE_INSTANCE_PREFIX}-template"
  create-node-instance-template "${template_name}"
}
# Computes how many managed instance groups are needed to hold NUM_NODES
# instances when each group holds at most MAX_INSTANCES_PER_MIG of them
# (ceiling division).
#
# Assumes:
# - MAX_INSTANCES_PER_MIG (defaults to 1000; values <= 0 fall back to 1000)
# - NUM_NODES
# exports:
# - NUM_MIGS
function set_num_migs() {
  local per_mig_limit=${MAX_INSTANCES_PER_MIG:-1000}

  if (( per_mig_limit <= 0 )); then
    echo "MAX_INSTANCES_PER_MIG cannot be negative. Assuming default 1000"
    per_mig_limit=1000
  fi

  NUM_MIGS=$(( (${NUM_NODES} + per_mig_limit - 1) / per_mig_limit ))
  export NUM_MIGS
}
# Creates NUM_MIGS managed instance groups from the
# "${NODE_INSTANCE_PREFIX}-template" instance template and spreads NUM_NODES
# instances evenly across them. Each group is waited on until it is stable.
# Failures of individual gcloud calls are deliberately ignored (|| true).
#
# Assumes:
# - NUM_MIGS
# - NODE_INSTANCE_PREFIX
# - NUM_NODES
# - PROJECT
# - ZONE
function create-nodes() {
  local template_name="${NODE_INSTANCE_PREFIX}-template"
  local instances_left=${NUM_NODES}
  # Loop state is kept local so that callers' variables (notably a loop
  # counter named "i") are not clobbered.
  local i
  local group_name
  local this_mig_size

  #TODO: parallelize this loop to speed up the process
  for ((i=1; i<=${NUM_MIGS}; i++)); do
    group_name="${NODE_INSTANCE_PREFIX}-group-$i"
    if [[ $i == ${NUM_MIGS} ]]; then
      # TODO: We don't add a suffix for the last group to keep backward compatibility when there's only one MIG.
      # We should change it at some point, but note #18545 when changing this.
      group_name="${NODE_INSTANCE_PREFIX}-group"
    fi

    # Spread the remaining number of nodes evenly over the remaining groups.
    this_mig_size=$((${instances_left} / (${NUM_MIGS}-${i}+1)))
    instances_left=$((instances_left-${this_mig_size}))

    gcloud compute instance-groups managed \
        create "${group_name}" \
        --project "${PROJECT}" \
        --zone "${ZONE}" \
        --base-instance-name "${group_name}" \
        --size "${this_mig_size}" \
        --template "${template_name}" || true;
    gcloud compute instance-groups managed wait-until-stable \
        "${group_name}" \
        --zone "${ZONE}" \
        --project "${PROJECT}" || true;
  done
}
# Builds the cluster autoscaler flags: one "--nodes=min:max:url" entry per
# managed instance group plus the scale-down switch. The min/max budgets are
# spread evenly over the groups, using the same group names as create-nodes.
#
# Assumes:
# - NUM_MIGS
# - NODE_INSTANCE_PREFIX
# - PROJECT
# - ZONE
# - AUTOSCALER_MAX_NODES
# - AUTOSCALER_MIN_NODES
# - AUTOSCALER_ENABLE_SCALE_DOWN
# Exports
# - AUTOSCALER_MIG_CONFIG
#
# Exits with status 2 if either limit is smaller than NUM_MIGS.
function create-cluster-autoscaler-mig-config() {
  # Each MIG must have at least one node, so the min number of nodes
  # must be greater or equal to the number of migs.
  # NOTE: -lt compares numerically; "<" inside [[ ]] compares strings, which
  # would treat 10 as smaller than 2.
  if [[ "${AUTOSCALER_MIN_NODES}" -lt "${NUM_MIGS}" ]]; then
    echo "AUTOSCALER_MIN_NODES must be greater or equal ${NUM_MIGS}"
    exit 2
  fi

  # For the same reason the max number of nodes must be greater or equal to
  # the number of migs.
  if [[ "${AUTOSCALER_MAX_NODES}" -lt "${NUM_MIGS}" ]]; then
    echo "AUTOSCALER_MAX_NODES must be greater or equal ${NUM_MIGS}"
    exit 2
  fi

  # The code assumes that the migs were created with create-nodes
  # function which tries to evenly spread nodes across the migs.
  AUTOSCALER_MIG_CONFIG=""

  local left_min=${AUTOSCALER_MIN_NODES}
  local left_max=${AUTOSCALER_MAX_NODES}
  local i
  local group_name
  local migs_left
  local this_mig_min
  local this_mig_max
  local mig_url

  for ((i=1; i<=${NUM_MIGS}; i++)); do
    group_name="${NODE_INSTANCE_PREFIX}-group-$i"
    if [[ $i == ${NUM_MIGS} ]]; then
      # TODO: We don't add a suffix for the last group to keep backward compatibility when there's only one MIG.
      # We should change it at some point, but note #18545 when changing this.
      group_name="${NODE_INSTANCE_PREFIX}-group"
    fi

    # Number of groups that still need a share, including this one.
    migs_left=$((NUM_MIGS - i + 1))
    this_mig_min=$((left_min / migs_left))
    this_mig_max=$((left_max / migs_left))
    left_min=$((left_min - this_mig_min))
    left_max=$((left_max - this_mig_max))

    mig_url="https://www.googleapis.com/compute/v1/projects/${PROJECT}/zones/${ZONE}/instanceGroups/${group_name}"
    AUTOSCALER_MIG_CONFIG="${AUTOSCALER_MIG_CONFIG} --nodes=${this_mig_min}:${this_mig_max}:${mig_url}"
  done

  AUTOSCALER_MIG_CONFIG="${AUTOSCALER_MIG_CONFIG} --scale-down-enabled=${AUTOSCALER_ENABLE_SCALE_DOWN}"
}
# Builds and prints the cluster autoscaler configuration, but only when
# ENABLE_CLUSTER_AUTOSCALER is "true"; otherwise does nothing.
#
# Assumes:
# - NUM_MIGS
# - NODE_INSTANCE_PREFIX
# - PROJECT
# - ZONE
# - ENABLE_CLUSTER_AUTOSCALER
# - AUTOSCALER_MAX_NODES
# - AUTOSCALER_MIN_NODES
function create-autoscaler-config() {
  # Nothing to do unless the autoscaler was requested.
  if [[ "${ENABLE_CLUSTER_AUTOSCALER}" != "true" ]]; then
    return 0
  fi

  create-cluster-autoscaler-mig-config
  echo "Using autoscaler config: ${AUTOSCALER_MIG_CONFIG}"
}
# Polls the apiserver (https://${KUBE_MASTER_IP}/api/v1/pods) every 2 seconds
# until it answers successfully, then exports the credential locations, writes
# the kubeconfig entries and prints where the master is running.
#
# Assumed vars:
#   KUBE_CLUSTER_INITIALIZATION_TIMEOUT
#   CERT_DIR
#   KUBE_BEARER_TOKEN
#   PROJECT
#   INSTANCE_PREFIX
#
# Vars set:
#   secure (global; "--insecure" on OS X 10.9.x, empty otherwise)
#   KUBE_CERT, KUBE_KEY, CA_CERT, CONTEXT (exported)
#
# Exits with status 2 if the apiserver is not reachable within the timeout.
function check-cluster() {
  detect-node-names
  detect-master

  printf '%s\n' \
    "Waiting up to ${KUBE_CLUSTER_INITIALIZATION_TIMEOUT} seconds for cluster initialization." \
    "" \
    " This will continually check to see if the API for kubernetes is reachable." \
    " This may time out if there was some uncaught error during start up." \
    ""

  # curl in mavericks is borked.
  secure=""
  if which sw_vers >& /dev/null \
    && [[ $(sw_vers | grep ProductVersion | awk '{print $2}') = "10.9."* ]]; then
    secure="--insecure"
  fi

  local start_time=$(date +%s)
  # ${secure} is left unquoted on purpose: when empty it must vanish instead of
  # being passed to curl as an empty argument.
  while ! curl --cacert "${CERT_DIR}/pki/ca.crt" \
      -H "Authorization: Bearer ${KUBE_BEARER_TOKEN}" \
      ${secure} \
      --max-time 5 --fail --output /dev/null --silent \
      "https://${KUBE_MASTER_IP}/api/v1/pods"; do
    local elapsed=$(($(date +%s) - ${start_time}))
    if [[ ${elapsed} -gt ${KUBE_CLUSTER_INITIALIZATION_TIMEOUT} ]]; then
      echo -e "${color_red}Cluster failed to initialize within ${KUBE_CLUSTER_INITIALIZATION_TIMEOUT} seconds.${color_norm}" >&2
      exit 2
    fi
    printf "."
    sleep 2
  done

  echo "Kubernetes cluster created."

  export KUBE_CERT="${CERT_DIR}/pki/issued/kubecfg.crt"
  export KUBE_KEY="${CERT_DIR}/pki/private/kubecfg.key"
  export CA_CERT="${CERT_DIR}/pki/ca.crt"
  export CONTEXT="${PROJECT}_${INSTANCE_PREFIX}"

  # The subshell confines the restrictive umask to the kubeconfig writes.
  (
    umask 077
    # Update the user's kubeconfig to include credentials for this apiserver.
    create-kubeconfig
    create-kubeconfig-for-federation
  )

  # ensures KUBECONFIG is set
  get-kubeconfig-basicauth

  echo
  echo -e "${color_green}Kubernetes cluster is running. The master is running at:"
  echo
  echo -e "${color_yellow} https://${KUBE_MASTER_IP}"
  echo
  echo -e "${color_green}The user name and password to use is located in ${KUBECONFIG}.${color_norm}"
  echo
}
# Removes master replica from etcd cluster.
#
# Runs a command over ssh on the existing master. That command lists the
# members through etcd's v2 API on localhost, selects the entry mentioning
# REPLICA_NAME, takes its third double-quote-delimited field (presumably the
# member id) and issues a DELETE for that member.
#
# Assumed vars:
#   REPLICA_NAME
#   PROJECT
#   EXISTING_MASTER_NAME
#   EXISTING_MASTER_ZONE
#
# $1: etcd client port
# returns the result of ssh command which removes replica
function remove-replica-from-etcd() {
  local -r port="${1}"
  # \$( ... ) is escaped so that the inner curl pipeline is evaluated on the
  # master, not locally.
  local -r remote_command="curl -s localhost:${port}/v2/members/\$(curl -s localhost:${port}/v2/members -XGET | sed 's/{\\\"id/\n/g' | grep ${REPLICA_NAME} | cut -f 3 -d \\\") -XDELETE -L 2>/dev/null"

  # Last command of the function: its exit status is the function's status.
  gcloud compute ssh "${EXISTING_MASTER_NAME}" \
    --project "${PROJECT}" \
    --zone "${EXISTING_MASTER_ZONE}" \
    --command "${remote_command}"
}
# Delete a kubernetes cluster. test-teardown runs cluster/kube-down.sh, which
# presumably ends up here.
#
# Assumed vars:
#   MASTER_NAME
#   NODE_INSTANCE_PREFIX
#   INSTANCE_PREFIX
#   NODE_TAG
#   ZONE
#   REGION
#   ENABLE_CLUSTER_REGISTRY
#   CLUSTER_REGISTRY_DISK
#
# Instances and routes are deleted in batches (see "batch" below) to avoid
# issuing too many API calls and exceeding API quota. It is important to bring
# down the instances before bringing down the firewall rules and routes.
#
# Order of operations: managed instance groups, instance templates, the master
# replica of this zone (etcd membership, load balancer membership, VM, disk),
# the cluster registry disk, then - depending on how many master replicas
# remain - the load balancer (one left) or the shared network resources (none
# left), remaining minions, routes, the influxdb disk and finally kubeconfig.
#
# errexit is switched off for the duration of the teardown and switched ON
# unconditionally at the end.
function kube-down() {
  local -r batch=200

  detect-project
  detect-node-names # For INSTANCE_GROUPS

  echo "Bringing down cluster"
  set +e  # Do not stop on error

  # Get the name of the managed instance group template before we delete the
  # managed instance group. (The name of the managed instance group template may
  # change during a cluster upgrade.)
  local templates=$(get-template "${PROJECT}")

  local group
  for group in ${INSTANCE_GROUPS[@]:-}; do
    if gcloud compute instance-groups managed describe "${group}" --project "${PROJECT}" --zone "${ZONE}" &>/dev/null; then
      gcloud compute instance-groups managed delete \
        --project "${PROJECT}" \
        --quiet \
        --zone "${ZONE}" \
        "${group}" &
    fi
  done

  # Wait for last batch of jobs
  kube::util::wait-for-jobs || {
    echo -e "Failed to delete instance group(s)." >&2
  }

  # templates is a whitespace separated list; word splitting is intended.
  local template
  for template in ${templates[@]:-}; do
    if gcloud compute instance-templates describe --project "${PROJECT}" "${template}" &>/dev/null; then
      gcloud compute instance-templates delete \
        --project "${PROJECT}" \
        --quiet \
        "${template}"
    fi
  done

  local -r REPLICA_NAME="$(get-replica-name)"

  set-existing-master

  # Un-register the master replica from etcd and events etcd.
  remove-replica-from-etcd 4001
  remove-replica-from-etcd 4002

  # Delete the master replica (if it exists).
  if gcloud compute instances describe "${REPLICA_NAME}" --zone "${ZONE}" --project "${PROJECT}" &>/dev/null; then
    # If there is a load balancer in front of apiservers we need to first update its configuration.
    if gcloud compute target-pools describe "${MASTER_NAME}" --region "${REGION}" --project "${PROJECT}" &>/dev/null; then
      gcloud compute target-pools remove-instances "${MASTER_NAME}" \
        --project "${PROJECT}" \
        --zone "${ZONE}" \
        --instances "${REPLICA_NAME}"
    fi
    # Now we can safely delete the VM.
    gcloud compute instances delete \
      --project "${PROJECT}" \
      --quiet \
      --delete-disks all \
      --zone "${ZONE}" \
      "${REPLICA_NAME}"
  fi

  # Delete the master replica pd (possibly leaked by kube-up if master create failed).
  if gcloud compute disks describe "${REPLICA_NAME}"-pd --zone "${ZONE}" --project "${PROJECT}" &>/dev/null; then
    gcloud compute disks delete \
      --project "${PROJECT}" \
      --quiet \
      --zone "${ZONE}" \
      "${REPLICA_NAME}"-pd
  fi

  # Delete disk for cluster registry if enabled
  if [[ "${ENABLE_CLUSTER_REGISTRY}" == true && -n "${CLUSTER_REGISTRY_DISK}" ]]; then
    if gcloud compute disks describe "${CLUSTER_REGISTRY_DISK}" --zone "${ZONE}" --project "${PROJECT}" &>/dev/null; then
      gcloud compute disks delete \
        --project "${PROJECT}" \
        --quiet \
        --zone "${ZONE}" \
        "${CLUSTER_REGISTRY_DISK}"
    fi
  fi

  # Check if there are any remaining master replicas.
  local REMAINING_MASTER_COUNT=$(gcloud compute instances list \
    --project "${PROJECT}" \
    --regexp "$(get-replica-name-regexp)" \
    --format "value(zone)" | wc -l)

  # NOTE: the count is compared numerically (-eq) below. Some wc
  # implementations (BSD/macOS) pad their output with blanks, which makes a
  # string comparison against "0" or "1" fail and silently skips the cleanup.

  # In the replicated scenario, if there's only a single master left, we should also delete load balancer in front of it.
  if [[ "${REMAINING_MASTER_COUNT}" -eq 1 ]]; then
    if gcloud compute forwarding-rules describe "${MASTER_NAME}" --region "${REGION}" --project "${PROJECT}" &>/dev/null; then
      detect-master
      local EXISTING_MASTER_ZONE=$(gcloud compute instances list "${MASTER_NAME}" \
        --project "${PROJECT}" --format="value(zone)")
      gcloud compute forwarding-rules delete \
        --project "${PROJECT}" \
        --region "${REGION}" \
        --quiet \
        "${MASTER_NAME}"
      attach-external-ip "${MASTER_NAME}" "${EXISTING_MASTER_ZONE}" "${KUBE_MASTER_IP}"
      gcloud compute target-pools delete \
        --project "${PROJECT}" \
        --region "${REGION}" \
        --quiet \
        "${MASTER_NAME}"
    fi
  fi

  # If there are no more remaining master replicas, we should delete all remaining network resources.
  if [[ "${REMAINING_MASTER_COUNT}" -eq 0 ]]; then
    # Delete firewall rule for the master.
    if gcloud compute firewall-rules describe --project "${PROJECT}" "${MASTER_NAME}-https" &>/dev/null; then
      gcloud compute firewall-rules delete \
        --project "${PROJECT}" \
        --quiet \
        "${MASTER_NAME}-https"
    fi
    # Delete the master's reserved IP
    if gcloud compute addresses describe "${MASTER_NAME}-ip" --region "${REGION}" --project "${PROJECT}" &>/dev/null; then
      gcloud compute addresses delete \
        --project "${PROJECT}" \
        --region "${REGION}" \
        --quiet \
        "${MASTER_NAME}-ip"
    fi
    # Delete firewall rule for minions.
    if gcloud compute firewall-rules describe --project "${PROJECT}" "${NODE_TAG}-all" &>/dev/null; then
      gcloud compute firewall-rules delete \
        --project "${PROJECT}" \
        --quiet \
        "${NODE_TAG}-all"
    fi
  fi

  # Find out what minions are running.
  local -a minions
  minions=( $(gcloud compute instances list \
                --project "${PROJECT}" --zones "${ZONE}" \
                --regexp "${NODE_INSTANCE_PREFIX}-.+" \
                --format='value(name)') )
  # If any minions are running, delete them in batches.
  while (( "${#minions[@]}" > 0 )); do
    echo Deleting nodes "${minions[*]::${batch}}"
    gcloud compute instances delete \
      --project "${PROJECT}" \
      --quiet \
      --delete-disks boot \
      --zone "${ZONE}" \
      "${minions[@]::${batch}}"
    minions=( "${minions[@]:${batch}}" )
  done

  # Delete routes.
  local -a routes
  # Clean up all routes w/ names like "<cluster-name>-<node-GUID>"
  # e.g. "kubernetes-12345678-90ab-cdef-1234-567890abcdef". The name is
  # determined by the node controller on the master.
  # Note that this is currently a noop, as synchronously deleting the node MIG
  # first allows the master to cleanup routes itself.
  local TRUNCATED_PREFIX="${INSTANCE_PREFIX:0:26}"
  routes=( $(gcloud compute routes list --project "${PROJECT}" \
    --regexp "${TRUNCATED_PREFIX}-.{8}-.{4}-.{4}-.{4}-.{12}" \
    --format='value(name)') )
  while (( "${#routes[@]}" > 0 )); do
    echo Deleting routes "${routes[*]::${batch}}"
    gcloud compute routes delete \
      --project "${PROJECT}" \
      --quiet \
      "${routes[@]::${batch}}"
    routes=( "${routes[@]:${batch}}" )
  done

  # Delete persistent disk for influx-db.
  if gcloud compute disks describe "${INSTANCE_PREFIX}"-influxdb-pd --zone "${ZONE}" --project "${PROJECT}" &>/dev/null; then
    gcloud compute disks delete \
      --project "${PROJECT}" \
      --quiet \
      --zone "${ZONE}" \
      "${INSTANCE_PREFIX}"-influxdb-pd
  fi

  # If there are no more remaining master replicas, we should update kubeconfig.
  if [[ "${REMAINING_MASTER_COUNT}" -eq 0 ]]; then
    export CONTEXT="${PROJECT}_${INSTANCE_PREFIX}"
    clear-kubeconfig
  fi

  set -e
}
# Prints name of one of the master replicas in the current zone. It will be either
# just MASTER_NAME or MASTER_NAME with a suffix for a replica (see get-replica-name-regexp).
# Only the first name reported by gcloud is printed.
#
# Assumed vars:
#   PROJECT
#   ZONE
#   MASTER_NAME
#
# NOTE: Must be in sync with get-replica-name-regexp and set-replica-name.
function get-replica-name() {
  local first_replica=$(gcloud compute instances list \
    --project "${PROJECT}" \
    --zone "${ZONE}" \
    --regexp "$(get-replica-name-regexp)" \
    --format "value(name)" | head -n1)
  # Unquoted on purpose: an empty result prints just a newline and surrounding
  # whitespace is dropped.
  echo ${first_replica}
}
# Prints comma-separated names of all of the master replicas in all zones.
#
# Assumed vars:
#   PROJECT
#   MASTER_NAME
#
# NOTE: Must be in sync with get-replica-name-regexp and set-replica-name.
function get-all-replica-names() {
  # Newlines become commas; the trailing comma is stripped by sed.
  local joined_names=$(gcloud compute instances list \
    --project "${PROJECT}" \
    --regexp "$(get-replica-name-regexp)" \
    --format "value(name)" | tr "\n" "," | sed 's/,$//')
  # Unquoted on purpose, matching the word splitting of a bare substitution.
  echo ${joined_names}
}
# Prints regexp for full master machine name. In a cluster with replicated master,
# VM names may either be MASTER_NAME or MASTER_NAME with a suffix for a replica.
# The suffix is a dash followed by exactly three arbitrary characters.
#
# Assumed vars:
#   MASTER_NAME
function get-replica-name-regexp() {
  local -r suffix_pattern='(-...)?'
  echo "${MASTER_NAME}${suffix_pattern}"
}
# Sets REPLICA_NAME to a unique name for a master replica that will match
# expected regexp (see get-replica-name-regexp).
#
# The suffix is made of the first three characters of the md5 sum of the
# current date output; it is regenerated for as long as it occurs anywhere in
# the list of existing replica names.
#
# Assumed vars:
#   PROJECT
#   ZONE
#   MASTER_NAME
#
# Sets:
#   REPLICA_NAME
#   suffix (global, not declared local)
function set-replica-name() {
  local instances=$(gcloud compute instances list \
    --project "${PROJECT}" \
    --regexp "$(get-replica-name-regexp)" \
    --format "value(name)")

  suffix=""
  # The empty initial suffix matches any input, so at least one suffix is
  # always generated.
  while true; do
    if ! grep "${suffix}" <<< "${instances}" &>/dev/null; then
      break
    fi
    suffix="$(date | md5sum | head -c3)"
  done

  REPLICA_NAME="${MASTER_NAME}-${suffix}"
}
# Gets the instance template for given NODE_INSTANCE_PREFIX. It echos the template name so that the function
# output can be used. Matching names are "${NODE_INSTANCE_PREFIX}-template",
# optionally followed by a dashed release or CI version.
#
# Assumed vars:
#   NODE_INSTANCE_PREFIX
#   KUBE_RELEASE_VERSION_DASHED_REGEX
#   KUBE_CI_VERSION_DASHED_REGEX
#
# $1: project
function get-template() {
  local version_suffix="(-(${KUBE_RELEASE_VERSION_DASHED_REGEX}|${KUBE_CI_VERSION_DASHED_REGEX}))?"
  gcloud compute instance-templates list \
    -r "${NODE_INSTANCE_PREFIX}-template${version_suffix}" \
    --project="${1}" \
    --format='value(name)'
}
# Checks if there are any present resources related kubernetes cluster.
# The checks stop at the first resource found.
#
# Assumed vars:
#   MASTER_NAME
#   NODE_INSTANCE_PREFIX
#   INSTANCE_PREFIX
#   NODE_TAG
#   CLUSTER_REGISTRY_DISK (optional)
#   ZONE
#   REGION
# Vars set:
#   KUBE_RESOURCE_FOUND - description of the first resource found, or empty
#
# Returns 1 if a resource was found, 0 otherwise.
function check-resources() {
  detect-project
  detect-node-names

  echo "Looking for already existing resources"
  KUBE_RESOURCE_FOUND=""

  if [[ -n "${INSTANCE_GROUPS[@]:-}" ]]; then
    KUBE_RESOURCE_FOUND="Managed instance groups ${INSTANCE_GROUPS[@]}"
    return 1
  fi

  if gcloud compute instance-templates describe --project "${PROJECT}" "${NODE_INSTANCE_PREFIX}-template" &>/dev/null; then
    KUBE_RESOURCE_FOUND="Instance template ${NODE_INSTANCE_PREFIX}-template"
    return 1
  fi

  if gcloud compute instances describe --project "${PROJECT}" "${MASTER_NAME}" --zone "${ZONE}" &>/dev/null; then
    KUBE_RESOURCE_FOUND="Kubernetes master ${MASTER_NAME}"
    return 1
  fi

  if gcloud compute disks describe --project "${PROJECT}" "${MASTER_NAME}"-pd --zone "${ZONE}" &>/dev/null; then
    KUBE_RESOURCE_FOUND="Persistent disk ${MASTER_NAME}-pd"
    return 1
  fi

  # Only look for the registry disk when a name is configured; describing an
  # empty name cannot match anything.
  if [[ -n "${CLUSTER_REGISTRY_DISK:-}" ]] \
    && gcloud compute disks describe --project "${PROJECT}" "${CLUSTER_REGISTRY_DISK}" --zone "${ZONE}" &>/dev/null; then
    KUBE_RESOURCE_FOUND="Persistent disk ${CLUSTER_REGISTRY_DISK}"
    return 1
  fi

  # Find out what minions are running.
  local -a minions
  minions=( $(gcloud compute instances list \
                --project "${PROJECT}" --zones "${ZONE}" \
                --regexp "${NODE_INSTANCE_PREFIX}-.+" \
                --format='value(name)') )
  if (( "${#minions[@]}" > 0 )); then
    KUBE_RESOURCE_FOUND="${#minions[@]} matching ${NODE_INSTANCE_PREFIX}-.+"
    return 1
  fi

  if gcloud compute firewall-rules describe --project "${PROJECT}" "${MASTER_NAME}-https" &>/dev/null; then
    KUBE_RESOURCE_FOUND="Firewall rules for ${MASTER_NAME}-https"
    return 1
  fi

  # The rule that is checked is named after NODE_TAG, so the message reports
  # that name as well.
  if gcloud compute firewall-rules describe --project "${PROJECT}" "${NODE_TAG}-all" &>/dev/null; then
    KUBE_RESOURCE_FOUND="Firewall rules for ${NODE_TAG}-all"
    return 1
  fi

  local -a routes
  routes=( $(gcloud compute routes list --project "${PROJECT}" \
    --regexp "${INSTANCE_PREFIX}-minion-.{4}" --format='value(name)') )
  if (( "${#routes[@]}" > 0 )); then
    KUBE_RESOURCE_FOUND="${#routes[@]} routes matching ${INSTANCE_PREFIX}-minion-.{4}"
    return 1
  fi

  if gcloud compute addresses describe --project "${PROJECT}" "${MASTER_NAME}-ip" --region "${REGION}" &>/dev/null; then
    KUBE_RESOURCE_FOUND="Master's reserved IP"
    return 1
  fi

  # No resources found.
  return 0
}
# Prepare to push new binaries to kubernetes cluster
#
# Only the "debian" distribution is supported, for the master as well as for
# nodes; anything else makes the function exit with status 1.
#
# When preparing a node push, the node instance template is refreshed and every
# managed instance group is pointed at the refreshed template.
#
# Assumed vars:
#   KUBE_ROOT
#   NODE_OS_DISTRIBUTION
#   MASTER_OS_DISTRIBUTION
#   NODE_SCOPES, NODE_INSTANCE_PREFIX, PROJECT, ZONE (node push only)
# Vars set:
#   OUTPUT - log directory (${KUBE_ROOT}/_output/logs), created if missing
#
# $1 - whether prepare push to node
function prepare-push() {
  local node="${1-}"
  #TODO(dawnchen): figure out how to upgrade coreos node
  if [[ "${node}" == "true" && "${NODE_OS_DISTRIBUTION}" != "debian" ]]; then
    echo "Updating nodes in a kubernetes cluster with ${NODE_OS_DISTRIBUTION} is not supported yet." >&2
    exit 1
  fi
  if [[ "${node}" != "true" && "${MASTER_OS_DISTRIBUTION}" != "debian" ]]; then
    echo "Updating the master in a kubernetes cluster with ${MASTER_OS_DISTRIBUTION} is not supported yet." >&2
    exit 1
  fi

  # Quoted so that a KUBE_ROOT containing whitespace yields a single directory.
  OUTPUT="${KUBE_ROOT}/_output/logs"
  mkdir -p "${OUTPUT}"

  ensure-temp-dir
  detect-project
  detect-master
  detect-node-names
  get-kubeconfig-basicauth
  get-kubeconfig-bearertoken

  # Make sure we have the tar files staged on Google Storage
  tars_from_version

  # Prepare node env vars and update MIG template
  if [[ "${node}" == "true" ]]; then
    write-node-env

    # TODO(zmerlynn): Refactor setting scope flags.
    # scope_flags is not read in this function; as a local it stays visible to
    # the functions called below (bash dynamic scoping) - presumably
    # create-node-instance-template consumes it; confirm before removing.
    local scope_flags=
    if [[ -n "${NODE_SCOPES}" ]]; then
      scope_flags="--scopes ${NODE_SCOPES}"
    else
      scope_flags="--no-scopes"
    fi

    # Ugly hack: Since it is not possible to delete instance-template that is currently
    # being used, create a temp one, then delete the old one and recreate it once again.
    local tmp_template_name="${NODE_INSTANCE_PREFIX}-template-tmp"
    create-node-instance-template "${tmp_template_name}"

    local template_name="${NODE_INSTANCE_PREFIX}-template"
    local group
    for group in ${INSTANCE_GROUPS[@]:-}; do
      gcloud compute instance-groups managed \
        set-instance-template "${group}" \
        --template "${tmp_template_name}" \
        --zone "${ZONE}" \
        --project "${PROJECT}" || true;
    done

    gcloud compute instance-templates delete \
      --project "${PROJECT}" \
      --quiet \
      "${template_name}" || true

    create-node-instance-template "${template_name}"

    for group in ${INSTANCE_GROUPS[@]:-}; do
      gcloud compute instance-groups managed \
        set-instance-template "${group}" \
        --template "${template_name}" \
        --zone "${ZONE}" \
        --project "${PROJECT}" || true;
    done

    gcloud compute instance-templates delete \
      --project "${PROJECT}" \
      --quiet \
      "${tmp_template_name}" || true
  fi
}
# Push binaries to kubernetes master
#
# Refreshes the master's kube-env and startup-script metadata, then feeds the
# prepared configure-vm.sh to "sudo bash -s -- --push" on the master over ssh.
# Output of the remote run is written to ${OUTPUT}/push-${KUBE_MASTER}.log.
#
# Assumed vars:
#   KUBE_MASTER
#   KUBE_TEMP
#   OUTPUT
#   PROJECT
#   ZONE
function push-master() {
  echo "Updating master metadata ..."
  write-master-env
  prepare-startup-script
  add-instance-metadata-from-file "${KUBE_MASTER}" "kube-env=${KUBE_TEMP}/master-kube-env.yaml" "startup-script=${KUBE_TEMP}/configure-vm.sh"

  echo "Pushing to master (log at ${OUTPUT}/push-${KUBE_MASTER}.log) ..."
  # The script is passed on stdin; paths are quoted so that directories with
  # whitespace do not produce an ambiguous redirect.
  gcloud compute ssh --ssh-flag="-o LogLevel=quiet" \
    --project "${PROJECT}" --zone "${ZONE}" "${KUBE_MASTER}" \
    --command "sudo bash -s -- --push" \
    < "${KUBE_TEMP}/configure-vm.sh" &> "${OUTPUT}/push-${KUBE_MASTER}.log"
}
# Push binaries to kubernetes node
#
# Refreshes the node's kube-env and startup-script metadata, then feeds the
# prepared configure-vm.sh to "sudo bash -s -- --push" on the node over ssh.
# Output of the remote run is written to ${OUTPUT}/push-<node>.log.
#
# Assumed vars:
#   KUBE_TEMP
#   OUTPUT
#   PROJECT
#   ZONE
#
# $1 - node name
function push-node() {
  # Local, so that the caller's "node" variable is left alone.
  local node=${1}

  echo "Updating node ${node} metadata... "
  prepare-startup-script
  add-instance-metadata-from-file "${node}" "kube-env=${KUBE_TEMP}/node-kube-env.yaml" "startup-script=${KUBE_TEMP}/configure-vm.sh"

  echo "Start upgrading node ${node} (log at ${OUTPUT}/push-${node}.log) ..."
  # The script is passed on stdin; paths are quoted so that directories with
  # whitespace do not produce an ambiguous redirect.
  gcloud compute ssh --ssh-flag="-o LogLevel=quiet" \
    --project "${PROJECT}" --zone "${ZONE}" "${node}" \
    --command "sudo bash -s -- --push" \
    < "${KUBE_TEMP}/configure-vm.sh" &> "${OUTPUT}/push-${node}.log"
}
# Push binaries to kubernetes cluster
#
# Currently disabled: the function prints a notice and exits with status 1.
# Everything after the exit is unreachable and kept for when the push path is
# re-enabled.
function kube-push() {
  # Disable this until it's fixed.
  # See https://github.com/kubernetes/kubernetes/issues/17397
  printf '%s\n' \
    "./cluster/kube-push.sh is currently not supported in GCE." \
    "Please use ./cluster/gce/upgrade.sh."
  exit 1

  prepare-push true

  push-master

  local node_name
  for node_name in "${NODE_NAMES[@]}"; do
    push-node "${node_name}" &
  done

  if ! kube::util::wait-for-jobs; then
    echo -e "${color_red}Some commands failed.${color_norm}" >&2
  fi

  # TODO(zmerlynn): Re-create instance-template with the new
  # node-kube-env. This isn't important until the node-ip-range issue
  # is solved (because that's blocking automatic dynamic nodes from
  # working). The node-kube-env has to be composed with the KUBELET_TOKEN
  # and KUBE_PROXY_TOKEN. Ideally we would have
  # http://issue.k8s.io/3168
  # implemented before then, though, so avoiding this mess until then.

  echo
  echo "Kubernetes cluster is running. The master is running at:"
  echo
  echo " https://${KUBE_MASTER_IP}"
  echo
  echo "The user name and password to use is located in ~/.kube/config"
  echo
}
# -----------------------------------------------------------------------------
# Cluster specific test helpers used from hack/e2e.go

# Execute prior to running tests to build a release if required for env.
# Runs ${KUBE_ROOT}/build/release.sh and returns its exit status.
#
# Assumed Vars:
#   KUBE_ROOT
function test-build-release() {
  local release_script="${KUBE_ROOT}/build/release.sh"
  # Make a release
  "${release_script}"
}
# Creates a firewall rule for the nodes and waits until it is visible.
# Helper of test-setup.
#
# As there is no simple way to wait longer for the create operation, the rule
# is polled every 5 seconds for up to 20 minutes; the function exits with
# status 1 if the rule does not show up in time.
#
# Assumed vars:
#   PROJECT
#   NODE_TAG
#   NETWORK
#
# $1 - firewall rule name
# $2 - value for --allow (e.g. "tcp:80,tcp:8080")
function _test-setup-create-firewall() {
  local rule_name="${1}"
  local allowed="${2}"
  local start=$(date +%s)

  # Errors (e.g. the rule existing already) are ignored here; the polling below
  # decides whether the rule is usable.
  gcloud compute firewall-rules create \
    --project "${PROJECT}" \
    --target-tags "${NODE_TAG}" \
    --allow "${allowed}" \
    --network "${NETWORK}" \
    "${rule_name}" 2> /dev/null || true

  while ! gcloud compute firewall-rules describe --project "${PROJECT}" "${rule_name}" 2> /dev/null; do
    if [[ $((start + 1200)) -lt $(date +%s) ]]; then
      echo -e "${color_red}Failed to create firewall ${rule_name} in ${PROJECT}${color_norm}" >&2
      exit 1
    fi
    sleep 5
  done
}

# Execute prior to running tests to initialize required structure. This is
# called from hack/e2e.go only when running -up.
#
# Brings the cluster up (once per zone in E2E_ZONES when MULTIZONE is "true",
# reusing the master for every zone after the first) and opens the ports the
# tests rely on.
#
# Assumed vars:
#   Variables from config.sh
function test-setup() {
  # Detect the project into $PROJECT if it isn't set
  detect-project

  if [[ ${MULTIZONE:-} == "true" ]]; then
    for KUBE_GCE_ZONE in ${E2E_ZONES}
    do
      KUBE_GCE_ZONE="${KUBE_GCE_ZONE}" KUBE_USE_EXISTING_MASTER="${KUBE_USE_EXISTING_MASTER:-}" "${KUBE_ROOT}/cluster/kube-up.sh"
      KUBE_USE_EXISTING_MASTER="true" # For subsequent zones we use the existing master
    done
  else
    "${KUBE_ROOT}/cluster/kube-up.sh"
  fi

  # Open up port 80 & 8080 so common containers on minions can be reached
  # TODO(roberthbailey): Remove this once we are no longer relying on hostPorts.
  _test-setup-create-firewall "${NODE_TAG}-${INSTANCE_PREFIX}-http-alt" "tcp:80,tcp:8080"

  # Open up the NodePort range
  # TODO(justinsb): Move to main setup, if we decide whether we want to do this by default.
  _test-setup-create-firewall "${NODE_TAG}-${INSTANCE_PREFIX}-nodeports" "tcp:30000-32767,udp:30000-32767"
}
# Execute after running tests to perform any required clean-up. This is called
# from hack/e2e.go
#
# Removes the two firewall rules named "...-http-alt" and "...-nodeports"
# (failures are ignored) and then runs cluster/kube-down.sh - once per zone,
# in reverse order, when MULTIZONE is "true".
#
# Assumed vars:
#   NODE_TAG
#   INSTANCE_PREFIX
#   KUBE_ROOT
#   MULTIZONE, E2E_ZONES (optional)
function test-teardown() {
  detect-project
  echo "Shutting down test cluster in background."

  local rule_suffix
  for rule_suffix in http-alt nodeports; do
    gcloud compute firewall-rules delete \
      --project "${PROJECT}" \
      --quiet \
      "${NODE_TAG}-${INSTANCE_PREFIX}-${rule_suffix}" || true
  done

  if [[ ${MULTIZONE:-} != "true" ]]; then
    "${KUBE_ROOT}/cluster/kube-down.sh"
    return
  fi

  local zones=( ${E2E_ZONES} )
  # tear them down in reverse order, finally tearing down the master too.
  zone_num=$(( ${#zones[@]} - 1 ))
  while (( zone_num > 0 )); do
    KUBE_GCE_ZONE="${zones[zone_num]}" KUBE_USE_EXISTING_MASTER="true" "${KUBE_ROOT}/cluster/kube-down.sh"
    zone_num=$(( zone_num - 1 ))
  done
  KUBE_GCE_ZONE="${zones[0]}" KUBE_USE_EXISTING_MASTER="false" "${KUBE_ROOT}/cluster/kube-down.sh"
}
# SSH to a node by name ($1) and run a command ($2).
#
# Connectivity is probed up to five times (5 seconds apart) with a no-op
# command. The real command is run afterwards in any case, even if every probe
# failed; its exit status is the function's status.
#
# Assumed vars:
#   PROJECT
#   ZONE
function ssh-to-node() {
  local node="$1"
  local cmd="$2"
  # Invocation shared by the probe and the real command.
  local -a ssh_invocation=(
    gcloud compute ssh
    --ssh-flag="-o LogLevel=quiet"
    --ssh-flag="-o ConnectTimeout=30"
    --project "${PROJECT}"
    --zone="${ZONE}"
    "${node}"
  )

  # Loop until we can successfully ssh into the box
  for try in 1 2 3 4 5; do
    "${ssh_invocation[@]}" --command "echo test > /dev/null" && break
    sleep 5
  done

  # Then actually try the command.
  "${ssh_invocation[@]}" --command "${cmd}"
}
# Perform preparations required to run e2e tests.
# The only preparation done here is project detection (detect-project).
function prepare-e2e() {
  detect-project
}
# Writes configure-vm.sh to a temporary location with comments stripped. GCE
# limits the size of metadata fields to 32K, and stripping comments is the
# easiest way to buy us a little more room.
#
# Only whole-line comments are removed; lines starting with "#!" and trailing
# comments after code are kept.
#
# Assumed vars:
#   KUBE_ROOT
#   KUBE_TEMP
function prepare-startup-script() {
  # Paths are quoted so that directories containing whitespace work.
  sed '/^\s*#\([^!].*\)*$/ d' "${KUBE_ROOT}/cluster/gce/configure-vm.sh" \
    > "${KUBE_TEMP}/configure-vm.sh"
}
|