  1. #!/bin/bash
  2. # Copyright 2014 The Kubernetes Authors.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. # A library of helper functions and constant for the local config.
  16. # Experimental flags can be removed/renamed at any time.
  17. # The intent is to allow experimentation/advanced functionality before we
  18. # are ready to commit to supporting it.
  19. # Experimental functionality:
  20. # KUBE_USE_EXISTING_MASTER=true
  21. # Detect and reuse an existing master; useful if you want to
  22. # create more nodes, perhaps with a different instance type or in
  23. # a different subnet/AZ
  24. # KUBE_SUBNET_CIDR=172.20.1.0/24
  25. # Override the default subnet CIDR; useful if you want to create
  26. # a second subnet. The default subnet is 172.20.0.0/24. The VPC
  27. # is created with 172.20.0.0/16; you must pick a sub-CIDR of that.
  28. # Use the config file specified in $KUBE_CONFIG_FILE, or default to
  29. # config-default.sh.
# Root of the kubernetes tree, derived from this script's location.
KUBE_ROOT=$(dirname "${BASH_SOURCE}")/../..

# Load the cluster configuration ($KUBE_CONFIG_FILE overrides the default
# config-default.sh) plus the provider-independent helper libraries.
source "${KUBE_ROOT}/cluster/aws/${KUBE_CONFIG_FILE-"config-default.sh"}"
source "${KUBE_ROOT}/cluster/common.sh"
source "${KUBE_ROOT}/cluster/lib/util.sh"

# NOTE(review): presumably consumed by the sourced scripts above — confirm.
ALLOCATE_NODE_CIDRS=true

NODE_INSTANCE_PREFIX="${INSTANCE_PREFIX}-minion"

# The Auto Scaling Group (ASG) name must be unique, so we include the zone
ASG_NAME="${NODE_INSTANCE_PREFIX}-group-${ZONE}"

# We could allow the master disk volume id to be specified in future
MASTER_DISK_ID=

# Well known tags
TAG_KEY_MASTER_IP="kubernetes.io/master-ip"

OS_DISTRIBUTION=${KUBE_OS_DISTRIBUTION}

# Defaults: the generic "ubuntu" alias maps to a concrete release (wily).
if [[ "${OS_DISTRIBUTION}" == "ubuntu" ]]; then
  OS_DISTRIBUTION=wily
fi
  47. # Loads the distro-specific utils script.
  48. # If the distro is not recommended, prints warnings or exits.
  49. function load_distro_utils () {
  50. case "${OS_DISTRIBUTION}" in
  51. jessie)
  52. ;;
  53. wily)
  54. ;;
  55. vivid)
  56. echo "vivid is no longer supported by kube-up; please use jessie instead" >&2
  57. exit 2
  58. ;;
  59. coreos)
  60. echo "coreos is no longer supported by kube-up; please use jessie instead" >&2
  61. exit 2
  62. ;;
  63. trusty)
  64. echo "trusty is no longer supported by kube-up; please use jessie or wily instead" >&2
  65. exit 2
  66. ;;
  67. wheezy)
  68. echo "wheezy is no longer supported by kube-up; please use jessie instead" >&2
  69. exit 2
  70. ;;
  71. *)
  72. echo "Cannot start cluster using os distro: ${OS_DISTRIBUTION}" >&2
  73. echo "The current recommended distro is jessie" >&2
  74. exit 2
  75. ;;
  76. esac
  77. source "${KUBE_ROOT}/cluster/aws/${OS_DISTRIBUTION}/util.sh"
  78. }
load_distro_utils

# Derive the AWS region from the zone: a zone like "us-west-2a" ends in a
# letter, which we strip to get the region ("us-west-2").
# ${ZONE%?} removes the final character.
re='[a-zA-Z]'
if [[ ${ZONE: -1} =~ $re ]]; then
  AWS_REGION=${ZONE%?}
else
  AWS_REGION=$ZONE
fi

export AWS_DEFAULT_REGION=${AWS_REGION}
export AWS_DEFAULT_OUTPUT=text
# Note: these are two-word command prefixes; callers expand them unquoted on
# purpose so they word-split into "aws" + subcommand.
AWS_CMD="aws ec2"
AWS_ASG_CMD="aws autoscaling"

# Network layout: the VPC is <base>.0.0/16; the (default) subnet is
# <base>.0.0/24 and the master gets the ".9" host address inside it.
VPC_CIDR_BASE=${KUBE_VPC_CIDR_BASE:-172.20}
MASTER_IP_SUFFIX=.9
VPC_CIDR=${VPC_CIDR_BASE}.0.0/16
SUBNET_CIDR=${VPC_CIDR_BASE}.0.0/24
if [[ -n "${KUBE_SUBNET_CIDR:-}" ]]; then
  echo "Using subnet CIDR override: ${KUBE_SUBNET_CIDR}"
  SUBNET_CIDR=${KUBE_SUBNET_CIDR}
fi
# Default master internal IP: subnet prefix (CIDR with last octet/suffix
# stripped) + MASTER_IP_SUFFIX.
if [[ -z "${MASTER_INTERNAL_IP-}" ]]; then
  MASTER_INTERNAL_IP="${SUBNET_CIDR%.*}${MASTER_IP_SUFFIX}"
fi

MASTER_SG_NAME="kubernetes-master-${CLUSTER_ID}"
NODE_SG_NAME="kubernetes-minion-${CLUSTER_ID}"

# Be sure to map all the ephemeral drives. We can specify more than we actually have.
# TODO: Actually mount the correct number (especially if we have more), though this is non-trivial, and
# only affects the big storage instance types, which aren't a typical use case right now.
# (Leading comma: this fragment is appended after the root-device mapping.)
EPHEMERAL_BLOCK_DEVICE_MAPPINGS=",{\"DeviceName\": \"/dev/sdc\",\"VirtualName\":\"ephemeral0\"},{\"DeviceName\": \"/dev/sdd\",\"VirtualName\":\"ephemeral1\"},{\"DeviceName\": \"/dev/sde\",\"VirtualName\":\"ephemeral2\"},{\"DeviceName\": \"/dev/sdf\",\"VirtualName\":\"ephemeral3\"}"

# Experimental: If the user sets KUBE_AWS_STORAGE to ebs, use ebs storage
# in preference to local instance storage We do this by not mounting any
# instance storage. We could do this better in future (e.g. making instance
# storage available for other purposes)
if [[ "${KUBE_AWS_STORAGE:-}" == "ebs" ]]; then
  EPHEMERAL_BLOCK_DEVICE_MAPPINGS=""
fi
  115. # TODO (bburns) Parameterize this for multiple cluster per project
  116. function get_vpc_id {
  117. $AWS_CMD describe-vpcs \
  118. --filters Name=tag:Name,Values=${VPC_NAME} \
  119. Name=tag:KubernetesCluster,Values=${CLUSTER_ID} \
  120. --query Vpcs[].VpcId
  121. }
  122. function get_subnet_id {
  123. local vpc_id=$1
  124. local az=$2
  125. $AWS_CMD describe-subnets \
  126. --filters Name=tag:KubernetesCluster,Values=${CLUSTER_ID} \
  127. Name=availabilityZone,Values=${az} \
  128. Name=vpc-id,Values=${vpc_id} \
  129. --query Subnets[].SubnetId
  130. }
  131. function get_igw_id {
  132. local vpc_id=$1
  133. $AWS_CMD describe-internet-gateways \
  134. --filters Name=attachment.vpc-id,Values=${vpc_id} \
  135. --query InternetGateways[].InternetGatewayId
  136. }
  137. function get_elbs_in_vpc {
  138. # ELB doesn't seem to be on the same platform as the rest of AWS; doesn't support filtering
  139. aws elb --output json describe-load-balancers | \
  140. python -c "import json,sys; lst = [str(lb['LoadBalancerName']) for lb in json.load(sys.stdin)['LoadBalancerDescriptions'] if 'VPCId' in lb and lb['VPCId'] == '$1']; print('\n'.join(lst))"
  141. }
  142. function get_instanceid_from_name {
  143. local tagName=$1
  144. $AWS_CMD describe-instances \
  145. --filters Name=tag:Name,Values=${tagName} \
  146. Name=instance-state-name,Values=running \
  147. Name=tag:KubernetesCluster,Values=${CLUSTER_ID} \
  148. --query Reservations[].Instances[].InstanceId
  149. }
  150. function get_instance_public_ip {
  151. local instance_id=$1
  152. $AWS_CMD describe-instances \
  153. --instance-ids ${instance_id} \
  154. --query Reservations[].Instances[].NetworkInterfaces[0].Association.PublicIp
  155. }
  156. function get_instance_private_ip {
  157. local instance_id=$1
  158. $AWS_CMD describe-instances \
  159. --instance-ids ${instance_id} \
  160. --query Reservations[].Instances[].NetworkInterfaces[0].PrivateIpAddress
  161. }
  162. # Gets a security group id, by name ($1)
  163. function get_security_group_id {
  164. local name=$1
  165. $AWS_CMD describe-security-groups \
  166. --filters Name=vpc-id,Values=${VPC_ID} \
  167. Name=group-name,Values=${name} \
  168. Name=tag:KubernetesCluster,Values=${CLUSTER_ID} \
  169. --query SecurityGroups[].GroupId \
  170. | tr "\t" "\n"
  171. }
  172. # Finds the master ip, if it is saved (tagged on the master disk)
  173. # Sets KUBE_MASTER_IP
  174. function find-tagged-master-ip {
  175. find-master-pd
  176. if [[ -n "${MASTER_DISK_ID:-}" ]]; then
  177. KUBE_MASTER_IP=$(get-tag ${MASTER_DISK_ID} ${TAG_KEY_MASTER_IP})
  178. fi
  179. }
  180. # Gets a tag value from an AWS resource
  181. # usage: get-tag <resource-id> <tag-name>
  182. # outputs: the tag value, or "" if no tag
  183. function get-tag {
  184. $AWS_CMD describe-tags --filters Name=resource-id,Values=${1} \
  185. Name=key,Values=${2} \
  186. --query Tags[].Value
  187. }
  188. # Gets an existing master, exiting if not found
  189. # Note that this is called directly by the e2e tests
  190. function detect-master() {
  191. find-tagged-master-ip
  192. KUBE_MASTER=${MASTER_NAME}
  193. if [[ -z "${KUBE_MASTER_IP:-}" ]]; then
  194. echo "Could not detect Kubernetes master node IP. Make sure you've launched a cluster with 'kube-up.sh'"
  195. exit 1
  196. fi
  197. echo "Using master: $KUBE_MASTER (external IP: $KUBE_MASTER_IP)"
  198. }
  199. # Reads kube-env metadata from master
  200. #
  201. # Assumed vars:
  202. # KUBE_MASTER_IP
  203. # AWS_SSH_KEY
  204. # SSH_USER
  205. function get-master-env() {
  206. ssh -oStrictHostKeyChecking=no -i "${AWS_SSH_KEY}" ${SSH_USER}@${KUBE_MASTER_IP} sudo cat /etc/kubernetes/kube_env.yaml
  207. }
  208. function query-running-minions () {
  209. local query=$1
  210. $AWS_CMD describe-instances \
  211. --filters Name=instance-state-name,Values=running \
  212. Name=vpc-id,Values=${VPC_ID} \
  213. Name=tag:KubernetesCluster,Values=${CLUSTER_ID} \
  214. Name=tag:aws:autoscaling:groupName,Values=${ASG_NAME} \
  215. Name=tag:Role,Values=${NODE_TAG} \
  216. --query ${query}
  217. }
# Discovers the cluster's running minion instances.
# Vars set: NODE_IDS[], NODE_NAMES[] (both hold the instance ids).
function detect-node-names () {
  # If this is called directly, VPC_ID might not be set
  # (this is case from cluster/log-dump.sh)
  if [[ -z "${VPC_ID:-}" ]]; then
    VPC_ID=$(get_vpc_id)
  fi
  NODE_IDS=()
  NODE_NAMES=()
  # Intentional word-splitting: the query prints whitespace-separated ids.
  for id in $(query-running-minions "Reservations[].Instances[].InstanceId"); do
    NODE_IDS+=("${id}")
    # We use the minion ids as the name
    NODE_NAMES+=("${id}")
  done
}
  232. # Called to detect the project on GCE
  233. # Not needed on AWS
  234. function detect-project() {
  235. :
  236. }
  237. function detect-nodes () {
  238. detect-node-names
  239. # This is inefficient, but we want NODE_NAMES / NODE_IDS to be ordered the same as KUBE_NODE_IP_ADDRESSES
  240. KUBE_NODE_IP_ADDRESSES=()
  241. for (( i=0; i<${#NODE_NAMES[@]}; i++)); do
  242. local minion_ip
  243. if [[ "${ENABLE_NODE_PUBLIC_IP}" == "true" ]]; then
  244. minion_ip=$(get_instance_public_ip ${NODE_NAMES[$i]})
  245. else
  246. minion_ip=$(get_instance_private_ip ${NODE_NAMES[$i]})
  247. fi
  248. echo "Found minion ${i}: ${NODE_NAMES[$i]} @ ${minion_ip}"
  249. KUBE_NODE_IP_ADDRESSES+=("${minion_ip}")
  250. done
  251. if [[ -z "$KUBE_NODE_IP_ADDRESSES" ]]; then
  252. echo "Could not detect Kubernetes minion nodes. Make sure you've launched a cluster with 'kube-up.sh'"
  253. exit 1
  254. fi
  255. }
# Resolves (and caches) the master and minion security group ids by name.
# Vars set: MASTER_SG_ID, NODE_SG_ID. Exits 1 if either group is missing.
function detect-security-groups {
  # Skip the lookup if the caller already provided the id.
  if [[ -z "${MASTER_SG_ID-}" ]]; then
    MASTER_SG_ID=$(get_security_group_id "${MASTER_SG_NAME}")
    if [[ -z "${MASTER_SG_ID}" ]]; then
      echo "Could not detect Kubernetes master security group. Make sure you've launched a cluster with 'kube-up.sh'"
      exit 1
    else
      echo "Using master security group: ${MASTER_SG_NAME} ${MASTER_SG_ID}"
    fi
  fi
  if [[ -z "${NODE_SG_ID-}" ]]; then
    NODE_SG_ID=$(get_security_group_id "${NODE_SG_NAME}")
    if [[ -z "${NODE_SG_ID}" ]]; then
      echo "Could not detect Kubernetes minion security group. Make sure you've launched a cluster with 'kube-up.sh'"
      exit 1
    else
      echo "Using minion security group: ${NODE_SG_NAME} ${NODE_SG_ID}"
    fi
  fi
}
  276. # Detects the AMI to use (considering the region)
  277. # This really should be in the various distro-specific util functions,
  278. # but CoreOS uses this for the master, so for now it is here.
  279. #
  280. # TODO: Remove this and just have each distro implement detect-image
  281. #
  282. # Vars set:
  283. # AWS_IMAGE
  284. function detect-image () {
  285. case "${OS_DISTRIBUTION}" in
  286. wily)
  287. detect-wily-image
  288. ;;
  289. jessie)
  290. detect-jessie-image
  291. ;;
  292. *)
  293. echo "Please specify AWS_IMAGE directly (distro ${OS_DISTRIBUTION} not recognized)"
  294. exit 2
  295. ;;
  296. esac
  297. }
# Detects the RootDevice to use in the Block Device Mapping (considering the AMI)
#
# Vars set:
# MASTER_BLOCK_DEVICE_MAPPINGS
# NODE_BLOCK_DEVICE_MAPPINGS
#
function detect-root-device {
  local master_image=${AWS_IMAGE}
  local node_image=${KUBE_NODE_IMAGE}
  # Ask EC2 for each AMI's root device name (e.g. /dev/sda1 vs /dev/xvda).
  ROOT_DEVICE_MASTER=$($AWS_CMD describe-images --image-ids ${master_image} --query 'Images[].RootDeviceName')
  # Skip the second lookup when master and node share the same AMI.
  if [[ "${master_image}" == "${node_image}" ]]; then
    ROOT_DEVICE_NODE=${ROOT_DEVICE_MASTER}
  else
    ROOT_DEVICE_NODE=$($AWS_CMD describe-images --image-ids ${node_image} --query 'Images[].RootDeviceName')
  fi
  # Build the JSON block-device mappings: one root EBS volume (deleted on
  # termination) plus the shared ephemeral-mapping fragment (may be empty,
  # and carries its own leading comma when non-empty).
  MASTER_BLOCK_DEVICE_MAPPINGS="[{\"DeviceName\":\"${ROOT_DEVICE_MASTER}\",\"Ebs\":{\"DeleteOnTermination\":true,\"VolumeSize\":${MASTER_ROOT_DISK_SIZE},\"VolumeType\":\"${MASTER_ROOT_DISK_TYPE}\"}} ${EPHEMERAL_BLOCK_DEVICE_MAPPINGS}]"
  NODE_BLOCK_DEVICE_MAPPINGS="[{\"DeviceName\":\"${ROOT_DEVICE_NODE}\",\"Ebs\":{\"DeleteOnTermination\":true,\"VolumeSize\":${NODE_ROOT_DISK_SIZE},\"VolumeType\":\"${NODE_ROOT_DISK_TYPE}\"}} ${EPHEMERAL_BLOCK_DEVICE_MAPPINGS}]"
}
  316. # Computes the AWS fingerprint for a public key file ($1)
  317. # $1: path to public key file
  318. # Note that this is a different hash from the OpenSSH hash.
  319. # But AWS gives us this public key hash in the describe keys output, so we should stick with this format.
  320. # Hopefully this will be done by the aws cli tool one day: https://github.com/aws/aws-cli/issues/191
  321. # NOTE: This does not work on Mavericks, due to an odd ssh-keygen version, so we use get-ssh-fingerprint instead
  322. function get-aws-fingerprint {
  323. local -r pubkey_path=$1
  324. ssh-keygen -f ${pubkey_path} -e -m PKCS8 | openssl rsa -pubin -outform DER | openssl md5 -c | sed -e 's/(stdin)= //g'
  325. }
  326. # Computes the SSH fingerprint for a public key file ($1)
  327. # #1: path to public key file
  328. # Note this is different from the AWS fingerprint; see notes on get-aws-fingerprint
  329. function get-ssh-fingerprint {
  330. local -r pubkey_path=$1
  331. ssh-keygen -lf ${pubkey_path} | cut -f2 -d' '
  332. }
  333. # Import an SSH public key to AWS.
  334. # Ignores duplicate names; recommended to use a name that includes the public key hash.
  335. # $1 name
  336. # $2 public key path
  337. function import-public-key {
  338. local -r name=$1
  339. local -r path=$2
  340. local ok=1
  341. local output=""
  342. output=$($AWS_CMD import-key-pair --key-name ${name} --public-key-material "file://${path}" 2>&1) || ok=0
  343. if [[ ${ok} == 0 ]]; then
  344. # Idempotency: ignore if duplicate name
  345. if [[ "${output}" != *"InvalidKeyPair.Duplicate"* ]]; then
  346. echo "Error importing public key"
  347. echo "Output: ${output}"
  348. exit 1
  349. fi
  350. fi
  351. }
  352. # Robustly try to create a security group, if it does not exist.
  353. # $1: The name of security group; will be created if not exists
  354. # $2: Description for security group (used if created)
  355. #
  356. # Note that this doesn't actually return the sgid; we need to re-query
  357. function create-security-group {
  358. local -r name=$1
  359. local -r description=$2
  360. local sgid=$(get_security_group_id "${name}")
  361. if [[ -z "$sgid" ]]; then
  362. echo "Creating security group ${name}."
  363. sgid=$($AWS_CMD create-security-group --group-name "${name}" --description "${description}" --vpc-id "${VPC_ID}" --query GroupId)
  364. add-tag $sgid KubernetesCluster ${CLUSTER_ID}
  365. fi
  366. }
# Authorize ingress to a security group.
# Attempts to be idempotent, though we end up checking the output looking for error-strings.
# $1 group-id
# $2.. arguments to pass to authorize-security-group-ingress
function authorize-security-group-ingress {
  local -r sgid=$1
  shift
  # ok=1 means success; flipped to 0 when the CLI call fails.
  local ok=1
  local output=""
  # NOTE(review): $@ is deliberately left unquoted here — callers may rely on
  # word-splitting a single pre-joined flag string. Verify callers before
  # changing this to "$@".
  output=$($AWS_CMD authorize-security-group-ingress --group-id "${sgid}" $@ 2>&1) || ok=0
  if [[ ${ok} == 0 ]]; then
    # Idempotency: ignore if duplicate rule
    if [[ "${output}" != *"InvalidPermission.Duplicate"* ]]; then
      echo "Error creating security group ingress rule"
      echo "Output: ${output}"
      exit 1
    fi
  fi
}
  386. # Gets master persistent volume, if exists
  387. # Sets MASTER_DISK_ID
  388. function find-master-pd {
  389. local name=${MASTER_NAME}-pd
  390. if [[ -z "${MASTER_DISK_ID}" ]]; then
  391. local zone_filter="Name=availability-zone,Values=${ZONE}"
  392. if [[ "${KUBE_USE_EXISTING_MASTER:-}" == "true" ]]; then
  393. # If we're reusing an existing master, it is likely to be in another zone
  394. # If running multizone, your cluster must be uniquely named across zones
  395. zone_filter=""
  396. fi
  397. MASTER_DISK_ID=`$AWS_CMD describe-volumes \
  398. --filters ${zone_filter} \
  399. Name=tag:Name,Values=${name} \
  400. Name=tag:KubernetesCluster,Values=${CLUSTER_ID} \
  401. --query Volumes[].VolumeId`
  402. fi
  403. }
  404. # Gets or creates master persistent volume
  405. # Sets MASTER_DISK_ID
  406. function ensure-master-pd {
  407. local name=${MASTER_NAME}-pd
  408. find-master-pd
  409. if [[ -z "${MASTER_DISK_ID}" ]]; then
  410. echo "Creating master disk: size ${MASTER_DISK_SIZE}GB, type ${MASTER_DISK_TYPE}"
  411. MASTER_DISK_ID=`$AWS_CMD create-volume --availability-zone ${ZONE} --volume-type ${MASTER_DISK_TYPE} --size ${MASTER_DISK_SIZE} --query VolumeId`
  412. add-tag ${MASTER_DISK_ID} Name ${name}
  413. add-tag ${MASTER_DISK_ID} KubernetesCluster ${CLUSTER_ID}
  414. fi
  415. }
# Configures a CloudWatch alarm to reboot the instance on failure
# $1: instance id
function reboot-on-failure {
  local instance_id=$1
  echo "Creating Cloudwatch alarm to reboot instance ${instance_id} on failure"
  # The alarm-action ARN embeds the account (owner) id, so look it up first.
  local aws_owner_id=`aws ec2 describe-instances --instance-ids ${instance_id} --query Reservations[0].OwnerId`
  if [[ -z "${aws_owner_id}" ]]; then
    echo "Unable to determinate AWS account id for ${instance_id}"
    exit 1
  fi
  # Reboot when StatusCheckFailed stays above 0 for three consecutive
  # 60-second periods.
  aws cloudwatch put-metric-alarm \
    --alarm-name k8s-${instance_id}-statuscheckfailure-reboot \
    --alarm-description "Reboot ${instance_id} on status check failure" \
    --namespace "AWS/EC2" \
    --dimensions Name=InstanceId,Value=${instance_id} \
    --statistic Minimum \
    --metric-name StatusCheckFailed \
    --comparison-operator GreaterThanThreshold \
    --threshold 0 \
    --period 60 \
    --evaluation-periods 3 \
    --alarm-actions arn:aws:swf:${AWS_REGION}:${aws_owner_id}:action/actions/AWS_EC2.InstanceId.Reboot/1.0 > $LOG
  # TODO: The IAM role EC2ActionsAccess must have been created
  # See e.g. http://docs.aws.amazon.com/AmazonCloudWatch/latest/DeveloperGuide/UsingIAM.html
}
  440. function delete-instance-alarms {
  441. local instance_id=$1
  442. alarm_names=`aws cloudwatch describe-alarms --alarm-name-prefix k8s-${instance_id}- --query MetricAlarms[].AlarmName`
  443. for alarm_name in ${alarm_names}; do
  444. aws cloudwatch delete-alarms --alarm-names ${alarm_name} > $LOG
  445. done
  446. }
# Finds the existing master IP, or creates/reuses an Elastic IP
# If MASTER_RESERVED_IP looks like an IP address, we will use it;
# otherwise we will create a new elastic IP
# Sets KUBE_MASTER_IP
function ensure-master-ip {
  find-tagged-master-ip
  if [[ -z "${KUBE_MASTER_IP:-}" ]]; then
    # Check if MASTER_RESERVED_IP looks like an IPv4 address
    # Note that we used to only allocate an elastic IP when MASTER_RESERVED_IP=auto
    # So be careful changing the IPV4 test, to be sure that 'auto' => 'allocate'
    if [[ "${MASTER_RESERVED_IP}" =~ ^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
      KUBE_MASTER_IP="${MASTER_RESERVED_IP}"
    else
      KUBE_MASTER_IP=`$AWS_CMD allocate-address --domain vpc --query PublicIp`
      echo "Allocated Elastic IP for master: ${KUBE_MASTER_IP}"
    fi
    # We can't tag elastic ips. Instead we put the tag on the persistent disk.
    # It is a little weird, perhaps, but it sort of makes sense...
    # The master mounts the master PD, and whoever mounts the master PD should also
    # have the master IP
    add-tag ${MASTER_DISK_ID} ${TAG_KEY_MASTER_IP} ${KUBE_MASTER_IP}
  fi
}
  470. # Creates a new DHCP option set configured correctly for Kubernetes when DHCP_OPTION_SET_ID is not specified
  471. # Sets DHCP_OPTION_SET_ID
  472. function create-dhcp-option-set () {
  473. if [[ -z ${DHCP_OPTION_SET_ID-} ]]; then
  474. case "${AWS_REGION}" in
  475. us-east-1)
  476. OPTION_SET_DOMAIN=ec2.internal
  477. ;;
  478. *)
  479. OPTION_SET_DOMAIN="${AWS_REGION}.compute.internal"
  480. esac
  481. DHCP_OPTION_SET_ID=$($AWS_CMD create-dhcp-options --dhcp-configuration Key=domain-name,Values=${OPTION_SET_DOMAIN} Key=domain-name-servers,Values=AmazonProvidedDNS --query DhcpOptions.DhcpOptionsId)
  482. add-tag ${DHCP_OPTION_SET_ID} Name kubernetes-dhcp-option-set
  483. add-tag ${DHCP_OPTION_SET_ID} KubernetesCluster ${CLUSTER_ID}
  484. fi
  485. $AWS_CMD associate-dhcp-options --dhcp-options-id ${DHCP_OPTION_SET_ID} --vpc-id ${VPC_ID} > $LOG
  486. echo "Using DHCP option set ${DHCP_OPTION_SET_ID}"
  487. }
  488. # Verify prereqs
  489. function verify-prereqs {
  490. if [[ "$(which aws)" == "" ]]; then
  491. echo "Can't find aws in PATH, please fix and retry."
  492. exit 1
  493. fi
  494. }
  495. # Create a temp dir that'll be deleted at the end of this bash session.
  496. #
  497. # Vars set:
  498. # KUBE_TEMP
  499. function ensure-temp-dir {
  500. if [[ -z ${KUBE_TEMP-} ]]; then
  501. KUBE_TEMP=$(mktemp -d -t kubernetes.XXXXXX)
  502. trap 'rm -rf "${KUBE_TEMP}"' EXIT
  503. fi
  504. }
# Take the local tar files and upload them to S3. They will then be
# downloaded by the master as part of the start up script for the master.
#
# Assumed vars:
# SERVER_BINARY_TAR
# SALT_TAR
# BOOTSTRAP_SCRIPT
# Vars set:
# SERVER_BINARY_TAR_URL / SERVER_BINARY_TAR_HASH
# SALT_TAR_URL / SALT_TAR_HASH
# BOOTSTRAP_SCRIPT_URL / BOOTSTRAP_SCRIPT_HASH
function upload-server-tars() {
  SERVER_BINARY_TAR_URL=
  SERVER_BINARY_TAR_HASH=
  SALT_TAR_URL=
  SALT_TAR_HASH=
  BOOTSTRAP_SCRIPT_URL=
  BOOTSTRAP_SCRIPT_HASH=
  ensure-temp-dir
  SERVER_BINARY_TAR_HASH=$(sha1sum-file "${SERVER_BINARY_TAR}")
  SALT_TAR_HASH=$(sha1sum-file "${SALT_TAR}")
  BOOTSTRAP_SCRIPT_HASH=$(sha1sum-file "${BOOTSTRAP_SCRIPT}")
  # Pick a per-user bucket name derived from user + access key when the
  # caller did not specify one.
  if [[ -z ${AWS_S3_BUCKET-} ]]; then
    local project_hash=
    local key=$(aws configure get aws_access_key_id)
    # macOS ships 'md5'; Linux ships 'md5sum'.
    if which md5 > /dev/null 2>&1; then
      project_hash=$(md5 -q -s "${USER} ${key}")
    else
      project_hash=$(echo -n "${USER} ${key}" | md5sum | awk '{ print $1 }')
    fi
    AWS_S3_BUCKET="kubernetes-staging-${project_hash}"
  fi
  echo "Uploading to Amazon S3"
  if ! aws s3api get-bucket-location --bucket ${AWS_S3_BUCKET} > /dev/null 2>&1 ; then
    echo "Creating ${AWS_S3_BUCKET}"
    # Buckets must be globally uniquely named, so always create in a known region
    # We default to us-east-1 because that's the canonical region for S3,
    # and then the bucket is most-simply named (s3.amazonaws.com)
    aws s3 mb "s3://${AWS_S3_BUCKET}" --region ${AWS_S3_REGION}
    echo "Confirming bucket was created..."
    # Poll (up to ~2 minutes) until the new bucket is visible.
    local attempt=0
    while true; do
      if ! aws s3 ls --region ${AWS_S3_REGION} "s3://${AWS_S3_BUCKET}" > /dev/null 2>&1; then
        if (( attempt > 120 )); then
          echo
          echo -e "${color_red}Unable to confirm bucket creation." >&2
          echo "Please ensure that s3://${AWS_S3_BUCKET} exists" >&2
          echo -e "and run the script again. (sorry!)${color_norm}" >&2
          exit 1
        fi
      else
        break
      fi
      attempt=$(($attempt+1))
      sleep 1
    done
  fi
  # Work out the region-specific URL base for the bucket.
  local s3_bucket_location=$(aws s3api get-bucket-location --bucket ${AWS_S3_BUCKET})
  local s3_url_base=https://s3-${s3_bucket_location}.amazonaws.com
  if [[ "${s3_bucket_location}" == "None" ]]; then
    # "US Classic" does not follow the pattern
    s3_url_base=https://s3.amazonaws.com
    s3_bucket_location=us-east-1
  elif [[ "${s3_bucket_location}" == "cn-north-1" ]]; then
    s3_url_base=https://s3.cn-north-1.amazonaws.com.cn
  fi
  local -r staging_path="devel"
  local -r local_dir="${KUBE_TEMP}/s3/"
  mkdir ${local_dir}
  echo "+++ Staging server tars to S3 Storage: ${AWS_S3_BUCKET}/${staging_path}"
  # Stage everything locally first, then sync the directory in one call.
  cp -a "${SERVER_BINARY_TAR}" ${local_dir}
  cp -a "${SALT_TAR}" ${local_dir}
  cp -a "${BOOTSTRAP_SCRIPT}" ${local_dir}
  aws s3 sync --region ${s3_bucket_location} --exact-timestamps ${local_dir} "s3://${AWS_S3_BUCKET}/${staging_path}/"
  # Make each uploaded object world-readable and record its public URL.
  local server_binary_path="${staging_path}/${SERVER_BINARY_TAR##*/}"
  aws s3api put-object-acl --region ${s3_bucket_location} --bucket ${AWS_S3_BUCKET} --key "${server_binary_path}" --grant-read 'uri="http://acs.amazonaws.com/groups/global/AllUsers"'
  SERVER_BINARY_TAR_URL="${s3_url_base}/${AWS_S3_BUCKET}/${server_binary_path}"
  local salt_tar_path="${staging_path}/${SALT_TAR##*/}"
  aws s3api put-object-acl --region ${s3_bucket_location} --bucket ${AWS_S3_BUCKET} --key "${salt_tar_path}" --grant-read 'uri="http://acs.amazonaws.com/groups/global/AllUsers"'
  SALT_TAR_URL="${s3_url_base}/${AWS_S3_BUCKET}/${salt_tar_path}"
  local bootstrap_script_path="${staging_path}/${BOOTSTRAP_SCRIPT##*/}"
  aws s3api put-object-acl --region ${s3_bucket_location} --bucket ${AWS_S3_BUCKET} --key "${bootstrap_script_path}" --grant-read 'uri="http://acs.amazonaws.com/groups/global/AllUsers"'
  BOOTSTRAP_SCRIPT_URL="${s3_url_base}/${AWS_S3_BUCKET}/${bootstrap_script_path}"
  echo "Uploaded server tars:"
  echo " SERVER_BINARY_TAR_URL: ${SERVER_BINARY_TAR_URL}"
  echo " SALT_TAR_URL: ${SALT_TAR_URL}"
  echo " BOOTSTRAP_SCRIPT_URL: ${BOOTSTRAP_SCRIPT_URL}"
}
  591. # Adds a tag to an AWS resource
  592. # usage: add-tag <resource-id> <tag-name> <tag-value>
  593. function add-tag {
  594. echo "Adding tag to ${1}: ${2}=${3}"
  595. # We need to retry in case the resource isn't yet fully created
  596. n=0
  597. until [ $n -ge 25 ]; do
  598. $AWS_CMD create-tags --resources ${1} --tags Key=${2},Value=${3} > $LOG && return
  599. n=$[$n+1]
  600. sleep 3
  601. done
  602. echo "Unable to add tag to AWS resource"
  603. exit 1
  604. }
  605. # Creates the IAM profile, based on configuration files in templates/iam
  606. function create-iam-profile {
  607. local key=$1
  608. local conf_dir=file://${KUBE_ROOT}/cluster/aws/templates/iam
  609. echo "Creating IAM role: ${key}"
  610. aws iam create-role --role-name ${key} --assume-role-policy-document ${conf_dir}/${key}-role.json > $LOG
  611. echo "Creating IAM role-policy: ${key}"
  612. aws iam put-role-policy --role-name ${key} --policy-name ${key} --policy-document ${conf_dir}/${key}-policy.json > $LOG
  613. echo "Creating IAM instance-policy: ${key}"
  614. aws iam create-instance-profile --instance-profile-name ${key} > $LOG
  615. echo "Adding IAM role to instance-policy: ${key}"
  616. aws iam add-role-to-instance-profile --instance-profile-name ${key} --role-name ${key} > $LOG
  617. }
  618. # Creates the IAM roles (if they do not already exist)
  619. function ensure-iam-profiles {
  620. aws iam get-instance-profile --instance-profile-name ${IAM_PROFILE_MASTER} || {
  621. echo "Creating master IAM profile: ${IAM_PROFILE_MASTER}"
  622. create-iam-profile ${IAM_PROFILE_MASTER}
  623. }
  624. aws iam get-instance-profile --instance-profile-name ${IAM_PROFILE_NODE} || {
  625. echo "Creating minion IAM profile: ${IAM_PROFILE_NODE}"
  626. create-iam-profile ${IAM_PROFILE_NODE}
  627. }
  628. }
  629. # Wait for instance to be in specified state
  630. function wait-for-instance-state {
  631. instance_id=$1
  632. state=$2
  633. while true; do
  634. instance_state=$($AWS_CMD describe-instances --instance-ids ${instance_id} --query Reservations[].Instances[].State.Name)
  635. if [[ "$instance_state" == "${state}" ]]; then
  636. break
  637. else
  638. echo "Waiting for instance ${instance_id} to be ${state} (currently ${instance_state})"
  639. echo "Sleeping for 3 seconds..."
  640. sleep 3
  641. fi
  642. done
  643. }
  644. # Allocates new Elastic IP from Amazon
  645. # Output: allocated IP address
  646. function allocate-elastic-ip {
  647. $AWS_CMD allocate-address --domain vpc --query PublicIp
  648. }
  649. # Attaches an elastic IP to the specified instance
  650. function attach-ip-to-instance {
  651. local ip_address=$1
  652. local instance_id=$2
  653. local elastic_ip_allocation_id=$($AWS_CMD describe-addresses --public-ips $ip_address --query Addresses[].AllocationId)
  654. echo "Attaching IP ${ip_address} to instance ${instance_id}"
  655. $AWS_CMD associate-address --instance-id ${instance_id} --allocation-id ${elastic_ip_allocation_id} > $LOG
  656. }
  657. # Releases an elastic IP
  658. function release-elastic-ip {
  659. local ip_address=$1
  660. echo "Releasing Elastic IP: ${ip_address}"
  661. elastic_ip_allocation_id=$($AWS_CMD describe-addresses --public-ips $ip_address --query Addresses[].AllocationId 2> $LOG) || true
  662. if [[ -z "${elastic_ip_allocation_id}" ]]; then
  663. echo "Elastic IP already released"
  664. else
  665. $AWS_CMD release-address --allocation-id ${elastic_ip_allocation_id} > $LOG
  666. fi
  667. }
  668. # Deletes a security group
  669. # usage: delete_security_group <sgid>
  670. function delete_security_group {
  671. local -r sg_id=${1}
  672. echo "Deleting security group: ${sg_id}"
  673. # We retry in case there's a dependent resource - typically an ELB
  674. n=0
  675. until [ $n -ge 20 ]; do
  676. $AWS_CMD delete-security-group --group-id ${sg_id} > $LOG && return
  677. n=$[$n+1]
  678. sleep 3
  679. done
  680. echo "Unable to delete security group: ${sg_id}"
  681. exit 1
  682. }
  683. function ssh-key-setup {
  684. if [[ ! -f "$AWS_SSH_KEY" ]]; then
  685. ssh-keygen -f "$AWS_SSH_KEY" -N ''
  686. fi
  687. # Note that we use get-ssh-fingerprint, so this works on OSX Mavericks
  688. # get-aws-fingerprint gives the same fingerprint that AWS computes,
  689. # but OSX Mavericks ssh-keygen can't compute it
  690. AWS_SSH_KEY_FINGERPRINT=$(get-ssh-fingerprint ${AWS_SSH_KEY}.pub)
  691. echo "Using SSH key with (AWS) fingerprint: ${AWS_SSH_KEY_FINGERPRINT}"
  692. AWS_SSH_KEY_NAME="kubernetes-${AWS_SSH_KEY_FINGERPRINT//:/}"
  693. import-public-key ${AWS_SSH_KEY_NAME} ${AWS_SSH_KEY}.pub
  694. }
  695. function vpc-setup {
  696. if [[ -z "${VPC_ID:-}" ]]; then
  697. VPC_ID=$(get_vpc_id)
  698. fi
  699. if [[ -z "$VPC_ID" ]]; then
  700. echo "Creating vpc."
  701. VPC_ID=$($AWS_CMD create-vpc --cidr-block ${VPC_CIDR} --query Vpc.VpcId)
  702. $AWS_CMD modify-vpc-attribute --vpc-id $VPC_ID --enable-dns-support '{"Value": true}' > $LOG
  703. $AWS_CMD modify-vpc-attribute --vpc-id $VPC_ID --enable-dns-hostnames '{"Value": true}' > $LOG
  704. add-tag $VPC_ID Name ${VPC_NAME}
  705. add-tag $VPC_ID KubernetesCluster ${CLUSTER_ID}
  706. fi
  707. echo "Using VPC $VPC_ID"
  708. }
  709. function subnet-setup {
  710. if [[ -z "${SUBNET_ID:-}" ]]; then
  711. SUBNET_ID=$(get_subnet_id $VPC_ID $ZONE)
  712. fi
  713. if [[ -z "$SUBNET_ID" ]]; then
  714. echo "Creating subnet."
  715. SUBNET_ID=$($AWS_CMD create-subnet --cidr-block ${SUBNET_CIDR} --vpc-id $VPC_ID --availability-zone ${ZONE} --query Subnet.SubnetId)
  716. add-tag $SUBNET_ID KubernetesCluster ${CLUSTER_ID}
  717. else
  718. EXISTING_CIDR=$($AWS_CMD describe-subnets --subnet-ids ${SUBNET_ID} --query Subnets[].CidrBlock)
  719. echo "Using existing subnet with CIDR $EXISTING_CIDR"
  720. if [ ! $SUBNET_CIDR = $EXISTING_CIDR ]; then
  721. MASTER_INTERNAL_IP="${EXISTING_CIDR%.*}${MASTER_IP_SUFFIX}"
  722. echo "Assuming MASTER_INTERNAL_IP=${MASTER_INTERNAL_IP}"
  723. fi
  724. fi
  725. echo "Using subnet $SUBNET_ID"
  726. }
function kube-up {
# Provisions a complete cluster: staging artifacts, IAM, SSH keys,
# networking (VPC, subnet, IGW, route table), security groups, the
# master instance, the minion ASG, and a final sanity check.
echo "Starting cluster using os distro: ${OS_DISTRIBUTION}" >&2
# Phase 1: gather inputs and stage release artifacts.
get-tokens
detect-image
detect-minion-image
detect-root-device
find-release-tars
ensure-temp-dir
create-bootstrap-script
upload-server-tars
ensure-iam-profiles
load-or-gen-kube-basicauth
load-or-gen-kube-bearertoken
ssh-key-setup
# Phase 2: networking.
vpc-setup
create-dhcp-option-set
subnet-setup
# Find or create the Internet Gateway and attach it to the VPC.
IGW_ID=$(get_igw_id $VPC_ID)
if [[ -z "$IGW_ID" ]]; then
echo "Creating Internet Gateway."
IGW_ID=$($AWS_CMD create-internet-gateway --query InternetGateway.InternetGatewayId)
$AWS_CMD attach-internet-gateway --internet-gateway-id $IGW_ID --vpc-id $VPC_ID > $LOG
fi
echo "Using Internet Gateway $IGW_ID"
echo "Associating route table."
# Reuse the route table tagged for this cluster, or create and tag one.
ROUTE_TABLE_ID=$($AWS_CMD describe-route-tables \
--filters Name=vpc-id,Values=${VPC_ID} \
Name=tag:KubernetesCluster,Values=${CLUSTER_ID} \
--query RouteTables[].RouteTableId)
if [[ -z "${ROUTE_TABLE_ID}" ]]; then
echo "Creating route table"
ROUTE_TABLE_ID=$($AWS_CMD create-route-table \
--vpc-id=${VPC_ID} \
--query RouteTable.RouteTableId)
add-tag ${ROUTE_TABLE_ID} KubernetesCluster ${CLUSTER_ID}
fi
echo "Associating route table ${ROUTE_TABLE_ID} to subnet ${SUBNET_ID}"
# '|| true': the association/route may already exist from a previous run.
$AWS_CMD associate-route-table --route-table-id $ROUTE_TABLE_ID --subnet-id $SUBNET_ID > $LOG || true
echo "Adding route to route table ${ROUTE_TABLE_ID}"
$AWS_CMD create-route --route-table-id $ROUTE_TABLE_ID --destination-cidr-block 0.0.0.0/0 --gateway-id $IGW_ID > $LOG || true
echo "Using Route Table $ROUTE_TABLE_ID"
# Create security groups
MASTER_SG_ID=$(get_security_group_id "${MASTER_SG_NAME}")
if [[ -z "${MASTER_SG_ID}" ]]; then
echo "Creating master security group."
create-security-group "${MASTER_SG_NAME}" "Kubernetes security group applied to master nodes"
fi
NODE_SG_ID=$(get_security_group_id "${NODE_SG_NAME}")
if [[ -z "${NODE_SG_ID}" ]]; then
echo "Creating minion security group."
create-security-group "${NODE_SG_NAME}" "Kubernetes security group applied to minion nodes"
fi
detect-security-groups
# Phase 3: ingress rules.
# Masters can talk to master
authorize-security-group-ingress "${MASTER_SG_ID}" "--source-group ${MASTER_SG_ID} --protocol all"
# Minions can talk to minions
authorize-security-group-ingress "${NODE_SG_ID}" "--source-group ${NODE_SG_ID} --protocol all"
# Masters and minions can talk to each other
authorize-security-group-ingress "${MASTER_SG_ID}" "--source-group ${NODE_SG_ID} --protocol all"
authorize-security-group-ingress "${NODE_SG_ID}" "--source-group ${MASTER_SG_ID} --protocol all"
# SSH is open to the world
authorize-security-group-ingress "${MASTER_SG_ID}" "--protocol tcp --port 22 --cidr ${SSH_CIDR}"
authorize-security-group-ingress "${NODE_SG_ID}" "--protocol tcp --port 22 --cidr ${SSH_CIDR}"
# HTTPS to the master is allowed (for API access)
authorize-security-group-ingress "${MASTER_SG_ID}" "--protocol tcp --port 443 --cidr ${HTTP_API_CIDR}"
# Phase 4: instances.
# KUBE_USE_EXISTING_MASTER is used to add minions to an existing master
if [[ "${KUBE_USE_EXISTING_MASTER:-}" == "true" ]]; then
detect-master
parse-master-env
# Start minions
start-minions
wait-minions
else
# Create the master
start-master
# Build ~/.kube/config
build-config
# Start minions
start-minions
wait-minions
# Wait for the master to be ready
wait-master
fi
# Check the cluster is OK
check-cluster
}
  813. # Builds the bootstrap script and saves it to a local temp file
  814. # Sets BOOTSTRAP_SCRIPT to the path of the script
function create-bootstrap-script() {
# Builds the node bootstrap script by splicing the AWS override
# functions into the GCE configure-vm script at its
# '#+AWS_OVERRIDES_HERE' marker line. Sets global BOOTSTRAP_SCRIPT.
ensure-temp-dir
BOOTSTRAP_SCRIPT="${KUBE_TEMP}/bootstrap-script"
(
# Include the default functions from the GCE configure-vm script
# (everything before the marker line).
sed '/^#+AWS_OVERRIDES_HERE/,$d' "${KUBE_ROOT}/cluster/gce/configure-vm.sh"
# Include the AWS override functions
cat "${KUBE_ROOT}/cluster/aws/templates/configure-vm-aws.sh"
cat "${KUBE_ROOT}/cluster/aws/templates/format-disks.sh"
# Include the GCE configure-vm directly-executed code
# (everything after the marker line).
sed -e '1,/^#+AWS_OVERRIDES_HERE/d' "${KUBE_ROOT}/cluster/gce/configure-vm.sh"
) > "${BOOTSTRAP_SCRIPT}"
}
  828. # Starts the master node
function start-master() {
# Launches and configures the master instance: generates certs, builds a
# gzipped cloud-init user-data script, runs the instance, tags it, then
# attaches the elastic IP, the persistent volume, and the pod CIDR route.
# Ensure RUNTIME_CONFIG is populated
build-runtime-config
# Get or create master persistent volume
ensure-master-pd
# Get or create master elastic IP
ensure-master-ip
# We have to make sure that the cert is valid for API_SERVERS
# i.e. we likely have to pass ELB name / elastic IP in future
create-certs "${KUBE_MASTER_IP}" "${MASTER_INTERNAL_IP}"
# This key is no longer needed, and this enables us to get under the 16KB size limit
KUBECFG_CERT_BASE64=""
KUBECFG_KEY_BASE64=""
write-master-env
(
# We pipe this to the ami as a startup script in the user-data field. Requires a compatible ami
echo "#! /bin/bash"
echo "mkdir -p /var/cache/kubernetes-install"
echo "cd /var/cache/kubernetes-install"
# Embed the generated master env yaml in a heredoc inside the script.
echo "cat > kube_env.yaml << __EOF_MASTER_KUBE_ENV_YAML"
cat ${KUBE_TEMP}/master-kube-env.yaml
echo "AUTO_UPGRADE: 'true'"
# TODO: get rid of these exceptions / harmonize with common or GCE
echo "DOCKER_STORAGE: $(yaml-quote ${DOCKER_STORAGE:-})"
echo "API_SERVERS: $(yaml-quote ${MASTER_INTERNAL_IP:-})"
echo "__EOF_MASTER_KUBE_ENV_YAML"
echo ""
# The instance downloads and runs the staged bootstrap script, and
# re-runs it on every boot via /etc/rc.local.
echo "wget -O bootstrap ${BOOTSTRAP_SCRIPT_URL}"
echo "chmod +x bootstrap"
echo "mkdir -p /etc/kubernetes"
echo "mv kube_env.yaml /etc/kubernetes"
echo "mv bootstrap /etc/kubernetes/"
echo "cat > /etc/rc.local << EOF_RC_LOCAL"
echo "#!/bin/sh -e"
# We want to be sure that we don't pass an argument to bootstrap
echo "/etc/kubernetes/bootstrap"
echo "exit 0"
echo "EOF_RC_LOCAL"
echo "/etc/kubernetes/bootstrap"
) > "${KUBE_TEMP}/master-user-data"
# Compress the data to fit under the 16KB limit (cloud-init accepts compressed data)
gzip "${KUBE_TEMP}/master-user-data"
echo "Starting Master"
master_id=$($AWS_CMD run-instances \
--image-id $AWS_IMAGE \
--iam-instance-profile Name=$IAM_PROFILE_MASTER \
--instance-type $MASTER_SIZE \
--subnet-id $SUBNET_ID \
--private-ip-address $MASTER_INTERNAL_IP \
--key-name ${AWS_SSH_KEY_NAME} \
--security-group-ids ${MASTER_SG_ID} \
--associate-public-ip-address \
--block-device-mappings "${MASTER_BLOCK_DEVICE_MAPPINGS}" \
--user-data fileb://${KUBE_TEMP}/master-user-data.gz \
--query Instances[].InstanceId)
add-tag $master_id Name $MASTER_NAME
add-tag $master_id Role $MASTER_TAG
add-tag $master_id KubernetesCluster ${CLUSTER_ID}
echo "Waiting for master to be ready"
# Poll until the instance has a public IP (gives up after ~30 attempts).
local attempt=0
while true; do
echo -n Attempt "$(($attempt+1))" to check for master node
local ip=$(get_instance_public_ip ${master_id})
if [[ -z "${ip}" ]]; then
if (( attempt > 30 )); then
echo
echo -e "${color_red}master failed to start. Your cluster is unlikely" >&2
echo "to work correctly. Please run ./cluster/kube-down.sh and re-create the" >&2
echo -e "cluster. (sorry!)${color_norm}" >&2
exit 1
fi
else
# We are not able to add an elastic ip, a route or volume to the instance until that instance is in "running" state.
wait-for-instance-state ${master_id} "running"
KUBE_MASTER=${MASTER_NAME}
echo -e " ${color_green}[master running]${color_norm}"
attach-ip-to-instance ${KUBE_MASTER_IP} ${master_id}
# This is a race between instance start and volume attachment. There appears to be no way to start an AWS instance with a volume attached.
# To work around this, we wait for volume to be ready in setup-master-pd.sh
echo "Attaching persistent data volume (${MASTER_DISK_ID}) to master"
$AWS_CMD attach-volume --volume-id ${MASTER_DISK_ID} --device /dev/sdb --instance-id ${master_id}
sleep 10
$AWS_CMD create-route --route-table-id $ROUTE_TABLE_ID --destination-cidr-block ${MASTER_IP_RANGE} --instance-id $master_id > $LOG
break
fi
echo -e " ${color_yellow}[master not working yet]${color_norm}"
attempt=$(($attempt+1))
sleep 10
done
}
  919. # Creates an ASG for the minion nodes
function start-minions() {
# Creates the minion launch configuration and auto-scaling group, using
# a gzipped cloud-init user-data script analogous to the master's.
# Minions don't currently use runtime config, but call it anyway for sanity
build-runtime-config
echo "Creating minion configuration"
write-node-env
(
# We pipe this to the ami as a startup script in the user-data field. Requires a compatible ami
echo "#! /bin/bash"
echo "mkdir -p /var/cache/kubernetes-install"
echo "cd /var/cache/kubernetes-install"
# Embed the generated node env yaml in a heredoc inside the script.
echo "cat > kube_env.yaml << __EOF_KUBE_ENV_YAML"
cat ${KUBE_TEMP}/node-kube-env.yaml
echo "AUTO_UPGRADE: 'true'"
# TODO: get rid of these exceptions / harmonize with common or GCE
echo "DOCKER_STORAGE: $(yaml-quote ${DOCKER_STORAGE:-})"
echo "API_SERVERS: $(yaml-quote ${MASTER_INTERNAL_IP:-})"
echo "__EOF_KUBE_ENV_YAML"
echo ""
# Download and run the staged bootstrap script; re-run on boot via rc.local.
echo "wget -O bootstrap ${BOOTSTRAP_SCRIPT_URL}"
echo "chmod +x bootstrap"
echo "mkdir -p /etc/kubernetes"
echo "mv kube_env.yaml /etc/kubernetes"
echo "mv bootstrap /etc/kubernetes/"
echo "cat > /etc/rc.local << EOF_RC_LOCAL"
echo "#!/bin/sh -e"
# We want to be sure that we don't pass an argument to bootstrap
echo "/etc/kubernetes/bootstrap"
echo "exit 0"
echo "EOF_RC_LOCAL"
echo "/etc/kubernetes/bootstrap"
) > "${KUBE_TEMP}/node-user-data"
# Compress the data to fit under the 16KB limit (cloud-init accepts compressed data)
gzip "${KUBE_TEMP}/node-user-data"
local public_ip_option
if [[ "${ENABLE_NODE_PUBLIC_IP}" == "true" ]]; then
public_ip_option="--associate-public-ip-address"
else
public_ip_option="--no-associate-public-ip-address"
fi
# Optional spot pricing; empty string means on-demand instances.
local spot_price_option
if [[ -n "${NODE_SPOT_PRICE:-}" ]]; then
spot_price_option="--spot-price ${NODE_SPOT_PRICE}"
else
spot_price_option=""
fi
${AWS_ASG_CMD} create-launch-configuration \
--launch-configuration-name ${ASG_NAME} \
--image-id $KUBE_NODE_IMAGE \
--iam-instance-profile ${IAM_PROFILE_NODE} \
--instance-type $NODE_SIZE \
--key-name ${AWS_SSH_KEY_NAME} \
--security-groups ${NODE_SG_ID} \
${public_ip_option} \
${spot_price_option} \
--block-device-mappings "${NODE_BLOCK_DEVICE_MAPPINGS}" \
--user-data "fileb://${KUBE_TEMP}/node-user-data.gz"
echo "Creating autoscaling group"
# Fixed-size group (min == max == NUM_NODES); tags propagate the
# cluster identity to each instance.
${AWS_ASG_CMD} create-auto-scaling-group \
--auto-scaling-group-name ${ASG_NAME} \
--launch-configuration-name ${ASG_NAME} \
--min-size ${NUM_NODES} \
--max-size ${NUM_NODES} \
--vpc-zone-identifier ${SUBNET_ID} \
--tags ResourceId=${ASG_NAME},ResourceType=auto-scaling-group,Key=Name,Value=${NODE_INSTANCE_PREFIX} \
ResourceId=${ASG_NAME},ResourceType=auto-scaling-group,Key=Role,Value=${NODE_TAG} \
ResourceId=${ASG_NAME},ResourceType=auto-scaling-group,Key=KubernetesCluster,Value=${CLUSTER_ID}
}
function wait-minions {
# Polls detect-node-names every 10s until NODE_IDS reaches NUM_NODES,
# failing after max_attempts polls.
# Wait for the minions to be running
# TODO(justinsb): This is really not needed any more
local attempt=0
local max_attempts=30
# Spot instances are slower to launch
if [[ -n "${NODE_SPOT_PRICE:-}" ]]; then
max_attempts=90
fi
while true; do
# detect-node-names populates the NODE_IDS array.
detect-node-names > $LOG
if [[ ${#NODE_IDS[@]} == ${NUM_NODES} ]]; then
echo -e " ${color_green}${#NODE_IDS[@]} minions started; ready${color_norm}"
break
fi
if (( attempt > max_attempts )); then
echo
echo "Expected number of minions did not start in time"
echo
echo -e "${color_red}Expected number of minions failed to start. Your cluster is unlikely" >&2
echo "to work correctly. Please run ./cluster/kube-down.sh and re-create the" >&2
echo -e "cluster. (sorry!)${color_norm}" >&2
exit 1
fi
echo -e " ${color_yellow}${#NODE_IDS[@]} minions started; waiting${color_norm}"
attempt=$(($attempt+1))
sleep 10
done
}
  1016. # Wait for the master to be started
  1017. function wait-master() {
  1018. detect-master > $LOG
  1019. echo "Waiting for cluster initialization."
  1020. echo
  1021. echo " This will continually check to see if the API for kubernetes is reachable."
  1022. echo " This might loop forever if there was some uncaught error during start"
  1023. echo " up."
  1024. echo
  1025. until $(curl --insecure --user ${KUBE_USER}:${KUBE_PASSWORD} --max-time 5 \
  1026. --fail --output $LOG --silent https://${KUBE_MASTER_IP}/healthz); do
  1027. printf "."
  1028. sleep 2
  1029. done
  1030. echo "Kubernetes cluster created."
  1031. }
  1032. # Creates the ~/.kube/config file, getting the information from the master
  1033. # The master must be running and set in KUBE_MASTER_IP
function build-config() {
# Exports the cert/key/context variables that create-kubeconfig reads,
# then writes the user's kubeconfig entries with restrictive permissions.
export KUBE_CERT="${CERT_DIR}/pki/issued/kubecfg.crt"
export KUBE_KEY="${CERT_DIR}/pki/private/kubecfg.key"
export CA_CERT="${CERT_DIR}/pki/ca.crt"
export CONTEXT="${CONFIG_CONTEXT}"
(
# Subshell keeps the umask change from leaking to the caller.
umask 077
# Update the user's kubeconfig to include credentials for this apiserver.
create-kubeconfig
create-kubeconfig-for-federation
)
}
  1046. # Sanity check the cluster and print confirmation messages
function check-cluster() {
# Verifies every minion reports a working Docker (via check-minion),
# retrying each node up to ~20 times, then prints access instructions.
echo "Sanity checking cluster..."
sleep 5
detect-nodes > $LOG
# Don't bail on errors, we want to be able to print some info.
set +e
# Basic sanity checking
# TODO(justinsb): This is really not needed any more
local rc # Capture return code without exiting because of errexit bash option
for (( i=0; i<${#KUBE_NODE_IP_ADDRESSES[@]}; i++)); do
# Make sure docker is installed and working.
local attempt=0
while true; do
local minion_ip=${KUBE_NODE_IP_ADDRESSES[$i]}
echo -n "Attempt $(($attempt+1)) to check Docker on node @ ${minion_ip} ..."
local output=`check-minion ${minion_ip}`
echo $output
if [[ "${output}" != "working" ]]; then
if (( attempt > 20 )); then
echo
echo -e "${color_red}Your cluster is unlikely to work correctly." >&2
echo "Please run ./cluster/kube-down.sh and re-create the" >&2
echo -e "cluster. (sorry!)${color_norm}" >&2
exit 1
fi
else
break
fi
attempt=$(($attempt+1))
sleep 30
done
done
# ensures KUBECONFIG is set
get-kubeconfig-basicauth
echo
echo -e "${color_green}Kubernetes cluster is running. The master is running at:"
echo
echo -e "${color_yellow} https://${KUBE_MASTER_IP}"
echo
echo -e "${color_green}The user name and password to use is located in ${KUBECONFIG}.${color_norm}"
echo
}
function kube-down {
# Tears down the cluster identified by the KubernetesCluster tag, in
# dependency order: ELBs, instances/ASGs, elastic IP and master volume,
# security groups, subnets, internet gateways, routes, and the VPC.
local vpc_id=$(get_vpc_id)
if [[ -n "${vpc_id}" ]]; then
# ELBs must go first; they pin security groups and subnets.
local elb_ids=$(get_elbs_in_vpc ${vpc_id})
if [[ -n "${elb_ids}" ]]; then
echo "Deleting ELBs in: ${vpc_id}"
for elb_id in ${elb_ids}; do
aws elb delete-load-balancer --load-balancer-name=${elb_id} >$LOG
done
echo "Waiting for ELBs to be deleted"
while true; do
elb_ids=$(get_elbs_in_vpc ${vpc_id})
if [[ -z "$elb_ids" ]]; then
echo "All ELBs deleted"
break
else
echo "ELBs not yet deleted: $elb_ids"
echo "Sleeping for 3 seconds..."
sleep 3
fi
done
fi
if [[ -z "${KUBE_MASTER_ID-}" ]]; then
KUBE_MASTER_ID=$(get_instanceid_from_name ${MASTER_NAME})
fi
if [[ -n "${KUBE_MASTER_ID-}" ]]; then
delete-instance-alarms ${KUBE_MASTER_ID}
fi
echo "Deleting instances in VPC: ${vpc_id}"
instance_ids=$($AWS_CMD describe-instances \
--filters Name=vpc-id,Values=${vpc_id} \
Name=tag:KubernetesCluster,Values=${CLUSTER_ID} \
--query Reservations[].Instances[].InstanceId)
if [[ -n "${instance_ids}" ]]; then
# Delete the ASGs/launch configurations that own these instances
# before terminating, so the ASG does not replace them.
asg_groups=$($AWS_CMD describe-instances \
--query 'Reservations[].Instances[].Tags[?Key==`aws:autoscaling:groupName`].Value[]' \
--instance-ids ${instance_ids})
for asg_group in ${asg_groups}; do
if [[ -n $(${AWS_ASG_CMD} describe-auto-scaling-groups --auto-scaling-group-names ${asg_group} --query AutoScalingGroups[].AutoScalingGroupName) ]]; then
echo "Deleting auto-scaling group: ${asg_group}"
${AWS_ASG_CMD} delete-auto-scaling-group --force-delete --auto-scaling-group-name ${asg_group}
fi
if [[ -n $(${AWS_ASG_CMD} describe-launch-configurations --launch-configuration-names ${asg_group} --query LaunchConfigurations[].LaunchConfigurationName) ]]; then
echo "Deleting auto-scaling launch configuration: ${asg_group}"
${AWS_ASG_CMD} delete-launch-configuration --launch-configuration-name ${asg_group}
fi
done
$AWS_CMD terminate-instances --instance-ids ${instance_ids} > $LOG
echo "Waiting for instances to be deleted"
for instance_id in ${instance_ids}; do
wait-for-instance-state ${instance_id} "terminated"
done
echo "All instances deleted"
fi
if [[ -n $(${AWS_ASG_CMD} describe-launch-configurations --launch-configuration-names ${ASG_NAME} --query LaunchConfigurations[].LaunchConfigurationName) ]]; then
echo "Warning: default auto-scaling launch configuration ${ASG_NAME} still exists, attempting to delete"
echo " (This may happen if kube-up leaves just the launch configuration but no auto-scaling group.)"
${AWS_ASG_CMD} delete-launch-configuration --launch-configuration-name ${ASG_NAME} || true
fi
# Release the master's elastic IP and delete its persistent volume.
find-master-pd
find-tagged-master-ip
if [[ -n "${KUBE_MASTER_IP:-}" ]]; then
release-elastic-ip ${KUBE_MASTER_IP}
fi
if [[ -n "${MASTER_DISK_ID:-}" ]]; then
echo "Deleting volume ${MASTER_DISK_ID}"
$AWS_CMD delete-volume --volume-id ${MASTER_DISK_ID} > $LOG
fi
echo "Cleaning up resources in VPC: ${vpc_id}"
default_sg_id=$($AWS_CMD describe-security-groups \
--filters Name=vpc-id,Values=${vpc_id} \
Name=group-name,Values=default \
--query SecurityGroups[].GroupId \
| tr "\t" "\n")
sg_ids=$($AWS_CMD describe-security-groups \
--filters Name=vpc-id,Values=${vpc_id} \
Name=tag:KubernetesCluster,Values=${CLUSTER_ID} \
--query SecurityGroups[].GroupId \
| tr "\t" "\n")
# First delete any inter-security group ingress rules
# (otherwise we get dependency violations)
for sg_id in ${sg_ids}; do
# EC2 doesn't let us delete the default security group
if [[ "${sg_id}" == "${default_sg_id}" ]]; then
continue
fi
echo "Cleaning up security group: ${sg_id}"
other_sgids=$(${AWS_CMD} describe-security-groups --group-id "${sg_id}" --query SecurityGroups[].IpPermissions[].UserIdGroupPairs[].GroupId)
for other_sgid in ${other_sgids}; do
$AWS_CMD revoke-security-group-ingress --group-id "${sg_id}" --source-group "${other_sgid}" --protocol all > $LOG
done
done
for sg_id in ${sg_ids}; do
# EC2 doesn't let us delete the default security group
if [[ "${sg_id}" == "${default_sg_id}" ]]; then
continue
fi
delete_security_group ${sg_id}
done
# Subnets, then internet gateways (detach before delete).
subnet_ids=$($AWS_CMD describe-subnets \
--filters Name=vpc-id,Values=${vpc_id} \
Name=tag:KubernetesCluster,Values=${CLUSTER_ID} \
--query Subnets[].SubnetId \
| tr "\t" "\n")
for subnet_id in ${subnet_ids}; do
$AWS_CMD delete-subnet --subnet-id ${subnet_id} > $LOG
done
igw_ids=$($AWS_CMD describe-internet-gateways \
--filters Name=attachment.vpc-id,Values=${vpc_id} \
--query InternetGateways[].InternetGatewayId \
| tr "\t" "\n")
for igw_id in ${igw_ids}; do
$AWS_CMD detach-internet-gateway --internet-gateway-id $igw_id --vpc-id $vpc_id > $LOG
$AWS_CMD delete-internet-gateway --internet-gateway-id $igw_id > $LOG
done
# Remove the default route from any table that carries it, then delete
# the cluster-tagged route tables themselves.
route_table_ids=$($AWS_CMD describe-route-tables \
--filters Name=vpc-id,Values=$vpc_id \
Name=route.destination-cidr-block,Values=0.0.0.0/0 \
--query RouteTables[].RouteTableId \
| tr "\t" "\n")
for route_table_id in ${route_table_ids}; do
$AWS_CMD delete-route --route-table-id $route_table_id --destination-cidr-block 0.0.0.0/0 > $LOG
done
route_table_ids=$($AWS_CMD describe-route-tables \
--filters Name=vpc-id,Values=$vpc_id \
Name=tag:KubernetesCluster,Values=${CLUSTER_ID} \
--query RouteTables[].RouteTableId \
| tr "\t" "\n")
for route_table_id in ${route_table_ids}; do
$AWS_CMD delete-route-table --route-table-id $route_table_id > $LOG
done
echo "Deleting VPC: ${vpc_id}"
$AWS_CMD delete-vpc --vpc-id $vpc_id > $LOG
else
echo "" >&2
echo -e "${color_red}Cluster NOT deleted!${color_norm}" >&2
echo "" >&2
echo "No VPC was found with tag KubernetesCluster=${CLUSTER_ID}" >&2
echo "" >&2
echo "If you are trying to delete a cluster in a shared VPC," >&2
echo "please consider using one of the methods in the kube-deploy repo." >&2
echo "See: https://github.com/kubernetes/kube-deploy/blob/master/docs/delete_cluster.md" >&2
echo "" >&2
echo "Note: You may be seeing this message may be because the cluster was already deleted, or" >&2
echo "has a name other than '${CLUSTER_ID}'." >&2
fi
}
  1236. # Update a kubernetes cluster with latest source
function kube-push {
# Re-stages release tars, then streams an update script over SSH to the
# master, which re-downloads binaries and re-runs salt highstate.
detect-master
# Make sure we have the tar files staged on Google Storage
find-release-tars
create-bootstrap-script
upload-server-tars
(
# Generated script executed on the master via 'sudo bash' below.
echo "#! /bin/bash"
echo "mkdir -p /var/cache/kubernetes-install"
echo "cd /var/cache/kubernetes-install"
echo "readonly SERVER_BINARY_TAR_URL='${SERVER_BINARY_TAR_URL}'"
echo "readonly SALT_TAR_URL='${SALT_TAR_URL}'"
# Inline the helper templates, stripping comment lines to save space.
grep -v "^#" "${KUBE_ROOT}/cluster/aws/templates/common.sh"
grep -v "^#" "${KUBE_ROOT}/cluster/aws/templates/download-release.sh"
echo "echo Executing configuration"
echo "sudo salt '*' mine.update"
echo "sudo salt --force-color '*' state.highstate"
) | ssh -oStrictHostKeyChecking=no -i "${AWS_SSH_KEY}" ${SSH_USER}@${KUBE_MASTER_IP} sudo bash
get-kubeconfig-basicauth
echo
echo "Kubernetes cluster is running. The master is running at:"
echo
echo " https://${KUBE_MASTER_IP}"
echo
}
  1262. # -----------------------------------------------------------------------------
  1263. # Cluster specific test helpers used from hack/e2e.go
  1264. # Execute prior to running tests to build a release if required for env.
  1265. #
  1266. # Assumed Vars:
  1267. # KUBE_ROOT
  1268. function test-build-release {
  1269. # Make a release
  1270. "${KUBE_ROOT}/build/release.sh"
  1271. }
  1272. # Execute prior to running tests to initialize required structure. This is
  1273. # called from hack/e2e.go only when running -up.
  1274. #
  1275. # Assumed vars:
  1276. # Variables from config.sh
  1277. function test-setup {
  1278. "${KUBE_ROOT}/cluster/kube-up.sh"
  1279. VPC_ID=$(get_vpc_id)
  1280. detect-security-groups
  1281. # Open up port 80 & 8080 so common containers on minions can be reached
  1282. # TODO(roberthbailey): Remove this once we are no longer relying on hostPorts.
  1283. authorize-security-group-ingress "${NODE_SG_ID}" "--protocol tcp --port 80 --cidr 0.0.0.0/0"
  1284. authorize-security-group-ingress "${NODE_SG_ID}" "--protocol tcp --port 8080 --cidr 0.0.0.0/0"
  1285. # Open up the NodePort range
  1286. # TODO(justinsb): Move to main setup, if we decide whether we want to do this by default.
  1287. authorize-security-group-ingress "${NODE_SG_ID}" "--protocol all --port 30000-32767 --cidr 0.0.0.0/0"
  1288. echo "test-setup complete"
  1289. }
  1290. # Execute after running tests to perform any required clean-up. This is called
  1291. # from hack/e2e.go
  1292. function test-teardown {
  1293. # (ingress rules will be deleted along with the security group)
  1294. echo "Shutting down test cluster."
  1295. "${KUBE_ROOT}/cluster/kube-down.sh"
  1296. }
  1297. # Gets the hostname (or IP) that we should SSH to for the given nodename
  1298. # For the master, we use the nodename, for the nodes we use their instanceids
  1299. function get_ssh_hostname {
  1300. local node="$1"
  1301. if [[ "${node}" == "${MASTER_NAME}" ]]; then
  1302. node=$(get_instanceid_from_name ${MASTER_NAME})
  1303. if [[ -z "${node-}" ]]; then
  1304. echo "Could not detect Kubernetes master node. Make sure you've launched a cluster with 'kube-up.sh'" 1>&2
  1305. exit 1
  1306. fi
  1307. fi
  1308. local ip=$(get_instance_public_ip ${node})
  1309. if [[ -z "$ip" ]]; then
  1310. echo "Could not detect IP for ${node}." 1>&2
  1311. exit 1
  1312. fi
  1313. echo ${ip}
  1314. }
  1315. # SSH to a node by name ($1) and run a command ($2).
  1316. function ssh-to-node {
  1317. local node="$1"
  1318. local cmd="$2"
  1319. local ip=$(get_ssh_hostname ${node})
  1320. for try in {1..5}; do
  1321. if ssh -oLogLevel=quiet -oConnectTimeout=30 -oStrictHostKeyChecking=no -i "${AWS_SSH_KEY}" ${SSH_USER}@${ip} "echo test > /dev/null"; then
  1322. break
  1323. fi
  1324. sleep 5
  1325. done
  1326. ssh -oLogLevel=quiet -oConnectTimeout=30 -oStrictHostKeyChecking=no -i "${AWS_SSH_KEY}" ${SSH_USER}@${ip} "${cmd}"
  1327. }
  1328. # Perform preparations required to run e2e tests
  1329. function prepare-e2e() {
  1330. # (AWS runs detect-project, I don't think we need to anything)
  1331. # Note: we can't print anything here, or else the test tools will break with the extra output
  1332. return
  1333. }
  1334. function get-tokens() {
  1335. KUBELET_TOKEN=$(dd if=/dev/urandom bs=128 count=1 2>/dev/null | base64 | tr -d "=+/" | dd bs=32 count=1 2>/dev/null)
  1336. KUBE_PROXY_TOKEN=$(dd if=/dev/urandom bs=128 count=1 2>/dev/null | base64 | tr -d "=+/" | dd bs=32 count=1 2>/dev/null)
  1337. }