kube-addon-update.sh 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520
  1. #!/bin/bash
  2. # Copyright 2015 The Kubernetes Authors.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. # The business logic for whether a given object should be created
  16. # was already enforced by salt, and /etc/kubernetes/addons is the
  17. # managed result of that. Start everything below that directory.
  18. # Parameters
  19. # $1 path to add-ons
  20. # LIMITATIONS
  21. # 1. controllers are not updated unless their name is changed
  22. # 3. Services will not be updated unless their name is changed,
  23. # but for services we actually want updates without name change.
  24. # 4. Json files are not handled at all. Currently addons must be
  25. # in yaml files
  26. # 5. exit code is probably not always correct (I haven't checked
  27. # carefully if it works in 100% cases)
  28. # 6. There are no unittests
  29. # 8. Will not work if the total length of paths to addons is greater than
  30. # bash can handle. Probably it is not a problem: ARG_MAX=2097152 on GCE.
  31. # 9. Performance issue: yaml files are read many times in a single execution.
  32. # cosmetic improvements to be done
  33. # 1. improve the log function; add timestamp, file name, etc.
  34. # 2. logging doesn't work from files that print things out.
  35. # 3. kubectl prints the output to stderr (the output should be captured and then
  36. # logged)
  37. # global config
  38. KUBECTL=${TEST_KUBECTL:-} # substitute for tests
  39. KUBECTL=${KUBECTL:-${KUBECTL_BIN:-}}
  40. KUBECTL=${KUBECTL:-/usr/local/bin/kubectl}
  41. if [[ ! -x ${KUBECTL} ]]; then
  42. echo "ERROR: kubectl command (${KUBECTL}) not found or is not executable" 1>&2
  43. exit 1
  44. fi
  45. KUBECTL_OPTS=${KUBECTL_OPTS:-}
  46. # If an add-on definition is incorrect, or a definition has just disappeared
  47. # from the local directory, the script will still keep on retrying.
  48. # The script does not end until all retries are done, so
  49. # one invalid manifest may block updates of other add-ons.
  50. # Be careful how you set these parameters
  51. NUM_TRIES=1 # will be updated based on input parameters
  52. DELAY_AFTER_ERROR_SEC=${TEST_DELAY_AFTER_ERROR_SEC:=10}
  53. # remember that you can't log from functions that print some output (because
  54. # logs are also printed on stdout)
  55. # $1 level
  56. # $2 message
  57. function log() {
  58. # manage log levels manually here
  59. # add the timestamp if you find it useful
  60. case $1 in
  61. DB3 )
  62. # echo "$1: $2"
  63. ;;
  64. DB2 )
  65. # echo "$1: $2"
  66. ;;
  67. DBG )
  68. # echo "$1: $2"
  69. ;;
  70. INFO )
  71. echo "$1: $2"
  72. ;;
  73. WRN )
  74. echo "$1: $2"
  75. ;;
  76. ERR )
  77. echo "$1: $2"
  78. ;;
  79. * )
  80. echo "INVALID_LOG_LEVEL $1: $2"
  81. ;;
  82. esac
  83. }
  84. #$1 yaml file path
  85. function get-object-kind-from-file() {
  86. # prints to stdout, so log cannot be used
  87. #WARNING: only yaml is supported
  88. cat $1 | python -c '''
  89. try:
  90. import pipes,sys,yaml
  91. y = yaml.load(sys.stdin)
  92. labels = y["metadata"]["labels"]
  93. if ("kubernetes.io/cluster-service", "true") not in labels.iteritems():
  94. # all add-ons must have the label "kubernetes.io/cluster-service".
  95. # Otherwise we are ignoring them (the update will not work anyway)
  96. print "ERROR"
  97. else:
  98. print y["kind"]
  99. except Exception, ex:
  100. print "ERROR"
  101. '''
  102. }
  103. # $1 yaml file path
  104. # returns a string of the form <namespace>/<name> (we call it nsnames)
  105. function get-object-nsname-from-file() {
  106. # prints to stdout, so log cannot be used
  107. #WARNING: only yaml is supported
  108. #addons that do not specify a namespace are assumed to be in "default".
  109. cat $1 | python -c '''
  110. try:
  111. import pipes,sys,yaml
  112. y = yaml.load(sys.stdin)
  113. labels = y["metadata"]["labels"]
  114. if ("kubernetes.io/cluster-service", "true") not in labels.iteritems():
  115. # all add-ons must have the label "kubernetes.io/cluster-service".
  116. # Otherwise we are ignoring them (the update will not work anyway)
  117. print "ERROR"
  118. else:
  119. try:
  120. print "%s/%s" % (y["metadata"]["namespace"], y["metadata"]["name"])
  121. except Exception, ex:
  122. print "/%s" % y["metadata"]["name"]
  123. except Exception, ex:
  124. print "ERROR"
  125. '''
  126. }
  127. # $1 addon directory path
  128. # $2 addon type (e.g. ReplicationController)
  129. # echoes the string with paths to files containing addon for the given type
  130. # works only for yaml files (!) (ignores json files)
  131. function get-addon-paths-from-disk() {
  132. # prints to stdout, so log cannot be used
  133. local -r addon_dir=$1
  134. local -r obj_type=$2
  135. local kind
  136. local file_path
  137. for file_path in $(find ${addon_dir} -name \*.yaml); do
  138. kind=$(get-object-kind-from-file ${file_path})
  139. # WARNING: assumption that the topmost indentation is zero (I'm not sure yaml allows for topmost indentation)
  140. if [[ "${kind}" == "${obj_type}" ]]; then
  141. echo ${file_path}
  142. fi
  143. done
  144. }
  145. # waits for all subprocesses
  146. # returns 0 if all of them were successful and 1 otherwise
  147. function wait-for-jobs() {
  148. local rv=0
  149. local pid
  150. for pid in $(jobs -p); do
  151. wait ${pid}
  152. if [[ $? -ne 0 ]]; then
  153. rv=1;
  154. log ERR "error in pid ${pid}"
  155. fi
  156. log DB2 "pid ${pid} completed, current error code: ${rv}"
  157. done
  158. return ${rv}
  159. }
  160. function run-until-success() {
  161. local -r command=$1
  162. local tries=$2
  163. local -r delay=$3
  164. local -r command_name=$1
  165. while [ ${tries} -gt 0 ]; do
  166. log DBG "executing: '$command'"
  167. # let's give the command as an argument to bash -c, so that we can use
  168. # && and || inside the command itself
  169. /bin/bash -c "${command}" && \
  170. log DB3 "== Successfully executed ${command_name} at $(date -Is) ==" && \
  171. return 0
  172. let tries=tries-1
  173. log INFO "== Failed to execute ${command_name} at $(date -Is). ${tries} tries remaining. =="
  174. sleep ${delay}
  175. done
  176. return 1
  177. }
  178. # $1 object type
  179. # returns a list of <namespace>/<name> pairs (nsnames)
  180. function get-addon-nsnames-from-server() {
  181. local -r obj_type=$1
  182. "${KUBECTL}" "${KUBECTL_OPTS}" get "${obj_type}" --all-namespaces -o go-template="{{range.items}}{{.metadata.namespace}}/{{.metadata.name}} {{end}}" -l kubernetes.io/cluster-service=true | sed 's/<no value>//g'
  183. }
  184. # returns the characters after the last separator (including)
  185. # If the separator is empty or if it doesn't appear in the string,
  186. # an empty string is printed
  187. # $1 input string
  188. # $2 separator (must be single character, or empty)
  189. function get-suffix() {
  190. # prints to stdout, so log cannot be used
  191. local -r input_string=$1
  192. local -r separator=$2
  193. local suffix
  194. if [[ "${separator}" == "" ]]; then
  195. echo ""
  196. return
  197. fi
  198. if [[ "${input_string}" == *"${separator}"* ]]; then
  199. suffix=$(echo "${input_string}" | rev | cut -d "${separator}" -f1 | rev)
  200. echo "${separator}${suffix}"
  201. else
  202. echo ""
  203. fi
  204. }
  205. # returns the characters up to the last '-' (without it)
  206. # $1 input string
  207. # $2 separator
  208. function get-basename() {
  209. # prints to stdout, so log cannot be used
  210. local -r input_string=$1
  211. local -r separator=$2
  212. local suffix
  213. suffix="$(get-suffix ${input_string} ${separator})"
  214. # this will strip the suffix (if matches)
  215. echo ${input_string%$suffix}
  216. }
  217. function delete-object() {
  218. local -r obj_type=$1
  219. local -r namespace=$2
  220. local -r obj_name=$3
  221. log INFO "Deleting ${obj_type} ${namespace}/${obj_name}"
  222. run-until-success "${KUBECTL} ${KUBECTL_OPTS} delete --namespace=${namespace} ${obj_type} ${obj_name}" ${NUM_TRIES} ${DELAY_AFTER_ERROR_SEC}
  223. }
  224. function create-object() {
  225. local -r obj_type=$1
  226. local -r file_path=$2
  227. local nsname_from_file
  228. nsname_from_file=$(get-object-nsname-from-file ${file_path})
  229. if [[ "${nsname_from_file}" == "ERROR" ]]; then
  230. log INFO "Cannot read object name from ${file_path}. Ignoring"
  231. return 1
  232. fi
  233. IFS='/' read namespace obj_name <<< "${nsname_from_file}"
  234. log INFO "Creating new ${obj_type} from file ${file_path} in namespace ${namespace}, name: ${obj_name}"
  235. # this will keep on failing if the ${file_path} disappeared in the meantime.
  236. # Do not use too many retries.
  237. if [[ -n "${namespace}" ]]; then
  238. run-until-success "${KUBECTL} ${KUBECTL_OPTS} create --namespace=${namespace} -f ${file_path}" ${NUM_TRIES} ${DELAY_AFTER_ERROR_SEC}
  239. else
  240. run-until-success "${KUBECTL} ${KUBECTL_OPTS} create -f ${file_path}" ${NUM_TRIES} ${DELAY_AFTER_ERROR_SEC}
  241. fi
  242. }
  243. function update-object() {
  244. local -r obj_type=$1
  245. local -r namespace=$2
  246. local -r obj_name=$3
  247. local -r file_path=$4
  248. log INFO "updating the ${obj_type} ${namespace}/${obj_name} with the new definition ${file_path}"
  249. delete-object ${obj_type} ${namespace} ${obj_name}
  250. create-object ${obj_type} ${file_path}
  251. }
  252. # deletes the objects from the server
  253. # $1 object type
  254. # $2 a list of object nsnames
  255. function delete-objects() {
  256. local -r obj_type=$1
  257. local -r obj_nsnames=$2
  258. local namespace
  259. local obj_name
  260. for nsname in ${obj_nsnames}; do
  261. IFS='/' read namespace obj_name <<< "${nsname}"
  262. delete-object ${obj_type} ${namespace} ${obj_name} &
  263. done
  264. }
  265. # creates objects from the given files
  266. # $1 object type
  267. # $2 a list of paths to definition files
  268. function create-objects() {
  269. local -r obj_type=$1
  270. local -r file_paths=$2
  271. local file_path
  272. for file_path in ${file_paths}; do
  273. # Remember that the file may have disappear by now
  274. # But we don't want to check it here because
  275. # such race condition may always happen after
  276. # we check it. Let's have the race
  277. # condition happen a bit more often so that
  278. # we see that our tests pass anyway.
  279. create-object ${obj_type} ${file_path} &
  280. done
  281. }
  282. # updates objects
  283. # $1 object type
  284. # $2 a list of update specifications
  285. # each update specification is a ';' separated pair: <nsname>;<file path>
  286. function update-objects() {
  287. local -r obj_type=$1 # ignored
  288. local -r update_spec=$2
  289. local objdesc
  290. local nsname
  291. local obj_name
  292. local namespace
  293. for objdesc in ${update_spec}; do
  294. IFS=';' read nsname file_path <<< "${objdesc}"
  295. IFS='/' read namespace obj_name <<< "${nsname}"
  296. update-object ${obj_type} ${namespace} ${obj_name} ${file_path} &
  297. done
  298. }
  299. # Global variables set by function match-objects.
  300. nsnames_for_delete="" # a list of object nsnames to be deleted
  301. for_update="" # a list of pairs <nsname>;<filePath> for objects that should be updated
  302. nsnames_for_ignore="" # a list of object nsnames that will be ignored
  303. new_files="" # a list of file paths that weren't matched by any existing objects (these objects must be created now)
  304. # $1 path to files with objects
  305. # $2 object type in the API (ReplicationController or Service)
  306. # $3 name separator (single character or empty)
  307. function match-objects() {
  308. local -r addon_dir=$1
  309. local -r obj_type=$2
  310. local -r separator=$3
  311. # output variables (globals)
  312. nsnames_for_delete=""
  313. for_update=""
  314. nsnames_for_ignore=""
  315. new_files=""
  316. addon_nsnames_on_server=$(get-addon-nsnames-from-server "${obj_type}")
  317. # if the api server is unavailable then abandon the update for this cycle
  318. if [[ $? -ne 0 ]]; then
  319. log ERR "unable to query ${obj_type} - exiting"
  320. exit 1
  321. fi
  322. addon_paths_in_files=$(get-addon-paths-from-disk "${addon_dir}" "${obj_type}")
  323. log DB2 "addon_nsnames_on_server=${addon_nsnames_on_server}"
  324. log DB2 "addon_paths_in_files=${addon_paths_in_files}"
  325. local matched_files=""
  326. local basensname_on_server=""
  327. local nsname_on_server=""
  328. local suffix_on_server=""
  329. local nsname_from_file=""
  330. local suffix_from_file=""
  331. local found=0
  332. local addon_path=""
  333. # objects that were moved between namespaces will have different nsname
  334. # because the namespace is included. So they will be treated
  335. # like different objects and not updated but deleted and created again
  336. # (in the current version update is also delete+create, so it does not matter)
  337. for nsname_on_server in ${addon_nsnames_on_server}; do
  338. basensname_on_server=$(get-basename ${nsname_on_server} ${separator})
  339. suffix_on_server="$(get-suffix ${nsname_on_server} ${separator})"
  340. log DB3 "Found existing addon ${nsname_on_server}, basename=${basensname_on_server}"
  341. # check if the addon is present in the directory and decide
  342. # what to do with it
  343. # this is not optimal because we're reading the files over and over
  344. # again. But for small number of addons it doesn't matter so much.
  345. found=0
  346. for addon_path in ${addon_paths_in_files}; do
  347. nsname_from_file=$(get-object-nsname-from-file ${addon_path})
  348. if [[ "${nsname_from_file}" == "ERROR" ]]; then
  349. log INFO "Cannot read object name from ${addon_path}. Ignoring"
  350. continue
  351. else
  352. log DB2 "Found object name '${nsname_from_file}' in file ${addon_path}"
  353. fi
  354. suffix_from_file="$(get-suffix ${nsname_from_file} ${separator})"
  355. log DB3 "matching: ${basensname_on_server}${suffix_from_file} == ${nsname_from_file}"
  356. if [[ "${basensname_on_server}${suffix_from_file}" == "${nsname_from_file}" ]]; then
  357. log DB3 "matched existing ${obj_type} ${nsname_on_server} to file ${addon_path}; suffix_on_server=${suffix_on_server}, suffix_from_file=${suffix_from_file}"
  358. found=1
  359. matched_files="${matched_files} ${addon_path}"
  360. if [[ "${suffix_on_server}" == "${suffix_from_file}" ]]; then
  361. nsnames_for_ignore="${nsnames_for_ignore} ${nsname_from_file}"
  362. else
  363. for_update="${for_update} ${nsname_on_server};${addon_path}"
  364. fi
  365. break
  366. fi
  367. done
  368. if [[ ${found} -eq 0 ]]; then
  369. log DB2 "No definition file found for replication controller ${nsname_on_server}. Scheduling for deletion"
  370. nsnames_for_delete="${nsnames_for_delete} ${nsname_on_server}"
  371. fi
  372. done
  373. log DB3 "matched_files=${matched_files}"
  374. # note that if the addon file is invalid (or got removed after listing files
  375. # but before we managed to match it) it will not be matched to any
  376. # of the existing objects. So we will treat it as a new file
  377. # and try to create its object.
  378. for addon_path in ${addon_paths_in_files}; do
  379. echo ${matched_files} | grep "${addon_path}" >/dev/null
  380. if [[ $? -ne 0 ]]; then
  381. new_files="${new_files} ${addon_path}"
  382. fi
  383. done
  384. }
  385. function reconcile-objects() {
  386. local -r addon_path=$1
  387. local -r obj_type=$2
  388. local -r separator=$3 # name separator
  389. match-objects ${addon_path} ${obj_type} ${separator}
  390. log DBG "${obj_type}: nsnames_for_delete=${nsnames_for_delete}"
  391. log DBG "${obj_type}: for_update=${for_update}"
  392. log DBG "${obj_type}: nsnames_for_ignore=${nsnames_for_ignore}"
  393. log DBG "${obj_type}: new_files=${new_files}"
  394. delete-objects "${obj_type}" "${nsnames_for_delete}"
  395. # wait for jobs below is a protection against changing the basename
  396. # of a replication controllerm without changing the selector.
  397. # If we don't wait, the new rc may be created before the old one is deleted
  398. # In such case the old one will wait for all its pods to be gone, but the pods
  399. # are created by the new replication controller.
  400. # passing --cascade=false could solve the problem, but we want
  401. # all orphan pods to be deleted.
  402. wait-for-jobs
  403. deleteResult=$?
  404. create-objects "${obj_type}" "${new_files}"
  405. update-objects "${obj_type}" "${for_update}"
  406. local nsname
  407. for nsname in ${nsnames_for_ignore}; do
  408. log DB2 "The ${obj_type} ${nsname} is already up to date"
  409. done
  410. wait-for-jobs
  411. createUpdateResult=$?
  412. if [[ ${deleteResult} -eq 0 ]] && [[ ${createUpdateResult} -eq 0 ]]; then
  413. return 0
  414. else
  415. return 1
  416. fi
  417. }
  418. function update-addons() {
  419. local -r addon_path=$1
  420. # be careful, reconcile-objects uses global variables
  421. reconcile-objects ${addon_path} ReplicationController "-" &
  422. reconcile-objects ${addon_path} Deployment "-" &
  423. reconcile-objects ${addon_path} DaemonSet "-" &
  424. reconcile-objects ${addon_path} PetSet "-" &
  425. # We don't expect names to be versioned for the following kinds, so
  426. # we match the entire name, ignoring version suffix.
  427. # That's why we pass an empty string as the version separator.
  428. # If the description differs on disk, the object should be recreated.
  429. # This is not implemented in this version.
  430. reconcile-objects ${addon_path} Service "" &
  431. reconcile-objects ${addon_path} PersistentVolume "" &
  432. reconcile-objects ${addon_path} PersistentVolumeClaim "" &
  433. reconcile-objects ${addon_path} ConfigMap "" &
  434. wait-for-jobs
  435. if [[ $? -eq 0 ]]; then
  436. log INFO "== Kubernetes addon update completed successfully at $(date -Is) =="
  437. else
  438. log WRN "== Kubernetes addon update completed with errors at $(date -Is) =="
  439. fi
  440. }
  441. # input parameters:
  442. # $1 input directory
  443. # $2 retry period in seconds - the script will retry api-server errors for approximately
  444. # this amound of time (it is not very precise), at interval equal $DELAY_AFTER_ERROR_SEC.
  445. #
  446. if [[ $# -ne 2 ]]; then
  447. echo "Illegal number of parameters. Usage $0 addon-dir [retry-period]" 1>&2
  448. exit 1
  449. fi
  450. NUM_TRIES=$(($2 / ${DELAY_AFTER_ERROR_SEC}))
  451. if [[ ${NUM_TRIES} -le 0 ]]; then
  452. NUM_TRIES=1
  453. fi
  454. addon_path=$1
  455. update-addons ${addon_path}