docker_manager.go

/*
Copyright 2015 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package dockertools

import (
	"bytes"
	"crypto/md5"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"os/exec"
	"path"
	"path/filepath"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/coreos/go-semver/semver"
	dockertypes "github.com/docker/engine-api/types"
	dockercontainer "github.com/docker/engine-api/types/container"
	dockerstrslice "github.com/docker/engine-api/types/strslice"
	dockerapiversion "github.com/docker/engine-api/types/versions"
	dockernat "github.com/docker/go-connections/nat"
	"github.com/golang/glog"
	cadvisorapi "github.com/google/cadvisor/info/v1"

	"k8s.io/kubernetes/pkg/api"
	"k8s.io/kubernetes/pkg/api/unversioned"
	"k8s.io/kubernetes/pkg/client/record"
	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
	"k8s.io/kubernetes/pkg/kubelet/events"
	"k8s.io/kubernetes/pkg/kubelet/images"
	"k8s.io/kubernetes/pkg/kubelet/lifecycle"
	"k8s.io/kubernetes/pkg/kubelet/metrics"
	"k8s.io/kubernetes/pkg/kubelet/network"
	"k8s.io/kubernetes/pkg/kubelet/network/hairpin"
	proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results"
	"k8s.io/kubernetes/pkg/kubelet/qos"
	"k8s.io/kubernetes/pkg/kubelet/types"
	"k8s.io/kubernetes/pkg/kubelet/util/cache"
	"k8s.io/kubernetes/pkg/kubelet/util/format"
	"k8s.io/kubernetes/pkg/runtime"
	"k8s.io/kubernetes/pkg/security/apparmor"
	"k8s.io/kubernetes/pkg/securitycontext"
	kubetypes "k8s.io/kubernetes/pkg/types"
	"k8s.io/kubernetes/pkg/util/flowcontrol"
	"k8s.io/kubernetes/pkg/util/oom"
	"k8s.io/kubernetes/pkg/util/procfs"
	utilruntime "k8s.io/kubernetes/pkg/util/runtime"
	"k8s.io/kubernetes/pkg/util/sets"
	utilstrings "k8s.io/kubernetes/pkg/util/strings"
	"k8s.io/kubernetes/pkg/util/term"
)

const (
	DockerType = "docker"

	// https://docs.docker.com/engine/reference/api/docker_remote_api/
	// docker version should be at least 1.9.x
	minimumDockerAPIVersion = "1.21"

	// Remote API version for docker daemon versions
	// https://docs.docker.com/engine/reference/api/docker_remote_api/
	dockerV110APIVersion = "1.22"
	DockerV112APIVersion = "1.24"

	// ndots specifies the minimum number of dots that a domain name must contain
	// for the resolver to consider it fully qualified (FQDN). We want SRV lookup
	// names like _dns._udp.kube-dns.default.svc to be considered relative; hence
	// ndots is set to 5.
	ndotsDNSOption = "options ndots:5\n"

	// In order to avoid unnecessary SIGKILLs, give every container a minimum grace
	// period after SIGTERM. Docker will guarantee the termination, but SIGTERM is
	// potentially dangerous.
	// TODO: evaluate whether there are scenarios in which SIGKILL is preferable to
	// SIGTERM for certain process types, which may justify setting this to 0.
	minimumGracePeriodInSeconds = 2

	DockerNetnsFmt = "/proc/%v/ns/net"

	// String used to detect docker host mode for various namespaces (e.g.
	// networking). Must match the value returned by docker inspect -f
	// '{{.HostConfig.NetworkMode}}'.
	namespaceModeHost = "host"

	// The expiration time of the version cache.
	versionCacheTTL = 60 * time.Second
)
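
// Note on ndotsDNSOption above: with ndots:5, a name such as
// "_dns._udp.kube-dns.default.svc" (four dots) is treated as relative and is
// resolved against the entries in the resolv.conf search path before being
// tried as an absolute name; only names with five or more dots are queried
// as-is first.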
var (
	// DockerManager implements the Runtime interface.
	_ kubecontainer.Runtime = &DockerManager{}

	// TODO: make this a TTL based pull (if image older than X policy, pull)
	podInfraContainerImagePullPolicy = api.PullIfNotPresent

	// Default set of seccomp security options.
	defaultSeccompOpt = []dockerOpt{{"seccomp", "unconfined", ""}}
)

type DockerManager struct {
	client              DockerInterface
	recorder            record.EventRecorder
	containerRefManager *kubecontainer.RefManager
	os                  kubecontainer.OSInterface
	machineInfo         *cadvisorapi.MachineInfo

	// The image name of the pod infra container.
	podInfraContainerImage string

	// (Optional) Additional environment variables to be set for the pod infra container.
	podInfraContainerEnv []api.EnvVar

	// TODO(yifan): Record the pull failure so we can eliminate the image checking?
	// Lower level docker image puller.
	dockerPuller DockerPuller

	// Wrapped image puller.
	imagePuller images.ImageManager

	// Root of the Docker runtime.
	dockerRoot string

	// Directory of container logs.
	containerLogsDir string

	// Network plugin.
	networkPlugin network.NetworkPlugin

	// Health check results.
	livenessManager proberesults.Manager

	// RuntimeHelper that wraps kubelet to generate runtime container options.
	runtimeHelper kubecontainer.RuntimeHelper

	// Runner of lifecycle events.
	runner kubecontainer.HandlerRunner

	// Handler used to execute commands in containers.
	execHandler ExecHandler

	// Used to set OOM scores of processes.
	oomAdjuster *oom.OOMAdjuster

	// Get information from /proc mount.
	procFs procfs.ProcFSInterface

	// If true, enforce container cpu limits with CFS quota support.
	cpuCFSQuota bool

	// Container GC manager.
	containerGC *containerGC

	// Support for gathering custom metrics.
	enableCustomMetrics bool

	// If true, the "hairpin mode" flag is set on container interfaces.
	// A false value means the kubelet just backs off from setting it;
	// it might already be true.
	configureHairpinMode bool

	// Provides image stats.
	*imageStatsProvider

	// The version cache of the docker daemon.
	versionCache *cache.ObjectCache

	// Directory to host local seccomp profiles.
	seccompProfileRoot string
}

// A subset of the pod.Manager interface extracted for testing purposes.
type podGetter interface {
	GetPodByUID(kubetypes.UID) (*api.Pod, bool)
}

// PodInfraContainerEnv returns an option that adds the given environment
// variables to the pod infra container.
func PodInfraContainerEnv(env map[string]string) kubecontainer.Option {
	return func(rt kubecontainer.Runtime) {
		dm := rt.(*DockerManager)
		for k, v := range env {
			dm.podInfraContainerEnv = append(dm.podInfraContainerEnv, api.EnvVar{
				Name:  k,
				Value: v,
			})
		}
	}
}
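
// Illustrative (hypothetical values) usage: the option is passed through
// NewDockerManager's trailing variadic options, e.g.
//
//	NewDockerManager(client, recorder, /* ... */, PodInfraContainerEnv(map[string]string{
//		"HTTP_PROXY": "http://proxy.example.com:3128",
//	}))
//
// so every pod infra container started by this manager inherits the variable.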
func NewDockerManager(
	client DockerInterface,
	recorder record.EventRecorder,
	livenessManager proberesults.Manager,
	containerRefManager *kubecontainer.RefManager,
	podGetter podGetter,
	machineInfo *cadvisorapi.MachineInfo,
	podInfraContainerImage string,
	qps float32,
	burst int,
	containerLogsDir string,
	osInterface kubecontainer.OSInterface,
	networkPlugin network.NetworkPlugin,
	runtimeHelper kubecontainer.RuntimeHelper,
	httpClient types.HttpGetter,
	execHandler ExecHandler,
	oomAdjuster *oom.OOMAdjuster,
	procFs procfs.ProcFSInterface,
	cpuCFSQuota bool,
	imageBackOff *flowcontrol.Backoff,
	serializeImagePulls bool,
	enableCustomMetrics bool,
	hairpinMode bool,
	seccompProfileRoot string,
	options ...kubecontainer.Option) *DockerManager {
	// Wrap the docker client with instrumentedDockerInterface.
	client = NewInstrumentedDockerInterface(client)

	// Work out the location of the Docker runtime, defaulting to /var/lib/docker
	// if there are any problems.
	dockerRoot := "/var/lib/docker"
	dockerInfo, err := client.Info()
	if err != nil {
		glog.Errorf("Failed to execute Info() call to the Docker client: %v", err)
		glog.Warningf("Using fallback default of /var/lib/docker for location of Docker runtime")
	} else {
		dockerRoot = dockerInfo.DockerRootDir
		glog.Infof("Setting dockerRoot to %s", dockerRoot)
	}

	dm := &DockerManager{
		client:                 client,
		recorder:               recorder,
		containerRefManager:    containerRefManager,
		os:                     osInterface,
		machineInfo:            machineInfo,
		podInfraContainerImage: podInfraContainerImage,
		dockerPuller:           newDockerPuller(client, qps, burst),
		dockerRoot:             dockerRoot,
		containerLogsDir:       containerLogsDir,
		networkPlugin:          networkPlugin,
		livenessManager:        livenessManager,
		runtimeHelper:          runtimeHelper,
		execHandler:            execHandler,
		oomAdjuster:            oomAdjuster,
		procFs:                 procFs,
		cpuCFSQuota:            cpuCFSQuota,
		enableCustomMetrics:    enableCustomMetrics,
		configureHairpinMode:   hairpinMode,
		imageStatsProvider:     newImageStatsProvider(client),
		seccompProfileRoot:     seccompProfileRoot,
	}
	dm.runner = lifecycle.NewHandlerRunner(httpClient, dm, dm)
	dm.imagePuller = images.NewImageManager(kubecontainer.FilterEventRecorder(recorder), dm, imageBackOff, serializeImagePulls)
	dm.containerGC = NewContainerGC(client, podGetter, containerLogsDir)

	dm.versionCache = cache.NewObjectCache(
		func() (interface{}, error) {
			return dm.getVersionInfo()
		},
		versionCacheTTL,
	)

	// Apply optional settings.
	for _, optf := range options {
		optf(dm)
	}

	return dm
}
// GetContainerLogs returns logs of a specific container. By
// default, it returns a snapshot of the container log. Set 'follow' to true to
// stream the log. Set 'follow' to false and specify the number of lines (e.g.
// "100" or "all") to tail the log.
// TODO: Make 'RawTerminal' option flagable.
func (dm *DockerManager) GetContainerLogs(pod *api.Pod, containerID kubecontainer.ContainerID, logOptions *api.PodLogOptions, stdout, stderr io.Writer) error {
	return GetContainerLogs(dm.client, pod, containerID, logOptions, stdout, stderr)
}

// Temporarily export this function to share with dockershim.
// TODO: clean this up.
func GetContainerLogs(client DockerInterface, pod *api.Pod, containerID kubecontainer.ContainerID, logOptions *api.PodLogOptions, stdout, stderr io.Writer) error {
	var since int64
	if logOptions.SinceSeconds != nil {
		t := unversioned.Now().Add(-time.Duration(*logOptions.SinceSeconds) * time.Second)
		since = t.Unix()
	}
	if logOptions.SinceTime != nil {
		since = logOptions.SinceTime.Unix()
	}
	opts := dockertypes.ContainerLogsOptions{
		ShowStdout: true,
		ShowStderr: true,
		Since:      strconv.FormatInt(since, 10),
		Timestamps: logOptions.Timestamps,
		Follow:     logOptions.Follow,
	}
	if logOptions.TailLines != nil {
		opts.Tail = strconv.FormatInt(*logOptions.TailLines, 10)
	}
	sopts := StreamOptions{
		OutputStream: stdout,
		ErrorStream:  stderr,
		RawTerminal:  false,
	}
	return client.Logs(containerID.ID, opts, sopts)
}
var (
	// ErrNoContainersInPod is returned when there are no containers for a given pod.
	ErrNoContainersInPod = errors.New("NoContainersInPod")

	// ErrNoPodInfraContainerInPod is returned when there is no pod infra container for a given pod.
	ErrNoPodInfraContainerInPod = errors.New("NoPodInfraContainerInPod")

	// ErrContainerCannotRun is returned when a container is created, but cannot run properly.
	ErrContainerCannotRun = errors.New("ContainerCannotRun")
)

// determineContainerIP determines the IP address of the given container. The
// container passed in is expected to be the infrastructure container of a pod,
// and it is the caller's responsibility to ensure that the correct container
// is passed.
func (dm *DockerManager) determineContainerIP(podNamespace, podName string, container *dockertypes.ContainerJSON) (string, error) {
	result := ""

	if container.NetworkSettings != nil {
		result = container.NetworkSettings.IPAddress

		// Fall back to the IPv6 address if no IPv4 address is present.
		if result == "" {
			result = container.NetworkSettings.GlobalIPv6Address
		}
	}

	networkMode := getDockerNetworkMode(container)
	isHostNetwork := networkMode == namespaceModeHost

	// For host networking or the default network plugin, GetPodNetworkStatus doesn't work.
	if !isHostNetwork && dm.networkPlugin.Name() != network.DefaultPluginName {
		netStatus, err := dm.networkPlugin.GetPodNetworkStatus(podNamespace, podName, kubecontainer.DockerID(container.ID).ContainerID())
		if err != nil {
			glog.Errorf("NetworkPlugin %s failed on the status hook for pod '%s' - %v", dm.networkPlugin.Name(), podName, err)
			return result, err
		} else if netStatus != nil {
			result = netStatus.IP.String()
		}
	}

	return result, nil
}
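
// Note on precedence in determineContainerIP: the docker-reported address
// (IPv4, then IPv6) is only a fallback. When a non-default network plugin is
// in use and reports a status, the plugin's IP overrides whatever docker
// returned, and a plugin error is surfaced to the caller together with the
// best-effort result gathered so far.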
func (dm *DockerManager) inspectContainer(id string, podName, podNamespace string) (*kubecontainer.ContainerStatus, string, error) {
	var ip string
	iResult, err := dm.client.InspectContainer(id)
	if err != nil {
		return nil, ip, err
	}
	glog.V(4).Infof("Container inspect result: %+v", *iResult)

	// TODO: Get k8s container name by parsing the docker name. This will be
	// replaced by checking docker labels eventually.
	dockerName, hash, err := ParseDockerName(iResult.Name)
	if err != nil {
		return nil, ip, fmt.Errorf("Unable to parse docker name %q", iResult.Name)
	}
	containerName := dockerName.ContainerName

	containerInfo := getContainerInfoFromLabel(iResult.Config.Labels)

	parseTimestampError := func(label, s string) {
		glog.Errorf("Failed to parse %q timestamp %q for container %q of pod %q", label, s, id, kubecontainer.BuildPodFullName(podName, podNamespace))
	}
	var createdAt, startedAt, finishedAt time.Time
	if createdAt, err = ParseDockerTimestamp(iResult.Created); err != nil {
		parseTimestampError("Created", iResult.Created)
	}
	if startedAt, err = ParseDockerTimestamp(iResult.State.StartedAt); err != nil {
		parseTimestampError("StartedAt", iResult.State.StartedAt)
	}
	if finishedAt, err = ParseDockerTimestamp(iResult.State.FinishedAt); err != nil {
		parseTimestampError("FinishedAt", iResult.State.FinishedAt)
	}

	status := kubecontainer.ContainerStatus{
		Name:         containerName,
		RestartCount: containerInfo.RestartCount,
		Image:        iResult.Config.Image,
		ImageID:      DockerPrefix + iResult.Image,
		ID:           kubecontainer.DockerID(id).ContainerID(),
		ExitCode:     iResult.State.ExitCode,
		CreatedAt:    createdAt,
		Hash:         hash,
	}
	if iResult.State.Running {
		// Containers that are running, restarting or paused.
		status.State = kubecontainer.ContainerStateRunning
		status.StartedAt = startedAt
		if containerName == PodInfraContainerName {
			ip, err = dm.determineContainerIP(podNamespace, podName, iResult)
			// Kubelet doesn't handle the network error scenario.
			if err != nil {
				status.State = kubecontainer.ContainerStateUnknown
				status.Message = fmt.Sprintf("Network error: %#v", err)
			}
		}
		return &status, ip, nil
	}

	// Find containers that have exited or failed to start.
	if !finishedAt.IsZero() || iResult.State.ExitCode != 0 {
		// Containers that are exited, dead or created (docker failed to start the container).
		// When a container fails to start, State.ExitCode is non-zero and FinishedAt and StartedAt are both zero.
		reason := ""
		message := iResult.State.Error

		// Note: An application might handle OOMKilled gracefully.
		// In that case, the container is oom killed, but the exit
		// code could be 0.
		if iResult.State.OOMKilled {
			reason = "OOMKilled"
		} else if iResult.State.ExitCode == 0 {
			reason = "Completed"
		} else if !finishedAt.IsZero() {
			reason = "Error"
		} else {
			// A zero finishedAt with a non-zero ExitCode means docker failed to start the container.
			reason = ErrContainerCannotRun.Error()
			// Adjust time to the time docker attempted to run the container, otherwise
			// startedAt and finishedAt will be set to epoch, which is misleading.
			finishedAt = createdAt
			startedAt = createdAt
		}

		terminationMessagePath := containerInfo.TerminationMessagePath
		if terminationMessagePath != "" {
			for _, mount := range iResult.Mounts {
				if mount.Destination == terminationMessagePath {
					path := mount.Source
					if data, err := ioutil.ReadFile(path); err != nil {
						message = fmt.Sprintf("Error on reading termination-log %s: %v", path, err)
					} else {
						message = string(data)
					}
				}
			}
		}
		status.State = kubecontainer.ContainerStateExited
		status.Message = message
		status.Reason = reason
		status.StartedAt = startedAt
		status.FinishedAt = finishedAt
	} else {
		// Non-running containers that have only been created (not yet started, or the
		// kubelet failed before calling the start-container function, etc.). Kubelet
		// doesn't handle these scenarios yet.
		status.State = kubecontainer.ContainerStateUnknown
	}
	return &status, "", nil
}

// makeEnvList converts EnvVar list to a list of strings, in the form of
// '<key>=<value>', which can be understood by docker.
func makeEnvList(envs []kubecontainer.EnvVar) (result []string) {
	for _, env := range envs {
		result = append(result, fmt.Sprintf("%s=%s", env.Name, env.Value))
	}
	return
}
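
// For example, makeEnvList([]kubecontainer.EnvVar{{Name: "PATH", Value: "/bin"},
// {Name: "LANG", Value: "C"}}) yields []string{"PATH=/bin", "LANG=C"}.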
// makeMountBindings converts the mount list to a list of strings that
// can be understood by docker.
// Each element in the string is in the form of:
// '<HostPath>:<ContainerPath>', or
// '<HostPath>:<ContainerPath>:ro', if the path is read only, or
// '<HostPath>:<ContainerPath>:Z', if the volume requires SELinux
// relabeling and the pod provides an SELinux label.
func makeMountBindings(mounts []kubecontainer.Mount, podHasSELinuxLabel bool) (result []string) {
	for _, m := range mounts {
		bind := fmt.Sprintf("%s:%s", m.HostPath, m.ContainerPath)
		if m.ReadOnly {
			bind += ":ro"
		}
		// Only request relabeling if the pod provides an
		// SELinux context. If the pod does not provide an
		// SELinux context relabeling will label the volume
		// with the container's randomly allocated MCS label.
		// This would restrict access to the volume to the
		// container which mounts it first.
		if m.SELinuxRelabel && podHasSELinuxLabel {
			if m.ReadOnly {
				bind += ",Z"
			} else {
				bind += ":Z"
			}
		}
		result = append(result, bind)
	}
	return
}
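
// For example, a read-only mount that needs SELinux relabeling in a pod with
// an SELinux label becomes "/host/path:/container/path:ro,Z" (the Z flag is
// appended to the existing ro option), while the same mount without the
// relabel requirement is just "/host/path:/container/path:ro".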
func makePortsAndBindings(portMappings []kubecontainer.PortMapping) (map[dockernat.Port]struct{}, map[dockernat.Port][]dockernat.PortBinding) {
	exposedPorts := map[dockernat.Port]struct{}{}
	portBindings := map[dockernat.Port][]dockernat.PortBinding{}
	for _, port := range portMappings {
		exteriorPort := port.HostPort
		if exteriorPort == 0 {
			// No need to do port binding when HostPort is not specified.
			continue
		}
		interiorPort := port.ContainerPort

		// Some of this port stuff is under-documented voodoo.
		// See http://stackoverflow.com/questions/20428302/binding-a-port-to-a-host-interface-using-the-rest-api
		var protocol string
		switch strings.ToUpper(string(port.Protocol)) {
		case "UDP":
			protocol = "/udp"
		case "TCP":
			protocol = "/tcp"
		default:
			glog.Warningf("Unknown protocol %q: defaulting to TCP", port.Protocol)
			protocol = "/tcp"
		}

		dockerPort := dockernat.Port(strconv.Itoa(interiorPort) + protocol)
		exposedPorts[dockerPort] = struct{}{}

		hostBinding := dockernat.PortBinding{
			HostPort: strconv.Itoa(exteriorPort),
			HostIP:   port.HostIP,
		}

		// Allow multiple host ports to bind to the same docker port.
		if existedBindings, ok := portBindings[dockerPort]; ok {
			// If a docker port already maps to a host port, just append the new host port.
			portBindings[dockerPort] = append(existedBindings, hostBinding)
		} else {
			// Otherwise, it's a fresh port binding.
			portBindings[dockerPort] = []dockernat.PortBinding{
				hostBinding,
			}
		}
	}
	return exposedPorts, portBindings
}
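
// For example, a mapping with ContainerPort 53, HostPort 5353 and Protocol
// "UDP" produces the exposed-port key "53/udp" and a binding with HostPort
// "5353"; a second mapping onto the same container port appends another
// PortBinding to the same "53/udp" entry rather than replacing it.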
func (dm *DockerManager) runContainer(
	pod *api.Pod,
	container *api.Container,
	opts *kubecontainer.RunContainerOptions,
	ref *api.ObjectReference,
	netMode string,
	ipcMode string,
	utsMode string,
	pidMode string,
	restartCount int,
	oomScoreAdj int) (kubecontainer.ContainerID, error) {
	dockerName := KubeletContainerName{
		PodFullName:   kubecontainer.GetPodFullName(pod),
		PodUID:        pod.UID,
		ContainerName: container.Name,
	}

	securityOpts, err := dm.getSecurityOpts(pod, container.Name)
	if err != nil {
		return kubecontainer.ContainerID{}, err
	}
	fmtSecurityOpts, err := dm.fmtDockerOpts(securityOpts)
	if err != nil {
		return kubecontainer.ContainerID{}, err
	}

	// Pod information is recorded on the container as labels to preserve it in the event the pod is deleted
	// while the Kubelet is down and there is no information available to recover the pod.
	// TODO: keep these labels up to date if the pod changes
	labels := newLabels(container, pod, restartCount, dm.enableCustomMetrics)

	// TODO(random-liu): Remove this when we start to use new labels for KillContainerInPod
	if container.Lifecycle != nil && container.Lifecycle.PreStop != nil {
		// TODO: This is kind of hacky, we should really just encode the bits we need.
		// TODO: This is hacky because the Kubelet should be parameterized to encode a specific version
		// and needs to be able to migrate this whenever we deprecate v1. Should be a member of DockerManager.
		if data, err := runtime.Encode(api.Codecs.LegacyCodec(unversioned.GroupVersion{Group: api.GroupName, Version: "v1"}), pod); err == nil {
			labels[kubernetesPodLabel] = string(data)
		} else {
			glog.Errorf("Failed to encode pod: %s for prestop hook", pod.Name)
		}
	}

	memoryLimit := container.Resources.Limits.Memory().Value()
	cpuRequest := container.Resources.Requests.Cpu()
	cpuLimit := container.Resources.Limits.Cpu()
	nvidiaGPULimit := container.Resources.Limits.NvidiaGPU()
	var cpuShares int64
	// If request is not specified, but limit is, we want request to default to limit.
	// API server does this for new containers, but we repeat this logic in Kubelet
	// for containers running on existing Kubernetes clusters.
	if cpuRequest.IsZero() && !cpuLimit.IsZero() {
		cpuShares = milliCPUToShares(cpuLimit.MilliValue())
	} else {
		// if cpuRequest.Amount is nil, then milliCPUToShares will return the minimal number
		// of CPU shares.
		cpuShares = milliCPUToShares(cpuRequest.MilliValue())
	}
	var devices []dockercontainer.DeviceMapping
	if nvidiaGPULimit.Value() != 0 {
		// Experimental. For now, we hardcode /dev/nvidia0 no matter what the user asks for
		// (we only support one device per node).
		devices = []dockercontainer.DeviceMapping{
			{PathOnHost: "/dev/nvidia0", PathInContainer: "/dev/nvidia0", CgroupPermissions: "mrw"},
			{PathOnHost: "/dev/nvidiactl", PathInContainer: "/dev/nvidiactl", CgroupPermissions: "mrw"},
			{PathOnHost: "/dev/nvidia-uvm", PathInContainer: "/dev/nvidia-uvm", CgroupPermissions: "mrw"},
		}
	}
	podHasSELinuxLabel := pod.Spec.SecurityContext != nil && pod.Spec.SecurityContext.SELinuxOptions != nil
	binds := makeMountBindings(opts.Mounts, podHasSELinuxLabel)

	// The reason we create and mount the log file in here (not in kubelet) is because
	// the file's location depends on the ID of the container, and we need to create and
	// mount the file before actually starting the container.
	// TODO(yifan): Consider pulling this logic out, since we might need to reuse it in
	// other container runtimes.
	_, containerName, cid := BuildDockerName(dockerName, container)
	if opts.PodContainerDir != "" && len(container.TerminationMessagePath) != 0 {
		// Because the PodContainerDir contains the pod uid and the container name, which
		// are unique enough, here we just add a unique container id to make the path
		// unique for different instances of the same container.
		containerLogPath := path.Join(opts.PodContainerDir, cid)
		fs, err := os.Create(containerLogPath)
		if err != nil {
			// TODO: Clean up the previously created dir? return the error?
			glog.Errorf("Error on creating termination-log file %q: %v", containerLogPath, err)
		} else {
			fs.Close() // Close immediately; we're just doing a `touch` here
			b := fmt.Sprintf("%s:%s", containerLogPath, container.TerminationMessagePath)
			binds = append(binds, b)
		}
	}

	hc := &dockercontainer.HostConfig{
		Binds:          binds,
		NetworkMode:    dockercontainer.NetworkMode(netMode),
		IpcMode:        dockercontainer.IpcMode(ipcMode),
		UTSMode:        dockercontainer.UTSMode(utsMode),
		PidMode:        dockercontainer.PidMode(pidMode),
		ReadonlyRootfs: readOnlyRootFilesystem(container),
		Resources: dockercontainer.Resources{
			Memory:     memoryLimit,
			MemorySwap: -1,
			CPUShares:  cpuShares,
			Devices:    devices,
		},
		SecurityOpt: fmtSecurityOpts,
	}

	// Set sysctls if requested.
	sysctls, unsafeSysctls, err := api.SysctlsFromPodAnnotations(pod.Annotations)
	if err != nil {
		dm.recorder.Eventf(ref, api.EventTypeWarning, events.FailedToCreateContainer, "Failed to create docker container %q of pod %q with error: %v", container.Name, format.Pod(pod), err)
		return kubecontainer.ContainerID{}, err
	}
	if len(sysctls)+len(unsafeSysctls) > 0 {
		hc.Sysctls = make(map[string]string, len(sysctls)+len(unsafeSysctls))
		for _, c := range sysctls {
			hc.Sysctls[c.Name] = c.Value
		}
		for _, c := range unsafeSysctls {
			hc.Sysctls[c.Name] = c.Value
		}
	}

	// If the current API version is at least the docker 1.10 API version, set
	// OomScoreAdj on the HostConfig.
	result, err := dm.checkDockerAPIVersion(dockerV110APIVersion)
	if err != nil {
		glog.Errorf("Failed to check docker api version: %v", err)
	} else if result >= 0 {
		hc.OomScoreAdj = oomScoreAdj
	}

	if dm.cpuCFSQuota {
		// if cpuLimit.Amount is nil, then the appropriate default value is returned
		// to allow full usage of the cpu resource.
		cpuQuota, cpuPeriod := milliCPUToQuota(cpuLimit.MilliValue())
		hc.CPUQuota = cpuQuota
		hc.CPUPeriod = cpuPeriod
	}

	if len(opts.CgroupParent) > 0 {
		hc.CgroupParent = opts.CgroupParent
	}

	dockerOpts := dockertypes.ContainerCreateConfig{
		Name: containerName,
		Config: &dockercontainer.Config{
			Env:        makeEnvList(opts.Envs),
			Image:      container.Image,
			WorkingDir: container.WorkingDir,
			Labels:     labels,
			// Interactive containers:
			OpenStdin: container.Stdin,
			StdinOnce: container.StdinOnce,
			Tty:       container.TTY,
		},
		HostConfig: hc,
	}

	// Set the network configuration for the infra-container.
	if container.Name == PodInfraContainerName {
		setInfraContainerNetworkConfig(pod, netMode, opts, &dockerOpts)
	}

	setEntrypointAndCommand(container, opts, dockerOpts)

	glog.V(3).Infof("Container %v/%v/%v: setting entrypoint \"%v\" and command \"%v\"", pod.Namespace, pod.Name, container.Name, dockerOpts.Config.Entrypoint, dockerOpts.Config.Cmd)

	supplementalGids := dm.runtimeHelper.GetExtraSupplementalGroupsForPod(pod)
	securityContextProvider := securitycontext.NewSimpleSecurityContextProvider()
	securityContextProvider.ModifyContainerConfig(pod, container, dockerOpts.Config)
	securityContextProvider.ModifyHostConfig(pod, container, dockerOpts.HostConfig, supplementalGids)
	createResp, err := dm.client.CreateContainer(dockerOpts)
	if err != nil {
		dm.recorder.Eventf(ref, api.EventTypeWarning, events.FailedToCreateContainer, "Failed to create docker container %q of pod %q with error: %v", container.Name, format.Pod(pod), err)
		return kubecontainer.ContainerID{}, err
	}
	if len(createResp.Warnings) != 0 {
		glog.V(2).Infof("Container %q of pod %q created with warnings: %v", container.Name, format.Pod(pod), createResp.Warnings)
	}
	createdEventMsg := fmt.Sprintf("Created container with docker id %v", utilstrings.ShortenString(createResp.ID, 12))
	if len(securityOpts) > 0 {
		var msgs []string
		for _, opt := range securityOpts {
			msg := opt.msg
			if msg == "" {
				msg = opt.value
			}
			msgs = append(msgs, fmt.Sprintf("%s=%s", opt.key, truncateMsg(msg, 256)))
		}
		createdEventMsg = fmt.Sprintf("%s; Security:[%s]", createdEventMsg, strings.Join(msgs, " "))
	}
	dm.recorder.Eventf(ref, api.EventTypeNormal, events.CreatedContainer, createdEventMsg)

	if err = dm.client.StartContainer(createResp.ID); err != nil {
		dm.recorder.Eventf(ref, api.EventTypeWarning, events.FailedToStartContainer,
			"Failed to start container with docker id %v with error: %v", utilstrings.ShortenString(createResp.ID, 12), err)
		return kubecontainer.ContainerID{}, err
	}
	dm.recorder.Eventf(ref, api.EventTypeNormal, events.StartedContainer, "Started container with docker id %v", utilstrings.ShortenString(createResp.ID, 12))

	return kubecontainer.DockerID(createResp.ID).ContainerID(), nil
}

// setInfraContainerNetworkConfig sets the network configuration for the infra-container. We only
// set the network configuration for the infra-container; all the user containers will share the
// same network namespace with the infra-container.
func setInfraContainerNetworkConfig(pod *api.Pod, netMode string, opts *kubecontainer.RunContainerOptions, dockerOpts *dockertypes.ContainerCreateConfig) {
	exposedPorts, portBindings := makePortsAndBindings(opts.PortMappings)
	dockerOpts.Config.ExposedPorts = exposedPorts
	dockerOpts.HostConfig.PortBindings = dockernat.PortMap(portBindings)

	if netMode != namespaceModeHost {
		dockerOpts.Config.Hostname = opts.Hostname
		if len(opts.DNS) > 0 {
			dockerOpts.HostConfig.DNS = opts.DNS
		}
		if len(opts.DNSSearch) > 0 {
			dockerOpts.HostConfig.DNSSearch = opts.DNSSearch
		}
	}
}

func setEntrypointAndCommand(container *api.Container, opts *kubecontainer.RunContainerOptions, dockerOpts dockertypes.ContainerCreateConfig) {
	command, args := kubecontainer.ExpandContainerCommandAndArgs(container, opts.Envs)

	dockerOpts.Config.Entrypoint = dockerstrslice.StrSlice(command)
	dockerOpts.Config.Cmd = dockerstrslice.StrSlice(args)
}
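
// Note on setEntrypointAndCommand: the Kubernetes container's Command (with
// $(VAR) references expanded against opts.Envs) becomes the docker Entrypoint,
// and the container's Args become the docker Cmd. Although dockerOpts is
// passed by value, Config is a pointer, so the mutation is visible to the
// caller.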
// A helper function to get the KubeletContainerName and hash from a docker
// container.
func getDockerContainerNameInfo(c *dockertypes.Container) (*KubeletContainerName, uint64, error) {
	if len(c.Names) == 0 {
		return nil, 0, fmt.Errorf("cannot parse empty docker container name: %#v", c.Names)
	}
	dockerName, hash, err := ParseDockerName(c.Names[0])
	if err != nil {
		return nil, 0, fmt.Errorf("parse docker container name %q error: %v", c.Names[0], err)
	}
	return dockerName, hash, nil
}

// Get pod UID, name, and namespace by examining the container names.
func getPodInfoFromContainer(c *dockertypes.Container) (kubetypes.UID, string, string, error) {
	dockerName, _, err := getDockerContainerNameInfo(c)
	if err != nil {
		return kubetypes.UID(""), "", "", err
	}
	name, namespace, err := kubecontainer.ParsePodFullName(dockerName.PodFullName)
	if err != nil {
		return kubetypes.UID(""), "", "", fmt.Errorf("parse pod full name %q error: %v", dockerName.PodFullName, err)
	}
	return dockerName.PodUID, name, namespace, nil
}

// GetContainers returns a list of running containers if |all| is false;
// otherwise, it returns all containers.
func (dm *DockerManager) GetContainers(all bool) ([]*kubecontainer.Container, error) {
	containers, err := GetKubeletDockerContainers(dm.client, all)
	if err != nil {
		return nil, err
	}
	// Convert DockerContainers to []*kubecontainer.Container.
	result := make([]*kubecontainer.Container, 0, len(containers))
	for _, c := range containers {
		converted, err := toRuntimeContainer(c)
		if err != nil {
			glog.Errorf("Error examining the container %v: %v", c.ID, err)
			continue
		}
		result = append(result, converted)
	}
	return result, nil
}

func (dm *DockerManager) GetPods(all bool) ([]*kubecontainer.Pod, error) {
	start := time.Now()
	defer func() {
		metrics.ContainerManagerLatency.WithLabelValues("GetPods").Observe(metrics.SinceInMicroseconds(start))
	}()
	pods := make(map[kubetypes.UID]*kubecontainer.Pod)
	var result []*kubecontainer.Pod

	containers, err := GetKubeletDockerContainers(dm.client, all)
	if err != nil {
		return nil, err
	}

	// Group containers by pod.
	for _, c := range containers {
		converted, err := toRuntimeContainer(c)
		if err != nil {
			glog.Errorf("Error examining the container %v: %v", c.ID, err)
			continue
		}

		podUID, podName, podNamespace, err := getPodInfoFromContainer(c)
		if err != nil {
			glog.Errorf("Error examining the container %v: %v", c.ID, err)
			continue
		}

		pod, found := pods[podUID]
		if !found {
			pod = &kubecontainer.Pod{
				ID:        podUID,
				Name:      podName,
				Namespace: podNamespace,
			}
			pods[podUID] = pod
		}
		pod.Containers = append(pod.Containers, converted)
	}

	// Convert map to list.
	for _, p := range pods {
		result = append(result, p)
	}
	return result, nil
}

// ListImages lists all images in the local storage.
func (dm *DockerManager) ListImages() ([]kubecontainer.Image, error) {
	var images []kubecontainer.Image

	dockerImages, err := dm.client.ListImages(dockertypes.ImageListOptions{})
	if err != nil {
		return images, err
	}

	for _, di := range dockerImages {
		image, err := toRuntimeImage(&di)
		if err != nil {
			continue
		}
		images = append(images, *image)
	}
	return images, nil
}

// PullImage pulls an image from the network to local storage.
func (dm *DockerManager) PullImage(image kubecontainer.ImageSpec, secrets []api.Secret) error {
	return dm.dockerPuller.Pull(image.Image, secrets)
}

// IsImagePresent checks whether the container image is already in the local storage.
func (dm *DockerManager) IsImagePresent(image kubecontainer.ImageSpec) (bool, error) {
	return dm.dockerPuller.IsImagePresent(image.Image)
}

// RemoveImage removes the specified image.
func (dm *DockerManager) RemoveImage(image kubecontainer.ImageSpec) error {
	// If the image has multiple tags, we need to remove all the tags.
	if inspectImage, err := dm.client.InspectImage(image.Image); err == nil && len(inspectImage.RepoTags) > 1 {
		for _, tag := range inspectImage.RepoTags {
			if _, err := dm.client.RemoveImage(tag, dockertypes.ImageRemoveOptions{PruneChildren: true}); err != nil {
				return err
			}
		}
		return nil
	}

	_, err := dm.client.RemoveImage(image.Image, dockertypes.ImageRemoveOptions{PruneChildren: true})
	return err
}
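
// For example, if the same image is tagged both "nginx:1.11" and "web:latest",
// RemoveImage deletes each tag in turn (docker untags, and removes the
// underlying layers once the last reference is gone); a single-tag image is
// removed directly by the given identifier.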
// podInfraContainerChanged returns true if the pod infra container has changed.
func (dm *DockerManager) podInfraContainerChanged(pod *api.Pod, podInfraContainerStatus *kubecontainer.ContainerStatus) (bool, error) {
	var ports []api.ContainerPort

	// Check network mode.
	if kubecontainer.IsHostNetworkPod(pod) {
		dockerPodInfraContainer, err := dm.client.InspectContainer(podInfraContainerStatus.ID.ID)
		if err != nil {
			return false, err
		}

		networkMode := getDockerNetworkMode(dockerPodInfraContainer)
		if networkMode != namespaceModeHost {
			glog.V(4).Infof("host: %v, %v", pod.Spec.SecurityContext.HostNetwork, networkMode)
			return true, nil
		}
	} else if dm.networkPlugin.Name() != "cni" && dm.networkPlugin.Name() != "kubenet" {
		// Docker only exports ports from the pod infra container. Let's
		// collect all of the relevant ports and export them.
		for _, container := range pod.Spec.InitContainers {
			ports = append(ports, container.Ports...)
		}
		for _, container := range pod.Spec.Containers {
			ports = append(ports, container.Ports...)
		}
	}
	expectedPodInfraContainer := &api.Container{
		Name:            PodInfraContainerName,
		Image:           dm.podInfraContainerImage,
		Ports:           ports,
		ImagePullPolicy: podInfraContainerImagePullPolicy,
		Env:             dm.podInfraContainerEnv,
	}
	return podInfraContainerStatus.Hash != kubecontainer.HashContainer(expectedPodInfraContainer), nil
}

// readOnlyRootFilesystem determines whether the container's root filesystem should be read-only.
func readOnlyRootFilesystem(container *api.Container) bool {
	return container.SecurityContext != nil && container.SecurityContext.ReadOnlyRootFilesystem != nil && *container.SecurityContext.ReadOnlyRootFilesystem
}

// getDockerNetworkMode returns the network mode of the container.
// The container must not be nil.
func getDockerNetworkMode(container *dockertypes.ContainerJSON) string {
	if container.HostConfig != nil {
		return string(container.HostConfig.NetworkMode)
	}
	return ""
}

// dockerVersion implements the kubecontainer.Version interface by implementing
// Compare() and String() (the latter is provided by the underlying semver.Version).
// TODO: this code is the same as rktVersion and may make sense to be moved to
// somewhere shared.
type dockerVersion struct {
	*semver.Version
}

// newDockerVersion returns a semantically versioned docker version value.
func newDockerVersion(version string) (dockerVersion, error) {
	sem, err := semver.NewVersion(version)
	return dockerVersion{sem}, err
}

func (r dockerVersion) String() string {
	return r.Version.String()
}

func (r dockerVersion) Compare(other string) (int, error) {
	v, err := newDockerVersion(other)
	if err != nil {
		return -1, err
	}

	if r.LessThan(*v.Version) {
		return -1, nil
	}
	if v.Version.LessThan(*r.Version) {
		return 1, nil
	}
	return 0, nil
}

// apiVersion implements the kubecontainer.Version interface by implementing
// Compare() and String(). It uses the compare function of engine-api to
// compare docker API versions.
type apiVersion string

func (v apiVersion) String() string {
	return string(v)
}

func (v apiVersion) Compare(other string) (int, error) {
	if dockerapiversion.LessThan(string(v), other) {
		return -1, nil
	} else if dockerapiversion.GreaterThan(string(v), other) {
		return 1, nil
	}
	return 0, nil
}
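
// For example, apiVersion("1.22").Compare("1.21") returns 1 (newer),
// Compare("1.22") returns 0, and Compare("1.24") returns -1; callers such as
// checkVersionCompatibility below only test the sign of the result.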
func (dm *DockerManager) Type() string {
	return DockerType
}

func (dm *DockerManager) Version() (kubecontainer.Version, error) {
	v, err := dm.client.Version()
	if err != nil {
		return nil, fmt.Errorf("docker: failed to get docker version: %v", err)
	}
	version, err := newDockerVersion(v.Version)
	if err != nil {
		return nil, fmt.Errorf("docker: failed to parse docker version %q: %v", v.Version, err)
	}
	return version, nil
}

func (dm *DockerManager) APIVersion() (kubecontainer.Version, error) {
	v, err := dm.client.Version()
	if err != nil {
		return nil, fmt.Errorf("docker: failed to get docker version: %v", err)
	}
	return apiVersion(v.APIVersion), nil
}

// Status returns an error if the docker daemon is unhealthy, nil otherwise.
// Now we do this by checking whether:
// 1) `docker version` works
// 2) the docker version is compatible with the minimum requirement
func (dm *DockerManager) Status() error {
	return dm.checkVersionCompatibility()
}

func (dm *DockerManager) checkVersionCompatibility() error {
	version, err := dm.APIVersion()
	if err != nil {
		return err
	}
	// Verify the docker version.
	result, err := version.Compare(minimumDockerAPIVersion)
	if err != nil {
		return fmt.Errorf("failed to compare current docker version %v with minimum support Docker version %q - %v", version, minimumDockerAPIVersion, err)
	}
	if result < 0 {
		return fmt.Errorf("container runtime version is older than %s", minimumDockerAPIVersion)
	}
	return nil
}

func (dm *DockerManager) fmtDockerOpts(opts []dockerOpt) ([]string, error) {
	version, err := dm.APIVersion()
	if err != nil {
		return nil, err
	}

	const (
		// Docker changed the API for specifying options in v1.11
		optSeparatorChangeVersion = "1.23" // Corresponds to docker 1.11.x
		optSeparatorOld           = ':'
		optSeparatorNew           = '='
	)

	sep := optSeparatorNew
	if result, err := version.Compare(optSeparatorChangeVersion); err != nil {
		return nil, fmt.Errorf("error parsing docker API version: %v", err)
	} else if result < 0 {
		sep = optSeparatorOld
	}

	fmtOpts := make([]string, len(opts))
	for i, opt := range opts {
		fmtOpts[i] = fmt.Sprintf("%s%c%s", opt.key, sep, opt.value)
	}
	return fmtOpts, nil
}
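
// For example, against the docker 1.10 API (1.22) the default seccomp option
// is rendered as "seccomp:unconfined", while on API 1.23 and newer
// (docker 1.11+) the same option becomes "seccomp=unconfined".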
func (dm *DockerManager) getSecurityOpts(pod *api.Pod, ctrName string) ([]dockerOpt, error) {
	var securityOpts []dockerOpt
	if seccompOpts, err := dm.getSeccompOpts(pod, ctrName); err != nil {
		return nil, err
	} else {
		securityOpts = append(securityOpts, seccompOpts...)
	}

	if appArmorOpts, err := dm.getAppArmorOpts(pod, ctrName); err != nil {
		return nil, err
	} else {
		securityOpts = append(securityOpts, appArmorOpts...)
	}

	return securityOpts, nil
}

type dockerOpt struct {
	// The key-value pair passed to docker.
	key, value string
	// The alternative value to use in log/event messages.
	msg string
}

// Get the docker security options for seccomp.
func (dm *DockerManager) getSeccompOpts(pod *api.Pod, ctrName string) ([]dockerOpt, error) {
	version, err := dm.APIVersion()
	if err != nil {
		return nil, err
	}

	// seccomp is only available on docker versions >= v1.10.
	if result, err := version.Compare(dockerV110APIVersion); err != nil {
		return nil, err
	} else if result < 0 {
		return nil, nil // return early for Docker < 1.10
	}

	profile, profileOK := pod.ObjectMeta.Annotations[api.SeccompContainerAnnotationKeyPrefix+ctrName]
	if !profileOK {
		// Try the pod profile.
		profile, profileOK = pod.ObjectMeta.Annotations[api.SeccompPodAnnotationKey]
		if !profileOK {
			// Return the default early.
			return defaultSeccompOpt, nil
		}
	}

	if profile == "unconfined" {
		// Return the default early.
		return defaultSeccompOpt, nil
	}

	if profile == "docker/default" {
		// Return nil so docker will load the default seccomp profile.
		return nil, nil
	}

	if !strings.HasPrefix(profile, "localhost/") {
		return nil, fmt.Errorf("unknown seccomp profile option: %s", profile)
	}

	name := strings.TrimPrefix(profile, "localhost/") // by pod annotation validation, name is a valid subpath
	fname := filepath.Join(dm.seccompProfileRoot, filepath.FromSlash(name))
	file, err := ioutil.ReadFile(fname)
	if err != nil {
		return nil, fmt.Errorf("cannot load seccomp profile %q: %v", name, err)
	}

	b := bytes.NewBuffer(nil)
	if err := json.Compact(b, file); err != nil {
		return nil, err
	}
	// Rather than the full profile, just put the filename & md5sum in the event log.
	msg := fmt.Sprintf("%s(md5:%x)", name, md5.Sum(file))
	return []dockerOpt{{"seccomp", b.String(), msg}}, nil
}
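
// For example (hypothetical profile name): setting the per-container seccomp
// annotation (api.SeccompContainerAnnotationKeyPrefix + "app") to
// "localhost/audit.json" causes getSeccompOpts to read
// <seccompProfileRoot>/audit.json, compact the JSON, and pass it to docker as
// a seccomp option, while the created-container event records only
// "audit.json(md5:...)" rather than the full profile body.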
  1037. // Get the docker security options for AppArmor.
  1038. func (dm *DockerManager) getAppArmorOpts(pod *api.Pod, ctrName string) ([]dockerOpt, error) {
  1039. profile := apparmor.GetProfileName(pod, ctrName)
  1040. if profile == "" || profile == apparmor.ProfileRuntimeDefault {
  1041. // The docker applies the default profile by default.
  1042. return nil, nil
  1043. }
  1044. // Assume validation has already happened.
  1045. profileName := strings.TrimPrefix(profile, apparmor.ProfileNamePrefix)
  1046. return []dockerOpt{{"apparmor", profileName, ""}}, nil
  1047. }

type dockerExitError struct {
    Inspect *dockertypes.ContainerExecInspect
}

func (d *dockerExitError) String() string {
    return d.Error()
}

func (d *dockerExitError) Error() string {
    return fmt.Sprintf("Error executing in Docker Container: %d", d.Inspect.ExitCode)
}

func (d *dockerExitError) Exited() bool {
    return !d.Inspect.Running
}

func (d *dockerExitError) ExitStatus() int {
    return d.Inspect.ExitCode
}

// ExecInContainer runs the command inside the container identified by containerID.
func (dm *DockerManager) ExecInContainer(containerID kubecontainer.ContainerID, cmd []string, stdin io.Reader, stdout, stderr io.WriteCloser, tty bool, resize <-chan term.Size) error {
    if dm.execHandler == nil {
        return errors.New("unable to exec without an exec handler")
    }
    container, err := dm.client.InspectContainer(containerID.ID)
    if err != nil {
        return err
    }
    if !container.State.Running {
        return fmt.Errorf("container not running (%s)", container.ID)
    }
    return dm.execHandler.ExecInContainer(dm.client, container, cmd, stdin, stdout, stderr, tty, resize)
}

func (dm *DockerManager) AttachContainer(containerID kubecontainer.ContainerID, stdin io.Reader, stdout, stderr io.WriteCloser, tty bool, resize <-chan term.Size) error {
    return AttachContainer(dm.client, containerID, stdin, stdout, stderr, tty, resize)
}

// Temporarily export this function to share with dockershim.
// TODO: clean this up.
func AttachContainer(client DockerInterface, containerID kubecontainer.ContainerID, stdin io.Reader, stdout, stderr io.WriteCloser, tty bool, resize <-chan term.Size) error {
    // Have to start this before the call to client.AttachToContainer because client.AttachToContainer is a blocking
    // call :-( Otherwise, resize events don't get processed and the terminal never resizes.
    kubecontainer.HandleResizing(resize, func(size term.Size) {
        client.ResizeContainerTTY(containerID.ID, int(size.Height), int(size.Width))
    })
    // TODO(random-liu): Do we really use the *Logs* field here?
    opts := dockertypes.ContainerAttachOptions{
        Stream: true,
        Stdin:  stdin != nil,
        Stdout: stdout != nil,
        Stderr: stderr != nil,
    }
    sopts := StreamOptions{
        InputStream:  stdin,
        OutputStream: stdout,
        ErrorStream:  stderr,
        RawTerminal:  tty,
    }
    return client.AttachToContainer(containerID.ID, opts, sopts)
}

func noPodInfraContainerError(podName, podNamespace string) error {
    return fmt.Errorf("cannot find pod infra container in pod %q", kubecontainer.BuildPodFullName(podName, podNamespace))
}

// PortForward executes socat in the pod's network namespace and copies
// data between stream (representing the user's local connection on their
// computer) and the specified port in the container.
//
// TODO:
// - match cgroups of container
// - should we support nsenter + socat on the host? (current impl)
// - should we support nsenter + socat in a container, running with elevated privs and --pid=host?
func (dm *DockerManager) PortForward(pod *kubecontainer.Pod, port uint16, stream io.ReadWriteCloser) error {
    podInfraContainer := pod.FindContainerByName(PodInfraContainerName)
    if podInfraContainer == nil {
        return noPodInfraContainerError(pod.Name, pod.Namespace)
    }
    container, err := dm.client.InspectContainer(podInfraContainer.ID.ID)
    if err != nil {
        return err
    }
    if !container.State.Running {
        return fmt.Errorf("container not running (%s)", container.ID)
    }
    containerPid := container.State.Pid
    socatPath, lookupErr := exec.LookPath("socat")
    if lookupErr != nil {
        return fmt.Errorf("unable to do port forwarding: socat not found")
    }
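    // nsenter -t <infra container pid> -n enters the pod's network namespace,
    // where socat bridges our stdin/stdout ("-") to TCP4:localhost:<port>.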
    args := []string{"-t", fmt.Sprintf("%d", containerPid), "-n", socatPath, "-", fmt.Sprintf("TCP4:localhost:%d", port)}
    nsenterPath, lookupErr := exec.LookPath("nsenter")
    if lookupErr != nil {
        return fmt.Errorf("unable to do port forwarding: nsenter not found")
    }
    commandString := fmt.Sprintf("%s %s", nsenterPath, strings.Join(args, " "))
    glog.V(4).Infof("executing port forwarding command: %s", commandString)
    command := exec.Command(nsenterPath, args...)
    command.Stdout = stream
    stderr := new(bytes.Buffer)
    command.Stderr = stderr
    // If we use Stdin, command.Run() won't return until the goroutine that's copying
    // from stream finishes. Unfortunately, if you have a client like telnet connected
    // via port forwarding, as long as the user's telnet client is connected to the user's
    // local listener that port forwarding sets up, the telnet session never exits. This
    // means that even if socat has finished running, command.Run() won't ever return
    // (because the client still has the connection and stream open).
    //
    // The workaround is to use StdinPipe(), as Wait() (called by Run()) closes the pipe
    // when the command (socat) exits.
    inPipe, err := command.StdinPipe()
    if err != nil {
        return fmt.Errorf("unable to do port forwarding: error creating stdin pipe: %v", err)
    }
    go func() {
        io.Copy(inPipe, stream)
        inPipe.Close()
    }()
    if err := command.Run(); err != nil {
        return fmt.Errorf("%v: %s", err, stderr.String())
    }
    return nil
}

// TODO(random-liu): Change running pod to pod status in the future. We can't do it now, because kubelet also uses this function without pod status.
// We can only deprecate this after refactoring kubelet.
// TODO(random-liu): After using pod status for KillPod(), we can also remove the kubernetesPodLabel, because all the needed information should have
// been extracted from new labels and stored in pod status.
// Only hard eviction scenarios should provide a grace period override; all other code paths must pass nil.
func (dm *DockerManager) KillPod(pod *api.Pod, runningPod kubecontainer.Pod, gracePeriodOverride *int64) error {
    result := dm.killPodWithSyncResult(pod, runningPod, gracePeriodOverride)
    return result.Error()
}

// NOTE(random-liu): The pod passed in could be *nil* when kubelet restarted.
func (dm *DockerManager) killPodWithSyncResult(pod *api.Pod, runningPod kubecontainer.Pod, gracePeriodOverride *int64) (result kubecontainer.PodSyncResult) {
    // Send the kills in parallel since they may take a long time.
    // There may be len(runningPod.Containers) or len(runningPod.Containers)-1 results in the channel.
    containerResults := make(chan *kubecontainer.SyncResult, len(runningPod.Containers))
    wg := sync.WaitGroup{}
    var (
        networkContainer *kubecontainer.Container
        networkSpec      *api.Container
    )
    wg.Add(len(runningPod.Containers))
    for _, container := range runningPod.Containers {
        go func(container *kubecontainer.Container) {
            defer utilruntime.HandleCrash()
            defer wg.Done()
            var containerSpec *api.Container
            if pod != nil {
                for i, c := range pod.Spec.Containers {
                    if c.Name == container.Name {
                        containerSpec = &pod.Spec.Containers[i]
                        break
                    }
                }
                if containerSpec == nil {
                    for i, c := range pod.Spec.InitContainers {
                        if c.Name == container.Name {
                            containerSpec = &pod.Spec.InitContainers[i]
                            break
                        }
                    }
                }
            }
            // TODO: Handle this without signaling the pod infra container to
            // adapt to the generic container runtime.
            if container.Name == PodInfraContainerName {
                // Store the network container and its spec for later deletion.
                // We do this so that PreStop handlers can run in the network namespace.
                networkContainer = container
                networkSpec = containerSpec
                return
            }
            killContainerResult := kubecontainer.NewSyncResult(kubecontainer.KillContainer, container.Name)
            err := dm.KillContainerInPod(container.ID, containerSpec, pod, "Need to kill pod.", gracePeriodOverride)
            if err != nil {
                killContainerResult.Fail(kubecontainer.ErrKillContainer, err.Error())
                glog.Errorf("Failed to delete container %v: %v; Skipping pod %q", container.ID.ID, err, runningPod.ID)
            }
            containerResults <- killContainerResult
        }(container)
    }
    wg.Wait()
    close(containerResults)
    for containerResult := range containerResults {
        result.AddSyncResult(containerResult)
    }
    if networkContainer != nil {
        ins, err := dm.client.InspectContainer(networkContainer.ID.ID)
        if err != nil {
            err = fmt.Errorf("Error inspecting container %v: %v", networkContainer.ID.ID, err)
            glog.Error(err)
            result.Fail(err)
            return
        }
        if getDockerNetworkMode(ins) != namespaceModeHost {
            teardownNetworkResult := kubecontainer.NewSyncResult(kubecontainer.TeardownNetwork, kubecontainer.BuildPodFullName(runningPod.Name, runningPod.Namespace))
            result.AddSyncResult(teardownNetworkResult)
            glog.V(3).Infof("Calling network plugin %s to tear down pod for %s", dm.networkPlugin.Name(), kubecontainer.BuildPodFullName(runningPod.Name, runningPod.Namespace))
            if err := dm.networkPlugin.TearDownPod(runningPod.Namespace, runningPod.Name, networkContainer.ID); err != nil {
                message := fmt.Sprintf("Failed to teardown network for pod %q using network plugin %q: %v", runningPod.ID, dm.networkPlugin.Name(), err)
                teardownNetworkResult.Fail(kubecontainer.ErrTeardownNetwork, message)
                glog.Error(message)
            }
        }
        killContainerResult := kubecontainer.NewSyncResult(kubecontainer.KillContainer, networkContainer.Name)
        result.AddSyncResult(killContainerResult)
        if err := dm.KillContainerInPod(networkContainer.ID, networkSpec, pod, "Need to kill pod.", gracePeriodOverride); err != nil {
            killContainerResult.Fail(kubecontainer.ErrKillContainer, err.Error())
            glog.Errorf("Failed to delete container %v: %v; Skipping pod %q", networkContainer.ID.ID, err, runningPod.ID)
        }
    }
    return
}

// KillContainerInPod kills a container in the pod. It must be passed either a container ID or a container and pod,
// and will attempt to look up the other information if it is missing.
func (dm *DockerManager) KillContainerInPod(containerID kubecontainer.ContainerID, container *api.Container, pod *api.Pod, message string, gracePeriodOverride *int64) error {
    switch {
    case containerID.IsEmpty():
        // Locate the container.
        pods, err := dm.GetPods(false)
        if err != nil {
            return err
        }
        targetPod := kubecontainer.Pods(pods).FindPod(kubecontainer.GetPodFullName(pod), pod.UID)
        targetContainer := targetPod.FindContainerByName(container.Name)
        if targetContainer == nil {
            return fmt.Errorf("unable to find container %q in pod %q", container.Name, targetPod.Name)
        }
        containerID = targetContainer.ID
    case container == nil || pod == nil:
        // Read information about the container from labels
        inspect, err := dm.client.InspectContainer(containerID.ID)
        if err != nil {
            return err
        }
        storedPod, storedContainer, cerr := containerAndPodFromLabels(inspect)
        if cerr != nil {
            glog.Errorf("unable to access pod data from container: %v", cerr)
        }
        if container == nil {
            container = storedContainer
        }
        if pod == nil {
            pod = storedPod
        }
    }
    return dm.killContainer(containerID, container, pod, message, gracePeriodOverride)
}

// killContainer accepts a containerID and an optional container or pod containing shutdown policies. Invoke
// KillContainerInPod if information must be retrieved first. It is only valid to provide a grace period override
// during hard eviction scenarios. All other code paths in kubelet must never provide a grace period override,
// otherwise data corruption could occur in the end-user application.
func (dm *DockerManager) killContainer(containerID kubecontainer.ContainerID, container *api.Container, pod *api.Pod, reason string, gracePeriodOverride *int64) error {
    ID := containerID.ID
    name := ID
    if container != nil {
        name = fmt.Sprintf("%s %s", name, container.Name)
    }
    if pod != nil {
        name = fmt.Sprintf("%s %s/%s", name, pod.Namespace, pod.Name)
    }
    gracePeriod := int64(minimumGracePeriodInSeconds)
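    // DeletionGracePeriodSeconds, set on the API object when the pod is being
    // deleted, takes precedence over the spec's TerminationGracePeriodSeconds.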
    if pod != nil {
        switch {
        case pod.DeletionGracePeriodSeconds != nil:
            gracePeriod = *pod.DeletionGracePeriodSeconds
        case pod.Spec.TerminationGracePeriodSeconds != nil:
            gracePeriod = *pod.Spec.TerminationGracePeriodSeconds
        }
    }
    glog.V(2).Infof("Killing container %q with %d second grace period", name, gracePeriod)
    start := unversioned.Now()
    if pod != nil && container != nil && container.Lifecycle != nil && container.Lifecycle.PreStop != nil {
        glog.V(4).Infof("Running preStop hook for container %q", name)
        done := make(chan struct{})
        go func() {
            defer close(done)
            defer utilruntime.HandleCrash()
            if msg, err := dm.runner.Run(containerID, pod, container, container.Lifecycle.PreStop); err != nil {
                glog.Errorf("preStop hook for container %q failed: %v", name, err)
                dm.generateFailedContainerEvent(containerID, pod.Name, events.FailedPreStopHook, msg)
            }
        }()
        select {
        case <-time.After(time.Duration(gracePeriod) * time.Second):
            glog.V(2).Infof("preStop hook for container %q did not complete in %d seconds", name, gracePeriod)
        case <-done:
            glog.V(4).Infof("preStop hook for container %q completed", name)
        }
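        // Charge the time spent in the preStop hook against the grace period
        // remaining for StopContainer below.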
        gracePeriod -= int64(unversioned.Now().Sub(start.Time).Seconds())
    }
    // if the caller did not specify a grace period override, we ensure that the grace period
    // is not less than the minimal shutdown window to avoid unnecessary SIGKILLs. if a caller
    // did provide an override, we always set the gracePeriod to that value. the only valid
    // time to send an override is during eviction scenarios where we want to do a hard kill of
    // a container because of resource exhaustion for incompressible resources (e.g. disk, memory).
    if gracePeriodOverride == nil {
        if gracePeriod < minimumGracePeriodInSeconds {
            gracePeriod = minimumGracePeriodInSeconds
        }
    } else {
        gracePeriod = *gracePeriodOverride
        glog.V(2).Infof("Killing container %q, but using %d second grace period override", name, gracePeriod)
    }
    err := dm.client.StopContainer(ID, int(gracePeriod))
    if err == nil {
        glog.V(2).Infof("Container %q exited after %s", name, unversioned.Now().Sub(start.Time))
    } else {
        glog.V(2).Infof("Container %q termination failed after %s: %v", name, unversioned.Now().Sub(start.Time), err)
    }
    ref, ok := dm.containerRefManager.GetRef(containerID)
    if !ok {
        glog.Warningf("No ref for container %q", name)
    } else {
        message := fmt.Sprintf("Killing container with docker id %v", utilstrings.ShortenString(ID, 12))
        if reason != "" {
            message = fmt.Sprint(message, ": ", reason)
        }
        dm.recorder.Event(ref, api.EventTypeNormal, events.KillingContainer, message)
        dm.containerRefManager.ClearRef(containerID)
    }
    return err
}

func (dm *DockerManager) generateFailedContainerEvent(containerID kubecontainer.ContainerID, podName, reason, message string) {
    ref, ok := dm.containerRefManager.GetRef(containerID)
    if !ok {
        glog.Warningf("No ref for pod %q", podName)
        return
    }
    dm.recorder.Event(ref, api.EventTypeWarning, reason, message)
}

var errNoPodOnContainer = fmt.Errorf("no pod information labels on Docker container")

// containerAndPodFromLabels tries to load the appropriate container info off of a Docker container's labels
func containerAndPodFromLabels(inspect *dockertypes.ContainerJSON) (pod *api.Pod, container *api.Container, err error) {
    if inspect == nil || inspect.Config == nil || inspect.Config.Labels == nil {
        return nil, nil, errNoPodOnContainer
    }
    labels := inspect.Config.Labels
    // the pod data may not be set
    if body, found := labels[kubernetesPodLabel]; found {
        pod = &api.Pod{}
        if err = runtime.DecodeInto(api.Codecs.UniversalDecoder(), []byte(body), pod); err == nil {
            name := labels[types.KubernetesContainerNameLabel]
            for ix := range pod.Spec.Containers {
                if pod.Spec.Containers[ix].Name == name {
                    container = &pod.Spec.Containers[ix]
                    break
                }
            }
            if container == nil {
                for ix := range pod.Spec.InitContainers {
                    if pod.Spec.InitContainers[ix].Name == name {
                        container = &pod.Spec.InitContainers[ix]
                        break
                    }
                }
            }
            if container == nil {
                err = fmt.Errorf("unable to find container %s in pod %v", name, pod)
            }
        } else {
            pod = nil
        }
    }
    // if we could not decode a full pod, at least try to recover the termination grace
    // period from the generic metadata label (the one used by kill)
    if pod == nil {
        if period, ok := labels[kubernetesPodTerminationGracePeriodLabel]; ok {
            if seconds, err := strconv.ParseInt(period, 10, 64); err == nil {
                pod = &api.Pod{}
                pod.DeletionGracePeriodSeconds = &seconds
            }
        }
    }
    return
}

func (dm *DockerManager) applyOOMScoreAdj(pod *api.Pod, container *api.Container, containerInfo *dockertypes.ContainerJSON) error {
    if containerInfo.State.Pid == 0 {
        // Container exited. We cannot do anything about it. Ignore this error.
        glog.V(2).Infof("Failed to apply OOM score adj on container %q with ID %q. Init process does not exist.", containerInfo.Name, containerInfo.ID)
        return nil
    }
    cgroupName, err := dm.procFs.GetFullContainerName(containerInfo.State.Pid)
    if err != nil {
        if err == os.ErrNotExist {
            // Container exited. We cannot do anything about it. Ignore this error.
            glog.V(2).Infof("Failed to apply OOM score adj on container %q with ID %q. Init process does not exist.", containerInfo.Name, containerInfo.ID)
            return nil
        }
        return err
    }
    oomScoreAdj := dm.calculateOomScoreAdj(pod, container)
    if err = dm.oomAdjuster.ApplyOOMScoreAdjContainer(cgroupName, oomScoreAdj, 5); err != nil {
        if err == os.ErrNotExist {
            // Container exited. We cannot do anything about it. Ignore this error.
            glog.V(2).Infof("Failed to apply OOM score adj on container %q with ID %q. Init process does not exist.", containerInfo.Name, containerInfo.ID)
            return nil
        }
        return err
    }
    return nil
}

// runContainerInPod runs a single container from a pod and returns the docker container ID.
func (dm *DockerManager) runContainerInPod(pod *api.Pod, container *api.Container, netMode, ipcMode, pidMode, podIP string, restartCount int) (kubecontainer.ContainerID, error) {
    start := time.Now()
    defer func() {
        metrics.ContainerManagerLatency.WithLabelValues("runContainerInPod").Observe(metrics.SinceInMicroseconds(start))
    }()
    ref, err := kubecontainer.GenerateContainerRef(pod, container)
    if err != nil {
        glog.Errorf("Can't make a ref to pod %v, container %v: '%v'", pod.Name, container.Name, err)
    } else {
        glog.V(5).Infof("Generating ref for container %s: %#v", container.Name, ref)
    }
    opts, err := dm.runtimeHelper.GenerateRunContainerOptions(pod, container, podIP)
    if err != nil {
        return kubecontainer.ContainerID{}, fmt.Errorf("GenerateRunContainerOptions: %v", err)
    }
    utsMode := ""
    if kubecontainer.IsHostNetworkPod(pod) {
        utsMode = namespaceModeHost
    }
    oomScoreAdj := dm.calculateOomScoreAdj(pod, container)
    id, err := dm.runContainer(pod, container, opts, ref, netMode, ipcMode, utsMode, pidMode, restartCount, oomScoreAdj)
    if err != nil {
        return kubecontainer.ContainerID{}, fmt.Errorf("runContainer: %v", err)
    }
    // Remember this reference so we can report events about this container
    if ref != nil {
        dm.containerRefManager.SetRef(id, ref)
    }
    if container.Lifecycle != nil && container.Lifecycle.PostStart != nil {
        msg, handlerErr := dm.runner.Run(id, pod, container, container.Lifecycle.PostStart)
        if handlerErr != nil {
            err := fmt.Errorf("PostStart handler: %v", handlerErr)
            dm.generateFailedContainerEvent(id, pod.Name, events.FailedPostStartHook, msg)
            dm.KillContainerInPod(id, container, pod, err.Error(), nil)
            return kubecontainer.ContainerID{}, err
        }
    }
    // Container information is used in adjusting OOM scores, adding ndots and getting the logPath.
    containerInfo, err := dm.client.InspectContainer(id.ID)
    if err != nil {
        return kubecontainer.ContainerID{}, fmt.Errorf("InspectContainer: %v", err)
    }
    // Create a symbolic link to the Docker container log file using a name which captures the
    // full pod name, the container name and the Docker container ID. Cluster level logging will
    // capture these symbolic filenames which can be used for search terms in Elasticsearch or for
    // labels for Cloud Logging.
    containerLogFile := containerInfo.LogPath
    symlinkFile := LogSymlink(dm.containerLogsDir, kubecontainer.GetPodFullName(pod), container.Name, id.ID)
    if err = dm.os.Symlink(containerLogFile, symlinkFile); err != nil {
        glog.Errorf("Failed to create symbolic link to the log file of pod %q container %q: %v", format.Pod(pod), container.Name, err)
    }
    // Check if current docker version is higher than 1.10. Otherwise, we have to apply OOMScoreAdj instead of using docker API.
    // TODO: Remove this logic after we stop supporting docker version < 1.10.
    if err = dm.applyOOMScoreAdjIfNeeded(pod, container, containerInfo); err != nil {
        return kubecontainer.ContainerID{}, err
    }
    // The addNDotsOption call appends the ndots option to the resolv.conf file generated by docker.
    // This resolv.conf file is shared by all containers of the same pod, and needs to be modified only once per pod.
    // We modify it when the pause container is created, since it is the first container created in the pod and it
    // holds the networking namespace.
    if container.Name == PodInfraContainerName && utsMode != namespaceModeHost {
        err = addNDotsOption(containerInfo.ResolvConfPath)
        if err != nil {
            return kubecontainer.ContainerID{}, fmt.Errorf("addNDotsOption: %v", err)
        }
    }
    return id, err
}

func (dm *DockerManager) applyOOMScoreAdjIfNeeded(pod *api.Pod, container *api.Container, containerInfo *dockertypes.ContainerJSON) error {
    // Compare current API version with expected api version.
    result, err := dm.checkDockerAPIVersion(dockerV110APIVersion)
    if err != nil {
        return fmt.Errorf("Failed to check docker api version: %v", err)
    }
    // If the current api version is older than the one that supports setting OOMScoreAdj via the API, use the old way.
    if result < 0 {
        if err := dm.applyOOMScoreAdj(pod, container, containerInfo); err != nil {
            return fmt.Errorf("Failed to apply oom-score-adj to container %q: %v", containerInfo.Name, err)
        }
    }
    return nil
}

func (dm *DockerManager) calculateOomScoreAdj(pod *api.Pod, container *api.Container) int {
    // Set OOM score of the container based on the priority of the container.
    // Processes in lower-priority pods should be killed first if the system runs out of memory.
    // The main pod infrastructure container is considered high priority, since if it is killed the
    // whole pod will die.
    var oomScoreAdj int
    if container.Name == PodInfraContainerName {
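        // qos.PodInfraOOMAdj is a strongly negative adjustment, steering the
        // kernel's OOM killer away from the infra container.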
        oomScoreAdj = qos.PodInfraOOMAdj
    } else {
        oomScoreAdj = qos.GetContainerOOMScoreAdjust(pod, container, int64(dm.machineInfo.MemoryCapacity))
    }
    return oomScoreAdj
}

// versionInfo wraps api version and daemon version.
type versionInfo struct {
    apiVersion    kubecontainer.Version
    daemonVersion kubecontainer.Version
}

// checkDockerAPIVersion checks current docker API version against expected version.
// Return:
// 1 : newer than expected version
// -1: older than expected version
// 0 : same version
func (dm *DockerManager) checkDockerAPIVersion(expectedVersion string) (int, error) {
    value, err := dm.versionCache.Get(dm.machineInfo.MachineID)
    if err != nil {
        return 0, err
    }
    apiVersion := value.(versionInfo).apiVersion
    result, err := apiVersion.Compare(expectedVersion)
    if err != nil {
        return 0, fmt.Errorf("Failed to compare current docker api version %v with OOMScoreAdj supported Docker version %q - %v",
            apiVersion, expectedVersion, err)
    }
    return result, nil
}
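
// For example, dm.checkDockerAPIVersion(dockerV110APIVersion) returns -1 on a
// daemon older than Docker 1.10; applyOOMScoreAdjIfNeeded uses this to decide
// whether to fall back to the pre-1.10 OOM score path.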

func addNDotsOption(resolvFilePath string) error {
    if len(resolvFilePath) == 0 {
        glog.Errorf("ResolvConfPath is empty.")
        return nil
    }
    if _, err := os.Stat(resolvFilePath); os.IsNotExist(err) {
        return fmt.Errorf("ResolvConfPath %q does not exist", resolvFilePath)
    }
    glog.V(4).Infof("DNS ResolvConfPath exists: %s. Will attempt to add ndots option: %s", resolvFilePath, ndotsDNSOption)
    if err := appendToFile(resolvFilePath, ndotsDNSOption); err != nil {
        glog.Errorf("resolv.conf could not be updated: %v", err)
        return err
    }
    return nil
}
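
// With the usual value of ndotsDNSOption (assumed here to be "options ndots:5\n"),
// names with fewer than five dots are expanded through the resolver's search
// domains before being tried verbatim.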

func appendToFile(filePath, stringToAppend string) error {
    f, err := os.OpenFile(filePath, os.O_APPEND|os.O_WRONLY, 0644)
    if err != nil {
        return err
    }
    defer f.Close()
    _, err = f.WriteString(stringToAppend)
    return err
}

// createPodInfraContainer starts the pod infra container for a pod. Returns the docker container ID of the newly created container.
// If any error occurs in this function, it will return a brief error and a detailed error message.
func (dm *DockerManager) createPodInfraContainer(pod *api.Pod) (kubecontainer.DockerID, error, string) {
    start := time.Now()
    defer func() {
        metrics.ContainerManagerLatency.WithLabelValues("createPodInfraContainer").Observe(metrics.SinceInMicroseconds(start))
    }()
    // Use host networking if specified.
    netNamespace := ""
    var ports []api.ContainerPort
    if kubecontainer.IsHostNetworkPod(pod) {
        netNamespace = namespaceModeHost
    } else if dm.networkPlugin.Name() == "cni" || dm.networkPlugin.Name() == "kubenet" {
        netNamespace = "none"
    } else {
        // Docker only exports ports from the pod infra container. Let's
        // collect all of the relevant ports and export them.
        for _, container := range pod.Spec.InitContainers {
            ports = append(ports, container.Ports...)
        }
        for _, container := range pod.Spec.Containers {
            ports = append(ports, container.Ports...)
        }
    }
    container := &api.Container{
        Name:            PodInfraContainerName,
        Image:           dm.podInfraContainerImage,
        Ports:           ports,
        ImagePullPolicy: podInfraContainerImagePullPolicy,
        Env:             dm.podInfraContainerEnv,
    }
    // No pod secrets for the infra container.
    // The message isn't needed for the infra container.
    if err, msg := dm.imagePuller.EnsureImageExists(pod, container, nil); err != nil {
        return "", err, msg
    }
    // Currently we don't care about restart count of infra container, just set it to 0.
    id, err := dm.runContainerInPod(pod, container, netNamespace, getIPCMode(pod), getPidMode(pod), "", 0)
    if err != nil {
        return "", kubecontainer.ErrRunContainer, err.Error()
    }
    return kubecontainer.DockerID(id.ID), nil, ""
}

// podContainerChangesSpec keeps information on the changes that need to happen for a pod. The semantics are as follows:
// - startInfraContainer is true if a new infra container has to be started and the old one (if running) killed.
//   Additionally, if it is true then containersToKeep has to be empty.
// - infraContainerId has to be set if and only if startInfraContainer is false. It stores the dockerID of the running infra container.
// - containersToStart keeps indices of Specs of containers that have to be started and reasons why containers will be started.
// - containersToKeep stores a mapping from dockerIDs of running containers to indices of their Specs for containers that
//   should be kept running. If startInfraContainer is false then it contains an entry for infraContainerId (mapped to -1).
//   It shouldn't be the case that containersToStart is empty and containersToKeep contains only infraContainerId. In that
//   case the infra container should be killed, hence it is removed from this map.
// - all init containers are stored in initContainersToKeep
// - all running containers which are NOT contained in containersToKeep and initContainersToKeep should be killed.
type podContainerChangesSpec struct {
    StartInfraContainer  bool
    InfraChanged         bool
    InfraContainerId     kubecontainer.DockerID
    InitFailed           bool
    InitContainersToKeep map[kubecontainer.DockerID]int
    ContainersToStart    map[int]string
    ContainersToKeep     map[kubecontainer.DockerID]int
}
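
// As an illustration: for a pod whose infra container is healthy but whose only
// app container has crashed with RestartPolicy Always, computePodContainerChanges
// returns StartInfraContainer=false, ContainersToKeep mapping the infra ID to -1,
// and a single ContainersToStart entry recording why the container will restart.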

func (dm *DockerManager) computePodContainerChanges(pod *api.Pod, podStatus *kubecontainer.PodStatus) (podContainerChangesSpec, error) {
    start := time.Now()
    defer func() {
        metrics.ContainerManagerLatency.WithLabelValues("computePodContainerChanges").Observe(metrics.SinceInMicroseconds(start))
    }()
    glog.V(5).Infof("Syncing Pod %q: %#v", format.Pod(pod), pod)
    containersToStart := make(map[int]string)
    containersToKeep := make(map[kubecontainer.DockerID]int)
    var err error
    var podInfraContainerID kubecontainer.DockerID
    var changed bool
    podInfraContainerStatus := podStatus.FindContainerStatusByName(PodInfraContainerName)
    if podInfraContainerStatus != nil && podInfraContainerStatus.State == kubecontainer.ContainerStateRunning {
        glog.V(4).Infof("Found pod infra container for %q", format.Pod(pod))
        changed, err = dm.podInfraContainerChanged(pod, podInfraContainerStatus)
        if err != nil {
            return podContainerChangesSpec{}, err
        }
    }
    createPodInfraContainer := true
    if podInfraContainerStatus == nil || podInfraContainerStatus.State != kubecontainer.ContainerStateRunning {
        glog.V(2).Infof("Need to restart pod infra container for %q because it is not found", format.Pod(pod))
    } else if changed {
        glog.V(2).Infof("Need to restart pod infra container for %q because it is changed", format.Pod(pod))
    } else {
        glog.V(4).Infof("Pod infra container looks good, keep it %q", format.Pod(pod))
        createPodInfraContainer = false
        podInfraContainerID = kubecontainer.DockerID(podInfraContainerStatus.ID.ID)
        containersToKeep[podInfraContainerID] = -1
    }
    // check the status of the init containers
    initFailed := false
    initContainersToKeep := make(map[kubecontainer.DockerID]int)
    // always reset the init containers if the pod is reset
    if !createPodInfraContainer {
        // keep all successfully completed containers up to and including the first failing container
    Containers:
        for i, container := range pod.Spec.InitContainers {
            containerStatus := podStatus.FindContainerStatusByName(container.Name)
            switch {
            case containerStatus == nil:
                continue
            case containerStatus.State == kubecontainer.ContainerStateRunning:
                initContainersToKeep[kubecontainer.DockerID(containerStatus.ID.ID)] = i
            case containerStatus.State == kubecontainer.ContainerStateExited:
                initContainersToKeep[kubecontainer.DockerID(containerStatus.ID.ID)] = i
                // TODO: should we abstract the "did the init container fail" check?
                if containerStatus.ExitCode != 0 {
                    initFailed = true
                    break Containers
                }
            }
        }
    }
    // check the status of the containers
    for index, container := range pod.Spec.Containers {
        expectedHash := kubecontainer.HashContainer(&container)
        containerStatus := podStatus.FindContainerStatusByName(container.Name)
        if containerStatus == nil || containerStatus.State != kubecontainer.ContainerStateRunning {
            if kubecontainer.ShouldContainerBeRestarted(&container, pod, podStatus) {
                // If we are here it means that the container is dead and should be restarted, or never existed and should
                // be created. We may be inserting this ID again if the container has changed and it has
                // RestartPolicy::Always, but it's not a big deal.
                message := fmt.Sprintf("Container %+v is dead, but RestartPolicy says that we should restart it.", container)
                glog.V(3).Info(message)
                containersToStart[index] = message
            }
            continue
        }
        containerID := kubecontainer.DockerID(containerStatus.ID.ID)
        hash := containerStatus.Hash
        glog.V(3).Infof("pod %q container %q exists as %v", format.Pod(pod), container.Name, containerID)
        if createPodInfraContainer {
            // createPodInfraContainer == true and Container exists
            // If we're creating infra container everything will be killed anyway
            // If RestartPolicy is Always or OnFailure we restart containers that were running before we
            // killed them when restarting Infra Container.
            if pod.Spec.RestartPolicy != api.RestartPolicyNever {
                message := fmt.Sprintf("Infra Container is being recreated. %q will be restarted.", container.Name)
                glog.V(1).Info(message)
                containersToStart[index] = message
            }
            continue
        }
        if initFailed {
            // initialization failed and Container exists
            // If we have an initialization failure everything will be killed anyway
            // If RestartPolicy is Always or OnFailure we restart containers that were running before we
            // killed them when re-running initialization
            if pod.Spec.RestartPolicy != api.RestartPolicyNever {
                message := fmt.Sprintf("Failed to initialize pod. %q will be restarted.", container.Name)
                glog.V(1).Info(message)
                containersToStart[index] = message
            }
            continue
        }
        // At this point, the container is running and pod infra container is good.
        // We will look for changes and check healthiness for the container.
        containerChanged := hash != 0 && hash != expectedHash
        if containerChanged {
            message := fmt.Sprintf("pod %q container %q hash changed (%d vs %d), it will be killed and re-created.", format.Pod(pod), container.Name, hash, expectedHash)
            glog.Info(message)
            containersToStart[index] = message
            continue
        }
        liveness, found := dm.livenessManager.Get(containerStatus.ID)
        if !found || liveness == proberesults.Success {
            containersToKeep[containerID] = index
            continue
        }
        if pod.Spec.RestartPolicy != api.RestartPolicyNever {
            message := fmt.Sprintf("pod %q container %q is unhealthy, it will be killed and re-created.", format.Pod(pod), container.Name)
            glog.Info(message)
            containersToStart[index] = message
        }
    }
    // After the loop one of the following should be true:
    // - createPodInfraContainer is true and containersToKeep is empty.
    //   (In fact, when createPodInfraContainer is true, containersToKeep is never written to.)
    // - createPodInfraContainer is false and containersToKeep contains at least the ID of the infra container.
    // If the infra container is the last running one, we don't want to keep it, and we don't want to
    // keep any init containers.
    if !createPodInfraContainer && len(containersToStart) == 0 && len(containersToKeep) == 1 {
        containersToKeep = make(map[kubecontainer.DockerID]int)
        initContainersToKeep = make(map[kubecontainer.DockerID]int)
    }
    return podContainerChangesSpec{
        StartInfraContainer:  createPodInfraContainer,
        InfraChanged:         changed,
        InfraContainerId:     podInfraContainerID,
        InitFailed:           initFailed,
        InitContainersToKeep: initContainersToKeep,
        ContainersToStart:    containersToStart,
        ContainersToKeep:     containersToKeep,
    }, nil
}

// SyncPod syncs the running pod to match the specified desired pod.
func (dm *DockerManager) SyncPod(pod *api.Pod, _ api.PodStatus, podStatus *kubecontainer.PodStatus, pullSecrets []api.Secret, backOff *flowcontrol.Backoff) (result kubecontainer.PodSyncResult) {
    start := time.Now()
    defer func() {
        metrics.ContainerManagerLatency.WithLabelValues("SyncPod").Observe(metrics.SinceInMicroseconds(start))
    }()
    containerChanges, err := dm.computePodContainerChanges(pod, podStatus)
    if err != nil {
        result.Fail(err)
        return
    }
    glog.V(3).Infof("Got container changes for pod %q: %+v", format.Pod(pod), containerChanges)
    if containerChanges.InfraChanged {
        dm.recorder.Eventf(pod, api.EventTypeNormal, "InfraChanged", "Pod infrastructure changed, it will be killed and re-created.")
    }
    if containerChanges.StartInfraContainer || (len(containerChanges.ContainersToKeep) == 0 && len(containerChanges.ContainersToStart) == 0) {
        if len(containerChanges.ContainersToKeep) == 0 && len(containerChanges.ContainersToStart) == 0 {
            glog.V(4).Infof("Killing Infra Container for %q because all other containers are dead.", format.Pod(pod))
        } else {
            glog.V(4).Infof("Killing Infra Container for %q, will start new one", format.Pod(pod))
        }
        // Killing phase: if we want to start new infra container, or nothing is running kill everything (including infra container)
        // TODO(random-liu): We'll use pod status directly in the future
        killResult := dm.killPodWithSyncResult(pod, kubecontainer.ConvertPodStatusToRunningPod(podStatus), nil)
        result.AddPodSyncResult(killResult)
        if killResult.Error() != nil {
            return
        }
    } else {
        // Otherwise kill any running containers in this pod which are not specified as ones to keep.
        runningContainerStatuses := podStatus.GetRunningContainerStatuses()
        for _, containerStatus := range runningContainerStatuses {
            _, keep := containerChanges.ContainersToKeep[kubecontainer.DockerID(containerStatus.ID.ID)]
            _, keepInit := containerChanges.InitContainersToKeep[kubecontainer.DockerID(containerStatus.ID.ID)]
            if !keep && !keepInit {
                glog.V(3).Infof("Killing unwanted container %q(id=%q) for pod %q", containerStatus.Name, containerStatus.ID, format.Pod(pod))
                // attempt to find the appropriate container policy
                var podContainer *api.Container
                var killMessage string
                for i, c := range pod.Spec.Containers {
                    if c.Name == containerStatus.Name {
                        podContainer = &pod.Spec.Containers[i]
                        killMessage = containerChanges.ContainersToStart[i]
                        break
                    }
                }
                killContainerResult := kubecontainer.NewSyncResult(kubecontainer.KillContainer, containerStatus.Name)
                result.AddSyncResult(killContainerResult)
                if err := dm.KillContainerInPod(containerStatus.ID, podContainer, pod, killMessage, nil); err != nil {
                    killContainerResult.Fail(kubecontainer.ErrKillContainer, err.Error())
                    glog.Errorf("Error killing container %q(id=%q) for pod %q: %v", containerStatus.Name, containerStatus.ID, format.Pod(pod), err)
                    return
                }
            }
        }
    }
    // Keep terminated init containers fairly aggressively controlled
    dm.pruneInitContainersBeforeStart(pod, podStatus, containerChanges.InitContainersToKeep)
    // We pass the value of the podIP down to runContainerInPod, which in turn
    // passes it to various other functions, in order to facilitate
    // functionality that requires this value (hosts file and downward API)
    // and avoid races determining the pod IP in cases where a container
    // requires restart but the podIP isn't in the status manager yet.
    //
    // We default to the IP in the passed-in pod status, and overwrite it if the
    // infra container needs to be (re)started.
    podIP := ""
    if podStatus != nil {
        podIP = podStatus.IP
    }
    // If we should create infra container then we do it first.
    podInfraContainerID := containerChanges.InfraContainerId
    if containerChanges.StartInfraContainer && (len(containerChanges.ContainersToStart) > 0) {
        glog.V(4).Infof("Creating pod infra container for %q", format.Pod(pod))
        startContainerResult := kubecontainer.NewSyncResult(kubecontainer.StartContainer, PodInfraContainerName)
        result.AddSyncResult(startContainerResult)
        var msg string
        podInfraContainerID, err, msg = dm.createPodInfraContainer(pod)
        if err != nil {
            startContainerResult.Fail(err, msg)
            glog.Errorf("Failed to create pod infra container: %v; Skipping pod %q: %s", err, format.Pod(pod), msg)
            return
        }
        setupNetworkResult := kubecontainer.NewSyncResult(kubecontainer.SetupNetwork, kubecontainer.GetPodFullName(pod))
        result.AddSyncResult(setupNetworkResult)
        if !kubecontainer.IsHostNetworkPod(pod) {
            glog.V(3).Infof("Calling network plugin %s to setup pod for %s", dm.networkPlugin.Name(), format.Pod(pod))
            err = dm.networkPlugin.SetUpPod(pod.Namespace, pod.Name, podInfraContainerID.ContainerID())
            if err != nil {
                // TODO: (random-liu) There shouldn't be "Skipping pod" in sync result message
                message := fmt.Sprintf("Failed to setup network for pod %q using network plugin %q: %v; Skipping pod", format.Pod(pod), dm.networkPlugin.Name(), err)
                setupNetworkResult.Fail(kubecontainer.ErrSetupNetwork, message)
                glog.Error(message)
                // Delete infra container
                killContainerResult := kubecontainer.NewSyncResult(kubecontainer.KillContainer, PodInfraContainerName)
                result.AddSyncResult(killContainerResult)
                if delErr := dm.KillContainerInPod(kubecontainer.ContainerID{
                    ID:   string(podInfraContainerID),
                    Type: "docker"}, nil, pod, message, nil); delErr != nil {
                    killContainerResult.Fail(kubecontainer.ErrKillContainer, delErr.Error())
                    glog.Warningf("Clear infra container failed for pod %q: %v", format.Pod(pod), delErr)
                }
                return
            }
            // Setup the host interface unless the pod is on the host's network (FIXME: move to networkPlugin when ready)
            podInfraContainer, err := dm.client.InspectContainer(string(podInfraContainerID))
            if err != nil {
                glog.Errorf("Failed to inspect pod infra container: %v; Skipping pod %q", err, format.Pod(pod))
                result.Fail(err)
                return
            }
            if dm.configureHairpinMode {
                if err = hairpin.SetUpContainerPid(podInfraContainer.State.Pid, network.DefaultInterfaceName); err != nil {
                    glog.Warningf("Hairpin setup failed for pod %q: %v", format.Pod(pod), err)
                }
            }
            // Overwrite the podIP passed in the pod status, since we just started the infra container.
            podIP, err = dm.determineContainerIP(pod.Namespace, pod.Name, podInfraContainer)
            if err != nil {
                glog.Errorf("Network error: %v; Skipping pod %q", err, format.Pod(pod))
                result.Fail(err)
                return
            }
            glog.V(4).Infof("Determined pod ip after infra change: %q: %q", format.Pod(pod), podIP)
        }
    }
    next, status, done := findActiveInitContainer(pod, podStatus)
    if status != nil {
        if status.ExitCode != 0 {
            // container initialization has failed, flag the pod as failed
            initContainerResult := kubecontainer.NewSyncResult(kubecontainer.InitContainer, status.Name)
            initContainerResult.Fail(kubecontainer.ErrRunInitContainer, fmt.Sprintf("init container %q exited with %d", status.Name, status.ExitCode))
            result.AddSyncResult(initContainerResult)
            if pod.Spec.RestartPolicy == api.RestartPolicyNever {
                utilruntime.HandleError(fmt.Errorf("error running pod %q init container %q, restart=Never: %#v", format.Pod(pod), status.Name, status))
                return
            }
            utilruntime.HandleError(fmt.Errorf("Error running pod %q init container %q, restarting: %#v", format.Pod(pod), status.Name, status))
        }
    }
    // Note: when configuring the pod's containers anything that can be configured by pointing
    // to the namespace of the infra container should use namespaceMode. This includes things like the net namespace
    // and IPC namespace. PID mode cannot point to another container right now.
    // See createPodInfraContainer for infra container setup.
    namespaceMode := fmt.Sprintf("container:%v", podInfraContainerID)
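    // namespaceMode is e.g. "container:<infra container docker ID>", which tells
    // Docker to join that container's namespace.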
    pidMode := getPidMode(pod)
    if next != nil {
        if len(containerChanges.ContainersToStart) == 0 {
            glog.V(4).Infof("No containers to start, stopping at init container %+v in pod %v", next.Name, format.Pod(pod))
            return
        }
        // If we need to start the next container, do so now then exit
        container := next
        startContainerResult := kubecontainer.NewSyncResult(kubecontainer.StartContainer, container.Name)
        result.AddSyncResult(startContainerResult)
        // containerChanges.StartInfraContainer causes the containers to be restarted for config reasons
        if !containerChanges.StartInfraContainer {
            isInBackOff, err, msg := dm.doBackOff(pod, container, podStatus, backOff)
            if isInBackOff {
                startContainerResult.Fail(err, msg)
                glog.V(4).Infof("Backing Off restarting init container %+v in pod %v", container, format.Pod(pod))
                return
            }
        }
        glog.V(4).Infof("Creating init container %+v in pod %v", container, format.Pod(pod))
        if err, msg := dm.tryContainerStart(container, pod, podStatus, pullSecrets, namespaceMode, pidMode, podIP); err != nil {
            startContainerResult.Fail(err, msg)
            utilruntime.HandleError(fmt.Errorf("container start failed: %v: %s", err, msg))
            return
        }
        // Successfully started the init container; the next sync will move on to the following one.
        glog.V(4).Infof("Completed init container %q for pod %q", container.Name, format.Pod(pod))
        return
    }
    if !done {
        // init container still running
        glog.V(4).Infof("An init container is still running in pod %v", format.Pod(pod))
        return
    }
    if containerChanges.InitFailed {
        // init containers have failed, don't start any regular containers
        glog.V(4).Infof("Not all init containers have succeeded for pod %v", format.Pod(pod))
        return
    }
    // Start regular containers
    for idx := range containerChanges.ContainersToStart {
        container := &pod.Spec.Containers[idx]
        startContainerResult := kubecontainer.NewSyncResult(kubecontainer.StartContainer, container.Name)
        result.AddSyncResult(startContainerResult)
        // containerChanges.StartInfraContainer causes the containers to be restarted for config reasons
        if !containerChanges.StartInfraContainer {
            isInBackOff, err, msg := dm.doBackOff(pod, container, podStatus, backOff)
            if isInBackOff {
                startContainerResult.Fail(err, msg)
                glog.V(4).Infof("Backing Off restarting container %+v in pod %v", container, format.Pod(pod))
                continue
            }
        }
        glog.V(4).Infof("Creating container %+v in pod %v", container, format.Pod(pod))
        if err, msg := dm.tryContainerStart(container, pod, podStatus, pullSecrets, namespaceMode, pidMode, podIP); err != nil {
            startContainerResult.Fail(err, msg)
            utilruntime.HandleError(fmt.Errorf("container start failed: %v: %s", err, msg))
            continue
        }
    }
    return
}

// tryContainerStart attempts to pull and start the container, returning an error and a reason string if the start
// was not successful.
func (dm *DockerManager) tryContainerStart(container *api.Container, pod *api.Pod, podStatus *kubecontainer.PodStatus, pullSecrets []api.Secret, namespaceMode, pidMode, podIP string) (err error, reason string) {
    err, msg := dm.imagePuller.EnsureImageExists(pod, container, pullSecrets)
    if err != nil {
        return err, msg
    }
    if container.SecurityContext != nil && container.SecurityContext.RunAsNonRoot != nil && *container.SecurityContext.RunAsNonRoot {
        err := dm.verifyNonRoot(container)
        if err != nil {
            return kubecontainer.ErrVerifyNonRoot, err.Error()
        }
    }
    // For a new container, the RestartCount should be 0
    restartCount := 0
    containerStatus := podStatus.FindContainerStatusByName(container.Name)
    if containerStatus != nil {
        restartCount = containerStatus.RestartCount + 1
    }
    _, err = dm.runContainerInPod(pod, container, namespaceMode, namespaceMode, pidMode, podIP, restartCount)
    if err != nil {
        // TODO(bburns) : Perhaps blacklist a container after N failures?
        return kubecontainer.ErrRunContainer, err.Error()
    }
    return nil, ""
}

// pruneInitContainersBeforeStart ensures that before we begin creating init containers, we have reduced the number
// of outstanding init containers still present. This reduces load on the container garbage collector
// by only preserving the most recent terminated init container.
func (dm *DockerManager) pruneInitContainersBeforeStart(pod *api.Pod, podStatus *kubecontainer.PodStatus, initContainersToKeep map[kubecontainer.DockerID]int) {
    // only the last execution of each init container should be preserved, and only preserve it if it is in the
    // list of init containers to keep.
    initContainerNames := sets.NewString()
    for _, container := range pod.Spec.InitContainers {
        initContainerNames.Insert(container.Name)
    }
    for name := range initContainerNames {
        count := 0
        for _, status := range podStatus.ContainerStatuses {
            if status.Name != name || !initContainerNames.Has(status.Name) || status.State != kubecontainer.ContainerStateExited {
                continue
            }
            count++
            // keep the first (most recent) terminated instance of this init container
            if count == 1 {
                continue
            }
            // if there is a reason to preserve the older container, do so
            if _, ok := initContainersToKeep[kubecontainer.DockerID(status.ID.ID)]; ok {
                continue
            }
            // prune all other init containers that match this container name
            // TODO: we may not need aggressive pruning
            glog.V(4).Infof("Removing init container %q instance %q %d", status.Name, status.ID.ID, count)
            if err := dm.client.RemoveContainer(status.ID.ID, dockertypes.ContainerRemoveOptions{RemoveVolumes: true}); err != nil {
                if _, ok := err.(containerNotFoundError); ok {
                    count--
                    continue
                }
                utilruntime.HandleError(fmt.Errorf("failed to remove pod init container %q: %v; Skipping pod %q", status.Name, err, format.Pod(pod)))
                // TODO: report serious errors
                continue
            }
            // remove any references to this container
            if _, ok := dm.containerRefManager.GetRef(status.ID); ok {
                dm.containerRefManager.ClearRef(status.ID)
            } else {
                glog.Warningf("No ref for pod %q", pod.Name)
            }
        }
    }
}

// findActiveInitContainer returns the status of the last failed container, the next init container to
// start, or done if there are no further init containers. Status is only returned if an init container
// failed, in which case next will point to the current container.
func findActiveInitContainer(pod *api.Pod, podStatus *kubecontainer.PodStatus) (next *api.Container, status *kubecontainer.ContainerStatus, done bool) {
    if len(pod.Spec.InitContainers) == 0 {
        return nil, nil, true
    }
    for i := len(pod.Spec.InitContainers) - 1; i >= 0; i-- {
        container := &pod.Spec.InitContainers[i]
        status := podStatus.FindContainerStatusByName(container.Name)
        switch {
        case status == nil:
            continue
        case status.State == kubecontainer.ContainerStateRunning:
            return nil, nil, false
        case status.State == kubecontainer.ContainerStateExited:
            switch {
            // the container has failed, we'll have to retry
            case status.ExitCode != 0:
                return &pod.Spec.InitContainers[i], status, false
            // all init containers successful
            case i == (len(pod.Spec.InitContainers) - 1):
                return nil, nil, true
            // all containers up to i successful, go to i+1
            default:
                return &pod.Spec.InitContainers[i+1], nil, false
            }
        }
    }
    return &pod.Spec.InitContainers[0], nil, false
}

// verifyNonRoot returns an error if the container or image will run as the root user.
func (dm *DockerManager) verifyNonRoot(container *api.Container) error {
    if securitycontext.HasRunAsUser(container) {
        if securitycontext.HasRootRunAsUser(container) {
            return fmt.Errorf("container's runAsUser breaks non-root policy")
        }
        return nil
    }
    imgRoot, err := dm.isImageRoot(container.Image)
    if err != nil {
        return fmt.Errorf("can't tell if image runs as root: %v", err)
    }
    if imgRoot {
        return fmt.Errorf("container has no runAsUser and image will run as root")
    }
    return nil
}

// isImageRoot returns true if the user directive is not set on the image, the user is set to 0,
// or the user is set to root. If there is an error inspecting the image, this method returns
// false along with the error.
func (dm *DockerManager) isImageRoot(image string) (bool, error) {
    img, err := dm.client.InspectImage(image)
    if err != nil {
        return false, err
    }
    if img == nil || img.Config == nil {
        return false, fmt.Errorf("unable to inspect image %s, nil Config", image)
    }
    user := getUidFromUser(img.Config.User)
    // if no user is defined container will run as root
    if user == "" {
        return true, nil
    }
    // do not allow non-numeric user directives
    uid, err := strconv.Atoi(user)
    if err != nil {
        return false, fmt.Errorf("non-numeric user (%s) is not allowed", user)
    }
    // user is numeric, check for 0
    return uid == 0, nil
}

// getUidFromUser splits the uid out of a uid:gid string.
func getUidFromUser(id string) string {
	if id == "" {
		return id
	}
	// split instances where the id may contain uid:gid
	if strings.Contains(id, ":") {
		return strings.Split(id, ":")[0]
	}
	// no gid, just return the id
	return id
}
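
// Hypothetical input/output pairs for getUidFromUser (not part of the
// original file):
//
//	getUidFromUser("")          // ""
//	getUidFromUser("1000")      // "1000"
//	getUidFromUser("1000:1000") // "1000"
//	getUidFromUser("root:0")    // "root" (non-numeric; isImageRoot rejects it)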

// doBackOff reports whether the container is still in back-off, keyed off the finish time of
// its most recent exited instance.
// If all instances of a container have been garbage collected, doBackOff will also return false,
// which means the container may be restarted before the backoff deadline. However, because that
// won't cause an error and the chance is really slim, we can just ignore it for now.
// If a container is still in backoff, the function will return a brief backoff error and a
// detailed error message.
func (dm *DockerManager) doBackOff(pod *api.Pod, container *api.Container, podStatus *kubecontainer.PodStatus, backOff *flowcontrol.Backoff) (bool, error, string) {
	var cStatus *kubecontainer.ContainerStatus
	// Use the finished time of the latest exited container as the start point to calculate whether to do back-off.
	// TODO(random-liu): Better define backoff start point; add unit and e2e test after we finalize this. (See github issue #22240)
	for _, c := range podStatus.ContainerStatuses {
		if c.Name == container.Name && c.State == kubecontainer.ContainerStateExited {
			cStatus = c
			break
		}
	}
	if cStatus != nil {
		glog.Infof("checking backoff for container %q in pod %q", container.Name, pod.Name)
		ts := cStatus.FinishedAt
		// found a container that requires backoff
		dockerName := KubeletContainerName{
			PodFullName:   kubecontainer.GetPodFullName(pod),
			PodUID:        pod.UID,
			ContainerName: container.Name,
		}
		stableName, _, _ := BuildDockerName(dockerName, container)
		if backOff.IsInBackOffSince(stableName, ts) {
			if ref, err := kubecontainer.GenerateContainerRef(pod, container); err == nil {
				dm.recorder.Eventf(ref, api.EventTypeWarning, events.BackOffStartContainer, "Back-off restarting failed docker container")
			}
			err := fmt.Errorf("Back-off %s restarting failed container=%s pod=%s", backOff.Get(stableName), container.Name, format.Pod(pod))
			glog.Infof("%s", err.Error())
			return true, kubecontainer.ErrCrashLoopBackOff, err.Error()
		}
		backOff.Next(stableName, ts)
	}
	return false, nil, ""
}
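
// Hypothetical sketch (not part of the original file) of how a SyncPod-style
// caller might consume doBackOff's three return values; startContainerResult
// is an assumed name for the per-container sync result:
//
//	if isInBackOff, err, msg := dm.doBackOff(pod, container, podStatus, backOff); isInBackOff {
//		// err is kubecontainer.ErrCrashLoopBackOff; msg carries the detailed text
//		startContainerResult.Fail(err, msg)
//		continue
//	}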

// getPidMode returns the pid mode to use on the docker container based on pod.Spec.HostPID.
func getPidMode(pod *api.Pod) string {
	pidMode := ""
	if pod.Spec.SecurityContext != nil && pod.Spec.SecurityContext.HostPID {
		pidMode = namespaceModeHost
	}
	return pidMode
}

// getIPCMode returns the ipc mode to use on the docker container based on pod.Spec.HostIPC.
func getIPCMode(pod *api.Pod) string {
	ipcMode := ""
	if pod.Spec.SecurityContext != nil && pod.Spec.SecurityContext.HostIPC {
		ipcMode = namespaceModeHost
	}
	return ipcMode
}
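
// Hypothetical sketch (not part of the original file) of where these two
// helpers feed into container creation: both strings land on the docker
// HostConfig, where the host-namespace value selects the host's namespace and
// "" keeps the container's own. dockercontainer is assumed here to be this
// file's alias for docker's container types package:
//
//	hc := &dockercontainer.HostConfig{
//		PidMode: dockercontainer.PidMode(getPidMode(pod)),
//		IpcMode: dockercontainer.IpcMode(getIPCMode(pod)),
//	}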

func (dm *DockerManager) DeleteContainer(containerID kubecontainer.ContainerID) error {
	return dm.containerGC.deleteContainer(containerID.ID)
}

// GetNetNS returns the network namespace path for the given container
func (dm *DockerManager) GetNetNS(containerID kubecontainer.ContainerID) (string, error) {
	inspectResult, err := dm.client.InspectContainer(containerID.ID)
	if err != nil {
		glog.Errorf("Error inspecting container: %v", err)
		return "", err
	}
	netnsPath := fmt.Sprintf(DockerNetnsFmt, inspectResult.State.Pid)
	return netnsPath, nil
}
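
// For illustration (not part of the original file): assuming DockerNetnsFmt
// has the usual "/proc/%v/ns/net" shape, a container whose init process has
// pid 4321 resolves to the path:
//
//	// /proc/4321/ns/net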

func (dm *DockerManager) GetPodContainerID(pod *kubecontainer.Pod) (kubecontainer.ContainerID, error) {
	for _, c := range pod.Containers {
		if c.Name == PodInfraContainerName {
			return c.ID, nil
		}
	}
	return kubecontainer.ContainerID{}, fmt.Errorf("pod %s unknown to docker", kubecontainer.BuildPodFullName(pod.Name, pod.Namespace))
}

// Garbage collection of dead containers
func (dm *DockerManager) GarbageCollect(gcPolicy kubecontainer.ContainerGCPolicy, allSourcesReady bool) error {
	return dm.containerGC.GarbageCollect(gcPolicy, allSourcesReady)
}

func (dm *DockerManager) GetPodStatus(uid kubetypes.UID, name, namespace string) (*kubecontainer.PodStatus, error) {
	podStatus := &kubecontainer.PodStatus{ID: uid, Name: name, Namespace: namespace}
	// We now retain the restart count of a container as a docker label. Each time a container
	// restarts, the kubelet reads the restart count from the registered dead container, increments
	// it to get the new restart count, and then adds a label with the new restart count on
	// the newly started container.
	// However, there are some limitations of this method:
	// 1. When all dead containers have been garbage collected, the container status can
	//    no longer recover the historical value and will be *inaccurate*. Fortunately,
	//    the chance is really slim.
	// 2. When working with old version containers which have no restart count label,
	//    we can only assume their restart count is 0.
	// Anyhow, we only promised "best-effort" restart count reporting, so we can ignore
	// these limitations for now.
	var containerStatuses []*kubecontainer.ContainerStatus
	// We have added labels like pod name and pod namespace, so it seems that we could do a
	// filtered list here. However, there may be some old containers without these labels, so
	// at least for now we can't do that.
	// TODO(random-liu): Do only one list and pass in the list result in the future
	// TODO(random-liu): Add filter when we are sure that all the containers have the labels
	containers, err := dm.client.ListContainers(dockertypes.ContainerListOptions{All: true})
	if err != nil {
		return podStatus, err
	}
	// Loop through the list of running and exited docker containers to construct
	// the statuses. We assume docker returns a list of containers sorted in
	// reverse by time.
	// TODO: optimization: set maximum number of containers per container name to examine.
	for _, c := range containers {
		if len(c.Names) == 0 {
			continue
		}
		dockerName, _, err := ParseDockerName(c.Names[0])
		if err != nil {
			continue
		}
		if dockerName.PodUID != uid {
			continue
		}
		result, ip, err := dm.inspectContainer(c.ID, name, namespace)
		if err != nil {
			if _, ok := err.(containerNotFoundError); ok {
				// https://github.com/kubernetes/kubernetes/issues/22541
				// Sometimes when docker's state is corrupt, a container can be listed
				// but not inspected. We fake a status for this container so
				// that we can still return a status for the pod to sync.
				result = &kubecontainer.ContainerStatus{
					ID:    kubecontainer.DockerID(c.ID).ContainerID(),
					Name:  dockerName.ContainerName,
					State: kubecontainer.ContainerStateUnknown,
				}
				glog.Errorf("Unable to inspect container %q: %v", c.ID, err)
			} else {
				return podStatus, err
			}
		}
		containerStatuses = append(containerStatuses, result)
		if ip != "" {
			podStatus.IP = ip
		}
	}

	podStatus.ContainerStatuses = containerStatuses
	return podStatus, nil
}
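
// Hypothetical illustration (not part of the original file) of the
// restart-count label round-trip described above; the label key shown is an
// assumption for the example, not the real constant:
//
//	// dead container carries:          labels["restartCount"] = "3"
//	// its replacement is started with: labels["restartCount"] = "4"
//	// if all dead instances are GC'd, counting restarts is best effort only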

// getVersionInfo returns the apiVersion and daemonVersion of the docker runtime
func (dm *DockerManager) getVersionInfo() (versionInfo, error) {
	apiVersion, err := dm.APIVersion()
	if err != nil {
		return versionInfo{}, err
	}
	daemonVersion, err := dm.Version()
	if err != nil {
		return versionInfo{}, err
	}
	return versionInfo{
		apiVersion:    apiVersion,
		daemonVersion: daemonVersion,
	}, nil
}

// truncateMsg truncates the message if it exceeds the max length, keeping the head and
// tail and replacing the middle with a marker. max must be at least len(truncatedMsg).
func truncateMsg(msg string, max int) string {
	if len(msg) <= max {
		return msg
	}
	glog.V(2).Infof("Truncated %s", msg)
	const truncatedMsg = "..TRUNCATED.."
	// keep roughly half of the remaining budget (max minus the marker) at each end
	begin := (max - len(truncatedMsg)) / 2
	end := len(msg) - (max - (len(truncatedMsg) + begin))
	return msg[:begin] + truncatedMsg + msg[end:]
}
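
// Hypothetical worked example for truncateMsg (not part of the original
// file), with max = 20 and the 13-byte "..TRUNCATED.." marker:
//
//	// begin = (20-13)/2 = 3 bytes kept from the front
//	// the remaining 20-13-3 = 4 bytes are kept from the back
//	truncateMsg("abcdefghijklmnopqrstuvwxyz", 20) // "abc..TRUNCATED..wxyz"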