summary.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403
  1. /*
  2. Copyright 2016 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package stats
  14. import (
  15. "fmt"
  16. "strings"
  17. "time"
  18. "k8s.io/kubernetes/pkg/api"
  19. "k8s.io/kubernetes/pkg/api/unversioned"
  20. "k8s.io/kubernetes/pkg/kubelet/api/v1alpha1/stats"
  21. "k8s.io/kubernetes/pkg/kubelet/cm"
  22. "k8s.io/kubernetes/pkg/kubelet/container"
  23. "k8s.io/kubernetes/pkg/kubelet/leaky"
  24. "k8s.io/kubernetes/pkg/kubelet/network"
  25. "k8s.io/kubernetes/pkg/kubelet/types"
  26. kubetypes "k8s.io/kubernetes/pkg/types"
  27. "github.com/golang/glog"
  28. cadvisorapiv1 "github.com/google/cadvisor/info/v1"
  29. cadvisorapiv2 "github.com/google/cadvisor/info/v2"
  30. )
  31. type SummaryProvider interface {
  32. // Get provides a new Summary using the latest results from cadvisor
  33. Get() (*stats.Summary, error)
  34. }
  35. type summaryProviderImpl struct {
  36. provider StatsProvider
  37. fsResourceAnalyzer fsResourceAnalyzerInterface
  38. runtime container.Runtime
  39. }
  40. var _ SummaryProvider = &summaryProviderImpl{}
  41. // NewSummaryProvider returns a new SummaryProvider
  42. func NewSummaryProvider(statsProvider StatsProvider, fsResourceAnalyzer fsResourceAnalyzerInterface, cruntime container.Runtime) SummaryProvider {
  43. return &summaryProviderImpl{statsProvider, fsResourceAnalyzer, cruntime}
  44. }
  45. // Get implements the SummaryProvider interface
  46. // Query cadvisor for the latest resource metrics and build into a summary
  47. func (sp *summaryProviderImpl) Get() (*stats.Summary, error) {
  48. options := cadvisorapiv2.RequestOptions{
  49. IdType: cadvisorapiv2.TypeName,
  50. Count: 2, // 2 samples are needed to compute "instantaneous" CPU
  51. Recursive: true,
  52. }
  53. infos, err := sp.provider.GetContainerInfoV2("/", options)
  54. if err != nil {
  55. if _, ok := infos["/"]; ok {
  56. // If the failure is partial, log it and return a best-effort response.
  57. glog.Errorf("Partial failure issuing GetContainerInfoV2: %v", err)
  58. } else {
  59. return nil, fmt.Errorf("failed GetContainerInfoV2: %v", err)
  60. }
  61. }
  62. // TODO(timstclair): Consider returning a best-effort response if any of the following errors
  63. // occur.
  64. node, err := sp.provider.GetNode()
  65. if err != nil {
  66. return nil, fmt.Errorf("failed GetNode: %v", err)
  67. }
  68. nodeConfig := sp.provider.GetNodeConfig()
  69. rootFsInfo, err := sp.provider.RootFsInfo()
  70. if err != nil {
  71. return nil, fmt.Errorf("failed RootFsInfo: %v", err)
  72. }
  73. imageFsInfo, err := sp.provider.ImagesFsInfo()
  74. if err != nil {
  75. return nil, fmt.Errorf("failed DockerImagesFsInfo: %v", err)
  76. }
  77. imageStats, err := sp.runtime.ImageStats()
  78. if err != nil || imageStats == nil {
  79. return nil, fmt.Errorf("failed ImageStats: %v", err)
  80. }
  81. sb := &summaryBuilder{sp.fsResourceAnalyzer, node, nodeConfig, rootFsInfo, imageFsInfo, *imageStats, infos}
  82. return sb.build()
  83. }
  84. // summaryBuilder aggregates the datastructures provided by cadvisor into a Summary result
  85. type summaryBuilder struct {
  86. fsResourceAnalyzer fsResourceAnalyzerInterface
  87. node *api.Node
  88. nodeConfig cm.NodeConfig
  89. rootFsInfo cadvisorapiv2.FsInfo
  90. imageFsInfo cadvisorapiv2.FsInfo
  91. imageStats container.ImageStats
  92. infos map[string]cadvisorapiv2.ContainerInfo
  93. }
  94. // build returns a Summary from aggregating the input data
  95. func (sb *summaryBuilder) build() (*stats.Summary, error) {
  96. rootInfo, found := sb.infos["/"]
  97. if !found {
  98. return nil, fmt.Errorf("Missing stats for root container")
  99. }
  100. rootStats := sb.containerInfoV2ToStats("", &rootInfo)
  101. nodeStats := stats.NodeStats{
  102. NodeName: sb.node.Name,
  103. CPU: rootStats.CPU,
  104. Memory: rootStats.Memory,
  105. Network: sb.containerInfoV2ToNetworkStats("node:"+sb.node.Name, &rootInfo),
  106. Fs: &stats.FsStats{
  107. AvailableBytes: &sb.rootFsInfo.Available,
  108. CapacityBytes: &sb.rootFsInfo.Capacity,
  109. UsedBytes: &sb.rootFsInfo.Usage,
  110. InodesFree: sb.rootFsInfo.InodesFree,
  111. Inodes: sb.rootFsInfo.Inodes},
  112. StartTime: rootStats.StartTime,
  113. Runtime: &stats.RuntimeStats{
  114. ImageFs: &stats.FsStats{
  115. AvailableBytes: &sb.imageFsInfo.Available,
  116. CapacityBytes: &sb.imageFsInfo.Capacity,
  117. UsedBytes: &sb.imageStats.TotalStorageBytes,
  118. InodesFree: sb.imageFsInfo.InodesFree,
  119. Inodes: sb.rootFsInfo.Inodes,
  120. },
  121. },
  122. }
  123. systemContainers := map[string]string{
  124. stats.SystemContainerKubelet: sb.nodeConfig.KubeletCgroupsName,
  125. stats.SystemContainerRuntime: sb.nodeConfig.RuntimeCgroupsName,
  126. stats.SystemContainerMisc: sb.nodeConfig.SystemCgroupsName,
  127. }
  128. for sys, name := range systemContainers {
  129. if info, ok := sb.infos[name]; ok {
  130. nodeStats.SystemContainers = append(nodeStats.SystemContainers, sb.containerInfoV2ToStats(sys, &info))
  131. }
  132. }
  133. summary := stats.Summary{
  134. Node: nodeStats,
  135. Pods: sb.buildSummaryPods(),
  136. }
  137. return &summary, nil
  138. }
  139. // containerInfoV2FsStats populates the container fs stats
  140. func (sb *summaryBuilder) containerInfoV2FsStats(
  141. info *cadvisorapiv2.ContainerInfo,
  142. cs *stats.ContainerStats) {
  143. // The container logs live on the node rootfs device
  144. cs.Logs = &stats.FsStats{
  145. AvailableBytes: &sb.rootFsInfo.Available,
  146. CapacityBytes: &sb.rootFsInfo.Capacity,
  147. InodesFree: sb.rootFsInfo.InodesFree,
  148. Inodes: sb.rootFsInfo.Inodes,
  149. }
  150. // The container rootFs lives on the imageFs devices (which may not be the node root fs)
  151. cs.Rootfs = &stats.FsStats{
  152. AvailableBytes: &sb.imageFsInfo.Available,
  153. CapacityBytes: &sb.imageFsInfo.Capacity,
  154. InodesFree: sb.imageFsInfo.InodesFree,
  155. Inodes: sb.imageFsInfo.Inodes,
  156. }
  157. lcs, found := sb.latestContainerStats(info)
  158. if !found {
  159. return
  160. }
  161. cfs := lcs.Filesystem
  162. if cfs != nil && cfs.BaseUsageBytes != nil {
  163. rootfsUsage := *cfs.BaseUsageBytes
  164. cs.Rootfs.UsedBytes = &rootfsUsage
  165. if cfs.TotalUsageBytes != nil {
  166. logsUsage := *cfs.TotalUsageBytes - *cfs.BaseUsageBytes
  167. cs.Logs.UsedBytes = &logsUsage
  168. }
  169. }
  170. }
  171. // latestContainerStats returns the latest container stats from cadvisor, or nil if none exist
  172. func (sb *summaryBuilder) latestContainerStats(info *cadvisorapiv2.ContainerInfo) (*cadvisorapiv2.ContainerStats, bool) {
  173. stats := info.Stats
  174. if len(stats) < 1 {
  175. return nil, false
  176. }
  177. latest := stats[len(stats)-1]
  178. if latest == nil {
  179. return nil, false
  180. }
  181. return latest, true
  182. }
  183. // buildSummaryPods aggregates and returns the container stats in cinfos by the Pod managing the container.
  184. // Containers not managed by a Pod are omitted.
  185. func (sb *summaryBuilder) buildSummaryPods() []stats.PodStats {
  186. // Map each container to a pod and update the PodStats with container data
  187. podToStats := map[stats.PodReference]*stats.PodStats{}
  188. for key, cinfo := range sb.infos {
  189. // on systemd using devicemapper each mount into the container has an associated cgroup.
  190. // we ignore them to ensure we do not get duplicate entries in our summary.
  191. // for details on .mount units: http://man7.org/linux/man-pages/man5/systemd.mount.5.html
  192. if strings.HasSuffix(key, ".mount") {
  193. continue
  194. }
  195. // Build the Pod key if this container is managed by a Pod
  196. if !sb.isPodManagedContainer(&cinfo) {
  197. continue
  198. }
  199. ref := sb.buildPodRef(&cinfo)
  200. // Lookup the PodStats for the pod using the PodRef. If none exists, initialize a new entry.
  201. podStats, found := podToStats[ref]
  202. if !found {
  203. podStats = &stats.PodStats{PodRef: ref}
  204. podToStats[ref] = podStats
  205. }
  206. // Update the PodStats entry with the stats from the container by adding it to stats.Containers
  207. containerName := types.GetContainerName(cinfo.Spec.Labels)
  208. if containerName == leaky.PodInfraContainerName {
  209. // Special case for infrastructure container which is hidden from the user and has network stats
  210. podStats.Network = sb.containerInfoV2ToNetworkStats("pod:"+ref.Namespace+"_"+ref.Name, &cinfo)
  211. podStats.StartTime = unversioned.NewTime(cinfo.Spec.CreationTime)
  212. } else {
  213. podStats.Containers = append(podStats.Containers, sb.containerInfoV2ToStats(containerName, &cinfo))
  214. }
  215. }
  216. // Add each PodStats to the result
  217. result := make([]stats.PodStats, 0, len(podToStats))
  218. for _, podStats := range podToStats {
  219. // Lookup the volume stats for each pod
  220. podUID := kubetypes.UID(podStats.PodRef.UID)
  221. if vstats, found := sb.fsResourceAnalyzer.GetPodVolumeStats(podUID); found {
  222. podStats.VolumeStats = vstats.Volumes
  223. }
  224. result = append(result, *podStats)
  225. }
  226. return result
  227. }
  228. // buildPodRef returns a PodReference that identifies the Pod managing cinfo
  229. func (sb *summaryBuilder) buildPodRef(cinfo *cadvisorapiv2.ContainerInfo) stats.PodReference {
  230. podName := types.GetPodName(cinfo.Spec.Labels)
  231. podNamespace := types.GetPodNamespace(cinfo.Spec.Labels)
  232. podUID := types.GetPodUID(cinfo.Spec.Labels)
  233. return stats.PodReference{Name: podName, Namespace: podNamespace, UID: podUID}
  234. }
  235. // isPodManagedContainer returns true if the cinfo container is managed by a Pod
  236. func (sb *summaryBuilder) isPodManagedContainer(cinfo *cadvisorapiv2.ContainerInfo) bool {
  237. podName := types.GetPodName(cinfo.Spec.Labels)
  238. podNamespace := types.GetPodNamespace(cinfo.Spec.Labels)
  239. managed := podName != "" && podNamespace != ""
  240. if !managed && podName != podNamespace {
  241. glog.Warningf(
  242. "Expect container to have either both podName (%s) and podNamespace (%s) labels, or neither.",
  243. podName, podNamespace)
  244. }
  245. return managed
  246. }
  247. func (sb *summaryBuilder) containerInfoV2ToStats(
  248. name string,
  249. info *cadvisorapiv2.ContainerInfo) stats.ContainerStats {
  250. cStats := stats.ContainerStats{
  251. StartTime: unversioned.NewTime(info.Spec.CreationTime),
  252. Name: name,
  253. }
  254. cstat, found := sb.latestContainerStats(info)
  255. if !found {
  256. return cStats
  257. }
  258. if info.Spec.HasCpu {
  259. cpuStats := stats.CPUStats{
  260. Time: unversioned.NewTime(cstat.Timestamp),
  261. }
  262. if cstat.CpuInst != nil {
  263. cpuStats.UsageNanoCores = &cstat.CpuInst.Usage.Total
  264. }
  265. if cstat.Cpu != nil {
  266. cpuStats.UsageCoreNanoSeconds = &cstat.Cpu.Usage.Total
  267. }
  268. cStats.CPU = &cpuStats
  269. }
  270. if info.Spec.HasMemory {
  271. pageFaults := cstat.Memory.ContainerData.Pgfault
  272. majorPageFaults := cstat.Memory.ContainerData.Pgmajfault
  273. cStats.Memory = &stats.MemoryStats{
  274. Time: unversioned.NewTime(cstat.Timestamp),
  275. UsageBytes: &cstat.Memory.Usage,
  276. WorkingSetBytes: &cstat.Memory.WorkingSet,
  277. RSSBytes: &cstat.Memory.RSS,
  278. PageFaults: &pageFaults,
  279. MajorPageFaults: &majorPageFaults,
  280. }
  281. // availableBytes = memory limit (if known) - workingset
  282. if !isMemoryUnlimited(info.Spec.Memory.Limit) {
  283. availableBytes := info.Spec.Memory.Limit - cstat.Memory.WorkingSet
  284. cStats.Memory.AvailableBytes = &availableBytes
  285. }
  286. }
  287. sb.containerInfoV2FsStats(info, &cStats)
  288. cStats.UserDefinedMetrics = sb.containerInfoV2ToUserDefinedMetrics(info)
  289. return cStats
  290. }
  // maxMemorySize is the threshold above which a memory limit is considered
  // "unlimited". It is not MaxInt64 due to rounding by the kernel.
  // TODO: cadvisor should export this https://github.com/google/cadvisor/blob/master/metrics/prometheus.go#L596
  const maxMemorySize = uint64(1) << 62

  // isMemoryUnlimited reports whether the memory limit v is strictly greater
  // than maxMemorySize and should therefore be treated as "no limit".
  func isMemoryUnlimited(v uint64) bool {
  	return maxMemorySize < v
  }
  298. func (sb *summaryBuilder) containerInfoV2ToNetworkStats(name string, info *cadvisorapiv2.ContainerInfo) *stats.NetworkStats {
  299. if !info.Spec.HasNetwork {
  300. return nil
  301. }
  302. cstat, found := sb.latestContainerStats(info)
  303. if !found {
  304. return nil
  305. }
  306. for _, inter := range cstat.Network.Interfaces {
  307. if inter.Name == network.DefaultInterfaceName {
  308. return &stats.NetworkStats{
  309. Time: unversioned.NewTime(cstat.Timestamp),
  310. RxBytes: &inter.RxBytes,
  311. RxErrors: &inter.RxErrors,
  312. TxBytes: &inter.TxBytes,
  313. TxErrors: &inter.TxErrors,
  314. }
  315. }
  316. }
  317. glog.Warningf("Missing default interface %q for %s", network.DefaultInterfaceName, name)
  318. return nil
  319. }
  320. func (sb *summaryBuilder) containerInfoV2ToUserDefinedMetrics(info *cadvisorapiv2.ContainerInfo) []stats.UserDefinedMetric {
  321. type specVal struct {
  322. ref stats.UserDefinedMetricDescriptor
  323. valType cadvisorapiv1.DataType
  324. time time.Time
  325. value float64
  326. }
  327. udmMap := map[string]*specVal{}
  328. for _, spec := range info.Spec.CustomMetrics {
  329. udmMap[spec.Name] = &specVal{
  330. ref: stats.UserDefinedMetricDescriptor{
  331. Name: spec.Name,
  332. Type: stats.UserDefinedMetricType(spec.Type),
  333. Units: spec.Units,
  334. },
  335. valType: spec.Format,
  336. }
  337. }
  338. for _, stat := range info.Stats {
  339. for name, values := range stat.CustomMetrics {
  340. specVal, ok := udmMap[name]
  341. if !ok {
  342. glog.Warningf("spec for custom metric %q is missing from cAdvisor output. Spec: %+v, Metrics: %+v", name, info.Spec, stat.CustomMetrics)
  343. continue
  344. }
  345. for _, value := range values {
  346. // Pick the most recent value
  347. if value.Timestamp.Before(specVal.time) {
  348. continue
  349. }
  350. specVal.time = value.Timestamp
  351. specVal.value = value.FloatValue
  352. if specVal.valType == cadvisorapiv1.IntType {
  353. specVal.value = float64(value.IntValue)
  354. }
  355. }
  356. }
  357. }
  358. var udm []stats.UserDefinedMetric
  359. for _, specVal := range udmMap {
  360. udm = append(udm, stats.UserDefinedMetric{
  361. UserDefinedMetricDescriptor: specVal.ref,
  362. Time: unversioned.NewTime(specVal.time),
  363. Value: specVal.value,
  364. })
  365. }
  366. return udm
  367. }