metrics_client.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333
  1. /*
  2. Copyright 2015 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package metrics
  14. import (
  15. "encoding/json"
  16. "fmt"
  17. "strings"
  18. "time"
  19. "github.com/golang/glog"
  20. "k8s.io/kubernetes/pkg/api"
  21. "k8s.io/kubernetes/pkg/api/v1"
  22. clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"
  23. "k8s.io/kubernetes/pkg/labels"
  24. "k8s.io/kubernetes/pkg/util/sets"
  25. heapster "k8s.io/heapster/metrics/api/v1/types"
  26. metrics_api "k8s.io/heapster/metrics/apis/metrics/v1alpha1"
  27. )
// Default coordinates of the Heapster service. Presumably these are the values
// callers pass to NewHeapsterMetricsClient when nothing else is configured;
// they are not referenced in the visible part of this file.
const (
	// DefaultHeapsterNamespace is the default namespace of the Heapster service.
	DefaultHeapsterNamespace = "kube-system"
	// DefaultHeapsterScheme is the default scheme used to reach the Heapster service.
	DefaultHeapsterScheme = "http"
	// DefaultHeapsterService is the default name of the Heapster service.
	DefaultHeapsterService = "heapster"
	// DefaultHeapsterPort is the default port of the Heapster service.
	DefaultHeapsterPort = "" // use the first exposed port on the service
)
// heapsterQueryStart is the offset added to the current time to obtain the
// "start" parameter of custom-metric queries sent to Heapster. It is negative,
// so queries ask for samples from the last five minutes.
var heapsterQueryStart = -5 * time.Minute
// MetricsClient is an interface for getting metrics for pods.
type MetricsClient interface {
	// GetCPUUtilization returns the average utilization over all pods, represented as a percent of requested CPU
	// (e.g. 70 means that an average pod uses 70% of the requested CPU),
	// and the time of generation of the oldest of the utilization reports for pods.
	GetCPUUtilization(namespace string, selector labels.Selector) (*int, time.Time, error)

	// GetCustomMetric returns the average value of the given custom metric from the
	// pods picked using the namespace and selector passed as arguments,
	// together with a timestamp for the samples that were used.
	GetCustomMetric(customMetricName string, namespace string, selector labels.Selector) (*float64, time.Time, error)
}
// intAndFloat carries one metric value in both integer and floating-point
// form, so that consumers can read whichever representation they need.
type intAndFloat struct {
	intValue   int64
	floatValue float64
}
// metricAggregator reduces a Heapster metric result list to a single
// intAndFloat value. It also returns the number of pods included in the
// aggregation and a timestamp for the samples that were aggregated.
type metricAggregator func(heapster.MetricResultList) (intAndFloat, int, time.Time)
// metricDefinition pairs the name of a metric, as it appears in the Heapster
// request path, with the function used to aggregate the returned results.
type metricDefinition struct {
	name       string
	aggregator metricAggregator
}
// HeapsterMetricsClient is a Heapster-based implementation of MetricsClient.
type HeapsterMetricsClient struct {
	// client is used both to list pods and to proxy requests to the Heapster service.
	client clientset.Interface
	// The fields below identify the Heapster service and are handed to the
	// service proxy call on every metrics query.
	heapsterNamespace string
	heapsterScheme    string
	heapsterService   string
	heapsterPort      string
}
  63. var averageFunction = func(metrics heapster.MetricResultList) (intAndFloat, int, time.Time) {
  64. sum, count, timestamp := calculateSumFromTimeSample(metrics, time.Minute)
  65. result := intAndFloat{0, 0}
  66. if count > 0 {
  67. result.intValue = sum.intValue / int64(count)
  68. result.floatValue = sum.floatValue / float64(count)
  69. }
  70. return result, count, timestamp
  71. }
  72. func getHeapsterCustomMetricDefinition(metricName string) metricDefinition {
  73. return metricDefinition{"custom/" + metricName, averageFunction}
  74. }
  75. // NewHeapsterMetricsClient returns a new instance of Heapster-based implementation of MetricsClient interface.
  76. func NewHeapsterMetricsClient(client clientset.Interface, namespace, scheme, service, port string) *HeapsterMetricsClient {
  77. return &HeapsterMetricsClient{
  78. client: client,
  79. heapsterNamespace: namespace,
  80. heapsterScheme: scheme,
  81. heapsterService: service,
  82. heapsterPort: port,
  83. }
  84. }
  85. func (h *HeapsterMetricsClient) GetCPUUtilization(namespace string, selector labels.Selector) (*int, time.Time, error) {
  86. avgConsumption, avgRequest, timestamp, err := h.GetCpuConsumptionAndRequestInMillis(namespace, selector)
  87. if err != nil {
  88. return nil, time.Time{}, fmt.Errorf("failed to get CPU consumption and request: %v", err)
  89. }
  90. utilization := int((avgConsumption * 100) / avgRequest)
  91. return &utilization, timestamp, nil
  92. }
  93. func (h *HeapsterMetricsClient) GetCpuConsumptionAndRequestInMillis(namespace string, selector labels.Selector) (avgConsumption int64,
  94. avgRequest int64, timestamp time.Time, err error) {
  95. podList, err := h.client.Core().Pods(namespace).
  96. List(api.ListOptions{LabelSelector: selector})
  97. if err != nil {
  98. return 0, 0, time.Time{}, fmt.Errorf("failed to get pod list: %v", err)
  99. }
  100. podNames := map[string]struct{}{}
  101. requestSum := int64(0)
  102. missing := false
  103. for _, pod := range podList.Items {
  104. if pod.Status.Phase == api.PodPending {
  105. // Skip pending pods.
  106. continue
  107. }
  108. podNames[pod.Name] = struct{}{}
  109. for _, container := range pod.Spec.Containers {
  110. if containerRequest, ok := container.Resources.Requests[api.ResourceCPU]; ok {
  111. requestSum += containerRequest.MilliValue()
  112. } else {
  113. missing = true
  114. }
  115. }
  116. }
  117. if len(podNames) == 0 && len(podList.Items) > 0 {
  118. return 0, 0, time.Time{}, fmt.Errorf("no running pods")
  119. }
  120. if missing || requestSum == 0 {
  121. return 0, 0, time.Time{}, fmt.Errorf("some pods do not have request for cpu")
  122. }
  123. glog.V(4).Infof("%s %s - sum of CPU requested: %d", namespace, selector, requestSum)
  124. requestAvg := requestSum / int64(len(podList.Items))
  125. // Consumption is already averaged and in millis.
  126. consumption, timestamp, err := h.getCpuUtilizationForPods(namespace, selector, podNames)
  127. if err != nil {
  128. return 0, 0, time.Time{}, err
  129. }
  130. return consumption, requestAvg, timestamp, nil
  131. }
  132. func (h *HeapsterMetricsClient) getCpuUtilizationForPods(namespace string, selector labels.Selector, podNames map[string]struct{}) (int64, time.Time, error) {
  133. metricPath := fmt.Sprintf("/apis/metrics/v1alpha1/namespaces/%s/pods", namespace)
  134. params := map[string]string{"labelSelector": selector.String()}
  135. resultRaw, err := h.client.Core().Services(h.heapsterNamespace).
  136. ProxyGet(h.heapsterScheme, h.heapsterService, h.heapsterPort, metricPath, params).
  137. DoRaw()
  138. if err != nil {
  139. return 0, time.Time{}, fmt.Errorf("failed to get pods metrics: %v", err)
  140. }
  141. glog.V(4).Infof("Heapster metrics result: %s", string(resultRaw))
  142. metrics := metrics_api.PodMetricsList{}
  143. err = json.Unmarshal(resultRaw, &metrics)
  144. if err != nil {
  145. return 0, time.Time{}, fmt.Errorf("failed to unmarshall heapster response: %v", err)
  146. }
  147. if len(metrics.Items) != len(podNames) {
  148. present := sets.NewString()
  149. for _, m := range metrics.Items {
  150. present.Insert(m.Name)
  151. }
  152. missing := make([]string, 0)
  153. for expected := range podNames {
  154. if !present.Has(expected) {
  155. missing = append(missing, expected)
  156. }
  157. }
  158. hint := ""
  159. if len(missing) > 0 {
  160. hint = fmt.Sprintf(" (sample missing pod: %s/%s)", namespace, missing[0])
  161. }
  162. return 0, time.Time{}, fmt.Errorf("metrics obtained for %d/%d of pods%s", len(metrics.Items), len(podNames), hint)
  163. }
  164. sum := int64(0)
  165. for _, m := range metrics.Items {
  166. if _, found := podNames[m.Name]; found {
  167. for _, c := range m.Containers {
  168. cpu, found := c.Usage[v1.ResourceCPU]
  169. if !found {
  170. return 0, time.Time{}, fmt.Errorf("no cpu for container %v in pod %v/%v", c.Name, namespace, m.Name)
  171. }
  172. sum += cpu.MilliValue()
  173. }
  174. } else {
  175. return 0, time.Time{}, fmt.Errorf("not expected metrics for pod %v/%v", namespace, m.Name)
  176. }
  177. }
  178. return sum / int64(len(metrics.Items)), metrics.Items[0].Timestamp.Time, nil
  179. }
  180. // GetCustomMetric returns the average value of the given custom metric from the
  181. // pods picked using the namespace and selector passed as arguments.
  182. func (h *HeapsterMetricsClient) GetCustomMetric(customMetricName string, namespace string, selector labels.Selector) (*float64, time.Time, error) {
  183. metricSpec := getHeapsterCustomMetricDefinition(customMetricName)
  184. podList, err := h.client.Core().Pods(namespace).List(api.ListOptions{LabelSelector: selector})
  185. if err != nil {
  186. return nil, time.Time{}, fmt.Errorf("failed to get pod list: %v", err)
  187. }
  188. podNames := []string{}
  189. for _, pod := range podList.Items {
  190. if pod.Status.Phase == api.PodPending {
  191. // Skip pending pods.
  192. continue
  193. }
  194. podNames = append(podNames, pod.Name)
  195. }
  196. if len(podNames) == 0 && len(podList.Items) > 0 {
  197. return nil, time.Time{}, fmt.Errorf("no running pods")
  198. }
  199. value, timestamp, err := h.getCustomMetricForPods(metricSpec, namespace, podNames)
  200. if err != nil {
  201. return nil, time.Time{}, err
  202. }
  203. return &value.floatValue, timestamp, nil
  204. }
  205. func (h *HeapsterMetricsClient) getCustomMetricForPods(metricSpec metricDefinition, namespace string, podNames []string) (*intAndFloat, time.Time, error) {
  206. now := time.Now()
  207. startTime := now.Add(heapsterQueryStart)
  208. metricPath := fmt.Sprintf("/api/v1/model/namespaces/%s/pod-list/%s/metrics/%s",
  209. namespace,
  210. strings.Join(podNames, ","),
  211. metricSpec.name)
  212. resultRaw, err := h.client.Core().Services(h.heapsterNamespace).
  213. ProxyGet(h.heapsterScheme, h.heapsterService, h.heapsterPort, metricPath, map[string]string{"start": startTime.Format(time.RFC3339)}).
  214. DoRaw()
  215. if err != nil {
  216. return nil, time.Time{}, fmt.Errorf("failed to get pods metrics: %v", err)
  217. }
  218. var metrics heapster.MetricResultList
  219. err = json.Unmarshal(resultRaw, &metrics)
  220. if err != nil {
  221. return nil, time.Time{}, fmt.Errorf("failed to unmarshall heapster response: %v", err)
  222. }
  223. glog.V(4).Infof("Heapster metrics result: %s", string(resultRaw))
  224. sum, count, timestamp := metricSpec.aggregator(metrics)
  225. if count != len(podNames) {
  226. missing := make([]string, 0)
  227. for i, expected := range podNames {
  228. if len(metrics.Items) > i && len(metrics.Items[i].Metrics) == 0 {
  229. missing = append(missing, expected)
  230. }
  231. }
  232. hint := ""
  233. if len(missing) > 0 {
  234. hint = fmt.Sprintf(" (sample missing pod: %s/%s)", namespace, missing[0])
  235. }
  236. return nil, time.Time{}, fmt.Errorf("metrics obtained for %d/%d of pods%s", count, len(podNames), hint)
  237. }
  238. return &sum, timestamp, nil
  239. }
  240. func calculateSumFromTimeSample(metrics heapster.MetricResultList, duration time.Duration) (sum intAndFloat, count int, timestamp time.Time) {
  241. sum = intAndFloat{0, 0}
  242. count = 0
  243. timestamp = time.Time{}
  244. var oldest *time.Time // creation time of the oldest of used samples across pods
  245. oldest = nil
  246. for _, metrics := range metrics.Items {
  247. var newest *heapster.MetricPoint // creation time of the newest sample for pod
  248. newest = nil
  249. for i, metricPoint := range metrics.Metrics {
  250. if newest == nil || newest.Timestamp.Before(metricPoint.Timestamp) {
  251. newest = &metrics.Metrics[i]
  252. }
  253. }
  254. if newest != nil {
  255. if oldest == nil || newest.Timestamp.Before(*oldest) {
  256. oldest = &newest.Timestamp
  257. }
  258. intervalSum := intAndFloat{0, 0}
  259. intSumCount := 0
  260. floatSumCount := 0
  261. for _, metricPoint := range metrics.Metrics {
  262. if metricPoint.Timestamp.Add(duration).After(newest.Timestamp) {
  263. intervalSum.intValue += int64(metricPoint.Value)
  264. intSumCount++
  265. if metricPoint.FloatValue != nil {
  266. intervalSum.floatValue += *metricPoint.FloatValue
  267. floatSumCount++
  268. }
  269. }
  270. }
  271. if newest.FloatValue == nil {
  272. if intSumCount > 0 {
  273. sum.intValue += int64(intervalSum.intValue / int64(intSumCount))
  274. sum.floatValue += float64(intervalSum.intValue / int64(intSumCount))
  275. }
  276. } else {
  277. if floatSumCount > 0 {
  278. sum.intValue += int64(intervalSum.floatValue / float64(floatSumCount))
  279. sum.floatValue += intervalSum.floatValue / float64(floatSumCount)
  280. }
  281. }
  282. count++
  283. }
  284. }
  285. if oldest != nil {
  286. timestamp = *oldest
  287. }
  288. return sum, count, timestamp
  289. }