whitelist.go 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171
  1. /*
  2. Copyright 2016 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package sysctl
  14. import (
  15. "fmt"
  16. "strings"
  17. "k8s.io/kubernetes/pkg/api"
  18. "k8s.io/kubernetes/pkg/api/validation"
  19. extvalidation "k8s.io/kubernetes/pkg/apis/extensions/validation"
  20. "k8s.io/kubernetes/pkg/kubelet/lifecycle"
  21. )
  22. const (
  23. AnnotationInvalidReason = "InvalidSysctlAnnotation"
  24. ForbiddenReason = "SysctlForbidden"
  25. )
  26. // SafeSysctlWhitelist returns the whitelist of safe sysctls and safe sysctl patterns (ending in *).
  27. //
  28. // A sysctl is called safe iff
  29. // - it is namespaced in the container or the pod
  30. // - it is isolated, i.e. has no influence on any other pod on the same node.
  31. func SafeSysctlWhitelist() []string {
  32. return []string{
  33. "kernel.shm_rmid_forced",
  34. "net.ipv4.ip_local_port_range",
  35. "net.ipv4.tcp_max_syn_backlog",
  36. "net.ipv4.tcp_syncookies",
  37. }
  38. }
  39. // Whitelist provides a list of allowed sysctls and sysctl patterns (ending in *)
  40. // and a function to check whether a given sysctl matches this list.
  41. type Whitelist interface {
  42. // Validate checks that all sysctls given in a api.SysctlsPodAnnotationKey annotation
  43. // are valid according to the whitelist.
  44. Validate(pod *api.Pod) error
  45. }
  46. // patternWhitelist takes a list of sysctls or sysctl patterns (ending in *) and
  47. // checks validity via a sysctl and prefix map, rejecting those which are not known
  48. // to be namespaced.
  49. type patternWhitelist struct {
  50. sysctls map[string]Namespace
  51. prefixes map[string]Namespace
  52. annotationKey string
  53. }
  54. var _ lifecycle.PodAdmitHandler = &patternWhitelist{}
  55. // NewWhitelist creates a new Whitelist from a list of sysctls and sysctl pattern (ending in *).
  56. func NewWhitelist(patterns []string, annotationKey string) (*patternWhitelist, error) {
  57. w := &patternWhitelist{
  58. sysctls: map[string]Namespace{},
  59. prefixes: map[string]Namespace{},
  60. annotationKey: annotationKey,
  61. }
  62. for _, s := range patterns {
  63. if !extvalidation.IsValidSysctlPattern(s) {
  64. return nil, fmt.Errorf("sysctl %q must have at most %d characters and match regex %s",
  65. s,
  66. validation.SysctlMaxLength,
  67. extvalidation.SysctlPatternFmt,
  68. )
  69. }
  70. if strings.HasSuffix(s, "*") {
  71. prefix := s[:len(s)-1]
  72. ns := NamespacedBy(prefix)
  73. if ns == UnknownNamespace {
  74. return nil, fmt.Errorf("the sysctls %q are not known to be namespaced", s)
  75. }
  76. w.prefixes[prefix] = ns
  77. } else {
  78. ns := NamespacedBy(s)
  79. if ns == UnknownNamespace {
  80. return nil, fmt.Errorf("the sysctl %q are not known to be namespaced", s)
  81. }
  82. w.sysctls[s] = ns
  83. }
  84. }
  85. return w, nil
  86. }
  87. // validateSysctl checks that a sysctl is whitelisted because it is known
  88. // to be namespaced by the Linux kernel. Note that being whitelisted is required, but not
  89. // sufficient: the container runtime might have a stricter check and refuse to launch a pod.
  90. //
  91. // The parameters hostNet and hostIPC are used to forbid sysctls for pod sharing the
  92. // respective namespaces with the host. This check is only possible for sysctls on
  93. // the static default whitelist, not those on the custom whitelist provided by the admin.
  94. func (w *patternWhitelist) validateSysctl(sysctl string, hostNet, hostIPC bool) error {
  95. nsErrorFmt := "%q not allowed with host %s enabled"
  96. if ns, found := w.sysctls[sysctl]; found {
  97. if ns == IpcNamespace && hostIPC {
  98. return fmt.Errorf(nsErrorFmt, sysctl, ns)
  99. }
  100. if ns == NetNamespace && hostNet {
  101. return fmt.Errorf(nsErrorFmt, sysctl, ns)
  102. }
  103. return nil
  104. }
  105. for p, ns := range w.prefixes {
  106. if strings.HasPrefix(sysctl, p) {
  107. if ns == IpcNamespace && hostIPC {
  108. return fmt.Errorf(nsErrorFmt, sysctl, ns)
  109. }
  110. if ns == NetNamespace && hostNet {
  111. return fmt.Errorf(nsErrorFmt, sysctl, ns)
  112. }
  113. return nil
  114. }
  115. }
  116. return fmt.Errorf("%q not whitelisted", sysctl)
  117. }
  118. // Admit checks that all sysctls given in a api.SysctlsPodAnnotationKey annotation
  119. // are valid according to the whitelist.
  120. func (w *patternWhitelist) Admit(attrs *lifecycle.PodAdmitAttributes) lifecycle.PodAdmitResult {
  121. pod := attrs.Pod
  122. a := pod.Annotations[w.annotationKey]
  123. if a == "" {
  124. return lifecycle.PodAdmitResult{
  125. Admit: true,
  126. }
  127. }
  128. sysctls, err := api.SysctlsFromPodAnnotation(a)
  129. if err != nil {
  130. return lifecycle.PodAdmitResult{
  131. Admit: false,
  132. Reason: AnnotationInvalidReason,
  133. Message: fmt.Sprintf("invalid %s annotation: %v", w.annotationKey, err),
  134. }
  135. }
  136. var hostNet, hostIPC bool
  137. if pod.Spec.SecurityContext != nil {
  138. hostNet = pod.Spec.SecurityContext.HostNetwork
  139. hostIPC = pod.Spec.SecurityContext.HostIPC
  140. }
  141. for _, s := range sysctls {
  142. if err := w.validateSysctl(s.Name, hostNet, hostIPC); err != nil {
  143. return lifecycle.PodAdmitResult{
  144. Admit: false,
  145. Reason: ForbiddenReason,
  146. Message: fmt.Sprintf("forbidden sysctl: %v", err),
  147. }
  148. }
  149. }
  150. return lifecycle.PodAdmitResult{
  151. Admit: true,
  152. }
  153. }