dns.go 31 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862
  1. /*
  2. Copyright 2016 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package dns
  14. import (
  15. "encoding/json"
  16. "fmt"
  17. "hash/fnv"
  18. "net"
  19. "strings"
  20. "sync"
  21. "time"
  22. etcd "github.com/coreos/etcd/client"
  23. "github.com/miekg/dns"
  24. skymsg "github.com/skynetservices/skydns/msg"
  25. kapi "k8s.io/kubernetes/pkg/api"
  26. "k8s.io/kubernetes/pkg/api/endpoints"
  27. "k8s.io/kubernetes/pkg/api/unversioned"
  28. kcache "k8s.io/kubernetes/pkg/client/cache"
  29. clientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"
  30. kframework "k8s.io/kubernetes/pkg/controller/framework"
  31. "k8s.io/kubernetes/pkg/runtime"
  32. "k8s.io/kubernetes/pkg/util/validation"
  33. "k8s.io/kubernetes/pkg/util/wait"
  34. "k8s.io/kubernetes/pkg/watch"
  35. "github.com/golang/glog"
  36. )
  37. const (
  38. kubernetesSvcName = "kubernetes"
  39. // A subdomain added to the user specified domain for all services.
  40. serviceSubdomain = "svc"
  41. // A subdomain added to the user specified dmoain for all pods.
  42. podSubdomain = "pod"
  43. // arpaSuffix is the standard suffix for PTR IP reverse lookups.
  44. arpaSuffix = ".in-addr.arpa."
  45. // Resync period for the kube controller loop.
  46. resyncPeriod = 5 * time.Minute
  47. // Duration for which the TTL cache should hold the node resource to retrieve the zone
  48. // annotation from it so that it could be added to federation CNAMEs. There is ideally
  49. // no need to expire this cache, but we don't want to assume that node annotations
  50. // never change. So we expire the cache and retrieve a node once every 180 seconds.
  51. // The value is chosen to be neither too long nor too short.
  52. nodeCacheTTL = 180 * time.Second
  53. // default priority used for service records
  54. defaultPriority = 10
  55. // default weight used for service records
  56. defaultWeight = 10
  57. // default TTL used for service records
  58. defaultTTL = 30
  59. )
  60. type KubeDNS struct {
  61. // kubeClient makes calls to API Server and registers calls with API Server
  62. // to get Endpoints and Service objects.
  63. kubeClient clientset.Interface
  64. // The domain for which this DNS Server is authoritative.
  65. domain string
  66. // A cache that contains all the endpoints in the system.
  67. endpointsStore kcache.Store
  68. // A cache that contains all the services in the system.
  69. servicesStore kcache.Store
  70. // stores DNS records for the domain.
  71. // A Records and SRV Records for (regular) services and headless Services.
  72. // CNAME Records for ExternalName Services.
  73. cache *TreeCache
  74. // TODO(nikhiljindal): Remove this. It can be recreated using clusterIPServiceMap.
  75. reverseRecordMap map[string]*skymsg.Service
  76. // Map of cluster IP to service object. Headless services are not part of this map.
  77. // Used to get a service when given its cluster IP.
  78. // Access to this is coordinated using cacheLock. We use the same lock for cache and this map
  79. // to ensure that they dont get out of sync.
  80. clusterIPServiceMap map[string]*kapi.Service
  81. // caller is responsible for using the cacheLock before invoking methods on cache
  82. // the cache is not thread-safe, and the caller can guarantee thread safety by using
  83. // the cacheLock
  84. cacheLock sync.RWMutex
  85. // The domain for which this DNS Server is authoritative, in array format and reversed.
  86. // e.g. if domain is "cluster.local", domainPath is []string{"local", "cluster"}
  87. domainPath []string
  88. // endpointsController invokes registered callbacks when endpoints change.
  89. endpointsController *kframework.Controller
  90. // serviceController invokes registered callbacks when services change.
  91. serviceController *kframework.Controller
  92. // Map of federation names that the cluster in which this kube-dns is running belongs to, to
  93. // the corresponding domain names.
  94. federations map[string]string
  95. // A TTL cache that contains some subset of nodes in the system so that we can retrieve the
  96. // cluster zone annotation from the cached node instead of getting it from the API server
  97. // every time.
  98. nodesStore kcache.Store
  99. }
  100. func NewKubeDNS(client clientset.Interface, domain string, federations map[string]string) (*KubeDNS, error) {
  101. // Verify that federation names should not contain dots ('.')
  102. // We can not allow dots since we use that as separator for path segments (svcname.nsname.fedname.svc.domain)
  103. for key := range federations {
  104. if strings.ContainsAny(key, ".") {
  105. return nil, fmt.Errorf("invalid federation name: %s, cannot have '.'", key)
  106. }
  107. }
  108. kd := &KubeDNS{
  109. kubeClient: client,
  110. domain: domain,
  111. cache: NewTreeCache(),
  112. cacheLock: sync.RWMutex{},
  113. nodesStore: kcache.NewStore(kcache.MetaNamespaceKeyFunc),
  114. reverseRecordMap: make(map[string]*skymsg.Service),
  115. clusterIPServiceMap: make(map[string]*kapi.Service),
  116. domainPath: reverseArray(strings.Split(strings.TrimRight(domain, "."), ".")),
  117. federations: federations,
  118. }
  119. kd.setEndpointsStore()
  120. kd.setServicesStore()
  121. return kd, nil
  122. }
  123. func (kd *KubeDNS) Start() {
  124. go kd.endpointsController.Run(wait.NeverStop)
  125. go kd.serviceController.Run(wait.NeverStop)
  126. // Wait synchronously for the Kubernetes service and add a DNS record for it.
  127. // This ensures that the Start function returns only after having received Service objects
  128. // from APIServer.
  129. // TODO: we might not have to wait for kubernetes service specifically. We should just wait
  130. // for a list operation to be complete from APIServer.
  131. kd.waitForKubernetesService()
  132. }
  133. func (kd *KubeDNS) waitForKubernetesService() (svc *kapi.Service) {
  134. name := fmt.Sprintf("%v/%v", kapi.NamespaceDefault, kubernetesSvcName)
  135. glog.Infof("Waiting for service: %v", name)
  136. var err error
  137. servicePollInterval := 1 * time.Second
  138. for {
  139. svc, err = kd.kubeClient.Core().Services(kapi.NamespaceDefault).Get(kubernetesSvcName)
  140. if err != nil || svc == nil {
  141. glog.Infof("Ignoring error while waiting for service %v: %v. Sleeping %v before retrying.", name, err, servicePollInterval)
  142. time.Sleep(servicePollInterval)
  143. continue
  144. }
  145. break
  146. }
  147. return
  148. }
  149. func (kd *KubeDNS) GetCacheAsJSON() (string, error) {
  150. kd.cacheLock.RLock()
  151. defer kd.cacheLock.RUnlock()
  152. json, err := kd.cache.Serialize()
  153. return json, err
  154. }
  155. func (kd *KubeDNS) setServicesStore() {
  156. // Returns a cache.ListWatch that gets all changes to services.
  157. kd.servicesStore, kd.serviceController = kframework.NewInformer(
  158. &kcache.ListWatch{
  159. ListFunc: func(options kapi.ListOptions) (runtime.Object, error) {
  160. return kd.kubeClient.Core().Services(kapi.NamespaceAll).List(options)
  161. },
  162. WatchFunc: func(options kapi.ListOptions) (watch.Interface, error) {
  163. return kd.kubeClient.Core().Services(kapi.NamespaceAll).Watch(options)
  164. },
  165. },
  166. &kapi.Service{},
  167. resyncPeriod,
  168. kframework.ResourceEventHandlerFuncs{
  169. AddFunc: kd.newService,
  170. DeleteFunc: kd.removeService,
  171. UpdateFunc: kd.updateService,
  172. },
  173. )
  174. }
  175. func (kd *KubeDNS) setEndpointsStore() {
  176. // Returns a cache.ListWatch that gets all changes to endpoints.
  177. kd.endpointsStore, kd.endpointsController = kframework.NewInformer(
  178. &kcache.ListWatch{
  179. ListFunc: func(options kapi.ListOptions) (runtime.Object, error) {
  180. return kd.kubeClient.Core().Endpoints(kapi.NamespaceAll).List(options)
  181. },
  182. WatchFunc: func(options kapi.ListOptions) (watch.Interface, error) {
  183. return kd.kubeClient.Core().Endpoints(kapi.NamespaceAll).Watch(options)
  184. },
  185. },
  186. &kapi.Endpoints{},
  187. resyncPeriod,
  188. kframework.ResourceEventHandlerFuncs{
  189. AddFunc: kd.handleEndpointAdd,
  190. UpdateFunc: func(oldObj, newObj interface{}) {
  191. // TODO: Avoid unwanted updates.
  192. kd.handleEndpointAdd(newObj)
  193. },
  194. },
  195. )
  196. }
  197. func assertIsService(obj interface{}) (*kapi.Service, bool) {
  198. if service, ok := obj.(*kapi.Service); ok {
  199. return service, ok
  200. } else {
  201. glog.Errorf("Type assertion failed! Expected 'Service', got %T", service)
  202. return nil, ok
  203. }
  204. }
  205. func (kd *KubeDNS) newService(obj interface{}) {
  206. if service, ok := assertIsService(obj); ok {
  207. glog.V(4).Infof("Add/Updated for service %v", service.Name)
  208. // ExternalName services are a special kind that return CNAME records
  209. if service.Spec.Type == kapi.ServiceTypeExternalName {
  210. kd.newExternalNameService(service)
  211. return
  212. }
  213. // if ClusterIP is not set, a DNS entry should not be created
  214. if !kapi.IsServiceIPSet(service) {
  215. kd.newHeadlessService(service)
  216. return
  217. }
  218. if len(service.Spec.Ports) == 0 {
  219. glog.Warningf("Unexpected service with no ports, this should not have happend: %v", service)
  220. }
  221. kd.newPortalService(service)
  222. }
  223. }
  224. func (kd *KubeDNS) removeService(obj interface{}) {
  225. if s, ok := assertIsService(obj); ok {
  226. subCachePath := append(kd.domainPath, serviceSubdomain, s.Namespace, s.Name)
  227. kd.cacheLock.Lock()
  228. defer kd.cacheLock.Unlock()
  229. success := kd.cache.deletePath(subCachePath...)
  230. glog.V(2).Infof("Removing service %v at path %v. Success: ", s.Name, subCachePath, success)
  231. // ExternalName services have no IP
  232. if kapi.IsServiceIPSet(s) {
  233. delete(kd.reverseRecordMap, s.Spec.ClusterIP)
  234. delete(kd.clusterIPServiceMap, s.Spec.ClusterIP)
  235. }
  236. }
  237. }
  238. func (kd *KubeDNS) updateService(oldObj, newObj interface{}) {
  239. if new, ok := assertIsService(newObj); ok {
  240. if old, ok := assertIsService(oldObj); ok {
  241. // Remove old cache path only if changing type to/from ExternalName.
  242. // In all other cases, we'll update records in place.
  243. if (new.Spec.Type == kapi.ServiceTypeExternalName) != (old.Spec.Type == kapi.ServiceTypeExternalName) {
  244. kd.removeService(oldObj)
  245. }
  246. kd.newService(newObj)
  247. }
  248. }
  249. }
  250. func (kd *KubeDNS) handleEndpointAdd(obj interface{}) {
  251. if e, ok := obj.(*kapi.Endpoints); ok {
  252. kd.addDNSUsingEndpoints(e)
  253. }
  254. }
  255. func (kd *KubeDNS) addDNSUsingEndpoints(e *kapi.Endpoints) error {
  256. svc, err := kd.getServiceFromEndpoints(e)
  257. if err != nil {
  258. return err
  259. }
  260. if svc == nil || kapi.IsServiceIPSet(svc) {
  261. // No headless service found corresponding to endpoints object.
  262. return nil
  263. }
  264. return kd.generateRecordsForHeadlessService(e, svc)
  265. }
  266. func (kd *KubeDNS) getServiceFromEndpoints(e *kapi.Endpoints) (*kapi.Service, error) {
  267. key, err := kcache.MetaNamespaceKeyFunc(e)
  268. if err != nil {
  269. return nil, err
  270. }
  271. obj, exists, err := kd.servicesStore.GetByKey(key)
  272. if err != nil {
  273. return nil, fmt.Errorf("failed to get service object from services store - %v", err)
  274. }
  275. if !exists {
  276. glog.V(1).Infof("could not find service for endpoint %q in namespace %q", e.Name, e.Namespace)
  277. return nil, nil
  278. }
  279. if svc, ok := assertIsService(obj); ok {
  280. return svc, nil
  281. }
  282. return nil, fmt.Errorf("got a non service object in services store %v", obj)
  283. }
  284. // fqdn constructs the fqdn for the given service. subpaths is a list of path
  285. // elements rooted at the given service, ending at a service record.
  286. func (kd *KubeDNS) fqdn(service *kapi.Service, subpaths ...string) string {
  287. domainLabels := append(append(kd.domainPath, serviceSubdomain, service.Namespace, service.Name), subpaths...)
  288. return dns.Fqdn(strings.Join(reverseArray(domainLabels), "."))
  289. }
  290. func (kd *KubeDNS) newPortalService(service *kapi.Service) {
  291. subCache := NewTreeCache()
  292. recordValue, recordLabel := getSkyMsg(service.Spec.ClusterIP, 0)
  293. subCache.setEntry(recordLabel, recordValue, kd.fqdn(service, recordLabel))
  294. // Generate SRV Records
  295. for i := range service.Spec.Ports {
  296. port := &service.Spec.Ports[i]
  297. if port.Name != "" && port.Protocol != "" {
  298. srvValue := kd.generateSRVRecordValue(service, int(port.Port))
  299. l := []string{"_" + strings.ToLower(string(port.Protocol)), "_" + port.Name}
  300. subCache.setEntry(recordLabel, srvValue, kd.fqdn(service, append(l, recordLabel)...), l...)
  301. }
  302. }
  303. subCachePath := append(kd.domainPath, serviceSubdomain, service.Namespace)
  304. host := kd.getServiceFQDN(service)
  305. reverseRecord, _ := getSkyMsg(host, 0)
  306. kd.cacheLock.Lock()
  307. defer kd.cacheLock.Unlock()
  308. kd.cache.setSubCache(service.Name, subCache, subCachePath...)
  309. kd.reverseRecordMap[service.Spec.ClusterIP] = reverseRecord
  310. kd.clusterIPServiceMap[service.Spec.ClusterIP] = service
  311. }
  312. func (kd *KubeDNS) generateRecordsForHeadlessService(e *kapi.Endpoints, svc *kapi.Service) error {
  313. // TODO: remove this after v1.4 is released and the old annotations are EOL
  314. podHostnames, err := getPodHostnamesFromAnnotation(e.Annotations)
  315. if err != nil {
  316. return err
  317. }
  318. subCache := NewTreeCache()
  319. glog.V(4).Infof("Endpoints Annotations: %v", e.Annotations)
  320. for idx := range e.Subsets {
  321. for subIdx := range e.Subsets[idx].Addresses {
  322. address := &e.Subsets[idx].Addresses[subIdx]
  323. endpointIP := address.IP
  324. recordValue, endpointName := getSkyMsg(endpointIP, 0)
  325. if hostLabel, exists := getHostname(address, podHostnames); exists {
  326. endpointName = hostLabel
  327. }
  328. subCache.setEntry(endpointName, recordValue, kd.fqdn(svc, endpointName))
  329. for portIdx := range e.Subsets[idx].Ports {
  330. endpointPort := &e.Subsets[idx].Ports[portIdx]
  331. if endpointPort.Name != "" && endpointPort.Protocol != "" {
  332. srvValue := kd.generateSRVRecordValue(svc, int(endpointPort.Port), endpointName)
  333. l := []string{"_" + strings.ToLower(string(endpointPort.Protocol)), "_" + endpointPort.Name}
  334. subCache.setEntry(endpointName, srvValue, kd.fqdn(svc, append(l, endpointName)...), l...)
  335. }
  336. }
  337. }
  338. }
  339. subCachePath := append(kd.domainPath, serviceSubdomain, svc.Namespace)
  340. kd.cacheLock.Lock()
  341. defer kd.cacheLock.Unlock()
  342. kd.cache.setSubCache(svc.Name, subCache, subCachePath...)
  343. return nil
  344. }
  345. func getHostname(address *kapi.EndpointAddress, podHostnames map[string]endpoints.HostRecord) (string, bool) {
  346. if len(address.Hostname) > 0 {
  347. return address.Hostname, true
  348. }
  349. if hostRecord, exists := podHostnames[address.IP]; exists && len(validation.IsDNS1123Label(hostRecord.HostName)) == 0 {
  350. return hostRecord.HostName, true
  351. }
  352. return "", false
  353. }
  354. func getPodHostnamesFromAnnotation(annotations map[string]string) (map[string]endpoints.HostRecord, error) {
  355. hostnames := map[string]endpoints.HostRecord{}
  356. if annotations != nil {
  357. if serializedHostnames, exists := annotations[endpoints.PodHostnamesAnnotation]; exists && len(serializedHostnames) > 0 {
  358. err := json.Unmarshal([]byte(serializedHostnames), &hostnames)
  359. if err != nil {
  360. return nil, err
  361. }
  362. }
  363. }
  364. return hostnames, nil
  365. }
  366. func (kd *KubeDNS) generateSRVRecordValue(svc *kapi.Service, portNumber int, labels ...string) *skymsg.Service {
  367. host := strings.Join([]string{svc.Name, svc.Namespace, serviceSubdomain, kd.domain}, ".")
  368. for _, cNameLabel := range labels {
  369. host = cNameLabel + "." + host
  370. }
  371. recordValue, _ := getSkyMsg(host, portNumber)
  372. return recordValue
  373. }
  374. // Generates skydns records for a headless service.
  375. func (kd *KubeDNS) newHeadlessService(service *kapi.Service) error {
  376. // Create an A record for every pod in the service.
  377. // This record must be periodically updated.
  378. // Format is as follows:
  379. // For a service x, with pods a and b create DNS records,
  380. // a.x.ns.domain. and, b.x.ns.domain.
  381. key, err := kcache.MetaNamespaceKeyFunc(service)
  382. if err != nil {
  383. return err
  384. }
  385. e, exists, err := kd.endpointsStore.GetByKey(key)
  386. if err != nil {
  387. return fmt.Errorf("failed to get endpoints object from endpoints store - %v", err)
  388. }
  389. if !exists {
  390. glog.V(1).Infof("Could not find endpoints for service %q in namespace %q. DNS records will be created once endpoints show up.", service.Name, service.Namespace)
  391. return nil
  392. }
  393. if e, ok := e.(*kapi.Endpoints); ok {
  394. return kd.generateRecordsForHeadlessService(e, service)
  395. }
  396. return nil
  397. }
  398. // Generates skydns records for an ExternalName service.
  399. func (kd *KubeDNS) newExternalNameService(service *kapi.Service) {
  400. // Create a CNAME record for the service's ExternalName.
  401. // TODO: TTL?
  402. recordValue, _ := getSkyMsg(service.Spec.ExternalName, 0)
  403. cachePath := append(kd.domainPath, serviceSubdomain, service.Namespace)
  404. fqdn := kd.fqdn(service)
  405. glog.V(2).Infof("newExternalNameService: storing key %s with value %v as %s under %v", service.Name, recordValue, fqdn, cachePath)
  406. kd.cacheLock.Lock()
  407. defer kd.cacheLock.Unlock()
  408. // Store the service name directly as the leaf key
  409. kd.cache.setEntry(service.Name, recordValue, fqdn, cachePath...)
  410. }
  411. // Records responds with DNS records that match the given name, in a format
  412. // understood by the skydns server. If "exact" is true, a single record
  413. // matching the given name is returned, otherwise all records stored under
  414. // the subtree matching the name are returned.
  415. func (kd *KubeDNS) Records(name string, exact bool) (retval []skymsg.Service, err error) {
  416. glog.V(2).Infof("Received DNS Request:%s, exact:%v", name, exact)
  417. trimmed := strings.TrimRight(name, ".")
  418. segments := strings.Split(trimmed, ".")
  419. isFederationQuery := false
  420. federationSegments := []string{}
  421. if !exact && kd.isFederationQuery(segments) {
  422. glog.V(2).Infof("federation service query: Received federation query. Going to try to find local service first")
  423. // Try quering the non-federation (local) service first.
  424. // Will try the federation one later, if this fails.
  425. isFederationQuery = true
  426. federationSegments = append(federationSegments, segments...)
  427. // To try local service, remove federation name from segments.
  428. // Federation name is 3rd in the segment (after service name and namespace).
  429. segments = append(segments[:2], segments[3:]...)
  430. }
  431. path := reverseArray(segments)
  432. records, err := kd.getRecordsForPath(path, exact)
  433. if err != nil {
  434. return nil, err
  435. }
  436. if !isFederationQuery {
  437. if len(records) > 0 {
  438. return records, nil
  439. }
  440. return nil, etcd.Error{Code: etcd.ErrorCodeKeyNotFound}
  441. }
  442. // For federation query, verify that the local service has endpoints.
  443. validRecord := false
  444. for _, val := range records {
  445. // We know that a headless service has endpoints for sure if a record was returned for it.
  446. // The record contains endpoint IPs. So nothing to check for headless services.
  447. if !kd.isHeadlessServiceRecord(&val) {
  448. ok, err := kd.serviceWithClusterIPHasEndpoints(&val)
  449. if err != nil {
  450. glog.V(2).Infof("federation service query: unexpected error while trying to find if service has endpoint: %v")
  451. continue
  452. }
  453. if !ok {
  454. glog.Infof("federation service query: skipping record since service has no endpoint: %v", val)
  455. continue
  456. }
  457. }
  458. validRecord = true
  459. break
  460. }
  461. if validRecord {
  462. // There is a local service with valid endpoints, return its CNAME.
  463. name := strings.Join(reverseArray(path), ".")
  464. // Ensure that this name that we are returning as a CNAME response is a fully qualified
  465. // domain name so that the client's resolver library doesn't have to go through its
  466. // search list all over again.
  467. if !strings.HasSuffix(name, ".") {
  468. name = name + "."
  469. }
  470. glog.Infof("federation service query: Returning CNAME for local service : %s", name)
  471. return []skymsg.Service{{Host: name}}, nil
  472. }
  473. // If the name query is not an exact query and does not match any records in the local store,
  474. // attempt to send a federation redirect (CNAME) response.
  475. if !exact {
  476. glog.V(2).Infof("federation service query: Did not find a local service. Trying federation redirect (CNAME) response")
  477. return kd.federationRecords(reverseArray(federationSegments))
  478. }
  479. return nil, etcd.Error{Code: etcd.ErrorCodeKeyNotFound}
  480. }
  481. func (kd *KubeDNS) getRecordsForPath(path []string, exact bool) ([]skymsg.Service, error) {
  482. retval := []skymsg.Service{}
  483. if kd.isPodRecord(path) {
  484. ip, err := kd.getPodIP(path)
  485. if err == nil {
  486. skyMsg, _ := getSkyMsg(ip, 0)
  487. return []skymsg.Service{*skyMsg}, nil
  488. }
  489. return nil, err
  490. }
  491. if exact {
  492. key := path[len(path)-1]
  493. if key == "" {
  494. return []skymsg.Service{}, nil
  495. }
  496. kd.cacheLock.RLock()
  497. defer kd.cacheLock.RUnlock()
  498. if record, ok := kd.cache.getEntry(key, path[:len(path)-1]...); ok {
  499. glog.V(2).Infof("Exact match %v for %v received from cache", record, path[:len(path)-1])
  500. return []skymsg.Service{*(record.(*skymsg.Service))}, nil
  501. }
  502. glog.V(2).Infof("Exact match for %v not found in cache", path)
  503. return nil, etcd.Error{Code: etcd.ErrorCodeKeyNotFound}
  504. }
  505. kd.cacheLock.RLock()
  506. defer kd.cacheLock.RUnlock()
  507. records := kd.cache.getValuesForPathWithWildcards(path...)
  508. glog.V(2).Infof("Received %d records for %v from cache", len(records), path)
  509. for _, val := range records {
  510. retval = append(retval, *val)
  511. }
  512. glog.V(2).Infof("records:%v, retval:%v, path:%v", records, retval, path)
  513. return retval, nil
  514. }
  515. // Returns true if the given record corresponds to a headless service.
  516. // Important: Assumes that we already have the cacheLock. Callers responsibility to acquire it.
  517. // This is because the code will panic, if we try to acquire it again if we already have it.
  518. func (kd *KubeDNS) isHeadlessServiceRecord(msg *skymsg.Service) bool {
  519. // If it is not a headless service, then msg.Host will be the cluster IP.
  520. // So we can check if msg.host exists in our clusterIPServiceMap.
  521. _, ok := kd.clusterIPServiceMap[msg.Host]
  522. // It is headless service if no record was found.
  523. return !ok
  524. }
  525. // Returns true if the service corresponding to the given message has endpoints.
  526. // Note: Works only for services with ClusterIP. Will return an error for headless service (service without a clusterIP).
  527. // Important: Assumes that we already have the cacheLock. Callers responsibility to acquire it.
  528. // This is because the code will panic, if we try to acquire it again if we already have it.
  529. func (kd *KubeDNS) serviceWithClusterIPHasEndpoints(msg *skymsg.Service) (bool, error) {
  530. svc, ok := kd.clusterIPServiceMap[msg.Host]
  531. if !ok {
  532. // It is a headless service.
  533. return false, fmt.Errorf("method not expected to be called for headless service")
  534. }
  535. key, err := kcache.MetaNamespaceKeyFunc(svc)
  536. if err != nil {
  537. return false, err
  538. }
  539. e, exists, err := kd.endpointsStore.GetByKey(key)
  540. if err != nil {
  541. return false, fmt.Errorf("failed to get endpoints object from endpoints store - %v", err)
  542. }
  543. if !exists {
  544. return false, nil
  545. }
  546. if e, ok := e.(*kapi.Endpoints); ok {
  547. return len(e.Subsets) > 0, nil
  548. }
  549. return false, fmt.Errorf("unexpected: found non-endpoint object in endpoint store: %v", e)
  550. }
  551. // ReverseRecords performs a reverse lookup for the given name.
  552. func (kd *KubeDNS) ReverseRecord(name string) (*skymsg.Service, error) {
  553. glog.V(2).Infof("Received ReverseRecord Request:%s", name)
  554. // if portalIP is not a valid IP, the reverseRecordMap lookup will fail
  555. portalIP, ok := extractIP(name)
  556. if !ok {
  557. return nil, fmt.Errorf("does not support reverse lookup for %s", name)
  558. }
  559. kd.cacheLock.RLock()
  560. defer kd.cacheLock.RUnlock()
  561. if reverseRecord, ok := kd.reverseRecordMap[portalIP]; ok {
  562. return reverseRecord, nil
  563. }
  564. return nil, fmt.Errorf("must be exactly one service record")
  565. }
  566. // extractIP turns a standard PTR reverse record lookup name
  567. // into an IP address
  568. func extractIP(reverseName string) (string, bool) {
  569. if !strings.HasSuffix(reverseName, arpaSuffix) {
  570. return "", false
  571. }
  572. search := strings.TrimSuffix(reverseName, arpaSuffix)
  573. // reverse the segments and then combine them
  574. segments := reverseArray(strings.Split(search, "."))
  575. return strings.Join(segments, "."), true
  576. }
  577. // e.g {"local", "cluster", "pod", "default", "10-0-0-1"}
  578. func (kd *KubeDNS) isPodRecord(path []string) bool {
  579. if len(path) != len(kd.domainPath)+3 {
  580. return false
  581. }
  582. if path[len(kd.domainPath)] != "pod" {
  583. return false
  584. }
  585. for _, segment := range path {
  586. if segment == "*" {
  587. return false
  588. }
  589. }
  590. return true
  591. }
  592. func (kd *KubeDNS) getPodIP(path []string) (string, error) {
  593. ipStr := path[len(path)-1]
  594. ip := strings.Replace(ipStr, "-", ".", -1)
  595. if parsed := net.ParseIP(ip); parsed != nil {
  596. return ip, nil
  597. }
  598. return "", fmt.Errorf("Invalid IP Address %v", ip)
  599. }
  600. func hashServiceRecord(msg *skymsg.Service) string {
  601. s := fmt.Sprintf("%v", msg)
  602. h := fnv.New32a()
  603. h.Write([]byte(s))
  604. return fmt.Sprintf("%x", h.Sum32())
  605. }
  606. func newServiceRecord(ip string, port int) *skymsg.Service {
  607. return &skymsg.Service{
  608. Host: ip,
  609. Port: port,
  610. Priority: defaultPriority,
  611. Weight: defaultWeight,
  612. Ttl: defaultTTL,
  613. }
  614. }
  615. // Returns record in a format that SkyDNS understands.
  616. // Also return the hash of the record.
  617. func getSkyMsg(ip string, port int) (*skymsg.Service, string) {
  618. msg := newServiceRecord(ip, port)
  619. hash := hashServiceRecord(msg)
  620. glog.V(2).Infof("DNS Record:%s, hash:%s", fmt.Sprintf("%v", msg), hash)
  621. return msg, fmt.Sprintf("%x", hash)
  622. }
  623. // isFederationQuery checks if the given query `path` matches the federated service query pattern.
  624. // The conjunction of the following conditions forms the test for the federated service query
  625. // pattern:
  626. // 1. `path` has exactly 4+len(domainPath) segments: mysvc.myns.myfederation.svc.domain.path.
  627. // 2. Service name component must be a valid RFC 1035 name.
  628. // 3. Namespace component must be a valid RFC 1123 name.
  629. // 4. Federation component must also be a valid RFC 1123 name.
  630. // 5. Fourth segment is exactly "svc"
  631. // 6. The remaining segments match kd.domainPath.
  632. // 7. And federation must be one of the listed federations in the config.
  633. // Note: Because of the above conditions, this method will treat wildcard queries such as
  634. // *.mysvc.myns.myfederation.svc.domain.path as non-federation queries.
  635. // We can add support for wildcard queries later, if needed.
  636. func (kd *KubeDNS) isFederationQuery(path []string) bool {
  637. if len(path) != 4+len(kd.domainPath) {
  638. glog.V(2).Infof("not a federation query: len(%q) != 4+len(%q)", path, kd.domainPath)
  639. return false
  640. }
  641. if errs := validation.IsDNS1035Label(path[0]); len(errs) != 0 {
  642. glog.V(2).Infof("not a federation query: %q is not an RFC 1035 label: %q", path[0], errs)
  643. return false
  644. }
  645. if errs := validation.IsDNS1123Label(path[1]); len(errs) != 0 {
  646. glog.V(2).Infof("not a federation query: %q is not an RFC 1123 label: %q", path[1], errs)
  647. return false
  648. }
  649. if errs := validation.IsDNS1123Label(path[2]); len(errs) != 0 {
  650. glog.V(2).Infof("not a federation query: %q is not an RFC 1123 label: %q", path[2], errs)
  651. return false
  652. }
  653. if path[3] != serviceSubdomain {
  654. glog.V(2).Infof("not a federation query: %q != %q (serviceSubdomain)", path[3], serviceSubdomain)
  655. return false
  656. }
  657. for i, domComp := range kd.domainPath {
  658. // kd.domainPath is reversed, so we need to look in the `path` in the reverse order.
  659. if domComp != path[len(path)-i-1] {
  660. glog.V(2).Infof("not a federation query: kd.domainPath[%d] != path[%d] (%q != %q)", i, len(path)-i-1, domComp, path[len(path)-i-1])
  661. return false
  662. }
  663. }
  664. if _, ok := kd.federations[path[2]]; !ok {
  665. glog.V(2).Infof("not a federation query: kd.federations[%q] not found", path[2])
  666. return false
  667. }
  668. return true
  669. }
  670. // federationRecords checks if the given `queryPath` is for a federated service and if it is,
  671. // it returns a CNAME response containing the cluster zone name and federation domain name
  672. // suffix.
  673. func (kd *KubeDNS) federationRecords(queryPath []string) ([]skymsg.Service, error) {
  674. // `queryPath` is a reversed-array of the queried name, reverse it back to make it easy
  675. // to follow through this code and reduce confusion. There is no reason for it to be
  676. // reversed here.
  677. path := reverseArray(queryPath)
  678. // Check if the name query matches the federation query pattern.
  679. if !kd.isFederationQuery(path) {
  680. return nil, etcd.Error{Code: etcd.ErrorCodeKeyNotFound}
  681. }
  682. // Now that we have already established that the query is a federation query, remove the local
  683. // domain path components, i.e. kd.domainPath, from the query.
  684. path = path[:len(path)-len(kd.domainPath)]
  685. // Append the zone name (zone in the cloud provider terminology, not a DNS
  686. // zone) and the region name.
  687. zone, region, err := kd.getClusterZoneAndRegion()
  688. if err != nil {
  689. return nil, fmt.Errorf("failed to obtain the cluster zone and region: %v", err)
  690. }
  691. path = append(path, zone, region)
  692. // We have already established that the map entry exists for the given federation,
  693. // we just need to retrieve the domain name, validate it and append it to the path.
  694. domain := kd.federations[path[2]]
  695. // We accept valid subdomains as well, so just let all the valid subdomains.
  696. if len(validation.IsDNS1123Subdomain(domain)) != 0 {
  697. return nil, fmt.Errorf("%s is not a valid domain name for federation %s", domain, path[2])
  698. }
  699. name := strings.Join(append(path, domain), ".")
  700. // Ensure that this name that we are returning as a CNAME response is a fully qualified
  701. // domain name so that the client's resolver library doesn't have to go through its
  702. // search list all over again.
  703. if !strings.HasSuffix(name, ".") {
  704. name = name + "."
  705. }
  706. return []skymsg.Service{{Host: name}}, nil
  707. }
  708. // getClusterZoneAndRegion returns the name of the zone and the region the
  709. // cluster is running in. It arbitrarily selects a node and reads the failure
  710. // domain label on the node. An alternative is to obtain this pod's
  711. // (i.e. kube-dns pod's) name using the downward API, get the pod, get the
  712. // node the pod is bound to and retrieve that node's labels. But even just by
  713. // reading those steps, it looks complex and it is not entirely clear what
  714. // that complexity is going to buy us. So taking a simpler approach here.
  715. // Also note that zone here means the zone in cloud provider terminology, not
  716. // the DNS zone.
  717. func (kd *KubeDNS) getClusterZoneAndRegion() (string, string, error) {
  718. var node *kapi.Node
  719. objs := kd.nodesStore.List()
  720. if len(objs) > 0 {
  721. var ok bool
  722. if node, ok = objs[0].(*kapi.Node); !ok {
  723. return "", "", fmt.Errorf("expected node object, got: %T", objs[0])
  724. }
  725. } else {
  726. // An alternative to listing nodes each time is to set a watch, but that is totally
  727. // wasteful in case of non-federated independent Kubernetes clusters. So carefully
  728. // proceeding here.
  729. // TODO(madhusudancs): Move this to external/v1 API.
  730. nodeList, err := kd.kubeClient.Core().Nodes().List(kapi.ListOptions{})
  731. if err != nil || len(nodeList.Items) == 0 {
  732. return "", "", fmt.Errorf("failed to retrieve the cluster nodes: %v", err)
  733. }
  734. // Select a node (arbitrarily the first node) that has
  735. // `LabelZoneFailureDomain` and `LabelZoneRegion` set.
  736. for _, nodeItem := range nodeList.Items {
  737. _, zfound := nodeItem.Labels[unversioned.LabelZoneFailureDomain]
  738. _, rfound := nodeItem.Labels[unversioned.LabelZoneRegion]
  739. if !zfound || !rfound {
  740. continue
  741. }
  742. // Make a copy of the node, don't rely on the loop variable.
  743. node = &(*(&nodeItem))
  744. if err := kd.nodesStore.Add(node); err != nil {
  745. return "", "", fmt.Errorf("couldn't add the retrieved node to the cache: %v", err)
  746. }
  747. // Node is found, break out of the loop.
  748. break
  749. }
  750. }
  751. if node == nil {
  752. return "", "", fmt.Errorf("Could not find any nodes")
  753. }
  754. zone, ok := node.Labels[unversioned.LabelZoneFailureDomain]
  755. if !ok || zone == "" {
  756. return "", "", fmt.Errorf("unknown cluster zone")
  757. }
  758. region, ok := node.Labels[unversioned.LabelZoneRegion]
  759. if !ok || region == "" {
  760. return "", "", fmt.Errorf("unknown cluster region")
  761. }
  762. return zone, region, nil
  763. }
  764. func (kd *KubeDNS) getServiceFQDN(service *kapi.Service) string {
  765. return strings.Join([]string{service.Name, service.Namespace, serviceSubdomain, kd.domain}, ".")
  766. }
  767. func reverseArray(arr []string) []string {
  768. for i := 0; i < len(arr)/2; i++ {
  769. j := len(arr) - i - 1
  770. arr[i], arr[j] = arr[j], arr[i]
  771. }
  772. return arr
  773. }