watcher.go 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347
  1. /*
  2. Copyright 2016 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package etcd3
  14. import (
  15. "fmt"
  16. "net/http"
  17. "strings"
  18. "sync"
  19. "k8s.io/kubernetes/pkg/api/unversioned"
  20. "k8s.io/kubernetes/pkg/runtime"
  21. "k8s.io/kubernetes/pkg/storage"
  22. "k8s.io/kubernetes/pkg/watch"
  23. "github.com/coreos/etcd/clientv3"
  24. etcdrpc "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
  25. "github.com/golang/glog"
  26. "golang.org/x/net/context"
  27. "google.golang.org/grpc"
  28. "google.golang.org/grpc/codes"
  29. )
const (
	// Both channels of a watchChan are buffered in order to reduce the
	// number of context switches.

	// incomingBufSize is the capacity of watchChan.incomingEventChan.
	incomingBufSize = 100
	// outgoingBufSize is the capacity of watchChan.resultChan.
	outgoingBufSize = 100
)
// watcher creates watches backed by etcd v3. It holds the dependencies that
// every watchChan it creates reaches through its watcher field.
type watcher struct {
	// client is the etcd v3 client used for the Get and Watch calls.
	client *clientv3.Client
	// codec decodes the raw values read from etcd into runtime objects.
	codec runtime.Codec
	// versioner records the etcd revision on each decoded object.
	versioner storage.Versioner
}
// watchChan implements watch.Interface.
// It carries the state of a single watch: what is being watched, the context
// that bounds its goroutines, and the channels those goroutines communicate on.
type watchChan struct {
	// watcher is the parent that supplies the etcd client, codec and versioner.
	watcher *watcher
	// key is the etcd key being watched; it is used as a prefix when recursive is set.
	key string
	// initialRev is the revision after which events are watched. Zero means
	// "read the current state first"; sync then overwrites it with the
	// revision reported by that read.
	initialRev int64
	// recursive makes the Get and Watch calls use clientv3.WithPrefix.
	recursive bool
	// filter decides which objects are reported to the user.
	filter storage.Filter
	// ctx is derived from the caller's context; cancel cancels it, which is
	// what Stop does and what run does after reporting an error.
	ctx    context.Context
	cancel context.CancelFunc
	// incomingEventChan carries events read from etcd to processEvent.
	incomingEventChan chan *event
	// resultChan carries the results handed to the user via ResultChan; run closes it.
	resultChan chan watch.Event
	// errChan (capacity 1) carries an error to run, which reports it and cancels the watch.
	errChan chan error
}
  53. func newWatcher(client *clientv3.Client, codec runtime.Codec, versioner storage.Versioner) *watcher {
  54. return &watcher{
  55. client: client,
  56. codec: codec,
  57. versioner: versioner,
  58. }
  59. }
  60. // Watch watches on a key and returns a watch.Interface that transfers relevant notifications.
  61. // If rev is zero, it will return the existing object(s) and then start watching from
  62. // the maximum revision+1 from returned objects.
  63. // If rev is non-zero, it will watch events happened after given revision.
  64. // If recursive is false, it watches on given key.
  65. // If recursive is true, it watches any children and directories under the key, excluding the root key itself.
  66. // filter must be non-nil. Only if filter returns true will the changes be returned.
  67. func (w *watcher) Watch(ctx context.Context, key string, rev int64, recursive bool, filter storage.Filter) (watch.Interface, error) {
  68. if recursive && !strings.HasSuffix(key, "/") {
  69. key += "/"
  70. }
  71. wc := w.createWatchChan(ctx, key, rev, recursive, filter)
  72. go wc.run()
  73. return wc, nil
  74. }
  75. func (w *watcher) createWatchChan(ctx context.Context, key string, rev int64, recursive bool, filter storage.Filter) *watchChan {
  76. wc := &watchChan{
  77. watcher: w,
  78. key: key,
  79. initialRev: rev,
  80. recursive: recursive,
  81. filter: filter,
  82. incomingEventChan: make(chan *event, incomingBufSize),
  83. resultChan: make(chan watch.Event, outgoingBufSize),
  84. errChan: make(chan error, 1),
  85. }
  86. wc.ctx, wc.cancel = context.WithCancel(ctx)
  87. return wc
  88. }
  89. func (wc *watchChan) run() {
  90. go wc.startWatching()
  91. var resultChanWG sync.WaitGroup
  92. resultChanWG.Add(1)
  93. go wc.processEvent(&resultChanWG)
  94. select {
  95. case err := <-wc.errChan:
  96. errResult := parseError(err)
  97. if errResult != nil {
  98. // error result is guaranteed to be received by user before closing ResultChan.
  99. select {
  100. case wc.resultChan <- *errResult:
  101. case <-wc.ctx.Done(): // user has given up all results
  102. }
  103. }
  104. wc.cancel()
  105. case <-wc.ctx.Done():
  106. }
  107. // we need to wait until resultChan wouldn't be sent to anymore
  108. resultChanWG.Wait()
  109. close(wc.resultChan)
  110. }
// Stop cancels the watch's context. The goroutines of the watch select on that
// context, and run closes the result channel once event processing has returned.
func (wc *watchChan) Stop() {
	wc.cancel()
}
// ResultChan returns the receive-only channel on which watch events, including
// a watch.Error event for a reported failure, are delivered.
func (wc *watchChan) ResultChan() <-chan watch.Event {
	return wc.resultChan
}
  117. // sync tries to retrieve existing data and send them to process.
  118. // The revision to watch will be set to the revision in response.
  119. func (wc *watchChan) sync() error {
  120. opts := []clientv3.OpOption{}
  121. if wc.recursive {
  122. opts = append(opts, clientv3.WithPrefix())
  123. }
  124. getResp, err := wc.watcher.client.Get(wc.ctx, wc.key, opts...)
  125. if err != nil {
  126. return err
  127. }
  128. wc.initialRev = getResp.Header.Revision
  129. for _, kv := range getResp.Kvs {
  130. wc.sendEvent(parseKV(kv))
  131. }
  132. return nil
  133. }
  134. // startWatching does:
  135. // - get current objects if initialRev=0; set initialRev to current rev
  136. // - watch on given key and send events to process.
  137. func (wc *watchChan) startWatching() {
  138. if wc.initialRev == 0 {
  139. if err := wc.sync(); err != nil {
  140. wc.sendError(err)
  141. return
  142. }
  143. }
  144. opts := []clientv3.OpOption{clientv3.WithRev(wc.initialRev + 1)}
  145. if wc.recursive {
  146. opts = append(opts, clientv3.WithPrefix())
  147. }
  148. wch := wc.watcher.client.Watch(wc.ctx, wc.key, opts...)
  149. for wres := range wch {
  150. if wres.Err() != nil {
  151. // If there is an error on server (e.g. compaction), the channel will return it before closed.
  152. wc.sendError(wres.Err())
  153. return
  154. }
  155. for _, e := range wres.Events {
  156. wc.sendEvent(parseEvent(e))
  157. }
  158. }
  159. }
  160. // processEvent processes events from etcd watcher and sends results to resultChan.
  161. func (wc *watchChan) processEvent(wg *sync.WaitGroup) {
  162. defer wg.Done()
  163. for {
  164. select {
  165. case e := <-wc.incomingEventChan:
  166. res := wc.transform(e)
  167. if res == nil {
  168. continue
  169. }
  170. // If user couldn't receive results fast enough, we also block incoming events from watcher.
  171. // Because storing events in local will cause more memory usage.
  172. // The worst case would be closing the fast watcher.
  173. select {
  174. case wc.resultChan <- *res:
  175. case <-wc.ctx.Done():
  176. return
  177. }
  178. case <-wc.ctx.Done():
  179. return
  180. }
  181. }
  182. }
  183. // transform transforms an event into a result for user if not filtered.
  184. // TODO (Optimization):
  185. // - Save remote round-trip.
  186. // Currently, DELETE and PUT event don't contain the previous value.
  187. // We need to do another Get() in order to get previous object and have logic upon it.
  188. // We could potentially do some optimizations:
  189. // - For PUT, we can save current and previous objects into the value.
  190. // - For DELETE, See https://github.com/coreos/etcd/issues/4620
  191. func (wc *watchChan) transform(e *event) (res *watch.Event) {
  192. curObj, oldObj, err := prepareObjs(wc.ctx, e, wc.watcher.client, wc.watcher.codec, wc.watcher.versioner)
  193. if err != nil {
  194. wc.sendError(err)
  195. return nil
  196. }
  197. switch {
  198. case e.isDeleted:
  199. if !wc.filter.Filter(oldObj) {
  200. return nil
  201. }
  202. res = &watch.Event{
  203. Type: watch.Deleted,
  204. Object: oldObj,
  205. }
  206. case e.isCreated:
  207. if !wc.filter.Filter(curObj) {
  208. return nil
  209. }
  210. res = &watch.Event{
  211. Type: watch.Added,
  212. Object: curObj,
  213. }
  214. default:
  215. curObjPasses := wc.filter.Filter(curObj)
  216. oldObjPasses := wc.filter.Filter(oldObj)
  217. switch {
  218. case curObjPasses && oldObjPasses:
  219. res = &watch.Event{
  220. Type: watch.Modified,
  221. Object: curObj,
  222. }
  223. case curObjPasses && !oldObjPasses:
  224. res = &watch.Event{
  225. Type: watch.Added,
  226. Object: curObj,
  227. }
  228. case !curObjPasses && oldObjPasses:
  229. res = &watch.Event{
  230. Type: watch.Deleted,
  231. Object: oldObj,
  232. }
  233. }
  234. }
  235. return res
  236. }
  237. func parseError(err error) *watch.Event {
  238. var status *unversioned.Status
  239. switch {
  240. case err == etcdrpc.ErrCompacted:
  241. status = &unversioned.Status{
  242. Status: unversioned.StatusFailure,
  243. Message: err.Error(),
  244. Code: http.StatusGone,
  245. Reason: unversioned.StatusReasonExpired,
  246. }
  247. default:
  248. status = &unversioned.Status{
  249. Status: unversioned.StatusFailure,
  250. Message: err.Error(),
  251. Code: http.StatusInternalServerError,
  252. Reason: unversioned.StatusReasonInternalError,
  253. }
  254. }
  255. return &watch.Event{
  256. Type: watch.Error,
  257. Object: status,
  258. }
  259. }
  260. func (wc *watchChan) sendError(err error) {
  261. // Context.canceled is an expected behavior.
  262. // We should just stop all goroutines in watchChan without returning error.
  263. // TODO: etcd client should return context.Canceled instead of grpc specific error.
  264. if grpc.Code(err) == codes.Canceled || err == context.Canceled {
  265. return
  266. }
  267. select {
  268. case wc.errChan <- err:
  269. case <-wc.ctx.Done():
  270. }
  271. }
  272. func (wc *watchChan) sendEvent(e *event) {
  273. if len(wc.incomingEventChan) == incomingBufSize {
  274. glog.V(2).Infof("Fast watcher, slow processing. Number of buffered events: %d."+
  275. "Probably caused by slow decoding, user not receiving fast, or other processing logic",
  276. incomingBufSize)
  277. }
  278. select {
  279. case wc.incomingEventChan <- e:
  280. case <-wc.ctx.Done():
  281. }
  282. }
  283. func prepareObjs(ctx context.Context, e *event, client *clientv3.Client, codec runtime.Codec, versioner storage.Versioner) (curObj runtime.Object, oldObj runtime.Object, err error) {
  284. if !e.isDeleted {
  285. curObj, err = decodeObj(codec, versioner, e.value, e.rev)
  286. if err != nil {
  287. return nil, nil, err
  288. }
  289. }
  290. if e.isDeleted || !e.isCreated {
  291. getResp, err := client.Get(ctx, e.key, clientv3.WithRev(e.rev-1))
  292. if err != nil {
  293. return nil, nil, err
  294. }
  295. // Note that this sends the *old* object with the etcd revision for the time at
  296. // which it gets deleted.
  297. // We assume old object is returned only in Deleted event. Users (e.g. cacher) need
  298. // to have larger than previous rev to tell the ordering.
  299. oldObj, err = decodeObj(codec, versioner, getResp.Kvs[0].Value, e.rev)
  300. if err != nil {
  301. return nil, nil, err
  302. }
  303. }
  304. return curObj, oldObj, nil
  305. }
  306. func decodeObj(codec runtime.Codec, versioner storage.Versioner, data []byte, rev int64) (runtime.Object, error) {
  307. obj, err := runtime.Decode(codec, []byte(data))
  308. if err != nil {
  309. return nil, err
  310. }
  311. // ensure resource version is set on the object we load from etcd
  312. if err := versioner.UpdateObject(obj, uint64(rev)); err != nil {
  313. return nil, fmt.Errorf("failure to version api object (%d) %#v: %v", rev, obj, err)
  314. }
  315. return obj, nil
  316. }