subnet.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509
  1. // Copyright 2015 CoreOS, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package subnet
  15. import (
  16. "encoding/json"
  17. "errors"
  18. "fmt"
  19. "net"
  20. "regexp"
  21. "strconv"
  22. "time"
  23. "github.com/coreos/flannel/Godeps/_workspace/src/github.com/coreos/go-etcd/etcd"
  24. log "github.com/coreos/flannel/Godeps/_workspace/src/github.com/golang/glog"
  25. "github.com/coreos/flannel/pkg/ip"
  26. "github.com/coreos/flannel/pkg/task"
  27. )
  28. const (
  29. registerRetries = 10
  30. subnetTTL = 24 * 3600
  31. renewMargin = time.Hour
  32. subnetListRetries = 10
  33. )
  34. // etcd error codes
  35. const (
  36. etcdKeyNotFound = 100
  37. etcdKeyAlreadyExists = 105
  38. etcdEventIndexCleared = 401
  39. )
  40. const (
  41. SubnetAdded = iota
  42. SubnetRemoved
  43. SubnetListed
  44. )
  45. var (
  46. subnetRegex *regexp.Regexp = regexp.MustCompile(`(\d+\.\d+.\d+.\d+)-(\d+)`)
  47. )
  48. type LeaseAttrs struct {
  49. PublicIP ip.IP4
  50. BackendType string `json:",omitempty"`
  51. BackendData json.RawMessage `json:",omitempty"`
  52. }
  53. type SubnetLease struct {
  54. Network ip.IP4Net
  55. Attrs LeaseAttrs
  56. }
  57. type SubnetManager struct {
  58. registry subnetRegistry
  59. config *Config
  60. myLease SubnetLease
  61. leaseExp time.Time
  62. lastIndex uint64
  63. leases []SubnetLease
  64. }
  65. type EventType int
  66. type Event struct {
  67. Type EventType
  68. Lease SubnetLease
  69. }
  70. type EventBatch []Event
  71. func NewSubnetManager(config *EtcdConfig) (*SubnetManager, error) {
  72. esr, err := newEtcdSubnetRegistry(config)
  73. if err != nil {
  74. return nil, err
  75. }
  76. return newSubnetManager(esr)
  77. }
  78. func (sm *SubnetManager) AcquireLease(attrs *LeaseAttrs, cancel chan bool) (ip.IP4Net, error) {
  79. for {
  80. sn, err := sm.acquireLeaseOnce(attrs, cancel)
  81. switch {
  82. case err == nil:
  83. log.Info("Subnet lease acquired: ", sn)
  84. return sn, nil
  85. case err == task.ErrCanceled:
  86. return ip.IP4Net{}, err
  87. default:
  88. log.Error("Failed to acquire subnet: ", err)
  89. }
  90. select {
  91. case <-time.After(time.Second):
  92. case <-cancel:
  93. return ip.IP4Net{}, task.ErrCanceled
  94. }
  95. }
  96. }
  97. func findLeaseByIP(leases []SubnetLease, pubIP ip.IP4) *SubnetLease {
  98. for _, l := range leases {
  99. if pubIP == l.Attrs.PublicIP {
  100. return &l
  101. }
  102. }
  103. return nil
  104. }
  105. func (sm *SubnetManager) tryAcquireLease(extIP ip.IP4, attrs *LeaseAttrs) (ip.IP4Net, error) {
  106. var err error
  107. sm.leases, err = sm.getLeases()
  108. if err != nil {
  109. return ip.IP4Net{}, err
  110. }
  111. attrBytes, err := json.Marshal(attrs)
  112. if err != nil {
  113. log.Errorf("marshal failed: %#v, %v", attrs, err)
  114. return ip.IP4Net{}, err
  115. }
  116. // try to reuse a subnet if there's one that matches our IP
  117. if l := findLeaseByIP(sm.leases, extIP); l != nil {
  118. resp, err := sm.registry.updateSubnet(l.Network.StringSep(".", "-"), string(attrBytes), subnetTTL)
  119. if err != nil {
  120. return ip.IP4Net{}, err
  121. }
  122. sm.myLease.Network = l.Network
  123. sm.myLease.Attrs = *attrs
  124. sm.leaseExp = *resp.Node.Expiration
  125. return l.Network, nil
  126. }
  127. // no existing match, grab a new one
  128. sn, err := sm.allocateSubnet()
  129. if err != nil {
  130. return ip.IP4Net{}, err
  131. }
  132. resp, err := sm.registry.createSubnet(sn.StringSep(".", "-"), string(attrBytes), subnetTTL)
  133. switch {
  134. case err == nil:
  135. sm.myLease.Network = sn
  136. sm.myLease.Attrs = *attrs
  137. sm.leaseExp = *resp.Node.Expiration
  138. return sn, nil
  139. // if etcd returned Key Already Exists, try again.
  140. case err.(*etcd.EtcdError).ErrorCode == etcdKeyAlreadyExists:
  141. return ip.IP4Net{}, nil
  142. default:
  143. return ip.IP4Net{}, err
  144. }
  145. }
  146. func (sm *SubnetManager) acquireLeaseOnce(attrs *LeaseAttrs, cancel chan bool) (ip.IP4Net, error) {
  147. for i := 0; i < registerRetries; i++ {
  148. sn, err := sm.tryAcquireLease(attrs.PublicIP, attrs)
  149. switch {
  150. case err != nil:
  151. return ip.IP4Net{}, err
  152. case sn.IP != 0:
  153. return sn, nil
  154. }
  155. // before moving on, check for cancel
  156. if interrupted(cancel) {
  157. return ip.IP4Net{}, task.ErrCanceled
  158. }
  159. }
  160. return ip.IP4Net{}, errors.New("Max retries reached trying to acquire a subnet")
  161. }
  162. func (sm *SubnetManager) GetConfig() *Config {
  163. return sm.config
  164. }
  165. /// Implementation
  166. func parseSubnetKey(s string) (ip.IP4Net, error) {
  167. if parts := subnetRegex.FindStringSubmatch(s); len(parts) == 3 {
  168. snIp := net.ParseIP(parts[1]).To4()
  169. prefixLen, err := strconv.ParseUint(parts[2], 10, 5)
  170. if snIp != nil && err == nil {
  171. return ip.IP4Net{IP: ip.FromIP(snIp), PrefixLen: uint(prefixLen)}, nil
  172. }
  173. }
  174. return ip.IP4Net{}, errors.New("Error parsing IP Subnet")
  175. }
  176. func newSubnetManager(r subnetRegistry) (*SubnetManager, error) {
  177. cfgResp, err := r.getConfig()
  178. if err != nil {
  179. return nil, err
  180. }
  181. cfg, err := ParseConfig(cfgResp.Node.Value)
  182. if err != nil {
  183. return nil, err
  184. }
  185. sm := SubnetManager{
  186. registry: r,
  187. config: cfg,
  188. }
  189. return &sm, nil
  190. }
  191. func (sm *SubnetManager) getLeases() ([]SubnetLease, error) {
  192. resp, err := sm.registry.getSubnets()
  193. var leases []SubnetLease
  194. switch {
  195. case err == nil:
  196. for _, node := range resp.Node.Nodes {
  197. sn, err := parseSubnetKey(node.Key)
  198. if err == nil {
  199. var attrs LeaseAttrs
  200. if err = json.Unmarshal([]byte(node.Value), &attrs); err == nil {
  201. lease := SubnetLease{sn, attrs}
  202. leases = append(leases, lease)
  203. }
  204. }
  205. }
  206. sm.lastIndex = resp.EtcdIndex
  207. case err.(*etcd.EtcdError).ErrorCode == etcdKeyNotFound:
  208. // key not found: treat it as empty set
  209. sm.lastIndex = err.(*etcd.EtcdError).Index
  210. default:
  211. return nil, err
  212. }
  213. return leases, nil
  214. }
  215. func deleteLease(l []SubnetLease, i int) []SubnetLease {
  216. l[i], l = l[len(l)-1], l[:len(l)-1]
  217. return l
  218. }
  219. func (sm *SubnetManager) applyLeases(newLeases []SubnetLease) EventBatch {
  220. var batch EventBatch
  221. for _, l := range newLeases {
  222. // skip self
  223. if l.Network.Equal(sm.myLease.Network) {
  224. continue
  225. }
  226. found := false
  227. for i, c := range sm.leases {
  228. if c.Network.Equal(l.Network) {
  229. sm.leases = deleteLease(sm.leases, i)
  230. found = true
  231. break
  232. }
  233. }
  234. if !found {
  235. // new subnet
  236. batch = append(batch, Event{SubnetAdded, l})
  237. }
  238. }
  239. // everything left in sm.leases has been deleted
  240. for _, c := range sm.leases {
  241. batch = append(batch, Event{SubnetRemoved, c})
  242. }
  243. sm.leases = newLeases
  244. return batch
  245. }
  246. func (sm *SubnetManager) applySubnetChange(action string, ipn ip.IP4Net, data string) (Event, error) {
  247. switch action {
  248. case "delete", "expire":
  249. for i, l := range sm.leases {
  250. if l.Network.Equal(ipn) {
  251. deleteLease(sm.leases, i)
  252. return Event{SubnetRemoved, l}, nil
  253. }
  254. }
  255. log.Errorf("Removed subnet (%s) was not found", ipn)
  256. return Event{
  257. SubnetRemoved,
  258. SubnetLease{ipn, LeaseAttrs{}},
  259. }, nil
  260. default:
  261. var attrs LeaseAttrs
  262. err := json.Unmarshal([]byte(data), &attrs)
  263. if err != nil {
  264. return Event{}, err
  265. }
  266. for i, l := range sm.leases {
  267. if l.Network.Equal(ipn) {
  268. sm.leases[i] = SubnetLease{ipn, attrs}
  269. return Event{SubnetAdded, sm.leases[i]}, nil
  270. }
  271. }
  272. sm.leases = append(sm.leases, SubnetLease{ipn, attrs})
  273. return Event{SubnetAdded, sm.leases[len(sm.leases)-1]}, nil
  274. }
  275. }
  276. func (sm *SubnetManager) allocateSubnet() (ip.IP4Net, error) {
  277. log.Infof("Picking subnet in range %s ... %s", sm.config.SubnetMin, sm.config.SubnetMax)
  278. var bag []ip.IP4
  279. sn := ip.IP4Net{IP: sm.config.SubnetMin, PrefixLen: sm.config.SubnetLen}
  280. OuterLoop:
  281. for ; sn.IP <= sm.config.SubnetMax && len(bag) < 100; sn = sn.Next() {
  282. for _, l := range sm.leases {
  283. if sn.Overlaps(l.Network) {
  284. continue OuterLoop
  285. }
  286. }
  287. bag = append(bag, sn.IP)
  288. }
  289. if len(bag) == 0 {
  290. return ip.IP4Net{}, errors.New("out of subnets")
  291. } else {
  292. i := randInt(0, len(bag))
  293. return ip.IP4Net{IP: bag[i], PrefixLen: sm.config.SubnetLen}, nil
  294. }
  295. }
  296. func (sm *SubnetManager) WatchLeases(receiver chan EventBatch, cancel chan bool) {
  297. // "catch up" by replaying all the leases we discovered during
  298. // AcquireLease
  299. var batch EventBatch
  300. for _, l := range sm.leases {
  301. if !sm.myLease.Network.Equal(l.Network) {
  302. batch = append(batch, Event{SubnetAdded, l})
  303. }
  304. }
  305. if len(batch) > 0 {
  306. receiver <- batch
  307. }
  308. for {
  309. resp, err := sm.registry.watchSubnets(sm.lastIndex+1, cancel)
  310. // watchSubnets exited by cancel chan being signaled
  311. if err == nil && resp == nil {
  312. return
  313. }
  314. var batch *EventBatch
  315. if err == nil {
  316. batch, err = sm.parseSubnetWatchResponse(resp)
  317. } else {
  318. batch, err = sm.parseSubnetWatchError(err)
  319. }
  320. if err != nil {
  321. log.Errorf("%v", err)
  322. time.Sleep(time.Second)
  323. continue
  324. }
  325. if batch != nil {
  326. receiver <- *batch
  327. }
  328. }
  329. }
  330. func (sm *SubnetManager) ListLeases(receiver chan EventBatch, cancel chan bool) {
  331. //periodly list leases
  332. for {
  333. resp, err := sm.registry.watchSubnets(sm.lastIndex+1, cancel)
  334. // watchSubnets exited by cancel chan being signaled
  335. if err == nil && resp == nil {
  336. return
  337. }
  338. leases, err := sm.getLeases()
  339. if err == nil {
  340. var batch EventBatch
  341. for _, l := range leases {
  342. // skip self
  343. if l.Network.Equal(sm.myLease.Network) {
  344. continue
  345. }
  346. batch = append(batch, Event{SubnetListed, l})
  347. }
  348. if &batch != nil {
  349. receiver <- batch
  350. }
  351. }
  352. time.Sleep(subnetListRetries * time.Second)
  353. }
  354. }
  355. func (sm *SubnetManager) parseSubnetWatchResponse(resp *etcd.Response) (batch *EventBatch, err error) {
  356. sm.lastIndex = resp.Node.ModifiedIndex
  357. sn, err := parseSubnetKey(resp.Node.Key)
  358. if err != nil {
  359. err = fmt.Errorf("Error parsing subnet IP: %s", resp.Node.Key)
  360. return
  361. }
  362. // Don't process our own changes
  363. if !sm.myLease.Network.Equal(sn) {
  364. evt, err := sm.applySubnetChange(resp.Action, sn, resp.Node.Value)
  365. if err != nil {
  366. return nil, err
  367. }
  368. batch = &EventBatch{evt}
  369. }
  370. return
  371. }
  372. func (sm *SubnetManager) parseSubnetWatchError(err error) (batch *EventBatch, out error) {
  373. etcdErr, ok := err.(*etcd.EtcdError)
  374. if ok && etcdErr.ErrorCode == etcdEventIndexCleared {
  375. // etcd maintains a history window for events and it's possible to fall behind.
  376. // to recover, get the current state and then "diff" against our cache to generate
  377. // events for the caller
  378. log.Warning("Watch of subnet leases failed because etcd index outside history window")
  379. leases, err := sm.getLeases()
  380. if err == nil {
  381. lb := sm.applyLeases(leases)
  382. batch = &lb
  383. } else {
  384. out = fmt.Errorf("Failed to retrieve subnet leases: %v", err)
  385. }
  386. } else {
  387. out = fmt.Errorf("Watch of subnet leases failed: %v", err)
  388. }
  389. return
  390. }
  391. func (sm *SubnetManager) LeaseRenewer(cancel chan bool) {
  392. for {
  393. dur := sm.leaseExp.Sub(time.Now()) - renewMargin
  394. select {
  395. case <-time.After(dur):
  396. attrBytes, err := json.Marshal(&sm.myLease.Attrs)
  397. if err != nil {
  398. log.Error("Error renewing lease (trying again in 1 min): ", err)
  399. dur = time.Minute
  400. continue
  401. }
  402. resp, err := sm.registry.updateSubnet(sm.myLease.Network.StringSep(".", "-"), string(attrBytes), subnetTTL)
  403. if err != nil {
  404. log.Error("Error renewing lease (trying again in 1 min): ", err)
  405. dur = time.Minute
  406. continue
  407. }
  408. sm.leaseExp = *resp.Node.Expiration
  409. log.Info("Lease renewed, new expiration: ", sm.leaseExp)
  410. dur = sm.leaseExp.Sub(time.Now()) - renewMargin
  411. case <-cancel:
  412. return
  413. }
  414. }
  415. }
  416. func interrupted(cancel chan bool) bool {
  417. select {
  418. case <-cancel:
  419. return true
  420. default:
  421. return false
  422. }
  423. }