subnet.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463
  1. package subnet
  2. import (
  3. "encoding/json"
  4. "errors"
  5. "fmt"
  6. "net"
  7. "regexp"
  8. "strconv"
  9. "time"
  10. "github.com/coreos/flannel/Godeps/_workspace/src/github.com/coreos/go-etcd/etcd"
  11. log "github.com/coreos/flannel/Godeps/_workspace/src/github.com/golang/glog"
  12. "github.com/coreos/flannel/pkg/ip"
  13. "github.com/coreos/flannel/pkg/task"
  14. )
const (
	// registerRetries is the number of attempts acquireLeaseOnce makes
	// before reporting that the maximum number of retries was reached.
	registerRetries = 10
	// subnetTTL is the TTL handed to the registry whenever a subnet lease is
	// created or updated (presumably in seconds, i.e. 24 hours — confirm
	// against the registry implementation).
	subnetTTL = 24 * 3600
	// renewMargin is how long before the lease expiration LeaseRenewer
	// attempts to renew the lease.
	renewMargin = time.Hour
)
// etcd error codes
const (
	// etcdKeyNotFound is treated by getLeases as "no leases exist yet".
	etcdKeyNotFound = 100
	// etcdKeyAlreadyExists makes tryAcquireLease ask its caller to retry.
	etcdKeyAlreadyExists = 105
	// etcdEventIndexCleared makes parseSubnetWatchError reload all leases
	// because the watch index fell outside etcd's event history window.
	etcdEventIndexCleared = 401
)
// Values used for Event.Type.
const (
	// SubnetAdded reports a lease that was created or updated.
	SubnetAdded = iota
	// SubnetRemoved reports a lease that was deleted or expired.
	SubnetRemoved
)
  30. var (
  31. subnetRegex *regexp.Regexp = regexp.MustCompile(`(\d+\.\d+.\d+.\d+)-(\d+)`)
  32. )
// LeaseAttrs holds the attributes attached to a subnet lease. It is
// marshaled to JSON and stored as the value of the lease in the registry.
type LeaseAttrs struct {
	// PublicIP is the address findLeaseByIP matches on to locate an
	// existing lease to reuse.
	PublicIP ip.IP4
	// BackendType is omitted from the JSON encoding when empty.
	BackendType string `json:",omitempty"`
	// BackendData is raw JSON that is passed through undecoded; it is
	// omitted from the JSON encoding when empty.
	BackendData json.RawMessage `json:",omitempty"`
}
// SubnetLease pairs a leased network with the attributes stored alongside it.
// Field order matters: the type is built with unkeyed composite literals.
type SubnetLease struct {
	// Network is the leased subnet.
	Network ip.IP4Net
	// Attrs are the attributes recorded for the lease.
	Attrs LeaseAttrs
}
// SubnetManager acquires, renews and watches subnet leases kept in a
// subnetRegistry.
type SubnetManager struct {
	// registry is the backing store used for all lease operations.
	registry subnetRegistry
	// config is the parsed network configuration loaded by newSubnetManager.
	config *Config
	// myLease is the lease held by this manager; set by tryAcquireLease.
	myLease SubnetLease
	// leaseExp is the expiration reported by the registry for myLease.
	leaseExp time.Time
	// lastIndex is the registry index last observed; WatchLeases resumes
	// watching from lastIndex+1.
	lastIndex uint64
	// leases is the cached list of known leases.
	leases []SubnetLease
}
// EventType identifies the kind of change an Event describes
// (see SubnetAdded and SubnetRemoved).
type EventType int
// Event describes a single change to a subnet lease.
// Field order matters: the type is built with unkeyed composite literals.
type Event struct {
	// Type says whether the lease was added or removed.
	Type EventType
	// Lease is the lease the event refers to.
	Lease SubnetLease
}
// EventBatch is a group of events delivered together by WatchLeases.
type EventBatch []Event
  56. func NewSubnetManager(config *EtcdConfig) (*SubnetManager, error) {
  57. esr, err := newEtcdSubnetRegistry(config)
  58. if err != nil {
  59. return nil, err
  60. }
  61. return newSubnetManager(esr)
  62. }
  63. func (sm *SubnetManager) AcquireLease(attrs *LeaseAttrs, cancel chan bool) (ip.IP4Net, error) {
  64. for {
  65. sn, err := sm.acquireLeaseOnce(attrs, cancel)
  66. switch {
  67. case err == nil:
  68. log.Info("Subnet lease acquired: ", sn)
  69. return sn, nil
  70. case err == task.ErrCanceled:
  71. return ip.IP4Net{}, err
  72. default:
  73. log.Error("Failed to acquire subnet: ", err)
  74. }
  75. select {
  76. case <-time.After(time.Second):
  77. case <-cancel:
  78. return ip.IP4Net{}, task.ErrCanceled
  79. }
  80. }
  81. }
  82. func findLeaseByIP(leases []SubnetLease, pubIP ip.IP4) *SubnetLease {
  83. for _, l := range leases {
  84. if pubIP == l.Attrs.PublicIP {
  85. return &l
  86. }
  87. }
  88. return nil
  89. }
  90. func (sm *SubnetManager) tryAcquireLease(extIP ip.IP4, attrs *LeaseAttrs) (ip.IP4Net, error) {
  91. var err error
  92. sm.leases, err = sm.getLeases()
  93. if err != nil {
  94. return ip.IP4Net{}, err
  95. }
  96. attrBytes, err := json.Marshal(attrs)
  97. if err != nil {
  98. log.Errorf("marshal failed: %#v, %v", attrs, err)
  99. return ip.IP4Net{}, err
  100. }
  101. // try to reuse a subnet if there's one that matches our IP
  102. if l := findLeaseByIP(sm.leases, extIP); l != nil {
  103. resp, err := sm.registry.updateSubnet(l.Network.StringSep(".", "-"), string(attrBytes), subnetTTL)
  104. if err != nil {
  105. return ip.IP4Net{}, err
  106. }
  107. sm.myLease.Network = l.Network
  108. sm.myLease.Attrs = *attrs
  109. sm.leaseExp = *resp.Node.Expiration
  110. return l.Network, nil
  111. }
  112. // no existing match, grab a new one
  113. sn, err := sm.allocateSubnet()
  114. if err != nil {
  115. return ip.IP4Net{}, err
  116. }
  117. resp, err := sm.registry.createSubnet(sn.StringSep(".", "-"), string(attrBytes), subnetTTL)
  118. switch {
  119. case err == nil:
  120. sm.myLease.Network = sn
  121. sm.myLease.Attrs = *attrs
  122. sm.leaseExp = *resp.Node.Expiration
  123. return sn, nil
  124. // if etcd returned Key Already Exists, try again.
  125. case err.(*etcd.EtcdError).ErrorCode == etcdKeyAlreadyExists:
  126. return ip.IP4Net{}, nil
  127. default:
  128. return ip.IP4Net{}, err
  129. }
  130. }
  131. func (sm *SubnetManager) acquireLeaseOnce(attrs *LeaseAttrs, cancel chan bool) (ip.IP4Net, error) {
  132. for i := 0; i < registerRetries; i++ {
  133. sn, err := sm.tryAcquireLease(attrs.PublicIP, attrs)
  134. switch {
  135. case err != nil:
  136. return ip.IP4Net{}, err
  137. case sn.IP != 0:
  138. return sn, nil
  139. }
  140. // before moving on, check for cancel
  141. if interrupted(cancel) {
  142. return ip.IP4Net{}, task.ErrCanceled
  143. }
  144. }
  145. return ip.IP4Net{}, errors.New("Max retries reached trying to acquire a subnet")
  146. }
// GetConfig returns the manager's network configuration, i.e. the value
// parsed from the registry when the manager was constructed.
func (sm *SubnetManager) GetConfig() *Config {
	return sm.config
}
  150. /// Implementation
  151. func parseSubnetKey(s string) (ip.IP4Net, error) {
  152. if parts := subnetRegex.FindStringSubmatch(s); len(parts) == 3 {
  153. snIp := net.ParseIP(parts[1]).To4()
  154. prefixLen, err := strconv.ParseUint(parts[2], 10, 5)
  155. if snIp != nil && err == nil {
  156. return ip.IP4Net{IP: ip.FromIP(snIp), PrefixLen: uint(prefixLen)}, nil
  157. }
  158. }
  159. return ip.IP4Net{}, errors.New("Error parsing IP Subnet")
  160. }
  161. func newSubnetManager(r subnetRegistry) (*SubnetManager, error) {
  162. cfgResp, err := r.getConfig()
  163. if err != nil {
  164. return nil, err
  165. }
  166. cfg, err := ParseConfig(cfgResp.Node.Value)
  167. if err != nil {
  168. return nil, err
  169. }
  170. sm := SubnetManager{
  171. registry: r,
  172. config: cfg,
  173. }
  174. return &sm, nil
  175. }
  176. func (sm *SubnetManager) getLeases() ([]SubnetLease, error) {
  177. resp, err := sm.registry.getSubnets()
  178. var leases []SubnetLease
  179. switch {
  180. case err == nil:
  181. for _, node := range resp.Node.Nodes {
  182. sn, err := parseSubnetKey(node.Key)
  183. if err == nil {
  184. var attrs LeaseAttrs
  185. if err = json.Unmarshal([]byte(node.Value), &attrs); err == nil {
  186. lease := SubnetLease{sn, attrs}
  187. leases = append(leases, lease)
  188. }
  189. }
  190. }
  191. sm.lastIndex = resp.EtcdIndex
  192. case err.(*etcd.EtcdError).ErrorCode == etcdKeyNotFound:
  193. // key not found: treat it as empty set
  194. sm.lastIndex = err.(*etcd.EtcdError).Index
  195. default:
  196. return nil, err
  197. }
  198. return leases, nil
  199. }
  200. func deleteLease(l []SubnetLease, i int) []SubnetLease {
  201. l[i], l = l[len(l)-1], l[:len(l)-1]
  202. return l
  203. }
  204. func (sm *SubnetManager) applyLeases(newLeases []SubnetLease) EventBatch {
  205. var batch EventBatch
  206. for _, l := range newLeases {
  207. // skip self
  208. if l.Network.Equal(sm.myLease.Network) {
  209. continue
  210. }
  211. found := false
  212. for i, c := range sm.leases {
  213. if c.Network.Equal(l.Network) {
  214. sm.leases = deleteLease(sm.leases, i)
  215. found = true
  216. break
  217. }
  218. }
  219. if !found {
  220. // new subnet
  221. batch = append(batch, Event{SubnetAdded, l})
  222. }
  223. }
  224. // everything left in sm.leases has been deleted
  225. for _, c := range sm.leases {
  226. batch = append(batch, Event{SubnetRemoved, c})
  227. }
  228. sm.leases = newLeases
  229. return batch
  230. }
  231. func (sm *SubnetManager) applySubnetChange(action string, ipn ip.IP4Net, data string) (Event, error) {
  232. switch action {
  233. case "delete", "expire":
  234. for i, l := range sm.leases {
  235. if l.Network.Equal(ipn) {
  236. deleteLease(sm.leases, i)
  237. return Event{SubnetRemoved, l}, nil
  238. }
  239. }
  240. log.Errorf("Removed subnet (%s) was not found", ipn)
  241. return Event{
  242. SubnetRemoved,
  243. SubnetLease{ipn, LeaseAttrs{}},
  244. }, nil
  245. default:
  246. var attrs LeaseAttrs
  247. err := json.Unmarshal([]byte(data), &attrs)
  248. if err != nil {
  249. return Event{}, err
  250. }
  251. for i, l := range sm.leases {
  252. if l.Network.Equal(ipn) {
  253. sm.leases[i] = SubnetLease{ipn, attrs}
  254. return Event{SubnetAdded, sm.leases[i]}, nil
  255. }
  256. }
  257. sm.leases = append(sm.leases, SubnetLease{ipn, attrs})
  258. return Event{SubnetAdded, sm.leases[len(sm.leases)-1]}, nil
  259. }
  260. }
  261. func (sm *SubnetManager) allocateSubnet() (ip.IP4Net, error) {
  262. log.Infof("Picking subnet in range %s ... %s", sm.config.SubnetMin, sm.config.SubnetMax)
  263. var bag []ip.IP4
  264. sn := ip.IP4Net{IP: sm.config.SubnetMin, PrefixLen: sm.config.SubnetLen}
  265. OuterLoop:
  266. for ; sn.IP <= sm.config.SubnetMax && len(bag) < 100; sn = sn.Next() {
  267. for _, l := range sm.leases {
  268. if sn.Overlaps(l.Network) {
  269. continue OuterLoop
  270. }
  271. }
  272. bag = append(bag, sn.IP)
  273. }
  274. if len(bag) == 0 {
  275. return ip.IP4Net{}, errors.New("out of subnets")
  276. } else {
  277. i := randInt(0, len(bag))
  278. return ip.IP4Net{IP: bag[i], PrefixLen: sm.config.SubnetLen}, nil
  279. }
  280. }
  281. func (sm *SubnetManager) WatchLeases(receiver chan EventBatch, cancel chan bool) {
  282. // "catch up" by replaying all the leases we discovered during
  283. // AcquireLease
  284. var batch EventBatch
  285. for _, l := range sm.leases {
  286. if !sm.myLease.Network.Equal(l.Network) {
  287. batch = append(batch, Event{SubnetAdded, l})
  288. }
  289. }
  290. if len(batch) > 0 {
  291. receiver <- batch
  292. }
  293. for {
  294. resp, err := sm.registry.watchSubnets(sm.lastIndex+1, cancel)
  295. // watchSubnets exited by cancel chan being signaled
  296. if err == nil && resp == nil {
  297. return
  298. }
  299. var batch *EventBatch
  300. if err == nil {
  301. batch, err = sm.parseSubnetWatchResponse(resp)
  302. } else {
  303. batch, err = sm.parseSubnetWatchError(err)
  304. }
  305. if err != nil {
  306. log.Errorf("%v", err)
  307. time.Sleep(time.Second)
  308. continue
  309. }
  310. if batch != nil {
  311. receiver <- *batch
  312. }
  313. }
  314. }
  315. func (sm *SubnetManager) parseSubnetWatchResponse(resp *etcd.Response) (batch *EventBatch, err error) {
  316. sm.lastIndex = resp.Node.ModifiedIndex
  317. sn, err := parseSubnetKey(resp.Node.Key)
  318. if err != nil {
  319. err = fmt.Errorf("Error parsing subnet IP: %s", resp.Node.Key)
  320. return
  321. }
  322. // Don't process our own changes
  323. if !sm.myLease.Network.Equal(sn) {
  324. evt, err := sm.applySubnetChange(resp.Action, sn, resp.Node.Value)
  325. if err != nil {
  326. return nil, err
  327. }
  328. batch = &EventBatch{evt}
  329. }
  330. return
  331. }
  332. func (sm *SubnetManager) parseSubnetWatchError(err error) (batch *EventBatch, out error) {
  333. etcdErr, ok := err.(*etcd.EtcdError)
  334. if ok && etcdErr.ErrorCode == etcdEventIndexCleared {
  335. // etcd maintains a history window for events and it's possible to fall behind.
  336. // to recover, get the current state and then "diff" against our cache to generate
  337. // events for the caller
  338. log.Warning("Watch of subnet leases failed because etcd index outside history window")
  339. leases, err := sm.getLeases()
  340. if err == nil {
  341. lb := sm.applyLeases(leases)
  342. batch = &lb
  343. } else {
  344. out = fmt.Errorf("Failed to retrieve subnet leases: %v", err)
  345. }
  346. } else {
  347. out = fmt.Errorf("Watch of subnet leases failed: %v", err)
  348. }
  349. return
  350. }
  351. func (sm *SubnetManager) LeaseRenewer(cancel chan bool) {
  352. for {
  353. dur := sm.leaseExp.Sub(time.Now()) - renewMargin
  354. select {
  355. case <-time.After(dur):
  356. attrBytes, err := json.Marshal(&sm.myLease.Attrs)
  357. if err != nil {
  358. log.Error("Error renewing lease (trying again in 1 min): ", err)
  359. dur = time.Minute
  360. continue
  361. }
  362. resp, err := sm.registry.updateSubnet(sm.myLease.Network.StringSep(".", "-"), string(attrBytes), subnetTTL)
  363. if err != nil {
  364. log.Error("Error renewing lease (trying again in 1 min): ", err)
  365. dur = time.Minute
  366. continue
  367. }
  368. sm.leaseExp = *resp.Node.Expiration
  369. log.Info("Lease renewed, new expiration: ", sm.leaseExp)
  370. dur = sm.leaseExp.Sub(time.Now()) - renewMargin
  371. case <-cancel:
  372. return
  373. }
  374. }
  375. }
  376. func interrupted(cancel chan bool) bool {
  377. select {
  378. case <-cancel:
  379. return true
  380. default:
  381. return false
  382. }
  383. }