subnet.go 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410
  1. package subnet
  2. import (
  3. "encoding/json"
  4. "errors"
  5. "net"
  6. "regexp"
  7. "strconv"
  8. "time"
  9. "github.com/coreos-inc/kolach/Godeps/_workspace/src/github.com/coreos/go-etcd/etcd"
  10. log "github.com/coreos-inc/kolach/Godeps/_workspace/src/github.com/golang/glog"
  11. "github.com/coreos-inc/kolach/pkg"
  12. )
  13. const (
  14. registerRetries = 10
  15. subnetTTL = 24 * 3600
  16. renewMargin = time.Hour
  17. )
  18. // etcd error codes
  19. const (
  20. etcdEventIndexCleared = 401
  21. )
  22. const (
  23. SubnetAdded = iota
  24. SubnetRemoved
  25. )
  26. var (
  27. subnetRegex *regexp.Regexp = regexp.MustCompile(`(\d+\.\d+.\d+.\d+)-(\d+)`)
  28. )
  29. type SubnetLease struct {
  30. Network pkg.IP4Net
  31. Data string
  32. }
  33. type SubnetManager struct {
  34. registry subnetRegistry
  35. config *Config
  36. myLease SubnetLease
  37. leaseExp time.Time
  38. lastIndex uint64
  39. leases []SubnetLease
  40. stop chan bool
  41. }
  42. type EventType int
  43. type Event struct {
  44. Type EventType
  45. Lease SubnetLease
  46. }
  47. type EventBatch []Event
  48. func NewSubnetManager(etcdCli *etcd.Client, prefix string) (*SubnetManager, error) {
  49. esr := etcdSubnetRegistry{etcdCli, prefix}
  50. return newSubnetManager(&esr)
  51. }
  52. func (sm *SubnetManager) AcquireLease(ip pkg.IP4, data string) (pkg.IP4Net, error) {
  53. for i := 0; i < registerRetries; i++ {
  54. var err error
  55. sm.leases, err = sm.getLeases()
  56. if err != nil {
  57. return pkg.IP4Net{}, err
  58. }
  59. // try to reuse a subnet if there's one that match our IP
  60. for _, l := range sm.leases {
  61. var ba BaseAttrs
  62. err = json.Unmarshal([]byte(l.Data), &ba)
  63. if err != nil {
  64. log.Error("Error parsing subnet lease JSON: ", err)
  65. } else {
  66. if ip == ba.PublicIP {
  67. resp, err := sm.registry.updateSubnet(l.Network.StringSep(".", "-"), data, subnetTTL)
  68. if err != nil {
  69. return pkg.IP4Net{}, nil
  70. }
  71. sm.myLease.Network = l.Network
  72. sm.leaseExp = *(resp.Node.Expiration)
  73. return l.Network, nil
  74. }
  75. }
  76. }
  77. // no existing match, grab a new one
  78. sn, err := sm.allocateSubnet()
  79. if err != nil {
  80. return pkg.IP4Net{}, err
  81. }
  82. resp, err := sm.registry.createSubnet(sn.StringSep(".", "-"), data, subnetTTL)
  83. switch {
  84. case err == nil:
  85. sm.myLease.Network = sn
  86. sm.leaseExp = *(resp.Node.Expiration)
  87. return sn, nil
  88. // if etcd returned Key Already Exists, try again.
  89. case err.(*etcd.EtcdError).ErrorCode == 105:
  90. continue
  91. default:
  92. return pkg.IP4Net{}, err
  93. }
  94. }
  95. return pkg.IP4Net{}, errors.New("Max retries reached trying to acquire a subnet")
  96. }
  97. func (sm *SubnetManager) UpdateSubnet(data string) error {
  98. resp, err := sm.registry.updateSubnet(sm.myLease.Network.StringSep(".", "-"), data, subnetTTL)
  99. sm.leaseExp = *(resp.Node.Expiration)
  100. return err
  101. }
  102. func (sm *SubnetManager) Start(receiver chan EventBatch) {
  103. go sm.watchLeases(receiver)
  104. go sm.leaseRenewer()
  105. }
  106. func (sm *SubnetManager) Stop() {
  107. // once for each goroutine
  108. sm.stop <- true
  109. sm.stop <- true
  110. }
  111. func (sm *SubnetManager) GetConfig() *Config {
  112. return sm.config
  113. }
  114. /// Implementation
  115. func parseSubnetKey(s string) (pkg.IP4Net, error) {
  116. if parts := subnetRegex.FindStringSubmatch(s); len(parts) == 3 {
  117. ip := net.ParseIP(parts[1]).To4()
  118. prefixLen, err := strconv.ParseUint(parts[2], 10, 5)
  119. if ip != nil && err == nil {
  120. return pkg.IP4Net{pkg.FromIP(ip), uint(prefixLen)}, nil
  121. }
  122. }
  123. return pkg.IP4Net{}, errors.New("Error parsing IP Subnet")
  124. }
  125. type subnetRegistry interface {
  126. getConfig() (*etcd.Response, error)
  127. getSubnets() (*etcd.Response, error)
  128. createSubnet(sn, data string, ttl uint64) (*etcd.Response, error)
  129. updateSubnet(sn, data string, ttl uint64) (*etcd.Response, error)
  130. watchSubnets(since uint64, stop chan bool) (*etcd.Response, error)
  131. }
  132. type etcdSubnetRegistry struct {
  133. cli *etcd.Client
  134. prefix string
  135. }
  136. func (esr *etcdSubnetRegistry) getConfig() (*etcd.Response, error) {
  137. resp, err := esr.cli.Get(esr.prefix+"/config", false, false)
  138. if err != nil {
  139. return nil, err
  140. }
  141. return resp, nil
  142. }
  143. func (esr *etcdSubnetRegistry) getSubnets() (*etcd.Response, error) {
  144. return esr.cli.Get(esr.prefix+"/subnets", false, true)
  145. }
  146. func (esr *etcdSubnetRegistry) createSubnet(sn, data string, ttl uint64) (*etcd.Response, error) {
  147. return esr.cli.Create(esr.prefix+"/subnets/"+sn, data, ttl)
  148. }
  149. func (esr *etcdSubnetRegistry) updateSubnet(sn, data string, ttl uint64) (*etcd.Response, error) {
  150. return esr.cli.Set(esr.prefix+"/subnets/"+sn, data, ttl)
  151. }
  152. func (esr *etcdSubnetRegistry) watchSubnets(since uint64, stop chan bool) (*etcd.Response, error) {
  153. return esr.cli.Watch(esr.prefix+"/subnets", since, true, nil, stop)
  154. }
  155. func newSubnetManager(r subnetRegistry) (*SubnetManager, error) {
  156. cfgResp, err := r.getConfig()
  157. if err != nil {
  158. return nil, err
  159. }
  160. cfg, err := ParseConfig(cfgResp.Node.Value)
  161. if err != nil {
  162. return nil, err
  163. }
  164. return &SubnetManager{
  165. registry: r,
  166. config: cfg,
  167. stop: make(chan bool, 2),
  168. }, nil
  169. }
  170. func (sm *SubnetManager) getLeases() ([]SubnetLease, error) {
  171. resp, err := sm.registry.getSubnets()
  172. var leases []SubnetLease
  173. switch {
  174. case err == nil:
  175. for _, node := range resp.Node.Nodes {
  176. sn, err := parseSubnetKey(node.Key)
  177. if err == nil {
  178. lease := SubnetLease{sn, node.Value}
  179. leases = append(leases, lease)
  180. }
  181. }
  182. sm.lastIndex = resp.EtcdIndex
  183. case err.(*etcd.EtcdError).ErrorCode == 100:
  184. // key not found: treat it as empty set
  185. sm.lastIndex = err.(*etcd.EtcdError).Index
  186. default:
  187. return nil, err
  188. }
  189. return leases, nil
  190. }
  191. func deleteLease(l []SubnetLease, i int) []SubnetLease {
  192. l[i], l = l[len(l)-1], l[:len(l)-1]
  193. return l
  194. }
  195. func (sm *SubnetManager) applyLeases(newLeases []SubnetLease) EventBatch {
  196. var batch EventBatch
  197. for _, l := range newLeases {
  198. // skip self
  199. if l.Network.Equal(sm.myLease.Network) {
  200. continue
  201. }
  202. found := false
  203. for i, c := range sm.leases {
  204. if c.Network.Equal(l.Network) {
  205. sm.leases = deleteLease(sm.leases, i)
  206. found = true
  207. break
  208. }
  209. }
  210. if !found {
  211. // new subnet
  212. batch = append(batch, Event{SubnetAdded, l})
  213. }
  214. }
  215. // everything left in sm.leases has been deleted
  216. for _, c := range sm.leases {
  217. batch = append(batch, Event{SubnetRemoved, c})
  218. }
  219. sm.leases = newLeases
  220. return batch
  221. }
  222. func (sm *SubnetManager) applySubnetChange(action string, ipn pkg.IP4Net, data string) Event {
  223. switch action {
  224. case "delete", "expire":
  225. for i, l := range sm.leases {
  226. if l.Network.Equal(ipn) {
  227. deleteLease(sm.leases, i)
  228. return Event{SubnetRemoved, l}
  229. }
  230. }
  231. log.Errorf("Removed subnet (%s) was not found", ipn)
  232. return Event{
  233. SubnetRemoved,
  234. SubnetLease{ipn, ""},
  235. }
  236. default:
  237. for i, l := range sm.leases {
  238. if l.Network.Equal(ipn) {
  239. sm.leases[i] = SubnetLease{ipn, data}
  240. return Event{SubnetAdded, sm.leases[i]}
  241. }
  242. }
  243. sm.leases = append(sm.leases, SubnetLease{ipn, data})
  244. return Event{SubnetAdded, sm.leases[len(sm.leases)-1]}
  245. }
  246. }
  247. type BaseAttrs struct {
  248. PublicIP pkg.IP4
  249. }
  250. func (sm *SubnetManager) allocateSubnet() (pkg.IP4Net, error) {
  251. log.Infof("Picking subnet in range %s ... %s", sm.config.FirstIP, sm.config.LastIP)
  252. var bag []pkg.IP4
  253. sn := pkg.IP4Net{sm.config.FirstIP, sm.config.HostSubnet}
  254. OuterLoop:
  255. for ; sn.IP <= sm.config.LastIP && len(bag) < 100; sn = sn.Next() {
  256. for _, l := range sm.leases {
  257. if sn.Overlaps(l.Network) {
  258. continue OuterLoop
  259. }
  260. }
  261. bag = append(bag, sn.IP)
  262. }
  263. if len(bag) == 0 {
  264. return pkg.IP4Net{}, errors.New("out of subnets")
  265. } else {
  266. i := pkg.RandInt(0, len(bag))
  267. return pkg.IP4Net{bag[i], sm.config.HostSubnet}, nil
  268. }
  269. }
  270. func (sm *SubnetManager) watchLeases(receiver chan EventBatch) {
  271. // "catch up" by replaying all the leases we discovered during
  272. // AcquireLease
  273. var batch EventBatch
  274. for _, l := range sm.leases {
  275. if !sm.myLease.Network.Equal(l.Network) {
  276. batch = append(batch, Event{SubnetAdded, l})
  277. }
  278. }
  279. if len(batch) > 0 {
  280. receiver <- batch
  281. }
  282. for {
  283. resp, err := sm.registry.watchSubnets(sm.lastIndex+1, sm.stop)
  284. if err == nil {
  285. if resp == nil {
  286. // watchSubnets exited by stop chan being signaled
  287. return
  288. }
  289. sm.lastIndex = resp.EtcdIndex
  290. sn, err := parseSubnetKey(resp.Node.Key)
  291. if err != nil {
  292. log.Error("Error parsing subnet IP: ", resp.Node.Key)
  293. time.Sleep(time.Second)
  294. continue
  295. }
  296. // Don't process our own changes
  297. if !sm.myLease.Network.Equal(sn) {
  298. evt := sm.applySubnetChange(resp.Action, sn, resp.Node.Value)
  299. receiver <- EventBatch{evt}
  300. }
  301. } else if etcdErr, ok := err.(*etcd.EtcdError); ok && etcdErr.ErrorCode == etcdEventIndexCleared {
  302. // etcd maintains a history window for events and it's possible to fall behind.
  303. // to recover, get the current state and then "diff" against our cache to generate
  304. // events for the caller
  305. log.Warning("Watch of subnet leases failed b/c index outside history window")
  306. leases, err := sm.getLeases()
  307. if err != nil {
  308. log.Errorf("Failed to retrieve subnet leases: ", err)
  309. time.Sleep(time.Second)
  310. continue
  311. }
  312. batch = sm.applyLeases(leases)
  313. receiver <- batch
  314. } else {
  315. log.Error("Watch of subnet leases failed: ", err)
  316. continue
  317. }
  318. }
  319. }
  320. func (sm *SubnetManager) leaseRenewer() {
  321. dur := sm.leaseExp.Sub(time.Now()) - renewMargin
  322. for {
  323. select {
  324. case <-time.After(dur):
  325. resp, err := sm.registry.updateSubnet(sm.myLease.Network.StringSep(".", "-"), sm.myLease.Data, subnetTTL)
  326. if err != nil {
  327. log.Error("Error renewing lease (trying again in 1 min): ", err)
  328. dur = time.Minute
  329. continue
  330. }
  331. sm.leaseExp = *(resp.Node.Expiration)
  332. log.Info("Lease renewed, new expiration: ", sm.leaseExp)
  333. dur = sm.leaseExp.Sub(time.Now()) - renewMargin
  334. case <-sm.stop:
  335. return
  336. }
  337. }
  338. }