subnet.go 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435
  1. package subnet
  2. import (
  3. "encoding/json"
  4. "errors"
  5. "fmt"
  6. "net"
  7. "regexp"
  8. "strconv"
  9. "time"
  10. "github.com/coreos-inc/rudder/Godeps/_workspace/src/github.com/coreos/go-etcd/etcd"
  11. log "github.com/coreos-inc/rudder/Godeps/_workspace/src/github.com/golang/glog"
  12. "github.com/coreos-inc/rudder/pkg/ip"
  13. )
  // Lease acquisition and renewal tuning.
  const (
  	// registerRetries bounds how many times AcquireLease loops before giving up.
  	registerRetries = 10

  	// subnetTTL is the TTL handed to the registry whenever a subnet key is
  	// created or updated (presumably seconds, i.e. 24 hours -- TODO confirm).
  	subnetTTL = 24 * 3600

  	// renewMargin is how long before its expiration the lease gets renewed.
  	renewMargin = time.Hour
  )

  // etcd error codes
  const (
  	etcdKeyNotFound       = 100
  	etcdKeyAlreadyExists  = 105
  	etcdEventIndexCleared = 401
  )

  // Kinds of change carried by an Event.
  const (
  	// SubnetAdded marks a lease that appeared or was updated.
  	SubnetAdded = iota
  	// SubnetRemoved marks a lease that was deleted or expired.
  	SubnetRemoved
  )
  var (
  	// subnetRegex extracts the dotted-quad address and the prefix length from a
  	// subnet key of the form "a.b.c.d-len". Every dot is escaped so that only a
  	// literal '.' is accepted between octets. The pattern is intentionally not
  	// anchored: it is matched against full registry keys that carry a prefix.
  	subnetRegex = regexp.MustCompile(`(\d+\.\d+\.\d+\.\d+)-(\d+)`)
  )
  32. type SubnetLease struct {
  33. Network ip.IP4Net
  34. Data string
  35. }
  36. type SubnetManager struct {
  37. registry subnetRegistry
  38. config *Config
  39. myLease SubnetLease
  40. leaseExp time.Time
  41. lastIndex uint64
  42. leases []SubnetLease
  43. stop chan bool
  44. }
  45. type EventType int
  46. type Event struct {
  47. Type EventType
  48. Lease SubnetLease
  49. }
  50. type EventBatch []Event
  51. func NewSubnetManager(etcdCli *etcd.Client, prefix string) (*SubnetManager, error) {
  52. esr := etcdSubnetRegistry{etcdCli, prefix}
  53. return newSubnetManager(&esr)
  54. }
  55. func (sm *SubnetManager) AcquireLease(tep ip.IP4, data string) (ip.IP4Net, error) {
  56. for i := 0; i < registerRetries; i++ {
  57. var err error
  58. sm.leases, err = sm.getLeases()
  59. if err != nil {
  60. return ip.IP4Net{}, err
  61. }
  62. // try to reuse a subnet if there's one that match our IP
  63. for _, l := range sm.leases {
  64. var ba BaseAttrs
  65. err = json.Unmarshal([]byte(l.Data), &ba)
  66. if err != nil {
  67. log.Error("Error parsing subnet lease JSON: ", err)
  68. } else {
  69. if tep == ba.PublicIP {
  70. resp, err := sm.registry.updateSubnet(l.Network.StringSep(".", "-"), data, subnetTTL)
  71. if err != nil {
  72. return ip.IP4Net{}, nil
  73. }
  74. sm.myLease.Network = l.Network
  75. sm.leaseExp = *resp.Node.Expiration
  76. return l.Network, nil
  77. }
  78. }
  79. }
  80. // no existing match, grab a new one
  81. sn, err := sm.allocateSubnet()
  82. if err != nil {
  83. return ip.IP4Net{}, err
  84. }
  85. resp, err := sm.registry.createSubnet(sn.StringSep(".", "-"), data, subnetTTL)
  86. switch {
  87. case err == nil:
  88. sm.myLease.Network = sn
  89. sm.leaseExp = *resp.Node.Expiration
  90. return sn, nil
  91. // if etcd returned Key Already Exists, try again.
  92. case err.(*etcd.EtcdError).ErrorCode == etcdKeyAlreadyExists:
  93. continue
  94. default:
  95. return ip.IP4Net{}, err
  96. }
  97. }
  98. return ip.IP4Net{}, errors.New("Max retries reached trying to acquire a subnet")
  99. }
  100. func (sm *SubnetManager) UpdateSubnet(data string) error {
  101. resp, err := sm.registry.updateSubnet(sm.myLease.Network.StringSep(".", "-"), data, subnetTTL)
  102. sm.leaseExp = *resp.Node.Expiration
  103. return err
  104. }
  105. func (sm *SubnetManager) Start(receiver chan EventBatch) {
  106. go sm.watchLeases(receiver)
  107. go sm.leaseRenewer()
  108. }
  109. func (sm *SubnetManager) Stop() {
  110. // once for each goroutine
  111. sm.stop <- true
  112. sm.stop <- true
  113. }
  114. func (sm *SubnetManager) GetConfig() *Config {
  115. return sm.config
  116. }
  117. /// Implementation
  118. func parseSubnetKey(s string) (ip.IP4Net, error) {
  119. if parts := subnetRegex.FindStringSubmatch(s); len(parts) == 3 {
  120. snIp := net.ParseIP(parts[1]).To4()
  121. prefixLen, err := strconv.ParseUint(parts[2], 10, 5)
  122. if snIp != nil && err == nil {
  123. return ip.IP4Net{ip.FromIP(snIp), uint(prefixLen)}, nil
  124. }
  125. }
  126. return ip.IP4Net{}, errors.New("Error parsing IP Subnet")
  127. }
  128. type subnetRegistry interface {
  129. getConfig() (*etcd.Response, error)
  130. getSubnets() (*etcd.Response, error)
  131. createSubnet(sn, data string, ttl uint64) (*etcd.Response, error)
  132. updateSubnet(sn, data string, ttl uint64) (*etcd.Response, error)
  133. watchSubnets(since uint64, stop chan bool) (*etcd.Response, error)
  134. }
  135. type etcdSubnetRegistry struct {
  136. cli *etcd.Client
  137. prefix string
  138. }
  139. func (esr *etcdSubnetRegistry) getConfig() (*etcd.Response, error) {
  140. resp, err := esr.cli.Get(esr.prefix+"/config", false, false)
  141. if err != nil {
  142. return nil, err
  143. }
  144. return resp, nil
  145. }
  146. func (esr *etcdSubnetRegistry) getSubnets() (*etcd.Response, error) {
  147. return esr.cli.Get(esr.prefix+"/subnets", false, true)
  148. }
  149. func (esr *etcdSubnetRegistry) createSubnet(sn, data string, ttl uint64) (*etcd.Response, error) {
  150. return esr.cli.Create(esr.prefix+"/subnets/"+sn, data, ttl)
  151. }
  152. func (esr *etcdSubnetRegistry) updateSubnet(sn, data string, ttl uint64) (*etcd.Response, error) {
  153. return esr.cli.Set(esr.prefix+"/subnets/"+sn, data, ttl)
  154. }
  155. func (esr *etcdSubnetRegistry) watchSubnets(since uint64, stop chan bool) (*etcd.Response, error) {
  156. return esr.cli.Watch(esr.prefix+"/subnets", since, true, nil, stop)
  157. }
  158. func newSubnetManager(r subnetRegistry) (*SubnetManager, error) {
  159. cfgResp, err := r.getConfig()
  160. if err != nil {
  161. return nil, err
  162. }
  163. cfg, err := ParseConfig(cfgResp.Node.Value)
  164. if err != nil {
  165. return nil, err
  166. }
  167. return &SubnetManager{
  168. registry: r,
  169. config: cfg,
  170. stop: make(chan bool, 2),
  171. }, nil
  172. }
  173. func (sm *SubnetManager) getLeases() ([]SubnetLease, error) {
  174. resp, err := sm.registry.getSubnets()
  175. var leases []SubnetLease
  176. switch {
  177. case err == nil:
  178. for _, node := range resp.Node.Nodes {
  179. sn, err := parseSubnetKey(node.Key)
  180. if err == nil {
  181. lease := SubnetLease{sn, node.Value}
  182. leases = append(leases, lease)
  183. }
  184. }
  185. sm.lastIndex = resp.EtcdIndex
  186. case err.(*etcd.EtcdError).ErrorCode == etcdKeyNotFound:
  187. // key not found: treat it as empty set
  188. sm.lastIndex = err.(*etcd.EtcdError).Index
  189. default:
  190. return nil, err
  191. }
  192. return leases, nil
  193. }
  194. func deleteLease(l []SubnetLease, i int) []SubnetLease {
  195. l[i], l = l[len(l)-1], l[:len(l)-1]
  196. return l
  197. }
  198. func (sm *SubnetManager) applyLeases(newLeases []SubnetLease) EventBatch {
  199. var batch EventBatch
  200. for _, l := range newLeases {
  201. // skip self
  202. if l.Network.Equal(sm.myLease.Network) {
  203. continue
  204. }
  205. found := false
  206. for i, c := range sm.leases {
  207. if c.Network.Equal(l.Network) {
  208. sm.leases = deleteLease(sm.leases, i)
  209. found = true
  210. break
  211. }
  212. }
  213. if !found {
  214. // new subnet
  215. batch = append(batch, Event{SubnetAdded, l})
  216. }
  217. }
  218. // everything left in sm.leases has been deleted
  219. for _, c := range sm.leases {
  220. batch = append(batch, Event{SubnetRemoved, c})
  221. }
  222. sm.leases = newLeases
  223. return batch
  224. }
  225. func (sm *SubnetManager) applySubnetChange(action string, ipn ip.IP4Net, data string) Event {
  226. switch action {
  227. case "delete", "expire":
  228. for i, l := range sm.leases {
  229. if l.Network.Equal(ipn) {
  230. deleteLease(sm.leases, i)
  231. return Event{SubnetRemoved, l}
  232. }
  233. }
  234. log.Errorf("Removed subnet (%s) was not found", ipn)
  235. return Event{
  236. SubnetRemoved,
  237. SubnetLease{ipn, ""},
  238. }
  239. default:
  240. for i, l := range sm.leases {
  241. if l.Network.Equal(ipn) {
  242. sm.leases[i] = SubnetLease{ipn, data}
  243. return Event{SubnetAdded, sm.leases[i]}
  244. }
  245. }
  246. sm.leases = append(sm.leases, SubnetLease{ipn, data})
  247. return Event{SubnetAdded, sm.leases[len(sm.leases)-1]}
  248. }
  249. }
  250. type BaseAttrs struct {
  251. PublicIP ip.IP4
  252. }
  253. func (sm *SubnetManager) allocateSubnet() (ip.IP4Net, error) {
  254. log.Infof("Picking subnet in range %s ... %s", sm.config.SubnetMin, sm.config.SubnetMax)
  255. var bag []ip.IP4
  256. sn := ip.IP4Net{sm.config.SubnetMin, sm.config.SubnetLen}
  257. OuterLoop:
  258. for ; sn.IP <= sm.config.SubnetMax && len(bag) < 100; sn = sn.Next() {
  259. for _, l := range sm.leases {
  260. if sn.Overlaps(l.Network) {
  261. continue OuterLoop
  262. }
  263. }
  264. bag = append(bag, sn.IP)
  265. }
  266. if len(bag) == 0 {
  267. return ip.IP4Net{}, errors.New("out of subnets")
  268. } else {
  269. i := randInt(0, len(bag))
  270. return ip.IP4Net{bag[i], sm.config.SubnetLen}, nil
  271. }
  272. }
  273. func (sm *SubnetManager) watchLeases(receiver chan EventBatch) {
  274. // "catch up" by replaying all the leases we discovered during
  275. // AcquireLease
  276. var batch EventBatch
  277. for _, l := range sm.leases {
  278. if !sm.myLease.Network.Equal(l.Network) {
  279. batch = append(batch, Event{SubnetAdded, l})
  280. }
  281. }
  282. if len(batch) > 0 {
  283. receiver <- batch
  284. }
  285. for {
  286. resp, err := sm.registry.watchSubnets(sm.lastIndex+1, sm.stop)
  287. // watchSubnets exited by stop chan being signaled
  288. if err == nil && resp == nil {
  289. return
  290. }
  291. var batch *EventBatch
  292. if err == nil {
  293. batch, err = sm.parseSubnetWatchResponse(resp)
  294. } else {
  295. batch, err = sm.parseSubnetWatchError(err)
  296. }
  297. if err != nil {
  298. log.Errorf("%v", err)
  299. time.Sleep(time.Second)
  300. continue
  301. }
  302. if batch != nil {
  303. receiver <- *batch
  304. }
  305. }
  306. }
  307. func (sm *SubnetManager) parseSubnetWatchResponse(resp *etcd.Response) (batch *EventBatch, err error) {
  308. sm.lastIndex = resp.EtcdIndex
  309. sn, err := parseSubnetKey(resp.Node.Key)
  310. if err != nil {
  311. err = fmt.Errorf("Error parsing subnet IP: %s", resp.Node.Key)
  312. return
  313. }
  314. // Don't process our own changes
  315. if !sm.myLease.Network.Equal(sn) {
  316. evt := sm.applySubnetChange(resp.Action, sn, resp.Node.Value)
  317. batch = &EventBatch{evt}
  318. }
  319. return
  320. }
  321. func (sm *SubnetManager) parseSubnetWatchError(err error) (batch *EventBatch, out error) {
  322. etcdErr, ok := err.(*etcd.EtcdError)
  323. if ok && etcdErr.ErrorCode == etcdEventIndexCleared {
  324. // etcd maintains a history window for events and it's possible to fall behind.
  325. // to recover, get the current state and then "diff" against our cache to generate
  326. // events for the caller
  327. log.Warning("Watch of subnet leases failed because etcd index outside history window")
  328. leases, err := sm.getLeases()
  329. if err == nil {
  330. lb := sm.applyLeases(leases)
  331. batch = &lb
  332. } else {
  333. out = fmt.Errorf("Failed to retrieve subnet leases: %v", err)
  334. }
  335. } else {
  336. out = fmt.Errorf("Watch of subnet leases failed: ", err)
  337. }
  338. return
  339. }
  340. func (sm *SubnetManager) leaseRenewer() {
  341. dur := sm.leaseExp.Sub(time.Now()) - renewMargin
  342. for {
  343. select {
  344. case <-time.After(dur):
  345. resp, err := sm.registry.updateSubnet(sm.myLease.Network.StringSep(".", "-"), sm.myLease.Data, subnetTTL)
  346. if err != nil {
  347. log.Error("Error renewing lease (trying again in 1 min): ", err)
  348. dur = time.Minute
  349. continue
  350. }
  351. sm.leaseExp = *(resp.Node.Expiration)
  352. log.Info("Lease renewed, new expiration: ", sm.leaseExp)
  353. dur = sm.leaseExp.Sub(time.Now()) - renewMargin
  354. case <-sm.stop:
  355. return
  356. }
  357. }
  358. }