qdisc_linux.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513
  1. package netlink
  2. import (
  3. "fmt"
  4. "io/ioutil"
  5. "strconv"
  6. "strings"
  7. "syscall"
  8. "github.com/vishvananda/netlink/nl"
  9. )
  10. // NOTE function is here because it uses other linux functions
  11. func NewNetem(attrs QdiscAttrs, nattrs NetemQdiscAttrs) *Netem {
  12. var limit uint32 = 1000
  13. var lossCorr, delayCorr, duplicateCorr uint32
  14. var reorderProb, reorderCorr uint32
  15. var corruptProb, corruptCorr uint32
  16. latency := nattrs.Latency
  17. loss := Percentage2u32(nattrs.Loss)
  18. gap := nattrs.Gap
  19. duplicate := Percentage2u32(nattrs.Duplicate)
  20. jitter := nattrs.Jitter
  21. // Correlation
  22. if latency > 0 && jitter > 0 {
  23. delayCorr = Percentage2u32(nattrs.DelayCorr)
  24. }
  25. if loss > 0 {
  26. lossCorr = Percentage2u32(nattrs.LossCorr)
  27. }
  28. if duplicate > 0 {
  29. duplicateCorr = Percentage2u32(nattrs.DuplicateCorr)
  30. }
  31. // FIXME should validate values(like loss/duplicate are percentages...)
  32. latency = time2Tick(latency)
  33. if nattrs.Limit != 0 {
  34. limit = nattrs.Limit
  35. }
  36. // Jitter is only value if latency is > 0
  37. if latency > 0 {
  38. jitter = time2Tick(jitter)
  39. }
  40. reorderProb = Percentage2u32(nattrs.ReorderProb)
  41. reorderCorr = Percentage2u32(nattrs.ReorderCorr)
  42. if reorderProb > 0 {
  43. // ERROR if lantency == 0
  44. if gap == 0 {
  45. gap = 1
  46. }
  47. }
  48. corruptProb = Percentage2u32(nattrs.CorruptProb)
  49. corruptCorr = Percentage2u32(nattrs.CorruptCorr)
  50. return &Netem{
  51. QdiscAttrs: attrs,
  52. Latency: latency,
  53. DelayCorr: delayCorr,
  54. Limit: limit,
  55. Loss: loss,
  56. LossCorr: lossCorr,
  57. Gap: gap,
  58. Duplicate: duplicate,
  59. DuplicateCorr: duplicateCorr,
  60. Jitter: jitter,
  61. ReorderProb: reorderProb,
  62. ReorderCorr: reorderCorr,
  63. CorruptProb: corruptProb,
  64. CorruptCorr: corruptCorr,
  65. }
  66. }
// QdiscDel will delete a qdisc from the system.
// Equivalent to: `tc qdisc del $qdisc`
//
// It delegates to the QdiscDel method of the package-level handle pkgHandle
// and returns that method's error unchanged.
func QdiscDel(qdisc Qdisc) error {
	return pkgHandle.QdiscDel(qdisc)
}
  72. // QdiscDel will delete a qdisc from the system.
  73. // Equivalent to: `tc qdisc del $qdisc`
  74. func (h *Handle) QdiscDel(qdisc Qdisc) error {
  75. return h.qdiscModify(syscall.RTM_DELQDISC, 0, qdisc)
  76. }
// QdiscChange will change a qdisc in place.
// Equivalent to: `tc qdisc change $qdisc`
// The parent and handle MUST NOT be changed.
//
// It delegates to the QdiscChange method of the package-level handle
// pkgHandle and returns that method's error unchanged.
func QdiscChange(qdisc Qdisc) error {
	return pkgHandle.QdiscChange(qdisc)
}
  83. // QdiscChange will change a qdisc in place
  84. // Equivalent to: `tc qdisc change $qdisc`
  85. // The parent and handle MUST NOT be changed.
  86. func (h *Handle) QdiscChange(qdisc Qdisc) error {
  87. return h.qdiscModify(syscall.RTM_NEWQDISC, 0, qdisc)
  88. }
// QdiscReplace will replace a qdisc in the system.
// Equivalent to: `tc qdisc replace $qdisc`
// The handle MUST change.
//
// It delegates to the QdiscReplace method of the package-level handle
// pkgHandle and returns that method's error unchanged.
func QdiscReplace(qdisc Qdisc) error {
	return pkgHandle.QdiscReplace(qdisc)
}
  95. // QdiscReplace will replace a qdisc to the system.
  96. // Equivalent to: `tc qdisc replace $qdisc`
  97. // The handle MUST change.
  98. func (h *Handle) QdiscReplace(qdisc Qdisc) error {
  99. return h.qdiscModify(
  100. syscall.RTM_NEWQDISC,
  101. syscall.NLM_F_CREATE|syscall.NLM_F_REPLACE,
  102. qdisc)
  103. }
// QdiscAdd will add a qdisc to the system.
// Equivalent to: `tc qdisc add $qdisc`
//
// It delegates to the QdiscAdd method of the package-level handle pkgHandle
// and returns that method's error unchanged.
func QdiscAdd(qdisc Qdisc) error {
	return pkgHandle.QdiscAdd(qdisc)
}
  109. // QdiscAdd will add a qdisc to the system.
  110. // Equivalent to: `tc qdisc add $qdisc`
  111. func (h *Handle) QdiscAdd(qdisc Qdisc) error {
  112. return h.qdiscModify(
  113. syscall.RTM_NEWQDISC,
  114. syscall.NLM_F_CREATE|syscall.NLM_F_EXCL,
  115. qdisc)
  116. }
  117. func (h *Handle) qdiscModify(cmd, flags int, qdisc Qdisc) error {
  118. req := h.newNetlinkRequest(cmd, flags|syscall.NLM_F_ACK)
  119. base := qdisc.Attrs()
  120. msg := &nl.TcMsg{
  121. Family: nl.FAMILY_ALL,
  122. Ifindex: int32(base.LinkIndex),
  123. Handle: base.Handle,
  124. Parent: base.Parent,
  125. }
  126. req.AddData(msg)
  127. // When deleting don't bother building the rest of the netlink payload
  128. if cmd != syscall.RTM_DELQDISC {
  129. if err := qdiscPayload(req, qdisc); err != nil {
  130. return err
  131. }
  132. }
  133. _, err := req.Execute(syscall.NETLINK_ROUTE, 0)
  134. return err
  135. }
  136. func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error {
  137. req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(qdisc.Type())))
  138. options := nl.NewRtAttr(nl.TCA_OPTIONS, nil)
  139. if prio, ok := qdisc.(*Prio); ok {
  140. tcmap := nl.TcPrioMap{
  141. Bands: int32(prio.Bands),
  142. Priomap: prio.PriorityMap,
  143. }
  144. options = nl.NewRtAttr(nl.TCA_OPTIONS, tcmap.Serialize())
  145. } else if tbf, ok := qdisc.(*Tbf); ok {
  146. opt := nl.TcTbfQopt{}
  147. // TODO: handle rate > uint32
  148. opt.Rate.Rate = uint32(tbf.Rate)
  149. opt.Limit = tbf.Limit
  150. opt.Buffer = tbf.Buffer
  151. nl.NewRtAttrChild(options, nl.TCA_TBF_PARMS, opt.Serialize())
  152. } else if htb, ok := qdisc.(*Htb); ok {
  153. opt := nl.TcHtbGlob{}
  154. opt.Version = htb.Version
  155. opt.Rate2Quantum = htb.Rate2Quantum
  156. opt.Defcls = htb.Defcls
  157. // TODO: Handle Debug properly. For now default to 0
  158. opt.Debug = htb.Debug
  159. opt.DirectPkts = htb.DirectPkts
  160. nl.NewRtAttrChild(options, nl.TCA_HTB_INIT, opt.Serialize())
  161. // nl.NewRtAttrChild(options, nl.TCA_HTB_DIRECT_QLEN, opt.Serialize())
  162. } else if netem, ok := qdisc.(*Netem); ok {
  163. opt := nl.TcNetemQopt{}
  164. opt.Latency = netem.Latency
  165. opt.Limit = netem.Limit
  166. opt.Loss = netem.Loss
  167. opt.Gap = netem.Gap
  168. opt.Duplicate = netem.Duplicate
  169. opt.Jitter = netem.Jitter
  170. options = nl.NewRtAttr(nl.TCA_OPTIONS, opt.Serialize())
  171. // Correlation
  172. corr := nl.TcNetemCorr{}
  173. corr.DelayCorr = netem.DelayCorr
  174. corr.LossCorr = netem.LossCorr
  175. corr.DupCorr = netem.DuplicateCorr
  176. if corr.DelayCorr > 0 || corr.LossCorr > 0 || corr.DupCorr > 0 {
  177. nl.NewRtAttrChild(options, nl.TCA_NETEM_CORR, corr.Serialize())
  178. }
  179. // Corruption
  180. corruption := nl.TcNetemCorrupt{}
  181. corruption.Probability = netem.CorruptProb
  182. corruption.Correlation = netem.CorruptCorr
  183. if corruption.Probability > 0 {
  184. nl.NewRtAttrChild(options, nl.TCA_NETEM_CORRUPT, corruption.Serialize())
  185. }
  186. // Reorder
  187. reorder := nl.TcNetemReorder{}
  188. reorder.Probability = netem.ReorderProb
  189. reorder.Correlation = netem.ReorderCorr
  190. if reorder.Probability > 0 {
  191. nl.NewRtAttrChild(options, nl.TCA_NETEM_REORDER, reorder.Serialize())
  192. }
  193. } else if _, ok := qdisc.(*Ingress); ok {
  194. // ingress filters must use the proper handle
  195. if qdisc.Attrs().Parent != HANDLE_INGRESS {
  196. return fmt.Errorf("Ingress filters must set Parent to HANDLE_INGRESS")
  197. }
  198. }
  199. req.AddData(options)
  200. return nil
  201. }
// QdiscList gets a list of qdiscs in the system.
// Equivalent to: `tc qdisc show`.
// The list can be filtered by link.
//
// It delegates to the QdiscList method of the package-level handle pkgHandle
// and returns that method's results unchanged.
func QdiscList(link Link) ([]Qdisc, error) {
	return pkgHandle.QdiscList(link)
}
  208. // QdiscList gets a list of qdiscs in the system.
  209. // Equivalent to: `tc qdisc show`.
  210. // The list can be filtered by link.
  211. func (h *Handle) QdiscList(link Link) ([]Qdisc, error) {
  212. req := h.newNetlinkRequest(syscall.RTM_GETQDISC, syscall.NLM_F_DUMP)
  213. index := int32(0)
  214. if link != nil {
  215. base := link.Attrs()
  216. h.ensureIndex(base)
  217. index = int32(base.Index)
  218. }
  219. msg := &nl.TcMsg{
  220. Family: nl.FAMILY_ALL,
  221. Ifindex: index,
  222. }
  223. req.AddData(msg)
  224. msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWQDISC)
  225. if err != nil {
  226. return nil, err
  227. }
  228. var res []Qdisc
  229. for _, m := range msgs {
  230. msg := nl.DeserializeTcMsg(m)
  231. attrs, err := nl.ParseRouteAttr(m[msg.Len():])
  232. if err != nil {
  233. return nil, err
  234. }
  235. // skip qdiscs from other interfaces
  236. if link != nil && msg.Ifindex != index {
  237. continue
  238. }
  239. base := QdiscAttrs{
  240. LinkIndex: int(msg.Ifindex),
  241. Handle: msg.Handle,
  242. Parent: msg.Parent,
  243. Refcnt: msg.Info,
  244. }
  245. var qdisc Qdisc
  246. qdiscType := ""
  247. for _, attr := range attrs {
  248. switch attr.Attr.Type {
  249. case nl.TCA_KIND:
  250. qdiscType = string(attr.Value[:len(attr.Value)-1])
  251. switch qdiscType {
  252. case "pfifo_fast":
  253. qdisc = &PfifoFast{}
  254. case "prio":
  255. qdisc = &Prio{}
  256. case "tbf":
  257. qdisc = &Tbf{}
  258. case "ingress":
  259. qdisc = &Ingress{}
  260. case "htb":
  261. qdisc = &Htb{}
  262. case "netem":
  263. qdisc = &Netem{}
  264. default:
  265. qdisc = &GenericQdisc{QdiscType: qdiscType}
  266. }
  267. case nl.TCA_OPTIONS:
  268. switch qdiscType {
  269. case "pfifo_fast":
  270. // pfifo returns TcPrioMap directly without wrapping it in rtattr
  271. if err := parsePfifoFastData(qdisc, attr.Value); err != nil {
  272. return nil, err
  273. }
  274. case "prio":
  275. // prio returns TcPrioMap directly without wrapping it in rtattr
  276. if err := parsePrioData(qdisc, attr.Value); err != nil {
  277. return nil, err
  278. }
  279. case "tbf":
  280. data, err := nl.ParseRouteAttr(attr.Value)
  281. if err != nil {
  282. return nil, err
  283. }
  284. if err := parseTbfData(qdisc, data); err != nil {
  285. return nil, err
  286. }
  287. case "htb":
  288. data, err := nl.ParseRouteAttr(attr.Value)
  289. if err != nil {
  290. return nil, err
  291. }
  292. if err := parseHtbData(qdisc, data); err != nil {
  293. return nil, err
  294. }
  295. case "netem":
  296. if err := parseNetemData(qdisc, attr.Value); err != nil {
  297. return nil, err
  298. }
  299. // no options for ingress
  300. }
  301. }
  302. }
  303. *qdisc.Attrs() = base
  304. res = append(res, qdisc)
  305. }
  306. return res, nil
  307. }
  308. func parsePfifoFastData(qdisc Qdisc, value []byte) error {
  309. pfifo := qdisc.(*PfifoFast)
  310. tcmap := nl.DeserializeTcPrioMap(value)
  311. pfifo.PriorityMap = tcmap.Priomap
  312. pfifo.Bands = uint8(tcmap.Bands)
  313. return nil
  314. }
  315. func parsePrioData(qdisc Qdisc, value []byte) error {
  316. prio := qdisc.(*Prio)
  317. tcmap := nl.DeserializeTcPrioMap(value)
  318. prio.PriorityMap = tcmap.Priomap
  319. prio.Bands = uint8(tcmap.Bands)
  320. return nil
  321. }
  322. func parseHtbData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
  323. native = nl.NativeEndian()
  324. htb := qdisc.(*Htb)
  325. for _, datum := range data {
  326. switch datum.Attr.Type {
  327. case nl.TCA_HTB_INIT:
  328. opt := nl.DeserializeTcHtbGlob(datum.Value)
  329. htb.Version = opt.Version
  330. htb.Rate2Quantum = opt.Rate2Quantum
  331. htb.Defcls = opt.Defcls
  332. htb.Debug = opt.Debug
  333. htb.DirectPkts = opt.DirectPkts
  334. case nl.TCA_HTB_DIRECT_QLEN:
  335. // TODO
  336. //htb.DirectQlen = native.uint32(datum.Value)
  337. }
  338. }
  339. return nil
  340. }
  341. func parseNetemData(qdisc Qdisc, value []byte) error {
  342. netem := qdisc.(*Netem)
  343. opt := nl.DeserializeTcNetemQopt(value)
  344. netem.Latency = opt.Latency
  345. netem.Limit = opt.Limit
  346. netem.Loss = opt.Loss
  347. netem.Gap = opt.Gap
  348. netem.Duplicate = opt.Duplicate
  349. netem.Jitter = opt.Jitter
  350. data, err := nl.ParseRouteAttr(value[nl.SizeofTcNetemQopt:])
  351. if err != nil {
  352. return err
  353. }
  354. for _, datum := range data {
  355. switch datum.Attr.Type {
  356. case nl.TCA_NETEM_CORR:
  357. opt := nl.DeserializeTcNetemCorr(datum.Value)
  358. netem.DelayCorr = opt.DelayCorr
  359. netem.LossCorr = opt.LossCorr
  360. netem.DuplicateCorr = opt.DupCorr
  361. case nl.TCA_NETEM_CORRUPT:
  362. opt := nl.DeserializeTcNetemCorrupt(datum.Value)
  363. netem.CorruptProb = opt.Probability
  364. netem.CorruptCorr = opt.Correlation
  365. case nl.TCA_NETEM_REORDER:
  366. opt := nl.DeserializeTcNetemReorder(datum.Value)
  367. netem.ReorderProb = opt.Probability
  368. netem.ReorderCorr = opt.Correlation
  369. }
  370. }
  371. return nil
  372. }
  373. func parseTbfData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error {
  374. native = nl.NativeEndian()
  375. tbf := qdisc.(*Tbf)
  376. for _, datum := range data {
  377. switch datum.Attr.Type {
  378. case nl.TCA_TBF_PARMS:
  379. opt := nl.DeserializeTcTbfQopt(datum.Value)
  380. tbf.Rate = uint64(opt.Rate.Rate)
  381. tbf.Limit = opt.Limit
  382. tbf.Buffer = opt.Buffer
  383. case nl.TCA_TBF_RATE64:
  384. tbf.Rate = native.Uint64(datum.Value[0:4])
  385. }
  386. }
  387. return nil
  388. }
  389. const (
  390. TIME_UNITS_PER_SEC = 1000000
  391. )
  392. var (
  393. tickInUsec float64
  394. clockFactor float64
  395. hz float64
  396. )
  397. func initClock() {
  398. data, err := ioutil.ReadFile("/proc/net/psched")
  399. if err != nil {
  400. return
  401. }
  402. parts := strings.Split(strings.TrimSpace(string(data)), " ")
  403. if len(parts) < 3 {
  404. return
  405. }
  406. var vals [3]uint64
  407. for i := range vals {
  408. val, err := strconv.ParseUint(parts[i], 16, 32)
  409. if err != nil {
  410. return
  411. }
  412. vals[i] = val
  413. }
  414. // compatibility
  415. if vals[2] == 1000000000 {
  416. vals[0] = vals[1]
  417. }
  418. clockFactor = float64(vals[2]) / TIME_UNITS_PER_SEC
  419. tickInUsec = float64(vals[0]) / float64(vals[1]) * clockFactor
  420. hz = float64(vals[0])
  421. }
  422. func TickInUsec() float64 {
  423. if tickInUsec == 0.0 {
  424. initClock()
  425. }
  426. return tickInUsec
  427. }
  428. func ClockFactor() float64 {
  429. if clockFactor == 0.0 {
  430. initClock()
  431. }
  432. return clockFactor
  433. }
  434. func Hz() float64 {
  435. if hz == 0.0 {
  436. initClock()
  437. }
  438. return hz
  439. }
  440. func time2Tick(time uint32) uint32 {
  441. return uint32(float64(time) * TickInUsec())
  442. }
  443. func tick2Time(tick uint32) uint32 {
  444. return uint32(float64(tick) / TickInUsec())
  445. }
  446. func time2Ktime(time uint32) uint32 {
  447. return uint32(float64(time) * ClockFactor())
  448. }
  449. func ktime2Time(ktime uint32) uint32 {
  450. return uint32(float64(ktime) / ClockFactor())
  451. }
  452. func burst(rate uint64, buffer uint32) uint32 {
  453. return uint32(float64(rate) * float64(tick2Time(buffer)) / TIME_UNITS_PER_SEC)
  454. }
  455. func latency(rate uint64, limit, buffer uint32) float64 {
  456. return TIME_UNITS_PER_SEC*(float64(limit)/float64(rate)) - float64(tick2Time(buffer))
  457. }
  458. func Xmittime(rate uint64, size uint32) float64 {
  459. return TickInUsec() * TIME_UNITS_PER_SEC * (float64(size) / float64(rate))
  460. }