update_owners.py 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. #!/usr/bin/env python
  2. # Copyright 2016 The Kubernetes Authors.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. import collections
  16. import csv
  17. import re
  18. import json
  19. import os
  20. import random
  21. import sys
  22. import time
  23. import urllib2
  24. import zlib
  25. BASE_DIR = os.path.dirname(os.path.abspath(__file__))
  26. OWNERS_PATH = os.path.abspath(
  27. os.path.join(BASE_DIR, '..', 'test', 'test_owners.csv'))
  28. GCS_URL_BASE = 'https://storage.googleapis.com/kubernetes-test-history/'
  29. SKIP_MAINTAINERS = {
  30. 'a-robinson', 'aronchick', 'bgrant0607-nocc', 'david-mcmahon',
  31. 'goltermann', 'sarahnovotny'}
  32. def get_test_history(days_ago):
  33. url = time.strftime(GCS_URL_BASE + 'logs/%Y-%m-%d.json',
  34. time.gmtime(time.time() - days_ago * 24 * 60 * 60))
  35. resp = urllib2.urlopen(url)
  36. content = resp.read()
  37. if resp.headers.get('content-encoding') == 'gzip':
  38. content = zlib.decompress(content, 15 | 16)
  39. return json.loads(content)
  40. def normalize(name):
  41. name = re.sub(r'\[.*?\]|\{.*?\}', '', name)
  42. name = re.sub(r'\s+', ' ', name)
  43. return name.strip()
  44. def load_owners(fname):
  45. owners = {}
  46. with open(fname) as f:
  47. for n, (name, owner, random_assignment) in enumerate(csv.reader(f)):
  48. if n == 0:
  49. continue # header
  50. owners[normalize(name)] = (owner, int(random_assignment))
  51. return owners
  52. def write_owners(fname, owners):
  53. with open(fname, 'w') as f:
  54. out = csv.writer(f, lineterminator='\n')
  55. out.writerow(['name', 'owner', 'auto-assigned'])
  56. sort_key = lambda (k, v): (k != 'DEFAULT', k) # put 'DEFAULT' first.
  57. items = sorted(owners.items(), key=sort_key)
  58. for name, (owner, random_assignment) in items:
  59. out.writerow([name, owner, int(random_assignment)])
  60. def get_maintainers():
  61. # Github doesn't seem to support team membership listing without a key with
  62. # org admin privileges. Instead, we do it manually:
  63. # Open https://github.com/orgs/kubernetes/teams/kubernetes-maintainers
  64. # Run this in the js console:
  65. # [].slice.call(document.querySelectorAll('.team-member-username a')).map(
  66. # e => e.textContent.trim())
  67. ret = {"a-robinson", "alex-mohr", "amygdala", "andyzheng0831", "apelisse",
  68. "aronchick", "bgrant0607", "bgrant0607-nocc", "bprashanth",
  69. "brendandburns", "caesarxuchao", "childsb", "cjcullen",
  70. "david-mcmahon", "davidopp", "dchen1107", "deads2k",
  71. "derekwaynecarr", "dubstack", "eparis", "erictune", "fabioy",
  72. "fejta", "fgrzadkowski", "freehan", "ghodss", "girishkalele",
  73. "gmarek", "goltermann", "grodrigues3", "hurf", "ingvagabund", "ixdy",
  74. "jackgr", "janetkuo", "jbeda", "jdef", "jfrazelle", "jingxu97",
  75. "jlowdermilk", "jsafrane", "jszczepkowski", "justinsb", "kargakis",
  76. "karlkfi", "kelseyhightower", "kevin-wangzefeng", "krousey",
  77. "lavalamp", "liggitt", "luxas", "madhusudancs", "maisem", "mansoorj",
  78. "matchstick", "mbohlool", "mikedanese", "mml", "mtaufen", "mwielgus",
  79. "ncdc", "nikhiljindal", "piosz", "pmorie", "pwittrock", "Q-Lee",
  80. "quinton-hoole", "Random-Liu", "rmmh", "roberthbailey", "ronnielai",
  81. "saad-ali", "sarahnovotny", "smarterclayton", "soltysh", "spxtr",
  82. "sttts", "swagiaal", "thockin", "timothysc", "timstclair", "tmrts",
  83. "vishh", "vulpecula", "wojtek-t", "xiang90", "yifan-gu", "yujuhong",
  84. "zmerlynn"}
  85. return sorted(ret - SKIP_MAINTAINERS)
  86. def main():
  87. test_names = set()
  88. for days_ago in range(4):
  89. test_history = get_test_history(days_ago)
  90. test_names.update(normalize(name) for name in test_history['test_names'])
  91. test_names.add('DEFAULT')
  92. test_names = sorted(test_names)
  93. owners = load_owners(OWNERS_PATH)
  94. outdated_tests = sorted(set(owners) - set(test_names))
  95. new_tests = sorted(set(test_names) - set(owners))
  96. maintainers = get_maintainers()
  97. print '# OUTDATED TESTS (%d):' % len(outdated_tests)
  98. print '\n'.join(outdated_tests)
  99. print '# NEW TESTS (%d):' % len(new_tests)
  100. print '\n'.join(new_tests)
  101. for name in outdated_tests:
  102. owners.pop(name)
  103. print '# UNEXPECTED MAINTAINERS ',
  104. print '(randomly assigned, but not in kubernetes-maintainers)'
  105. for name, (owner, random_assignment) in sorted(owners.iteritems()):
  106. if random_assignment and owner not in maintainers:
  107. print '%-16s %s' % (owner, name)
  108. owners.pop(name)
  109. print
  110. owner_counts = collections.Counter(
  111. owner for name, (owner, random) in owners.iteritems()
  112. if owner in maintainers)
  113. for test_name in set(test_names) - set(owners):
  114. new_owner, _count = random.choice(owner_counts.most_common()[-4:])
  115. owner_counts[new_owner] += 1
  116. owners[test_name] = (new_owner, True)
  117. print '# Tests per maintainer:'
  118. for owner, count in owner_counts.most_common():
  119. print '%-20s %3d' % (owner, count)
  120. write_owners(OWNERS_PATH, owners)
  121. if __name__ == '__main__':
  122. main()