verify-flags-underscore.py 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247
  1. #!/usr/bin/env python
  2. # Copyright 2015 The Kubernetes Authors.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. from __future__ import print_function
  16. import json
  17. import mmap
  18. import os
  19. import re
  20. import sys
  21. import argparse
  22. parser = argparse.ArgumentParser()
  23. parser.add_argument("filenames", help="list of files to check, all files if unspecified", nargs='*')
  24. parser.add_argument("-e", "--skip-exceptions", help="ignore hack/verify-flags/exceptions.txt and print all output", action="store_true")
  25. args = parser.parse_args()
  26. # Cargo culted from http://stackoverflow.com/questions/898669/how-can-i-detect-if-a-file-is-binary-non-text-in-python
  27. def is_binary(pathname):
  28. """Return true if the given filename is binary.
  29. @raise EnvironmentError: if the file does not exist or cannot be accessed.
  30. @attention: found @ http://bytes.com/topic/python/answers/21222-determine-file-type-binary-text on 6/08/2010
  31. @author: Trent Mick <TrentM@ActiveState.com>
  32. @author: Jorge Orpinel <jorge@orpinel.com>"""
  33. try:
  34. with open(pathname, 'r') as f:
  35. CHUNKSIZE = 1024
  36. while 1:
  37. chunk = f.read(CHUNKSIZE)
  38. if '\0' in chunk: # found null byte
  39. return True
  40. if len(chunk) < CHUNKSIZE:
  41. break # done
  42. except:
  43. return True
  44. return False
  45. def get_all_files(rootdir):
  46. all_files = []
  47. for root, dirs, files in os.walk(rootdir):
  48. # don't visit certain dirs
  49. if 'vendor' in dirs:
  50. dirs.remove('vendor')
  51. if 'staging' in dirs:
  52. dirs.remove('staging')
  53. if '_output' in dirs:
  54. dirs.remove('_output')
  55. if '_gopath' in dirs:
  56. dirs.remove('_gopath')
  57. if 'third_party' in dirs:
  58. dirs.remove('third_party')
  59. if '.git' in dirs:
  60. dirs.remove('.git')
  61. if '.make' in dirs:
  62. dirs.remove('.make')
  63. if 'exceptions.txt' in files:
  64. files.remove('exceptions.txt')
  65. if 'known-flags.txt' in files:
  66. files.remove('known-flags.txt')
  67. for name in files:
  68. pathname = os.path.join(root, name)
  69. if is_binary(pathname):
  70. continue
  71. all_files.append(pathname)
  72. return all_files
  73. def normalize_files(rootdir, files):
  74. newfiles = []
  75. a = ['Godeps', '_gopath', 'third_party', '.git', 'exceptions.txt', 'known-flags.txt']
  76. for f in files:
  77. if any(x in f for x in a):
  78. continue
  79. if f.endswith(".svg"):
  80. continue
  81. if f.endswith(".gliffy"):
  82. continue
  83. if f.endswith(".md"):
  84. continue
  85. if f.endswith(".yaml"):
  86. continue
  87. newfiles.append(f)
  88. for i, f in enumerate(newfiles):
  89. if not os.path.isabs(f):
  90. newfiles[i] = os.path.join(rootdir, f)
  91. return newfiles
  92. def line_has_bad_flag(line, flagre):
  93. results = flagre.findall(line)
  94. for result in results:
  95. if not "_" in result:
  96. return False
  97. # this should exclude many cases where jinja2 templates use kube flags
  98. # as variables, except it uses _ for the variable name
  99. if "{% set" + result + "= \"" in line:
  100. return False
  101. if "pillar[" + result + "]" in line:
  102. return False
  103. if "grains" + result in line:
  104. return False
  105. # something common in juju variables...
  106. if "template_data[" + result + "]" in line:
  107. return False
  108. return True
  109. return False
  110. # The list of files might not be the whole repo. If someone only changed a
  111. # couple of files we don't want to run all of the golang files looking for
  112. # flags. Instead load the list of flags from hack/verify-flags/known-flags.txt
  113. # If running the golang files finds a new flag not in that file, return an
  114. # error and tell the user to add the flag to the flag list.
  115. def get_flags(rootdir, files):
  116. # preload the 'known' flags
  117. pathname = os.path.join(rootdir, "hack/verify-flags/known-flags.txt")
  118. f = open(pathname, 'r')
  119. flags = set(f.read().splitlines())
  120. f.close()
  121. # preload the 'known' flags which don't follow the - standard
  122. pathname = os.path.join(rootdir, "hack/verify-flags/excluded-flags.txt")
  123. f = open(pathname, 'r')
  124. excluded_flags = set(f.read().splitlines())
  125. f.close()
  126. regexs = [ re.compile('Var[P]?\([^,]*, "([^"]*)"'),
  127. re.compile('.String[P]?\("([^"]*)",[^,]+,[^)]+\)'),
  128. re.compile('.Int[P]?\("([^"]*)",[^,]+,[^)]+\)'),
  129. re.compile('.Bool[P]?\("([^"]*)",[^,]+,[^)]+\)'),
  130. re.compile('.Duration[P]?\("([^"]*)",[^,]+,[^)]+\)'),
  131. re.compile('.StringSlice[P]?\("([^"]*)",[^,]+,[^)]+\)') ]
  132. new_flags = set()
  133. new_excluded_flags = set()
  134. # walk all the files looking for any flags being declared
  135. for pathname in files:
  136. if not pathname.endswith(".go"):
  137. continue
  138. f = open(pathname, 'r')
  139. data = f.read()
  140. f.close()
  141. matches = []
  142. for regex in regexs:
  143. matches = matches + regex.findall(data)
  144. for flag in matches:
  145. if any(x in flag for x in excluded_flags):
  146. continue
  147. if "_" in flag:
  148. new_excluded_flags.add(flag)
  149. if not "-" in flag:
  150. continue
  151. if flag not in flags:
  152. new_flags.add(flag)
  153. if len(new_excluded_flags) != 0:
  154. print("Found a flag declared with an _ but which is not explicitly listed as a valid flag name in hack/verify-flags/excluded-flags.txt")
  155. print("Are you certain this flag should not have been declared with an - instead?")
  156. l = list(new_excluded_flags)
  157. l.sort()
  158. print("%s" % "\n".join(l))
  159. sys.exit(1)
  160. if len(new_flags) != 0:
  161. print("Found flags in golang files not in the list of known flags. Please add these to hack/verify-flags/known-flags.txt")
  162. l = list(new_flags)
  163. l.sort()
  164. print("%s" % "\n".join(l))
  165. sys.exit(1)
  166. return list(flags)
  167. def flags_to_re(flags):
  168. """turn the list of all flags we found into a regex find both - and _ versions"""
  169. dashRE = re.compile('[-_]')
  170. flagREs = []
  171. for flag in flags:
  172. # turn all flag names into regexs which will find both types
  173. newre = dashRE.sub('[-_]', flag)
  174. # only match if there is not a leading or trailing alphanumeric character
  175. flagREs.append("[^\w${]" + newre + "[^\w]")
  176. # turn that list of regex strings into a single large RE
  177. flagRE = "|".join(flagREs)
  178. flagRE = re.compile(flagRE)
  179. return flagRE
  180. def load_exceptions(rootdir):
  181. exceptions = set()
  182. if args.skip_exceptions:
  183. return exceptions
  184. exception_filename = os.path.join(rootdir, "hack/verify-flags/exceptions.txt")
  185. exception_file = open(exception_filename, 'r')
  186. for exception in exception_file.read().splitlines():
  187. out = exception.split(":", 1)
  188. if len(out) != 2:
  189. print("Invalid line in exceptions file: %s" % exception)
  190. continue
  191. filename = out[0]
  192. line = out[1]
  193. exceptions.add((filename, line))
  194. return exceptions
  195. def main():
  196. rootdir = os.path.dirname(__file__) + "/../"
  197. rootdir = os.path.abspath(rootdir)
  198. exceptions = load_exceptions(rootdir)
  199. if len(args.filenames) > 0:
  200. files = args.filenames
  201. else:
  202. files = get_all_files(rootdir)
  203. files = normalize_files(rootdir, files)
  204. flags = get_flags(rootdir, files)
  205. flagRE = flags_to_re(flags)
  206. bad_lines = []
  207. # walk all the file looking for any flag that was declared and now has an _
  208. for pathname in files:
  209. relname = os.path.relpath(pathname, rootdir)
  210. f = open(pathname, 'r')
  211. for line in f.read().splitlines():
  212. if line_has_bad_flag(line, flagRE):
  213. if (relname, line) not in exceptions:
  214. bad_lines.append((relname, line))
  215. f.close()
  216. if len(bad_lines) != 0:
  217. if not args.skip_exceptions:
  218. print("Found illegal 'flag' usage. If these are false negatives you should run `hack/verify-flags-underscore.py -e > hack/verify-flags/exceptions.txt` to update the list.")
  219. bad_lines.sort()
  220. for (relname, line) in bad_lines:
  221. print("%s:%s" % (relname, line))
  222. return 1
  223. if __name__ == "__main__":
  224. sys.exit(main())