#! /usr/bin/env python3
#
# SPDX-License-Identifier: GPL-2.0-only
#

"""
Patch review tool.

Scans the patch files tracked by git under a directory, checks each one for
Signed-off-by, Upstream-Status and CVE tags, and prints summary statistics.
Optionally prints per-patch warnings, git history for problematic patches, a
histogram of Upstream-Status values, and appends a row of counts to a JSON
file.
"""

# TODO
# - option to just list all broken files
# - test suite
# - validate signed-off-by

import argparse
import collections
import json
import math
import os
import re
import subprocess

# Recognised Upstream-Status values, lower-cased for comparison.
status_values = ("accepted", "pending", "inappropriate", "backport", "submitted", "denied", "inactive-upstream")

# General pattern: start of line, optional whitespace, tag with optional
# hyphen or spaces, maybe a colon, some whitespace, then the value, all case
# insensitive.
_SOB_RE = re.compile(r"^[\t ]*(Signed[-_ ]off[-_ ]by:?)[\t ]*(.+)", re.IGNORECASE | re.MULTILINE)
_STATUS_RE = re.compile(r"^[\t ]*(Upstream[-_ ]Status:?)[\t ]*([\w-]*)", re.IGNORECASE | re.MULTILINE)
_CVE_TAG_RE = re.compile(r"^[\t ]*(CVE:)[\t ]*(.*)", re.IGNORECASE | re.MULTILINE)
_CVE_RE = re.compile(r"cve-[0-9]{4}-[0-9]{4,6}", re.IGNORECASE)


class Result:
    """
    Findings for a single patch file, filled in by patchreview().

    The class attributes below are the defaults; patchreview() overrides them
    per instance.
    """
    # Whether the patch has an Upstream-Status or not
    missing_upstream_status = False
    # If the Upstream-Status tag is malformed in some way (string for bad bit)
    malformed_upstream_status = None
    # If the Upstream-Status value is unknown (boolean)
    unknown_upstream_status = False
    # The upstream status value, lower-cased (pending, etc)
    upstream_status = None
    # Whether the patch has a Signed-off-by or not
    missing_sob = False
    # False, or the tag text as written when the Signed-off-by tag is not
    # spelled exactly "Signed-off-by:"
    malformed_sob = False
    # The Signed-off-by tag value
    sob = None
    # Whether a patch looks like a CVE but doesn't have a CVE tag
    missing_cve = False


def blame_patch(patch):
    """
    From a patch filename, return a list of "commit summary (author name <author
    email>)" strings representing the history.

    Runs "git log" in the current working directory; raises
    subprocess.CalledProcessError if git exits with an error.
    """
    return subprocess.check_output(("git", "log",
                                    "--follow", "--find-renames", "--diff-filter=A",
                                    "--format=%s (%aN <%aE>)",
                                    "--", patch)).decode("utf-8").splitlines()


def patchreview(path, patches):
    """
    Inspect each patch and return a dict mapping full path to Result.

    path: directory that the entries of patches are relative to
    patches: iterable of patch filenames relative to path

    Only the first Signed-off-by and the first Upstream-Status tag found in a
    patch are examined.
    """
    results = {}

    for patch in patches:
        fullpath = os.path.join(path, patch)
        result = Result()
        results[fullpath] = result

        # Non-ASCII bytes are dropped rather than raising; the tags being
        # searched for are plain ASCII.
        with open(fullpath, encoding='ascii', errors='ignore') as f:
            content = f.read()

        # Find the Signed-off-by tag
        match = _SOB_RE.search(content)
        if match:
            tag = match.group(1)
            # The regex is deliberately lenient; anything but the exact
            # spelling is recorded as malformed.
            if tag != "Signed-off-by:":
                result.malformed_sob = tag
            result.sob = match.group(2)
        else:
            result.missing_sob = True

        # Find the Upstream-Status tag
        match = _STATUS_RE.search(content)
        if match:
            tag = match.group(1)
            if tag != "Upstream-Status:":
                result.malformed_upstream_status = tag

            status = match.group(2).lower()
            # TODO: check case
            if status not in status_values:
                result.unknown_upstream_status = True
            result.upstream_status = status
        else:
            result.missing_upstream_status = True

        # Check that patches which looks like CVEs have CVE tags
        if _CVE_RE.search(patch) or _CVE_RE.search(content):
            if not _CVE_TAG_RE.search(content):
                result.missing_cve = True
        # TODO: extract CVE list

    return results


def analyse(results, want_blame=False, verbose=True):
    """
    Print warnings and summary statistics for the output of patchreview().

    results: dict mapping patch path to Result
    want_blame: display blame data for each malformed patch
    verbose: display per-file results instead of just summary
    """

    # want_blame requires verbose, so disable blame if we're not verbose
    if want_blame and not verbose:
        want_blame = False

    total_patches = 0
    missing_sob = 0
    malformed_sob = 0
    missing_status = 0
    malformed_status = 0
    missing_cve = 0
    pending_patches = 0

    for patch in sorted(results):
        r = results[patch]
        total_patches += 1

        # Build statistics
        if r.missing_sob:
            missing_sob += 1
        if r.malformed_sob:
            malformed_sob += 1
        if r.missing_upstream_status:
            missing_status += 1
        if r.missing_cve:
            missing_cve += 1
        bad_status = r.malformed_upstream_status or r.unknown_upstream_status
        if bad_status:
            malformed_status += 1
        # Patches with a malformed or unknown status are counted as pending.
        # Count each patch at most once: a misspelled tag carrying the value
        # "pending" would otherwise be counted twice.
        if bad_status or r.upstream_status == "pending":
            pending_patches += 1

        # Collect warnings; any warning means the patch is worth blaming
        warnings = []
        if r.missing_sob:
            warnings.append("Missing Signed-off-by tag (%s)" % patch)
        if r.malformed_sob:
            warnings.append("Malformed Signed-off-by '%s' (%s)" % (r.malformed_sob, patch))
        if r.missing_cve:
            warnings.append("Missing CVE tag (%s)" % patch)
        if r.missing_upstream_status:
            warnings.append("Missing Upstream-Status tag (%s)" % patch)
        if r.malformed_upstream_status:
            warnings.append("Malformed Upstream-Status '%s' (%s)" % (r.malformed_upstream_status, patch))
        if r.unknown_upstream_status:
            warnings.append("Unknown Upstream-Status value '%s' (%s)" % (r.upstream_status, patch))

        if verbose:
            for warning in warnings:
                print(warning)

        if want_blame and warnings:
            print("\n".join(blame_patch(patch)) + "\n")

    def percent(num):
        # "count (percentage%)" of all patches, or "N/A" when there are none
        try:
            return "%d (%d%%)" % (num, round(num * 100.0 / total_patches))
        except ZeroDivisionError:
            return "N/A"

    if verbose:
        print()

    print("""Total patches found: %d
Patches missing Signed-off-by: %s
Patches with malformed Signed-off-by: %s
Patches missing CVE: %s
Patches missing Upstream-Status: %s
Patches with malformed Upstream-Status: %s
Patches in Pending state: %s""" % (total_patches,
                                   percent(missing_sob),
                                   percent(malformed_sob),
                                   percent(missing_cve),
                                   percent(missing_status),
                                   percent(malformed_status),
                                   percent(pending_patches)))


def histogram(results):
    """
    Print one bar per Upstream-Status value found in results.

    Each bar has one "#" per percent of all patches (rounded up), followed by
    the absolute count. Patches without a status are shown as "No status".
    """
    counts = collections.Counter(r.upstream_status for r in results.values())
    total = len(results)
    for status, count in counts.items():
        bar = "#" * int(math.ceil(float(count) / total * 100))
        label = status.capitalize() if status else "No status"
        print("%-20s %s (%d)" % (label, bar, count))


def main():
    """Command-line entry point: scan a directory and report on its patches."""
    parser = argparse.ArgumentParser(description="Patch Review Tool")
    parser.add_argument("-b", "--blame", action="store_true", help="show blame for malformed patches")
    parser.add_argument("-v", "--verbose", action="store_true", help="show per-patch results")
    parser.add_argument("-g", "--histogram", action="store_true", help="show patch histogram")
    parser.add_argument("-j", "--json", help="update JSON")
    parser.add_argument("directory", help="directory to scan")
    args = parser.parse_args()

    # -z gives NUL-terminated, unquoted paths, so names containing spaces or
    # characters git would otherwise quote are handled correctly.
    listing = subprocess.check_output(("git", "-C", args.directory, "ls-files", "-z",
                                       "recipes-*/**/*.patch", "recipes-*/**/*.diff")).decode("utf-8")
    patches = [name for name in listing.split("\0") if name]
    results = patchreview(args.directory, patches)
    analyse(results, want_blame=args.blame, verbose=args.verbose)

    if args.json:
        # Append one row of counts to the list stored in the JSON file,
        # starting a new list if the file does not exist yet.
        if os.path.isfile(args.json):
            with open(args.json) as f:
                data = json.load(f)
        else:
            data = []

        row = collections.Counter()
        row["total"] = len(results)
        # Committer date of HEAD, formatted by git with the strftime "%s" format
        row["date"] = subprocess.check_output(["git", "-C", args.directory, "show", "-s", "--pretty=format:%cd", "--date=format:%s"]).decode("utf-8").strip()
        for r in results.values():
            if r.upstream_status in status_values:
                row[r.upstream_status] += 1
            if r.malformed_upstream_status or r.missing_upstream_status:
                row['malformed-upstream-status'] += 1
            if r.malformed_sob or r.missing_sob:
                row['malformed-sob'] += 1

        data.append(row)
        with open(args.json, "w") as f:
            json.dump(data, f)

    if args.histogram:
        print()
        histogram(results)


if __name__ == "__main__":
    main()