#! /usr/bin/env python3
#
# SPDX-License-Identifier: GPL-2.0-only
#

# TODO
# - option to just list all broken files
# - test suite
# - validate signed-off-by

import argparse
import collections
import json
import math
import os
import os.path
import re
import subprocess

status_values = ("accepted", "pending", "inappropriate", "backport", "submitted", "denied", "inactive-upstream")

# General pattern: start of line, optional whitespace, tag with optional
# hyphen or spaces, maybe a colon, some whitespace, then the value, all case
# insensitive.
_SOB_RE = re.compile(r"^[\t ]*(Signed[-_ ]off[-_ ]by:?)[\t ]*(.+)", re.IGNORECASE | re.MULTILINE)
_STATUS_RE = re.compile(r"^[\t ]*(Upstream[-_ ]Status:?)[\t ]*([\w-]*)", re.IGNORECASE | re.MULTILINE)
_CVE_TAG_RE = re.compile(r"^[\t ]*(CVE:)[\t ]*(.*)", re.IGNORECASE | re.MULTILINE)
_CVE_RE = re.compile(r"cve-[0-9]{4}-[0-9]{4,6}", re.IGNORECASE)


class Result:
    """
    Findings for a single patch file. The class attributes are the defaults;
    patchreview() overrides them per instance as problems are found.
    """
    # Whether the patch has an Upstream-Status or not
    missing_upstream_status = False
    # If the Upstream-Status tag is malformed in some way (string for bad bit)
    malformed_upstream_status = None
    # If the Upstream-Status value is unknown (boolean)
    unknown_upstream_status = False
    # The upstream status value (Pending, etc), stored lower-cased
    upstream_status = None
    # Whether the patch has a Signed-off-by or not
    missing_sob = False
    # If the Signed-off-by tag is malformed in some way (False, or the bad
    # tag text as a string)
    malformed_sob = False
    # The Signed-off-by tag value
    sob = None
    # Whether a patch looks like a CVE but doesn't have a CVE tag
    missing_cve = False


def blame_patch(patch):
    """
    From a patch filename, return a list of "commit summary (author name <author
    email>)" strings representing the history.

    Runs "git log" in the current working directory; raises
    subprocess.CalledProcessError if git exits with a non-zero status.
    """
    output = subprocess.check_output(("git", "log",
                                      "--follow", "--find-renames", "--diff-filter=A",
                                      "--format=%s (%aN <%aE>)",
                                      "--", patch))
    return output.decode("utf-8").splitlines()


def patchreview(path, patches):
    """
    Inspect each patch for Signed-off-by, Upstream-Status and CVE tags.

    path: directory that the names in patches are relative to
    patches: iterable of patch filenames

    Returns a dictionary mapping os.path.join(path, patch) to a Result.
    """
    results = {}

    for patch in patches:
        fullpath = os.path.join(path, patch)
        result = Result()
        results[fullpath] = result

        # Non-ASCII bytes are dropped rather than decoded; only the ASCII tags
        # are of interest here.
        with open(fullpath, encoding='ascii', errors='ignore') as f:
            content = f.read()

        # Find the Signed-off-by tag
        match = _SOB_RE.search(content)
        if match:
            tag = match.group(1)
            if tag != "Signed-off-by:":
                result.malformed_sob = tag
            result.sob = match.group(2)
        else:
            result.missing_sob = True

        # Find the Upstream-Status tag
        match = _STATUS_RE.search(content)
        if match:
            tag = match.group(1)
            if tag != "Upstream-Status:":
                result.malformed_upstream_status = tag

            value = match.group(2).lower()
            # TODO: check case
            if value not in status_values:
                result.unknown_upstream_status = True
            result.upstream_status = value
        else:
            result.missing_upstream_status = True

        # Check that patches which looks like CVEs have CVE tags
        if _CVE_RE.search(patch) or _CVE_RE.search(content):
            if not _CVE_TAG_RE.search(content):
                result.missing_cve = True
        # TODO: extract CVE list

    return results


def analyse(results, want_blame=False, verbose=True):
    """
    Print a summary of the results returned by patchreview().

    want_blame: display blame data for each malformed patch
    verbose: display per-file results instead of just summary
    """

    # want_blame requires verbose, so disable blame if we're not verbose
    want_blame = want_blame and verbose

    total_patches = 0
    missing_sob = 0
    malformed_sob = 0
    missing_status = 0
    malformed_status = 0
    missing_cve = 0
    pending_patches = 0

    for patch in sorted(results):
        r = results[patch]
        total_patches += 1

        # Build statistics
        if r.missing_sob:
            missing_sob += 1
        if r.malformed_sob:
            malformed_sob += 1
        if r.missing_upstream_status:
            missing_status += 1
        if r.missing_cve:
            missing_cve += 1
        if r.malformed_upstream_status or r.unknown_upstream_status:
            malformed_status += 1
            # Count patches with a malformed or unknown status as pending
            pending_patches += 1
        elif r.upstream_status == "pending":
            # elif: a malformed tag whose value is "pending" has already been
            # counted above and must not be counted a second time.
            pending_patches += 1

        # Collect warnings; any warning means the patch is worth blaming
        warnings = []
        if r.missing_sob:
            warnings.append("Missing Signed-off-by tag (%s)" % patch)
        if r.malformed_sob:
            warnings.append("Malformed Signed-off-by '%s' (%s)" % (r.malformed_sob, patch))
        if r.missing_cve:
            warnings.append("Missing CVE tag (%s)" % patch)
        if r.missing_upstream_status:
            warnings.append("Missing Upstream-Status tag (%s)" % patch)
        if r.malformed_upstream_status:
            warnings.append("Malformed Upstream-Status '%s' (%s)" % (r.malformed_upstream_status, patch))
        if r.unknown_upstream_status:
            warnings.append("Unknown Upstream-Status value '%s' (%s)" % (r.upstream_status, patch))

        if verbose:
            for warning in warnings:
                print(warning)

        if want_blame and warnings:
            print("\n".join(blame_patch(patch)) + "\n")

    def percent(num):
        # "count (percentage of all patches)", or N/A when there are no patches
        try:
            return "%d (%d%%)" % (num, round(num * 100.0 / total_patches))
        except ZeroDivisionError:
            return "N/A"

    if verbose:
        print()

    print("""Total patches found: %d
Patches missing Signed-off-by: %s
Patches with malformed Signed-off-by: %s
Patches missing CVE: %s
Patches missing Upstream-Status: %s
Patches with malformed Upstream-Status: %s
Patches in Pending state: %s""" % (total_patches,
                                   percent(missing_sob),
                                   percent(malformed_sob),
                                   percent(missing_cve),
                                   percent(missing_status),
                                   percent(malformed_status),
                                   percent(pending_patches)))


def histogram(results):
    """
    Print one bar per upstream status value. Each "#" stands for one percent
    of all patches (rounded up), followed by the absolute count.
    """
    counts = collections.Counter(r.upstream_status for r in results.values())
    total = len(results)
    # counts is empty when results is empty, so total is never zero here
    bars = {status: "#" * int(math.ceil(float(count) / total * 100))
            for status, count in counts.items()}
    for status in bars:
        print("%-20s %s (%d)" % (status.capitalize() if status else "No status", bars[status], counts[status]))


def main():
    """
    Command line entry point: scan the patches tracked by git under the given
    directory, print the analysis, and optionally update a JSON file and print
    a histogram.
    """
    parser = argparse.ArgumentParser(description="Patch Review Tool")
    parser.add_argument("-b", "--blame", action="store_true", help="show blame for malformed patches")
    parser.add_argument("-v", "--verbose", action="store_true", help="show per-patch results")
    parser.add_argument("-g", "--histogram", action="store_true", help="show patch histogram")
    parser.add_argument("-j", "--json", help="update JSON")
    parser.add_argument("directory", help="directory to scan")
    args = parser.parse_args()

    # -z makes git emit NUL-terminated, unquoted paths, so filenames that
    # contain whitespace or other unusual characters survive intact.
    listing = subprocess.check_output(("git", "-C", args.directory, "ls-files", "-z",
                                       "recipes-*/**/*.patch", "recipes-*/**/*.diff"))
    patches = [name for name in os.fsdecode(listing).split("\0") if name]
    results = patchreview(args.directory, patches)
    analyse(results, want_blame=args.blame, verbose=args.verbose)

    if args.json:
        if os.path.isfile(args.json):
            with open(args.json) as f:
                data = json.load(f)
        else:
            data = []

        row = collections.Counter()
        row["total"] = len(results)
        # Committer date of HEAD, formatted as seconds since the epoch
        row["date"] = subprocess.check_output(["git", "-C", args.directory, "show", "-s",
                                               "--pretty=format:%cd", "--date=format:%s"]).decode("utf-8").strip()
        for r in results.values():
            if r.upstream_status in status_values:
                row[r.upstream_status] += 1
            if r.malformed_upstream_status or r.missing_upstream_status:
                row['malformed-upstream-status'] += 1
            if r.malformed_sob or r.missing_sob:
                row['malformed-sob'] += 1

        data.append(row)
        with open(args.json, "w") as f:
            json.dump(data, f)

    if args.histogram:
        print()
        histogram(results)


if __name__ == "__main__":
    main()