xref: /OK3568_Linux_fs/yocto/poky/scripts/contrib/patchreview.py (revision 4882a59341e53eb6f0b4789bf948001014eff981)
#! /usr/bin/env python3
#
# SPDX-License-Identifier: GPL-2.0-only
#

# TODO
# - option to just list all broken files
# - test suite
# - validate signed-off-by

# Recognised Upstream-Status values, all lower case. patchreview() lower-cases
# the value it finds in a patch before looking it up in this tuple.
status_values = (
    "accepted",
    "pending",
    "inappropriate",
    "backport",
    "submitted",
    "denied",
    "inactive-upstream",
)
12*4882a593Smuzhiyun
class Result:
    """
    Findings for a single patch file.

    All fields are class-level defaults describing a patch with nothing
    recorded yet; patchreview() overrides the relevant ones on the instance it
    creates for each patch.
    """

    # --- Upstream-Status tag ---
    # The upstream status value, lower-cased (pending, etc); None if not found
    upstream_status = None
    # True when the patch has no Upstream-Status tag at all
    missing_upstream_status = False
    # None, or the tag text as written when it is not spelled exactly
    # "Upstream-Status:" (string for the bad bit)
    malformed_upstream_status = None
    # True when the Upstream-Status value is not one of status_values
    unknown_upstream_status = False

    # --- Signed-off-by tag ---
    # The Signed-off-by tag value; None if not found
    sob = None
    # True when the patch has no Signed-off-by tag at all
    missing_sob = False
    # False, or the tag text as written when it is not spelled exactly
    # "Signed-off-by:"
    malformed_sob = False

    # --- CVE tag ---
    # True when a patch looks like a CVE fix but doesn't have a CVE tag
    missing_cve = False
30*4882a593Smuzhiyun
def blame_patch(patch):
    """
    Return the history of a patch file as a list of strings.

    Runs "git log" on the given patch filename, following renames and keeping
    only the commits in which the file was added (--diff-filter=A). Each list
    entry has the form "commit summary (author name <author email>)".

    Raises subprocess.CalledProcessError if git exits with a non-zero status.
    """
    import subprocess

    command = (
        "git", "log",
        "--follow", "--find-renames", "--diff-filter=A",
        "--format=%s (%aN <%aE>)",
        "--", patch,
    )
    raw = subprocess.check_output(command)
    text = raw.decode("utf-8")
    return text.splitlines()
41*4882a593Smuzhiyun
def patchreview(path, patches):
    """
    Inspect each patch file for Signed-off-by, Upstream-Status and CVE tags.

    path: directory that the names in 'patches' are relative to
    patches: iterable of patch filenames

    Returns a dict mapping os.path.join(path, patch) to a Result describing
    what was found in that file. Files are read as ASCII with undecodable
    bytes dropped, so non-ASCII content cannot make the scan fail.
    """
    import re, os.path

    # General pattern: start of line, optional whitespace, tag with optional
    # hyphen or spaces, maybe a colon, some whitespace, then the value, all case
    # insensitive.
    sob_re = re.compile(r"^[\t ]*(Signed[-_ ]off[-_ ]by:?)[\t ]*(.+)", re.IGNORECASE | re.MULTILINE)
    status_re = re.compile(r"^[\t ]*(Upstream[-_ ]Status:?)[\t ]*([\w-]*)", re.IGNORECASE | re.MULTILINE)
    cve_tag_re = re.compile(r"^[\t ]*(CVE:)[\t ]*(.*)", re.IGNORECASE | re.MULTILINE)
    cve_re = re.compile(r"cve-[0-9]{4}-[0-9]{4,6}", re.IGNORECASE)

    results = {}

    for patch in patches:

        fullpath = os.path.join(path, patch)
        result = Result()
        results[fullpath] = result

        # Use a context manager so the handle is closed as soon as the file is
        # read, instead of leaking one open file per patch.
        with open(fullpath, encoding='ascii', errors='ignore') as f:
            content = f.read()

        # Find the Signed-off-by tag
        match = sob_re.search(content)
        if match:
            # The regex is lenient (case, separators, optional colon) so that
            # near-misses are found; anything not spelled exactly is recorded
            # as malformed, keeping the text as written for the report.
            value = match.group(1)
            if value != "Signed-off-by:":
                result.malformed_sob = value
            result.sob = match.group(2)
        else:
            result.missing_sob = True


        # Find the Upstream-Status tag
        match = status_re.search(content)
        if match:
            value = match.group(1)
            if value != "Upstream-Status:":
                result.malformed_upstream_status = value

            # The value is compared and stored in lower case
            value = match.group(2).lower()
            # TODO: check case
            if value not in status_values:
                result.unknown_upstream_status = True
            result.upstream_status = value
        else:
            result.missing_upstream_status = True

        # Check that patches which looks like CVEs have CVE tags: a CVE
        # identifier in the filename or the content requires a "CVE:" tag line.
        if cve_re.search(patch) or cve_re.search(content):
            if not cve_tag_re.search(content):
                result.missing_cve = True
        # TODO: extract CVE list

    return results
96*4882a593Smuzhiyun
97*4882a593Smuzhiyun
def analyse(results, want_blame=False, verbose=True):
    """
    Print the review findings: optional per-patch warnings, then a summary.

    results: mapping of patch path to a Result-like object (as returned by
             patchreview())
    want_blame: display blame data for each malformed patch
    verbose: display per-file results instead of just summary

    Each summary line shows a count and its percentage of the total number of
    patches, or "N/A" when there are no patches at all.
    """

    # want_blame requires verbose, so disable blame if we're not verbose
    if want_blame and not verbose:
        want_blame = False

    total_patches = 0
    missing_sob = 0
    malformed_sob = 0
    missing_status = 0
    malformed_status = 0
    missing_cve = 0
    pending_patches = 0

    for patch in sorted(results):
        r = results[patch]
        total_patches += 1

        bad_status = bool(r.malformed_upstream_status or r.unknown_upstream_status)

        # Build statistics
        if r.missing_sob:
            missing_sob += 1
        if r.malformed_sob:
            malformed_sob += 1
        if r.missing_upstream_status:
            missing_status += 1
        if bad_status:
            malformed_status += 1
        if r.missing_cve:
            missing_cve += 1
        # Patches with a malformed or unknown status are counted as pending,
        # as are patches that say "pending". Count each patch at most once: a
        # malformed tag whose value is "pending" satisfies both conditions and
        # must not push the pending total above the number of patches.
        if bad_status or r.upstream_status == "pending":
            pending_patches += 1

        # Collect warnings, in reporting order, as (flagged, message) pairs
        warnings = (
            (r.missing_sob,
             "Missing Signed-off-by tag (%s)" % patch),
            (r.malformed_sob,
             "Malformed Signed-off-by '%s' (%s)" % (r.malformed_sob, patch)),
            (r.missing_cve,
             "Missing CVE tag (%s)" % patch),
            (r.missing_upstream_status,
             "Missing Upstream-Status tag (%s)" % patch),
            (r.malformed_upstream_status,
             "Malformed Upstream-Status '%s' (%s)" % (r.malformed_upstream_status, patch)),
            (r.unknown_upstream_status,
             "Unknown Upstream-Status value '%s' (%s)" % (r.upstream_status, patch)),
        )
        problems = [message for flagged, message in warnings if flagged]

        if verbose:
            for message in problems:
                print(message)

        # Any problem at all makes the patch worth blaming
        if want_blame and problems:
            print("\n".join(blame_patch(patch)) + "\n")

    def percent(num):
        # Format as "count (percentage%)"; with no patches there is nothing to
        # take a percentage of.
        try:
            return "%d (%d%%)" % (num, round(num * 100.0 / total_patches))
        except ZeroDivisionError:
            return "N/A"

    if verbose:
        print()

    print("""Total patches found: %d
Patches missing Signed-off-by: %s
Patches with malformed Signed-off-by: %s
Patches missing CVE: %s
Patches missing Upstream-Status: %s
Patches with malformed Upstream-Status: %s
Patches in Pending state: %s""" % (total_patches,
                                   percent(missing_sob),
                                   percent(malformed_sob),
                                   percent(missing_cve),
                                   percent(missing_status),
                                   percent(malformed_status),
                                   percent(pending_patches)))
188*4882a593Smuzhiyun
189*4882a593Smuzhiyun
190*4882a593Smuzhiyun
def histogram(results):
    """
    Print a text histogram of how many patches have each Upstream-Status.

    results: mapping of patch path to a Result-like object; only the
             upstream_status attribute of each value is read

    One line is printed per distinct status, in first-seen order: the
    capitalised status ("No status" when it is None or empty), a bar of one
    "#" per percent of all patches (rounded up), and the raw count.
    """
    # Standard library only, so this option works without extra packages.
    import collections
    import math

    counts = collections.Counter(r.upstream_status for r in results.values())
    total = len(results)
    for status, count in counts.items():
        bar = "#" * int(math.ceil(float(count) / total * 100))
        print("%-20s %s (%d)" % (status.capitalize() if status else "No status", bar, count))
198*4882a593Smuzhiyun
199*4882a593Smuzhiyun
# Command-line entry point: scan a layer directory for patches, print the
# analysis, and optionally append a statistics row to a JSON file and/or print
# a histogram of Upstream-Status values.
if __name__ == "__main__":
    import argparse, subprocess, os

    parser = argparse.ArgumentParser(description="Patch Review Tool")
    parser.add_argument("-b", "--blame", action="store_true", help="show blame for malformed patches")
    parser.add_argument("-v", "--verbose", action="store_true", help="show per-patch results")
    parser.add_argument("-g", "--histogram", action="store_true", help="show patch histogram")
    parser.add_argument("-j", "--json", help="update JSON")
    parser.add_argument("directory", help="directory to scan")
    args = parser.parse_args()

    # Ask git for NUL-terminated names (-z) and split on NUL, so that paths
    # containing spaces or other whitespace are kept as a single patch name.
    listing = subprocess.check_output(("git", "-C", args.directory, "ls-files", "-z", "recipes-*/**/*.patch", "recipes-*/**/*.diff")).decode("utf-8")
    patches = [name for name in listing.split("\0") if name]
    results = patchreview(args.directory, patches)
    analyse(results, want_blame=args.blame, verbose=args.verbose)

    if args.json:
        import json, os.path, collections
        # Append to the existing history if the file is already there
        if os.path.isfile(args.json):
            with open(args.json) as f:
                data = json.load(f)
        else:
            data = []

        row = collections.Counter()
        row["total"] = len(results)
        # Commit date of the scanned directory's HEAD, formatted with "%s"
        row["date"] = subprocess.check_output(["git", "-C", args.directory, "show", "-s", "--pretty=format:%cd", "--date=format:%s"]).decode("utf-8").strip()
        for r in results.values():
            if r.upstream_status in status_values:
                row[r.upstream_status] += 1
            if r.malformed_upstream_status or r.missing_upstream_status:
                row['malformed-upstream-status'] += 1
            if r.malformed_sob or r.missing_sob:
                row['malformed-sob'] += 1

        data.append(row)
        # Close the file explicitly so the JSON is fully written to disk
        with open(args.json, "w") as f:
            json.dump(data, f)

    if args.histogram:
        print()
        histogram(results)
239