xref: /OK3568_Linux_fs/yocto/poky/scripts/contrib/patchreview.py (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1#! /usr/bin/env python3
2#
3# SPDX-License-Identifier: GPL-2.0-only
4#
5
6# TODO
7# - option to just list all broken files
8# - test suite
9# - validate signed-off-by
10
# Recognised Upstream-Status values, in lower case
status_values = (
    "accepted",
    "pending",
    "inappropriate",
    "backport",
    "submitted",
    "denied",
    "inactive-upstream",
)
12
class Result:
    """
    The findings for one patch file.

    All attributes are class-level defaults; assigning to an instance
    overrides them for that instance only.
    """

    # -- Signed-off-by --
    # The Signed-off-by tag value
    sob = None
    # True when the patch has no Signed-off-by tag
    missing_sob = False
    # False unless the Signed-off-by tag is malformed in some way
    malformed_sob = False

    # -- Upstream-Status --
    # The upstream status value (Pending, etc)
    upstream_status = None
    # True when the patch has no Upstream-Status tag
    missing_upstream_status = False
    # None, or a string holding the bad bit of a malformed Upstream-Status tag
    malformed_upstream_status = None
    # True when the Upstream-Status value is unknown
    unknown_upstream_status = False

    # -- CVE --
    # True when a patch looks like a CVE but doesn't have a CVE tag
    missing_cve = False
30
def blame_patch(patch):
    """
    Ask git for the history of a patch file.

    Returns a list of strings, one per line of "git log" output, each
    formatted as "commit summary (author name <author email>)".
    """
    import subprocess

    command = (
        "git", "log",
        "--follow", "--find-renames", "--diff-filter=A",
        "--format=%s (%aN <%aE>)",
        "--", patch,
    )
    raw = subprocess.check_output(command)
    history = raw.decode("utf-8")
    return history.splitlines()
41
def patchreview(path, patches):
    """
    Scan patch files for Signed-off-by, Upstream-Status and CVE tags.

    path: directory that the names in patches are joined onto
    patches: iterable of patch filenames

    Returns a dictionary mapping os.path.join(path, patch) to a Result
    recording what was found (or not found) in that patch.
    """
    import os.path
    import re

    # General pattern: start of line, optional whitespace, tag with optional
    # hyphen or spaces, maybe a colon, some whitespace, then the value, all case
    # insensitive.
    sob_re = re.compile(r"^[\t ]*(Signed[-_ ]off[-_ ]by:?)[\t ]*(.+)", re.IGNORECASE | re.MULTILINE)
    status_re = re.compile(r"^[\t ]*(Upstream[-_ ]Status:?)[\t ]*([\w-]*)", re.IGNORECASE | re.MULTILINE)
    cve_tag_re = re.compile(r"^[\t ]*(CVE:)[\t ]*(.*)", re.IGNORECASE | re.MULTILINE)
    cve_re = re.compile(r"cve-[0-9]{4}-[0-9]{4,6}", re.IGNORECASE)

    results = {}

    for patch in patches:

        fullpath = os.path.join(path, patch)
        result = Result()
        results[fullpath] = result

        # Read as ASCII and drop anything that doesn't decode. Use a context
        # manager so the file is closed as soon as it has been read.
        with open(fullpath, encoding='ascii', errors='ignore') as f:
            content = f.read()

        # Find the Signed-off-by tag
        match = sob_re.search(content)
        if match:
            value = match.group(1)
            # The regex is deliberately lax; anything but the exact spelling
            # is recorded as malformed
            if value != "Signed-off-by:":
                result.malformed_sob = value
            result.sob = match.group(2)
        else:
            result.missing_sob = True


        # Find the Upstream-Status tag
        match = status_re.search(content)
        if match:
            value = match.group(1)
            if value != "Upstream-Status:":
                result.malformed_upstream_status = value

            value = match.group(2).lower()
            # TODO: check case
            if value not in status_values:
                result.unknown_upstream_status = True
            result.upstream_status = value
        else:
            result.missing_upstream_status = True

        # Check that patches which looks like CVEs have CVE tags
        if cve_re.search(patch) or cve_re.search(content):
            if not cve_tag_re.search(content):
                result.missing_cve = True
        # TODO: extract CVE list

    return results
96
97
def analyse(results, want_blame=False, verbose=True):
    """
    Print warnings and summary statistics for a set of patch results.

    results: dictionary mapping patch path to its Result
    want_blame: display blame data for each malformed patch
    verbose: display per-file results instead of just summary

    Nothing is returned, all output is printed.
    """

    # want_blame requires verbose, so disable blame if we're not verbose
    want_blame = want_blame and verbose

    total_patches = 0
    missing_sob = 0
    malformed_sob = 0
    missing_status = 0
    malformed_status = 0
    missing_cve = 0
    pending_patches = 0

    for patch in sorted(results):
        r = results[patch]
        total_patches += 1
        bad_status = bool(r.malformed_upstream_status or r.unknown_upstream_status)

        # Build statistics
        if r.missing_sob:
            missing_sob += 1
        if r.malformed_sob:
            malformed_sob += 1
        if r.missing_upstream_status:
            missing_status += 1
        if bad_status:
            malformed_status += 1
        if r.missing_cve:
            missing_cve += 1
        # Patches with a malformed or unknown status are counted as pending.
        # Each patch is counted once, so a malformed tag whose value is
        # "pending" doesn't push the total above the number of patches.
        # NOTE(review): patches with no Upstream-Status tag at all are not
        # counted as pending here - confirm that is intended.
        if bad_status or r.upstream_status == "pending":
            pending_patches += 1

        # Collect warnings, in the order they are reported
        warnings = []
        if r.missing_sob:
            warnings.append("Missing Signed-off-by tag (%s)" % patch)
        if r.malformed_sob:
            warnings.append("Malformed Signed-off-by '%s' (%s)" % (r.malformed_sob, patch))
        if r.missing_cve:
            warnings.append("Missing CVE tag (%s)" % patch)
        if r.missing_upstream_status:
            warnings.append("Missing Upstream-Status tag (%s)" % patch)
        if r.malformed_upstream_status:
            warnings.append("Malformed Upstream-Status '%s' (%s)" % (r.malformed_upstream_status, patch))
        if r.unknown_upstream_status:
            warnings.append("Unknown Upstream-Status value '%s' (%s)" % (r.upstream_status, patch))

        if verbose:
            for warning in warnings:
                print(warning)

        # Any warning at all makes the patch worth blaming
        if want_blame and warnings:
            print("\n".join(blame_patch(patch)) + "\n")

    def percent(num):
        # Format a count along with its share of all patches; with no patches
        # there is no meaningful percentage
        try:
            return "%d (%d%%)" % (num, round(num * 100.0 / total_patches))
        except ZeroDivisionError:
            return "N/A"

    if verbose:
        print()

    print("""Total patches found: %d
Patches missing Signed-off-by: %s
Patches with malformed Signed-off-by: %s
Patches missing CVE: %s
Patches missing Upstream-Status: %s
Patches with malformed Upstream-Status: %s
Patches in Pending state: %s""" % (total_patches,
                                   percent(missing_sob),
                                   percent(malformed_sob),
                                   percent(missing_cve),
                                   percent(missing_status),
                                   percent(malformed_status),
                                   percent(pending_patches)))
188
189
190
def histogram(results):
    """
    Print a text histogram of the Upstream-Status values in results.

    results: dictionary mapping patch path to its Result

    One line is printed per distinct status, in first-seen order, with one
    "#" per percent of all patches (rounded up) followed by the count.
    Patches without a status are shown as "No status".
    """
    # Standard library only, so the histogram works without extra packages
    import collections
    import math

    counts = collections.Counter(r.upstream_status for r in results.values())
    total = len(results)
    for status, count in counts.items():
        bar = "#" * int(math.ceil(float(count) / total * 100))
        label = status.capitalize() if status else "No status"
        print("%-20s %s (%d)" % (label, bar, count))
198
199
# Command line entry point: scan the patches tracked by git under the given
# directory, print the analysis, and optionally update a JSON statistics file
# and print a histogram.
if __name__ == "__main__":
    import argparse
    import subprocess
    import os

    args = argparse.ArgumentParser(description="Patch Review Tool")
    args.add_argument("-b", "--blame", action="store_true", help="show blame for malformed patches")
    args.add_argument("-v", "--verbose", action="store_true", help="show per-patch results")
    args.add_argument("-g", "--histogram", action="store_true", help="show patch histogram")
    args.add_argument("-j", "--json", help="update JSON")
    args.add_argument("directory", help="directory to scan")
    args = args.parse_args()

    # Ask git for NUL-terminated paths (-z) so that filenames containing
    # whitespace aren't split into several bogus entries
    listing = subprocess.check_output(("git", "-C", args.directory, "ls-files", "-z", "recipes-*/**/*.patch", "recipes-*/**/*.diff")).decode("utf-8")
    patches = [name for name in listing.split("\0") if name]
    results = patchreview(args.directory, patches)
    analyse(results, want_blame=args.blame, verbose=args.verbose)

    if args.json:
        import json
        import os.path
        import collections

        # Append to the existing data if there is any, otherwise start afresh
        if os.path.isfile(args.json):
            with open(args.json) as f:
                data = json.load(f)
        else:
            data = []

        row = collections.Counter()
        row["total"] = len(results)
        # Commit date of HEAD as seconds since the epoch
        row["date"] = subprocess.check_output(["git", "-C", args.directory, "show", "-s", "--pretty=format:%cd", "--date=format:%s"]).decode("utf-8").strip()
        for r in results.values():
            if r.upstream_status in status_values:
                row[r.upstream_status] += 1
            if r.malformed_upstream_status or r.missing_upstream_status:
                row['malformed-upstream-status'] += 1
            if r.malformed_sob or r.missing_sob:
                row['malformed-sob'] += 1

        data.append(row)
        # Close the file explicitly so the data is flushed to disk
        with open(args.json, "w") as f:
            json.dump(data, f)

    if args.histogram:
        print()
        histogram(results)
239