#!/usr/bin/env python
# SPDX-License-Identifier: GPL-2.0
# Copyright Thomas Gleixner <tglx@linutronix.de>

from argparse import ArgumentParser
from ply import lex, yacc
import locale
import traceback
import sys
import git
import re
import os


class ParserException(Exception):
    """Raised when a license expression cannot be tokenized or parsed.

    tok is the offending PLY token (None when the expression ended early),
    txt is the human readable reason.
    """
    def __init__(self, tok, txt):
        self.tok = tok
        self.txt = txt


class SPDXException(Exception):
    """Raised when the data in the LICENSES directory is inconsistent.

    el is the git tree element of the offending file (may be None),
    txt is the human readable reason.
    """
    def __init__(self, el, txt):
        self.el = el
        self.txt = txt


class SPDXdata(object):
    """Container for the license information read from LICENSES."""
    def __init__(self):
        # Number of license / exception files which carry a License-Text tag
        self.license_files = 0
        self.exception_files = 0
        # Upper-cased valid license identifiers
        self.licenses = [ ]
        # Upper-cased exception identifier -> list of licenses it applies to
        self.exceptions = { }


# Read the spdx data from the LICENSES directory
def read_spdxdata(repo):
    """Collect license and exception identifiers from the LICENSES tree.

    repo is a git repository object; the files are looked up in the tree of
    its head commit but read from the working directory.

    Returns a populated SPDXdata instance.

    Raises SPDXException on a duplicate license identifier, on an exception
    referring to an unknown license, and on an exception file which reaches
    its License-Text tag without having listed any SPDX-Licenses.
    """

    # The subdirectories of LICENSES in the kernel source
    # Note: exceptions needs to be parsed as last directory.
    license_dirs = [ "preferred", "dual", "deprecated", "exceptions" ]
    lictree = repo.head.commit.tree['LICENSES']

    spdx = SPDXdata()

    for d in license_dirs:
        for el in lictree[d].traverse():
            if not os.path.isfile(el.path):
                continue

            exception = None
            # Use a context manager so the file is closed even when an
            # SPDXException aborts the scan.
            with open(el.path) as fd:
                for line in fd:
                    if line.startswith('Valid-License-Identifier:'):
                        lid = line.split(':')[1].strip().upper()
                        if lid in spdx.licenses:
                            raise SPDXException(el, 'Duplicate License Identifier: %s' %lid)
                        spdx.licenses.append(lid)

                    elif line.startswith('SPDX-Exception-Identifier:'):
                        exception = line.split(':')[1].strip().upper()
                        spdx.exceptions[exception] = []

                    elif line.startswith('SPDX-Licenses:'):
                        # Comma separated list; drop all blanks and tabs
                        lics = line.split(':')[1].upper().strip()
                        lics = lics.replace(' ', '').replace('\t', '')
                        for lic in lics.split(','):
                            if lic not in spdx.licenses:
                                raise SPDXException(None, 'Exception %s missing license %s' %(exception, lic))
                            spdx.exceptions[exception].append(lic)

                    elif line.startswith("License-Text:"):
                        if exception:
                            if not spdx.exceptions[exception]:
                                raise SPDXException(el, 'Exception %s is missing SPDX-Licenses' %exception)
                            spdx.exception_files += 1
                        else:
                            spdx.license_files += 1
                        # The tags of interest precede the license text
                        break
    return spdx


class id_parser(object):
    """PLY based lexer/parser validating SPDX license expressions.

    NOTE: PLY takes the docstrings of the t_* and p_* methods as the token
    regular expressions and grammar rules. They must not be turned into
    ordinary documentation; use comments for those methods instead.
    """

    reserved = [ 'AND', 'OR', 'WITH' ]
    tokens = [ 'LPAR', 'RPAR', 'ID', 'EXC' ] + reserved

    # AND and OR share a single non-associative precedence level
    precedence = ( ('nonassoc', 'AND', 'OR'), )

    t_ignore = ' \t'

    def __init__(self, spdx):
        """Build lexer and parser; spdx is the SPDXdata to validate against."""
        self.spdx = spdx
        self.lasttok = None
        self.lastid = None
        self.lexer = lex.lex(module = self, reflags = re.UNICODE)
        # Initialize the parser. No debug file and no parser rules stored on disk
        # The rules are small enough to be generated on the fly
        self.parser = yacc.yacc(module = self, write_tables = False, debug = False)
        # Statistics
        self.lines_checked = 0
        self.checked = 0
        self.spdx_valid = 0
        self.spdx_errors = 0
        self.curline = 0
        self.deepest = 0

    # Validate License and Exception IDs
    def validate(self, tok):
        """Check an ID or EXC token against the known SPDX data.

        A license ID must be known. An exception ID must be known and must
        be listed as applicable to the license ID seen just before it.

        Raises ParserException when the check fails.
        """
        ident = tok.value.upper()
        if tok.type == 'ID':
            if ident not in self.spdx.licenses:
                raise ParserException(tok, 'Invalid License ID')
            self.lastid = ident
        elif tok.type == 'EXC':
            if ident not in self.spdx.exceptions:
                raise ParserException(tok, 'Invalid Exception ID')
            if self.lastid not in self.spdx.exceptions[ident]:
                raise ParserException(tok, 'Exception not valid for license %s' %self.lastid)
            self.lastid = None
        elif tok.type != 'WITH':
            # WITH keeps the preceding license ID for the exception check
            self.lastid = None

    # Lexer functions
    def t_RPAR(self, tok):
        r'\)'
        self.lasttok = tok.type
        return tok

    def t_LPAR(self, tok):
        r'\('
        self.lasttok = tok.type
        return tok

    # Matches license IDs, exception IDs and the reserved words. The token
    # type is derived from the value and from the previous token.
    def t_ID(self, tok):
        r'[A-Za-z.0-9\-+]+'

        # Anything word-like directly after an exception is rejected
        if self.lasttok == 'EXC':
            print(tok)
            raise ParserException(tok, 'Missing parentheses')

        tok.value = tok.value.strip()
        val = tok.value.upper()

        if val in self.reserved:
            tok.type = val
        elif self.lasttok == 'WITH':
            tok.type = 'EXC'

        self.lasttok = tok.type
        self.validate(tok)
        return tok

    # Called by the lexer for input which matches no token rule
    def t_error(self, tok):
        raise ParserException(tok, 'Invalid token')

    # Grammar of a license expression
    def p_expr(self, p):
        '''expr : ID
                | ID WITH EXC
                | expr AND expr
                | expr OR expr
                | LPAR expr RPAR'''
        pass

    # Called by the parser on a syntax error; p is None at end of input
    def p_error(self, p):
        if not p:
            raise ParserException(None, 'Unfinished license expression')
        else:
            raise ParserException(p, 'Syntax error')

    def parse(self, expr):
        """Parse a single license expression string.

        Raises ParserException if the expression is invalid.
        """
        self.lasttok = None
        self.lastid = None
        self.parser.parse(expr, lexer = self.lexer)

    def parse_lines(self, fd, maxlines, fname):
        """Search the head of a file for an SPDX tag and validate it.

        fd is an iterable of byte strings (a file opened in binary mode),
        maxlines the number of leading lines to examine and fname the name
        used in diagnostics.

        Only the first SPDX-License-Identifier line is checked. Errors are
        reported on stdout and accounted in the statistics counters; no
        exception is propagated for an invalid expression.
        """
        self.checked += 1
        self.curline = 0
        # The preferred encoding does not change while scanning
        encoding = locale.getpreferredencoding(False)
        try:
            for line in fd:
                line = line.decode(encoding, errors='ignore')
                self.curline += 1
                if self.curline > maxlines:
                    break
                self.lines_checked += 1
                if line.find("SPDX-License-Identifier:") < 0:
                    continue
                expr = line.split(':')[1].strip()
                # Remove trailing comment closure
                if line.strip().endswith('*/'):
                    expr = expr.rstrip('*/').strip()
                # Remove trailing xml comment closure
                if line.strip().endswith('-->'):
                    expr = expr.rstrip('-->').strip()
                # Special case for SH magic boot code files
                if line.startswith('LIST \"'):
                    expr = expr.rstrip('\"').strip()
                self.parse(expr)
                self.spdx_valid += 1
                #
                # Should we check for more SPDX ids in the same file and
                # complain if there are any?
                #
                break

        except ParserException as pe:
            if pe.tok:
                col = line.find(expr) + pe.tok.lexpos
                tok = pe.tok.value
                sys.stdout.write('%s: %d:%d %s: %s\n' %(fname, self.curline, col, pe.txt, tok))
            else:
                # No token means no column information. The argument list
                # has to match the three conversions of the format string.
                sys.stdout.write('%s: %d:0 %s\n' %(fname, self.curline, pe.txt))
            self.spdx_errors += 1


def scan_git_tree(tree):
    """Check every regular file below the given git tree object.

    Relies on the module level 'parser' and 'args' objects which are set up
    when the file is executed as a script.
    """
    for el in tree.traverse():
        # Exclude stuff which would make pointless noise
        # FIXME: Put this somewhere more sensible
        if el.path.startswith("LICENSES"):
            continue
        if el.path.find("license-rules.rst") >= 0:
            continue
        if not os.path.isfile(el.path):
            continue
        with open(el.path, 'rb') as fd:
            parser.parse_lines(fd, args.maxlines, el.path)


def scan_git_subtree(tree, path):
    """Descend from tree along the '/' separated path and scan that subtree."""
    for p in path.strip('/').split('/'):
        tree = tree[p]
    scan_git_tree(tree)


if __name__ == '__main__':

    ap = ArgumentParser(description='SPDX expression checker')
    ap.add_argument('path', nargs='*', help='Check path or file. If not given full git tree scan. For stdin use "-"')
    ap.add_argument('-m', '--maxlines', type=int, default=15,
                    help='Maximum number of lines to scan in a file. Default 15')
    ap.add_argument('-v', '--verbose', action='store_true', help='Verbose statistics output')
    args = ap.parse_args()

    # Sanity check path arguments
    if '-' in args.path and len(args.path) > 1:
        sys.stderr.write('stdin input "-" must be the only path argument\n')
        sys.exit(1)

    try:
        # Use git to get the valid license expressions
        repo = git.Repo(os.getcwd())
        assert not repo.bare

        # Initialize SPDX data
        spdx = read_spdxdata(repo)

        # Initialize the parser
        parser = id_parser(spdx)

    except SPDXException as se:
        if se.el:
            sys.stderr.write('%s: %s\n' %(se.el.path, se.txt))
        else:
            sys.stderr.write('%s\n' %se.txt)
        sys.exit(1)

    except Exception as ex:
        sys.stderr.write('FAIL: %s\n' %ex)
        sys.stderr.write('%s\n' %traceback.format_exc())
        sys.exit(1)

    try:
        if len(args.path) and args.path[0] == '-':
            # Reopen stdin in binary mode; parse_lines decodes the lines
            stdin = os.fdopen(sys.stdin.fileno(), 'rb')
            parser.parse_lines(stdin, args.maxlines, '-')
        else:
            if args.path:
                for p in args.path:
                    if os.path.isfile(p):
                        # Close each file right after it has been checked
                        with open(p, 'rb') as fd:
                            parser.parse_lines(fd, args.maxlines, p)
                    elif os.path.isdir(p):
                        scan_git_subtree(repo.head.reference.commit.tree, p)
                    else:
                        sys.stderr.write('path %s does not exist\n' %p)
                        sys.exit(1)
            else:
                # Full git tree scan
                scan_git_tree(repo.head.commit.tree)

        if args.verbose:
            sys.stderr.write('\n')
            sys.stderr.write('License files: %12d\n' %spdx.license_files)
            sys.stderr.write('Exception files: %12d\n' %spdx.exception_files)
            sys.stderr.write('License IDs %12d\n' %len(spdx.licenses))
            sys.stderr.write('Exception IDs %12d\n' %len(spdx.exceptions))
            sys.stderr.write('\n')
            sys.stderr.write('Files checked: %12d\n' %parser.checked)
            sys.stderr.write('Lines checked: %12d\n' %parser.lines_checked)
            sys.stderr.write('Files with SPDX: %12d\n' %parser.spdx_valid)
            sys.stderr.write('Files with errors: %12d\n' %parser.spdx_errors)

        sys.exit(0)

    except Exception as ex:
        sys.stderr.write('FAIL: %s\n' %ex)
        sys.stderr.write('%s\n' %traceback.format_exc())
        sys.exit(1)