#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
#
# Copyright (C) Google LLC, 2018
#
# Author: Tom Roeder <tmroeder@google.com>
#
"""A tool for generating compile_commands.json in the Linux kernel."""

import argparse
import json
import logging
import os
import re
import subprocess
import sys

_DEFAULT_OUTPUT = 'compile_commands.json'
_DEFAULT_LOG_LEVEL = 'WARNING'

# Kbuild stores the command used to build "foo.o" in a hidden ".foo.o.cmd".
_FILENAME_PATTERN = r'^\..*\.cmd$'
# Group 1 is the command line up to (and including) the space before the
# source file; group 2 is the trailing .c source file itself.
_LINE_PATTERN = r'^cmd_[^ ]*\.o := (.* )([^ ]*\.c)$'
_VALID_LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']


def parse_arguments():
    """Sets up and parses command-line arguments.

    Returns:
        log_level: A logging level to filter log output.
        directory: The work directory where the objects were built
            (as an absolute path).
        output: Where to write the compile-commands JSON file.
        ar: Command used for parsing .a archives.
        paths: The list of files/directories to handle to find .cmd files.
            Falls back to the (unmodified) --directory argument when no
            path is given on the command line.
    """
    usage = 'Creates a compile_commands.json database from kernel .cmd files'
    parser = argparse.ArgumentParser(description=usage)

    directory_help = ('specify the output directory used for the kernel build '
                      '(defaults to the working directory)')
    parser.add_argument('-d', '--directory', type=str, default='.',
                        help=directory_help)

    output_help = ('path to the output command database (defaults to ' +
                   _DEFAULT_OUTPUT + ')')
    parser.add_argument('-o', '--output', type=str, default=_DEFAULT_OUTPUT,
                        help=output_help)

    log_level_help = ('the level of log messages to produce (defaults to ' +
                      _DEFAULT_LOG_LEVEL + ')')
    parser.add_argument('--log_level', choices=_VALID_LOG_LEVELS,
                        default=_DEFAULT_LOG_LEVEL, help=log_level_help)

    ar_help = 'command used for parsing .a archives'
    parser.add_argument('-a', '--ar', type=str, default='llvm-ar', help=ar_help)

    paths_help = ('directories to search or files to parse '
                  '(files should be *.o, *.a, or modules.order). '
                  'If nothing is specified, the current directory is searched')
    parser.add_argument('paths', type=str, nargs='*', help=paths_help)

    args = parser.parse_args()

    return (args.log_level,
            os.path.abspath(args.directory),
            args.output,
            args.ar,
            args.paths or [args.directory])


def cmdfiles_in_dir(directory):
    """Generate the iterator of .cmd files found under the directory.

    Walk under the given directory, and yield every .cmd file found.

    Args:
        directory: The directory to search for .cmd files.

    Yields:
        The path to a .cmd file.
    """

    filename_matcher = re.compile(_FILENAME_PATTERN)

    for dirpath, _, filenames in os.walk(directory):
        for filename in filenames:
            if filename_matcher.match(filename):
                yield os.path.join(dirpath, filename)


def to_cmdfile(path):
    """Return the path of the .cmd file used for the given build artifact.

    The .cmd file lives next to the artifact and is named by prefixing the
    artifact's base name with '.' and appending '.cmd'.

    Args:
        path: The path of the build artifact.

    Returns:
        The path to the .cmd file.
    """
    dirname, basename = os.path.split(path)
    return os.path.join(dirname, '.' + basename + '.cmd')


def cmdfiles_for_o(obj):
    """Generate the iterator of .cmd files associated with the object.

    Yield the .cmd file used to build the given object.

    Args:
        obj: The object path.

    Yields:
        The path to the .cmd file.
    """
    yield to_cmdfile(obj)


def cmdfiles_for_a(archive, ar):
    """Generate the iterator of .cmd files associated with the archive.

    Parse the given archive, and yield every .cmd file used to build it.

    Args:
        archive: The archive to parse.
        ar: The command used to list the archive members (run as
            "<ar> -t <archive>").

    Yields:
        The path to every .cmd file found.

    Raises:
        subprocess.CalledProcessError: The ar command exited with a
            non-zero status.
    """
    members = subprocess.check_output([ar, '-t', archive]).decode().split()
    for obj in members:
        yield to_cmdfile(obj)


def cmdfiles_for_modorder(modorder):
    """Generate the iterator of .cmd files associated with the modules.order.

    Parse the given modules.order, and yield every .cmd file used to build the
    contained modules. Exits the program if an entry does not end with .ko.

    Args:
        modorder: The modules.order file to parse.

    Yields:
        The path to every .cmd file found.
    """
    with open(modorder) as f:
        for line in f:
            ko = line.rstrip()
            base, ext = os.path.splitext(ko)
            if ext != '.ko':
                sys.exit('{}: module path must end with .ko'.format(ko))
            mod = base + '.mod'
            # The first line of *.mod lists the objects that compose the
            # module.
            with open(mod) as m:
                for obj in m.readline().split():
                    yield to_cmdfile(obj)


def process_line(root_directory, command_prefix, file_path):
    """Extracts information from a .cmd line and creates an entry from it.

    Args:
        root_directory: The directory that was searched for .cmd files. Usually
            used directly in the "directory" entry in compile_commands.json.
        command_prefix: The extracted command line, up to the last element.
        file_path: The .c file from the end of the extracted command.
            Usually relative to root_directory, but sometimes absolute.

    Returns:
        An entry to append to compile_commands.

    Raises:
        ValueError: Could not find the extracted file based on file_path and
            root_directory.
    """
    # The .cmd files are intended to be included directly by Make, so they
    # escape the pound sign '#', either as a backslash followed by '#' or as
    # '$(pound)' (depending on the kernel version). The compile_commands.json
    # file is not interpreted by Make, so this code replaces the escaped
    # version with '#'.
    # NOTE: the backslash is spelled '\\#' on purpose; a bare backslash-hash in
    # a normal string literal is an invalid escape sequence that newer Python
    # versions warn about.
    prefix = command_prefix.replace('\\#', '#').replace('$(pound)', '#')

    # Use os.path.abspath() to normalize the path resolving '.' and '..' .
    abs_path = os.path.abspath(os.path.join(root_directory, file_path))
    if not os.path.exists(abs_path):
        raise ValueError('File %s not found' % abs_path)
    return {
        'directory': root_directory,
        'file': abs_path,
        'command': prefix + file_path,
    }


def main():
    """Walks through the directory and finds and parses .cmd files."""
    log_level, directory, output, ar, paths = parse_arguments()

    level = getattr(logging, log_level)
    logging.basicConfig(format='%(levelname)s: %(message)s', level=level)

    line_matcher = re.compile(_LINE_PATTERN)

    compile_commands = []

    for path in paths:
        # If 'path' is a directory, handle all .cmd files under it.
        # Otherwise, handle .cmd files associated with the file.
        # Most of built-in objects are linked via archives (built-in.a or
        # lib.a) but some objects are linked to vmlinux directly.
        # Modules are listed in modules.order.
        if os.path.isdir(path):
            cmdfiles = cmdfiles_in_dir(path)
        elif path.endswith('.o'):
            cmdfiles = cmdfiles_for_o(path)
        elif path.endswith('.a'):
            cmdfiles = cmdfiles_for_a(path, ar)
        elif path.endswith('modules.order'):
            cmdfiles = cmdfiles_for_modorder(path)
        else:
            sys.exit('{}: unknown file type'.format(path))

        for cmdfile in cmdfiles:
            # Only the first line of a .cmd file holds the build command.
            with open(cmdfile, 'rt') as f:
                result = line_matcher.match(f.readline())
                if result:
                    try:
                        entry = process_line(directory, result.group(1),
                                             result.group(2))
                        compile_commands.append(entry)
                    except ValueError as err:
                        # A missing source file is not fatal: skip the entry.
                        logging.info('Could not add line from %s: %s',
                                     cmdfile, err)

    with open(output, 'wt') as f:
        json.dump(compile_commands, f, indent=2, sort_keys=True)


if __name__ == '__main__':
    main()