#
# Copyright BitBake Contributors
#
# SPDX-License-Identifier: GPL-2.0-only
#

"""
BitBake code parser

Parses actual code (i.e. python and shell) for functions and in-line
expressions. Used mainly to determine dependencies on other functions
and variables within the BitBake metadata. Also provides a cache for
this information in order to speed up processing.

(Not to be confused with the code that parses the metadata itself,
see lib/bb/parse/ for that).

NOTE: if you change how the parsers gather information you will almost
certainly need to increment CodeParserCache.CACHE_VERSION below so that
any existing codeparser cache gets invalidated. Additionally you'll need
to increment __cache_version__ in cache.py in order to ensure that old
recipe caches don't trigger "Taskhash mismatch" errors.

"""

import ast
import sys
import codegen
import logging
import bb.pysh as pysh
import bb.utils, bb.data
import hashlib
from itertools import chain
from bb.pysh import pyshyacc, pyshlex
from bb.cache import MultiProcessCache

logger = logging.getLogger('BitBake.CodeParser')

def bbhash(s):
    """Return the hexadecimal SHA-256 digest of the string s, encoded as UTF-8."""
    return hashlib.sha256(s.encode("utf-8")).hexdigest()

def check_indent(codestr):
    """If the code is indented, add a top level piece of code to 'remove' the indentation

    The leading run of newlines, tabs and spaces is examined. When that run
    ends in a tab or a space (i.e. the first real line is indented) the code
    is wrapped in an "if 1:" block; otherwise codestr is returned unchanged.
    A string that is empty or consists only of whitespace is also returned
    unchanged.
    """

    length = len(codestr)

    # Find the index of the first character that is not leading whitespace.
    # The bound check keeps empty/whitespace-only input from running off the
    # end of the string.
    i = 0
    while i < length and codestr[i] in ("\n", "\t", " "):
        i += 1

    if i == 0 or i == length:
        # No leading whitespace at all, or nothing but whitespace: there is
        # no indented statement to wrap
        return codestr

    if codestr[i-1] == "\t" or codestr[i-1] == " ":
        if codestr[0] == "\n":
            # Since we're adding a line, we need to remove one line of any empty padding
            # to ensure line numbers are correct
            codestr = codestr[1:]
        return "if 1:\n" + codestr

    return codestr

# A custom getstate/setstate using tuples is actually worth 15% cachesize by
# avoiding duplication of the attribute names!
class SetCache(object):
    """Pool of frozensets of interned strings, shared to avoid duplicates."""

    def __init__(self):
        # Maps hash(frozenset) -> the frozenset first stored under that hash
        self.setcache = {}

    def internSet(self, items):
        """Return a pooled frozenset holding the interned strings of items.

        Every string is passed through sys.intern(). If a set with the same
        hash has been pooled before, that earlier object is returned,
        otherwise the new frozenset is pooled and returned.

        NOTE(review): the pool is keyed on hash() only, so two different sets
        whose hashes collide would both resolve to the first one stored.
        """
        s = frozenset(sys.intern(i) for i in items)
        h = hash(s)
        if h in self.setcache:
            return self.setcache[h]
        self.setcache[h] = s
        return s

codecache = SetCache()

class pythonCacheLine(object):
    """Cache entry for a parsed piece of python code.

    Holds three attributes, all built from pooled frozensets:
      refs     - frozenset built from the 'refs' argument
      execs    - frozenset built from the 'execs' argument
      contains - dict mapping each key of the 'contains' argument to a
                 frozenset of its values
    """

    def __init__(self, refs, execs, contains):
        self.refs = codecache.internSet(refs)
        self.execs = codecache.internSet(execs)
        self.contains = {}
        for c in contains:
            self.contains[c] = codecache.internSet(contains[c])

    def __getstate__(self):
        # Pickle as a plain tuple rather than an attribute dict
        return (self.refs, self.execs, self.contains)

    def __setstate__(self, state):
        (refs, execs, contains) = state
        # Going back through __init__ re-interns the sets via codecache
        self.__init__(refs, execs, contains)

    def __hash__(self):
        # Combine the set hashes with the contains entries in sorted key
        # order so the result does not depend on dict ordering
        parts = (hash(self.refs), hash(self.execs))
        for c in sorted(self.contains.keys()):
            parts = parts + (c, hash(self.contains[c]))
        return hash(parts)

    def __repr__(self):
        return " ".join([str(self.refs), str(self.execs), str(self.contains)])


class shellCacheLine(object):
    """Cache entry for a parsed piece of shell code: a pooled frozenset 'execs'."""

    def __init__(self, execs):
        self.execs = codecache.internSet(execs)

    def __getstate__(self):
        # The parentheses do not make a tuple: the pickled state is the
        # frozenset itself, and __setstate__ below expects exactly that
        return (self.execs)

    def __setstate__(self, state):
        (execs) = state
        # Going back through __init__ re-interns the set via codecache
        self.__init__(execs)

    def __hash__(self):
        return hash(self.execs)

    def __repr__(self):
        return str(self.execs)

class CodeParserCache(MultiProcessCache):
    """Cache of python/shell parse results, built on MultiProcessCache.

    The cachedata and cachedata_extras attributes used here are provided by
    the MultiProcessCache base class (bb.cache); index 0 holds the python
    entries and index 1 the shell entries, matching create_cachedata().
    """

    cache_file_name = "bb_codeparser.dat"
    # NOTE: you must increment this if you change how the parsers gather information,
    # so that an existing cache gets invalidated. Additionally you'll need
    # to increment __cache_version__ in cache.py in order to ensure that old
    # recipe caches don't trigger "Taskhash mismatch" errors.
    CACHE_VERSION = 11

    def __init__(self):
        MultiProcessCache.__init__(self)
        self.pythoncache = self.cachedata[0]
        self.shellcache = self.cachedata[1]
        self.pythoncacheextras = self.cachedata_extras[0]
        self.shellcacheextras = self.cachedata_extras[1]

        # To avoid duplication in the codeparser cache, keep
        # a lookup of hashes of objects we already have
        self.pythoncachelines = {}
        self.shellcachelines = {}

    def newPythonCacheLine(self, refs, execs, contains):
        """Return a pythonCacheLine for the given data, reusing an existing
        object when one with the same hash has already been created."""
        cacheline = pythonCacheLine(refs, execs, contains)
        h = hash(cacheline)
        if h in self.pythoncachelines:
            return self.pythoncachelines[h]
        self.pythoncachelines[h] = cacheline
        return cacheline

    def newShellCacheLine(self, execs):
        """Return a shellCacheLine for the given data, reusing an existing
        object when one with the same hash has already been created."""
        cacheline = shellCacheLine(execs)
        h = hash(cacheline)
        if h in self.shellcachelines:
            return self.shellcachelines[h]
        self.shellcachelines[h] = cacheline
        return cacheline

    def init_cache(self, d):
        """Initialise the cache through the base class unless the python
        cache already holds entries."""
        # Check if we already have the caches
        if self.pythoncache:
            return

        MultiProcessCache.init_cache(self, d)

        # cachedata gets re-assigned in the parent
        self.pythoncache = self.cachedata[0]
        self.shellcache = self.cachedata[1]

    def create_cachedata(self):
        """Return the empty cache layout: [python entries, shell entries]."""
        data = [{}, {}]
        return data

codeparsercache = CodeParserCache()

def parser_cache_init(d):
    """Initialise the module level codeparser cache from d."""
    codeparsercache.init_cache(d)

def parser_cache_save():
    """Call save_extras() on the module level codeparser cache."""
    codeparsercache.save_extras()

def parser_cache_savemerge():
    """Call save_merge() on the module level codeparser cache."""
    codeparsercache.save_merge()

Logger = logging.getLoggerClass()
class BufferedLogger(Logger):
    """Logger which stores records until flush() hands them to a target logger."""

    def __init__(self, name, level=0, target=None):
        Logger.__init__(self, name)
        self.setLevel(level)
        self.buffer = []
        self.target = target

    def handle(self, record):
        # Hold on to the record instead of emitting it
        self.buffer.append(record)

    def flush(self):
        """Pass each buffered record the target is enabled for to the target,
        then empty the buffer."""
        for record in self.buffer:
            if self.target.isEnabledFor(record.levelno):
                self.target.handle(record)
        self.buffer = []

class DummyLogger():
    """Placeholder used until a parse is really needed; flush() does nothing."""

    def flush(self):
        return

class PythonParser():
    """Collects the variables referenced and functions executed by python code.

    After parse_python() the results are available as:
      references - set of variable names ('VAR' or 'VAR[flag]')
      execs      - set of called function names
      contains   - dict mapping a variable name to the set of values it is
                   checked against
    """

    getvars = (".getVar", ".appendVar", ".prependVar", "oe.utils.conditional")
    getvarflags = (".getVarFlag", ".appendVarFlag", ".prependVarFlag")
    containsfuncs = ("bb.utils.contains", "base_contains")
    containsanyfuncs = ("bb.utils.contains_any", "bb.utils.filter")
    execfuncs = ("bb.build.exec_func", "bb.build.exec_task")

    def __init__(self, name, log):
        self.name = name
        self.var_execs = set()
        self.contains = {}
        self.execs = set()
        self.references = set()
        self._log = log
        # Defer init as expensive
        self.log = DummyLogger()

        self.unhandled_message = "in call of %s, argument '%s' is not a string literal"
        self.unhandled_message = "while parsing %s, %s" % (name, self.unhandled_message)

    @staticmethod
    def _literal_str(node):
        """Return the value of a string literal AST node, or None for any
        other node.

        String literals are ast.Constant nodes from Python 3.8 onwards.
        ast.Str and its 's' attribute are deprecated and later removed, so
        they are only consulted on interpreters older than 3.8, where the
        parser still produces them. Callers must compare against None as an
        empty string literal is a valid result.
        """
        if isinstance(node, ast.Constant):
            if isinstance(node.value, str):
                return node.value
            return None
        if sys.version_info < (3, 8) and isinstance(node, ast.Str):
            return node.s
        return None

    def warn(self, func, arg):
        """Warn about calls of bitbake APIs which pass a non-literal
        argument for the variable name, as we're not able to track such
        a reference.
        """

        try:
            funcstr = codegen.to_source(func)
            argstr = codegen.to_source(arg)
        except TypeError:
            self.log.debug2('Failed to convert function and argument to source form')
        else:
            self.log.debug(self.unhandled_message % (funcstr, argstr))

    def visit_Call(self, node):
        """Record the information carried by a single ast.Call node.

        Variable accessors and contains-style helpers add to references or
        contains, '.expand' calls merge in what the expanded string refers
        to, the exec helpers add to var_execs, and any other call to a plain
        or dotted name is added to execs. Arguments which are not string
        literals cannot be tracked and are reported through warn().
        """
        name = self.called_node_name(node.func)
        if name and (name.endswith(self.getvars) or name.endswith(self.getvarflags) or name in self.containsfuncs or name in self.containsanyfuncs):
            varname = self._literal_str(node.args[0])
            if varname is None:
                self.warn(node.func, node.args[0])
            elif name in self.containsfuncs or name in self.containsanyfuncs or name.endswith(self.getvarflags):
                # These are the calls whose second argument matters
                second = self._literal_str(node.args[1])
                if second is not None and name in self.containsfuncs:
                    self.contains.setdefault(varname, set()).add(second)
                elif second is not None and name in self.containsanyfuncs:
                    self.contains.setdefault(varname, set()).update(second.split())
                elif name.endswith(self.getvarflags):
                    if second is not None:
                        self.references.add('%s[%s]' % (varname, second))
                    else:
                        self.warn(node.func, node.args[1])
                else:
                    # contains-style call with a non-literal value: all we
                    # know is that the variable itself is referenced
                    self.references.add(varname)
            else:
                self.references.add(varname)
        elif name and name.endswith(".expand"):
            value = self._literal_str(node.args[0])
            if value is not None:
                d = bb.data.init()
                parser = d.expandWithRefs(value, self.name)
                self.references |= parser.references
                self.execs |= parser.execs
                for varname in parser.contains:
                    if varname not in self.contains:
                        self.contains[varname] = set()
                    self.contains[varname] |= parser.contains[varname]
        elif name in self.execfuncs:
            funcname = self._literal_str(node.args[0])
            if funcname is not None:
                self.var_execs.add(funcname)
            else:
                self.warn(node.func, node.args[0])
        elif name and isinstance(node.func, (ast.Name, ast.Attribute)):
            self.execs.add(name)

    def called_node_name(self, node):
        """Given a called node, return its original string form

        Only chains of attribute accesses ending in a plain name (a.b.c) are
        handled; None is returned for anything else.
        """
        components = []
        while node:
            if isinstance(node, ast.Attribute):
                components.append(node.attr)
                node = node.value
            elif isinstance(node, ast.Name):
                components.append(node.id)
                return '.'.join(reversed(components))
            else:
                break

    def parse_python(self, node, lineno=0, filename="<string>"):
        """Parse the python code in the string node, filling in references,
        execs and contains.

        Results are taken from the codeparser cache when the code has been
        seen before, otherwise the code is compiled to an AST and the result
        is stored in the cache extras. lineno is the number of blank lines
        used to pad the code and filename is passed on to compile(). Empty or
        whitespace-only code is ignored.
        """
        if not node or not node.strip():
            return

        h = bbhash(str(node))

        for cache in (codeparsercache.pythoncache, codeparsercache.pythoncacheextras):
            if h in cache:
                cacheline = cache[h]
                # Copy, so the pooled cache data is never modified by callers
                self.references = set(cacheline.refs)
                self.execs = set(cacheline.execs)
                self.contains = {}
                for i in cacheline.contains:
                    self.contains[i] = set(cacheline.contains[i])
                return

        # Need to parse so take the hit on the real log buffer
        self.log = BufferedLogger('BitBake.Data.PythonParser', logging.DEBUG, self._log)

        # We can't add to the linenumbers for compile, we can pad to the correct number of blank lines though
        node = "\n" * int(lineno) + node
        code = compile(check_indent(str(node)), filename, "exec",
                       ast.PyCF_ONLY_AST)

        for n in ast.walk(code):
            if isinstance(n, ast.Call):
                self.visit_Call(n)

        self.execs.update(self.var_execs)

        codeparsercache.pythoncacheextras[h] = codeparsercache.newPythonCacheLine(self.references, self.execs, self.contains)

class ShellParser():
    """Collects the commands executed by a piece of shell code."""

    def __init__(self, name, log):
        self.funcdefs = set()
        self.allexecs = set()
        self.execs = set()
        self._name = name
        self._log = log
        # Defer init as expensive
        self.log = DummyLogger()

        self.unhandled_template = "unable to handle non-literal command '%s'"
        self.unhandled_template = "while parsing %s, %s" % (name, self.unhandled_template)

    def parse_shell(self, value):
        """Parse the supplied shell code in a string, returning the external
        commands it executes.

        Commands which are shell functions defined within the code itself are
        left out. Results come from the codeparser cache when available and
        are stored in the cache extras otherwise.
        """

        h = bbhash(str(value))

        for cache in (codeparsercache.shellcache, codeparsercache.shellcacheextras):
            if h in cache:
                self.execs = set(cache[h].execs)
                return self.execs

        # Need to parse so take the hit on the real log buffer
        self.log = BufferedLogger('BitBake.Data.%s' % self._name, logging.DEBUG, self._log)

        self._parse_shell(value)
        self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs)

        codeparsercache.shellcacheextras[h] = codeparsercache.newShellCacheLine(self.execs)

        return self.execs

    def _parse_shell(self, value):
        """Run the shell parser over value and process the resulting tokens.

        A parse failure is reported together with the last five lines of the
        code and the exception is re-raised.
        """
        try:
            tokens, _ = pyshyacc.parse(value, eof=True, debug=False)
        except Exception:
            bb.error('Error during parse shell code, the last 5 lines are:\n%s' % '\n'.join(value.split('\n')[-5:]))
            raise

        self.process_tokens(tokens)

    def process_tokens(self, tokens):
        """Process a supplied portion of the syntax tree as returned by
        pyshyacc.parse.
        """

        # Each handler returns a pair (more_tokens, words): nested tokens to
        # descend into and words to hand to process_words(). Either may be
        # None.

        def function_definition(value):
            self.funcdefs.add(value.name)
            return [value.body], None

        def case_clause(value):
            # Element 0 of each item in the case is the list of patterns, and
            # Element 1 of each item in the case is the list of commands to be
            # executed when that pattern matches.
            words = chain(*[item[0] for item in value.items])
            cmds = chain(*[item[1] for item in value.items])
            return cmds, words

        def if_clause(value):
            main = chain(value.cond, value.if_cmds)
            rest = value.else_cmds
            if isinstance(rest, tuple) and rest[0] == "elif":
                # An elif is represented as a nested if clause
                return chain(main, if_clause(rest[1]))
            else:
                return chain(main, rest)

        def simple_command(value):
            return None, chain(value.words, (assign[1] for assign in value.assigns))

        token_handlers = {
            "and_or": lambda x: ((x.left, x.right), None),
            "async": lambda x: ([x], None),
            "brace_group": lambda x: (x.cmds, None),
            "for_clause": lambda x: (x.cmds, x.items),
            "function_definition": function_definition,
            "if_clause": lambda x: (if_clause(x), None),
            "pipeline": lambda x: (x.commands, None),
            "redirect_list": lambda x: ([x.cmd], None),
            "subshell": lambda x: (x.cmds, None),
            "while_clause": lambda x: (chain(x.condition, x.cmds), None),
            "until_clause": lambda x: (chain(x.condition, x.cmds), None),
            "simple_command": simple_command,
            "case_clause": case_clause,
        }

        def process_token_list(tokens):
            for token in tokens:
                if isinstance(token, list):
                    process_token_list(token)
                    continue
                name, value = token
                try:
                    more_tokens, words = token_handlers[name](value)
                except KeyError:
                    raise NotImplementedError("Unsupported token type " + name)

                if more_tokens:
                    self.process_tokens(more_tokens)

                if words:
                    self.process_words(words)

        process_token_list(tokens)

    def process_words(self, words):
        """Process a set of 'words' in pyshyacc parlance, which includes
        extraction of executed commands from $() blocks, as well as grabbing
        the command name argument.
        """

        words = list(words)
        # First pass: parse the contents of `...` and $(...) substitutions.
        # Iterate over a copy as words may be removed along the way.
        for word in list(words):
            wtree = pyshlex.make_wordtree(word[1])
            for part in wtree:
                if not isinstance(part, list):
                    continue

                if part[0] in ('`', '$('):
                    command = pyshlex.wordtree_as_string(part[1:-1])
                    self._parse_shell(command)

                    # A command word which contains a substitution is dropped
                    # from the candidates looked at below
                    if word[0] in ("cmd_name", "cmd_word"):
                        if word in words:
                            words.remove(word)

        # Second pass: find the word naming the command being run
        usetoken = False
        for word in words:
            if word[0] in ("cmd_name", "cmd_word") or \
               (usetoken and word[0] == "TOKEN"):
                if "=" in word[1]:
                    # Looks like an assignment, the command may follow as a
                    # plain TOKEN
                    usetoken = True
                    continue

                cmd = word[1]
                if cmd.startswith("$"):
                    self.log.debug(self.unhandled_template % cmd)
                elif cmd == "eval":
                    command = " ".join(word for _, word in words[1:])
                    self._parse_shell(command)
                else:
                    self.allexecs.add(cmd)
                break