xref: /OK3568_Linux_fs/yocto/poky/bitbake/lib/bb/codeparser.py (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1#
2# Copyright BitBake Contributors
3#
4# SPDX-License-Identifier: GPL-2.0-only
5#
6
7"""
8BitBake code parser
9
10Parses actual code (i.e. python and shell) for functions and in-line
11expressions. Used mainly to determine dependencies on other functions
12and variables within the BitBake metadata. Also provides a cache for
13this information in order to speed up processing.
14
15(Not to be confused with the code that parses the metadata itself,
16see lib/bb/parse/ for that).
17
18NOTE: if you change how the parsers gather information you will almost
19certainly need to increment CodeParserCache.CACHE_VERSION below so that
20any existing codeparser cache gets invalidated. Additionally you'll need
21to increment __cache_version__ in cache.py in order to ensure that old
22recipe caches don't trigger "Taskhash mismatch" errors.
23
24"""
25
26import ast
27import sys
28import codegen
29import logging
30import bb.pysh as pysh
31import bb.utils, bb.data
32import hashlib
33from itertools import chain
34from bb.pysh import pyshyacc, pyshlex
35from bb.cache import MultiProcessCache
36
logger = logging.getLogger('BitBake.CodeParser')

def bbhash(s):
    """Return the hexadecimal SHA-256 digest of the string s (UTF-8 encoded)."""
    digest = hashlib.sha256()
    digest.update(s.encode("utf-8"))
    return digest.hexdigest()
41
def check_indent(codestr):
    """If the code is indented, add a top level piece of code to 'remove' the indentation

    The leading run of newlines, tabs and spaces is examined.  When that run
    ends in a tab or space (i.e. the first real line is indented) the code is
    wrapped in an "if 1:" block so that it compiles as a top level statement.
    One leading newline, if present, is dropped in that case so that line
    numbers stay aligned with the original text.

    Returns the (possibly wrapped) code string.  Empty or whitespace-only
    input is returned unchanged.
    """

    body = codestr.lstrip("\n\t ")
    lead = len(codestr) - len(body)

    # No leading whitespace at all, or nothing but whitespace: there is no
    # indented statement to wrap.  (Scanning character by character used to
    # run off the end of the string in the whitespace-only case.)
    if lead == 0 or not body:
        return codestr

    # Only a trailing tab/space in the leading run means the first statement
    # is indented; a run ending in a newline leaves it at column zero.
    if codestr[lead - 1] in ("\t", " "):
        if codestr[0] == "\n":
            # Since we're adding a line, we need to remove one line of any empty padding
            # to ensure line numbers are correct
            codestr = codestr[1:]
        return "if 1:\n" + codestr

    return codestr
60
61# A custom getstate/setstate using tuples is actually worth 15% cachesize by
62# avoiding duplication of the attribute names!
63
64
class SetCache(object):
    """Pool of frozensets of interned strings.

    Equal sets requested through internSet() share one frozenset object, and
    the strings inside them are interned, so repeated dependency sets cost
    memory only once.
    """
    def __init__(self):
        # Maps each pooled frozenset to itself.  Keying on the set (rather
        # than on its hash value) lets the dict compare for equality, so two
        # different sets whose hashes happen to collide are never confused.
        self.setcache = {}

    def internSet(self, items):
        """Return the pooled frozenset holding the strings in items.

        items is any iterable of str; each element is passed through
        sys.intern().  The same frozenset object is returned for every call
        with an equal collection of strings.
        """
        s = frozenset(sys.intern(i) for i in items)
        return self.setcache.setdefault(s, s)

codecache = SetCache()
82
class pythonCacheLine(object):
    """Cache entry for one parsed python fragment.

    Holds the referenced variables (refs), the executed functions (execs)
    and the per-variable "contains" value sets, all stored as pooled
    frozensets obtained from codecache.
    """
    def __init__(self, refs, execs, contains):
        intern_set = codecache.internSet
        self.refs = intern_set(refs)
        self.execs = intern_set(execs)
        self.contains = {key: intern_set(contains[key]) for key in contains}

    def __getstate__(self):
        # Pickle as a bare tuple so attribute names are not repeated per entry
        return (self.refs, self.execs, self.contains)

    def __setstate__(self, state):
        # Go back through __init__ so the restored sets are pooled again
        refs, execs, contains = state
        self.__init__(refs, execs, contains)

    def __hash__(self):
        # Fold in the contains entries in sorted key order so the result
        # does not depend on dict ordering
        parts = [hash(self.refs), hash(self.execs)]
        for key in sorted(self.contains):
            parts.extend((key, hash(self.contains[key])))
        return hash(tuple(parts))

    def __repr__(self):
        return "%s %s %s" % (self.refs, self.execs, self.contains)
104
105
class shellCacheLine(object):
    """Cache entry for one parsed shell fragment: the set of commands it runs."""
    def __init__(self, execs):
        self.execs = codecache.internSet(execs)

    def __getstate__(self):
        # The pickled state is just the execs frozenset itself
        return self.execs

    def __setstate__(self, state):
        # Go back through __init__ so the restored set is pooled again
        self.__init__(state)

    def __hash__(self):
        return hash(self.execs)

    def __repr__(self):
        return "%s" % (self.execs,)
120
class CodeParserCache(MultiProcessCache):
    """Cache of python and shell parse results, keyed by the hash of the code.

    Slot 0 of the cache data holds python entries and slot 1 holds shell
    entries; the same layout is used for the "extras" data, which is where
    the parsers in this module store newly computed entries.
    """
    cache_file_name = "bb_codeparser.dat"
    # NOTE: you must increment this if you change how the parsers gather information,
    # so that an existing cache gets invalidated. Additionally you'll need
    # to increment __cache_version__ in cache.py in order to ensure that old
    # recipe caches don't trigger "Taskhash mismatch" errors.
    CACHE_VERSION = 11

    def __init__(self):
        super().__init__()
        self.pythoncache, self.shellcache = self.cachedata[0], self.cachedata[1]
        self.pythoncacheextras, self.shellcacheextras = self.cachedata_extras[0], self.cachedata_extras[1]

        # To avoid duplication in the codeparser cache, keep
        # a lookup of hashes of objects we already have
        self.pythoncachelines = {}
        self.shellcachelines = {}

    def newPythonCacheLine(self, refs, execs, contains):
        """Return a pythonCacheLine for the given data, reusing an earlier one with the same hash."""
        candidate = pythonCacheLine(refs, execs, contains)
        return self.pythoncachelines.setdefault(hash(candidate), candidate)

    def newShellCacheLine(self, execs):
        """Return a shellCacheLine for the given data, reusing an earlier one with the same hash."""
        candidate = shellCacheLine(execs)
        return self.shellcachelines.setdefault(hash(candidate), candidate)

    def init_cache(self, d):
        """Load the cache through the parent class unless python entries are already present."""
        # Only initialise if we do not already have the caches
        if not self.pythoncache:
            super().init_cache(d)

            # cachedata gets re-assigned in the parent
            self.pythoncache, self.shellcache = self.cachedata[0], self.cachedata[1]

    def create_cachedata(self):
        """Return fresh, empty cache data: one dict for python, one for shell."""
        return [{}, {}]

codeparsercache = CodeParserCache()
173
def parser_cache_init(d):
    """Initialise the module-level codeparsercache by calling its init_cache() with d."""
    codeparsercache.init_cache(d)
176
def parser_cache_save():
    """Call save_extras() on the module-level codeparsercache."""
    codeparsercache.save_extras()
179
def parser_cache_savemerge():
    """Call save_merge() on the module-level codeparsercache."""
    codeparsercache.save_merge()
182
Logger = logging.getLoggerClass()
class BufferedLogger(Logger):
    """Logger which queues its records and forwards them to a target on flush().

    name   - logger name
    level  - level set on this logger (records below it are never created)
    target - logger that receives the buffered records when flush() is
             called; may be None, in which case records stay buffered
    """
    def __init__(self, name, level=0, target=None):
        Logger.__init__(self, name)
        self.setLevel(level)
        self.buffer = []
        self.target = target

    def handle(self, record):
        """Queue the record instead of dispatching it to handlers."""
        self.buffer.append(record)

    def flush(self):
        """Pass every buffered record the target is enabled for to the target, then empty the buffer.

        With no target there is nowhere to deliver to, so the records are
        left in the buffer (previously this raised AttributeError); they are
        delivered by a later flush() once a target has been assigned.
        """
        target = self.target
        if target is None:
            return
        for record in self.buffer:
            # Filter on the target's level, not ours: this logger is
            # typically more verbose than the place the records end up.
            if target.isEnabledFor(record.levelno):
                target.handle(record)
        self.buffer = []
199
class DummyLogger():
    """Placeholder logger offering only a flush() that does nothing."""
    def flush(self):
        """Do nothing and return None."""
        return None
203
class PythonParser():
    """Collect variable references and called functions from python code.

    After parse_python() the results are available as:
      references - variable names (or "VAR[flag]") passed as string literals
                   to the datastore style accessors listed in getvars and
                   getvarflags
      execs      - dotted names of other called functions, plus the
                   functions named in bb.build.exec_func/exec_task calls
      contains   - mapping of variable name to the set of values it is
                   checked against by the functions in containsfuncs and
                   containsanyfuncs
    """
    getvars = (".getVar", ".appendVar", ".prependVar", "oe.utils.conditional")
    getvarflags = (".getVarFlag", ".appendVarFlag", ".prependVarFlag")
    containsfuncs = ("bb.utils.contains", "base_contains")
    containsanyfuncs = ("bb.utils.contains_any",  "bb.utils.filter")
    execfuncs = ("bb.build.exec_func", "bb.build.exec_task")

    @staticmethod
    def _literal_str(node):
        """Return the text of a string literal AST node, or None for anything else.

        String literals are ast.Constant nodes holding a str.  ast.Str and
        the ".s" attribute are deprecated aliases which newer Python
        releases warn about and then drop, so they are not referenced
        directly; nodes from older interpreters that still produce a
        dedicated "Str" class are recognised by class name.
        """
        if isinstance(node, ast.Constant):
            return node.value if isinstance(node.value, str) else None
        if node.__class__.__name__ == "Str":
            return node.s
        return None

    def warn(self, func, arg):
        """Warn about calls of bitbake APIs which pass a non-literal
        argument for the variable name, as we're not able to track such
        a reference.
        """

        try:
            funcstr = codegen.to_source(func)
            argstr = codegen.to_source(arg)
        except TypeError:
            self.log.debug2('Failed to convert function and argument to source form')
        else:
            self.log.debug(self.unhandled_message % (funcstr, argstr))

    def visit_Call(self, node):
        """Record what a single ast.Call node references, tests or executes."""
        name = self.called_node_name(node.func)
        if name and (name.endswith(self.getvars) or name.endswith(self.getvarflags) or name in self.containsfuncs or name in self.containsanyfuncs):
            # Compare against None throughout: an empty string literal is
            # still a literal.
            varname = self._literal_str(node.args[0])
            if varname is None:
                self.warn(node.func, node.args[0])
                return

            is_contains = name in self.containsfuncs
            is_contains_any = name in self.containsanyfuncs
            is_flag = name.endswith(self.getvarflags)

            # Only these kinds of call take a second argument of interest
            second = None
            if is_contains or is_contains_any or is_flag:
                second = self._literal_str(node.args[1])

            if is_contains and second is not None:
                self.contains.setdefault(varname, set()).add(second)
            elif is_contains_any and second is not None:
                # The value is a whitespace separated list of candidates
                self.contains.setdefault(varname, set()).update(second.split())
            elif is_flag:
                if second is not None:
                    self.references.add('%s[%s]' % (varname, second))
                else:
                    self.warn(node.func, node.args[1])
            else:
                self.references.add(varname)
        elif name and name.endswith(".expand"):
            value = self._literal_str(node.args[0])
            if value is not None:
                # Expand the literal against an empty datastore purely to
                # collect what the expression itself refers to
                d = bb.data.init()
                parser = d.expandWithRefs(value, self.name)
                self.references |= parser.references
                self.execs |= parser.execs
                for varname in parser.contains:
                    self.contains.setdefault(varname, set())
                    self.contains[varname] |= parser.contains[varname]
        elif name in self.execfuncs:
            funcname = self._literal_str(node.args[0])
            if funcname is not None:
                self.var_execs.add(funcname)
            else:
                self.warn(node.func, node.args[0])
        elif name and isinstance(node.func, (ast.Name, ast.Attribute)):
            self.execs.add(name)

    def called_node_name(self, node):
        """Given a called node, return its original string form

        Returns the dotted name for a plain chain of attribute accesses
        ending in a name (e.g. "a.b.c"), and None for anything else.
        """
        components = []
        while node:
            if isinstance(node, ast.Attribute):
                components.append(node.attr)
                node = node.value
            elif isinstance(node, ast.Name):
                components.append(node.id)
                return '.'.join(reversed(components))
            else:
                break

    def __init__(self, name, log):
        """name identifies the code being parsed (used in messages and for
        expansion); log is the logger that buffered messages are flushed to.
        """
        self.name = name
        self.var_execs = set()
        self.contains = {}
        self.execs = set()
        self.references = set()
        self._log = log
        # Defer init as expensive
        self.log = DummyLogger()

        self.unhandled_message = "in call of %s, argument '%s' is not a string literal"
        self.unhandled_message = "while parsing %s, %s" % (name, self.unhandled_message)

    def parse_python(self, node, lineno=0, filename="<string>"):
        """Parse the python source text in node and fill in references, execs and contains.

        lineno is the number of blank lines to put in front of the code so
        that reported line numbers match the original file; filename is
        passed to compile().  Results are looked up in, and new results
        stored to, the codeparser cache keyed by the hash of the text.
        Empty or whitespace-only input is ignored.
        """
        if not node or not node.strip():
            return

        h = bbhash(str(node))

        # Previously parsed code: copy the cached (immutable) sets into
        # fresh mutable ones owned by this parser
        for cache in (codeparsercache.pythoncache, codeparsercache.pythoncacheextras):
            if h in cache:
                entry = cache[h]
                self.references = set(entry.refs)
                self.execs = set(entry.execs)
                self.contains = {}
                for i in entry.contains:
                    self.contains[i] = set(entry.contains[i])
                return

        # Need to parse so take the hit on the real log buffer
        self.log = BufferedLogger('BitBake.Data.PythonParser', logging.DEBUG, self._log)

        # We can't add to the linenumbers for compile, we can pad to the correct number of blank lines though
        node = "\n" * int(lineno) + node
        code = compile(check_indent(str(node)), filename, "exec",
                       ast.PyCF_ONLY_AST)

        for n in ast.walk(code):
            if n.__class__.__name__ == "Call":
                self.visit_Call(n)

        self.execs.update(self.var_execs)

        codeparsercache.pythoncacheextras[h] = codeparsercache.newPythonCacheLine(self.references, self.execs, self.contains)
329
class ShellParser():
    """Work out which external commands a piece of shell code executes.

    funcdefs collects the names of shell functions defined in the code,
    allexecs every command name seen, and execs the commands that are not
    functions defined in the code itself.
    """
    def __init__(self, name, log):
        """name identifies the code being parsed (used in messages); log is
        the logger that buffered messages are flushed to.
        """
        self.funcdefs = set()
        self.allexecs = set()
        self.execs = set()
        self._name = name
        self._log = log
        # Defer init as expensive
        self.log = DummyLogger()

        self.unhandled_template = "while parsing %s, %s" % (name, "unable to handle non-literal command '%s'")

    def parse_shell(self, value):
        """Parse the supplied shell code in a string, returning the external
        commands it executes.
        """

        h = bbhash(str(value))

        # Reuse an earlier result for identical code if there is one
        for cache in (codeparsercache.shellcache, codeparsercache.shellcacheextras):
            if h in cache:
                self.execs = set(cache[h].execs)
                return self.execs

        # Need to parse so take the hit on the real log buffer
        self.log = BufferedLogger('BitBake.Data.%s' % self._name, logging.DEBUG, self._log)

        self._parse_shell(value)
        # Functions defined by the code itself are not external commands
        self.execs = self.allexecs - self.funcdefs

        codeparsercache.shellcacheextras[h] = codeparsercache.newShellCacheLine(self.execs)

        return self.execs

    def _parse_shell(self, value):
        """Run the shell grammar over value and process the resulting tokens.

        A parse failure is reported together with the last five lines of
        the input and then re-raised.
        """
        try:
            tokens, _ = pyshyacc.parse(value, eof=True, debug=False)
        except Exception:
            tail = '\n'.join(value.split('\n')[-5:])
            bb.error('Error during parse shell code, the last 5 lines are:\n%s' % tail)
            raise

        self.process_tokens(tokens)

    def process_tokens(self, tokens):
        """Process a supplied portion of the syntax tree as returned by
        pyshyacc.parse.
        """

        # Each handler maps a token value to a (more_tokens, words) pair:
        # nested tokens to descend into, and words to scan for commands.

        def function_definition(value):
            self.funcdefs.add(value.name)
            return [value.body], None

        def case_clause(value):
            # Element 0 of each item in the case is the list of patterns, and
            # Element 1 of each item in the case is the list of commands to be
            # executed when that pattern matches.
            patterns = chain.from_iterable([item[0] for item in value.items])
            commands = chain.from_iterable([item[1] for item in value.items])
            return commands, patterns

        def if_clause(value):
            head = chain(value.cond, value.if_cmds)
            tail = value.else_cmds
            # An "elif" is stored as a nested if clause; flatten it
            if isinstance(tail, tuple) and tail[0] == "elif":
                tail = if_clause(tail[1])
            return chain(head, tail)

        def simple_command(value):
            assigned = (assign[1] for assign in value.assigns)
            return None, chain(value.words, assigned)

        def commands_only(value):
            return value.cmds, None

        def loop_clause(value):
            return chain(value.condition, value.cmds), None

        token_handlers = {
            "and_or": lambda v: ((v.left, v.right), None),
            "async": lambda v: ([v], None),
            "brace_group": commands_only,
            "for_clause": lambda v: (v.cmds, v.items),
            "function_definition": function_definition,
            "if_clause": lambda v: (if_clause(v), None),
            "pipeline": lambda v: (v.commands, None),
            "redirect_list": lambda v: ([v.cmd], None),
            "subshell": commands_only,
            "while_clause": loop_clause,
            "until_clause": loop_clause,
            "simple_command": simple_command,
            "case_clause": case_clause,
        }

        def process_token_list(tokens):
            for token in tokens:
                if isinstance(token, list):
                    process_token_list(token)
                else:
                    name, value = token
                    try:
                        more_tokens, words = token_handlers[name](value)
                    except KeyError:
                        raise NotImplementedError("Unsupported token type " + name)

                    if more_tokens:
                        self.process_tokens(more_tokens)

                    if words:
                        self.process_words(words)

        process_token_list(tokens)

    def process_words(self, words):
        """Process a set of 'words' in pyshyacc parlance, which includes
        extraction of executed commands from $() blocks, as well as grabbing
        the command name argument.
        """

        words = list(words)

        # First pass: parse the contents of every command substitution, and
        # drop command words that are themselves built from a substitution
        # (iterate over a snapshot because words is modified on the way).
        for word in tuple(words):
            for part in pyshlex.make_wordtree(word[1]):
                if isinstance(part, list) and part[0] in ('`', '$('):
                    command = pyshlex.wordtree_as_string(part[1:-1])
                    self._parse_shell(command)

                    if word[0] in ("cmd_name", "cmd_word") and word in words:
                        words.remove(word)

        # Second pass: find the command name, stepping over words that
        # contain "=" (after one of those, a plain TOKEN may be the command).
        accept_token = False
        for entry in words:
            kind = entry[0]
            if not (kind in ("cmd_name", "cmd_word") or (accept_token and kind == "TOKEN")):
                continue

            text = entry[1]
            if "=" in text:
                accept_token = True
                continue

            if text.startswith("$"):
                self.log.debug(self.unhandled_template % text)
            elif text == "eval":
                # The words after the first are joined and parsed as shell code
                self._parse_shell(" ".join(arg for _, arg in words[1:]))
            else:
                self.allexecs.add(text)
            break
478