# /OK3568_Linux_fs/yocto/poky/bitbake/lib/bb/siggen.py (revision 4882a59341e53eb6f0b4789bf948001014eff981)
#
# Copyright BitBake Contributors
#
# SPDX-License-Identifier: GPL-2.0-only
#

import hashlib
import logging
import os
import re
import tempfile
import pickle
import bb.data
import difflib
import simplediff
import json
import bb.compress.zstd
from bb.checksum import FileChecksumCache
from bb import runqueue
import hashserv
import hashserv.client

logger = logging.getLogger('BitBake.SigGen')
hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')

class SetEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, set):
            return dict(_set_object=list(sorted(obj)))
        return json.JSONEncoder.default(self, obj)

def SetDecoder(dct):
    if '_set_object' in dct:
        return set(dct['_set_object'])
    return dct

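# Illustrative round trip (not executed here): SetEncoder serialises a set as
# a sorted list under a '_set_object' key, and SetDecoder restores it:
#
#   json.dumps({"deps": {"b", "a"}}, cls=SetEncoder)
#       -> '{"deps": {"_set_object": ["a", "b"]}}'
#   json.loads('{"deps": {"_set_object": ["a", "b"]}}', object_hook=SetDecoder)
#       -> {'deps': {'a', 'b'}}
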
def init(d):
    siggens = [obj for obj in globals().values()
                      if type(obj) is type and issubclass(obj, SignatureGenerator)]

    desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
    for sg in siggens:
        if desired == sg.name:
            return sg(d)
    else:
        logger.error("Invalid signature generator '%s', using default 'noop'\n"
                     "Available generators: %s", desired,
                     ', '.join(obj.name for obj in siggens))
        return SignatureGenerator(d)

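# The generator is chosen by matching BB_SIGNATURE_HANDLER against the "name"
# attribute of the classes defined below, e.g. (illustrative):
#
#   BB_SIGNATURE_HANDLER = "basichash"
#
# selects SignatureGeneratorBasicHash.
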
class SignatureGenerator(object):
    """
    A dummy "noop" signature generator which performs no real hashing and
    defines the interface the other generators implement.
    """
    name = "noop"

    # If the derived class supports multiconfig datacaches, set this to True
    # The default is False for backward compatibility with derived signature
    # generators that do not understand multiconfig caches
    supports_multiconfig_datacaches = False

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.unitaskhashes = {}
        self.tidtopn = {}
        self.setscenetasks = set()

    def finalise(self, fn, d, variant):
        return

    def postparsing_clean_cache(self):
        return

    def get_unihash(self, tid):
        return self.taskhash[tid]

    def prep_taskhash(self, tid, deps, dataCaches):
        return

    def get_taskhash(self, tid, deps, dataCaches):
        self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest()
        return self.taskhash[tid]

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        return

    def stampfile(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def dump_sigtask(self, fn, task, stampbase, runtime):
        return

    def invalidate_task(self, task, d, fn):
        bb.build.del_stamp(task, d, fn)

    def dump_sigs(self, dataCache, options):
        return

    def get_taskdata(self):
        return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks)

    def set_taskdata(self, data):
        self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data

    def reset(self, data):
        self.__init__(data)

    def get_taskhashes(self):
        return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn

    def set_taskhashes(self, hashes):
        self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes

    def save_unitaskhashes(self):
        return

    def set_setscene_tasks(self, setscene_tasks):
        return

    @classmethod
    def get_data_caches(cls, dataCaches, mc):
        """
        This function returns the datacaches that should be passed to signature
        generator functions. If the signature generator supports multiconfig
        caches, the entire dictionary of data caches is sent, otherwise a
        special proxy is sent that supports both index access to all
        multiconfigs, and also direct access for the default multiconfig.

        The proxy class allows code in this class itself to always use
        multiconfig aware code (to ease maintenance), but derived classes that
        are unaware of multiconfig data caches can still access the default
        multiconfig as expected.

        Do not override this function in derived classes; it will be removed in
        the future when support for multiconfig data caches is mandatory.
        """
        class DataCacheProxy(object):
            def __init__(self):
                pass

            def __getitem__(self, key):
                return dataCaches[key]

            def __getattr__(self, name):
                return getattr(dataCaches[mc], name)

        if cls.supports_multiconfig_datacaches:
            return dataCaches

        return DataCacheProxy()

    def exit(self):
        return

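# A minimal sketch of how a generator is registered (hypothetical class and
# name): init() above discovers any SignatureGenerator subclass visible in
# this module's globals via its "name" attribute, so
#
#   class SignatureGeneratorExample(SignatureGenerator):
#       name = "example"
#
# would be selectable with BB_SIGNATURE_HANDLER = "example". The generators
# below follow the same pattern.
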
class SignatureGeneratorBasic(SignatureGenerator):
    """
    A signature generator which computes base and task hashes from the
    variable and task dependency data produced at parse time.
    """
    name = "basic"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.taskdeps = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.gendeps = {}
        self.lookupcache = {}
        self.setscenetasks = set()
        self.basehash_ignore_vars = set((data.getVar("BB_BASEHASH_IGNORE_VARS") or "").split())
        self.taskhash_ignore_tasks = None
        self.init_rundepcheck(data)
        checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
        if checksum_cache_file:
            self.checksum_cache = FileChecksumCache()
            self.checksum_cache.init_cache(data, checksum_cache_file)
        else:
            self.checksum_cache = None

        self.unihash_cache = bb.cache.SimpleCache("3")
        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
        self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
        self.tidtopn = {}

    def init_rundepcheck(self, data):
        self.taskhash_ignore_tasks = data.getVar("BB_TASKHASH_IGNORE_TASKS") or None
        if self.taskhash_ignore_tasks:
            self.twl = re.compile(self.taskhash_ignore_tasks)
        else:
            self.twl = None

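    # Illustrative (hypothetical value): BB_TASKHASH_IGNORE_TASKS is a regex,
    # e.g.
    #
    #   BB_TASKHASH_IGNORE_TASKS = "quilt-native|gcc-source"
    #
    # rundep_check() below then drops a dependency from a task's hash when the
    # dependency matches the pattern but the recipe itself does not.
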
    def _build_data(self, fn, d):

        ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basehash_ignore_vars)

        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basehash_ignore_vars, fn)

        for task in tasklist:
            tid = fn + ":" + task
            if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
                bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
                bb.error("The following commands may help:")
                cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
                # Make sure sigdata is dumped before running printdiff
                bb.error("%s -Snone" % cmd)
                bb.error("Then:")
                bb.error("%s -Sprintdiff\n" % cmd)
            self.basehash[tid] = basehash[tid]

        self.taskdeps[fn] = taskdeps
        self.gendeps[fn] = gendeps
        self.lookupcache[fn] = lookupcache

        return taskdeps

    def set_setscene_tasks(self, setscene_tasks):
        self.setscenetasks = set(setscene_tasks)

    def finalise(self, fn, d, variant):

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        if variant or mc:
            fn = bb.cache.realfn2virtual(fn, variant, mc)

        try:
            taskdeps = self._build_data(fn, d)
        except bb.parse.SkipRecipe:
            raise
        except:
            bb.warn("Error during finalise of %s" % fn)
            raise

        # Slow but can be useful for debugging mismatched basehashes
        #for task in self.taskdeps[fn]:
        #    self.dump_sigtask(fn, task, d.getVar("STAMP"), False)

        for task in taskdeps:
            d.setVar("BB_BASEHASH:task-%s" % task, self.basehash[fn + ":" + task])

    def postparsing_clean_cache(self):
        #
        # After parsing we can remove some things from memory to reduce our memory footprint
        #
        self.gendeps = {}
        self.lookupcache = {}
        self.taskdeps = {}

    def rundep_check(self, fn, recipename, task, dep, depname, dataCaches):
        # Return True if we should keep the dependency, False to drop it
        # We only manipulate the dependencies for packages not in the ignore
        # list
        if self.twl and not self.twl.search(recipename):
            # then process the actual dependencies
            if self.twl.search(depname):
                return False
        return True

    def read_taint(self, fn, task, stampbase):
        taint = None
        try:
            with open(stampbase + '.' + task + '.taint', 'r') as taintf:
                taint = taintf.read()
        except IOError:
            pass
        return taint

    def prep_taskhash(self, tid, deps, dataCaches):

        (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid)

        self.basehash[tid] = dataCaches[mc].basetaskhash[tid]
        self.runtaskdeps[tid] = []
        self.file_checksum_values[tid] = []
        recipename = dataCaches[mc].pkg_fn[fn]

        self.tidtopn[tid] = recipename

        for dep in sorted(deps, key=clean_basepath):
            (depmc, _, _, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
            depname = dataCaches[depmc].pkg_fn[depmcfn]
            if not self.supports_multiconfig_datacaches and mc != depmc:
                # If the signature generator doesn't understand multiconfig
                # data caches, any dependency not in the same multiconfig must
                # be skipped for backward compatibility
                continue
            if not self.rundep_check(fn, recipename, task, dep, depname, dataCaches):
                continue
            if dep not in self.taskhash:
                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
            self.runtaskdeps[tid].append(dep)

        if task in dataCaches[mc].file_checksums[fn]:
            if self.checksum_cache:
                checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            else:
                checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            for (f,cs) in checksums:
                self.file_checksum_values[tid].append((f,cs))

        taskdep = dataCaches[mc].task_deps[fn]
        if 'nostamp' in taskdep and task in taskdep['nostamp']:
            # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
            if tid in self.taints and self.taints[tid].startswith("nostamp:"):
                # Don't reset taint value upon every call
                pass
            else:
                import uuid
                taint = str(uuid.uuid4())
                self.taints[tid] = "nostamp:" + taint

        taint = self.read_taint(fn, task, dataCaches[mc].stamp[fn])
        if taint:
            self.taints[tid] = taint
            logger.warning("%s is tainted from a forced run" % tid)

        return

    def get_taskhash(self, tid, deps, dataCaches):

        data = self.basehash[tid]
        for dep in self.runtaskdeps[tid]:
            data += self.get_unihash(dep)

        for (f, cs) in self.file_checksum_values[tid]:
            if cs:
                if "/./" in f:
                    data += "./" + f.split("/./")[1]
                data += cs

        if tid in self.taints:
            if self.taints[tid].startswith("nostamp:"):
                data += self.taints[tid][8:]
            else:
                data += self.taints[tid]

        h = hashlib.sha256(data.encode("utf-8")).hexdigest()
        self.taskhash[tid] = h
        #d.setVar("BB_TASKHASH:task-%s" % task, taskhash[task])
        return h

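    # In effect (a sketch of the logic above, not a helper in this file) the
    # task hash is:
    #
    #   sha256(basehash + dep unihashes + file checksums + taint)
    #
    # so it changes whenever the recipe's own inputs, a dependency's
    # output-equivalent hash, a watched file or a taint changes.
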
    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        if self.checksum_cache:
            self.checksum_cache.save_extras()
            self.checksum_cache.save_merge()
        else:
            bb.fetch2.fetcher_parse_save()
            bb.fetch2.fetcher_parse_done()

    def save_unitaskhashes(self):
        self.unihash_cache.save(self.unitaskhashes)

    def dump_sigtask(self, fn, task, stampbase, runtime):

        tid = fn + ":" + task
        referencestamp = stampbase
        if isinstance(runtime, str) and runtime.startswith("customfile"):
            sigfile = stampbase
            referencestamp = runtime[11:]
        elif runtime and tid in self.taskhash:
            sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid)
        else:
            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]

        with bb.utils.umask(0o002):
            bb.utils.mkdirhier(os.path.dirname(sigfile))

        data = {}
        data['task'] = task
        data['basehash_ignore_vars'] = self.basehash_ignore_vars
        data['taskhash_ignore_tasks'] = self.taskhash_ignore_tasks
        data['taskdeps'] = self.taskdeps[fn][task]
        data['basehash'] = self.basehash[tid]
        data['gendeps'] = {}
        data['varvals'] = {}
        data['varvals'][task] = self.lookupcache[fn][task]
        for dep in self.taskdeps[fn][task]:
            if dep in self.basehash_ignore_vars:
                continue
            data['gendeps'][dep] = self.gendeps[fn][dep]
            data['varvals'][dep] = self.lookupcache[fn][dep]

        if runtime and tid in self.taskhash:
            data['runtaskdeps'] = self.runtaskdeps[tid]
            data['file_checksum_values'] = []
            for f,cs in self.file_checksum_values[tid]:
                if "/./" in f:
                    data['file_checksum_values'].append(("./" + f.split("/./")[1], cs))
                else:
                    data['file_checksum_values'].append((os.path.basename(f), cs))
            data['runtaskhashes'] = {}
            for dep in data['runtaskdeps']:
                data['runtaskhashes'][dep] = self.get_unihash(dep)
            data['taskhash'] = self.taskhash[tid]
            data['unihash'] = self.get_unihash(tid)

        taint = self.read_taint(fn, task, referencestamp)
        if taint:
            data['taint'] = taint

        if runtime and tid in self.taints:
            if 'nostamp:' in self.taints[tid]:
                data['taint'] = self.taints[tid]

        computed_basehash = calc_basehash(data)
        if computed_basehash != self.basehash[tid]:
            bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
        if runtime and tid in self.taskhash:
            computed_taskhash = calc_taskhash(data)
            if computed_taskhash != self.taskhash[tid]:
                bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
                sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)

        fd, tmpfile = bb.utils.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
        try:
            with bb.compress.zstd.open(fd, "wt", encoding="utf-8", num_threads=1) as f:
                json.dump(data, f, sort_keys=True, separators=(",", ":"), cls=SetEncoder)
                f.flush()
            os.chmod(tmpfile, 0o664)
            bb.utils.rename(tmpfile, sigfile)
        except (OSError, IOError) as err:
            try:
                os.unlink(tmpfile)
            except OSError:
                pass
            raise err

    def dump_sigfn(self, fn, dataCaches, options):
        if fn in self.taskdeps:
            for task in self.taskdeps[fn]:
                tid = fn + ":" + task
                mc = bb.runqueue.mc_from_tid(tid)
                if tid not in self.taskhash:
                    continue
                if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]:
                    bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % tid)
                    bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid]))
                self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True)

class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    name = "basichash"

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            return self.taskhash[tid]

        # If the task is not in basehash either, the resulting KeyError is a
        # genuine error
        return self.basehash[tid]

    def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False):
        if taskname != "do_setscene" and taskname.endswith("_setscene"):
            tid = fn + ":" + taskname[:-9]
        else:
            tid = fn + ":" + taskname
        if clean:
            h = "*"
        else:
            h = self.get_stampfile_hash(tid)

        return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, fn, taskname, extrainfo):
        return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True)

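    # Illustrative stamp name built above (hypothetical hash value):
    #
    #   <stampbase>.do_compile.3f2a...e1
    #
    # i.e. "stampbase.taskname.hash.extrainfo" with any trailing '.' stripped
    # when extrainfo is empty; stampcleanmask() substitutes '*' for the hash
    # so stale stamps for any hash can be matched and removed.
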
    def invalidate_task(self, task, d, fn):
        bb.note("Tainting hash to force rebuild of task %s, %s" % (fn, task))
        bb.build.write_taint(task, d, fn)

class SignatureGeneratorUniHashMixIn(object):
    def __init__(self, data):
        self.extramethod = {}
        super().__init__(data)

    def get_taskdata(self):
        return (self.server, self.method, self.extramethod) + super().get_taskdata()

    def set_taskdata(self, data):
        self.server, self.method, self.extramethod = data[:3]
        super().set_taskdata(data[3:])

    def client(self):
        if getattr(self, '_client', None) is None:
            self._client = hashserv.create_client(self.server)
        return self._client

    def reset(self, data):
        if getattr(self, '_client', None) is not None:
            self._client.close()
            self._client = None
        return super().reset(data)

    def exit(self):
        if getattr(self, '_client', None) is not None:
            self._client.close()
            self._client = None
        return super().exit()

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            # If a unique hash is reported, use it as the stampfile hash. This
            # ensures that a task won't be re-run if the taskhash changes but
            # would still result in the same output hash
            unihash = self._get_unihash(tid)
            if unihash is not None:
                return unihash

        return super().get_stampfile_hash(tid)

    def set_unihash(self, tid, unihash):
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        self.unitaskhashes[key] = (self.taskhash[tid], unihash)
        self.unihash[tid] = unihash

    def _get_unihash(self, tid, checkkey=None):
        if tid not in self.tidtopn:
            return None
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        if key not in self.unitaskhashes:
            return None
        if not checkkey:
            checkkey = self.taskhash[tid]
        (key, unihash) = self.unitaskhashes[key]
        if key != checkkey:
            return None
        return unihash

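    # Cache layout sketch (hypothetical values): unitaskhashes maps
    # "mc:pn:taskname" (mc is empty for the default configuration) to the last
    # (taskhash, unihash) pair seen for that task, e.g.
    #
    #   self.unitaskhashes[":foo:do_compile"] = ("3f2a...", "9c41...")
    #
    # A stale entry is detected when the stored taskhash no longer matches.
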
    def get_unihash(self, tid):
        taskhash = self.taskhash[tid]

        # If it's not a setscene task we can return
        if self.setscenetasks and tid not in self.setscenetasks:
            self.unihash[tid] = None
            return taskhash

        # TODO: This cache can grow unbounded. It probably only needs to keep
        # the most recent entry for each task
        unihash = self._get_unihash(tid)
        if unihash is not None:
            self.unihash[tid] = unihash
            return unihash

        # In the absence of being able to discover a unique hash from the
        # server, make it be equivalent to the taskhash. The unique "hash" only
        # really needs to be a unique string (not even necessarily a hash), but
        # making it match the taskhash has a few advantages:
        #
        # 1) All of the sstate code that assumes hashes can be the same
        # 2) It provides maximal compatibility with builders that don't use
        #    an equivalency server
        # 3) The value is easy for multiple independent builders to derive the
        #    same unique hash from the same input. This means that if the
        #    independent builders find the same taskhash, but it isn't reported
        #    to the server, there is a better chance that they will agree on
        #    the unique hash.
        unihash = taskhash

        try:
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]
            data = self.client().get_unihash(method, self.taskhash[tid])
            if data:
                unihash = data
                # A unique hash equal to the taskhash is not very interesting,
                # so it is reported at debug level 2. If they differ, that
                # is much more interesting, so it is reported at debug level 1
                hashequiv_logger.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
            else:
                hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        self.set_unihash(tid, unihash)
        self.unihash[tid] = unihash
        return unihash

    def report_unihash(self, path, task, d):
        import importlib

        taskhash = d.getVar('BB_TASKHASH')
        unihash = d.getVar('BB_UNIHASH')
        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
        tempdir = d.getVar('T')
        fn = d.getVar('BB_FILENAME')
        tid = fn + ':do_' + task
        key = tid + ':' + taskhash

        if self.setscenetasks and tid not in self.setscenetasks:
            return

        # This can happen if locked sigs are in action. Detect and just exit
        if taskhash != self.taskhash[tid]:
            return

        # Sanity checks
        cache_unihash = self._get_unihash(tid, checkkey=taskhash)
        if cache_unihash is None:
            bb.fatal('%s not in unihash cache. Please report this error' % key)

        if cache_unihash != unihash:
            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))

        sigfile = None
        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
        sigfile_link = "depsig.do_%s" % task

        try:
            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')

            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}

            if "." in self.method:
                (module, method) = self.method.rsplit('.', 1)
                locs['method'] = getattr(importlib.import_module(module), method)
                outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
            else:
                outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)

            try:
                extra_data = {}

                owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
                if owner:
                    extra_data['owner'] = owner

                if report_taskdata:
                    sigfile.seek(0)

                    extra_data['PN'] = d.getVar('PN')
                    extra_data['PV'] = d.getVar('PV')
                    extra_data['PR'] = d.getVar('PR')
                    extra_data['task'] = task
                    extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')

                method = self.method
                if tid in self.extramethod:
                    method = method + self.extramethod[tid]

                data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data)
                new_unihash = data['unihash']

                if new_unihash != unihash:
                    hashequiv_logger.debug('Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                    bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d)
                    self.set_unihash(tid, new_unihash)
                    d.setVar('BB_UNIHASH', new_unihash)
                else:
                    hashequiv_logger.debug('Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
            except ConnectionError as e:
                bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
        finally:
            if sigfile:
                sigfile.close()

                sigfile_link_path = os.path.join(tempdir, sigfile_link)
                bb.utils.remove(sigfile_link_path)

                try:
                    os.symlink(sigfile_name, sigfile_link_path)
                except OSError:
                    pass

    def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches):
        try:
            extra_data = {}
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]

            data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
            hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data)))

            if data is None:
                bb.warn("Server unable to handle unihash report")
                return False

            finalunihash = data['unihash']

            if finalunihash == current_unihash:
                hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash))
            elif finalunihash == wanted_unihash:
                hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash))
                self.set_unihash(tid, finalunihash)
                return True
            else:
                # TODO: What to do here?
                hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash))

        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        return False

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
    name = "TestEquivHash"
    def init_rundepcheck(self, data):
        super().init_rundepcheck(data)
        self.server = data.getVar('BB_HASHSERVE')
        self.method = "sstate_output_hash"

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestMulticonfigDepends(SignatureGeneratorBasicHash):
    name = "TestMulticonfigDepends"
    supports_multiconfig_datacaches = True

def dump_this_task(outfile, d):
    import bb.parse
    fn = d.getVar("BB_FILENAME")
    task = "do_" + d.getVar("BB_CURRENTTASK")
    referencestamp = bb.build.stamp_internal(task, d, None, True)
    bb.parse.siggen.dump_sigtask(fn, task, outfile, "customfile:" + referencestamp)

def init_colors(enable_color):
    """Initialise colour dict for passing to compare_sigfiles()"""
    # First set up the colours
    colors = {'color_title':   '\033[1m',
              'color_default': '\033[0m',
              'color_add':     '\033[0;32m',
              'color_remove':  '\033[0;31m',
             }
    # Leave all keys present but clear the values
    if not enable_color:
        for k in colors.keys():
            colors[k] = ''
    return colors

def worddiff_str(oldstr, newstr, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldstr.split(' '), newstr.split(' '))
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append(value)
        elif change == '+':
            item = '{color_add}{{+{value}+}}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}[-{value}-]{color_default}'.format(value=value, **colors)
            ret.append(item)
    whitespace_note = ''
    if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
        whitespace_note = ' (whitespace changed)'
    return '"%s"%s' % (' '.join(ret), whitespace_note)

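# Illustrative output (colours disabled; the exact ordering of the -/+ chunks
# comes from simplediff):
#
#   worddiff_str('a b c', 'a d c')  ->  '"a [-b-] {+d+} c"'
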
def list_inline_diff(oldlist, newlist, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldlist, newlist)
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append("'%s'" % value)
        elif change == '+':
            item = '{color_add}+{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}-{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
    return '[%s]' % (', '.join(ret))

def clean_basepath(basepath):
    basepath, dir, recipe_task = basepath.rsplit("/", 2)
    cleaned = dir + '/' + recipe_task

    if basepath[0] == '/':
        return cleaned

    if basepath.startswith("mc:") and basepath.count(':') >= 2:
        mc, mc_name, basepath = basepath.split(":", 2)
        mc_suffix = ':mc:' + mc_name
    else:
        mc_suffix = ''

    # The mc prefix is now removed from basepath. Whatever came next, if
    # present, will be the first suffix. ':/', the start of the recipe path,
    # marks the end of it. Something like 'virtual:a[:b[:c]]:/path...'
    # (b and c being optional)
    if basepath[0] != '/':
        cleaned += ':' + basepath.split(':/', 1)[0]

    return cleaned + mc_suffix

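# Illustrative transformations (hypothetical paths):
#
#   clean_basepath('/work/recipes/foo/foo_1.0.bb:do_compile')
#       -> 'foo/foo_1.0.bb:do_compile'
#   clean_basepath('virtual:native:/work/recipes/foo/foo_1.0.bb:do_compile')
#       -> 'foo/foo_1.0.bb:do_compile:virtual:native'
#   clean_basepath('mc:mc1:/work/recipes/foo/foo_1.0.bb:do_compile')
#       -> 'foo/foo_1.0.bb:do_compile:mc:mc1'
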
def clean_basepaths(a):
    b = {}
    for x in a:
        b[clean_basepath(x)] = a[x]
    return b

def clean_basepaths_list(a):
    b = []
    for x in a:
        b.append(clean_basepath(x))
    return b

# Handle renamed fields
def handle_renames(data):
    if 'basewhitelist' in data:
        data['basehash_ignore_vars'] = data['basewhitelist']
        del data['basewhitelist']
    if 'taskwhitelist' in data:
        data['taskhash_ignore_tasks'] = data['taskwhitelist']
        del data['taskwhitelist']


def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
    output = []

    colors = init_colors(color)
    def color_format(formatstr, **values):
        """
        Return colour formatted string.
        NOTE: call with the format string, not an already formatted string
        containing values (otherwise you could have trouble with { and }
        characters)
        """
        if not formatstr.endswith('{color_default}'):
            formatstr += '{color_default}'
        # In newer python 3 versions you can pass both of these directly,
        # but we only require 3.4 at the moment
        formatparams = {}
        formatparams.update(colors)
        formatparams.update(values)
        return formatstr.format(**formatparams)

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)
    with bb.compress.zstd.open(b, "rt", encoding="utf-8", num_threads=1) as f:
        b_data = json.load(f, object_hook=SetDecoder)

    for data in [a_data, b_data]:
        handle_renames(data)

    def dict_diff(a, b, ignored_vars=set()):
        sa = set(a.keys())
        sb = set(b.keys())
        common = sa & sb
        changed = set()
        for i in common:
            if a[i] != b[i] and i not in ignored_vars:
                changed.add(i)
        added = sb - sa
        removed = sa - sb
        return changed, added, removed

    def file_checksums_diff(a, b):
        from collections import Counter

        # Convert lists back to tuples
        a = [(f[0], f[1]) for f in a]
        b = [(f[0], f[1]) for f in b]

        # Compare lists, ensuring we can handle duplicate filenames if they exist
        removedcount = Counter(a)
        removedcount.subtract(b)
        addedcount = Counter(b)
        addedcount.subtract(a)
        added = []
        for x in b:
            if addedcount[x] > 0:
                addedcount[x] -= 1
                added.append(x)
        removed = []
        changed = []
        for x in a:
            if removedcount[x] > 0:
                removedcount[x] -= 1
                for y in added:
                    if y[0] == x[0]:
                        changed.append((x[0], x[1], y[1]))
                        added.remove(y)
                        break
                else:
                    removed.append(x)
        added = [x[0] for x in added]
        removed = [x[0] for x in removed]
        return changed, added, removed

    if 'basehash_ignore_vars' in a_data and a_data['basehash_ignore_vars'] != b_data['basehash_ignore_vars']:
        output.append(color_format("{color_title}basehash_ignore_vars changed{color_default} from '%s' to '%s'") % (a_data['basehash_ignore_vars'], b_data['basehash_ignore_vars']))
        if a_data['basehash_ignore_vars'] and b_data['basehash_ignore_vars']:
            output.append("changed items: %s" % a_data['basehash_ignore_vars'].symmetric_difference(b_data['basehash_ignore_vars']))

    if 'taskhash_ignore_tasks' in a_data and a_data['taskhash_ignore_tasks'] != b_data['taskhash_ignore_tasks']:
        output.append(color_format("{color_title}taskhash_ignore_tasks changed{color_default} from '%s' to '%s'") % (a_data['taskhash_ignore_tasks'], b_data['taskhash_ignore_tasks']))
        if a_data['taskhash_ignore_tasks'] and b_data['taskhash_ignore_tasks']:
            output.append("changed items: %s" % a_data['taskhash_ignore_tasks'].symmetric_difference(b_data['taskhash_ignore_tasks']))

    if a_data['taskdeps'] != b_data['taskdeps']:
        output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))

    if a_data['basehash'] != b_data['basehash'] and not collapsed:
        output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))

    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basehash_ignore_vars'] & b_data['basehash_ignore_vars'])
    if changed:
        for dep in sorted(changed):
            output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
            if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
                output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
    if added:
        for dep in sorted(added):
            output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
    if removed:
        for dep in sorted(removed):
            output.append(color_format("{color_title}Dependency on variable %s was removed") % (dep))


    changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
    if changed:
        for dep in sorted(changed):
            oldval = a_data['varvals'][dep]
            newval = b_data['varvals'][dep]
            if newval and oldval and ('\n' in oldval or '\n' in newval):
                diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
                # Cut off the first two lines, since we aren't interested in
                # the old/new filename (they are blank anyway in this case)
                difflines = list(diff)[2:]
                if color:
                    # Add colour to diff output
                    for i, line in enumerate(difflines):
                        if line.startswith('+'):
                            line = color_format('{color_add}{line}', line=line)
                            difflines[i] = line
                        elif line.startswith('-'):
                            line = color_format('{color_remove}{line}', line=line)
                            difflines[i] = line
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
            elif newval and oldval and (' ' in oldval or ' ' in newval):
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
            else:
                output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))

    if 'file_checksum_values' not in a_data:
        a_data['file_checksum_values'] = []
    if 'file_checksum_values' not in b_data:
        b_data['file_checksum_values'] = []

    changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
    if changed:
        for f, old, new in changed:
            output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
    if added:
        for f in added:
            output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
    if removed:
        for f in removed:
            output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f))

    if 'runtaskdeps' not in a_data:
        a_data['runtaskdeps'] = {}
    if 'runtaskdeps' not in b_data:
        b_data['runtaskdeps'] = {}

    if not collapsed:
        if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
            changed = ["Number of task dependencies changed"]
        else:
            changed = []
            for idx, task in enumerate(a_data['runtaskdeps']):
                a = a_data['runtaskdeps'][idx]
                b = b_data['runtaskdeps'][idx]
                if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
                    changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b]))

        if changed:
            clean_a = clean_basepaths_list(a_data['runtaskdeps'])
            clean_b = clean_basepaths_list(b_data['runtaskdeps'])
            if clean_a != clean_b:
                output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
            else:
                output.append(color_format("{color_title}runtaskdeps changed:"))
            output.append("\n".join(changed))


    if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
        a = clean_basepaths(a_data['runtaskhashes'])
        b = clean_basepaths(b_data['runtaskhashes'])
        changed, added, removed = dict_diff(a, b)
        if added:
            for dep in sorted(added):
                bdep_found = False
                if removed:
                    for bdep in removed:
                        if b[dep] == a[bdep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
                            bdep_found = True
                if not bdep_found:
                    output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (dep, b[dep]))
        if removed:
            for dep in sorted(removed):
                adep_found = False
                if added:
                    for adep in added:
                        if b[adep] == a[dep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
                            adep_found = True
                if not adep_found:
                    output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (dep, a[dep]))
        if changed:
            for dep in sorted(changed):
                if not collapsed:
                    output.append(color_format("{color_title}Hash for task dependency %s changed{color_default} from %s to %s") % (dep, a[dep], b[dep]))
                if callable(recursecb):
                    recout = recursecb(dep, a[dep], b[dep])
                    if recout:
                        if collapsed:
                            output.extend(recout)
                        else:
                            # If a dependent hash changed, might as well print the line above and then defer to the changes in
                            # that hash since in all likelihood, they're the same changes this task also saw.
                            output = [output[-1]] + recout
                            break

    a_taint = a_data.get('taint', None)
    b_taint = b_data.get('taint', None)
    if a_taint != b_taint:
        if a_taint and a_taint.startswith('nostamp:'):
            a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
        if b_taint and b_taint.startswith('nostamp:'):
            b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
        output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))

    return output


def calc_basehash(sigdata):
    task = sigdata['task']
    basedata = sigdata['varvals'][task]

    if basedata is None:
        basedata = ''

    alldeps = sigdata['taskdeps']
    for dep in alldeps:
        basedata = basedata + dep
        val = sigdata['varvals'][dep]
        if val is not None:
            basedata = basedata + str(val)

    return hashlib.sha256(basedata.encode("utf-8")).hexdigest()

def calc_taskhash(sigdata):
    data = sigdata['basehash']

    for dep in sigdata['runtaskdeps']:
        data = data + sigdata['runtaskhashes'][dep]

    for c in sigdata['file_checksum_values']:
        if c[1]:
            if "./" in c[0]:
                data = data + c[0]
            data = data + c[1]

    if 'taint' in sigdata:
        if 'nostamp:' in sigdata['taint']:
            data = data + sigdata['taint'][8:]
        else:
            data = data + sigdata['taint']

    return hashlib.sha256(data.encode("utf-8")).hexdigest()


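# A minimal sketch of re-checking a sigdata file offline with the two helpers
# above, mirroring what dump_sigfile() below does (path is hypothetical):
#
#   with bb.compress.zstd.open(path, "rt", encoding="utf-8", num_threads=1) as f:
#       sigdata = json.load(f, object_hook=SetDecoder)
#   handle_renames(sigdata)
#   print(calc_basehash(sigdata) == sigdata['basehash'])
#   if 'taskhash' in sigdata:
#       print(calc_taskhash(sigdata) == sigdata['taskhash'])
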
def dump_sigfile(a):
    output = []

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)

    handle_renames(a_data)

    output.append("basehash_ignore_vars: %s" % (sorted(a_data['basehash_ignore_vars'])))

    output.append("taskhash_ignore_tasks: %s" % (sorted(a_data['taskhash_ignore_tasks'] or [])))

    output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))

    output.append("basehash: %s" % (a_data['basehash']))

    for dep in sorted(a_data['gendeps']):
        output.append("List of dependencies for variable %s is %s" % (dep, sorted(a_data['gendeps'][dep])))

    for dep in sorted(a_data['varvals']):
        output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep]))

    if 'runtaskdeps' in a_data:
        output.append("Tasks this task depends on: %s" % (sorted(a_data['runtaskdeps'])))

    if 'file_checksum_values' in a_data:
        output.append("This task depends on the checksums of files: %s" % (sorted(a_data['file_checksum_values'])))

    if 'runtaskhashes' in a_data:
        for dep in sorted(a_data['runtaskhashes']):
            output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]))

    if 'taint' in a_data:
        if a_data['taint'].startswith('nostamp:'):
            msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):')
        else:
            msg = a_data['taint']
        output.append("Tainted (by forced/invalidated task): %s" % msg)

    if 'task' in a_data:
        computed_basehash = calc_basehash(a_data)
        output.append("Computed base hash is %s and from file %s" % (computed_basehash, a_data['basehash']))
    else:
        output.append("Unable to compute base hash")

    computed_taskhash = calc_taskhash(a_data)
    output.append("Computed task hash is %s" % computed_taskhash)

    return output

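# Illustrative use (hypothetical filename):
#
#   for line in dump_sigfile('foo.do_compile.sigdata.9c41...'):
#       print(line)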