#
# Copyright BitBake Contributors
#
# SPDX-License-Identifier: GPL-2.0-only
#

import hashlib
import logging
import os
import re
import tempfile
import pickle
import bb.data
import difflib
import simplediff
import json
import bb.compress.zstd
from bb.checksum import FileChecksumCache
from bb import runqueue
import hashserv
import hashserv.client

logger = logging.getLogger('BitBake.SigGen')
hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')

class SetEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, set):
            return dict(_set_object=list(sorted(obj)))
        return json.JSONEncoder.default(self, obj)

def SetDecoder(dct):
    if '_set_object' in dct:
        return set(dct['_set_object'])
    return dct
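
# A minimal sketch (illustrative only, not used by BitBake itself) of how the
# two helpers above round-trip Python sets through JSON; sigdata files written
# by dump_sigtask() rely on exactly this encoder/decoder pair.
def _example_set_roundtrip():
    payload = {"deps": {"do_compile", "do_configure"}}
    encoded = json.dumps(payload, sort_keys=True, cls=SetEncoder)
    # encoded == '{"deps": {"_set_object": ["do_compile", "do_configure"]}}'
    decoded = json.loads(encoded, object_hook=SetDecoder)
    assert decoded == payload
    return encoded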

def init(d):
    siggens = [obj for obj in globals().values()
               if type(obj) is type and issubclass(obj, SignatureGenerator)]

    desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
    for sg in siggens:
        if desired == sg.name:
            return sg(d)
    else:
        logger.error("Invalid signature generator '%s', using default 'noop'\n"
                     "Available generators: %s", desired,
                     ', '.join(obj.name for obj in siggens))
        return SignatureGenerator(d)

class SignatureGenerator(object):
    """
    Dummy "noop" signature generator: task hashes are placeholders derived only
    from the task identifier and no signature data is written out.
    """
    name = "noop"

    # If the derived class supports multiconfig datacaches, set this to True
    # The default is False for backward compatibility with derived signature
    # generators that do not understand multiconfig caches
    supports_multiconfig_datacaches = False

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.unitaskhashes = {}
        self.tidtopn = {}
        self.setscenetasks = set()

    def finalise(self, fn, d, variant):
        return

    def postparsing_clean_cache(self):
        return

    def get_unihash(self, tid):
        return self.taskhash[tid]

    def prep_taskhash(self, tid, deps, dataCaches):
        return

    def get_taskhash(self, tid, deps, dataCaches):
        self.taskhash[tid] = hashlib.sha256(tid.encode("utf-8")).hexdigest()
        return self.taskhash[tid]

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        return

    def stampfile(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
        return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')

    def dump_sigtask(self, fn, task, stampbase, runtime):
        return

    def invalidate_task(self, task, d, fn):
        bb.build.del_stamp(task, d, fn)

    def dump_sigs(self, dataCache, options):
        return

    def get_taskdata(self):
        return (self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks)

    def set_taskdata(self, data):
        self.runtaskdeps, self.taskhash, self.unihash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data

    def reset(self, data):
        self.__init__(data)

    def get_taskhashes(self):
        return self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn

    def set_taskhashes(self, hashes):
        self.taskhash, self.unihash, self.unitaskhashes, self.tidtopn = hashes

    def save_unitaskhashes(self):
        return

    def set_setscene_tasks(self, setscene_tasks):
        return

    @classmethod
    def get_data_caches(cls, dataCaches, mc):
        """
        This function returns the datacaches that should be passed to signature
        generator functions. If the signature generator supports multiconfig
        caches, the entire dictionary of data caches is sent, otherwise a
        special proxy is sent that supports both index access to all
        multiconfigs, and also direct access for the default multiconfig.

        The proxy class allows code in this class itself to always use
        multiconfig aware code (to ease maintenance), but derived classes that
        are unaware of multiconfig data caches can still access the default
        multiconfig as expected.

        Do not override this function in derived classes; it will be removed in
        the future when support for multiconfig data caches is mandatory
        """
        class DataCacheProxy(object):
            def __init__(self):
                pass

            def __getitem__(self, key):
                return dataCaches[key]

            def __getattr__(self, name):
                return getattr(dataCaches[mc], name)

        if cls.supports_multiconfig_datacaches:
            return dataCaches

        return DataCacheProxy()

    def exit(self):
        return
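
# A minimal sketch (illustrative only) of the two access patterns supported by
# the DataCacheProxy returned from get_data_caches(). _FakeCache stands in for
# a real bb.cache datacache object; the attribute name pkg_fn mirrors the field
# used elsewhere in this module.
def _example_data_cache_proxy():
    class _FakeCache:
        pkg_fn = {"/path/to/foo_1.0.bb": "foo"}
    dataCaches = {"": _FakeCache(), "mc1": _FakeCache()}
    proxy = SignatureGenerator.get_data_caches(dataCaches, "")
    # Multiconfig-aware callers index by multiconfig name...
    by_index = proxy["mc1"].pkg_fn
    # ...while legacy callers fall through to the default multiconfig.
    by_attr = proxy.pkg_fn
    return by_index, by_attr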

class SignatureGeneratorBasic(SignatureGenerator):
    """
    Signature generator which computes a basehash for each task from its
    variable dependencies, and a taskhash which additionally covers runtime
    task dependencies, file checksums and taints.
    """
    name = "basic"

    def __init__(self, data):
        self.basehash = {}
        self.taskhash = {}
        self.unihash = {}
        self.taskdeps = {}
        self.runtaskdeps = {}
        self.file_checksum_values = {}
        self.taints = {}
        self.gendeps = {}
        self.lookupcache = {}
        self.setscenetasks = set()
        self.basehash_ignore_vars = set((data.getVar("BB_BASEHASH_IGNORE_VARS") or "").split())
        self.taskhash_ignore_tasks = None
        self.init_rundepcheck(data)
        checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
        if checksum_cache_file:
            self.checksum_cache = FileChecksumCache()
            self.checksum_cache.init_cache(data, checksum_cache_file)
        else:
            self.checksum_cache = None

        self.unihash_cache = bb.cache.SimpleCache("3")
        self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
        self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
        self.tidtopn = {}

    def init_rundepcheck(self, data):
        self.taskhash_ignore_tasks = data.getVar("BB_TASKHASH_IGNORE_TASKS") or None
        if self.taskhash_ignore_tasks:
            self.twl = re.compile(self.taskhash_ignore_tasks)
        else:
            self.twl = None

    def _build_data(self, fn, d):

        ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basehash_ignore_vars)

        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basehash_ignore_vars, fn)

        for task in tasklist:
            tid = fn + ":" + task
            if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
                bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
                bb.error("The following commands may help:")
                cmd = "$ bitbake %s -c%s" % (d.getVar('PN'), task)
                # Make sure sigdata is dumped before running printdiff
                bb.error("%s -Snone" % cmd)
                bb.error("Then:")
                bb.error("%s -Sprintdiff\n" % cmd)
            self.basehash[tid] = basehash[tid]

        self.taskdeps[fn] = taskdeps
        self.gendeps[fn] = gendeps
        self.lookupcache[fn] = lookupcache

        return taskdeps

    def set_setscene_tasks(self, setscene_tasks):
        self.setscenetasks = set(setscene_tasks)

    def finalise(self, fn, d, variant):

        mc = d.getVar("__BBMULTICONFIG", False) or ""
        if variant or mc:
            fn = bb.cache.realfn2virtual(fn, variant, mc)

        try:
            taskdeps = self._build_data(fn, d)
        except bb.parse.SkipRecipe:
            raise
        except:
            bb.warn("Error during finalise of %s" % fn)
            raise

        # Slow but can be useful for debugging mismatched basehashes
        #for task in self.taskdeps[fn]:
        #    self.dump_sigtask(fn, task, d.getVar("STAMP"), False)

        for task in taskdeps:
            d.setVar("BB_BASEHASH:task-%s" % task, self.basehash[fn + ":" + task])

    def postparsing_clean_cache(self):
        #
        # After parsing we can remove some things from memory to reduce our memory footprint
        #
        self.gendeps = {}
        self.lookupcache = {}
        self.taskdeps = {}

    def rundep_check(self, fn, recipename, task, dep, depname, dataCaches):
        # Return True if we should keep the dependency, False to drop it
        # We only manipulate the dependencies for packages not in the ignore
        # list
        if self.twl and not self.twl.search(recipename):
            # then process the actual dependencies
            if self.twl.search(depname):
                return False
        return True

    def read_taint(self, fn, task, stampbase):
        taint = None
        try:
            with open(stampbase + '.' + task + '.taint', 'r') as taintf:
                taint = taintf.read()
        except IOError:
            pass
        return taint

    def prep_taskhash(self, tid, deps, dataCaches):

        (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid)

        self.basehash[tid] = dataCaches[mc].basetaskhash[tid]
        self.runtaskdeps[tid] = []
        self.file_checksum_values[tid] = []
        recipename = dataCaches[mc].pkg_fn[fn]

        self.tidtopn[tid] = recipename

        for dep in sorted(deps, key=clean_basepath):
            (depmc, _, _, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
            depname = dataCaches[depmc].pkg_fn[depmcfn]
            if not self.supports_multiconfig_datacaches and mc != depmc:
                # If the signature generator doesn't understand multiconfig
                # data caches, any dependency not in the same multiconfig must
                # be skipped for backward compatibility
                continue
            if not self.rundep_check(fn, recipename, task, dep, depname, dataCaches):
                continue
            if dep not in self.taskhash:
                bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
            self.runtaskdeps[tid].append(dep)

        if task in dataCaches[mc].file_checksums[fn]:
            if self.checksum_cache:
                checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            else:
                checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
            for (f, cs) in checksums:
                self.file_checksum_values[tid].append((f, cs))

        taskdep = dataCaches[mc].task_deps[fn]
        if 'nostamp' in taskdep and task in taskdep['nostamp']:
            # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
            if tid in self.taints and self.taints[tid].startswith("nostamp:"):
                # Don't reset taint value upon every call
                pass
            else:
                import uuid
                taint = str(uuid.uuid4())
                self.taints[tid] = "nostamp:" + taint

        taint = self.read_taint(fn, task, dataCaches[mc].stamp[fn])
        if taint:
            self.taints[tid] = taint
            logger.warning("%s is tainted from a forced run" % tid)

        return

    def get_taskhash(self, tid, deps, dataCaches):

        data = self.basehash[tid]
        for dep in self.runtaskdeps[tid]:
            data += self.get_unihash(dep)

        for (f, cs) in self.file_checksum_values[tid]:
            if cs:
                if "/./" in f:
                    data += "./" + f.split("/./")[1]
                data += cs

        if tid in self.taints:
            if self.taints[tid].startswith("nostamp:"):
                data += self.taints[tid][8:]
            else:
                data += self.taints[tid]

        h = hashlib.sha256(data.encode("utf-8")).hexdigest()
        self.taskhash[tid] = h
        #d.setVar("BB_TASKHASH:task-%s" % task, taskhash[task])
        return h

    def writeout_file_checksum_cache(self):
        """Write/update the file checksum cache onto disk"""
        if self.checksum_cache:
            self.checksum_cache.save_extras()
            self.checksum_cache.save_merge()
        else:
            bb.fetch2.fetcher_parse_save()
            bb.fetch2.fetcher_parse_done()

    def save_unitaskhashes(self):
        self.unihash_cache.save(self.unitaskhashes)

    def dump_sigtask(self, fn, task, stampbase, runtime):

        tid = fn + ":" + task
        referencestamp = stampbase
        if isinstance(runtime, str) and runtime.startswith("customfile"):
            sigfile = stampbase
            referencestamp = runtime[11:]
        elif runtime and tid in self.taskhash:
            sigfile = stampbase + "." + task + ".sigdata" + "." + self.get_unihash(tid)
        else:
            sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[tid]

        with bb.utils.umask(0o002):
            bb.utils.mkdirhier(os.path.dirname(sigfile))

        data = {}
        data['task'] = task
        data['basehash_ignore_vars'] = self.basehash_ignore_vars
        data['taskhash_ignore_tasks'] = self.taskhash_ignore_tasks
        data['taskdeps'] = self.taskdeps[fn][task]
        data['basehash'] = self.basehash[tid]
        data['gendeps'] = {}
        data['varvals'] = {}
        data['varvals'][task] = self.lookupcache[fn][task]
        for dep in self.taskdeps[fn][task]:
            if dep in self.basehash_ignore_vars:
                continue
            data['gendeps'][dep] = self.gendeps[fn][dep]
            data['varvals'][dep] = self.lookupcache[fn][dep]

        if runtime and tid in self.taskhash:
            data['runtaskdeps'] = self.runtaskdeps[tid]
            data['file_checksum_values'] = []
            for f, cs in self.file_checksum_values[tid]:
                if "/./" in f:
                    data['file_checksum_values'].append(("./" + f.split("/./")[1], cs))
                else:
                    data['file_checksum_values'].append((os.path.basename(f), cs))
            data['runtaskhashes'] = {}
            for dep in data['runtaskdeps']:
                data['runtaskhashes'][dep] = self.get_unihash(dep)
            data['taskhash'] = self.taskhash[tid]
            data['unihash'] = self.get_unihash(tid)

        taint = self.read_taint(fn, task, referencestamp)
        if taint:
            data['taint'] = taint

        if runtime and tid in self.taints:
            if 'nostamp:' in self.taints[tid]:
                data['taint'] = self.taints[tid]

        computed_basehash = calc_basehash(data)
        if computed_basehash != self.basehash[tid]:
            bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[tid], tid))
        if runtime and tid in self.taskhash:
            computed_taskhash = calc_taskhash(data)
            if computed_taskhash != self.taskhash[tid]:
                bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
                sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)

        fd, tmpfile = bb.utils.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
        try:
            with bb.compress.zstd.open(fd, "wt", encoding="utf-8", num_threads=1) as f:
                json.dump(data, f, sort_keys=True, separators=(",", ":"), cls=SetEncoder)
                f.flush()
            os.chmod(tmpfile, 0o664)
            bb.utils.rename(tmpfile, sigfile)
        except (OSError, IOError) as err:
            try:
                os.unlink(tmpfile)
            except OSError:
                pass
            raise err

    def dump_sigfn(self, fn, dataCaches, options):
        if fn in self.taskdeps:
            for task in self.taskdeps[fn]:
                tid = fn + ":" + task
                mc = bb.runqueue.mc_from_tid(tid)
                if tid not in self.taskhash:
                    continue
                if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]:
                    bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % tid)
                    bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid]))
                self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True)

class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
    name = "basichash"

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            return self.taskhash[tid]

        # If task is not in basehash, then error
        return self.basehash[tid]

    def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False):
        if taskname != "do_setscene" and taskname.endswith("_setscene"):
            tid = fn + ":" + taskname[:-9]
        else:
            tid = fn + ":" + taskname
        if clean:
            h = "*"
        else:
            h = self.get_stampfile_hash(tid)

        return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')

    def stampcleanmask(self, stampbase, fn, taskname, extrainfo):
        return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True)

    def invalidate_task(self, task, d, fn):
        bb.note("Tainting hash to force rebuild of task %s, %s" % (fn, task))
        bb.build.write_taint(task, d, fn)
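
# A minimal sketch (illustrative only) of the stamp file name layout produced
# by the "basichash" generator above: the task (or base) hash is spliced in
# between the task name and extrainfo. __new__ is used to skip __init__, which
# needs a real datastore; the recipe path and hash below are made up.
def _example_basichash_stampfile():
    sg = SignatureGeneratorBasicHash.__new__(SignatureGeneratorBasicHash)
    sg.taskhash = {"/path/to/foo_1.0.bb:do_compile": "0123abcd"}
    stamp = sg.stampfile("/tmp/stamps/foo-1.0-r0", "/path/to/foo_1.0.bb", "do_compile", "")
    # stamp == "/tmp/stamps/foo-1.0-r0.do_compile.0123abcd"
    mask = sg.stampcleanmask("/tmp/stamps/foo-1.0-r0", "/path/to/foo_1.0.bb", "do_compile", "")
    # mask == "/tmp/stamps/foo-1.0-r0.do_compile.*" (a glob-style mask)
    return stamp, mask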

class SignatureGeneratorUniHashMixIn(object):
    def __init__(self, data):
        self.extramethod = {}
        super().__init__(data)

    def get_taskdata(self):
        return (self.server, self.method, self.extramethod) + super().get_taskdata()

    def set_taskdata(self, data):
        self.server, self.method, self.extramethod = data[:3]
        super().set_taskdata(data[3:])

    def client(self):
        if getattr(self, '_client', None) is None:
            self._client = hashserv.create_client(self.server)
        return self._client

    def reset(self, data):
        if getattr(self, '_client', None) is not None:
            self._client.close()
            self._client = None
        return super().reset(data)

    def exit(self):
        if getattr(self, '_client', None) is not None:
            self._client.close()
            self._client = None
        return super().exit()

    def get_stampfile_hash(self, tid):
        if tid in self.taskhash:
            # If a unique hash is reported, use it as the stampfile hash. This
            # ensures a task won't be re-run if its taskhash changes but the
            # change would still result in the same output hash
            unihash = self._get_unihash(tid)
            if unihash is not None:
                return unihash

        return super().get_stampfile_hash(tid)

    def set_unihash(self, tid, unihash):
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        self.unitaskhashes[key] = (self.taskhash[tid], unihash)
        self.unihash[tid] = unihash

    def _get_unihash(self, tid, checkkey=None):
        if tid not in self.tidtopn:
            return None
        (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
        key = mc + ":" + self.tidtopn[tid] + ":" + taskname
        if key not in self.unitaskhashes:
            return None
        if not checkkey:
            checkkey = self.taskhash[tid]
        (key, unihash) = self.unitaskhashes[key]
        if key != checkkey:
            return None
        return unihash

    def get_unihash(self, tid):
        taskhash = self.taskhash[tid]

        # If it's not a setscene task we can return
        if self.setscenetasks and tid not in self.setscenetasks:
            self.unihash[tid] = None
            return taskhash

        # TODO: This cache can grow unbounded. It probably only needs to keep
        # for each task
        unihash = self._get_unihash(tid)
        if unihash is not None:
            self.unihash[tid] = unihash
            return unihash

        # In the absence of being able to discover a unique hash from the
        # server, make it be equivalent to the taskhash. The unique "hash" only
        # really needs to be a unique string (not even necessarily a hash), but
        # making it match the taskhash has a few advantages:
        #
        # 1) All of the sstate code that assumes hashes can be the same
        # 2) It provides maximal compatibility with builders that don't use
        #    an equivalency server
        # 3) The value is easy for multiple independent builders to derive the
        #    same unique hash from the same input. This means that if the
        #    independent builders find the same taskhash, but it isn't reported
        #    to the server, there is a better chance that they will agree on
        #    the unique hash.
        unihash = taskhash

        try:
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]
            data = self.client().get_unihash(method, self.taskhash[tid])
            if data:
                unihash = data
                # A unique hash equal to the taskhash is not very interesting,
                # so it is reported at debug level 2. If they differ, that
                # is much more interesting, so it is reported at debug level 1
                hashequiv_logger.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
            else:
                hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        self.set_unihash(tid, unihash)
        self.unihash[tid] = unihash
        return unihash

    def report_unihash(self, path, task, d):
        import importlib

        taskhash = d.getVar('BB_TASKHASH')
        unihash = d.getVar('BB_UNIHASH')
        report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
        tempdir = d.getVar('T')
        fn = d.getVar('BB_FILENAME')
        tid = fn + ':do_' + task
        key = tid + ':' + taskhash

        if self.setscenetasks and tid not in self.setscenetasks:
            return

        # This can happen if locked sigs are in action. Detect and just exit
        if taskhash != self.taskhash[tid]:
            return

        # Sanity checks
        cache_unihash = self._get_unihash(tid, checkkey=taskhash)
        if cache_unihash is None:
            bb.fatal('%s not in unihash cache. Please report this error' % key)

        if cache_unihash != unihash:
            bb.fatal("Cache unihash %s doesn't match BB_UNIHASH %s" % (cache_unihash, unihash))

        sigfile = None
        sigfile_name = "depsig.do_%s.%d" % (task, os.getpid())
        sigfile_link = "depsig.do_%s" % task

        try:
            sigfile = open(os.path.join(tempdir, sigfile_name), 'w+b')

            locs = {'path': path, 'sigfile': sigfile, 'task': task, 'd': d}

            if "." in self.method:
                (module, method) = self.method.rsplit('.', 1)
                locs['method'] = getattr(importlib.import_module(module), method)
                outhash = bb.utils.better_eval('method(path, sigfile, task, d)', locs)
            else:
                outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)

            try:
                extra_data = {}

                owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
                if owner:
                    extra_data['owner'] = owner

                if report_taskdata:
                    sigfile.seek(0)

                    extra_data['PN'] = d.getVar('PN')
                    extra_data['PV'] = d.getVar('PV')
                    extra_data['PR'] = d.getVar('PR')
                    extra_data['task'] = task
                    extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')

                method = self.method
                if tid in self.extramethod:
                    method = method + self.extramethod[tid]

                data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data)
                new_unihash = data['unihash']

                if new_unihash != unihash:
                    hashequiv_logger.debug('Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                    bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d)
                    self.set_unihash(tid, new_unihash)
                    d.setVar('BB_UNIHASH', new_unihash)
                else:
                    hashequiv_logger.debug('Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
            except ConnectionError as e:
                bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
        finally:
            if sigfile:
                sigfile.close()

                sigfile_link_path = os.path.join(tempdir, sigfile_link)
                bb.utils.remove(sigfile_link_path)

                try:
                    os.symlink(sigfile_name, sigfile_link_path)
                except OSError:
                    pass

    def report_unihash_equiv(self, tid, taskhash, wanted_unihash, current_unihash, datacaches):
        try:
            extra_data = {}
            method = self.method
            if tid in self.extramethod:
                method = method + self.extramethod[tid]

            data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
            hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data)))

            if data is None:
                bb.warn("Server unable to handle unihash report")
                return False

            finalunihash = data['unihash']

            if finalunihash == current_unihash:
                hashequiv_logger.verbose('Task %s unihash %s unchanged by server' % (tid, finalunihash))
            elif finalunihash == wanted_unihash:
                hashequiv_logger.verbose('Task %s unihash changed %s -> %s as wanted' % (tid, current_unihash, finalunihash))
                self.set_unihash(tid, finalunihash)
                return True
            else:
                # TODO: What to do here?
                hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash))

        except ConnectionError as e:
            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))

        return False

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureGeneratorBasicHash):
    name = "TestEquivHash"
    def init_rundepcheck(self, data):
        super().init_rundepcheck(data)
        self.server = data.getVar('BB_HASHSERVE')
        self.method = "sstate_output_hash"

#
# Dummy class used for bitbake-selftest
#
class SignatureGeneratorTestMulticonfigDepends(SignatureGeneratorBasicHash):
    name = "TestMulticonfigDepends"
    supports_multiconfig_datacaches = True

def dump_this_task(outfile, d):
    import bb.parse
    fn = d.getVar("BB_FILENAME")
    task = "do_" + d.getVar("BB_CURRENTTASK")
    referencestamp = bb.build.stamp_internal(task, d, None, True)
    bb.parse.siggen.dump_sigtask(fn, task, outfile, "customfile:" + referencestamp)

def init_colors(enable_color):
    """Initialise colour dict for passing to compare_sigfiles()"""
    # First set up the colours
    colors = {'color_title':   '\033[1m',
              'color_default': '\033[0m',
              'color_add':     '\033[0;32m',
              'color_remove':  '\033[0;31m',
             }
    # Leave all keys present but clear the values
    if not enable_color:
        for k in colors.keys():
            colors[k] = ''
    return colors

def worddiff_str(oldstr, newstr, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldstr.split(' '), newstr.split(' '))
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append(value)
        elif change == '+':
            item = '{color_add}{{+{value}+}}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}[-{value}-]{color_default}'.format(value=value, **colors)
            ret.append(item)
    whitespace_note = ''
    if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
        whitespace_note = ' (whitespace changed)'
    return '"%s"%s' % (' '.join(ret), whitespace_note)

def list_inline_diff(oldlist, newlist, colors=None):
    if not colors:
        colors = init_colors(False)
    diff = simplediff.diff(oldlist, newlist)
    ret = []
    for change, value in diff:
        value = ' '.join(value)
        if change == '=':
            ret.append("'%s'" % value)
        elif change == '+':
            item = '{color_add}+{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
        elif change == '-':
            item = '{color_remove}-{value}{color_default}'.format(value=value, **colors)
            ret.append(item)
    return '[%s]' % (', '.join(ret))

def clean_basepath(basepath):
    basepath, dir, recipe_task = basepath.rsplit("/", 2)
    cleaned = dir + '/' + recipe_task

    if basepath[0] == '/':
        return cleaned

    if basepath.startswith("mc:") and basepath.count(':') >= 2:
        mc, mc_name, basepath = basepath.split(":", 2)
        mc_suffix = ':mc:' + mc_name
    else:
        mc_suffix = ''

    # mc stuff now removed from basepath. Whatever was next, if present will be the first
    # suffix. ':/', recipe path start, marks the end of this. Something like
    # 'virtual:a[:b[:c]]:/path...' (b and c being optional)
    if basepath[0] != '/':
        cleaned += ':' + basepath.split(':/', 1)[0]

    return cleaned + mc_suffix
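
# Hand-worked examples (illustrative only, the recipe paths are made up) of how
# clean_basepath() trims a full recipe path plus task down to
# "<dir>/<recipe>:<task>" with any virtual/multiconfig suffixes appended.
def _example_clean_basepath():
    assert clean_basepath(
        "/srv/meta/recipes-core/busybox/busybox_1.36.bb:do_compile") == \
        "busybox/busybox_1.36.bb:do_compile"
    assert clean_basepath(
        "virtual:native:/srv/meta/recipes-devtools/python/python3_3.11.bb:do_fetch") == \
        "python/python3_3.11.bb:do_fetch:virtual:native"
    assert clean_basepath(
        "mc:mc1:/srv/meta/recipes-core/busybox/busybox_1.36.bb:do_install") == \
        "busybox/busybox_1.36.bb:do_install:mc:mc1"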

def clean_basepaths(a):
    b = {}
    for x in a:
        b[clean_basepath(x)] = a[x]
    return b

def clean_basepaths_list(a):
    b = []
    for x in a:
        b.append(clean_basepath(x))
    return b

# Handle renamed fields
def handle_renames(data):
    if 'basewhitelist' in data:
        data['basehash_ignore_vars'] = data['basewhitelist']
        del data['basewhitelist']
    if 'taskwhitelist' in data:
        data['taskhash_ignore_tasks'] = data['taskwhitelist']
        del data['taskwhitelist']


def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
    output = []

    colors = init_colors(color)
    def color_format(formatstr, **values):
        """
        Return colour formatted string.
        NOTE: call with the format string, not an already formatted string
        containing values (otherwise you could have trouble with { and }
        characters)
        """
        if not formatstr.endswith('{color_default}'):
            formatstr += '{color_default}'
        # In newer python 3 versions you can pass both of these directly,
        # but we only require 3.4 at the moment
        formatparams = {}
        formatparams.update(colors)
        formatparams.update(values)
        return formatstr.format(**formatparams)

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)
    with bb.compress.zstd.open(b, "rt", encoding="utf-8", num_threads=1) as f:
        b_data = json.load(f, object_hook=SetDecoder)

    for data in [a_data, b_data]:
        handle_renames(data)

    def dict_diff(a, b, ignored_vars=set()):
        sa = set(a.keys())
        sb = set(b.keys())
        common = sa & sb
        changed = set()
        for i in common:
            if a[i] != b[i] and i not in ignored_vars:
                changed.add(i)
        added = sb - sa
        removed = sa - sb
        return changed, added, removed

    def file_checksums_diff(a, b):
        from collections import Counter

        # Convert lists back to tuples
        a = [(f[0], f[1]) for f in a]
        b = [(f[0], f[1]) for f in b]

        # Compare lists, ensuring we can handle duplicate filenames if they exist
        removedcount = Counter(a)
        removedcount.subtract(b)
        addedcount = Counter(b)
        addedcount.subtract(a)
        added = []
        for x in b:
            if addedcount[x] > 0:
                addedcount[x] -= 1
                added.append(x)
        removed = []
        changed = []
        for x in a:
            if removedcount[x] > 0:
                removedcount[x] -= 1
                for y in added:
                    if y[0] == x[0]:
                        changed.append((x[0], x[1], y[1]))
                        added.remove(y)
                        break
                else:
                    removed.append(x)
        added = [x[0] for x in added]
        removed = [x[0] for x in removed]
        return changed, added, removed

    if 'basehash_ignore_vars' in a_data and a_data['basehash_ignore_vars'] != b_data['basehash_ignore_vars']:
        output.append(color_format("{color_title}basehash_ignore_vars changed{color_default} from '%s' to '%s'") % (a_data['basehash_ignore_vars'], b_data['basehash_ignore_vars']))
        if a_data['basehash_ignore_vars'] and b_data['basehash_ignore_vars']:
            output.append("changed items: %s" % a_data['basehash_ignore_vars'].symmetric_difference(b_data['basehash_ignore_vars']))

    if 'taskhash_ignore_tasks' in a_data and a_data['taskhash_ignore_tasks'] != b_data['taskhash_ignore_tasks']:
        output.append(color_format("{color_title}taskhash_ignore_tasks changed{color_default} from '%s' to '%s'") % (a_data['taskhash_ignore_tasks'], b_data['taskhash_ignore_tasks']))
        if a_data['taskhash_ignore_tasks'] and b_data['taskhash_ignore_tasks']:
            output.append("changed items: %s" % a_data['taskhash_ignore_tasks'].symmetric_difference(b_data['taskhash_ignore_tasks']))

    if a_data['taskdeps'] != b_data['taskdeps']:
        output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))

    if a_data['basehash'] != b_data['basehash'] and not collapsed:
        output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))

    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basehash_ignore_vars'] & b_data['basehash_ignore_vars'])
    if changed:
        for dep in sorted(changed):
            output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
            if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
                output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
    if added:
        for dep in sorted(added):
            output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
    if removed:
        for dep in sorted(removed):
            output.append(color_format("{color_title}Dependency on variable %s was removed") % (dep))


    changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
    if changed:
        for dep in sorted(changed):
            oldval = a_data['varvals'][dep]
            newval = b_data['varvals'][dep]
            if newval and oldval and ('\n' in oldval or '\n' in newval):
                diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
                # Cut off the first two lines, since we aren't interested in
                # the old/new filename (they are blank anyway in this case)
                difflines = list(diff)[2:]
                if color:
                    # Add colour to diff output
                    for i, line in enumerate(difflines):
                        if line.startswith('+'):
                            line = color_format('{color_add}{line}', line=line)
                            difflines[i] = line
                        elif line.startswith('-'):
                            line = color_format('{color_remove}{line}', line=line)
                            difflines[i] = line
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff='\n'.join(difflines)))
            elif newval and oldval and (' ' in oldval or ' ' in newval):
                output.append(color_format("{color_title}Variable {var} value changed:{color_default}\n{diff}", var=dep, diff=worddiff_str(oldval, newval, colors)))
            else:
                output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))

    if not 'file_checksum_values' in a_data:
        a_data['file_checksum_values'] = []
    if not 'file_checksum_values' in b_data:
        b_data['file_checksum_values'] = []

    changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
    if changed:
        for f, old, new in changed:
            output.append(color_format("{color_title}Checksum for file %s changed{color_default} from %s to %s") % (f, old, new))
    if added:
        for f in added:
            output.append(color_format("{color_title}Dependency on checksum of file %s was added") % (f))
    if removed:
        for f in removed:
            output.append(color_format("{color_title}Dependency on checksum of file %s was removed") % (f))

    if not 'runtaskdeps' in a_data:
        a_data['runtaskdeps'] = {}
    if not 'runtaskdeps' in b_data:
        b_data['runtaskdeps'] = {}

    if not collapsed:
        if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
            changed = ["Number of task dependencies changed"]
        else:
            changed = []
            for idx, task in enumerate(a_data['runtaskdeps']):
                a = a_data['runtaskdeps'][idx]
                b = b_data['runtaskdeps'][idx]
                if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
                    changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b]))

        if changed:
            clean_a = clean_basepaths_list(a_data['runtaskdeps'])
            clean_b = clean_basepaths_list(b_data['runtaskdeps'])
            if clean_a != clean_b:
                output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
            else:
                output.append(color_format("{color_title}runtaskdeps changed:"))
            output.append("\n".join(changed))


    if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
        a = clean_basepaths(a_data['runtaskhashes'])
        b = clean_basepaths(b_data['runtaskhashes'])
        changed, added, removed = dict_diff(a, b)
        if added:
            for dep in sorted(added):
                bdep_found = False
                if removed:
                    for bdep in removed:
                        if b[dep] == a[bdep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
                            bdep_found = True
                if not bdep_found:
                    output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (dep, b[dep]))
        if removed:
            for dep in sorted(removed):
                adep_found = False
                if added:
                    for adep in added:
                        if b[adep] == a[dep]:
                            #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
                            adep_found = True
                if not adep_found:
                    output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (dep, a[dep]))
        if changed:
            for dep in sorted(changed):
                if not collapsed:
                    output.append(color_format("{color_title}Hash for task dependency %s changed{color_default} from %s to %s") % (dep, a[dep], b[dep]))
                if callable(recursecb):
                    recout = recursecb(dep, a[dep], b[dep])
                    if recout:
                        if collapsed:
                            output.extend(recout)
                        else:
                            # If a dependent hash changed, might as well print the line above and then defer to the changes in
                            # that hash since in all likelihood, they're the same changes this task also saw.
                            output = [output[-1]] + recout
                            break

    a_taint = a_data.get('taint', None)
    b_taint = b_data.get('taint', None)
    if a_taint != b_taint:
        if a_taint and a_taint.startswith('nostamp:'):
            a_taint = a_taint.replace('nostamp:', 'nostamp(uuid4):')
        if b_taint and b_taint.startswith('nostamp:'):
            b_taint = b_taint.replace('nostamp:', 'nostamp(uuid4):')
        output.append(color_format("{color_title}Taint (by forced/invalidated task) changed{color_default} from %s to %s") % (a_taint, b_taint))

    return output


def calc_basehash(sigdata):
    task = sigdata['task']
    basedata = sigdata['varvals'][task]

    if basedata is None:
        basedata = ''

    alldeps = sigdata['taskdeps']
    for dep in alldeps:
        basedata = basedata + dep
        val = sigdata['varvals'][dep]
        if val is not None:
            basedata = basedata + str(val)

    return hashlib.sha256(basedata.encode("utf-8")).hexdigest()

def calc_taskhash(sigdata):
    data = sigdata['basehash']

    for dep in sigdata['runtaskdeps']:
        data = data + sigdata['runtaskhashes'][dep]

    for c in sigdata['file_checksum_values']:
        if c[1]:
            if "./" in c[0]:
                data = data + c[0]
            data = data + c[1]

    if 'taint' in sigdata:
        if 'nostamp:' in sigdata['taint']:
            data = data + sigdata['taint'][8:]
        else:
            data = data + sigdata['taint']

    return hashlib.sha256(data.encode("utf-8")).hexdigest()

def dump_sigfile(a):
    output = []

    with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
        a_data = json.load(f, object_hook=SetDecoder)

    handle_renames(a_data)

    output.append("basehash_ignore_vars: %s" % (sorted(a_data['basehash_ignore_vars'])))

    output.append("taskhash_ignore_tasks: %s" % (sorted(a_data['taskhash_ignore_tasks'] or [])))

    output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))

    output.append("basehash: %s" % (a_data['basehash']))

    for dep in sorted(a_data['gendeps']):
        output.append("List of dependencies for variable %s is %s" % (dep, sorted(a_data['gendeps'][dep])))

    for dep in sorted(a_data['varvals']):
        output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep]))

    if 'runtaskdeps' in a_data:
        output.append("Tasks this task depends on: %s" % (sorted(a_data['runtaskdeps'])))

    if 'file_checksum_values' in a_data:
        output.append("This task depends on the checksums of files: %s" % (sorted(a_data['file_checksum_values'])))

    if 'runtaskhashes' in a_data:
        for dep in sorted(a_data['runtaskhashes']):
            output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]))

    if 'taint' in a_data:
        if a_data['taint'].startswith('nostamp:'):
            msg = a_data['taint'].replace('nostamp:', 'nostamp(uuid4):')
        else:
            msg = a_data['taint']
        output.append("Tainted (by forced/invalidated task): %s" % msg)

    if 'task' in a_data:
        computed_basehash = calc_basehash(a_data)
        output.append("Computed base hash is %s and from file %s" % (computed_basehash, a_data['basehash']))
    else:
        output.append("Unable to compute base hash")

    computed_taskhash = calc_taskhash(a_data)
    output.append("Computed task hash is %s" % computed_taskhash)

    return output
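
# A minimal end-to-end sketch (illustrative only): write a synthetic sigdata
# file using the same zstd-compressed JSON encoding dump_sigtask() uses, then
# parse it back with dump_sigfile(). All values are made up.
def _example_sigdata_roundtrip():
    sigdata = {
        'task': 'do_compile',
        'basehash_ignore_vars': {'TMPDIR', 'WORKDIR'},
        'taskhash_ignore_tasks': None,
        'taskdeps': ['CC'],
        'basehash': 'not-a-real-hash',
        'gendeps': {'CC': set()},
        'varvals': {'do_compile': '${CC} -o foo foo.c', 'CC': 'gcc'},
        'runtaskdeps': [],
        'runtaskhashes': {},
        'file_checksum_values': [],
    }
    fd, sigfile = tempfile.mkstemp(suffix=".sigdata")
    with bb.compress.zstd.open(fd, "wt", encoding="utf-8", num_threads=1) as f:
        json.dump(sigdata, f, sort_keys=True, separators=(",", ":"), cls=SetEncoder)
    lines = dump_sigfile(sigfile)
    os.unlink(sigfile)
    return "\n".join(lines)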