# Local file checksum cache implementation
#
# Copyright (C) 2012 Intel Corporation
#
# SPDX-License-Identifier: GPL-2.0-only
#

import glob
import operator
import os
import stat
import bb.utils
import logging
import re
from bb.cache import MultiProcessCache

logger = logging.getLogger("BitBake.Cache")

# Splits a file list on the whitespace that follows a ":True" or ":False"
# suffix, so whitespace inside a path does not split the entry.
filelist_regex = re.compile(r'(?:(?<=:True)|(?<=:False))\s+')


# mtime cache (non-persistent)
# based upon the assumption that files do not change during bitbake run
class FileMtimeCache(object):
    """In-memory cache of file modification times, keyed by path."""

    # NOTE: class-level dict, so the mapping is shared by every instance.
    cache = {}

    def cached_mtime(self, f):
        """Return the mtime of f, calling os.stat() only on a cache miss.

        Raises OSError if f is not cached and cannot be stat'ed.
        """
        if f not in self.cache:
            self.cache[f] = os.stat(f)[stat.ST_MTIME]
        return self.cache[f]

    def cached_mtime_noerror(self, f):
        """Like cached_mtime(), but return 0 if f cannot be stat'ed.

        The 0 fallback is not stored in the cache.
        """
        if f not in self.cache:
            try:
                self.cache[f] = os.stat(f)[stat.ST_MTIME]
            except OSError:
                return 0
        return self.cache[f]

    def update_mtime(self, f):
        """Re-stat f unconditionally, store the new mtime and return it."""
        self.cache[f] = os.stat(f)[stat.ST_MTIME]
        return self.cache[f]

    def clear(self):
        """Drop every cached mtime."""
        self.cache.clear()


# Checksum + mtime cache (persistent)
class FileChecksumCache(MultiProcessCache):
    """Cache of (mtime, md5) pairs for local files, keyed by normalised path."""

    cache_file_name = "local_file_checksum_cache.dat"
    CACHE_VERSION = 1

    def __init__(self):
        self.mtime_cache = FileMtimeCache()
        MultiProcessCache.__init__(self)

    def get_checksum(self, f):
        """Return the md5 checksum of file f.

        The cached value is reused when the cached mtime equals the file's
        current mtime; otherwise the checksum is recomputed and recorded in
        self.cachedata_extras[0].

        Raises OSError if f cannot be stat'ed.
        """
        f = os.path.normpath(f)
        entry = self.cachedata[0].get(f)
        cmtime = self.mtime_cache.cached_mtime(f)
        if entry:
            (mtime, hashval) = entry
            if cmtime == mtime:
                return hashval
            else:
                bb.debug(2, "file %s changed mtime, recompute checksum" % f)

        hashval = bb.utils.md5_file(f)
        self.cachedata_extras[0][f] = (cmtime, hashval)
        return hashval

    def merge_data(self, source, dest):
        """Merge the entries of source[0] into dest[0].

        An entry already present in dest[0] is only replaced when the source
        entry has a strictly newer mtime.
        """
        for h in source[0]:
            # Look the key up in dest[0] (the path -> entry mapping), not in
            # the outer container, otherwise the mtime comparison never runs.
            if h in dest[0]:
                (smtime, _) = source[0][h]
                (dmtime, _) = dest[0][h]
                if smtime > dmtime:
                    dest[0][h] = source[0][h]
            else:
                dest[0][h] = source[0][h]

    def get_checksums(self, filelist, pn, localdirsexclude):
        """Get checksums for a list of files

        filelist is a string of "<path>:True" / "<path>:False" entries
        separated by whitespace. Entries flagged "False" are skipped. Paths
        containing "*" are expanded with glob, and directories that are not
        symlinks are walked recursively, pruning any subdirectory whose name
        is in localdirsexclude. pn is only used in the warning message.

        Returns a list of (path, checksum) tuples sorted by checksum.
        """

        def checksum_file(f):
            # Returns None (after a warning) when the checksum cannot be read.
            try:
                checksum = self.get_checksum(f)
            except OSError as e:
                bb.warn("Unable to get checksum for %s SRC_URI entry %s: %s" % (pn, os.path.basename(f), e))
                return None
            return checksum

        #
        # Changing the format of file-checksums is problematic as both OE and Bitbake have
        # knowledge of them. We need to encode a new piece of data, the portion of the path
        # we care about from a checksum perspective. This means that files that change subdirectory
        # are tracked by the task hashes. To do this, we do something horrible and put a "/./" into
        # the path. The filesystem handles it but it gives us a marker to know which subsection
        # of the path to cache.
        #
        def checksum_dir(pth):
            # Handle directories recursively
            if pth == "/":
                bb.fatal("Refusing to checksum /")
            pth = pth.rstrip("/")
            marked = os.path.join(pth, ".")
            dirchecksums = []
            for root, dirs, files in os.walk(pth, topdown=True):
                # Prune in place so os.walk does not descend into excluded dirs.
                dirs[:] = [d for d in dirs if d not in localdirsexclude]
                for name in files:
                    # Only the leading occurrence of pth gets the "/./" marker;
                    # a nested directory repeating the same string is left alone.
                    fullpth = os.path.join(root, name).replace(pth, marked, 1)
                    checksum = checksum_file(fullpth)
                    if checksum:
                        dirchecksums.append((fullpth, checksum))
            return dirchecksums

        def checksum_path(p):
            # Checksum one concrete path: walk real directories, skip
            # symlinked directories, checksum anything else as a file.
            if os.path.isdir(p):
                if os.path.islink(p):
                    return []
                return checksum_dir(p)
            checksum = checksum_file(p)
            if checksum:
                return [(p, checksum)]
            return []

        checksums = []
        for pth in filelist_regex.split(filelist):
            if not pth:
                continue
            pth = pth.strip()
            if not pth:
                continue
            # Split on the last colon so paths that contain ":" stay intact.
            # An entry with no colon still raises IndexError.
            fields = pth.rsplit(":", 1)
            exist = fields[1]
            if exist == "False":
                continue
            pth = fields[0]
            if '*' in pth:
                # Handle globs
                for f in glob.glob(pth):
                    checksums.extend(checksum_path(f))
            else:
                checksums.extend(checksum_path(pth))

        checksums.sort(key=operator.itemgetter(1))
        return checksums