#
# Copyright (c) 2017, Intel Corporation.
#
# SPDX-License-Identifier: GPL-2.0-only
#
"""Functionality for analyzing buildstats"""
import json
import logging
import os
import re
from collections import namedtuple
from statistics import mean


log = logging.getLogger()


taskdiff_fields = ('pkg', 'pkg_op', 'task', 'task_op', 'value1', 'value2',
                   'absdiff', 'reldiff')
TaskDiff = namedtuple('TaskDiff', ' '.join(taskdiff_fields))


class BSError(Exception):
    """Error handling of buildstats"""
    pass


class BSTask(dict):
    """Buildstats of one task run, stored as a dict.

    The keys 'start_time', 'elapsed_time', 'status', 'iostat', 'rusage' and
    'child_rusage' are always present; derived statistics are exposed as
    read-only properties.
    """

    def __init__(self, *args, **kwargs):
        # Set the defaults first so that data passed by the caller
        # overrides them in the dict constructor below
        self['start_time'] = None
        self['elapsed_time'] = None
        self['status'] = None
        self['iostat'] = {}
        self['rusage'] = {}
        self['child_rusage'] = {}
        super(BSTask, self).__init__(*args, **kwargs)

    @property
    def cputime(self):
        """Sum of user and system time taken by the task"""
        rusage = self['rusage']['ru_stime'] + self['rusage']['ru_utime']
        if self['child_rusage']:
            # Child rusage may have been optimized out
            return rusage + self['child_rusage']['ru_stime'] + self['child_rusage']['ru_utime']
        else:
            return rusage

    @property
    def walltime(self):
        """Elapsed wall clock time"""
        return self['elapsed_time']

    @property
    def read_bytes(self):
        """Bytes read from the block layer"""
        return self['iostat']['read_bytes']

    @property
    def write_bytes(self):
        """Bytes written to the block layer"""
        return self['iostat']['write_bytes']

    @property
    def read_ops(self):
        """Number of read operations on the block layer"""
        if self['child_rusage']:
            # Child rusage may have been optimized out
            return self['rusage']['ru_inblock'] + self['child_rusage']['ru_inblock']
        else:
            return self['rusage']['ru_inblock']

    @property
    def write_ops(self):
        """Number of write operations on the block layer"""
        if self['child_rusage']:
            # Child rusage may have been optimized out
            return self['rusage']['ru_oublock'] + self['child_rusage']['ru_oublock']
        else:
            return self['rusage']['ru_oublock']

    @classmethod
    def from_file(cls, buildstat_file, fallback_end=0):
        """Read buildstat text file.

        The file is parsed as "key: value" lines; lines without a colon are
        ignored. fallback_end is an optional end time for tasks that are not
        recorded as finishing.

        Raises BSError if no start time, or no end time (recorded or
        fallback), could be determined.
        """
        bs_task = cls()
        log.debug("Reading task buildstats from %s", buildstat_file)
        # Both must be pre-initialized: a file lacking the corresponding
        # line has to end up in the BSError below, not in a NameError
        start_time = None
        end_time = None
        with open(buildstat_file) as fobj:
            for line in fobj:
                key, sep, val = line.partition(':')
                if not sep:
                    # Not a "key: value" line (e.g. a blank line)
                    continue
                val = val.strip()
                if key == 'Started':
                    start_time = float(val)
                    bs_task['start_time'] = start_time
                elif key == 'Ended':
                    end_time = float(val)
                elif key.startswith('IO '):
                    split = key.split()
                    bs_task['iostat'][split[1]] = int(val)
                elif 'rusage' in key:
                    split = key.split()
                    ru_key = split[-1]
                    # The two time fields are fractional, all other rusage
                    # fields are parsed as integers
                    if ru_key in ('ru_stime', 'ru_utime'):
                        val = float(val)
                    else:
                        val = int(val)
                    # A key starting with 'rusage' belongs to the task
                    # itself, anything else is treated as child rusage
                    ru_type = 'rusage' if split[0] == 'rusage' else \
                              'child_rusage'
                    bs_task[ru_type][ru_key] = val
                elif key == 'Status':
                    bs_task['status'] = val
        # If the task didn't finish, fill in the fallback end time if specified
        if start_time and not end_time and fallback_end:
            end_time = fallback_end
        if start_time and end_time:
            bs_task['elapsed_time'] = end_time - start_time
        else:
            raise BSError("{} looks like a invalid buildstats file".format(buildstat_file))
        return bs_task


class BSTaskAggregate(object):
    """Class representing multiple runs of the same task"""
    properties = ('cputime', 'walltime', 'read_bytes', 'write_bytes',
                  'read_ops', 'write_ops')

    def __init__(self, tasks=None):
        self._tasks = tasks or []
        # Cache of already calculated aggregate values, keyed by property name
        self._properties = {}

    def __getattr__(self, name):
        """Return the mean of property 'name' over all aggregated tasks"""
        if name in self.properties:
            if name not in self._properties:
                # Calculate properties on demand only. We only provide mean
                # value, so far
                self._properties[name] = mean([getattr(t, name) for t in self._tasks])
            return self._properties[name]
        else:
            raise AttributeError("'BSTaskAggregate' has no attribute '{}'".format(name))

    def append(self, task):
        """Append new task"""
        assert isinstance(task, BSTask), "Type is '{}' instead of 'BSTask'".format(type(task))
        # Reset pre-calculated properties
        self._properties = {}
        self._tasks.append(task)


class BSRecipe(object):
    """Class representing buildstats of one recipe"""
    def __init__(self, name, epoch, version, revision):
        self.name = name
        self.epoch = epoch
        self.version = version
        self.revision = revision
        if epoch is None:
            self.evr = "{}-{}".format(version, revision)
        else:
            self.evr = "{}_{}-{}".format(epoch, version, revision)
        # Maps task name to BSTask (or BSTaskAggregate after aggregation)
        self.tasks = {}

    def aggregate(self, bsrecipe):
        """Aggregate data of another recipe buildstats

        Raises ValueError if the other recipe has a different nevr or a
        different set of task names.
        """
        if self.nevr != bsrecipe.nevr:
            raise ValueError("Refusing to aggregate buildstats, recipe version "
                             "differs: {} vs. {}".format(self.nevr, bsrecipe.nevr))
        if set(self.tasks.keys()) != set(bsrecipe.tasks.keys()):
            raise ValueError("Refusing to aggregate buildstats, set of tasks "
                             "in {} differ".format(self.name))

        for taskname, taskdata in bsrecipe.tasks.items():
            # Convert a plain task into an aggregate on first use
            if not isinstance(self.tasks[taskname], BSTaskAggregate):
                self.tasks[taskname] = BSTaskAggregate([self.tasks[taskname]])
            self.tasks[taskname].append(taskdata)

    @property
    def nevr(self):
        """Recipe name joined with epoch/version/revision string"""
        return self.name + '-' + self.evr


class BuildStats(dict):
    """Class representing buildstats of one build"""

    @property
    def num_tasks(self):
        """Get number of tasks"""
        return sum(len(recipe.tasks) for recipe in self.values())

    @classmethod
    def from_json(cls, bs_json):
        """Create new BuildStats object from JSON object"""
        buildstats = cls()
        for recipe in bs_json:
            if recipe['name'] in buildstats:
                raise BSError("Cannot handle multiple versions of the same "
                              "package ({})".format(recipe['name']))
            bsrecipe = BSRecipe(recipe['name'], recipe['epoch'],
                                recipe['version'], recipe['revision'])
            for task, data in recipe['tasks'].items():
                bsrecipe.tasks[task] = BSTask(data)

            buildstats[recipe['name']] = bsrecipe

        return buildstats

    @staticmethod
    def from_file_json(path):
        """Load buildstats from a JSON file"""
        with open(path) as fobj:
            bs_json = json.load(fobj)
        return BuildStats.from_json(bs_json)

    @staticmethod
    def split_nevr(nevr):
        """Split name and version information from recipe "nevr" string

        Returns a (name, epoch, version, revision) tuple; epoch is None when
        the string carries no "<epoch>_" prefix in front of the version.

        Raises BSError if the string cannot be split.
        """
        n_e_v, sep, revision = nevr.rpartition('-')
        match = None
        if sep:
            match = re.match(r'^(?P<name>\S+)-((?P<epoch>[0-9]{1,5})_)?(?P<version>[0-9]\S*)$',
                             n_e_v)
            if not match:
                # If we're not able to parse a version starting with a number, just
                # take the part after last dash
                match = re.match(r'^(?P<name>\S+)-((?P<epoch>[0-9]{1,5})_)?(?P<version>[^-]+)$',
                                 n_e_v)
        if not match:
            raise BSError("Unable to parse recipe name and version from '{}'".format(nevr))
        name = match.group('name')
        version = match.group('version')
        epoch = match.group('epoch')
        return name, epoch, version, revision

    @staticmethod
    def parse_top_build_stats(path):
        """
        Parse the top-level build_stats file for build-wide start and duration.

        Returns a (start, elapsed) tuple; a value stays 0 when the
        corresponding line is not present. Lines without a colon are ignored.
        """
        start = elapsed = 0
        with open(path) as fobj:
            for line in fobj:
                key, sep, val = line.partition(':')
                if not sep:
                    # Not a "key: value" line (e.g. a blank line)
                    continue
                val = val.strip()
                if key == 'Build Started':
                    start = float(val)
                elif key == "Elapsed time":
                    # Only the first whitespace-separated token is the number
                    elapsed = float(val.split()[0])
        return start, elapsed

    @classmethod
    def from_dir(cls, path):
        """Load buildstats from a buildstats directory

        Every sub-directory (except "reduced_proc_pressure") is taken as one
        recipe and every entry inside it as one task file. Tasks without a
        recorded end time get the build start plus the build's elapsed time
        as their end time.

        Raises BSError if the directory has no 'build_stats' file or contains
        the same recipe name more than once.
        """
        top_stats = os.path.join(path, 'build_stats')
        if not os.path.isfile(top_stats):
            raise BSError("{} does not look like a buildstats directory".format(path))

        log.debug("Reading buildstats directory %s", path)
        buildstats = cls()
        build_started, build_elapsed = cls.parse_top_build_stats(top_stats)
        build_end = build_started + build_elapsed

        subdirs = os.listdir(path)
        for dirname in subdirs:
            recipe_dir = os.path.join(path, dirname)
            if dirname == "reduced_proc_pressure" or not os.path.isdir(recipe_dir):
                continue
            name, epoch, version, revision = cls.split_nevr(dirname)
            bsrecipe = BSRecipe(name, epoch, version, revision)
            for task in os.listdir(recipe_dir):
                bsrecipe.tasks[task] = BSTask.from_file(
                    os.path.join(recipe_dir, task), build_end)
            if name in buildstats:
                raise BSError("Cannot handle multiple versions of the same "
                              "package ({})".format(name))
            buildstats[name] = bsrecipe

        return buildstats

    def aggregate(self, buildstats):
        """Aggregate other buildstats into this

        Raises ValueError if the two builds do not contain the same recipes.
        """
        if set(self.keys()) != set(buildstats.keys()):
            raise ValueError("Refusing to aggregate buildstats, set of "
                             "recipes is different: %s" % (set(self.keys()) ^ set(buildstats.keys())))
        for pkg, data in buildstats.items():
            self[pkg].aggregate(data)


def diff_buildstats(bs1, bs2, stat_attr, min_val=None, min_absdiff=None, only_tasks=None):
    """Compare the tasks of two buildstats

    bs1, bs2     -- the buildstats to compare
    stat_attr    -- name of the task attribute to compare (e.g. 'cputime')
    min_val      -- if set, drop tasks whose larger value is below this
    min_absdiff  -- if set, drop tasks whose absolute difference is below this
    only_tasks   -- if non-empty, only consider tasks with these names

    Returns a list of TaskDiff tuples. pkg_op / task_op are '+' when the
    package / task is missing from bs1, '-' when missing from bs2 and ' '
    otherwise. A missing value is taken as 0 and reldiff is infinite
    whenever the bs1 value is 0.
    """
    tasks_diff = []
    pkgs = set(bs1.keys()).union(set(bs2.keys()))
    for pkg in pkgs:
        tasks1 = bs1[pkg].tasks if pkg in bs1 else {}
        tasks2 = bs2[pkg].tasks if pkg in bs2 else {}
        if only_tasks:
            tasks1 = {k: v for k, v in tasks1.items() if k in only_tasks}
            tasks2 = {k: v for k, v in tasks2.items() if k in only_tasks}

        if not tasks1:
            pkg_op = '+'
        elif not tasks2:
            pkg_op = '-'
        else:
            pkg_op = ' '

        for task in set(tasks1.keys()).union(set(tasks2.keys())):
            task_op = ' '
            if task in tasks1:
                val1 = getattr(tasks1[task], stat_attr)
            else:
                task_op = '+'
                val1 = 0
            if task in tasks2:
                val2 = getattr(tasks2[task], stat_attr)
            else:
                val2 = 0
                task_op = '-'

            if val1 == 0:
                reldiff = float('inf')
            else:
                reldiff = 100 * (val2 - val1) / val1

            if min_val and max(val1, val2) < min_val:
                log.debug("Filtering out %s:%s (%s)", pkg, task,
                          max(val1, val2))
                continue
            if min_absdiff and abs(val2 - val1) < min_absdiff:
                log.debug("Filtering out %s:%s (difference of %s)", pkg, task,
                          val2-val1)
                continue
            tasks_diff.append(TaskDiff(pkg, pkg_op, task, task_op, val1, val2,
                                       val2-val1, reldiff))
    return tasks_diff


class BSVerDiff(object):
    """Class representing recipe version differences between two buildstats"""
    def __init__(self, bs1, bs2):
        RecipeVerDiff = namedtuple('RecipeVerDiff', 'left right')

        recipes1 = set(bs1.keys())
        recipes2 = set(bs2.keys())

        self.new = {r: bs2[r] for r in sorted(recipes2 - recipes1)}
        self.dropped = {r: bs1[r] for r in sorted(recipes1 - recipes2)}
        self.echanged = {}
        self.vchanged = {}
        self.rchanged = {}
        self.unchanged = {}
        self.empty_diff = False

        # Classify every common recipe by the most significant field that
        # differs: epoch first, then version, then revision
        for recipe in recipes2.intersection(recipes1):
            rdiff = RecipeVerDiff(bs1[recipe], bs2[recipe])
            if bs1[recipe].epoch != bs2[recipe].epoch:
                self.echanged[recipe] = rdiff
            elif bs1[recipe].version != bs2[recipe].version:
                self.vchanged[recipe] = rdiff
            elif bs1[recipe].revision != bs2[recipe].revision:
                self.rchanged[recipe] = rdiff
            else:
                self.unchanged[recipe] = rdiff

        # Same number of recipes on both sides and all of them unchanged
        # means there is nothing to report
        if len(recipes1) == len(recipes2) == len(self.unchanged):
            self.empty_diff = True

    def __bool__(self):
        """True when there is at least one difference between the builds"""
        return not self.empty_diff