# Copyright (c) 2011 The Chromium OS Authors.
#
# SPDX-License-Identifier: GPL-2.0+
#

import math
import os
import re
import shutil
import tempfile

import command
import commit
import gitutil
from series import Series

# Tags that we detect and remove
re_remove = re.compile(r'^BUG=|^TEST=|^BRANCH=|^Change-Id:|^Review URL:'
    r'|Reviewed-on:|Commit-\w*:')

# Lines which are allowed after a TEST= line
re_allowed_after_test = re.compile('^Signed-off-by:')

# Signoffs
re_signoff = re.compile('^Signed-off-by: *(.*)')

# The start of the cover letter
re_cover = re.compile('^Cover-letter:')

# A cover letter Cc
re_cover_cc = re.compile('^Cover-letter-cc: *(.*)')

# Patch series tag
re_series_tag = re.compile('^Series-([a-z-]*): *(.*)')

# Commit series tag
re_commit_tag = re.compile('^Commit-([a-z-]*): *(.*)')

# Commit tags that we want to collect and keep
re_tag = re.compile('^(Tested-by|Acked-by|Reviewed-by|Patch-cc): (.*)')

# The start of a new commit in the git log
re_commit = re.compile('^commit ([0-9a-f]*)$')

# We detect these since checkpatch doesn't always do it
re_space_before_tab = re.compile('^[+].* \t')

# States we can be in - can we use range() and still have comments?
STATE_MSG_HEADER = 0        # Still in the message header
STATE_PATCH_SUBJECT = 1     # In patch subject (first line of log for a commit)
STATE_PATCH_HEADER = 2      # In patch header (after the subject)
STATE_DIFFS = 3             # In the diff part (past --- line)

class PatchStream:
    """Class for detecting/injecting tags in a patch or series of patches

    We support processing the output of 'git log' to read out the tags we
    are interested in. We can also process a patch file in order to remove
    unwanted tags or inject additional ones. These correspond to the two
    phases of processing.
    """
    def __init__(self, series, name=None, is_log=False):
        """Set up a new patch stream

        Args:
            series: Series object that collected information is stored in
            name: Not used by this class; kept for caller compatibility
            is_log: True if the input is 'git log' output (indented by 4
                characters), False if it is a patch file
        """
        self.skip_blank = False          # True to skip a single blank line
        self.found_test = False          # Found a TEST= line
        self.lines_after_test = 0        # Number of lines found after TEST=
        self.warn = []                   # List of warnings we have collected
        self.linenum = 1                 # Output line number we are up to
        self.in_section = None           # Name of start...END section we are in
        self.notes = []                  # Series notes
        self.section = []                # The current section...END section
        self.series = series             # Info about the patch series
        self.is_log = is_log             # True if indent like git log
        self.in_change = 0               # Non-zero if we are in a change list
        self.blank_count = 0             # Number of blank lines stored up
        self.state = STATE_MSG_HEADER    # What state are we in?
        self.signoff = []                # Contents of signoff line
        self.commit = None               # Current commit

    def AddToSeries(self, line, name, value):
        """Add a new Series-xxx tag.

        When a Series-xxx tag is detected, we come here to record it, if we
        are scanning a 'git log'.

        Args:
            line: Source line containing tag (useful for debug/error messages)
            name: Tag name (part after 'Series-')
            value: Tag value (part after 'Series-xxx: ')
        """
        if name == 'notes':
            # The notes body follows on later lines, up to an END line
            self.in_section = name
            self.skip_blank = False
        if self.is_log:
            self.series.AddTag(self.commit, line, name, value)

    def AddToCommit(self, line, name, value):
        """Add a new Commit-xxx tag.

        When a Commit-xxx tag is detected, we come here to record it.

        Args:
            line: Source line containing tag (useful for debug/error messages)
            name: Tag name (part after 'Commit-')
            value: Tag value (part after 'Commit-xxx: ')
        """
        if name == 'notes':
            # Collected by ProcessLine() under the 'commit-notes' section
            self.in_section = 'commit-' + name
            self.skip_blank = False

    def CloseCommit(self):
        """Save the current commit into our commit list, and reset our state"""
        if self.commit and self.is_log:
            self.series.AddCommit(self.commit)
            self.commit = None

    def ProcessLine(self, line):
        """Process a single line of a patch file or commit log

        This processes a line and returns a list of lines to output. The list
        may be empty or may contain multiple output lines.

        This is where all the complicated logic is located. The class's
        state is used to move between different states and detect things
        properly.

        We can be in one of two modes:
            self.is_log == True: This is 'git log' mode, where most output is
                indented by 4 characters and we are scanning for tags

            self.is_log == False: This is 'patch' mode, where we already have
                all the tags, and are processing patches to remove junk we
                don't want, and add things we think are required.

        Args:
            line: text line to process

        Returns:
            list of output lines, or [] if nothing should be output

        Raises:
            ValueError: if a 'Series-changes:' tag does not carry an integer
                version number
        """
        # Initially we have no output. Prepare the input line string
        out = []
        line = line.rstrip('\n')

        commit_match = re_commit.match(line) if self.is_log else None

        if self.is_log:
            # Strip the 4-character indent that 'git log' adds
            if line[:4] == '    ':
                line = line[4:]

        # Handle state transition and skipping blank lines
        series_tag_match = re_series_tag.match(line)
        commit_tag_match = re_commit_tag.match(line)
        cover_match = re_cover.match(line)
        cover_cc_match = re_cover_cc.match(line)
        signoff_match = re_signoff.match(line)
        tag_match = None
        if self.state == STATE_PATCH_HEADER:
            tag_match = re_tag.match(line)
        is_blank = not line.strip()
        if is_blank:
            if (self.state == STATE_MSG_HEADER
                    or self.state == STATE_PATCH_SUBJECT):
                self.state += 1

            # We don't have a subject in the text stream of patch files
            # It has its own line with a Subject: tag
            if not self.is_log and self.state == STATE_PATCH_SUBJECT:
                self.state += 1
        elif commit_match:
            self.state = STATE_MSG_HEADER

        # If we are in a section, keep collecting lines until we see END
        if self.in_section:
            if line == 'END':
                if self.in_section == 'cover':
                    self.series.cover = self.section
                elif self.in_section == 'notes':
                    if self.is_log:
                        self.series.notes += self.section
                elif self.in_section == 'commit-notes':
                    if self.is_log:
                        self.commit.notes += self.section
                else:
                    self.warn.append("Unknown section '%s'" % self.in_section)
                self.in_section = None
                self.skip_blank = True
                self.section = []
            else:
                self.section.append(line)

        # Detect the commit subject
        elif not is_blank and self.state == STATE_PATCH_SUBJECT:
            self.commit.subject = line

        # Detect the tags we want to remove, and skip blank lines
        elif re_remove.match(line) and not commit_tag_match:
            self.skip_blank = True

            # TEST= should be the last thing in the commit, so remove
            # everything after it
            if line.startswith('TEST='):
                self.found_test = True
        elif self.skip_blank and is_blank:
            self.skip_blank = False

        # Detect the start of a cover letter section
        elif cover_match:
            self.in_section = 'cover'
            self.skip_blank = False

        elif cover_cc_match:
            value = cover_cc_match.group(1)
            self.AddToSeries(line, 'cover-cc', value)

        # If we are in a change list, keep collecting lines until a blank one
        elif self.in_change:
            if is_blank:
                # Blank line ends this change list
                self.in_change = 0
            elif line == '---':
                # The diff marker also ends the list; reprocess it as a
                # normal line now that we have left the change list
                self.in_change = 0
                out = self.ProcessLine(line)
            else:
                if self.is_log:
                    self.series.AddChange(self.in_change, self.commit, line)
            self.skip_blank = False

        # Detect Series-xxx tags
        elif series_tag_match:
            name = series_tag_match.group(1)
            value = series_tag_match.group(2)
            if name == 'changes':
                # value is the version number: e.g. 1, or 2
                try:
                    self.in_change = int(value)
                except ValueError:
                    raise ValueError("%s: Cannot decode version info '%s'" %
                        (self.commit.hash, line))
            else:
                self.AddToSeries(line, name, value)
                self.skip_blank = True

        # Detect Commit-xxx tags
        elif commit_tag_match:
            name = commit_tag_match.group(1)
            value = commit_tag_match.group(2)
            if name == 'notes':
                self.AddToCommit(line, name, value)
                self.skip_blank = True

        # Detect the start of a new commit
        elif commit_match:
            self.CloseCommit()
            self.commit = commit.Commit(commit_match.group(1))

        # Detect tags in the commit message
        elif tag_match:
            # Remove Tested-by self, since few will take much notice.
            # USER may be unset (e.g. under cron or in a container); in that
            # case we cannot identify 'self', so the tag is kept.
            user = os.getenv('USER')
            if (tag_match.group(1) == 'Tested-by' and user and
                    (user + '@') in tag_match.group(2)):
                self.warn.append("Ignoring %s" % line)
            elif tag_match.group(1) == 'Patch-cc':
                self.commit.AddCc(tag_match.group(2).split(','))
            else:
                out = [line]

        # Suppress duplicate signoffs
        elif signoff_match:
            if (self.is_log or not self.commit or
                    self.commit.CheckDuplicateSignoff(signoff_match.group(1))):
                out = [line]

        # Well that means this is an ordinary line
        else:
            # Look for ugly (non-)ASCII characters; ASCII ends at 0x7f
            for pos, ch in enumerate(line, 1):
                # TODO: Would be nicer to report source filename and line
                if ord(ch) > 0x7f:
                    self.warn.append("Line %d/%d ('%s') has funny ascii char" %
                        (self.linenum, pos, line))

            # Look for space before tab
            m = re_space_before_tab.match(line)
            if m:
                # NOTE: the pattern is anchored with '^', so m.start() is
                # always 0 here
                self.warn.append('Line %d/%d has space before tab' %
                    (self.linenum, m.start()))

            # OK, we have a valid non-blank line
            out = [line]
            self.linenum += 1
            self.skip_blank = False
            if self.state == STATE_DIFFS:
                pass

            # If this is the start of the diffs section, emit our tags and
            # change log
            elif line == '---':
                self.state = STATE_DIFFS

                # Output the tags (signoff first), then change list
                out = []
                log = self.series.MakeChangeLog(self.commit)
                out += [line]
                if self.commit:
                    out += self.commit.notes
                out += [''] + log
            elif self.found_test:
                if not re_allowed_after_test.match(line):
                    self.lines_after_test += 1

        return out

    def Finalize(self):
        """Close out processing of this patch stream"""
        self.CloseCommit()
        if self.lines_after_test:
            self.warn.append('Found %d lines after TEST=' %
                    self.lines_after_test)

    def ProcessStream(self, infd, outfd):
        """Copy a stream from infd to outfd, filtering out unwanted things.

        This is used to process patch files one at a time.

        Args:
            infd: Input stream file object
            outfd: Output stream file object
        """
        # Extract the filename from each diff, for nice warnings
        fname = None
        last_fname = None
        re_fname = re.compile('diff --git a/(.*) b/.*')
        while True:
            line = infd.readline()
            if not line:
                break
            out = self.ProcessLine(line)

            # Try to detect blank lines at EOF. Added blank lines ('+') are
            # held back until we know what follows them.
            for out_line in out:
                match = re_fname.match(out_line)
                if match:
                    last_fname = fname
                    fname = match.group(1)
                if out_line == '+':
                    self.blank_count += 1
                else:
                    if self.blank_count and (out_line == '-- ' or match):
                        # At a new diff header the pending blanks belong to
                        # the previous file; at the '-- ' signature they
                        # belong to the file we are still in.
                        blank_fname = last_fname if match else fname
                        self.warn.append("Found possible blank line(s) at "
                                "end of file '%s'" % blank_fname)
                    outfd.write('+\n' * self.blank_count)
                    outfd.write(out_line + '\n')
                    self.blank_count = 0
        self.Finalize()


def GetMetaDataForList(commit_range, git_dir=None, count=None,
                       series = None, allow_overwrite=False):
    """Reads out patch series metadata from the commits

    This does a 'git log' on the relevant commits and pulls out the tags we
    are interested in.

    Args:
        commit_range: Range of commits to count (e.g. 'HEAD..base')
        git_dir: Path to git repository (None to use default)
        count: Number of commits to list, or None for no limit
        series: Series object to add information into. By default a new series
            is started.
        allow_overwrite: Allow tags to overwrite an existing tag
    Returns:
        A Series object containing information about the commits.
    """
    if not series:
        series = Series()
    series.allow_overwrite = allow_overwrite
    params = gitutil.LogCmd(commit_range, reverse=True, count=count,
                            git_dir=git_dir)
    stdout = command.RunPipe([params], capture=True).stdout
    ps = PatchStream(series, is_log=True)
    for line in stdout.splitlines():
        ps.ProcessLine(line)
    ps.Finalize()
    return series

def GetMetaData(start, count):
    """Reads out patch series metadata from the commits

    This does a 'git log' on the relevant commits and pulls out the tags we
    are interested in.

    Args:
        start: Commit to start from: 0=HEAD, 1=next one, etc.
        count: Number of commits to list
    Returns:
        A Series object containing information about the commits.
    """
    return GetMetaDataForList('HEAD~%d' % start, None, count)

def FixPatch(backup_dir, fname, series, commit):
    """Fix up a patch file, by adding/removing as required.

    We remove our tags from the patch file, insert changes lists, etc.
    The patch file is processed in place, and overwritten.

    A backup file is put into backup_dir (if not None).

    Args:
        backup_dir: Directory to copy the original patch file into, or None
            for no backup
        fname: Filename to patch file to process
        series: Series information about this patch set
        commit: Commit object for this patch file
    Return:
        A list of errors, or [] if all ok.
    """
    handle, tmpname = tempfile.mkstemp()
    ps = PatchStream(series)
    ps.commit = commit

    # 'with' makes sure both files are closed even if processing fails
    with os.fdopen(handle, 'w') as outfd:
        with open(fname, 'r') as infd:
            ps.ProcessStream(infd, outfd)

    # Create a backup file if required
    if backup_dir:
        shutil.copy(fname, os.path.join(backup_dir, os.path.basename(fname)))
    shutil.move(tmpname, fname)
    return ps.warn

def FixPatches(series, fnames):
    """Fix up a list of patches identified by filenames

    The patch files are processed in place, and overwritten.

    Args:
        series: The series object
        fnames: List of patch files to process
    Returns:
        The series object that was passed in
    """
    # Current workflow creates patches, so we shouldn't need a backup
    backup_dir = None  #tempfile.mkdtemp('clean-patch')
    count = 0
    for count, fname in enumerate(fnames, 1):
        # Patch files are expected to be in the same order as the commits
        commit = series.commits[count - 1]
        commit.patch = fname
        result = FixPatch(backup_dir, fname, series, commit)
        if result:
            # Single-argument print(...) behaves the same on Python 2 and 3
            print('%d warnings for %s:' % (len(result), fname))
            for warn in result:
                print('\t%s' % warn)
            print('')
    print('Cleaned %d patches' % count)
    return series

def InsertCoverLetter(fname, series, count):
    """Inserts a cover letter with the required info into patch 0

    Args:
        fname: Input / output filename of the cover letter file
        series: Series object
        count: Number of patches in the series
    """
    with open(fname, 'r') as fd:
        lines = fd.readlines()

    text = series.cover
    prefix = series.GetPatchPrefix()

    # Build all the output before reopening the file for writing, so that a
    # failure here does not leave a truncated cover letter behind
    out_lines = []
    for line in lines:
        if line.startswith('Subject:'):
            # if more than 10 or 100 patches, it should say 00/xx, 000/xxx, etc
            # (log10 is undefined for 0, so an empty series gets one digit)
            zero_repeat = int(math.log10(count)) + 1 if count > 0 else 1
            zero = '0' * zero_repeat
            line = 'Subject: [%s %s/%d] %s\n' % (prefix, zero, count, text[0])

        # Insert our cover letter
        elif line.startswith('*** BLURB HERE ***'):
            # First the blurb text
            line = '\n'.join(text[1:]) + '\n'
            if series.get('notes'):
                line += '\n'.join(series.notes) + '\n'

            # Now the change list
            out = series.MakeChangeLog(None)
            line += '\n' + '\n'.join(out)
        out_lines.append(line)

    with open(fname, 'w') as fd:
        fd.writelines(out_lines)