#!/usr/bin/env python2
#
# Copyright (C) 2014, Masahiro Yamada <yamada.m@jp.panasonic.com>
#
# SPDX-License-Identifier: GPL-2.0+
#

'''
A tool to create/update the mailmap file

The command 'git shortlog' summarizes git log output in a format suitable
for inclusion in release announcements. Each commit will be grouped by
author and title.

One problem is that the authors' name and/or email address is sometimes
spelled differently. The .mailmap feature can be used to coalesce together
commits by the same person.
(See 'man git-shortlog' for further information of this feature.)

This tool helps to create/update the mailmap file.

It runs 'git shortlog' internally and searches differently spelled author
names which share the same email address. The author name with the most
commits is assumed to be a canonical real name. If the number of commits
from the canonical name is equal to or greater than 'MIN_COMMITS',
the entry for the canonical name will be output. ('MIN_COMMITS' is used
here because we do not want to create a fat mailmap by adding every author
with only a few commits.)

If there exists a mailmap file specified by the mailmap.file configuration
options or '.mailmap' at the toplevel of the repository, it is used as
a base file. (The mailmap.file configuration takes precedence over the
'.mailmap' file if both exist.)

The base file and the newly added entries are merged together and sorted
alphabetically (but the comment block is kept untouched), and then printed
to standard output.

Usage
-----

  scripts/mailmapper

prints the mailmapping to standard output.

  scripts/mailmapper > tmp; mv tmp .mailmap

will be useful for updating '.mailmap' file.
'''

import sys
import os
import subprocess

# The entries only for the canonical names with MIN_COMMITS or more commits.
# This limitation is used so as not to create a too big mailmap file.
MIN_COMMITS = 50


def count_commits_per_name(shortlog):
    '''Build the 'author name' vs 'number of commits' database.

    shortlog is the output of 'git shortlog -s -n': one
    '<count><whitespace><name>' record per line.  Lines that do not
    contain both fields (i.e. an empty author name) are skipped.

    Returns a dict mapping each author name to its commit count.
    '''
    commits_per_name = {}

    for line in shortlog.splitlines():
        try:
            commits, name = line.rstrip().split(None, 1)
        except ValueError:
            # Empty author name: there is nothing to record.  Skip the
            # line instead of falling through with undefined/stale values.
            continue
        commits_per_name[name] = int(commits)

    return commits_per_name


def select_canonical_names(shortlog, commits_per_name,
                           min_commits=MIN_COMMITS):
    '''Coalesce the author names which share the same email address.

    shortlog is the output of 'git shortlog -s -n -e': one
    '<count><whitespace><name> <mail>' record per line.
    commits_per_name is the database from count_commits_per_name().

    For every email address used with more than one spelling of the
    author name, the name with the most commits is taken as the
    canonical one (on a tie, the spelling seen last wins).

    Returns a dict mapping '<mail>' to the canonical name, restricted to
    canonical names with min_commits or more commits.
    '''
    mail_vs_name = {}
    output = {}

    for line in shortlog.splitlines():
        # line.rsplit(None, 1) is not safe because weird email addresses
        # might include whitespaces.  The address is the last field, so
        # cut at the last '<'.
        tmp, sep, mail = line.rpartition('<')
        if not sep:
            # No email part at all; this line cannot be mapped.
            continue
        mail = '<' + mail.rstrip()

        try:
            _, name = tmp.rstrip().split(None, 1)
        except ValueError:
            # author name is empty
            name = ''

        if mail not in mail_vs_name:
            mail_vs_name[mail] = name
            continue

        # Another name for the same email address: take the name with
        # more commits.  max() returns the first of equal candidates, so
        # listing the new name first lets it win a tie.
        prev_name = mail_vs_name[mail]
        major_name = max([name, prev_name],
                         key=lambda x: commits_per_name.get(x, 0))
        mail_vs_name[mail] = major_name
        if commits_per_name.get(major_name, 0) >= min_commits:
            output[mail] = major_name

    return output


def split_comment_block(lines):
    '''Split mailmap lines into (comment_block, entry_lines).

    The comment block is the leading run of lines starting with '#' or
    consisting of a bare newline; everything from the first other line
    onwards is an entry line.  Every returned line ends with a newline,
    so that a final line without one cannot be glued to its neighbour
    when the entries are sorted and joined.
    '''
    lines = [line if line.endswith('\n') else line + '\n' for line in lines]

    boundary = 0
    while boundary < len(lines) and lines[boundary].startswith(('#', '\n')):
        boundary += 1

    return lines[:boundary], lines[boundary:]


def read_base_mailmap(candidates):
    '''Load the first readable file among candidates as the base mailmap.

    Returns (comment_block, entry_lines) as split_comment_block() does,
    or two empty lists if none of the candidates can be read.
    '''
    for map_file in candidates:
        try:
            with open(map_file) as infile:
                lines = infile.readlines()
        except (IOError, OSError):
            # Failed to open/read.  Try next.
            continue
        return split_comment_block(lines)

    return [], []


def merge_entries(base_entries, new_entries):
    '''Merge newly found entries into the base ones and sort the result.

    base_entries is a list of mailmap lines; new_entries maps '<mail>' to
    the canonical name.  A new entry identical to a line already present
    is not added a second time.

    Returns a new, alphabetically sorted list of lines.
    '''
    merged = list(base_entries)
    known = set(merged)

    for mail, name in new_entries.items():
        entry = name + ' ' + mail + '\n'
        if entry not in known:
            known.add(entry)
            merged.append(entry)

    merged.sort()
    return merged


def main():
    '''Print the updated mailmap of the current repository to stdout.'''
    try:
        toplevel = subprocess.check_output(['git', 'rev-parse',
                                            '--show-toplevel'])
    except subprocess.CalledProcessError:
        sys.exit('Please run in a git repository.')

    # Change the current working directory to the toplevel of the repository
    # for our easier life.  (rstrip() drops the trailing '\n'.)
    os.chdir(toplevel.rstrip())

    # 'HEAD' is given explicitly: without a revision, 'git shortlog' reads
    # the log from standard input whenever stdin is not a terminal.

    # First, create 'author name' vs 'number of commits' database.
    # We assume the name with the most commits as the canonical real name.
    commits_per_name = count_commits_per_name(
        subprocess.check_output(['git', 'shortlog', '-s', '-n', 'HEAD']))

    # Next, coalesce the author names with the same email address
    new_entries = select_canonical_names(
        subprocess.check_output(['git', 'shortlog', '-s', '-n', '-e',
                                 'HEAD']),
        commits_per_name)

    # [1] If there exists a mailmap file at the location pointed to
    #     by the mailmap.file configuration option, update it.
    # [2] If the file .mailmap exists at the toplevel of the repository,
    #     update it.
    # [3] Otherwise, create a new mailmap file.
    mailmap_files = []

    try:
        config_mailmap = subprocess.check_output(['git', 'config',
                                                  'mailmap.file'])
    except subprocess.CalledProcessError:
        # The option is not set.
        config_mailmap = ''

    config_mailmap = config_mailmap.rstrip()
    if config_mailmap:
        mailmap_files.append(config_mailmap)

    mailmap_files.append('.mailmap')

    comment_block, base_entries = read_base_mailmap(mailmap_files)
    output_lines = merge_entries(base_entries, new_entries)

    sys.stdout.write(''.join(comment_block + output_lines))


if __name__ == '__main__':
    main()