Commit b9e2331d authored by Joe Perches, committed by Linus Torvalds

scripts/get_maintainer.pl: use mailmap in name deduplication and other updates

Use Florian Mickler's mailmap routine to reduce name duplication.

o Add subroutine deduplicate_email to centralize code
o Add hashes for deduplicate_(name|address)_hash
o Remove now unused @interactive_to
o Whitespace neatening
o Add command line --help text
o Add --mailmap command line option control
o Interactive changes:
   - Add toggles for maintainer, git and list selections
   - Default selection is all
   - Add mailmap control

Update to 0.26-beta5
Signed-off-by: Joe Perches <joe@perches.com>
Cc: Florian Mickler <florian@mickler.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 47abc722
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
use strict; use strict;
my $P = $0; my $P = $0;
my $V = '0.26-beta4'; my $V = '0.26-beta5';
use Getopt::Long qw(:config no_auto_abbrev); use Getopt::Long qw(:config no_auto_abbrev);
...@@ -36,6 +36,7 @@ my $email_git_since = "1-year-ago"; ...@@ -36,6 +36,7 @@ my $email_git_since = "1-year-ago";
my $email_hg_since = "-365"; my $email_hg_since = "-365";
my $interactive = 0; my $interactive = 0;
my $email_remove_duplicates = 1; my $email_remove_duplicates = 1;
my $email_use_mailmap = 1;
my $output_multiline = 1; my $output_multiline = 1;
my $output_separator = ", "; my $output_separator = ", ";
my $output_roles = 0; my $output_roles = 0;
...@@ -192,6 +193,7 @@ if (!GetOptions( ...@@ -192,6 +193,7 @@ if (!GetOptions(
'hg-since=s' => \$email_hg_since, 'hg-since=s' => \$email_hg_since,
'i|interactive!' => \$interactive, 'i|interactive!' => \$interactive,
'remove-duplicates!' => \$email_remove_duplicates, 'remove-duplicates!' => \$email_remove_duplicates,
'mailmap!' => \$email_use_mailmap,
'm!' => \$email_maintainer, 'm!' => \$email_maintainer,
'n!' => \$email_usename, 'n!' => \$email_usename,
'l!' => \$email_list, 'l!' => \$email_list,
...@@ -300,17 +302,17 @@ close($maint); ...@@ -300,17 +302,17 @@ close($maint);
# Read mail address map # Read mail address map
# #
my $mailmap = read_mailmap(); my $mailmap;
read_mailmap();
sub read_mailmap { sub read_mailmap {
my $mailmap = { $mailmap = {
names => {}, names => {},
addresses => {} addresses => {}
}; };
if (!$email_remove_duplicates) { return if (!$email_use_mailmap || !(-f "${lk_path}.mailmap"));
return $mailmap;
}
open(my $mailmap_file, '<', "${lk_path}.mailmap") open(my $mailmap_file, '<', "${lk_path}.mailmap")
or warn "$P: Can't open .mailmap: $!\n"; or warn "$P: Can't open .mailmap: $!\n";
...@@ -331,6 +333,7 @@ sub read_mailmap { ...@@ -331,6 +333,7 @@ sub read_mailmap {
my $address = $2; my $address = $2;
$real_name =~ s/\s+$//; $real_name =~ s/\s+$//;
($real_name, $address) = parse_email("$real_name <$address>");
$mailmap->{names}->{$address} = $real_name; $mailmap->{names}->{$address} = $real_name;
} elsif (/^<([^\s]+)>\s*<([^\s]+)>$/) { } elsif (/^<([^\s]+)>\s*<([^\s]+)>$/) {
...@@ -340,12 +343,13 @@ sub read_mailmap { ...@@ -340,12 +343,13 @@ sub read_mailmap {
$mailmap->{addresses}->{$wrong_address} = $real_address; $mailmap->{addresses}->{$wrong_address} = $real_address;
} elsif (/^(.+)<([^\s]+)>\s*<([^\s]+)>$/) { } elsif (/^(.+)<([^\s]+)>\s*<([^\s]+)>$/) {
my $real_name= $1; my $real_name = $1;
my $real_address = $2; my $real_address = $2;
my $wrong_address = $3; my $wrong_address = $3;
$real_name =~ s/\s+$//; $real_name =~ s/\s+$//;
($real_name, $real_address) =
parse_email("$real_name <$real_address>");
$mailmap->{names}->{$wrong_address} = $real_name; $mailmap->{names}->{$wrong_address} = $real_name;
$mailmap->{addresses}->{$wrong_address} = $real_address; $mailmap->{addresses}->{$wrong_address} = $real_address;
...@@ -356,15 +360,19 @@ sub read_mailmap { ...@@ -356,15 +360,19 @@ sub read_mailmap {
my $wrong_address = $4; my $wrong_address = $4;
$real_name =~ s/\s+$//; $real_name =~ s/\s+$//;
($real_name, $real_address) =
parse_email("$real_name <$real_address>");
$wrong_name =~ s/\s+$//; $wrong_name =~ s/\s+$//;
($wrong_name, $wrong_address) =
parse_email("$wrong_name <$wrong_address>");
$mailmap->{names}->{format_email($wrong_name,$wrong_address,1)} = $real_name; my $wrong_email = format_email($wrong_name, $wrong_address, 1);
$mailmap->{addresses}->{format_email($wrong_name,$wrong_address,1)} = $real_address; $mailmap->{names}->{$wrong_email} = $real_name;
$mailmap->{addresses}->{$wrong_email} = $real_address;
} }
} }
close($mailmap_file); close($mailmap_file);
return $mailmap;
} }
## use the filenames on the command line or find the filenames in the patchfiles ## use the filenames on the command line or find the filenames in the patchfiles
...@@ -453,7 +461,8 @@ my @scm = (); ...@@ -453,7 +461,8 @@ my @scm = ();
my @web = (); my @web = ();
my @subsystem = (); my @subsystem = ();
my @status = (); my @status = ();
my @interactive_to = (); my %deduplicate_name_hash = ();
my %deduplicate_address_hash = ();
my $signature_pattern; my $signature_pattern;
my @maintainers = get_maintainers(); my @maintainers = get_maintainers();
...@@ -497,7 +506,8 @@ sub get_maintainers { ...@@ -497,7 +506,8 @@ sub get_maintainers {
@web = (); @web = ();
@subsystem = (); @subsystem = ();
@status = (); @status = ();
@interactive_to = (); %deduplicate_name_hash = ();
%deduplicate_address_hash = ();
if ($email_git_all_signature_types) { if ($email_git_all_signature_types) {
$signature_pattern = "(.+?)[Bb][Yy]:"; $signature_pattern = "(.+?)[Bb][Yy]:";
} else { } else {
...@@ -506,7 +516,7 @@ sub get_maintainers { ...@@ -506,7 +516,7 @@ sub get_maintainers {
# Find responsible parties # Find responsible parties
my %exact_pattern_match_hash; my %exact_pattern_match_hash = ();
foreach my $file (@files) { foreach my $file (@files) {
...@@ -590,7 +600,9 @@ sub get_maintainers { ...@@ -590,7 +600,9 @@ sub get_maintainers {
} }
} }
@interactive_to = (@email_to, @list_to); foreach my $email (@email_to, @list_to) {
$email->[0] = deduplicate_email($email->[0]);
}
foreach my $file (@files) { foreach my $file (@files) {
if ($email && if ($email &&
...@@ -637,8 +649,7 @@ sub get_maintainers { ...@@ -637,8 +649,7 @@ sub get_maintainers {
} }
if ($interactive) { if ($interactive) {
@interactive_to = @to; @to = interactive_get_maintainers(\@to);
@to = interactive_get_maintainers(\@interactive_to);
} }
return @to; return @to;
...@@ -702,8 +713,9 @@ Output type options: ...@@ -702,8 +713,9 @@ Output type options:
Other options: Other options:
--pattern-depth => Number of pattern directory traversals (default: 0 (all)) --pattern-depth => Number of pattern directory traversals (default: 0 (all))
--keywords => scan patch for keywords (default: 1 (on)) --keywords => scan patch for keywords (default: $keywords)
--sections => print the entire subsystem sections with pattern matches --sections => print all of the subsystem sections with pattern matches
--mailmap => use .mailmap file (default: $email_use_mailmap)
--version => show version --version => show version
--help => show this help information --help => show this help information
...@@ -1107,7 +1119,7 @@ sub which_conf { ...@@ -1107,7 +1119,7 @@ sub which_conf {
} }
sub mailmap_email { sub mailmap_email {
my $line = shift; my ($line) = @_;
my ($name, $address) = parse_email($line); my ($name, $address) = parse_email($line);
my $email = format_email($name, $address, 1); my $email = format_email($name, $address, 1);
...@@ -1136,26 +1148,25 @@ sub mailmap_email { ...@@ -1136,26 +1148,25 @@ sub mailmap_email {
sub mailmap { sub mailmap {
my (@addresses) = @_; my (@addresses) = @_;
my @ret = (); my @mapped_emails = ();
foreach my $line (@addresses) { foreach my $line (@addresses) {
push(@ret, mailmap_email($line), 1); push(@mapped_emails, mailmap_email($line));
} }
merge_by_realname(@mapped_emails) if ($email_use_mailmap);
merge_by_realname(@ret) if $email_remove_duplicates; return @mapped_emails;
return @ret;
} }
sub merge_by_realname { sub merge_by_realname {
my %address_map; my %address_map;
my (@emails) = @_; my (@emails) = @_;
foreach my $email (@emails) { foreach my $email (@emails) {
my ($name, $address) = parse_email($email); my ($name, $address) = parse_email($email);
if (!exists $address_map{$name}) { if (exists $address_map{$name}) {
$address_map{$name} = $address;
} else {
$address = $address_map{$name}; $address = $address_map{$name};
$email = format_email($name,$address,1); $email = format_email($name, $address, 1);
} else {
$address_map{$name} = $address;
} }
} }
} }
...@@ -1194,8 +1205,7 @@ sub extract_formatted_signatures { ...@@ -1194,8 +1205,7 @@ sub extract_formatted_signatures {
## Reformat email addresses (with names) to avoid badly written signatures ## Reformat email addresses (with names) to avoid badly written signatures
foreach my $signer (@signature_lines) { foreach my $signer (@signature_lines) {
my ($name, $address) = parse_email($signer); $signer = deduplicate_email($signer);
$signer = format_email($name, $address, 1);
} }
return (\@type, \@signature_lines); return (\@type, \@signature_lines);
...@@ -1339,6 +1349,7 @@ sub vcs_exists { ...@@ -1339,6 +1349,7 @@ sub vcs_exists {
} }
sub vcs_is_git { sub vcs_is_git {
vcs_exists();
return $vcs_used == 1; return $vcs_used == 1;
} }
...@@ -1357,11 +1368,9 @@ sub interactive_get_maintainers { ...@@ -1357,11 +1368,9 @@ sub interactive_get_maintainers {
my %signed; my %signed;
my $count = 0; my $count = 0;
my $maintained = 0; my $maintained = 0;
#select maintainers by default
foreach my $entry (@list) { foreach my $entry (@list) {
my $role = $entry->[1]; $maintained = 1 if ($entry->[1] =~ /^(maintainer|supporter)/i);
$selected{$count} = ($role =~ /^(maintainer|supporter|open list)/i); $selected{$count} = 1;
$maintained = 1 if ($role =~ /^(maintainer|supporter)/i);
$authored{$count} = 0; $authored{$count} = 0;
$signed{$count} = 0; $signed{$count} = 0;
$count++; $count++;
...@@ -1418,24 +1427,34 @@ sub interactive_get_maintainers { ...@@ -1418,24 +1427,34 @@ sub interactive_get_maintainers {
if ($print_options) { if ($print_options) {
$print_options = 0; $print_options = 0;
if (vcs_exists()) { if (vcs_exists()) {
print STDERR print STDERR <<EOT
"\nVersion Control options:\n" .
"g use git history [$email_git]\n" . Version Control options:
"gf use git-fallback [$email_git_fallback]\n" . g use git history [$email_git]
"b use git blame [$email_git_blame]\n" . gf use git-fallback [$email_git_fallback]
"bs use blame signatures [$email_git_blame_signatures]\n" . b use git blame [$email_git_blame]
"c# minimum commits [$email_git_min_signatures]\n" . bs use blame signatures [$email_git_blame_signatures]
"%# min percent [$email_git_min_percent]\n" . c# minimum commits [$email_git_min_signatures]
"d# history to use [$$date_ref]\n" . %# min percent [$email_git_min_percent]
"x# max maintainers [$email_git_max_maintainers]\n" . d# history to use [$$date_ref]
"t all signature types [$email_git_all_signature_types]\n"; x# max maintainers [$email_git_max_maintainers]
t all signature types [$email_git_all_signature_types]
m use .mailmap [$email_use_mailmap]
EOT
} }
print STDERR "\nAdditional options:\n" . print STDERR <<EOT
"0 toggle all\n" .
"f emails in file [$file_emails]\n" . Additional options:
"k keywords in file [$keywords]\n" . 0 toggle all
"r remove duplicates [$email_remove_duplicates]\n" . tm toggle maintainers
"p# pattern match depth [$pattern_depth]\n"; tg toggle git entries
tl toggle open list entries
ts toggle subscriber list entries
f emails in file [$file_emails]
k keywords in file [$keywords]
r remove duplicates [$email_remove_duplicates]
p# pattern match depth [$pattern_depth]
EOT
} }
print STDERR print STDERR
"\n#(toggle), A#(author), S#(signed) *(all), ^(none), O(options), Y(approve): "; "\n#(toggle), A#(author), S#(signed) *(all), ^(none), O(options), Y(approve): ";
...@@ -1471,6 +1490,28 @@ sub interactive_get_maintainers { ...@@ -1471,6 +1490,28 @@ sub interactive_get_maintainers {
for (my $i = 0; $i < $count; $i++) { for (my $i = 0; $i < $count; $i++) {
$selected{$i} = !$selected{$i}; $selected{$i} = !$selected{$i};
} }
} elsif ($sel eq "t") {
if (lc($str) eq "m") {
for (my $i = 0; $i < $count; $i++) {
$selected{$i} = !$selected{$i}
if ($list[$i]->[1] =~ /^(maintainer|supporter)/i);
}
} elsif (lc($str) eq "g") {
for (my $i = 0; $i < $count; $i++) {
$selected{$i} = !$selected{$i}
if ($list[$i]->[1] =~ /^(author|commit|signer)/i);
}
} elsif (lc($str) eq "l") {
for (my $i = 0; $i < $count; $i++) {
$selected{$i} = !$selected{$i}
if ($list[$i]->[1] =~ /^(open list)/i);
}
} elsif (lc($str) eq "s") {
for (my $i = 0; $i < $count; $i++) {
$selected{$i} = !$selected{$i}
if ($list[$i]->[1] =~ /^(subscriber list)/i);
}
}
} elsif ($sel eq "a") { } elsif ($sel eq "a") {
if ($val > 0 && $val <= $count) { if ($val > 0 && $val <= $count) {
$authored{$val - 1} = !$authored{$val - 1}; $authored{$val - 1} = !$authored{$val - 1};
...@@ -1539,6 +1580,10 @@ sub interactive_get_maintainers { ...@@ -1539,6 +1580,10 @@ sub interactive_get_maintainers {
} elsif ($sel eq "r") { } elsif ($sel eq "r") {
bool_invert(\$email_remove_duplicates); bool_invert(\$email_remove_duplicates);
$rerun = 1; $rerun = 1;
} elsif ($sel eq "m") {
bool_invert(\$email_use_mailmap);
read_mailmap();
$rerun = 1;
} elsif ($sel eq "k") { } elsif ($sel eq "k") {
bool_invert(\$keywords); bool_invert(\$keywords);
$rerun = 1; $rerun = 1;
...@@ -1602,6 +1647,36 @@ sub bool_invert { ...@@ -1602,6 +1647,36 @@ sub bool_invert {
} }
} }
# deduplicate_email($email)
#
# Normalise a single email string and, when duplicate removal is enabled,
# fold it onto the first entry already seen with the same name or the same
# address (both compared case-insensitively).
#
# Steps:
#   1. Parse and re-format the input, then run it through mailmap_email().
#   2. If $email_remove_duplicates is off, return that result unchanged.
#   3. Otherwise look the entry up in %deduplicate_name_hash first, then in
#      %deduplicate_address_hash.  On a hit, the stored [name, address]
#      pair replaces the current one; on a miss, the current pair is
#      recorded in the tables.
#   4. Re-format and mailmap the (possibly replaced) pair and return it.
#
# Both tables are file-level hashes that get_maintainers() clears on each
# run, so this sub has the side effect of growing them.
#
# Returns the formatted email string.
sub deduplicate_email {
    my ($email) = @_;

    my ($name, $address) = parse_email($email);
    $email = format_email($name, $address, 1);
    $email = mailmap_email($email);

    return $email if (!$email_remove_duplicates);

    ($name, $address) = parse_email($email);

    my $name_key = lc($name);
    my $address_key = lc($address);

    # An empty name must never be used as a dedup key: every nameless
    # entry (presumably a bare address - confirm against parse_email)
    # would share the key "" and unrelated addresses would all collapse
    # onto the first nameless one that was seen.
    my $seen;
    if ($name_key ne "" && $deduplicate_name_hash{$name_key}) {
        $seen = $deduplicate_name_hash{$name_key};
    } elsif ($deduplicate_address_hash{$address_key}) {
        $seen = $deduplicate_address_hash{$address_key};
    }

    if ($seen) {
        # Reuse the first spelling recorded for this person.
        ($name, $address) = @{$seen};
    } else {
        # First sighting: remember it under both keys (name only if set).
        $deduplicate_name_hash{$name_key} = [ $name, $address ]
            if ($name_key ne "");
        $deduplicate_address_hash{$address_key} = [ $name, $address ];
    }

    $email = format_email($name, $address, 1);
    $email = mailmap_email($email);
    return $email;
}
sub save_commits_by_author { sub save_commits_by_author {
my (@lines) = @_; my (@lines) = @_;
...@@ -1611,20 +1686,8 @@ sub save_commits_by_author { ...@@ -1611,20 +1686,8 @@ sub save_commits_by_author {
foreach my $line (@lines) { foreach my $line (@lines) {
if ($line =~ m/$VCS_cmds{"author_pattern"}/) { if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
my $matched = 0;
my $author = $1; my $author = $1;
my ($name, $address) = parse_email($author); $author = deduplicate_email($author);
foreach my $to (@interactive_to) {
my ($to_name, $to_address) = parse_email($to->[0]);
if ($email_remove_duplicates &&
((lc($name) eq lc($to_name)) ||
(lc($address) eq lc($to_address)))) {
$author = $to->[0];
$matched = 1;
last;
}
}
$author = format_email($name, $address, 1) if (!$matched);
push(@authors, $author); push(@authors, $author);
} }
push(@commits, $1) if ($line =~ m/$VCS_cmds{"commit_pattern"}/); push(@commits, $1) if ($line =~ m/$VCS_cmds{"commit_pattern"}/);
...@@ -1665,19 +1728,7 @@ sub save_commits_by_signer { ...@@ -1665,19 +1728,7 @@ sub save_commits_by_signer {
my $type = $types[0]; my $type = $types[0];
my $signer = $signers[0]; my $signer = $signers[0];
my $matched = 0; $signer = deduplicate_email($signer);
my ($name, $address) = parse_email($signer);
foreach my $to (@interactive_to) {
my ($to_name, $to_address) = parse_email($to->[0]);
if ($email_remove_duplicates &&
((lc($name) eq lc($to_name)) ||
(lc($address) eq lc($to_address)))) {
$signer = $to->[0];
$matched = 1;
last;
}
$signer = format_email($name, $address, 1) if (!$matched);
}
my $exists = 0; my $exists = 0;
foreach my $ref(@{$commit_signer_hash{$signer}}) { foreach my $ref(@{$commit_signer_hash{$signer}}) {
...@@ -1751,6 +1802,11 @@ sub vcs_file_signoffs { ...@@ -1751,6 +1802,11 @@ sub vcs_file_signoffs {
$cmd =~ s/(\$\w+)/$1/eeg; # interpolate $cmd $cmd =~ s/(\$\w+)/$1/eeg; # interpolate $cmd
($commits, @signers) = vcs_find_signers($cmd); ($commits, @signers) = vcs_find_signers($cmd);
foreach my $signer (@signers) {
$signer = deduplicate_email($signer);
}
vcs_assign("commit_signer", $commits, @signers); vcs_assign("commit_signer", $commits, @signers);
} }
...@@ -1828,9 +1884,8 @@ sub vcs_file_blame { ...@@ -1828,9 +1884,8 @@ sub vcs_file_blame {
foreach my $line (@lines) { foreach my $line (@lines) {
if ($line =~ m/$VCS_cmds{"author_pattern"}/) { if ($line =~ m/$VCS_cmds{"author_pattern"}/) {
my $author = $1; my $author = $1;
my ($name, $address) = parse_email($author); $author = deduplicate_email($author);
$author = format_email($name, $address, 1); push(@authors, $author);
push(@authors, $1);
} }
} }
...@@ -1846,9 +1901,12 @@ sub vcs_file_blame { ...@@ -1846,9 +1901,12 @@ sub vcs_file_blame {
$cmd =~ s/(\$\w+)/$1/eeg; #interpolate $cmd $cmd =~ s/(\$\w+)/$1/eeg; #interpolate $cmd
my @author = vcs_find_author($cmd); my @author = vcs_find_author($cmd);
next if !@author; next if !@author;
my $formatted_author = deduplicate_email($author[0]);
my $count = grep(/$commit/, @all_commits); my $count = grep(/$commit/, @all_commits);
for ($i = 0; $i < $count ; $i++) { for ($i = 0; $i < $count ; $i++) {
push(@blame_signers, $author[0]); push(@blame_signers, $formatted_author);
} }
} }
} }
...@@ -1856,8 +1914,14 @@ sub vcs_file_blame { ...@@ -1856,8 +1914,14 @@ sub vcs_file_blame {
vcs_assign("authored lines", $total_lines, @blame_signers); vcs_assign("authored lines", $total_lines, @blame_signers);
} }
} }
foreach my $signer (@signers) {
$signer = deduplicate_email($signer);
}
vcs_assign("commits", $total_commits, @signers); vcs_assign("commits", $total_commits, @signers);
} else { } else {
foreach my $signer (@signers) {
$signer = deduplicate_email($signer);
}
vcs_assign("modified commits", $total_commits, @signers); vcs_assign("modified commits", $total_commits, @signers);
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment