5 use open ':std', OUT => ':utf8';
8 use Getopt::Long qw(:config bundling);
10 our $VERSION = '1.00';
20 'min|min-count|unique|u:i',
21 'max|max-count|show|n:i',
23 'version|V' => sub { Getopt::Long::VersionMessage() },
24 'usage|h' => sub { Getopt::Long::HelpMessage() },
25 'help|man|?' => sub { Getopt::Long::HelpMessage(-verbose => 2) },
28 my $inputstream = $opt{''} ? \*ARGV : eval {
30 Git::command_output_pipe('log', '-z', '--pretty=format:%h%n%b', @ARGV);
31 } || die "Automatic git log failed: $@";
36 my $HEADERMATCH = qr/ [a-z]+ (?: (?:-\w+)+ | \ by ) | cc | reference /imsx;
38 my (%headercount, @headercache);
40 while (readline $inputstream) {
41 s/^ ([0-9a-f]{4,40}) \n//msx;
42 my $hash = $opt{hash} ? $1 : undef;
44 # strip commit separator
46 # skip expensive checks without potential identifier
48 # try to parse as UTF-8
49 eval { $_ = decode(utf8 => $_, Encode::FB_CROAK()); return 1 }
50 # if invalid, assume it's latin1
51 or $_ = decode(cp1252 => $_);
54 for (reverse split /\n\n/) {
72 push @header, $_ if defined $opt{max};
78 state $BY = qr{ (?: -? b[yu] )? \Z }imsx;
79 s{\A si (?:ge?n|n?g) (?:e?[dt])? -? (?:of+)? $BY}{Signed-off-by}imsx;
80 s{\A ack (?:ed|de)? $BY}{Acked-by}imsx;
81 s{\A review (?:e?d)? $BY}{Reviewed-by}imsx;
82 s{\A teste[dt] $BY}{Tested-by}imsx;
86 if (defined $opt{grep}) {
87 $_ ~~ qr/$opt{grep}/im or next LINE;
90 given ($opt{simplify} // 'none') {
91 when (['email', 'authors']) {
95 < [^@>]+ (?: @ | \h?\W? at \W?\h? ) [a-z0-9.-]+ >
99 when (['var', 'vars', '']) {
100 when ($header[0] =~ m/[ _-] (?: by | to ) $ | ^cc$/imsx) {
104 s{\b (https?)://\S+ }{[$1]}gmsx; # url
105 s{(?: < | \A ) [^@>\s]+ @ [^>]+ (?: > | \Z )}{<...>}igmsx; # address
106 s{\b [0-9]+ \b}{[num]}gmsx; # number
107 s{\b [Ig]? [0-9a-f]{ 40} \b}{[sha1]}gmsx; # hash
108 s{\b [Ig]? [0-9a-f]{6,40} \b}{[hash]}gmsx; # abbrev
111 when (['all', 'contents']) {
114 when (['none', 'no', '0']) {
117 die "Unknown simplify option: '$_'\n";
121 if ($opt{'ignore-case'}) {
122 $_ = lc for $header[0], $header[1] // ();
125 pop @header if not defined $header[-1];
127 push @headers, \@header;
130 next BLOCK if not @headers;
132 if ($opt{debug} and $prefix) {
133 say sprintf ': invalid lines in %s (%s)', $hash // 'block', $prefix;
137 my $line = $_->[2] // join(': ', @$_);
138 $line =~ s/\A/$hash /msx if defined $hash;
140 if (defined $opt{min} or $opt{max} or $opt{count}) {
141 my $counter = \$headercount{ $_->[0] }->{ $_->[1] // '' };
142 my $excess = ${$counter}++ - ($opt{min} // 0);
143 next if $excess >= ($opt{max} || 1);
146 push @headercache, [ $line, $excess ? \undef : $counter ];
158 say ${$_->[1]} // '', "\t", $_->[0];
165 git-grep-footer - Find custom header lines in commit messages
169 F<git-grep-footer> [OPTIONS] [-- <git log options>]
171 F<git> log -z --pretty=format:%b | F<git-grep-footer> [OPTIONS] -
175 Extracts header sections near the end of a commit body,
176 a common convention to list custom metadata such as
177 C<Signed-off-by> and C<Acked-by>.
179 Sections are identified by at least one leading keyword containing a dash
180 (or exceptionally recognised)
187 =item -i, --ignore-case
189 Lowercases everything.
191 =item -s, --simplify[=<rule>]
193 Modifies values to hide specific details.
194 Several different rules are supported:
198 =item I<var> (default)
200 Replaces highly variable contents such as numbers, hashes, and addresses,
201 leaving only exceptional annotations as distinct text.
202 Attributes ending in I<-to> or I<-by> are assumed variable author names
203 and omitted entirely,
204 unless they contain a colon indicating possible attribute exceptions.
208 Filters out author lines following the git signoff convention,
209 i.e. an <email address> optionally preceded by a name.
213 Values will be hidden entirely, so only attribute names remain.
217 =item --grep=<pattern>
219 Only include lines matching the specified regular expression.
220 Case insensitivity can be disabled by prepending C<(?-i)>.
222 =item -u, --unique[=<threshold>]
224 Each match is only shown once,
225 optionally after it has already occurred a given number of times.
227 =item -n, --show[=<limit>]
229 The original line is given for each match,
230 but simplifications still apply for duplicate determination.
231 Additional samples are optionally given up to the given maximum.
235 Prefixes (unique) lines by the number of occurrences.
236 Causes output to be buffered until all input has been read (obviously).
240 Prefixes the SHA1 hash of the (or a) matching commit.
248 =item git-grep-footer --grep=^ack v2.6.32..v2.6.33
250 Search for I<Acked-by> lines for version I<v2.6.33>.
251 Append C<-uin> to skip reoccurrences.
253 =item git-grep-footer -u --grep=junio
255 Show distinct lines mentioning a specific author.
257 =item git-grep-footer -c --simplify --grep=^si
259 Compare various capitalisations and (mis)spellings of signoffs.
261 =item git-grep-footer -c --simplify=all -i | sort -n -r | head -n10
263 List the ten most frequently used attribute names.
265 =item git-grep-footer -n2 -i -s --hash -- --reverse
267 The earliest two usages of each distinct identifier.
273 Mischa POSLAWSKY <perl@shiar.org>
277 This software is free software;
278 you can redistribute and/or modify it under the terms of the GNU GPL