documentation and help options
[git-grep-footer.git] / git-grep-footer
1 #!/usr/bin/perl
2 use 5.010;
3 use strict;
4 use warnings;
5 use open ':std', OUT => ':utf8';
6 use Encode 'decode';
7 use Data::Dump 'pp';
8 use Getopt::Long qw(:config bundling);
9
# Command line switches.  Plain flags and optional-value options are stored
# in %opt under their primary name; the informational switches are handed to
# Getopt::Long's own message helpers.
my %opt;
my @option_spec = (
    'debug!',
    'simplify|s:s',
    'ignore-case|i!',
    'min|min-count|unique|u:i',
    'max|max-count|show|n:i',
    'version|V'  => sub { Getopt::Long::VersionMessage() },
    'usage|h'    => sub { Getopt::Long::HelpMessage() },
    'help|man|?' => sub { Getopt::Long::HelpMessage(-verbose => 2) },
);

# GetOptions returns false if the command line could not be parsed.
unless (GetOptions(\%opt, @option_spec)) {
    exit 129;
}
20
# Flush output after every write.
local $| = 1;
# Input records are terminated by NUL bytes rather than newlines.
local $/ = "\0";

# Attribute name: a word followed by either one or more dash-separated
# words (Signed-off-by) or a literal " by" (Signed off by).
my $HEADERMATCH = qr/ [a-z]+ (?: (?:-\w+)+ | \ by ) /ix;

# Simplification rule applied to header values.  Values are left alone if
# --simplify was not given; a bare -s yields the empty string, which selects
# the 'var' rule below.
my $simplify = $opt{simplify} // 'none';

# Main loop: for every input record, find the last paragraph containing
# header lines and print those headers, after optional simplification and
# duplicate filtering.
while (readline) {
    # Take the first non-empty line off the record; it is only used to
    # identify the record in the debug message.  $1 is meaningful only
    # after a successful match, so do not read it unconditionally.
    my $hash = s/(.+)\n//m ? $1 : '(unknown)';

    # strip commit separator
    chomp;
    # skip expensive checks without potential identifier
    m/:/ or next;

    # Try to parse as UTF-8; if that fails, assume it's Windows-1252.
    # LEAVE_SRC guarantees the raw bytes are still intact for the fallback.
    my $text = eval {
        decode(utf8 => $_, Encode::FB_CROAK() | Encode::LEAVE_SRC());
    };
    $_ = $text // decode(cp1252 => $_);

    # number of non-blank lines seen so far that are not headers
    my $prefix = 0;

    # Paragraphs are examined from the end of the message backwards;
    # the first one containing any header ends the search.
    BLOCK:
    for my $block (reverse split /\n\n/) {
        my @headers;

        LINE:
        for my $line (split /\n/, $block) {
            next LINE if $line !~ /\S/;

            # (key, value) on success; any other line counts as junk
            my @header = $line =~ m{
                ^
                (?<key> $HEADERMATCH)
                : \s*
                (?<val> \S .+)
                $
            }imx or do {
                $prefix++;
                next LINE;
            };

            # remember the unmodified line as third element for --show
            push @header, $line if defined $opt{max};

            if ($simplify eq 'email' or $simplify eq 'authors') {
                # collapse "Name <address>" values
                $header[1] =~ s{
                    \A
                    (?: [^:;]+ )?
                    < [^@>]+ (?: @ | \h?\W? at \W?\h? ) [a-z0-9.-]+ >
                    \Z
                }{<...>}imsx;
            }
            elsif ($simplify eq 'var' or $simplify eq 'vars' or $simplify eq '') {
                if ($header[0] =~ /[ _-] (?: by | to ) $/imsx) {
                    # keys ending in "by" or "to": drop the value altogether
                    $header[1] = undef;
                }
                else {
                    for ($header[1]) {
                        s{\b (https?)://\S+ }{[$1]}gmsx;  # url
                        s{(?: < | \A ) [^@>\s]+ @ [^>]+ (?: > | \Z )}{<...>}igmsx;  # address
                        s{\b [0-9]+ \b}{[num]}gmsx;  # number
                        s{\b I? [0-9a-f]{40} \b}{[sha1]}gmsx;  # hash
                    }
                }
            }
            elsif ($simplify eq 'all' or $simplify eq 'contents') {
                $header[1] = undef;
            }
            elsif ($simplify eq 'none' or $simplify eq 'no' or $simplify eq '0') {
                # values are kept as they are
            }
            else {
                die "Unknown simplify option: '$simplify'\n";
            }

            if ($opt{'ignore-case'}) {
                $header[0] = lc $header[0];
                $header[1] = lc $header[1] if defined $header[1];
            }

            # a dropped value in last position leaves only the key
            pop @header if not defined $header[-1];

            push @headers, \@header;
        }

        next BLOCK if not @headers;

        if ($opt{debug} and $prefix) {
            say "infix junk in commit $hash";
        }

        for my $header (@headers) {
            if (defined $opt{min} or $opt{max}) {
                # occurrences so far, per key and (simplified) value
                state $seen;
                my $count = $seen->{ $header->[0] }->{ $header->[1] // '' }++;
                next if $count >= ($opt{min} // 0) + ($opt{max} || 1);
                next if $count < ($opt{min} // 0);
            }
            # prefer the original line if it was kept
            say($header->[2] // join(': ', @{$header}));
        }

        last BLOCK;
    }
}
120
121 __END__
122
123 =head1 NAME
124
125 git-grep-footer - Find custom header lines in commit messages
126
127 =head1 SYNOPSIS
128
129 F<git> log --pretty=%b%x00 | F<git-grep-footer> [OPTIONS]
130
131 =head1 DESCRIPTION
132
133 Filters out header sections near the end of a commit body,
134 a common convention to list custom metadata such as
135 C<Signed-off-by> and C<Acked-by>.
136
137 Sections are identified by at least one leading keyword containing a dash
138 followed by a colon.
139
140 =head1 OPTIONS
141
142 =over
143
144 =item -i, --ignore-case
145
146 Lowercases everything.
147
148 =item -s, --simplify[=<rule>]
149
150 Modifies values to hide specific details.
151 Several different rules are supported:
152
153 =over
154
155 =item I<var> (default)
156
157 Replaces highly variable contents such as numbers, hashes, and addresses,
158 leaving only exceptional annotations as distinct text.
159 Attributes ending in I<-to> or I<-by> are assumed variable author names
160 and omitted entirely,
161 unless they contain a colon indicating possible attribute exceptions.
162
163 =item I<email>
164
165 Filters out author lines following the git signoff convention,
166 i.e. an <email address> optionally preceded by a name.
167
168 =item I<all>
169
170 Values will be hidden entirely, so only attribute names remain.
171
172 =back
173
174 =item -u, --unique[=<threshold>]
175
176 Each match is only shown once,
177 optionally after it has already occurred a given number of times.
178
179 =item -n, --show[=<limit>]
180
181 The original line is given for each match,
182 but simplifications still apply for duplicate determination.
183 Additional samples are optionally given up to the given maximum.
184
185 =back
186
187 =head1 AUTHOR
188
189 Mischa POSLAWSKY <perl@shiar.org>
190
191 =head1 LICENSE
192
193 Copyright. All rights reserved.
194