sample options (original line(s), unique threshold)
[git-grep-footer.git] / git-grep-footer
1 #!/usr/bin/perl
2 use 5.010;
3 use strict;
4 use warnings;
5 use open ':std', OUT => ':utf8';
6 use Encode 'decode';
7 use Data::Dump 'pp';
8 use Getopt::Long;
9
10 GetOptions(\my %opt,
11         'debug!',
12         'simplify|s:s',
13         'ignore-case|i!',
14         'min|min-count|unique|u:i',
15         'max|max-count|show|n:i',
16 ) or die;
17
18 local $| = 1;
19 local $/ = "\0";
20
21 my $HEADERMATCH = qr/ [a-z]+ (?: (?:-\w+)+ | \ by ) /ix;
22
23 while (readline) {
24         s/(.+)\n//m;
25         my $hash = $1;
26
27         # strip commit seperator
28         chomp;
29         # skip expensive checks without potential identifier
30         m/:/ or next;
31         # try to parse as UTF-8
32         eval { $_ = decode(utf8   => $_, Encode::FB_CROAK()) };
33         # if invalid, assume it's latin1
34                $_ = decode(cp1252 => $_) if $@;
35
36         my $prefix = 0;
37         my %attr;
38
39         BLOCK:
40         for (reverse split /\n\n/) {
41                 my @headers;
42
43                 LINE:
44                 for (split /\n/) {
45                         next if not /\S/;
46                         my @header = m{
47                                 ^
48                                 (?<key> $HEADERMATCH)
49                                 : \s*
50                                 (?<val> \S .+)
51                                 $
52                         }imx or do {
53                                 $prefix++;
54                                 next LINE;
55                         };
56
57                         push @header, $_ if defined $opt{max};
58
59                         given ($opt{simplify} // 'no') {
60                                 when ('strict') {
61                                         $header[1] =~ s{
62                                                 \A
63                                                 (?: [^:]+ )?
64                                                 < [^@>]+ (?: @ | \h?\W? at \W?\h? ) [a-z0-9.-]+ >
65                                                 \Z
66                                         }{<...>}imsx;
67                                 }
68                                 when (['text', '']) {
69                                         when ($header[0] =~ /[ _-] (?: by | to ) $/imsx) {
70                                                 $header[1] = undef;
71                                         }
72                                         for ($header[1]) {
73                                                 s{\b (https?)://\S+ }{[$1]}gmsx;  # url
74                                                 s{(?: < | \A ) [^@>\s]+ @ [^>]+ (?: > | \Z )}{<...>}igmsx;  # address
75                                                 s{\b [0-9]+ \b}{[num]}gmsx;  # number
76                                                 s{\b I? [0-9a-f]{40} \b}{[sha1]}gmsx;  # hash
77                                         }
78                                 }
79                                 when (['all', 'any']) {
80                                         $header[1] = undef;
81                                 }
82                                 when ('no') {
83                                 }
84                                 default {
85                                         die "Unknown simplify option: '$_'\n";
86                                 }
87                         }
88
89                         if ($opt{'ignore-case'}) {
90                                 $_ = lc for $header[0], $header[1] // ();
91                         }
92
93                         pop @header if not defined $header[-1];
94
95                         push @headers, \@header;
96                 }
97
98                 next BLOCK if not @headers;
99
100                 if ($opt{debug} and $prefix) {
101                         say "infix junk in commit $hash";
102                 }
103
104                 for (@headers) {
105                         if (defined $opt{min} or $opt{max}) {
106                                 state $seen;
107                                 my $count = $seen->{ $_->[0] }->{ $_->[1] // '' }++;
108                                 next if $count >= ($opt{min} // 0) + ($opt{max} || 1);
109                                 next if $count < ($opt{min} // 0);
110                         }
111                         say $_->[2] // join(': ', @$_);
112                 }
113
114                 last BLOCK;
115         }
116 }