options to hide duplicates (simplify, ignore-case, unique)
[git-grep-footer.git] / git-grep-footer
1 #!/usr/bin/perl -0 -CO
2 use 5.010;
3 use strict;
4 use warnings;
5 use Encode 'decode';
6 use Data::Dump 'pp';
7 use Getopt::Long;
8
# Command-line switches (all stored in %opt under their primary name):
#   --debug            report commits where non-header lines were met while
#                      scanning for the footer
#   --simplify, -s     optional mode: 'strict', 'text' (also selected when the
#                      switch is given without a value), 'all'/'any', or 'no'
#   --unique, -u       print each key/value combination only once
#   --ignore-case, -i  lowercase keys and values before printing
my %opt;
GetOptions(\%opt,
    'debug!',
    'simplify|s:s',
    'unique|u!',
    'ignore-case|i!',
) or die "Usage: $0 [--debug] [--simplify[=MODE]] [--unique] [--ignore-case]\n";

# Reject an unsupported simplify mode right away instead of only when the
# first header line happens to be parsed.
my %SIMPLIFY_MODES = map { $_ => 1 } ('strict', 'text', '', 'all', 'any', 'no');
if (defined $opt{simplify} and not exists $SIMPLIFY_MODES{ $opt{simplify} }) {
    die "Unknown simplify option: '$opt{simplify}'\n";
}

# Unbuffered output, so results appear as soon as each commit is processed.
$| = 1;

# A header key: letters followed by either one or more "-word" segments
# (e.g. "Signed-off-by") or a literal " by" (e.g. "Reported by").
# Matched case-insensitively.
my $HEADERMATCH = qr/ [a-z]+ (?: (?:-\w+)+ | \ by ) /ix;
19
# Key/value combinations already printed; consulted only with --unique.
my %seen;

# Main loop.  The -0 switch on the shebang line makes each record a
# NUL-terminated chunk: one identifying line followed by a commit message.
# For every record, the last paragraph containing "Key: value" header lines
# is treated as the footer and its headers are printed.
RECORD:
while (defined(my $record = readline)) {
    # Detach the first non-empty line, which identifies the commit.  Use a
    # placeholder instead of a stale or undefined $1 when nothing matched.
    my $hash = $record =~ s/(.+)\n// ? $1 : '(unknown)';

    # strip commit separator
    chomp $record;
    # skip expensive checks without potential identifier
    next RECORD if index($record, ':') < 0;

    # try to parse as UTF-8; LEAVE_SRC keeps $record intact for the fallback
    my $message = eval {
        decode(utf8 => $record, Encode::FB_CROAK() | Encode::LEAVE_SRC());
    };
    # if invalid, assume it's a legacy single-byte encoding (cp1252)
    $message = decode(cp1252 => $record) if not defined $message;

    # Non-blank lines that did not parse as headers, counted across all
    # paragraphs scanned so far for this commit.
    my $junk_lines = 0;

    # Walk the paragraphs bottom-up; the first one yielding headers wins.
    PARAGRAPH:
    for my $paragraph (reverse split /\n\n/, $message) {
        my @headers;

        LINE:
        for my $line (split /\n/, $paragraph) {
            next LINE if $line !~ /\S/;

            # On success @header is (key, value); on failure it is empty.
            my @header = $line =~ m{
                ^
                (?<key> $HEADERMATCH)
                : \s*
                (?<val> \S .+)
                $
            }imx;
            if (not @header) {
                $junk_lines++;
                next LINE;
            }

            simplify_header(\@header, $opt{simplify} // 'no');

            if ($opt{'ignore-case'}) {
                @header = map { lc($_) } @header;
            }

            push @headers, \@header;
        }

        next PARAGRAPH if not @headers;

        if ($opt{debug} and $junk_lines) {
            say "infix junk in commit $hash";
        }

        for my $header (@headers) {
            my ($key, $value) = @{$header};
            # The value may be missing when simplification dropped it.
            next if $opt{unique} and $seen{$key}{ $value // '' }++;
            say join ': ', @{$header};
        }

        last PARAGRAPH;
    }
}

# Simplify a parsed header in place.
#
#   $header  array ref holding [key, value]; the value may be rewritten or
#            removed entirely
#   $mode    'strict'      collapse a value that consists solely of an
#                          optional name plus a bracketed address
#            'text' or ''  drop the value for keys ending in "by"/"to",
#                          otherwise mask URLs, addresses, numbers and hashes
#            'all'/'any'   always drop the value
#            'no'          leave the header untouched
#
# Dies on any other mode.
sub simplify_header {
    my ($header, $mode) = @_;

    if ($mode eq 'strict') {
        $header->[1] =~ s{
            \A
            (?: [^:]+ )?
            < [^@>]+ (?: @ | \h?\W? at \W?\h? ) [a-z0-9.-]+ >
            \Z
        }{<...>}imsx;
    }
    elsif ($mode eq 'text' or $mode eq '') {
        if ($header->[0] =~ /[ _-] (?: by | to ) $/imsx) {
            # Attribution-style keys: the value is a person, so drop it.
            pop @{$header};
        }
        else {
            for ($header->[1]) {
                s{\b (https?)://\S+ }{[$1]}gmsx;  # url
                s{(?: < | \A ) [^@>\s]+ @ [^>]+ (?: > | \Z )}{<...>}igmsx;  # address
                s{\b [0-9]+ \b}{[num]}gmsx;  # number
                s{\b I? [0-9a-f]{40} \b}{[sha1]}gmsx;  # hash
            }
        }
    }
    elsif ($mode eq 'all' or $mode eq 'any') {
        pop @{$header};
    }
    elsif ($mode ne 'no') {
        die "Unknown simplify option: '$mode'\n";
    }

    return;
}