5 use open ':std', OUT => ':utf8';
14 'min|min-count|unique|u:i',
15 'max|max-count|show|n:i',
21 my $HEADERMATCH = qr/ [a-z]+ (?: (?:-\w+)+ | \ by ) /ix;
27 # strip commit separator
29 # skip expensive checks without potential identifier
31 # try to parse as UTF-8
32 eval { $_ = decode(utf8 => $_, Encode::FB_CROAK()) };
33 # if invalid, fall back to cp1252 (the Windows variant of latin1)
34 $_ = decode(cp1252 => $_) if $@;
40 for (reverse split /\n\n/) {
57 push @header, $_ if defined $opt{max};
59 given ($opt{simplify} // 'no') {
64 < [^@>]+ (?: @ | \h?\W? at \W?\h? ) [a-z0-9.-]+ >
69 when ($header[0] =~ /[ _-] (?: by | to ) $/imsx) {
73 s{\b (https?)://\S+ }{[$1]}gmsx; # url
74 s{(?: < | \A ) [^@>\s]+ @ [^>]+ (?: > | \Z )}{<...>}igmsx; # address
75 s{\b [0-9]+ \b}{[num]}gmsx; # number
76 s{\b I? [0-9a-f]{40} \b}{[sha1]}gmsx; # hash
79 when (['all', 'any']) {
85 die "Unknown simplify option: '$_'\n";
89 if ($opt{'ignore-case'}) {
90 $_ = lc for $header[0], $header[1] // ();
93 pop @header if not defined $header[-1];
95 push @headers, \@header;
98 next BLOCK if not @headers;
100 if ($opt{debug} and $prefix) {
101 say "infix junk in commit $hash";
105 if (defined $opt{min} or $opt{max}) {
107 my $count = $seen->{ $_->[0] }->{ $_->[1] // '' }++;
108 next if $count >= ($opt{min} // 0) + ($opt{max} || 1);
109 next if $count < ($opt{min} // 0);
111 say $_->[2] // join(': ', @$_);