documentation and help options
[git-grep-footer.git] / git-grep-footer
index 026f5bcc718e68b336007b19e2da5334201289b6..ff279a606bc2ad4cc8545b1e2e14f039b23550ce 100755 (executable)
-#!/bin/sh
-git log --pretty=%b%x00 "$@" |
-perl -n0 -wMstrict -E '
+#!/usr/bin/perl
+use 5.010;
+use strict;
+use warnings;
+use open ':std', OUT => ':utf8';
+use Encode 'decode';
+use Data::Dump 'pp';
+use Getopt::Long qw(:config bundling);
+
+GetOptions(\my %opt,
+       'debug!',
+       'simplify|s:s',
+       'ignore-case|i!',
+       'min|min-count|unique|u:i',
+       'max|max-count|show|n:i',
+       'version|V'  => sub { Getopt::Long::VersionMessage() },
+       'usage|h'    => sub { Getopt::Long::HelpMessage() },
+       'help|man|?' => sub { Getopt::Long::HelpMessage(-verbose => 2) },
+) or exit 129;
+
+local $| = 1;
+local $/ = "\0";
+
+my $HEADERMATCH = qr/ [a-z]+ (?: (?:-\w+)+ | \ by ) /ix;
+
+while (readline) {
+       s/(.+)\n//m;
+       my $hash = $1;
+
+       # strip commit separator
+       chomp;
+       # skip expensive checks without potential identifier
+       m/:/ or next;
+       # try to parse as UTF-8
+       eval { $_ = decode(utf8   => $_, Encode::FB_CROAK()) };
+       # if invalid, assume a legacy 8-bit encoding and decode it as cp1252
+              $_ = decode(cp1252 => $_) if $@;
+
+       my $prefix = 0;
+       my %attr;
+
+       BLOCK:
        for (reverse split /\n\n/) {
-               my @headers = grep m{
-                       ^ (?: [a-z]+ (?: (?:-\w+)+ | \ by ) ) : \s* \S
-               }imx, split /\n/ or next;
-               say for @headers;
-               last;
+               my @headers;
+
+               LINE:
+               for (split /\n/) {
+                       next if not /\S/;
+                       my @header = m{
+                               ^
+                               (?<key> $HEADERMATCH)
+                               : \s*
+                               (?<val> \S .+)
+                               $
+                       }imx or do {
+                               $prefix++;
+                               next LINE;
+                       };
+
+                       push @header, $_ if defined $opt{max};
+
+                       given ($opt{simplify} // 'none') {
+                               when (['email', 'authors']) {
+                                       $header[1] =~ s{
+                                               \A
+                                               (?: [^:;]+ )?
+                                               < [^@>]+ (?: @ | \h?\W? at \W?\h? ) [a-z0-9.-]+ >
+                                               \Z
+                                       }{<...>}imsx;
+                               }
+                               when (['var', 'vars', '']) {
+                                       when ($header[0] =~ /[ _-] (?: by | to ) $/imsx) {
+                                               $header[1] = undef;
+                                       }
+                                       for ($header[1]) {
+                                               s{\b (https?)://\S+ }{[$1]}gmsx;  # url
+                                               s{(?: < | \A ) [^@>\s]+ @ [^>]+ (?: > | \Z )}{<...>}igmsx;  # address
+                                               s{\b [0-9]+ \b}{[num]}gmsx;  # number
+                                               s{\b I? [0-9a-f]{40} \b}{[sha1]}gmsx;  # hash
+                                       }
+                               }
+                               when (['all', 'contents']) {
+                                       $header[1] = undef;
+                               }
+                               when (['none', 'no', '0']) {
+                               }
+                               default {
+                                       die "Unknown simplify option: '$_'\n";
+                               }
+                       }
+
+                       if ($opt{'ignore-case'}) {
+                               $_ = lc for $header[0], $header[1] // ();
+                       }
+
+                       pop @header if not defined $header[-1];
+
+                       push @headers, \@header;
+               }
+
+               next BLOCK if not @headers;
+
+               if ($opt{debug} and $prefix) {
+                       say "infix junk in commit $hash";
+               }
+
+               for (@headers) {
+                       if (defined $opt{min} or $opt{max}) {
+                               state $seen;
+                               my $count = $seen->{ $_->[0] }->{ $_->[1] // '' }++;
+                               next if $count >= ($opt{min} // 0) + ($opt{max} || 1);
+                               next if $count < ($opt{min} // 0);
+                       }
+                       say $_->[2] // join(': ', @$_);
+               }
+
+               last BLOCK;
        }
-'
+}
+
+__END__
+
+=head1 NAME
+
+git-grep-footer - Find custom header lines in commit messages
+
+=head1 SYNOPSIS
+
+F<git> log --pretty=%b%x00 | F<git-grep-footer> [OPTIONS]
+
+=head1 DESCRIPTION
+
+Filters out header sections near the end of a commit body,
+a common convention to list custom metadata such as
+C<Signed-off-by> and C<Acked-by>.
+
+Sections are identified by at least one line starting with a keyword
+containing a dash (or ending in " by"), followed by a colon and a value.
+
+=head1 OPTIONS
+
+=over
+
+=item -i, --ignore-case
+
+Lowercases everything.
+
+=item -s, --simplify[=<rule>]
+
+Modifies values to hide specific details.
+Several different rules are supported:
+
+=over
+
+=item I<var> (default)
+
+Replaces highly variable contents such as numbers, hashes, and addresses,
+leaving only exceptional annotations as distinct text.
+Attributes ending in I<-to> or I<-by> are assumed variable author names
+and omitted entirely,
+unless they contain a colon indicating possible attribute exceptions.
+
+=item I<email>
+
+Filters out author lines following the git signoff convention,
+i.e. an <email address> optionally preceded by a name.
+
+=item I<all>
+
+Values will be hidden entirely, so only attribute names remain.
+
+=back
+
+=item -u, --unique[=<threshold>]
+
+Each match is only shown once,
+optionally after it has already occurred a given number of times.
+
+=item -n, --show[=<limit>]
+
+The original line is given for each match,
+but simplifications still apply for duplicate determination.
+Additional samples are optionally given up to the given maximum.
+
+=back
+
+=head1 AUTHOR
+
+Mischa POSLAWSKY <perl@shiar.org>
+
+=head1 LICENSE
+
+Copyright. All rights reserved.
+