count duplicates anchored by specified field
author Mischa POSLAWSKY <perl@shiar.org>
Fri, 2 Dec 2022 22:15:08 +0000 (23:15 +0100)
committer Mischa POSLAWSKY <perl@shiar.org>
Mon, 12 Dec 2022 21:44:42 +0000 (22:44 +0100)
barcat
t/input/duplicolumns.txt [new file with mode: 0644]
t/t1664-uniq_first_column.out [new file with mode: 0644]
t/t1665-uniq_number_column.out [new file with mode: 0644]
t/t1666-uniq_last_column.out [new file with mode: 0644]
t/t1668-uniq_regex.out [new file with mode: 0644]
t/t1669-uniq_capture.out [new file with mode: 0644]

diff --git a/barcat b/barcat
index 3b73b675192497780af97c4a7cf113aa858349d0..9a19d02a2c141529c3ddb5cbd875e9027649986c 100755 (executable)
--- a/barcat
+++ b/barcat
@@ -210,11 +210,11 @@ if (defined $opt{interval}) {
 }
 
 my $float = qr<[0-9]* [.]? [0-9]+ (?: e[+-]?[0-9]+ )?>; # positive numberish
-my $valmatch = qr< $opt{anchor} ( \h* -? $float |) >x;
+my $valmatch = qr< $opt{anchor} ( \h* -? $float |) >;
 while (defined ($_ = $opt{input} ? shift @{ $opt{input} } : readline)) {
        s/\r?\n\z//;
        if ($opt{count}) {
-               my ($valnum) = m/(\S*)/;
+               my ($valnum) = m/$opt{anchor} (\S*)/;
                $valnum //= '';
                $uniq{$valnum}++ and next;
                push @lines, "\n " . $_;
@@ -776,7 +776,7 @@ Users on the current server while preserving order:
 
 Number of HTTP requests per day:
 
-    cat httpd/access.log | cut -d\  -f4 | cut -d: -f1 | uniq -c | barcat
+    barcat -cf'\[([^:]+)' httpd/access.log
 
 Any kind of database query results, preserving returned alignment:
 
diff --git a/t/input/duplicolumns.txt b/t/input/duplicolumns.txt
new file mode 100644 (file)
index 0000000..29cd618
--- /dev/null
@@ -0,0 +1,12 @@
+prefix 1 1
+random 2 2
+prefix 3 1
+text 1 2
+prefix 2 1
+prefix 3 2
+prefix 1 1
+prefix 2 2
+prefix 3 0
+prefix 1 2
+prefix 2
+prefix 3 2
diff --git a/t/t1664-uniq_first_column.out b/t/t1664-uniq_first_column.out
new file mode 100644 (file)
index 0000000..31c1a16
--- /dev/null
@@ -0,0 +1,4 @@
+barcat -c input/duplicolumns.txt
+10 prefix 1 1 -+-------=--->------------
+ 1 random 2 2 -+-
+ 1 text 1 2   -+-
diff --git a/t/t1665-uniq_number_column.out b/t/t1665-uniq_number_column.out
new file mode 100644 (file)
index 0000000..7973147
--- /dev/null
@@ -0,0 +1,4 @@
+barcat -c -f+0 input/duplicolumns.txt
+4 prefix 1 1 --------------------------+
+4 random 2 2 --------------------------+
+4 prefix 3 1 --------------------------+
diff --git a/t/t1666-uniq_last_column.out b/t/t1666-uniq_last_column.out
new file mode 100644 (file)
index 0000000..a65f050
--- /dev/null
@@ -0,0 +1,5 @@
+barcat -c -f2 input/duplicolumns.txt
+4 prefix 1 1 ---<------+-=-----
+6 random 2 2 ---<------+-=-------->-----
+1 prefix 3 0 ---<-
+1 prefix 2   ---<-
diff --git a/t/t1668-uniq_regex.out b/t/t1668-uniq_regex.out
new file mode 100644 (file)
index 0000000..7bbd521
--- /dev/null
@@ -0,0 +1,11 @@
+barcat -c -f'\S\K(.*)' input/duplicolumns.txt
+2 prefix 1 1 ------------+--=-----------
+1 random 2 2 ------------+-
+1 prefix 3 1 ------------+-
+1 text 1 2   ------------+-
+1 prefix 2 1 ------------+-
+2 prefix 3 2 ------------+--=-----------
+1 prefix 2 2 ------------+-
+1 prefix 3 0 ------------+-
+1 prefix 1 2 ------------+-
+1 prefix 2   ------------+-
diff --git a/t/t1669-uniq_capture.out b/t/t1669-uniq_capture.out
new file mode 100644 (file)
index 0000000..6552875
--- /dev/null
@@ -0,0 +1,4 @@
+barcat -c -f'(e.)' input/duplicates.txt
+17 thrice    --+------=---->------------
+ 1  indented --
+ 2 different --+