From 28a07c68bc2845abab5506c7a088ff843eaba00f Mon Sep 17 00:00:00 2001 From: Mischa POSLAWSKY Date: Sun, 4 Dec 2022 06:25:01 +0100 Subject: [PATCH] unspecified field counts entire lines Expected behaviour equivalent to sort|uniq. --- barcat | 15 ++++++++++----- t/t1662-uniq_words.out | 14 ++++++++------ t/t1663-uniq_columns.out | 11 +++++++++++ t/t1664-uniq_first_column.out | 2 +- t/t1668-uniq_regex.out | 16 +++++----------- t/t1806-reset_uniq.out | 7 ++++--- 6 files changed, 39 insertions(+), 26 deletions(-) create mode 100644 t/t1663-uniq_columns.out diff --git a/barcat b/barcat index 0f0cfb8..bae7989 100755 --- a/barcat +++ b/barcat @@ -142,7 +142,6 @@ $opt{'graph-format'} //= '-'; $opt{trim} *= $opt{width} / 100 if $opt{trimpct}; $opt{units} = [split //, ' kMGTPEZYRQqryzafpn'.($opt{ascii} ? 'u' : 'μ').'m'] if $opt{'human-readable'}; -$opt{anchor} //= qr/\A/; $opt{'value-length'} = 4 if $opt{units}; $opt{'value-length'} = 1 if $opt{unmodified}; $opt{'signal-stat'} //= exists $SIG{INFO} ? 'INFO' : 'QUIT'; @@ -211,13 +210,15 @@ if (defined $opt{interval}) { } my $float = qr<[0-9]* [.]? [0-9]+ (?: e[+-]?[0-9]+ )?>; # positive numberish -my $valmatch = qr< $opt{anchor} ( \h* -? $float |) >; +my $valmatch = $opt{anchor} // qr/\A/; +$valmatch .= !$opt{count} ? qr/( \h* -? $float |)/ : + $opt{anchor} ? qr/(\S*)/ : qr/(.*)/; + while (defined ($_ = $opt{input} ? shift @{ $opt{input} } : readline)) { s/\r?\n\z//; my $valnum; if ($opt{count}) { - ($valnum) = m/$opt{anchor} (\S*)/; - $valnum //= ''; + $valnum = m/$valmatch/ && $1; $uniq{$valnum}++ and next; push @values, $valnum; s/\A/\n /; @@ -535,9 +536,13 @@ or the I environment variable. =item B<-c>, B<--count> Omit repetitions and count the number of occurrences. -Similar to piping input to C +Similar to piping input through C but keeping the order of first appearances. 
+Lines are omitted if they (or a specified field) are identical, +and the number of matches is prepended and used as values +for bars and subsequent statistics. + =item B<-f>, B<--field>=([B<+>]I | I) Compare values after a given number of whitespace separators, diff --git a/t/t1662-uniq_words.out b/t/t1662-uniq_words.out index 737a885..2a5fff4 100644 --- a/t/t1662-uniq_words.out +++ b/t/t1662-uniq_words.out @@ -1,7 +1,9 @@ barcat --count input/duplicates.txt - 3 thrice ----<+-> - 2 twice ----< - 1 once --- -10 most ----<+->=------------------ - 3 ----<+-> - 1 different --- + 3 thrice -+-->=-- + 2 twice -+--> + 1 once -+- +10 most -+-->=------------------- + 1 -+- + 1 indented -+- + 1 different -+- + 1 different -+- diff --git a/t/t1663-uniq_columns.out b/t/t1663-uniq_columns.out new file mode 100644 index 0000000..84e8306 --- /dev/null +++ b/t/t1663-uniq_columns.out @@ -0,0 +1,11 @@ +barcat -c input/duplicolumns.txt +2 prefix 1 1 ------------+--=----------- +1 random 2 2 ------------+- +1 prefix 3 1 ------------+- +1 text 1 2 ------------+- +1 prefix 2 1 ------------+- +2 prefix 3 2 ------------+--=----------- +1 prefix 2 2 ------------+- +1 prefix 3 0 ------------+- +1 prefix 1 2 ------------+- +1 prefix 2 ------------+- diff --git a/t/t1664-uniq_first_column.out b/t/t1664-uniq_first_column.out index 31c1a16..a416c76 100644 --- a/t/t1664-uniq_first_column.out +++ b/t/t1664-uniq_first_column.out @@ -1,4 +1,4 @@ -barcat -c input/duplicolumns.txt +barcat -c -f0 input/duplicolumns.txt 10 prefix 1 1 -+-------=--->------------ 1 random 2 2 -+- 1 text 1 2 -+- diff --git a/t/t1668-uniq_regex.out b/t/t1668-uniq_regex.out index 7bbd521..6fc394c 100644 --- a/t/t1668-uniq_regex.out +++ b/t/t1668-uniq_regex.out @@ -1,11 +1,5 @@ -barcat -c -f'\S\K(.*)' input/duplicolumns.txt -2 prefix 1 1 ------------+--=----------- -1 random 2 2 ------------+- -1 prefix 3 1 ------------+- -1 text 1 2 ------------+- -1 prefix 2 1 ------------+- -2 prefix 3 2 ------------+--=----------- -1 
prefix 2 2 ------------+- -1 prefix 3 0 ------------+- -1 prefix 1 2 ------------+- -1 prefix 2 ------------+- +barcat -c -f'prefix\ ' input/duplicolumns.txt +3 prefix 1 1 -------------------+ +2 random 2 2 -------------- +4 prefix 3 1 -------------------+-->---- +3 prefix 2 1 -------------------+ diff --git a/t/t1806-reset_uniq.out b/t/t1806-reset_uniq.out index 8b9467f..c245746 100644 --- a/t/t1806-reset_uniq.out +++ b/t/t1806-reset_uniq.out @@ -6,6 +6,7 @@ barcat -ct-7 input/duplicates.txt ----- +++++++++++++++++ 6 most ----<--+---=>-------------------- 1 ----<- -3 most ---<----+=>--- -2 indented ---<----+ -1 different ---<- +3 most ---+---=->--- +1 indented ---+ +1 different ---+ +1 different ---+ -- 2.30.0