From: Mischa POSLAWSKY Date: Fri, 2 Dec 2022 20:41:32 +0000 (+0100) Subject: count option providing improved uniq -c X-Git-Tag: v1.10~14 X-Git-Url: http://git.shiar.nl/barcat.git/commitdiff_plain/6672fed2b85ccecb6c34aa019b1e87b8d31b8172 count option providing improved uniq -c --- diff --git a/barcat b/barcat index 0cfdb2d..3b73b67 100755 --- a/barcat +++ b/barcat @@ -26,6 +26,7 @@ GetOptions(\%opt, $opt{anchor} = qr/$_/; } or die $@ =~ s/(?:\ at\ \N+)?\Z/ for option $_[0]/r; }, + 'count|c!', 'human-readable|H!', 'sexagesimal!', 'reformat!', @@ -189,7 +190,7 @@ $opt{'value-format'} = $opt{sexagesimal} ? sub { $opt{'value-format'} ||= sub { sprintf '%.8g', $_[0] }; -my (@lines, @values, @order); +my (@lines, @values, @order, %uniq); $SIG{$_} = \&show_stat for $opt{'signal-stat'} || (); $SIG{ALRM} = sub { @@ -212,6 +213,14 @@ my $float = qr<[0-9]* [.]? [0-9]+ (?: e[+-]?[0-9]+ )?>; # positive numberish my $valmatch = qr< $opt{anchor} ( \h* -? $float |) >x; while (defined ($_ = $opt{input} ? shift @{ $opt{input} } : readline)) { s/\r?\n\z//; + if ($opt{count}) { + my ($valnum) = m/(\S*)/; + $valnum //= ''; + $uniq{$valnum}++ and next; + push @lines, "\n " . $_; + push @values, $valnum; + next; + } s/\A\h*// unless $opt{unmodified}; my $valnum = s/$valmatch/\n/ && $1; push @values, $valnum; @@ -247,6 +256,11 @@ state $nr = $opt{hidemin} ? $opt{hidemin}->($#lines) : 0; my $limit = $opt{hidemax} ? $opt{hidemax}->($#lines, $nr) : $#lines; +if ($opt{count}) { + $_ = $uniq{$_} for @values; + @order = @values; +} + @order = sort { $b <=> $a } @order unless tied @order; my $maxval = $opt{maxval} // ( $opt{hidemax} ? max grep { length } @values[$nr .. $limit] : @@ -438,6 +452,8 @@ Usage: /\_/\ Options: -a, --[no-]ascii Restrict user interface to ASCII characters -C, --[no-]color Force colored output of values and bar markers + -c, --count Omit repetitions and count the number of + occurrences -f, --field=([+]N|REGEXP) Compare values after a given number of whitespace separators @@ -509,6 +525,12 @@ disabled otherwise such as when piped or redirected. Can also be disabled by setting B<-M> or the I environment variable. +=item B<-c>, B<--count> + +Omit repetitions and count the number of occurrences. +Similar to piping input to C +but keeping the order of first appearances. + =item B<-f>, B<--field>=([B<+>]I | I) Compare values after a given number of whitespace separators, @@ -742,16 +764,16 @@ Monitor network latency from prefixed results: ping google.com | barcat -f'time=\K' -t -Commonly used after counting, for example users on the current server: - - users | tr ' ' '\n' | sort | uniq -c | barcat - -Letter frequencies in text files: +Commonly used after counting, eg letter frequencies in text files: cat /usr/share/games/fortunes/*.u8 | perl -CS -nE 'say for grep length, split /\PL*/, uc' | sort | uniq -c | barcat +Users on the current server while preserving order: + + users | tr ' ' '\n' | barcat -c + Number of HTTP requests per day: cat httpd/access.log | cut -d\ -f4 | cut -d: -f1 | uniq -c | barcat diff --git a/t/input/duplicates.txt b/t/input/duplicates.txt new file mode 100644 index 0000000..124b263 --- /dev/null +++ b/t/input/duplicates.txt @@ -0,0 +1,20 @@ +thrice +twice +thrice +once +twice +most +thrice +most +most +most +most +most +most + +most + indented +different + different +most +most diff --git a/t/t0002-usage.out b/t/t0002-usage.out index 55ac83f..a9ff9d7 100644 --- a/t/t0002-usage.out +++ b/t/t0002-usage.out @@ -1,2 +1,2 @@ barcat -h |wc -l -36 +38 diff --git a/t/t1662-uniq_words.out b/t/t1662-uniq_words.out new file mode 100644 index 0000000..737a885 --- /dev/null +++ b/t/t1662-uniq_words.out @@ -0,0 +1,7 @@ +barcat --count input/duplicates.txt + 3 thrice ----<+-> + 2 twice ----< + 1 once --- +10 most ----<+->=------------------ + 3 ----<+-> + 1 different ---