X-Git-Url: http://git.shiar.nl/barcat.git/blobdiff_plain/37ae26f3382bb9f040399930cd5b1ee15a7cfba8..147c2783799a5fec580b7cf58b8fb23b8310513b:/barcat diff --git a/barcat b/barcat index fe8c11d..cb3282e 100755 --- a/barcat +++ b/barcat @@ -26,6 +26,7 @@ GetOptions(\%opt, $opt{anchor} = qr/$_/; } or die $@ =~ s/(?:\ at\ \N+)?\Z/ for option $_[0]/r; }, + 'count|c!', 'human-readable|H!', 'sexagesimal!', 'reformat!', @@ -94,7 +95,7 @@ GetOptions(\%opt, 202 208 214 220 226 227 228 229 230 231 159 )], whites => [qw( 1;30 0;37 1;37 )], - greys => [map {"38;5;$_"} 0, 232..255, 15], + grays => [map {"38;5;$_"} 0, 232..255, 15], random => [map {"38;5;$_"} List::Util::shuffle(17..231)], rainbow=> [map {"38;5;$_"} 196, # r @@ -141,7 +142,6 @@ $opt{'graph-format'} //= '-'; $opt{trim} *= $opt{width} / 100 if $opt{trimpct}; $opt{units} = [split //, ' kMGTPEZYRQqryzafpn'.($opt{ascii} ? 'u' : 'μ').'m'] if $opt{'human-readable'}; -$opt{anchor} //= qr/\A/; $opt{'value-length'} = 4 if $opt{units}; $opt{'value-length'} = 1 if $opt{unmodified}; $opt{'signal-stat'} //= exists $SIG{INFO} ? 'INFO' : 'QUIT'; @@ -189,7 +189,7 @@ $opt{'value-format'} = $opt{sexagesimal} ? sub { $opt{'value-format'} ||= sub { sprintf '%.8g', $_[0] }; -my (@lines, @values, @order); +my (@lines, @values, @order, %uniq); $SIG{$_} = \&show_stat for $opt{'signal-stat'} || (); $SIG{ALRM} = sub { @@ -205,17 +205,31 @@ if (defined $opt{interval}) { eval { require Tie::Array::Sorted; tie @order, 'Tie::Array::Sorted', sub { $_[1] <=> $_[0] }; - } or warn $@, "Expect slowdown with large datasets!\n"; + } or warn $@, "Expect slowdown with large datasets!\n" + unless $opt{count}; } my $float = qr<[0-9]* [.]? [0-9]+ (?: e[+-]?[0-9]+ )?>; # positive numberish -my $valmatch = qr< $opt{anchor} ( \h* -? $float |) >x; +my $valmatch = $opt{anchor} // qr/\A/; +$valmatch .= !$opt{count} ? qr/( \h* -? $float |)/ : + $opt{anchor} ? qr/(\S*)/ : qr/(.*)/; + while (defined ($_ = $opt{input} ? shift @{ $opt{input} } : readline)) { s/\r?\n\z//; - s/\A\h*// unless $opt{unmodified}; - my $valnum = s/$valmatch/\n/ && $1; - push @values, $valnum; - push @order, $valnum if length $valnum; + my $valnum; + if ($opt{count}) { + $valnum = m/$valmatch/ && $1; + $uniq{$valnum}++ and next; + push @values, $valnum; + s/\A/\n /; + } + else { + s/\A\h*// unless $opt{unmodified}; + $valnum = s/$valmatch/\n/ && $1; + push @values, $valnum; + push @order, $valnum if length $valnum; + } + if (defined $opt{trim} and defined $valnum) { my $trimpos = abs $opt{trim}; $trimpos -= length $valnum if $opt{unmodified}; @@ -228,6 +242,8 @@ while (defined ($_ = $opt{input} ? shift @{ $opt{input} } : readline)) { } } push @lines, $_; +} +continue { show_lines() if defined $opt{interval} and $opt{interval} < 0 and $. % $opt{interval} == 0; } @@ -247,6 +263,11 @@ state $nr = $opt{hidemin} ? $opt{hidemin}->($#lines) : 0; my $limit = $opt{hidemax} ? $opt{hidemax}->($#lines, $nr) : $#lines; +if ($opt{count}) { + $_ = $uniq{$_} for @values[$nr .. $limit]; + @order = @values; +} + @order = sort { $b <=> $a } @order unless tied @order; my $maxval = $opt{maxval} // ( $opt{hidemax} ? max grep { length } @values[$nr .. $limit] : @@ -266,21 +287,9 @@ my @barmark; if ($opt{markers} and $size > 0) { for my $markspec (split /\h/, $opt{markers}) { my ($char, $func) = split //, $markspec, 2; + my $increment = $func =~ s/[+]\z//; my @pos = eval { - if ($func eq 'avg') { - return sum(@order) / @order; - } - elsif ($func =~ /\A([0-9.]+)v\z/) { - $1 <= 100 or die( - "Invalid marker $char: percentile $1 out of bounds\n" - ); - my $index = $#order * $1 / 100; - return ($order[$index] + $order[$index + .5]) / 2; - } - elsif ($func =~ /\A-?[0-9.]+\z/) { - return $func; - } - elsif ($func =~ /\A\/($float)\z/) { + if ($func =~ /\A\/($float)\z/) { my @range = my $multiple = my $next = $1; while ($next < $maxval) { $multiple *= 10 if $opt{log}; @@ -288,19 +297,17 @@ if ($opt{markers} and $size > 0) { } return @range; } - else { - die "Unknown marker $char: $func\n"; - } - }; - @pos or do { - warn $@ if $@; + return calc($func); + } or do { + warn "Invalid marker $char: $@" if $@; next; }; for my $pos (@pos) { $pos -= $minval; $pos &&= log $pos if $opt{log}; $pos >= 0 or next; - color(36) for $barmark[$pos / $range * $size] = $char; + $increment ||= $minval && !$pos; + color(36) for $barmark[$pos / $range * $size + $increment + .5] = $char; } } @@ -374,12 +381,14 @@ continue { $nr++; } say $opt{palette} ? color(0) : '' if $opt{spark}; +%uniq = () if $opt{interval} and $opt{count}; return $nr; } sub show_stat { my %vars = ( + partsum => undef, count => int @order, lines => int @lines, ); @@ -401,16 +410,45 @@ sub show_stat { return 1; } +sub calc { + my ($func) = @_; + if ($func eq 'avg') { + return sum(@order) / @order; + } + elsif ($func eq 'sum') { + return sum(@order); + } + elsif ($func =~ /\A([0-9.]+)v\z/) { + $1 <= 100 or die( + "percentile $1 out of bounds\n" + ); + my $index = $#order * $1 / 100; + my $f = $index - int $index; + my $val = $order[$index]; + if ($f) { + my $next = $order[$index + 1]; + $val -= $f * ($val - $next); + } + return $val; + } + elsif ($func =~ /\A-?[0-9.]+\z/) { + return $func; + } + else { + die "$func unknown\n"; + } +} + sub varfmt { my ($fmt, $vars) = @_; $fmt =~ s[\$\{ \h*+ ((?: [^{}]++ | \{(?1)\} )+) \}]{ my ($name, $op, $cmd) = split /\s*([;:])/, $1, 2; my $format = $name =~ s/\+// || $name !~ s/\#// && $opt{reformat}; - local $_ = $vars->{$name}; + local $_ = exists $vars->{$name} ? $vars->{$name} : calc($name); defined && do { $_ = $opt{'value-format'}->($_) if $format; if ($cmd and $op eq ':') { - $_ = varfmt($cmd, $vars); + $_ = !!$_ && varfmt($cmd, $vars); } elsif ($cmd) { eval $cmd; @@ -438,6 +476,8 @@ Usage: /\_/\ Options: -a, --[no-]ascii Restrict user interface to ASCII characters -C, --[no-]color Force colored output of values and bar markers + -c, --count Omit repetitions and count the number of + occurrences -f, --field=([+]N|REGEXP) Compare values after a given number of whitespace separators @@ -485,7 +525,6 @@ Visualizes relative sizes of values read from input Contents are concatenated similar to I, but numbers are reformatted and a bar graph is appended to each line. -Don't worry, barcat does not drink and divide. It can has various options for input and output (re)formatting, but remains limited to one-dimensional charts. For more complex graphing needs @@ -509,6 +548,16 @@ disabled otherwise such as when piped or redirected. Can also be disabled by setting B<-M> or the I environment variable. +=item B<-c>, B<--count> + +Omit repetitions and count the number of occurrences. +Similar to piping input through C +but keeping the order of first appearances. + +Lines are omitted if they (or a specified field) are identical, +and the amount of matches is prepended and used as values +for bars and subsequent statistics. + =item B<-f>, B<--field>=([B<+>]I | I) Compare values after a given number of whitespace separators, @@ -595,16 +644,19 @@ For example C<:/1> for a grid at every integer. =item IB -Ranked value at the given percentile. -The default shows C<+> at C<50v> for the mean or median; -the middle value or average between middle values. -One standard deviation right of the mean is at about C<68.3v>. +Ranked value at the given percentile, +or score at or below which a percentage falls +in its frequency distribution (inclusive). + +The default shows C<+> at C<50v> for the mean or median: +the middle value or interpolation between two values. +One standard deviation below the median is at about C<68v>. The default includes C<< >31.73v <68.27v >> to encompass all I results, or 68% of all entries, by I<< <--> >>. =item B -Matches the average; +Matches the average (arithmetic mean); the sum of all values divided by the number of counted lines. Indicated by default as C<=>. @@ -619,7 +671,7 @@ These options can be set to customize this range. =item B<--palette>=(I | I...) Override colors of parsed numbers. -Can be any CSI escape, such as C<90> for default dark grey, +Can be any CSI escape, such as C<90> for default dark gray, or alternatively C<1;30> for bright black. In case of additional colors, @@ -628,8 +680,35 @@ If unspecified, these are green and red respectively (C<31 90 32>). Multiple intermediate colors will be distributed relative to the size of values. -Predefined color schemes are named I and I, -or I and I for 256-color variants. +A non-numeric name can refer to a predefined color scheme: + +=over 8 + +=item B + +Minimal set of monochrome brightnesses. + +=item B + +Utilize the 24 grayscale ramp in 256-color terminals. + +=item B + +Gradient red to white in 7 out of 16 colors. + +=item B + +Extended to 17 colors out of 256. + +=item B + +Saturated red to green to blue to red. + +=item B + +All 215 extended colors in unrelated orders. + +=back =item B<-_>, B<--spark> @@ -715,19 +794,19 @@ Monitor network latency from prefixed results: ping google.com | barcat -f'time=\K' -t -Commonly used after counting, for example users on the current server: - - users | tr ' ' '\n' | sort | uniq -c | barcat - -Letter frequencies in text files: +Commonly used after counting, eg letter frequencies in text files: cat /usr/share/games/fortunes/*.u8 | perl -CS -nE 'say for grep length, split /\PL*/, uc' | sort | uniq -c | barcat +Users on the current server while preserving order: + + users | tr ' ' '\n' | barcat -c + Number of HTTP requests per day: - cat httpd/access.log | cut -d\ -f4 | cut -d: -f1 | uniq -c | barcat + barcat -cf'\[([^:]+)' httpd/access.log Any kind of database query results, preserving returned alignment: