X-Git-Url: http://git.shiar.nl/barcat.git/blobdiff_plain/f73baa573f52f7916dfff2fab50596aaa413f528..b2284fb09db3525ea8a930c98d6fe493e51ca45c:/barcat diff --git a/barcat b/barcat index d93f251..753e323 100755 --- a/barcat +++ b/barcat @@ -26,6 +26,7 @@ GetOptions(\%opt, $opt{anchor} = qr/$_/; } or die $@ =~ s/(?:\ at\ \N+)?\Z/ for option $_[0]/r; }, + 'count|c!', 'human-readable|H!', 'sexagesimal!', 'reformat!', @@ -40,20 +41,44 @@ GetOptions(\%opt, $opt{trim} = $optval; }, 'value-length=i', - 'hidemin=i', - 'hidemax=i', 'minval=f', 'maxval=f', 'limit|L:s' => sub { my ($optname, $optval) = @_; $optval ||= 0; $optval =~ /\A-[0-9]+\z/ and $optval .= '-'; # tail shorthand - ($opt{hidemin}, $opt{hidemax}) = - $optval =~ m/\A (?: (-? [0-9]+)? - )? ([0-9]+)? \z/ or die( + $optval =~ s/[+]/--/; + my ($start, $end) = + $optval =~ m/\A (?: (-? [0-9]+)? - )? (-? [0-9]+)? \z/ or die( "Value \"$optval\" invalid for option limit", " (range expected)\n" ); + $start ||= 1; + $start--; + s/\A-0*\z// and $_ ||= undef for $end // (); + + $opt{hidemin} = sub { + my ($lines) = @_; + if ($start < 0) { + return max(0, $lines + $start + 2); + } + return $start; + } if $start; + $opt{hidemax} = sub { + my ($limit, $offset) = @_; + if ($end < 0) { + return $offset - $end - 1; # count + } + elsif ($start < 0) { + return $limit - $end + 1; # bottom + } + elsif ($end <= $limit) { + return $end - 1; # less + } + return $limit; + } if defined $end; }, + 'log|e!', 'header!', 'markers|m=s', 'graph-format=s' => sub { @@ -70,7 +95,7 @@ GetOptions(\%opt, 202 208 214 220 226 227 228 229 230 231 159 )], whites => [qw( 1;30 0;37 1;37 )], - greys => [map {"38;5;$_"} 0, 232..255, 15], + grays => [map {"38;5;$_"} 0, 232..255, 15], random => [map {"38;5;$_"} List::Util::shuffle(17..231)], rainbow=> [map {"38;5;$_"} 196, # r @@ -89,6 +114,7 @@ GetOptions(\%opt, }; }, 'stat|s!', + 'report=s', 'signal-stat=s', 'unmodified|u!', 'width|w=i', @@ -114,18 +140,27 @@ $opt{width} ||= $ENV{COLUMNS} || qx(tput cols) || 80 unless $opt{spark}; $opt{color} //= $ENV{NO_COLOR} ? 0 : -t *STDOUT; # enable on tty $opt{'graph-format'} //= '-'; $opt{trim} *= $opt{width} / 100 if $opt{trimpct}; -$opt{units} = [split //, ' kMGTPEZYyzafpn'.($opt{ascii} ? 'u' : 'μ').'m'] +$opt{units} = [split //, ' kMGTPEZYRQqryzafpn'.($opt{ascii} ? 'u' : 'μ').'m'] if $opt{'human-readable'}; -$opt{anchor} //= qr/\A/; $opt{'value-length'} = 4 if $opt{units}; $opt{'value-length'} = 1 if $opt{unmodified}; $opt{'signal-stat'} //= exists $SIG{INFO} ? 'INFO' : 'QUIT'; $opt{markers} //= '=avg >31.73v <68.27v +50v |0'; +$opt{report} //= join('', + '${partsum+; $_ .= " of "}', + '${sum+; color(1); $_ .= " total in "}', + '${count#} values', + '${lines#; $_ = $_ != @order && " over $_ lines"}', + sprintf('${count: (%s)}', join ', ', + '${min; color(31)} min', + '${avg; $opt{reformat} or $_ = sprintf "%0.2f", $_; color(36)} avg', + '${max; color(32)} max', + ), +); $opt{palette} //= $opt{color} && [31, 90, 32]; $opt{indicators} = [split //, $opt{indicators} || ($opt{ascii} ? ' .oO' : $opt{spark} ? ' ▁▂▃▄▅▆▇█' : ' ▏▎▍▌▋▊▉█') ] if defined $opt{indicators} or $opt{spark}; -$opt{hidemin} = ($opt{hidemin} || 1) - 1; $opt{input} = (@ARGV && $ARGV[0] =~ m/\A[-0-9]/) ? \@ARGV : undef and undef $opt{interval}; @@ -154,7 +189,7 @@ $opt{'value-format'} = $opt{sexagesimal} ? sub { $opt{'value-format'} ||= sub { sprintf '%.8g', $_[0] }; -my (@lines, @values, @order); +my (@lines, @values, @order, %uniq); $SIG{$_} = \&show_stat for $opt{'signal-stat'} || (); $SIG{ALRM} = sub { @@ -170,18 +205,31 @@ if (defined $opt{interval}) { eval { require Tie::Array::Sorted; tie @order, 'Tie::Array::Sorted', sub { $_[1] <=> $_[0] }; - } or warn $@, "Expect slowdown with large datasets!\n"; + } or warn $@, "Expect slowdown with large datasets!\n" + unless $opt{count}; } -my $valmatch = qr< - $opt{anchor} ( \h* -? [0-9]* [.]? [0-9]+ (?: e[+-]?[0-9]+ )? |) ->x; +my $float = qr<[0-9]* [.]? [0-9]+ (?: e[+-]?[0-9]+ )?>; # positive numberish +my $valmatch = $opt{anchor} // qr/\A/; +$valmatch .= !$opt{count} ? qr/( \h* -? $float |)/ : + $opt{anchor} ? qr/(\S*)/ : qr/(.*)/; + while (defined ($_ = $opt{input} ? shift @{ $opt{input} } : readline)) { s/\r?\n\z//; - s/\A\h*// unless $opt{unmodified}; - my $valnum = s/$valmatch/\n/ && $1; - push @values, $valnum; - push @order, $valnum if length $valnum; + my $valnum; + if ($opt{count}) { + $valnum = m/$valmatch/ && $1; + $uniq{$valnum}++ and next; + push @values, $valnum; + s/\A/\n /; + } + else { + s/\A\h*// unless $opt{unmodified}; + $valnum = s/$valmatch/\n/ && $1; + push @values, $valnum; + push @order, $valnum if length $valnum; + } + if (defined $opt{trim} and defined $valnum) { my $trimpos = abs $opt{trim}; $trimpos -= length $valnum if $opt{unmodified}; @@ -194,6 +242,8 @@ while (defined ($_ = $opt{input} ? shift @{ $opt{input} } : readline)) { } } push @lines, $_; +} +continue { show_lines() if defined $opt{interval} and $opt{interval} < 0 and $. % $opt{interval} == 0; } @@ -208,19 +258,14 @@ sub color { sub show_lines { -state $nr = - $opt{hidemin} < 0 ? max(0, @lines + $opt{hidemin} + 1) : - $opt{hidemin}; +state $nr = $opt{hidemin} ? $opt{hidemin}->($#lines) : 0; @lines > $nr or return; -my $limit = $#lines; -if (defined $opt{hidemax}) { - if ($opt{hidemin} and $opt{hidemin} < 0) { - $limit -= $opt{hidemax} - 1; - } - elsif ($opt{hidemax} <= $limit) { - $limit = $opt{hidemax} - 1; - } +my $limit = $opt{hidemax} ? $opt{hidemax}->($#lines, $nr) : $#lines; + +if ($opt{count}) { + $_ = $uniq{$_} for @values[$nr .. $limit]; + @order = @values; } @order = sort { $b <=> $a } @order unless tied @order; @@ -230,18 +275,19 @@ my $maxval = $opt{maxval} // ( ) // 0; my $minval = $opt{minval} // min $order[-1] // (), 0; my $range = $maxval - $minval; +$range &&= log $range if $opt{log}; my $lenval = $opt{'value-length'} // max map { length } @order; my $len = defined $opt{trim} && $opt{trim} <= 0 ? -$opt{trim} + 1 : - max map { length $values[$_] && length $lines[$_] } - 0 .. min $#lines, $opt{hidemax} || (); # left padding + max(map { length $values[$_] && length $lines[$_] } $nr .. $limit) + // 0; # left padding my $size = defined $opt{width} && $range && - ($opt{width} - $lenval - $len - !!$opt{indicators}) / $range; # bar multiplication + ($opt{width} - $lenval - $len - !!$opt{indicators}); # bar multiplication my @barmark; if ($opt{markers} and $size > 0) { for my $markspec (split /\h/, $opt{markers}) { my ($char, $func) = split //, $markspec, 2; - my $pos = eval { + my @pos = eval { if ($func eq 'avg') { return sum(@order) / @order; } @@ -255,28 +301,39 @@ if ($opt{markers} and $size > 0) { elsif ($func =~ /\A-?[0-9.]+\z/) { return $func; } + elsif ($func =~ /\A\/($float)\z/) { + my @range = my $multiple = my $next = $1; + while ($next < $maxval) { + $multiple *= 10 if $opt{log}; + push @range, $next += $multiple; + } + return @range; + } else { die "Unknown marker $char: $func\n"; } }; - defined $pos or do { + @pos or do { warn $@ if $@; next; }; - $pos -= $minval; - $pos >= 0 or next; - color(36) for $barmark[$pos * $size] = $char; + for my $pos (@pos) { + $pos -= $minval; + $pos &&= log $pos if $opt{log}; + $pos >= 0 or next; + color(36) for $barmark[$pos / $range * $size] = $char; + } } state $lastmax = $maxval; if ($maxval > $lastmax) { print ' ' x ($lenval + $len); - printf color(90); + print color(90); printf '%-*s', - ($lastmax - $minval) * $size + .5, - '-' x (($values[$nr - 1] - $minval) * $size); + ($lastmax - $minval) * $size / $range + .5, + '-' x (($values[$nr - 1] - $minval) * $size / $range); print color(92); - say '+' x (($range - $lastmax) * $size + .5); + say '+' x (($range - $lastmax) * $size / $range + .5); print color(0); $lastmax = $maxval; } @@ -285,14 +342,19 @@ if ($opt{markers} and $size > 0) { say( color(31), sprintf('%*s', $lenval, $minval), color(90), '-', color(36), '+', - color(32), sprintf('%*s', $size * $range - 3, $maxval), + color(32), sprintf('%*s', $size - 3, $maxval), color(90), '-', color(36), '+', color(0), ) if $opt{header}; while ($nr <= $limit) { my $val = $values[$nr]; - my $rel = length $val && $range && min(1, ($val - $minval) / $range); + my $rel; + if (length $val) { + $rel = $val - $minval; + $rel &&= log $rel if $opt{log}; + $rel = min(1, $rel / $range) if $range; # 0..1 + } my $color = !length $val || !$opt{palette} ? undef : $val == $order[0] ? $opt{palette}->[-1] : # max $val == $order[-1] ? $opt{palette}->[0] : # min @@ -323,45 +385,65 @@ while ($nr <= $limit) { next; } printf '%-*s', $len + length($val), $line; - print $barmark[$_] // $opt{'graph-format'} - for 1 .. $size && (($values[$nr] || 0) - $minval) * $size + .5; + if ($rel and $size) { + print $barmark[$_] // $opt{'graph-format'} + for 1 .. $rel * $size + .5; + } say ''; } continue { $nr++; } say $opt{palette} ? color(0) : '' if $opt{spark}; +%uniq = () if $opt{interval} and $opt{count}; return $nr; } sub show_stat { - if ($opt{hidemin} or $opt{hidemax}) { - my $linemin = $opt{hidemin}; - my $linemax = ($opt{hidemax} || @lines) - 1; - if ($linemin < 0) { - $linemin += @lines; - $linemax = @lines - $linemax; - } - printf '%.8g of ', $opt{'value-format'}->( - sum(grep {length} @values[$linemin .. $linemax]) // 0 - ); - } + my %vars = ( + count => int @order, + lines => int @lines, + ); + my $linemin = !$opt{hidemin} ? 0 : + ($vars{start} = $opt{hidemin}->($#lines)); + my $linemax = !$opt{hidemax} ? $#lines : + ($vars{end} = $opt{hidemax}->($#lines, $vars{start})); if (@order) { - my $total = sum @order; - printf '%s total', color(1) . $opt{'value-format'}->($total) . color(0); - printf ' in %d values', scalar @order; - printf ' over %d lines', scalar @lines if @order != @lines; - printf(' (%s min, %s avg, %s max)', - color(31) . ($opt{reformat} ? $opt{'value-format'} : sub {$_[0]})->($order[-1]) . color(0), - color(36) . ($opt{reformat} ? $opt{'value-format'} : $opt{'calc-format'})->($total / @order) . color(0), - color(32) . ($opt{reformat} ? $opt{'value-format'} : sub {$_[0]})->($order[0]) . color(0), + $vars{partsum} = sum(0, grep {length} @values[$linemin .. $linemax]) + if $linemin <= $linemax and ($opt{hidemin} or $opt{hidemax}); + %vars = (%vars, + sum => sum(@order), + min => $order[-1], + max => $order[0], ); + $vars{avg} = $vars{sum} / @order; } - say ''; + say varfmt($opt{report}, \%vars); return 1; } +sub varfmt { + my ($fmt, $vars) = @_; + $fmt =~ s[\$\{ \h*+ ((?: [^{}]++ | \{(?1)\} )+) \}]{ + my ($name, $op, $cmd) = split /\s*([;:])/, $1, 2; + my $format = $name =~ s/\+// || $name !~ s/\#// && $opt{reformat}; + local $_ = $vars->{$name}; + defined && do { + $_ = $opt{'value-format'}->($_) if $format; + if ($cmd and $op eq ':') { + $_ = varfmt($cmd, $vars); + } + elsif ($cmd) { + eval $cmd; + warn "Error in \$$name report: $@" if $@; + } + $_; + } + }eg; + return $fmt; +} + sub show_exit { show_lines(); show_stat() if $opt{stat}; @@ -378,6 +460,8 @@ Usage: /\_/\ Options: -a, --[no-]ascii Restrict user interface to ASCII characters -C, --[no-]color Force colored output of values and bar markers + -c, --count Omit repetitions and count the number of + occurrences -f, --field=([+]N|REGEXP) Compare values after a given number of whitespace separators @@ -389,8 +473,9 @@ Options: Output partial progress every given number of seconds or input lines -l, --length=[-]SIZE[%] Trim line contents (between number and bars) - -L, --limit[=(N|-LAST|START-[END])] - Stop output after a number of lines + -L, --limit=[N|[-]START(-[END]|+N)] + Select a range of lines to display + -e, --log Logarithmic (exponential) scale instead of linear --graph-format=CHAR Glyph to repeat for the graph line -m, --markers=FORMAT Statistical positions to indicate on bars --min=N, --max=N Bars extend from 0 or the minimum value if lower @@ -415,7 +500,7 @@ barcat - concatenate texts with graph to visualize values =head1 SYNOPSIS -B [] [... | ] +B [I] [I... | I] =head1 DESCRIPTION @@ -424,7 +509,6 @@ Visualizes relative sizes of values read from input Contents are concatenated similar to I, but numbers are reformatted and a bar graph is appended to each line. -Don't worry, barcat does not drink and divide. It can has various options for input and output (re)formatting, but remains limited to one-dimensional charts. For more complex graphing needs @@ -434,173 +518,226 @@ you'll need a larger animal like I. =over -=item -a, --[no-]ascii +=item B<-a>, B<-->[B]B Restrict user interface to ASCII characters, replacing default UTF-8 by their closest approximation. Input is always interpreted as UTF-8 and shown as is. -=item -C, --[no-]color +=item B<-C>, B<-->[B]B Force colored output of values and bar markers. Defaults on if output is a tty, disabled otherwise such as when piped or redirected. -Can also be disabled by setting I<-M> +Can also be disabled by setting B<-M> or the I environment variable. -=item -f, --field=([+] | ) +=item B<-c>, B<--count> + +Omit repetitions and count the number of occurrences. +Similar to piping input through C +but keeping the order of first appearances. + +Lines are omitted if they (or a specified field) are identical, +and the amount of matches is prepended and used as values +for bars and subsequent statistics. + +=item B<-f>, B<--field>=([B<+>]I | I) Compare values after a given number of whitespace separators, or matching a regular expression. -Unspecified or I<-f0> means values are at the start of each line. -With I<-f1> the second word is taken instead. +Unspecified or B<-f0> means values are at the start of each line. +With B<-f1> the second word is taken instead. A string can indicate the starting position of a value -(such as I<-f:> if preceded by colons), +(such as B<-f:> if preceded by colons), or capture the numbers itself, -for example I<-f'(\d+)'> for the first digits anywhere. -A shorthand for this is I<+0>, or I<+N> to find the Nth number. +for example B<-f'(\d+)'> for the first digits anywhere. +A shorthand for this is C<+0>, or C<+N> to find the Nth number. -=item --header +=item B<--header> Prepend a chart axis with minimum and maximum values labeled. -=item -H, --human-readable +=item B<-H>, B<--human-readable> Format values using SI unit prefixes, -turning long numbers like I<12356789> into I<12.4M>. -Also changes an exponent I<1.602176634e-19> to I<160.2z>. +turning long numbers like C<12356789> into C<12.4M>. +Also changes an exponent C<1.602176634e-19> to C<160.2z>. Short integers are aligned but kept without decimal point. -=item --sexagesimal +=item B<--sexagesimal> Convert seconds to HH:MM:SS time format. -=item -t, --interval[=( | -)] +=item B<-t>, B<--interval>[=(I | B<->I)] Output partial progress every given number of seconds or input lines. An update can also be forced by sending a I alarm signal. -=item -l, --length=[-][%] +=item B<-l>, B<--length>=[B<->]I[B<%>] Trim line contents (between number and bars) to a maximum number of characters. The exceeding part is replaced by an abbreviation sign, -unless C<--length=0>. +unless B<--length=0>. Prepend a dash (i.e. make negative) to enforce padding regardless of encountered contents. -=item -L, --limit[=( | - | -[])] +=item B<-L>, B<--limit>=[I | [B<->]I(B<->[I] | B<+>I)] + +Select a range of lines to display. +A single integer indicates the last line number (like I), +or first line counting from the bottom if negative (like I). -Stop output after a number of lines. -A single value indicates the last line number (like C), -or first line counting from the bottom if negative (like C). -A specific range can be given by two values. +A range consists of a starting line number followed by either +a dash C<-> to an optional end, or plus sign C<+> with count. -All input is still counted and analyzed for statistics, +All hidden input is still counted and analyzed for statistics, but disregarded for padding and bar size. -=item --graph-format= +=item B<-e>, B<--log> + +Logarithmic (Bxponential) scale instead of linear +to compare orders of magnitude. + +=item B<--graph-format>=I Glyph to repeat for the graph line. Defaults to a dash C<->. -=item -m, --markers= +=item B<-m>, B<--markers>=I Statistical positions to indicate on bars. A single indicator glyph precedes each position: =over 2 -=item +=item I Exact value to match on the axis. -A vertical bar at the zero crossing is displayed by I<|0> +A vertical bar at the zero crossing is displayed by C<|0> for negative values. -For example I<:3.14> would show a colon at pi. +For example C<π3.14> would locate pi. + +=item BI -=item I +Repeated at every multiple of a number. +For example C<:/1> for a grid at every integer. + +=item IB Ranked value at the given percentile. -The default shows I<+> at I<50v> for the mean or median; +The default shows C<+> at C<50v> for the mean or median; the middle value or average between middle values. -One standard deviation right of the mean is at about I<68.3v>. -The default includes I<< >31.73v <68.27v >> -to encompass all I results, or 68% of all entries, by B<< <--> >>. +One standard deviation right of the mean is at about C<68.3v>. +The default includes C<< >31.73v <68.27v >> +to encompass all I results, or 68% of all entries, by I<< <--> >>. -=item I +=item B Matches the average; the sum of all values divided by the number of counted lines. -Indicated by default as I<=>. +Indicated by default as C<=>. =back -=item --min=, --max= +=item B<--min>=I, B<--max>=I Bars extend from 0 or the minimum value if lower, to the largest value encountered. These options can be set to customize this range. -=item --palette=( | ...) +=item B<--palette>=(I | I...) Override colors of parsed numbers. -Can be any CSI escape, such as I<90> for default dark grey, -or alternatively I<1;30> for bright black. +Can be any CSI escape, such as C<90> for default dark gray, +or alternatively C<1;30> for bright black. In case of additional colors, the last is used for values equal to the maximum, the first for minima. -If unspecified, these are green and red respectively (I<31 90 32>). +If unspecified, these are green and red respectively (C<31 90 32>). Multiple intermediate colors will be distributed relative to the size of values. -Predefined color schemes are named I and I, -or I and I for 256-color variants. +A non-numeric name can refer to a predefined color scheme: + +=over 8 + +=item B + +Minimal set of monochrome brightnesses. + +=item B + +Utilize the 24 grayscale ramp in 256-color terminals. + +=item B -=item -_, --spark +Gradient red to white in 7 out of 16 colors. + +=item B + +Extended to 17 colors out of 256. + +=item B + +Saturated red to green to blue to red. + +=item B + +All 215 extended colors in unrelated orders. + +=back + +=item B<-_>, B<--spark> Replace lines by I, -single characters (configured by C<--indicators>) +single characters (configured by B<--indicators>) corresponding to input values. -=item --indicators[=] +=item B<--indicators>[=I] Prefix a unicode character corresponding to each value. The first specified character will be used for non-values, the remaining sequence will be distributed over the range of values. Unspecified, block fill glyphs U+2581-2588 will be used. -=item -s, --stat +=item B<-s>, B<--stat> Total statistics after all data. -=item -u, --unmodified +While processing (possibly a neverending pipe), +intermediate results are also shown on signal I if available (control+t on BSDs) +or I otherwise (ctrl+\ on linux). + +=item B<-u>, B<--unmodified> Do not reformat values, keeping leading whitespace. Keep original value alignment, which may be significant in some programs. -=item --value-length= +=item B<--value-length>=I Reserved space for numbers. -=item -w, --width= +=item B<-w>, B<--width>=I Override the maximum number of columns to use. Appended graphics will extend to fill up the entire screen, otherwise determined by the environment variable I -or by running the C command. +or by running the I command. -=item -h, --usage +=item B<-h>, B<--usage> Overview of available options. -=item --help +=item B<--help> Full pod documentation as rendered by perldoc. -=item -V, --version +=item B<-V>, B<--version> Version information. @@ -638,19 +775,19 @@ Monitor network latency from prefixed results: ping google.com | barcat -f'time=\K' -t -Commonly used after counting, for example users on the current server: - - users | tr ' ' '\n' | sort | uniq -c | barcat - -Letter frequencies in text files: +Commonly used after counting, eg letter frequencies in text files: cat /usr/share/games/fortunes/*.u8 | perl -CS -nE 'say for grep length, split /\PL*/, uc' | sort | uniq -c | barcat +Users on the current server while preserving order: + + users | tr ' ' '\n' | barcat -c + Number of HTTP requests per day: - cat httpd/access.log | cut -d\ -f4 | cut -d: -f1 | uniq -c | barcat + barcat -cf'\[([^:]+)' httpd/access.log Any kind of database query results, preserving returned alignment: @@ -693,12 +830,12 @@ Total population history in XML from the World Bank: curl http://api.worldbank.org/v2/country/1W/indicator/SP.POP.TOTL | xmlstarlet sel -t -m '*/*' -v wb:date -o ' ' -v wb:value -n | - barcat -f1 -H + barcat -f1 -H --markers=+/1e9 Population and other information for all countries: curl http://download.geonames.org/export/dump/countryInfo.txt | - grep -v '^#\s' | column -ts$'\t' -n | barcat -f+2 -u -l150 -s + grep -v '^#\s' | column -ts$'\t' -n | barcat -f+2 -e -u -l150 -s And of course various Git statistics, such commit count by year: