use open qw( :std :utf8 );
use re '/msx';
-our $VERSION = '1.09';
+our $VERSION = '1.10';
my %opt;
if (@ARGV) {
$opt{trim} *= $opt{width} / 100 if $opt{trimpct};
$opt{units} = [split //, ' kMGTPEZYRQqryzafpn'.($opt{ascii} ? 'u' : 'μ').'m']
if $opt{'human-readable'};
-$opt{anchor} //= qr/\A/;
$opt{'value-length'} = 4 if $opt{units};
$opt{'value-length'} = 1 if $opt{unmodified};
$opt{'signal-stat'} //= exists $SIG{INFO} ? 'INFO' : 'QUIT';
-$opt{markers} //= '=avg >31.73v <68.27v +50v |0';
+$opt{markers} //= '=avg <31.73v >68.27v +50v |0';
$opt{report} //= join('',
'${partsum+; $_ .= " of "}',
'${sum+; color(1); $_ .= " total in "}',
'${count#} values',
'${lines#; $_ = $_ != @order && " over $_ lines"}',
sprintf('${count: (%s)}', join ', ',
- '${min; color(31)} min',
+ '${0v; color(31)} min',
'${avg; $opt{reformat} or $_ = sprintf "%0.2f", $_; color(36)} avg',
- '${max; color(32)} max',
+ '${100v; color(32)} max',
),
);
$opt{palette} //= $opt{color} && [31, 90, 32];
eval {
require Tie::Array::Sorted;
- tie @order, 'Tie::Array::Sorted', sub { $_[1] <=> $_[0] };
+ tie @order, 'Tie::Array::Sorted', sub { $_[0] <=> $_[1] };
} or warn $@, "Expect slowdown with large datasets!\n"
unless $opt{count};
}
my $float = qr<[0-9]* [.]? [0-9]+ (?: e[+-]?[0-9]+ )?>; # positive numberish
-my $valmatch = qr< $opt{anchor} ( \h* -? $float |) >;
+my $valmatch = $opt{anchor} // qr/\A/;
+$valmatch .= !$opt{count} ? qr/( \h* -? $float |)/ :
+ $opt{anchor} ? qr/(\S*)/ : qr/(.*)/;
+
while (defined ($_ = $opt{input} ? shift @{ $opt{input} } : readline)) {
s/\r?\n\z//;
my $valnum;
if ($opt{count}) {
- ($valnum) = m/$opt{anchor} (\S*)/;
- $valnum //= '';
+ $valnum = m/$valmatch/ && $1;
$uniq{$valnum}++ and next;
push @values, $valnum;
s/\A/\n /;
@order = @values;
}
-@order = sort { $b <=> $a } @order unless tied @order;
+@order = sort { $a <=> $b } @order unless tied @order;
my $maxval = $opt{maxval} // (
$opt{hidemax} ? max grep { length } @values[$nr .. $limit] :
- $order[0]
+ $order[-1]
) // 0;
-my $minval = $opt{minval} // min $order[-1] // (), 0;
+my $minval = $opt{minval} // min $order[0] // (), 0;
my $range = $maxval - $minval;
$range &&= log $range if $opt{log};
my $lenval = $opt{'value-length'} // max map { length } @order;
if ($opt{markers} and $size > 0) {
for my $markspec (split /\h/, $opt{markers}) {
my ($char, $func) = split //, $markspec, 2;
+ my $increment = $func =~ s/[+]\z//;
my @pos = eval {
- if ($func eq 'avg') {
- return sum(@order) / @order;
- }
- elsif ($func =~ /\A([0-9.]+)v\z/) {
- $1 <= 100 or die(
- "Invalid marker $char: percentile $1 out of bounds\n"
- );
- my $index = $#order * $1 / 100;
- return ($order[$index] + $order[$index + .5]) / 2;
- }
- elsif ($func =~ /\A-?[0-9.]+\z/) {
- return $func;
- }
- elsif ($func =~ /\A\/($float)\z/) {
+ if ($func =~ /\A\/($float)\z/) {
my @range = my $multiple = my $next = $1;
while ($next < $maxval) {
$multiple *= 10 if $opt{log};
}
return @range;
}
- else {
- die "Unknown marker $char: $func\n";
- }
- };
- @pos or do {
- warn $@ if $@;
+ return calc($func);
+ } or do {
+ warn "Invalid marker $char: $@" if $@;
next;
};
for my $pos (@pos) {
$pos -= $minval;
$pos &&= log $pos if $opt{log};
$pos >= 0 or next;
- color(36) for $barmark[$pos / $range * $size] = $char;
+ $increment ||= $minval && !$pos;
+ color(36) for $barmark[$pos / $range * $size + $increment + .5] = $char;
}
}
$rel = min(1, $rel / $range) if $range; # 0..1
}
my $color = !length $val || !$opt{palette} ? undef :
- $val == $order[0] ? $opt{palette}->[-1] : # max
- $val == $order[-1] ? $opt{palette}->[0] : # min
+ $val == $order[-1] ? $opt{palette}->[-1] : # max
+ $val == $order[0] ? $opt{palette}->[0] : # min
$opt{palette}->[ $rel * ($#{$opt{palette}} - 1) + 1 ];
my $indicator = $opt{indicators} && $opt{indicators}->[
!length($val) || !$#{$opt{indicators}} ? 0 : # blank
$#{$opt{indicators}} < 2 ? 1 :
- $val >= $order[0] ? -1 :
+ $val >= $order[-1] ? -1 :
$rel * ($#{$opt{indicators}} - 1e-14) + 1
];
sub show_stat {
my %vars = (
+ partsum => undef,
count => int @order,
lines => int @lines,
);
if (@order) {
$vars{partsum} = sum(0, grep {length} @values[$linemin .. $linemax])
if $linemin <= $linemax and ($opt{hidemin} or $opt{hidemax});
- %vars = (%vars,
- sum => sum(@order),
- min => $order[-1],
- max => $order[0],
- );
- $vars{avg} = $vars{sum} / @order;
}
say varfmt($opt{report}, \%vars);
return 1;
}
+sub calc { # resolve a statistic name ('avg', 'sum', '<N>v') or a literal number to its value over @order; dies on anything else
+ my ($func) = @_; # the statistic specifier to evaluate
+ if ($func eq 'avg') {
+ return calc('sum') / @order; # sum divided by the element count of @order; NOTE(review): divides by zero if @order is empty - confirm callers guard this
+ }
+ elsif ($func eq 'sum') {
+ state $cache; # last computed total of @order, kept between calls
+ state $cachednr = 0; # element count of @order when $cache was computed
+ unless (@order == $cachednr) { # recompute only when the element count changed; NOTE(review): keyed on count alone, presumably @order only ever grows - confirm
+ $cache = sum(@order);
+ $cachednr = @order;
+ }
+ return $cache;
+ }
+ elsif ($func =~ /\A([0-9.]+)v\z/) { # percentile: digits and dots followed by "v"
+ $1 <= 100 or die( # reject percentiles above 100
+ "percentile $1 out of bounds\n"
+ );
+ my $index = $#order * $1 / 100;
+ my $f = $index - int $index; # fractional part of the rank index computed above
+ my $val = $order[$index]; # the subscript is truncated to an integer, selecting the lower neighbour
+ if ($f) { # rank falls between two elements
+ my $next = $order[$index + 1];
+ $val -= $f * ($val - $next); # linear interpolation, same as $val + $f * ($next - $val)
+ }
+ return $val;
+ }
+ elsif ($func =~ /\A-?[0-9.]+\z/) { # optional minus sign, then digits and dots: a literal value
+ return $func; # returned unchanged
+ }
+ else {
+ die "$func unknown\n"; # trailing newline keeps Perl from appending file and line
+ }
+}
+
sub varfmt {
my ($fmt, $vars) = @_;
$fmt =~ s[\$\{ \h*+ ((?: [^{}]++ | \{(?1)\} )+) \}]{
my ($name, $op, $cmd) = split /\s*([;:])/, $1, 2;
my $format = $name =~ s/\+// || $name !~ s/\#// && $opt{reformat};
- local $_ = $vars->{$name};
+ local $_ = exists $vars->{$name} ? $vars->{$name} : calc($name);
defined && do {
$_ = $opt{'value-format'}->($_) if $format;
if ($cmd and $op eq ':') {
- $_ = varfmt($cmd, $vars);
+ $_ = !!$_ && varfmt($cmd, $vars);
}
elsif ($cmd) {
eval $cmd;
Contents are concatenated similar to I<cat>,
but numbers are reformatted and a bar graph is appended to each line.
-Don't worry, barcat does not drink and divide.
It can has various options for input and output (re)formatting,
but remains limited to one-dimensional charts.
For more complex graphing needs
=item B<-c>, B<--count>
Omit repetitions and count the number of occurrences.
-Similar to piping input to C<sort | uniq -c>
+Similar to piping input through C<sort | uniq -c>
but keeping the order of first appearances.
+Lines are omitted if they (or a specified field) are identical,
+and the number of matches is prepended and used as the value
+for bars and subsequent statistics.
+
=item B<-f>, B<--field>=([B<+>]I<number> | I<regexp>)
Compare values after a given number of whitespace separators,
=item I<percentage>B<v>
-Ranked value at the given percentile.
-The default shows C<+> at C<50v> for the mean or median;
-the middle value or average between middle values.
-One standard deviation right of the mean is at about C<68.3v>.
-The default includes C<< >31.73v <68.27v >>
+Ranked value at the given percentile,
+or score at or below which a percentage falls
+in its frequency distribution (inclusive).
+
+The default shows C<+> at C<50v> for the median:
+the middle value or interpolation between two values.
+One standard deviation above the median is at about C<68v>.
+The default includes C<< <31.73v >68.27v >>
to encompass all I<normal> results, or 68% of all entries, by I<< <--> >>.
=item B<avg>
-Matches the average;
+Matches the average (arithmetic mean);
the sum of all values divided by the number of counted lines.
Indicated by default as C<=>.