X-Git-Url: http://git.shiar.nl/barcat.git/blobdiff_plain/b1e1adebfd115dcd80e65a2a505d9d0ed5f96c43..HEAD:/barcat diff --git a/barcat b/barcat index 0445249..2b1a714 100755 --- a/barcat +++ b/barcat @@ -6,7 +6,7 @@ use List::Util qw( min max sum ); use open qw( :std :utf8 ); use re '/msx'; -our $VERSION = '1.09'; +our $VERSION = '1.10'; my %opt; if (@ARGV) { @@ -145,16 +145,16 @@ $opt{units} = [split //, ' kMGTPEZYRQqryzafpn'.($opt{ascii} ? 'u' : 'μ').'m'] $opt{'value-length'} = 4 if $opt{units}; $opt{'value-length'} = 1 if $opt{unmodified}; $opt{'signal-stat'} //= exists $SIG{INFO} ? 'INFO' : 'QUIT'; -$opt{markers} //= '=avg >31.73v <68.27v +50v |0'; +$opt{markers} //= '=avg <31.73v >68.27v +50v |0'; $opt{report} //= join('', '${partsum+; $_ .= " of "}', '${sum+; color(1); $_ .= " total in "}', '${count#} values', '${lines#; $_ = $_ != @order && " over $_ lines"}', sprintf('${count: (%s)}', join ', ', - '${min; color(31)} min', + '${0v; color(31)} min', '${avg; $opt{reformat} or $_ = sprintf "%0.2f", $_; color(36)} avg', - '${max; color(32)} max', + '${100v; color(32)} max', ), ); $opt{palette} //= $opt{color} && [31, 90, 32]; @@ -204,7 +204,7 @@ if (defined $opt{interval}) { eval { require Tie::Array::Sorted; - tie @order, 'Tie::Array::Sorted', sub { $_[1] <=> $_[0] }; + tie @order, 'Tie::Array::Sorted', sub { $_[0] <=> $_[1] }; } or warn $@, "Expect slowdown with large datasets!\n" unless $opt{count}; } @@ -268,12 +268,12 @@ if ($opt{count}) { @order = @values; } -@order = sort { $b <=> $a } @order unless tied @order; +@order = sort { $a <=> $b } @order unless tied @order; my $maxval = $opt{maxval} // ( $opt{hidemax} ? max grep { length } @values[$nr .. $limit] : - $order[0] + $order[-1] ) // 0; -my $minval = $opt{minval} // min $order[-1] // (), 0; +my $minval = $opt{minval} // min $order[0] // (), 0; my $range = $maxval - $minval; $range &&= log $range if $opt{log}; my $lenval = $opt{'value-length'} // max map { length } @order; @@ -289,20 +289,7 @@ if ($opt{markers} and $size > 0) { my ($char, $func) = split //, $markspec, 2; my $increment = $func =~ s/[+]\z//; my @pos = eval { - if ($func eq 'avg') { - return sum(@order) / @order; - } - elsif ($func =~ /\A([0-9.]+)v\z/) { - $1 <= 100 or die( - "Invalid marker $char: percentile $1 out of bounds\n" - ); - my $index = $#order * $1 / 100; - return ($order[$index] + $order[$index + .5]) / 2; - } - elsif ($func =~ /\A-?[0-9.]+\z/) { - return $func; - } - elsif ($func =~ /\A\/($float)\z/) { + if ($func =~ /\A\/($float)\z/) { my @range = my $multiple = my $next = $1; while ($next < $maxval) { $multiple *= 10 if $opt{log}; @@ -310,12 +297,9 @@ if ($opt{markers} and $size > 0) { } return @range; } - else { - die "Unknown marker $char: $func\n"; - } - }; - @pos or do { - warn $@ if $@; + return calc($func); + } or do { + warn "Invalid marker $char: $@" if $@; next; }; for my $pos (@pos) { @@ -358,13 +342,13 @@ while ($nr <= $limit) { $rel = min(1, $rel / $range) if $range; # 0..1 } my $color = !length $val || !$opt{palette} ? undef : - $val == $order[0] ? $opt{palette}->[-1] : # max - $val == $order[-1] ? $opt{palette}->[0] : # min + $val == $order[-1] ? $opt{palette}->[-1] : # max + $val == $order[0] ? $opt{palette}->[0] : # min $opt{palette}->[ $rel * ($#{$opt{palette}} - 1) + 1 ]; my $indicator = $opt{indicators} && $opt{indicators}->[ !length($val) || !$#{$opt{indicators}} ? 0 : # blank $#{$opt{indicators}} < 2 ? 1 : - $val >= $order[0] ? -1 : + $val >= $order[-1] ? -1 : $rel * ($#{$opt{indicators}} - 1e-14) + 1 ]; @@ -404,6 +388,7 @@ say $opt{palette} ? color(0) : '' if $opt{spark}; sub show_stat { my %vars = ( + partsum => undef, count => int @order, lines => int @lines, ); @@ -414,27 +399,56 @@ sub show_stat { if (@order) { $vars{partsum} = sum(0, grep {length} @values[$linemin .. $linemax]) if $linemin <= $linemax and ($opt{hidemin} or $opt{hidemax}); - %vars = (%vars, - sum => sum(@order), - min => $order[-1], - max => $order[0], - ); - $vars{avg} = $vars{sum} / @order; } say varfmt($opt{report}, \%vars); return 1; } +sub calc { + my ($func) = @_; + if ($func eq 'avg') { + return calc('sum') / @order; + } + elsif ($func eq 'sum') { + state $cache; # avoid recount + state $cachednr = 0; # if unchanged + unless (@order == $cachednr) { + $cache = sum(@order); + $cachednr = @order; + } + return $cache; + } + elsif ($func =~ /\A([0-9.]+)v\z/) { + $1 <= 100 or die( + "percentile $1 out of bounds\n" + ); + my $index = $#order * $1 / 100; + my $f = $index - int $index; + my $val = $order[$index]; + if ($f) { + my $next = $order[$index + 1]; + $val -= $f * ($val - $next); + } + return $val; + } + elsif ($func =~ /\A-?[0-9.]+\z/) { + return $func; + } + else { + die "$func unknown\n"; + } +} + sub varfmt { my ($fmt, $vars) = @_; $fmt =~ s[\$\{ \h*+ ((?: [^{}]++ | \{(?1)\} )+) \}]{ my ($name, $op, $cmd) = split /\s*([;:])/, $1, 2; my $format = $name =~ s/\+// || $name !~ s/\#// && $opt{reformat}; - local $_ = $vars->{$name}; + local $_ = exists $vars->{$name} ? $vars->{$name} : calc($name); defined && do { $_ = $opt{'value-format'}->($_) if $format; if ($cmd and $op eq ':') { - $_ = varfmt($cmd, $vars); + $_ = !!$_ && varfmt($cmd, $vars); } elsif ($cmd) { eval $cmd; @@ -630,16 +644,19 @@ For example C<:/1> for a grid at every integer. =item IB -Ranked value at the given percentile. -The default shows C<+> at C<50v> for the mean or median; -the middle value or average between middle values. -One standard deviation right of the mean is at about C<68.3v>. -The default includes C<< >31.73v <68.27v >> +Ranked value at the given percentile, +or score at or below which a percentage falls +in its frequency distribution (inclusive). + +The default shows C<+> at C<50v> for the mean or median: +the middle value or interpolation between two values. +One standard deviation above the median is at about C<68v>. +The default includes C<< <31.73v >68.27v >> to encompass all I results, or 68% of all entries, by I<< <--> >>. =item B -Matches the average; +Matches the average (arithmetic mean); the sum of all values divided by the number of counted lines. Indicated by default as C<=>.