$opt{trim} *= $opt{width} / 100 if $opt{trimpct};
$opt{units} = [split //, ' kMGTPEZYRQqryzafpn'.($opt{ascii} ? 'u' : 'μ').'m']
if $opt{'human-readable'};
-$opt{anchor} //= qr/\A/;
$opt{'value-length'} = 4 if $opt{units};
$opt{'value-length'} = 1 if $opt{unmodified};
$opt{'signal-stat'} //= exists $SIG{INFO} ? 'INFO' : 'QUIT';
eval {
require Tie::Array::Sorted;
tie @order, 'Tie::Array::Sorted', sub { $_[1] <=> $_[0] };
- } or warn $@, "Expect slowdown with large datasets!\n";
+ } or warn $@, "Expect slowdown with large datasets!\n"
+ unless $opt{count};
}
my $float = qr<[0-9]* [.]? [0-9]+ (?: e[+-]?[0-9]+ )?>; # positive numberish
-my $valmatch = qr< $opt{anchor} ( \h* -? $float |) >x;
+my $valmatch = $opt{anchor} // qr/\A/;
+$valmatch .= !$opt{count} ? qr/( \h* -? $float |)/ :
+ $opt{anchor} ? qr/(\S*)/ : qr/(.*)/;
+
while (defined ($_ = $opt{input} ? shift @{ $opt{input} } : readline)) {
s/\r?\n\z//;
+ my $valnum;
if ($opt{count}) {
- my ($valnum) = m/(\S*)/;
- $valnum //= '';
+ $valnum = m/$valmatch/ && $1;
$uniq{$valnum}++ and next;
- push @lines, "\n " . $_;
push @values, $valnum;
- next;
+ s/\A/\n /;
+ }
+ else {
+ s/\A\h*// unless $opt{unmodified};
+ $valnum = s/$valmatch/\n/ && $1;
+ push @values, $valnum;
+ push @order, $valnum if length $valnum;
}
- s/\A\h*// unless $opt{unmodified};
- my $valnum = s/$valmatch/\n/ && $1;
- push @values, $valnum;
- push @order, $valnum if length $valnum;
+
if (defined $opt{trim} and defined $valnum) {
my $trimpos = abs $opt{trim};
$trimpos -= length $valnum if $opt{unmodified};
}
}
push @lines, $_;
+}
+continue {
show_lines() if defined $opt{interval} and $opt{interval} < 0
and $. % $opt{interval} == 0;
}
my $limit = $opt{hidemax} ? $opt{hidemax}->($#lines, $nr) : $#lines;
if ($opt{count}) {
- $_ = $uniq{$_} for @values;
+ $_ = $uniq{$_} for @values[$nr .. $limit];
@order = @values;
}
if ($opt{markers} and $size > 0) {
for my $markspec (split /\h/, $opt{markers}) {
my ($char, $func) = split //, $markspec, 2;
+ my $increment = $func =~ s/[+]\z//;
my @pos = eval {
- if ($func eq 'avg') {
- return sum(@order) / @order;
- }
- elsif ($func =~ /\A([0-9.]+)v\z/) {
- $1 <= 100 or die(
- "Invalid marker $char: percentile $1 out of bounds\n"
- );
- my $index = $#order * $1 / 100;
- return ($order[$index] + $order[$index + .5]) / 2;
- }
- elsif ($func =~ /\A-?[0-9.]+\z/) {
- return $func;
- }
- elsif ($func =~ /\A\/($float)\z/) {
+ if ($func =~ /\A\/($float)\z/) {
my @range = my $multiple = my $next = $1;
while ($next < $maxval) {
$multiple *= 10 if $opt{log};
}
return @range;
}
- else {
- die "Unknown marker $char: $func\n";
- }
- };
- @pos or do {
- warn $@ if $@;
+ return calc($func);
+ } or do {
+ warn "Invalid marker $char: $@" if $@;
next;
};
for my $pos (@pos) {
$pos -= $minval;
$pos &&= log $pos if $opt{log};
$pos >= 0 or next;
- color(36) for $barmark[$pos / $range * $size] = $char;
+ $increment ||= $minval && !$pos;
+ color(36) for $barmark[$pos / $range * $size + $increment + .5] = $char;
}
}
$nr++;
}
say $opt{palette} ? color(0) : '' if $opt{spark};
+%uniq = () if $opt{interval} and $opt{count};
return $nr;
}
sub show_stat { # Sets up the %vars statistics hash; returns 1.
my %vars = (
+ partsum => undef, # key present but undefined; NOTE(review): presumably so varfmt's exists() test does not fall back to calc('partsum') - confirm
count => int @order, # number of elements in @order
lines => int @lines, # number of elements in @lines
);
return 1;
}
+sub calc { # Evaluate a function name (or literal number) against the values in @order; dies if the name is not recognised.
+ my ($func) = @_; # $func: 'avg', 'sum', '<percentage>v', or a plain number
+ if ($func eq 'avg') {
+ return sum(@order) / @order; # arithmetic mean; @order in numeric context is its element count. NOTE(review): division by zero dies when @order is empty - confirm callers trap this
+ }
+ elsif ($func eq 'sum') {
+ return sum(@order); # total of all entries in @order
+ }
+ elsif ($func =~ /\A([0-9.]+)v\z/) { # percentile, e.g. "50v"; $1 is the percentage
+ $1 <= 100 or die(
+ "percentile $1 out of bounds\n"
+ );
+ my $index = $#order * $1 / 100; # possibly fractional position between 0 and the last index
+ my $f = $index - int $index; # fractional part: how far to move towards the next element
+ my $val = $order[$index]; # array subscript truncates $index to an integer
+ if ($f) {
+ my $next = $order[$index + 1];
+ $val -= $f * ($val - $next); # linear interpolation between the two neighbouring entries
+ }
+ return $val;
+ }
+ elsif ($func =~ /\A-?[0-9.]+\z/) { # optionally negative plain number
+ return $func; # returned unchanged
+ }
+ else {
+ die "$func unknown\n"; # trailing newline keeps file/line info out of the message
+ }
+}
+
sub varfmt {
my ($fmt, $vars) = @_;
$fmt =~ s[\$\{ \h*+ ((?: [^{}]++ | \{(?1)\} )+) \}]{
my ($name, $op, $cmd) = split /\s*([;:])/, $1, 2;
my $format = $name =~ s/\+// || $name !~ s/\#// && $opt{reformat};
- local $_ = $vars->{$name};
+ local $_ = exists $vars->{$name} ? $vars->{$name} : calc($name);
defined && do {
$_ = $opt{'value-format'}->($_) if $format;
if ($cmd and $op eq ':') {
- $_ = varfmt($cmd, $vars);
+ $_ = !!$_ && varfmt($cmd, $vars);
}
elsif ($cmd) {
eval $cmd;
Contents are concatenated similar to I<cat>,
but numbers are reformatted and a bar graph is appended to each line.
-Don't worry, barcat does not drink and divide.
It has various options for input and output (re)formatting,
but remains limited to one-dimensional charts.
For more complex graphing needs
=item B<-c>, B<--count>
Omit repetitions and count the number of occurrences.
-Similar to piping input to C<sort | uniq -c>
+Similar to piping input through C<sort | uniq -c>
but keeping the order of first appearances.
+Lines are omitted if they (or a specified field) are identical,
+and the number of matches is prepended and used as values
+for bars and subsequent statistics.
+
=item B<-f>, B<--field>=([B<+>]I<number> | I<regexp>)
Compare values after a given number of whitespace separators,
=item I<percentage>B<v>
-Ranked value at the given percentile.
-The default shows C<+> at C<50v> for the mean or median;
-the middle value or average between middle values.
-One standard deviation right of the mean is at about C<68.3v>.
+Ranked value at the given percentile,
+or score at or below which a percentage falls
+in its frequency distribution (inclusive).
+
+The default shows C<+> at C<50v> for the mean or median:
+the middle value or interpolation between two values.
+One standard deviation below the median is at about C<68v>.
The default includes C<< >31.73v <68.27v >>
to encompass all I<normal> results, or 68% of all entries, by I<< <--> >>.
=item B<avg>
-Matches the average;
+Matches the average (arithmetic mean);
the sum of all values divided by the number of counted lines.
Indicated by default as C<=>.
Number of HTTP requests per day:
- cat httpd/access.log | cut -d\ -f4 | cut -d: -f1 | uniq -c | barcat
+ barcat -cf'\[([^:]+)' httpd/access.log
Any kind of database query results, preserving returned alignment: