X-Git-Url: http://git.shiar.nl/perl/list-index.git/blobdiff_plain/d61e8685992586de85f6b7b58fee20f15c8111b0..e2a186365224032ff37caac824d50c9b43130952:/lib/List/Index.pm diff --git a/lib/List/Index.pm b/lib/List/Index.pm index 6a818bb..35f0549 100644 --- a/lib/List/Index.pm +++ b/lib/List/Index.pm @@ -6,48 +6,93 @@ use warnings; use Exporter 'import'; -our $VERSION = '1.00'; +our $VERSION = '1.02'; our @EXPORT_OK = qw(rangematch); sub new { - my ($class, $values) = @_; - bless [sort map { s/[^a-z]/./g; $_ } @$values], $class; + my ($class, $options) = @_; + $options ||= {}; + bless $options, $class; } sub ranges { my $self = shift; + my @rows = sort map { s/[^a-z]/./g; $_ } @{ shift() }; my $options = shift || {}; + $options->{$_} //= $self->{$_} for keys %$self; + my $pagesize = $options->{pagesize} || 50; my $context = $options->{context } // 1 + ($pagesize >> 4); my $length = $options->{length } || 4; - my $pages = $options->{pages } || 1 + int $#$self / $pagesize; + my $pages = $options->{pages } || 1 + int($#rows / $pagesize); + + $pagesize = $pages >= @rows ? 1 : @rows / $pages; + my $shrunk = 0; + my $enlarged = 0; - $pagesize = @$self / $pages; - my $offset = 0; - my @links; - while ($offset < @$self) { - my $link = substr $self->[$offset], 0, $length; + my @links = (''); + for (my $offset = $pagesize + .5; $offset < @rows; $offset += $pagesize) { + my $link = substr $rows[$offset], 0, $length; if ($context) { - my $trim = 1; - my $before = $offset > $context ? $self->[$offset - $context] : ''; - for my $match (split //, $before) { - scalar $link =~ /\G\Q$match/g or last; - $trim++; + my $lookbehind = -$context + $shrunk; + my $lookahead = $context - $enlarged; + $shrunk = $enlarged = 0; + + # take a value slightly before the current offset + if ((my $before = $offset + $lookbehind) > 0) { + # see how much of it matches the current link + my $trim = 1; + for my $match (split //, $rows[$before - 1]) { + scalar $link =~ /\G\Q$match/g or last; + $trim++; + } + # truncate link upto where the earlier value starts to differ + if ($trim < length $link) { + substr($link, $trim) = ''; + for (reverse $before .. $offset - 1) { + $rows[$_] =~ /^\Q$link/ or last; + $enlarged++; + } + } + } + + # take a value after the current offset + if ((my $after = $offset + $lookahead) < $#rows) { + # see how much of it matches the current link + my $trim = 1; + pos $link = 0; + for my $match (split //, $rows[$after]) { + scalar $link =~ /\G\Q$match/g or last; + $trim++; + } + # use this link if it's shorter + if ($trim < length $link) { + $enlarged = 0; + for ($offset + 1 .. $after) { + my $prefix = substr $rows[$_], 0, $trim; + # advance lookbehind offset on the next page + $shrunk++; + next if $link =~ /^\Q$prefix/; + $link = $prefix; + last; + } + } } - substr($link, $trim) = '' unless $trim > length $link; } - push @links, [$link]; - $offset += $pagesize; + push @links, $link unless $links[-1] eq $link; } + # add range end to each link for my $i (0 .. $#links - 1) { - my ($link, $lastchar) = $links[$i + 1]->[0] =~ /(.*)(.)/; - $link .= $lastchar eq '.' ? 'z' : chr( ord($lastchar) - 1 ) - unless $lastchar eq 'a'; - $links[$i]->[1] = $link; + # end at start of next value with the last character decremented + my $next = $links[$i + 1]; + $next =~ s{(.)$}{ $1 le 'a' ? '.' : chr( ord($1) - 1 ) }e; + # amend range if it's ahead + $links[$i] .= '-'.$next unless $next eq $links[$i]; } - $links[-1]->[1] = ''; + # final value takes the rest + $links[-1] .= '-'; return \@links; } @@ -56,36 +101,80 @@ sub rangematch { my ($link) = @_; my ($s1, $s2) = $link =~ /([^-]*) - ([^-]*)/x or return qr/^\Q$link/i; + $s1 =~ s/\.$//; my @allow; if (length $s1) { + if (length $s2) { + $s1 le $s2 or $s1 =~ /^\Q$s2/ or return undef; + } + my $prefix = ''; - my $c1; + my $char; for my $i (0 .. length($s1) - 1) { - $c1 = substr $s1, $i, 1; - my $c2 = length $s2 <= $i ? undef : substr $s2, $i, 1; - my $next = $i + 1 >= length($s1) ? $c1 : chr( ord($c1) + 1 ); - $next le $c2 or next if defined $c2; - my $last = defined $c2 && $i == 0 ? chr( ord($c2) - (length $s2 > 1) ) : 'z'; + my $lasti = $i == length($s1) - 1; + $char = substr $s1, $i, 1; + my $next = $char; + # do not include prefix character in final range + $next = chr( ord($char) + 1 ) unless $lasti; + + my $last = 'z'; + next if $next gt $last; + if (length $s2 > $i) { + if ($s2 =~ /^\Q$prefix/) { + $last = substr $s2, $i, 1; + next if $char eq $last; + $last = chr( ord($last) - (length $s2 > 1) ); + next if $next gt $last; + } + } + + if ($char eq '.') { + if ($last eq 'z') { +# push @allow, $prefix if $i and $lasti; +# next; + } +# if ($last eq 'z') { +# push @allow, $prefix if $i and $lasti; +# next; +# } + $next = 'a'; + } + push @allow, $prefix."[$next-$last]"; } continue { - $prefix .= $c1; + $prefix .= $char eq '.' ? '[^a-z]' : $char; } } if (length $s2) { my $prefix = ''; + my $char; for my $i (0 .. length($s2) - 1) { - my $c1 = length $s1 <= $i ? undef : substr $s1, $i, 1; - my $c2 = substr $s2, $i, 1; + $char = substr $s2, $i, 1; my $last = 'z'; - push @allow, "$prefix(?![$c2-$last])" + if (length $s1 > $i) { + my $c1 = substr $s1, $i, 1; + if ($s1 =~ /^\Q$prefix/) { + next if $c1 le $char; + } + } + + if ($char eq '.') { + next if $i < length($s2) - 1; + } + + push @allow, $prefix.'(?!['.($char eq '.' ? 'a' : $char)."-$last])" if $i or $s1 eq ''; - $prefix .= $c2; } + continue { + $prefix .= $char eq '.' ? '[^a-z]' : $char; + } + push @allow, $prefix - unless length $s1 > length $s2 or length $s1 != 0 && length $s2 == 1; #TODO + if $s2 =~ /^\Q$prefix/ and $s1 le $s2 + and not (length $s2 == 1 && length $s1 >= length $s2 && $s1 ne $s2); } my $match = sprintf @allow <= 1 ? '%s' : '(?:%s)', join('|', @allow); @@ -98,14 +187,18 @@ __END__ =head1 NAME -List::Index - Paginate alphabetic entries by finding minimal prefixes +List::Index - Find and apply prefix ranges to paginate keywords =head1 SYNOPSIS use List::Index; - my $index = List::Index->new(\@values); - my @pages = $index->ranges({pagesize => 50}); - printf '%1$s ', @$_ for @pages; + my $index = List::Index->new({ pagesize => 50 }); + my @pages = $index->ranges(\@values); + say "$_" for @pages; + + use List::Index 'rangematch'; + my $limit = rangematch('b-bmq'); # ge 'b' && le 'bmq' + @request = grep { $limit } @values; =head1 DESCRIPTION