From: Mischa POSLAWSKY Date: Sat, 14 Nov 2009 22:08:45 +0000 (+0100) Subject: XXX: lookahead penalty X-Git-Url: http://git.shiar.nl/perl/list-index.git/commitdiff_plain/2af0622fdc633320a7172164ddb9c7436e41998f XXX: lookahead penalty --- diff --git a/lib/List/Index.pm b/lib/List/Index.pm index 339bd7d..4ce9c25 100644 --- a/lib/List/Index.pm +++ b/lib/List/Index.pm @@ -24,43 +24,52 @@ sub ranges { $pagesize = @$self / $pages; my $offset = $pagesize + .5; - my $penalty = 0; + my $shrunk = 0; + my $enlarged = 0; my @links = (''); while ($offset < @$self) { my $link = substr $self->[$offset], 0, $length; if ($context) { - if ($offset > $context - 1 + $penalty) { - # take a value slightly before the current offset - my $before = $self->[$offset - $context - 1 + $penalty]; + my $penalty = 0; + # take a value slightly before the current offset + if ((my $before = $offset - $context + $shrunk) > 0) { # see how much of it matches the current link my $trim = 1; - for my $match (split //, $before) { + for my $match (split //, $self->[$before - 1]) { scalar $link =~ /\G\Q$match/g or last; $trim++; } # truncate link upto where the earlier value starts to differ - substr($link, $trim) = '' unless $trim > length $link; + if ($trim < length $link) { + substr($link, $trim) = ''; + for (reverse $before .. $offset) { + $self->[$offset - $penalty] =~ /^\Q$link/ or last; + $penalty++; + } + } } - $penalty = 0; - if ($offset + $context < $#$self) { - # take a value after the current offset - my $after = $self->[$offset + $context]; + $shrunk = 0; + # take a value after the current offset + if ((my $after = $offset + $context - $enlarged) < $#$self) { # see how much of it matches the current link my $trim = 1; - for my $match (split //, $after) { + for my $match (split //, $self->[$after]) { scalar $link =~ /\G\Q$match/g or last; $trim++; } # use this link if it's shorter if ($trim < length $link) { - $link = substr $after, 0, $trim; + $link = substr $self->[$after], 0, $trim; + # advance lookbehind offset on the next page + $penalty = 0; for ($offset .. $#$self) { - last if $self->[$offset + $penalty] =~ /^\Q$link/; - $penalty++; + last if $self->[$offset + $shrunk] =~ /^\Q$link/; + $shrunk++; } } } + $enlarged = $penalty; } push @links, $link; diff --git a/t/10-ranges.t b/t/10-ranges.t index cf3b0a4..9cfce11 100644 --- a/t/10-ranges.t +++ b/t/10-ranges.t @@ -37,7 +37,7 @@ subtest 'context' => sub { my $index = List::Index->new([qw( kkeg kl km kmlu knsy koxb kpeo kuaa kuab kuac kuapa kuq kur kux kzb lc lg lgu lgua lguc - lguq lgur lgus lgx lka lkq lks lln llq llx + lguq lgur lgws lgx lka lkq lks lln llq llx )]) or return; is_deeply( $index->ranges({ pagesize=>10, context=>0, length=>5 }), @@ -109,13 +109,12 @@ subtest 'distribution' => sub { hnvtvpievbdlkrmb hs hvdvcqn hvn hyrybeur iaiaab ib ibavqyar idfniqvxpohbk idh )]) or return; is_deeply( - $index->ranges({ pagesize=>10, context=>6 }), + $index->ranges({ pagesize=>10, context=>8 }), [qw(-g h i-)], 'large context' ); -{ local $TODO = '?'; is_deeply( - $index->ranges({ pagesize=>10, context=>5 }), + $index->ranges({ pagesize=>10, context=>7 }), # after 2nd page is enlarged by lookbehind to 'h', limit subsequent lookahead # to prevent the page from getting too large (17 entries if forwarded to 'i') [qw(-g h-hm hn-)], @@ -124,7 +123,6 @@ subtest 'distribution' => sub { # page #14 [gn-g] (8): gnihka gniub go gsearnrqns gtdvcxyt gwawkvmueovdjtfj gwoufolwcvmtueyg gysgphci # page #15 [h] (17): h habkdgifjfxoh hbbvjf hbqleexnqts hccgszftbaymfu hdaqzkow hdoeqwdmgqwaoya hfbegicieu hfmlpzzioqjbthz hj hkoysmws hmylu hnvtvpievbdlkrmb hsodfpkatk hvdvcqn hvn hyrybeurqtevjfmi # page #16 [i-ie] (5): i iaab ibiavqyar idfniqvxpohbk idh -} }; subtest 'context' => sub {