X-Git-Url: http://git.shiar.nl/wormy.git/blobdiff_plain/580332877fa744e0653d74d92dc29346b9245093..6d64bb2aa29809c9c24fb5ee036766440b62bf4a:/lib/Parse/Binary/Nested.pm diff --git a/lib/Parse/Binary/Nested.pm b/lib/Parse/Binary/Nested.pm new file mode 100644 index 0000000..6b74f64 --- /dev/null +++ b/lib/Parse/Binary/Nested.pm @@ -0,0 +1,233 @@ +package Parse::Binary::Nested; + +use 5.010; +use strict; +use warnings; + +use Carp; +use Exporter qw(import); + +our $VERSION = '1.10'; +our @EXPORT_OK = qw(unpackf); + +sub new { + my ($class, $format) = @_; + ref $format + or $format = [0 => $format]; + ref $format eq 'ARRAY' + or croak "Invalid Parse::Binary::Nested format: should be an array ref"; + bless [$format, $class->template($format)], $class; +} + +sub template { + my ($self, $format) = @_; + # total (flattened) unpack template from nested format definitions + my $template = ''; + @$format or return $template; + for (reverse 0 .. ($#$format - 1) >> 1) { + my $value = $format->[-($_ << 1) - 1]; + if (ref $value eq 'ARRAY') { + my $count = $value->[0]; + if ($count =~ /^\?/) { + $template .= 'a*'; + last; + } + else { + $value = $self->template($value); + $value = $count =~ s/^([*\d]+)// ? "$count($value)$1" + : $count."X[$count]$count/($value)"; + } + } + else { + $value =~ s/=(?:\d+|.)//g; # hardcoded values + $value =~ s{^C/(a)(\d+)}{$1 . ($2 + 1)}e; # maximum length + } + $template .= $value; + } + return $template; +} + +sub convert { + my ($self, $format, $data, $pos) = @_; + # map flat results into a named and nested hash + my %res; + $pos ||= \(my $_pos = 0); + for (my $i = 0; $i < $#$format; $i += 2) { + my ($field, $template) = @$format[$i, $i+1]; + if (ref $template eq 'ARRAY') { + my ($count, @subformat) = @$template; + + if ($count =~ /^\?(\d+)/) { + # character-terminated group + my $endmark = chr $1; + my $iterate = ref($self)->new(\@subformat); + push @{ $iterate->[0] }, -pos => '=.'; + my $subpos = 0; + while ($subpos < length $data->[0]) { + last if substr($data->[0], $subpos, 1) eq $endmark; + my $iterdata = $iterate->convert($iterate->[0], [ + unpack $iterate->[1], substr($data->[0], $subpos) + ]) or last; + $subpos += delete $iterdata->{-pos}; + push @{ $res{$field} }, $iterdata; + } + $$pos += $subpos + 1; + @$data = unpack( + $self->template([ @$format[$i+2 .. $#$format] ]), + substr($data->[0], $subpos + 1) + ) if $subpos < length $data->[0]; + next; + } + + $$pos++ if $count eq 'C'; + my $max = $count =~ s/^(\d+)// ? $1 : 0; + $count = !$count ? $max + : $count eq '*' ? $res{levelcount}->{total} : shift @$data; + $res{$field}->[$_] = $self->convert(\@subformat, $data, $pos) + for 0 .. ($max || $count)-1; + splice @{ $res{$field} }, $count if $max > $count; + $res{$field} = $res{$field}->[0] if $max == 1; + next; + } + else { + for (split m{(?![0-9*/])(?[0]; + } + when ('Z*') { + $$pos += $count // 1 + length $data->[0]; + } + when (['C/a', 'C/A']) { + $$pos += 1 + ($count // length $data->[0]); + $data->[0] = unpack 'C/a', $data->[0] if defined $count; + } + when ('=') { + unshift @$data, $count; + } + when ('=.') { + unshift @$data, $$pos; + } + when ('X') { + $$pos -= $mult; + next; + } + default { + carp "Unrecognised template element '$type'"; + } + } + if (defined $res{$field}) { + $res{$field} = [ $res{$field} ] unless ref $res{$field} eq 'ARRAY'; + push @{ $res{$field} }, shift @$data; + } + else { + $res{$field} = shift @$data; + } + } + } + } + return \%res; +} + +sub unpackf { + my ($format, $input) = @_; + my $self = UNIVERSAL::isa($format, __PACKAGE__) ? $format + : __PACKAGE__->new($format); + my @data = unpack $self->[1], $input; + return $self->convert($self->[0], \@data); +} + +1; + +=head1 NAME + +Parse::Binary::Nested - Structured unpack + +=head1 SYNOPSIS + + use Parse::Binary::Nested qw(unpackf); + my $data = unpackf([message => 'Z*'], "hi\0"); + + my $parser = Parser::Binary::Nested->new([ + foos => [ + 'C', # count + message => 'Z*', + period => 'C', + ], + trail => 'a*', + ]); + $data = $parser->unpackf("\1foo\0.rest"); + print $data->{foos}->[0]->{message}; + +=head1 DESCRIPTION + +Converts a string into a hash of values, just like C +except that it allows you to name and nest the resulting elements. + +Format declarations are simalar to C templates, +with the following additions: + +=over + +=item * + +An array ref groups additional declarations, +with the first value specifying a repetition. If this count is variable, +the resulting value will be an array ref of hashes. + + repeat => ['C', name => 'a*', value => 'S'] + +With a count of 1, it will return only a hash ref, +thereby simply grouping declarations: + + test_foo => 'C' + test => [1, foo => 'C'] + +=item * + +A template value of C is recognised as a length-preceded string +with a constant (maximal) size, and will return only the string adjusted +to its length. +This behaviour is very similar to C<(C/a@x$length)>, except that it never reads +more than the given number of bytes. + +=item * + +Hardcoded values can be inserted using C<=$number> values. +This can for example be useful to retain forwards-compatibility: + + rows => ['C', + type => '=1', # nothing read + data => 'S', + ] + + rows => ['C', + type => 'C', + data => 'S', + ] + +=back + +=head1 AUTHOR + +Mischa POSLAWSKY + +=head1 LICENSE + +GPL version 3. +