package Parse::Binary::Nested;
+use 5.010;
use strict;
use warnings;
use Carp;
+use Exporter qw(import);
-our $VERSION = '1.00';
+our $VERSION = '1.10';
+our @EXPORT_OK = qw(unpackf);
sub new {
my ($class, $format) = @_;
+ ref $format
+ or $format = [0 => $format];
ref $format eq 'ARRAY'
or croak "Invalid Parse::Binary::Nested format: should be an array ref";
- bless $format, $class;
+ bless [$format, $class->template($format)], $class;
}
sub template {
my ($self, $format) = @_;
# total (flattened) unpack template from nested format definitions
- return join '', map {
+ my $template = '';
+ @$format or return $template;
+ for (reverse 0 .. ($#$format - 1) >> 1) {
my $value = $format->[-($_ << 1) - 1];
if (ref $value eq 'ARRAY') {
my $count = $value->[0];
- $value = $self->template($value);
- $value = $count =~ s/^([*\d]+)// ? "$count($value)$1"
- : $count."X[$count]$count/($value)";
+ if ($count =~ /^\?/) {
+ $template .= 'a*';
+ last;
+ }
+ else {
+ $value = $self->template($value);
+ $value = $count =~ s/^([*\d]+)// ? "$count($value)$1"
+ : $count."X[$count]$count/($value)";
+ }
}
else {
- $value =~ s/^C(a)(\d+)/$1 . ($2 + 1)/e; # length prefix
+ $value =~ s/=(?:\d+|.)//g; # hardcoded values
+ $value =~ s{^C/(a)(\d+)}{$1 . ($2 + 1)}e; # maximum length
}
- $value;
- } reverse 0 .. ($#$format - 1) >> 1;
+ $template .= $value;
+ }
+ return $template;
}
sub convert {
- my ($self, $format, $data) = @_;
+ my ($self, $format, $data, $pos) = @_;
# map flat results into a named and nested hash
my %res;
- while (my ($field, $template) = splice @$format, 0, 2) {
+ $pos ||= \(my $_pos = 0);
+ for (my $i = 0; $i < $#$format; $i += 2) {
+ my ($field, $template) = @$format[$i, $i+1];
if (ref $template eq 'ARRAY') {
my ($count, @subformat) = @$template;
+
+ if ($count =~ /^\?(\d+)/) {
+ # character-terminated group
+ my $endmark = chr $1;
+ my $iterate = ref($self)->new(\@subformat);
+ push @{ $iterate->[0] }, -pos => '=.';
+ my $subpos = 0;
+ while ($subpos < length $data->[0]) {
+ last if substr($data->[0], $subpos, 1) eq $endmark;
+ my $iterdata = $iterate->convert($iterate->[0], [
+ unpack $iterate->[1], substr($data->[0], $subpos)
+ ]) or last;
+ $subpos += delete $iterdata->{-pos};
+ push @{ $res{$field} }, $iterdata;
+ }
+ $$pos += $subpos + 1;
+ @$data = unpack(
+ $self->template([ @$format[$i+2 .. $#$format] ]),
+ substr($data->[0], $subpos + 1)
+ ) if $subpos < length $data->[0];
+ next;
+ }
+
+ $$pos++ if $count eq 'C';
my $max = $count =~ s/^(\d+)// ? $1 : 0;
$count = !$count ? $max
: $count eq '*' ? $res{levelcount}->{total} : shift @$data;
- $res{$field}->[$_] = $self->convert([@subformat], $data) for 0 .. ($max || $count)-1;
+ $res{$field}->[$_] = $self->convert(\@subformat, $data, $pos)
+ for 0 .. ($max || $count)-1;
splice @{ $res{$field} }, $count if $max > $count;
$res{$field} = $res{$field}->[0] if $max == 1;
next;
}
- elsif ($template =~ /^Ca/) {
- $data->[0] = unpack 'C/a', $data->[0];
+ else {
+ for (split m{(?![0-9*/])(?<![/=])}, $template) {
+ my ($type, $count) = m{^(\D+)(\d+)?$} or die 'unsupported';
+ my $mult = $count // 1;
+ given ($type) {
+ when (['c', 'C']) {
+ $$pos += $mult;
+ }
+ when ('x') {
+ $$pos += $mult;
+ next;
+ }
+ when (['b', 'B']) {
+ $$pos++;
+ }
+ when (['s', 'S', 'n', 'v']) {
+ $$pos += $mult * 2;
+ }
+ when (['a', 'A', 'Z', 'a*']) {
+ $$pos += length $data->[0];
+ }
+ when ('Z*') {
+ $$pos += $count // 1 + length $data->[0];
+ }
+ when (['C/a', 'C/A']) {
+ $$pos += 1 + ($count // length $data->[0]);
+ $data->[0] = unpack 'C/a', $data->[0] if defined $count;
+ }
+ when ('=') {
+ unshift @$data, $count;
+ }
+ when ('=.') {
+ unshift @$data, $$pos;
+ }
+ when ('X') {
+ $$pos -= $mult;
+ next;
+ }
+ default {
+ carp "Unrecognised template element '$type'";
+ }
+ }
+ if (defined $res{$field}) {
+ $res{$field} = [ $res{$field} ] unless ref $res{$field} eq 'ARRAY';
+ push @{ $res{$field} }, shift @$data;
+ }
+ else {
+ $res{$field} = shift @$data;
+ }
+ }
}
- $res{$field} = shift @$data;
}
return \%res;
}
sub unpackf {
- my ($self, $input) = @_;
- my @data = unpack $self->template($self), $input;
- return $self->convert([@$self], \@data);
+ my ($format, $input) = @_;
+ my $self = UNIVERSAL::isa($format, __PACKAGE__) ? $format
+ : __PACKAGE__->new($format);
+ my @data = unpack $self->[1], $input;
+ return $self->convert($self->[0], \@data);
}
1;
=head1 SYNOPSIS
- use Parse::Binary::Nested;
+ use Parse::Binary::Nested qw(unpackf);
+ my $data = unpackf([message => 'Z*'], "hi\0");
+
my $parser = Parse::Binary::Nested->new([
foos => [
'C', # count
],
trail => 'a*',
]);
-
- my $data = $parser->unpackf("\1foo\0.rest");
+ $data = $parser->unpackf("\1foo\0.rest");
print $data->{foos}->[0]->{message};
=head1 DESCRIPTION
+Converts a string into a hash of values, just like C<unpack>
+except that it allows you to name and nest the resulting elements.
+
+Format declarations are similar to C<pack> templates,
+with the following additions:
+
+=over
+
+=item *
+
+An array ref groups additional declarations,
+with the first value specifying a repetition. If this count is variable,
+the resulting value will be an array ref of hashes.
+
+ repeat => ['C', name => 'a*', value => 'S']
+
+With a count of 1, it will return only a hash ref,
+thereby simply grouping declarations:
+
+ test_foo => 'C'
+ test => [1, foo => 'C']
+
+=item *
+
+A template value of C<C/a$length> is recognised as a length-preceded string
+with a constant (maximal) size, and will return only the string adjusted
+to its length.
+This behaviour is very similar to C<(C/a@x$length)>, except that it never reads
+more than the given number of bytes.
+
+=item *
+
+Hardcoded values can be inserted using C<=$number> values.
+This can be useful, for example, to retain forward compatibility:
+
+ rows => ['C',
+ type => '=1', # nothing read
+ data => 'S',
+ ]
+
+ rows => ['C',
+ type => 'C',
+ data => 'S',
+ ]
+
+=back
+
=head1 AUTHOR
Mischa POSLAWSKY <perl@shiar.org>