parse-wormedit: parsing modules in seperate files
[wormy.git] / lib / Parse / Binary / Nested.pm
diff --git a/lib/Parse/Binary/Nested.pm b/lib/Parse/Binary/Nested.pm
new file mode 100644 (file)
index 0000000..6b74f64
--- /dev/null
@@ -0,0 +1,233 @@
+package Parse::Binary::Nested;
+
+use 5.010;
+use strict;
+use warnings;
+
+use Carp;
+use Exporter qw(import);
+
+our $VERSION = '1.10';
+our @EXPORT_OK = qw(unpackf);
+
+sub new {
+       my ($class, $format) = @_;
+       ref $format
+               or $format = [0 => $format];
+       ref $format eq 'ARRAY'
+               or croak "Invalid Parse::Binary::Nested format: should be an array ref";
+       bless [$format, $class->template($format)], $class;
+}
+
+sub template {
+       my ($self, $format) = @_;
+       # total (flattened) unpack template from nested format definitions
+       my $template = '';
+       @$format or return $template;
+       for (reverse 0 .. ($#$format - 1) >> 1) {
+               my $value = $format->[-($_ << 1) - 1];
+               if (ref $value eq 'ARRAY') {
+                       my $count = $value->[0];
+                       if ($count =~ /^\?/) {
+                               $template .= 'a*';
+                               last;
+                       }
+                       else {
+                               $value = $self->template($value);
+                               $value = $count =~ s/^([*\d]+)// ? "$count($value)$1"
+                                       : $count."X[$count]$count/($value)";
+                       }
+               }
+               else {
+                       $value =~ s/=(?:\d+|.)//g;  # hardcoded values
+                       $value =~ s{^C/(a)(\d+)}{$1 . ($2 + 1)}e;  # maximum length
+               }
+               $template .= $value;
+       }
+       return $template;
+}
+
+sub convert {
+       my ($self, $format, $data, $pos) = @_;
+       # map flat results into a named and nested hash
+       my %res;
+       $pos ||= \(my $_pos = 0);
+       for (my $i = 0; $i < $#$format; $i += 2) {
+               my ($field, $template) = @$format[$i, $i+1];
+               if (ref $template eq 'ARRAY') {
+                       my ($count, @subformat) = @$template;
+
+                       if ($count =~ /^\?(\d+)/) {
+                               # character-terminated group
+                               my $endmark = chr $1;
+                               my $iterate = ref($self)->new(\@subformat);
+                               push @{ $iterate->[0] }, -pos => '=.';
+                               my $subpos = 0;
+                               while ($subpos < length $data->[0]) {
+                                       last if substr($data->[0], $subpos, 1) eq $endmark;
+                                       my $iterdata = $iterate->convert($iterate->[0], [
+                                               unpack $iterate->[1], substr($data->[0], $subpos)
+                                       ]) or last;
+                                       $subpos += delete $iterdata->{-pos};
+                                       push @{ $res{$field} }, $iterdata;
+                               }
+                               $$pos += $subpos + 1;
+                               @$data = unpack(
+                                       $self->template([ @$format[$i+2 .. $#$format] ]),
+                                       substr($data->[0], $subpos + 1)
+                               ) if $subpos < length $data->[0];
+                               next;
+                       }
+
+                       $$pos++ if $count eq 'C';
+                       my $max = $count =~ s/^(\d+)// ? $1 : 0;
+                       $count = !$count ? $max
+                               : $count eq '*' ? $res{levelcount}->{total} : shift @$data;
+                       $res{$field}->[$_] = $self->convert(\@subformat, $data, $pos)
+                               for 0 .. ($max || $count)-1;
+                       splice @{ $res{$field} }, $count if $max > $count;
+                       $res{$field} = $res{$field}->[0] if $max == 1;
+                       next;
+               }
+               else {
+                       for (split m{(?![0-9*/])(?<![/=])}, $template) {
+                               my ($type, $count) = m{^(\D+)(\d+)?$} or die 'unsupported';
+                               my $mult = $count // 1;
+                               given ($type) {
+                                       when (['c', 'C']) {
+                                               $$pos += $mult;
+                                       }
+                                       when ('x') {
+                                               $$pos += $mult;
+                                               next;
+                                       }
+                                       when (['b', 'B']) {
+                                               $$pos++;
+                                       }
+                                       when (['s', 'S', 'n', 'v']) {
+                                               $$pos += $mult * 2;
+                                       }
+                                       when (['a', 'A', 'Z', 'a*']) {
+                                               $$pos += length $data->[0];
+                                       }
+                                       when ('Z*') {
+                                               $$pos += $count // 1 + length $data->[0];
+                                       }
+                                       when (['C/a', 'C/A']) {
+                                               $$pos += 1 + ($count // length $data->[0]);
+                                               $data->[0] = unpack 'C/a', $data->[0] if defined $count;
+                                       }
+                                       when ('=') {
+                                               unshift @$data, $count;
+                                       }
+                                       when ('=.') {
+                                               unshift @$data, $$pos;
+                                       }
+                                       when ('X') {
+                                               $$pos -= $mult;
+                                               next;
+                                       }
+                                       default {
+                                               carp "Unrecognised template element '$type'";
+                                       }
+                               }
+                               if (defined $res{$field}) {
+                                       $res{$field} = [ $res{$field} ] unless ref $res{$field} eq 'ARRAY';
+                                       push @{ $res{$field} }, shift @$data;
+                               }
+                               else {
+                                       $res{$field} = shift @$data;
+                               }
+                       }
+               }
+       }
+       return \%res;
+}
+
+sub unpackf {
+       my ($format, $input) = @_;
+       my $self = UNIVERSAL::isa($format, __PACKAGE__) ? $format
+               : __PACKAGE__->new($format);
+       my @data = unpack $self->[1], $input;
+       return $self->convert($self->[0], \@data);
+}
+
+1;
+
+=head1 NAME
+
+Parse::Binary::Nested - Structured unpack
+
+=head1 SYNOPSIS
+
+       use Parse::Binary::Nested qw(unpackf);
+       my $data = unpackf([message => 'Z*'], "hi\0");
+
+       my $parser = Parser::Binary::Nested->new([
+               foos => [
+                       'C', # count
+                       message => 'Z*',
+                       period  => 'C',
+               ],
+               trail => 'a*',
+       ]);
+       $data = $parser->unpackf("\1foo\0.rest");
+       print $data->{foos}->[0]->{message};
+
+=head1 DESCRIPTION
+
+Converts a string into a hash of values, just like C<unpack>
+except that it allows you to name and nest the resulting elements.
+
+Format declarations are simalar to C<pack> templates,
+with the following additions:
+
+=over
+
+=item *
+
+An array ref groups additional declarations,
+with the first value specifying a repetition.  If this count is variable,
+the resulting value will be an array ref of hashes.
+
+       repeat => ['C', name => 'a*', value => 'S']
+
+With a count of 1, it will return only a hash ref,
+thereby simply grouping declarations:
+
+       test_foo => 'C'
+       test => [1, foo => 'C']
+
+=item *
+
+A template value of C<Ca$length> is recognised as a length-preceded string
+with a constant (maximal) size, and will return only the string adjusted
+to its length.
+This behaviour is very similar to C<(C/a@x$length)>, except that it never reads
+more than the given number of bytes.
+
+=item *
+
+Hardcoded values can be inserted using C<=$number> values.
+This can for example be useful to retain forwards-compatibility:
+
+       rows => ['C',
+               type => '=1', # nothing read
+               data => 'S',
+       ]
+       
+       rows => ['C',
+               type => 'C',
+               data => 'S',
+       ]
+
+=back
+
+=head1 AUTHOR
+
+Mischa POSLAWSKY <perl@shiar.org>
+
+=head1 LICENSE
+
+GPL version 3.
+