Parse::Binary::Nested: track parsed bytes
[wormy.git] / Parse / Binary / Nested.pm
1 package Parse::Binary::Nested;
2
3 use 5.010;
4 use strict;
5 use warnings;
6
7 use Carp;
8
9 our $VERSION = '1.02';
10
11 sub new {
12         my ($class, $format) = @_;
13         ref $format eq 'ARRAY'
14                 or croak "Invalid Parse::Binary::Nested format: should be an array ref";
15         bless $format, $class;
16 }
17
18 sub template {
19         my ($self, $format) = @_;
20         # total (flattened) unpack template from nested format definitions
21         return join '', map {
22                 my $value = $format->[-($_ << 1) - 1];
23                 if (ref $value eq 'ARRAY') {
24                         my $count = $value->[0];
25                         $value = $self->template($value);
26                         $value = $count =~ s/^([*\d]+)// ? "$count($value)$1"
27                                 : $count."X[$count]$count/($value)";
28                 }
29                 else {
30                         $value =~ s/=(?:\d+|.)//g;  # hardcoded values
31                         $value =~ s{^C/(a)(\d+)}{$1 . ($2 + 1)}e;  # maximum length
32                 }
33                 $value;
34         } reverse 0 .. ($#$format - 1) >> 1;
35 }
36
37 sub convert {
38         my ($self, $format, $data, $pos) = @_;
39         # map flat results into a named and nested hash
40         my %res;
41         $pos ||= \(my $_pos);
42         while (my ($field, $template) = splice @$format, 0, 2) {
43                 if (ref $template eq 'ARRAY') {
44                         my ($count, @subformat) = @$template;
45                         $$pos++ if $count eq 'C';
46                         my $max = $count =~ s/^(\d+)// ? $1 : 0;
47                         $count = !$count ? $max
48                                 : $count eq '*' ? $res{levelcount}->{total} : shift @$data;
49                         $res{$field}->[$_] = $self->convert([@subformat], $data, $pos)
50                                 for 0 .. ($max || $count)-1;
51                         splice @{ $res{$field} }, $count if $max > $count;
52                         $res{$field} = $res{$field}->[0] if $max == 1;
53                         next;
54                 }
55                 else {
56                         for (split m{(?![0-9*/])(?<![/=])}, $template) {
57                                 my ($type, $count) = m{^(\D+)(\d+)?$} or die 'unsupported';
58                                 my $mult = $count // 1;
59                                 given ($type) {
60                                         when (['c', 'C']) {
61                                                 $$pos += $mult;
62                                         }
63                                         when ('x') {
64                                                 $$pos += $mult;
65                                                 next;
66                                         }
67                                         when (['b', 'B']) {
68                                                 $$pos++;
69                                         }
70                                         when (['s', 'S', 'n', 'v']) {
71                                                 $$pos += $mult * 2;
72                                         }
73                                         when (['a', 'A', 'Z', 'a*']) {
74                                                 $$pos += length $data->[0];
75                                         }
76                                         when ('Z*') {
77                                                 $$pos += $count // 1 + length $data->[0];
78                                         }
79                                         when (['C/a', 'C/A']) {
80                                                 $$pos += 1 + ($count // length $data->[0]);
81                                                 $data->[0] = unpack 'C/a', $data->[0] if defined $count;
82                                         }
83                                         when ('=') {
84                                                 unshift @$data, $count;
85                                         }
86                                         when ('=.') {
87                                                 unshift @$data, $$pos;
88                                         }
89                                         when ('X') {
90                                                 $$pos -= $mult;
91                                                 next;
92                                         }
93                                         default {
94                                                 carp "Unrecognised template element '$type'";
95                                         }
96                                 }
97                                 if (defined $res{$field}) {
98                                         $res{$field} = [ $res{$field} ] unless ref $res{$field} eq 'ARRAY';
99                                         push @{ $res{$field} }, shift @$data;
100                                 }
101                                 else {
102                                         $res{$field} = shift @$data;
103                                 }
104                         }
105                 }
106         }
107         return \%res;
108 }
109
110 sub unpackf {
111         my ($self, $input) = @_;
112         my @data = unpack $self->template($self), $input;
113         return $self->convert([@$self], \@data);
114 }
115
116 1;
117
118 =head1 NAME
119
120 Parse::Binary::Nested - Structured unpack
121
122 =head1 SYNOPSIS
123
124         use Parse::Binary::Nested;
125         my $parser = Parser::Binary::Nested->new([
126                 foos => [
127                         'C', # count
128                         message => 'Z*',
129                         period  => 'C',
130                 ],
131                 trail => 'a*',
132         ]);
133         
134         my $data = $parser->unpackf("\1foo\0.rest");
135         print $data->{foos}->[0]->{message};
136
137 =head1 DESCRIPTION
138
139 Converts a string into a hash of values, just like C<unpack>
140 except that it allows you to name and nest the resulting elements.
141
142 Format declarations are simalar to C<pack> templates,
143 with the following additions:
144
145 =over
146
147 =item *
148
149 An array ref groups additional declarations,
150 with the first value specifying a repetition.  If this count is variable,
151 the resulting value will be an array ref of hashes.
152
153         repeat => ['C', name => 'a*', value => 'S']
154
155 With a count of 1, it will return only a hash ref,
156 thereby simply grouping declarations:
157
158         test_foo => 'C'
159         test => [1, foo => 'C']
160
161 =item *
162
163 A template value of C<Ca$length> is recognised as a length-preceded string
164 with a constant (maximal) size, and will return only the string adjusted
165 to its length.
166 This behaviour is very similar to C<(C/a@x$length)>, except that it never reads
167 more than the given number of bytes.
168
169 =item *
170
171 Hardcoded values can be inserted using C<=$number> values.
172 This can for example be useful to retain forwards-compatibility:
173
174         rows => ['C',
175                 type => '=1', # nothing read
176                 data => 'S',
177         ]
178         
179         rows => ['C',
180                 type => 'C',
181                 data => 'S',
182         ]
183
184 =back
185
186 =head1 AUTHOR
187
188 Mischa POSLAWSKY <perl@shiar.org>
189
190 =head1 LICENSE
191
192 GPL version 3.
193