parse-wormedit: parsing modules in separate files
[wormy.git] / lib / Parse / Binary / Nested.pm
1 package Parse::Binary::Nested;
2
3 use 5.010;
4 use strict;
5 use warnings;
6
7 use Carp;
8 use Exporter qw(import);
9
10 our $VERSION = '1.10';
11 our @EXPORT_OK = qw(unpackf);
12
sub new {
        my ($class, $format) = @_;
        # Constructor: compiles a format definition into a parser object.
        #
        # $format is either an array ref of declarations, or a plain template
        # string which is treated as a single field named 0.
        # Croaks if a reference other than an array ref is given.
        unless (ref $format) {
                $format = [0 => $format];
        }
        if (ref($format) ne 'ARRAY') {
                croak "Invalid Parse::Binary::Nested format: should be an array ref";
        }
        # Object layout: [ format definition, flattened unpack template ].
        my $flattened = $class->template($format);
        return bless [$format, $flattened], $class;
}
21
sub template {
        my ($self, $format) = @_;
        # Build the total (flattened) unpack template from nested format
        # definitions.
        #
        # $format is an array ref of (name => template) pairs, optionally
        # preceded by a repetition count (the form used inside nested groups).
        # Returns the template string to be handed to unpack.
        my $template = '';
        # Without at least one complete pair there is nothing to unpack.
        # A lone element (a group with only a count) has to be caught here:
        # the shift below would otherwise be applied to -1, which perl treats
        # as a huge unsigned value, yielding an impossibly large range.
        @$format > 1 or return $template;
        # Visit the second element of every pair in declaration order.
        # Indexing from the end skips an optional leading count automatically.
        for (reverse 0 .. ($#$format - 1) >> 1) {
                my $value = $format->[-($_ << 1) - 1];
                if (ref $value eq 'ARRAY') {
                        my $count = $value->[0];
                        if ($count =~ /^\?/) {
                                # Character-terminated group: grab all remaining
                                # input as one string and stop; convert() unpacks
                                # the rest itself once the terminator is found.
                                $template .= 'a*';
                                last;
                        }
                        else {
                                $value = $self->template($value);
                                # Fixed count (digits or *): plain repeated group.
                                # Otherwise the count is read from input: unpack
                                # it as a value, step back over it, then use it
                                # as the length prefix of the group.
                                $value = $count =~ s/^([*\d]+)// ? "$count($value)$1"
                                        : $count."X[$count]$count/($value)";
                        }
                }
                else {
                        $value =~ s/=(?:\d+|.)//g;  # hardcoded values read nothing
                        # length-preceded string of maximum length:
                        # read the length byte and the full buffer in one go
                        $value =~ s{^C/(a)(\d+)}{$1 . ($2 + 1)}e;
                }
                $template .= $value;
        }
        return $template;
}
49
sub convert {
        my ($self, $format, $data, $pos) = @_;
        # Map flat unpack results into a named and nested hash.
        #
        # $format  array ref of (name => template) pairs; a template is either
        #          an unpack template string or an array ref describing a
        #          nested group (repetition count followed by more pairs)
        # $data    array ref of unpacked values; consumed from the front
        # $pos     optional scalar ref accumulating the number of input bytes
        #          accounted for (made available to formats through '=.')
        #
        # Returns a hash ref of field names to values.  A field that receives
        # several values becomes an array ref.
        my %res;
        $pos ||= \(my $_pos = 0);
        for (my $i = 0; $i < $#$format; $i += 2) {
                my ($field, $template) = @$format[$i, $i+1];

                if (ref $template eq 'ARRAY') {
                        my ($count, @subformat) = @$template;

                        if ($count =~ /^\?(\d+)/) {
                                # character-terminated group:
                                # $data->[0] holds all remaining input (see template),
                                # records are parsed one by one until the end mark
                                my $endmark = chr $1;
                                my $iterate = ref($self)->new(\@subformat);
                                # have each record report the bytes it consumed
                                push @{ $iterate->[0] }, -pos => '=.';
                                my $subpos = 0;
                                while ($subpos < length $data->[0]) {
                                        last if substr($data->[0], $subpos, 1) eq $endmark;
                                        my $iterdata = $iterate->convert($iterate->[0], [
                                                unpack $iterate->[1], substr($data->[0], $subpos)
                                        ]) or last;
                                        # a record which consumes nothing would be
                                        # parsed over and over again without end
                                        my $consumed = delete $iterdata->{-pos}
                                                or last;
                                        $subpos += $consumed;
                                        push @{ $res{$field} }, $iterdata;
                                }
                                $$pos += $subpos + 1;
                                # continue with the declarations after this group
                                # on the input following the end mark
                                @$data = unpack(
                                        $self->template([ @$format[$i+2 .. $#$format] ]),
                                        substr($data->[0], $subpos + 1)
                                ) if $subpos < length $data->[0];
                                next;
                        }

                        $$pos++ if $count eq 'C';
                        # leading digits give a fixed (maximum) number of entries
                        my $max = $count =~ s/^(\d+)// ? $1 : 0;
                        # what remains tells where the actual count comes from:
                        # nothing (fixed), '*' (levelcount total parsed earlier
                        # at this level), or a value read from input
                        $count = !$count ? $max
                                : $count eq '*' ? $res{levelcount}->{total} : shift @$data;
                        for my $entry (0 .. ($max || $count) - 1) {
                                $res{$field}->[$entry] = $self->convert(\@subformat, $data, $pos);
                        }
                        splice @{ $res{$field} }, $count if $max > $count;
                        # a single entry simply groups declarations
                        $res{$field} = $res{$field}->[0] if $max == 1;
                        next;
                }

                # plain template string, possibly of multiple elements
                for my $element (split m{(?![0-9*/])(?<![/=])}, $template) {
                        my ($type, $count) = $element =~ m{^(\D+)(\d+)?$}
                                or die 'unsupported';
                        my $mult = $count // 1;

                        # Account for the input bytes taken by this element.
                        # An if/elsif chain is used instead of given/when,
                        # which is experimental and warns on modern perls.
                        if ($type eq 'c' || $type eq 'C') {
                                $$pos += $mult;
                        }
                        elsif ($type eq 'x') {
                                # padding returns no value
                                $$pos += $mult;
                                next;
                        }
                        elsif ($type eq 'b' || $type eq 'B') {
                                # count is in bits; whole bytes are consumed
                                $$pos += int(($mult + 7) / 8);
                        }
                        elsif ($type eq 's' || $type eq 'S' || $type eq 'n' || $type eq 'v') {
                                $$pos += $mult * 2;
                        }
                        elsif ($type eq 'a' || $type eq 'a*') {
                                # raw data is returned unaltered
                                $$pos += length $data->[0];
                        }
                        elsif ($type eq 'A' || $type eq 'Z') {
                                # the value has padding stripped, so its length
                                # may be less than the field width consumed
                                $$pos += $mult;
                        }
                        elsif ($type eq 'Z*') {
                                # string plus its terminating null
                                $$pos += $count // (1 + length $data->[0]);
                        }
                        elsif ($type eq 'C/a' || $type eq 'C/A') {
                                $$pos += 1 + ($count // length $data->[0]);
                                # with a maximum size the buffer was read whole
                                # (length byte included); extract the string now
                                $data->[0] = unpack 'C/a', $data->[0] if defined $count;
                        }
                        elsif ($type eq '=') {
                                # hardcoded value
                                unshift @$data, $count;
                        }
                        elsif ($type eq '=.') {
                                # current position
                                unshift @$data, $$pos;
                        }
                        elsif ($type eq 'X') {
                                # backing up returns no value
                                $$pos -= $mult;
                                next;
                        }
                        else {
                                carp "Unrecognised template element '$type'";
                        }

                        if (defined $res{$field}) {
                                # additional value for the same field: collect in array
                                $res{$field} = [ $res{$field} ] unless ref $res{$field} eq 'ARRAY';
                                push @{ $res{$field} }, shift @$data;
                        }
                        else {
                                $res{$field} = shift @$data;
                        }
                }
        }
        return \%res;
}
146
sub unpackf {
        my ($format, $input) = @_;
        # Unpack $input according to $format and return the resulting hash ref.
        # Usable as a function taking a format definition, or as a method on
        # an existing parser object.
        my $parser = $format;
        unless (UNIVERSAL::isa($format, __PACKAGE__)) {
                # raw format definition: compile it first
                $parser = __PACKAGE__->new($format);
        }
        # flat list of values according to the precompiled template
        my @values = unpack $parser->[1], $input;
        return $parser->convert($parser->[0], \@values);
}
154
155 1;
156
157 =head1 NAME
158
159 Parse::Binary::Nested - Structured unpack
160
161 =head1 SYNOPSIS
162
163         use Parse::Binary::Nested qw(unpackf);
164         my $data = unpackf([message => 'Z*'], "hi\0");
165
        my $parser = Parse::Binary::Nested->new([
167                 foos => [
168                         'C', # count
169                         message => 'Z*',
170                         period  => 'C',
171                 ],
172                 trail => 'a*',
173         ]);
174         $data = $parser->unpackf("\1foo\0.rest");
175         print $data->{foos}->[0]->{message};
176
177 =head1 DESCRIPTION
178
179 Converts a string into a hash of values, just like C<unpack>
180 except that it allows you to name and nest the resulting elements.
181
Format declarations are similar to C<pack> templates,
183 with the following additions:
184
185 =over
186
187 =item *
188
189 An array ref groups additional declarations,
190 with the first value specifying a repetition.  If this count is variable,
191 the resulting value will be an array ref of hashes.
192
193         repeat => ['C', name => 'a*', value => 'S']
194
195 With a count of 1, it will return only a hash ref,
196 thereby simply grouping declarations:
197
198         test_foo => 'C'
199         test => [1, foo => 'C']
200
201 =item *
202
A template value of C<C/a$length> is recognised as a length-preceded string
204 with a constant (maximal) size, and will return only the string adjusted
205 to its length.
206 This behaviour is very similar to C<(C/a@x$length)>, except that it never reads
207 more than the given number of bytes.
208
209 =item *
210
211 Hardcoded values can be inserted using C<=$number> values.
212 This can for example be useful to retain forwards-compatibility:
213
214         rows => ['C',
215                 type => '=1', # nothing read
216                 data => 'S',
217         ]
218         
219         rows => ['C',
220                 type => 'C',
221                 data => 'S',
222         ]
223
224 =back
225
226 =head1 AUTHOR
227
228 Mischa POSLAWSKY <perl@shiar.org>
229
230 =head1 LICENSE
231
232 GPL version 3.
233