parse-wormedit: silence switch feature warnings
[wormy.git] / lib / Parse / Binary / Nested.pm
1 package Parse::Binary::Nested;
2
3 use 5.010;
4 use strict;
5 use warnings;
6 use experimental 'switch';
7
8 use Carp;
9 use Exporter qw(import);
10
11 our $VERSION = '1.10';
12 our @EXPORT_OK = qw(unpackf);
13
# Constructor: Parse::Binary::Nested->new($format)
#
# $format is either an array ref of field definitions, or a plain unpack
# template string, which is wrapped as the single field named 0.
# Croaks if given any other kind of reference.
#
# The object is a blessed array holding the format definition followed by
# the flattened unpack template generated from it.
sub new {
        my ($class, $format) = @_;

        # a bare template string becomes the only field of a new format
        $format = [0 => $format] unless ref $format;

        croak "Invalid Parse::Binary::Nested format: should be an array ref"
                unless ref $format eq 'ARRAY';

        return bless [$format, $class->template($format)], $class;
}
22
# Build the total (flattened) unpack template from nested format definitions.
#
# $format is an array ref of name => template pairs, optionally preceded by
# a single repetition count (the form used for nested group definitions).
# Templates which are array refs themselves are groups, flattened recursively.
#
# Always returns exactly one string (empty if there is nothing to unpack),
# so the result can be safely used in list context.
sub template {
        my ($self, $format) = @_;
        my $template = '';

        # Templates are the second value of each pair; a leading count shifts
        # all pairs by one, which shows as an odd number of elements.
        # Stepping forwards from the first template also copes with formats
        # lacking any pair (empty or just a count): the former calculation
        # ($#$format - 1) >> 1 is an unsigned shift of -1 for a lone count,
        # resulting in an enormous loop range.
        my $first = @$format % 2 ? 2 : 1;

        for (my $i = $first; $i <= $#$format; $i += 2) {
                my $value = $format->[$i];
                if (ref $value eq 'ARRAY') {
                        my $count = $value->[0];
                        if ($count =~ /^\?/) {
                                # terminated groups have no template equivalent:
                                # take all remaining input as a single string
                                # (unpacked in steps by convert) and stop here
                                $template .= 'a*';
                                last;
                        }

                        my $group = $self->template($value);
                        if ($count =~ s/^([*\d]+)//) {
                                # constant repetition, with any remaining type
                                # read in front as the effective count
                                $value = "$count($group)$1";
                        }
                        else {
                                # read the count as a value of its own, then
                                # step back to apply it to the group as well
                                $value = $count . "X[$count]$count/($group)";
                        }
                }
                else {
                        $value =~ s/=(?:\d+|.)//g;  # hardcoded values
                        $value =~ s{^C/(a)(\d+)}{$1 . ($2 + 1)}e;  # maximum length
                }
                $template .= $value;
        }
        return $template;
}
50
# Map flat unpack results into a named and nested hash.
#
#   $format  array ref of field => template pairs (any group count removed)
#   $data    array ref of unpacked values; consumed while converting
#   $pos     optional scalar ref keeping the byte offset into the input,
#            shared with recursive calls; starts at 0 if omitted
#
# Returns a hash ref of field values.  Groups become array refs of hashes
# (or a single hash ref for a constant count of 1), and fields with
# multiple template elements become array refs of values.
# Dies with 'unsupported' on template elements not starting with a type,
# and warns about (but still stores a value for) unrecognised types.
sub convert {
        my ($self, $format, $data, $pos) = @_;
        my %res;
        $pos ||= \(my $_pos = 0);

        for (my $i = 0; $i < $#$format; $i += 2) {
                my ($field, $template) = @$format[$i, $i+1];

                if (ref $template eq 'ARRAY') {
                        my ($count, @subformat) = @$template;

                        if ($count =~ /^\?(\d+)/) {
                                # character-terminated group:
                                # the template left all remaining input in
                                # a single string, to be unpacked one group
                                # at a time until the end mark is found
                                my $endmark = chr $1;
                                my $iterate = ref($self)->new(\@subformat);
                                # extra field reporting the size of each group
                                push @{ $iterate->[0] }, -pos => '=.';
                                my $subpos = 0;
                                while ($subpos < length $data->[0]) {
                                        last if substr($data->[0], $subpos, 1) eq $endmark;
                                        my $iterdata = $iterate->convert($iterate->[0], [
                                                unpack $iterate->[1], substr($data->[0], $subpos)
                                        ]);
                                        my $consumed = delete $iterdata->{-pos};
                                        # a group which does not advance would
                                        # be repeated forever at the same offset
                                        $consumed > 0 or last;
                                        $subpos += $consumed;
                                        push @{ $res{$field} }, $iterdata;
                                }
                                $$pos += $subpos + 1;
                                # continue with the fields following the end mark
                                @$data = unpack(
                                        $self->template([ @$format[$i+2 .. $#$format] ]),
                                        substr($data->[0], $subpos + 1)
                                ) if $subpos < length $data->[0];
                                next;
                        }

                        $$pos++ if $count eq 'C';
                        # leading digits are the constant (maximum) repetition
                        my $max = $count =~ s/^(\d+)// ? $1 : 0;
                        # a remaining type is a count preceding the data, except
                        # for '*' which is taken from the total of an earlier
                        # levelcount field (presumably defined by the caller's format)
                        $count = !$count ? $max
                                : $count eq '*' ? $res{levelcount}->{total} : shift @$data;
                        for my $index (0 .. ($max || $count) - 1) {
                                $res{$field}->[$index] = $self->convert(\@subformat, $data, $pos);
                        }
                        # omit unused entries beyond the count read
                        splice @{ $res{$field} }, $count if $max > $count;
                        $res{$field} = $res{$field}->[0] if $max == 1;
                        next;
                }

                # split the template into its elements (type with optional count)
                for my $element (split m{(?![0-9*/])(?<![/=])}, $template) {
                        my ($type, $count) = $element =~ m{^(\D+)(\d+)?$} or die 'unsupported';
                        my $mult = $count // 1;

                        # keep track of the input position for each type
                        if ($type =~ /\A[cC]\z/) {
                                $$pos += $mult;
                        }
                        elsif ($type eq 'x') {
                                # skipped bytes do not provide a value
                                $$pos += $mult;
                                next;
                        }
                        elsif ($type =~ /\A[bB]\z/) {
                                $$pos++;
                        }
                        elsif ($type =~ /\A[sSnv]\z/) {
                                $$pos += $mult * 2;
                        }
                        elsif ($type =~ /\A(?:[aAZ]|a\*)\z/) {
                                $$pos += length $data->[0];
                        }
                        elsif ($type eq 'Z*') {
                                # string length including its terminator, unless
                                # a count is given (note // binds looser than +)
                                $$pos += $count // (1 + length $data->[0]);
                        }
                        elsif ($type =~ m{\AC/[aA]\z}) {
                                $$pos += 1 + ($count // length $data->[0]);
                                # constant size: read as a single string still
                                # including its length byte (see template)
                                $data->[0] = unpack 'C/a', $data->[0] if defined $count;
                        }
                        elsif ($type eq '=') {
                                # hardcoded value, not read from input
                                unshift @$data, $count;
                        }
                        elsif ($type eq '=.') {
                                # current position, not read from input
                                unshift @$data, $$pos;
                        }
                        elsif ($type eq 'X') {
                                # stepping back does not provide a value
                                $$pos -= $mult;
                                next;
                        }
                        else {
                                carp "Unrecognised template element '$type'";
                        }

                        # store the next value, collecting multiple in an array
                        if (defined $res{$field}) {
                                $res{$field} = [ $res{$field} ] unless ref $res{$field} eq 'ARRAY';
                                push @{ $res{$field} }, shift @$data;
                        }
                        else {
                                $res{$field} = shift @$data;
                        }
                }
        }
        return \%res;
}
147
# unpackf($format, $input)
#
# Unpack the string $input according to $format, which is either a parser
# object of this package (allowing calls as a method) or a format definition
# from which a temporary parser is created.
# Returns the hash ref of named values given by convert.
sub unpackf {
        my ($format, $input) = @_;

        # reuse a prepared parser, otherwise set one up from the definition
        my $parser = $format;
        $parser = __PACKAGE__->new($format)
                unless UNIVERSAL::isa($format, __PACKAGE__);

        # flat list of values, still to be named and nested
        my @values = unpack $parser->[1], $input;
        return $parser->convert($parser->[0], \@values);
}
155
156 1;
157
158 =head1 NAME
159
160 Parse::Binary::Nested - Structured unpack
161
162 =head1 SYNOPSIS
163
164         use Parse::Binary::Nested qw(unpackf);
165         my $data = unpackf([message => 'Z*'], "hi\0");
166
        my $parser = Parse::Binary::Nested->new([
168                 foos => [
169                         'C', # count
170                         message => 'Z*',
171                         period  => 'C',
172                 ],
173                 trail => 'a*',
174         ]);
175         $data = $parser->unpackf("\1foo\0.rest");
176         print $data->{foos}->[0]->{message};
177
178 =head1 DESCRIPTION
179
180 Converts a string into a hash of values, just like C<unpack>
181 except that it allows you to name and nest the resulting elements.
182
Format declarations are similar to C<pack> templates,
184 with the following additions:
185
186 =over
187
188 =item *
189
190 An array ref groups additional declarations,
191 with the first value specifying a repetition.  If this count is variable,
192 the resulting value will be an array ref of hashes.
193
194         repeat => ['C', name => 'a*', value => 'S']
195
196 With a count of 1, it will return only a hash ref,
197 thereby simply grouping declarations:
198
199         test_foo => 'C'
200         test => [1, foo => 'C']
201
202 =item *
203
A template value of C<C/a$length> is recognised as a length-preceded string
205 with a constant (maximal) size, and will return only the string adjusted
206 to its length.
207 This behaviour is very similar to C<(C/a@x$length)>, except that it never reads
208 more than the given number of bytes.
209
210 =item *
211
212 Hardcoded values can be inserted using C<=$number> values.
213 This can for example be useful to retain forwards-compatibility:
214
215         rows => ['C',
216                 type => '=1', # nothing read
217                 data => 'S',
218         ]
219         
220         rows => ['C',
221                 type => 'C',
222                 data => 'S',
223         ]
224
225 =back
226
227 =head1 AUTHOR
228
229 Mischa POSLAWSKY <perl@shiar.org>
230
231 =head1 LICENSE
232
233 GPL version 3.
234