75c22fa897eacaa3335cccec230722f6db603d02
[wormy.git] / Parse / Binary / Nested.pm
1 package Parse::Binary::Nested;
2
3 use 5.010;
4 use strict;
5 use warnings;
6
7 use Carp;
8 use Exporter qw(import);
9
10 our $VERSION = '1.10';
11 our @EXPORT_OK = qw(unpackf);
12
13 sub new {
14         my ($class, $format) = @_;
15         ref $format
16                 or $format = [0 => $format];
17         ref $format eq 'ARRAY'
18                 or croak "Invalid Parse::Binary::Nested format: should be an array ref";
19         bless [$format, $class->template($format)], $class;
20 }
21
22 sub template {
23         my ($self, $format) = @_;
24         # total (flattened) unpack template from nested format definitions
25         return join '', map {
26                 my $value = $format->[-($_ << 1) - 1];
27                 if (ref $value eq 'ARRAY') {
28                         my $count = $value->[0];
29                         $value = $self->template($value);
30                         $value = $count =~ s/^([*\d]+)// ? "$count($value)$1"
31                                 : $count."X[$count]$count/($value)";
32                 }
33                 else {
34                         $value =~ s/=(?:\d+|.)//g;  # hardcoded values
35                         $value =~ s{^C/(a)(\d+)}{$1 . ($2 + 1)}e;  # maximum length
36                 }
37                 $value;
38         } reverse 0 .. ($#$format - 1) >> 1;
39 }
40
41 sub convert {
42         my ($self, $format, $data, $pos) = @_;
43         # map flat results into a named and nested hash
44         my %res;
45         $pos ||= \(my $_pos);
46         for (my $i = 0; $i < $#$format; $i += 2) {
47                 my ($field, $template) = @$format[$i, $i+1];
48                 if (ref $template eq 'ARRAY') {
49                         my ($count, @subformat) = @$template;
50                         $$pos++ if $count eq 'C';
51                         my $max = $count =~ s/^(\d+)// ? $1 : 0;
52                         $count = !$count ? $max
53                                 : $count eq '*' ? $res{levelcount}->{total} : shift @$data;
54                         $res{$field}->[$_] = $self->convert(\@subformat, $data, $pos)
55                                 for 0 .. ($max || $count)-1;
56                         splice @{ $res{$field} }, $count if $max > $count;
57                         $res{$field} = $res{$field}->[0] if $max == 1;
58                         next;
59                 }
60                 else {
61                         for (split m{(?![0-9*/])(?<![/=])}, $template) {
62                                 my ($type, $count) = m{^(\D+)(\d+)?$} or die 'unsupported';
63                                 my $mult = $count // 1;
64                                 given ($type) {
65                                         when (['c', 'C']) {
66                                                 $$pos += $mult;
67                                         }
68                                         when ('x') {
69                                                 $$pos += $mult;
70                                                 next;
71                                         }
72                                         when (['b', 'B']) {
73                                                 $$pos++;
74                                         }
75                                         when (['s', 'S', 'n', 'v']) {
76                                                 $$pos += $mult * 2;
77                                         }
78                                         when (['a', 'A', 'Z', 'a*']) {
79                                                 $$pos += length $data->[0];
80                                         }
81                                         when ('Z*') {
82                                                 $$pos += $count // 1 + length $data->[0];
83                                         }
84                                         when (['C/a', 'C/A']) {
85                                                 $$pos += 1 + ($count // length $data->[0]);
86                                                 $data->[0] = unpack 'C/a', $data->[0] if defined $count;
87                                         }
88                                         when ('=') {
89                                                 unshift @$data, $count;
90                                         }
91                                         when ('=.') {
92                                                 unshift @$data, $$pos;
93                                         }
94                                         when ('X') {
95                                                 $$pos -= $mult;
96                                                 next;
97                                         }
98                                         default {
99                                                 carp "Unrecognised template element '$type'";
100                                         }
101                                 }
102                                 if (defined $res{$field}) {
103                                         $res{$field} = [ $res{$field} ] unless ref $res{$field} eq 'ARRAY';
104                                         push @{ $res{$field} }, shift @$data;
105                                 }
106                                 else {
107                                         $res{$field} = shift @$data;
108                                 }
109                         }
110                 }
111         }
112         return \%res;
113 }
114
115 sub unpackf {
116         my ($format, $input) = @_;
117         my $self = UNIVERSAL::isa($format, __PACKAGE__) ? $format
118                 : __PACKAGE__->new($format);
119         my @data = unpack $self->[1], $input;
120         return $self->convert($self->[0], \@data);
121 }
122
123 1;
124
125 =head1 NAME
126
127 Parse::Binary::Nested - Structured unpack
128
129 =head1 SYNOPSIS
130
131         use Parse::Binary::Nested qw(unpackf);
132         my $data = unpackf([message => 'Z*'], "hi\0");
133
134         my $parser = Parser::Binary::Nested->new([
135                 foos => [
136                         'C', # count
137                         message => 'Z*',
138                         period  => 'C',
139                 ],
140                 trail => 'a*',
141         ]);
142         $data = $parser->unpackf("\1foo\0.rest");
143         print $data->{foos}->[0]->{message};
144
145 =head1 DESCRIPTION
146
147 Converts a string into a hash of values, just like C<unpack>
148 except that it allows you to name and nest the resulting elements.
149
150 Format declarations are simalar to C<pack> templates,
151 with the following additions:
152
153 =over
154
155 =item *
156
157 An array ref groups additional declarations,
158 with the first value specifying a repetition.  If this count is variable,
159 the resulting value will be an array ref of hashes.
160
161         repeat => ['C', name => 'a*', value => 'S']
162
163 With a count of 1, it will return only a hash ref,
164 thereby simply grouping declarations:
165
166         test_foo => 'C'
167         test => [1, foo => 'C']
168
169 =item *
170
171 A template value of C<Ca$length> is recognised as a length-preceded string
172 with a constant (maximal) size, and will return only the string adjusted
173 to its length.
174 This behaviour is very similar to C<(C/a@x$length)>, except that it never reads
175 more than the given number of bytes.
176
177 =item *
178
179 Hardcoded values can be inserted using C<=$number> values.
180 This can for example be useful to retain forwards-compatibility:
181
182         rows => ['C',
183                 type => '=1', # nothing read
184                 data => 'S',
185         ]
186         
187         rows => ['C',
188                 type => 'C',
189                 data => 'S',
190         ]
191
192 =back
193
194 =head1 AUTHOR
195
196 Mischa POSLAWSKY <perl@shiar.org>
197
198 =head1 LICENSE
199
200 GPL version 3.
201