mirror of
https://github.com/mpv-player/mpv
synced 2025-01-23 16:13:39 +00:00
fae7307931
file2string.pl and vdpau_functions.pl are direct ports. matroska.py was reimplemented as the Parse::Matroska module in CPAN, and matroska.pl was made a client of Parse::Matroska. A copy of Parse::Matroska is included in TOOLS/lib, and matroska.pl looks there first when trying to load the module. osxbundle.py was not ported since I have no means to verify it. Python is always available on OSX though, so there is no harm in removing the check for it on configure.
332 lines
8.7 KiB
Perl
332 lines
8.7 KiB
Perl
use 5.008;
|
|
use strict;
|
|
use warnings;
|
|
|
|
# ABSTRACT: a mid-level representation of an EBML element
|
|
package Parse::Matroska::Element;
|
|
|
|
use Carp;
|
|
use List::Util qw{first};
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
use Parse::Matroska::Reader;
|
|
my $reader = Parse::Matroska::Reader->new($path);
|
|
my $elem = $reader->read_element;
|
|
|
|
print "ID: $elem->{elid}\n";
|
|
print "Name: $elem->{name}\n";
|
|
print "Length: $elem->{content_len}\n";
|
|
print "Type: $elem->{type}\n";
|
|
print "Child count: ", scalar(@{$elem->all_children}), "\n";
|
|
if ($elem->{type} eq 'sub') {
|
|
while (my $chld = $elem->next_child) {
|
|
print "Child Name: $chld->{name}\n";
|
|
}
|
|
} else {
|
|
print "Value: ", $elem->get_value, "\n";
|
|
}
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
Represents a single Matroska element as decoded by
|
|
L<Parse::Matroska::Reader>. This is essentially a hash
|
|
augmented with functions for delay-loading of binary
|
|
values and children elements.
|
|
|
|
=head1 NOTE
|
|
|
|
The API of this module is not yet considered stable.
|
|
|
|
=attr elid
|
|
|
|
The EBML Element ID, suitable for passing to
|
|
L<Parse::Matroska::Definitions/elem_by_hexid>.
|
|
|
|
=attr name
|
|
|
|
The EBML Element's name.
|
|
|
|
=attr type
|
|
|
|
The EBML Element's type. Can be C<uint>, C<sint>,
|
|
C<float>, C<ebml_id>, C<str> or C<binary>. See L</value>
|
|
for details.
|
|
|
|
Equivalent to
|
|
C<elem_by_hexid($elem-E<gt>{value})-E<gt>{valtype}>.
|
|
|
|
=attr value
|
|
|
|
The EBML Element's value. Should be obtained through
|
|
L</get_value>.
|
|
|
|
Is an unicode string if the L</type> is C<str>, that is,
|
|
the string has already been decoded by L<Encode/decode>.
|
|
|
|
Is C<undef> if the L</type> is C<binary> and the contents
|
|
were delay-loaded and not yet read. L</get_value> will
|
|
do the delayed load if needed.
|
|
|
|
Is an arrayref if the L</type> is C<sub>, containing
|
|
the children nodes that were already loaded.
|
|
|
|
Is a hashref if the L</type> is C<ebml_id>, containing
|
|
the referred element's information as defined in
|
|
L<Parse::Matroska::Definitions>. Calling
|
|
C<elem_by_hexid($elem-E<gt>{value}-E<gt>{elid})> will
|
|
return the same object as $elem->{value}.
|
|
|
|
=attr full_len
|
|
|
|
The entire length of this EBML Element, including
|
|
the header's.
|
|
|
|
=attr size_len
|
|
|
|
The length of the size marker. Used when calculating
|
|
L</full_len> from L</content_len>
|
|
|
|
=attr content_len
|
|
|
|
The length of the contents of this EBML Element,
|
|
which excludes the header.
|
|
|
|
=attr reader
|
|
|
|
A weakened reference to the associated
|
|
L<Parse::Matroska::Reader>.
|
|
|
|
=method new(%hash)
|
|
|
|
Creates a new Element initialized with the hash
|
|
given as argument.
|
|
|
|
=cut
|
|
sub new {
|
|
my $class = shift;
|
|
my $self = {};
|
|
bless $self, $class;
|
|
|
|
$self->initialize(@_);
|
|
return $self;
|
|
}
|
|
|
|
=method initialize(%hash)
|
|
|
|
Called by L</new> on initialization.
|
|
|
|
=cut
|
|
sub initialize {
|
|
my ($self, %args) = @_;
|
|
for (keys %args) {
|
|
$self->{$_} = $args{$_};
|
|
}
|
|
$self->{depth} = 0 unless $self->{depth};
|
|
}
|
|
|
|
=method skip
|
|
|
|
Called by the user to ignore the contents of this EBML node.
|
|
Needed when ignoring the children of a node.
|
|
|
|
=cut
|
|
sub skip {
|
|
my ($self) = @_;
|
|
my $reader = $self->{reader};
|
|
return unless $reader; # we don't have to skip if there's no reader
|
|
my $pos = $reader->getpos;
|
|
croak "Too late to skip, reads were already done"
|
|
if $pos ne $self->{data_pos};
|
|
$reader->skip($self->{content_len});
|
|
}
|
|
|
|
=method get_value($keep_bin)
|
|
|
|
Returns the value contained by this EBML element.
|
|
|
|
If the element has children, returns an arrayref to
|
|
the children elements that were already encountered.
|
|
|
|
If the element's type is C<binary> and the value was
|
|
delay-loaded, does the reading now.
|
|
|
|
If $keep_bin is true, the delay-loaded data is kept
|
|
as the L</value>, otherwise, further calls to
|
|
C<get_value> will reread the data from the L</reader>.
|
|
|
|
=cut
|
|
sub get_value {
|
|
my ($self, $keep_bin) = @_;
|
|
|
|
return undef if $self->{type} eq 'skip';
|
|
return $self->{value} if $self->{value};
|
|
|
|
my $reader = $self->{reader} or
|
|
croak "The associated Reader has been deleted";
|
|
|
|
# delay-loaded 'binary'
|
|
if ($self->{type} eq 'binary') {
|
|
croak "Cannot seek in the current Reader" unless $self->{data_pos};
|
|
# seek to the data position...
|
|
$reader->setpos($self->{data_pos});
|
|
# read the data, keeping it in value if requested
|
|
if ($keep_bin) {
|
|
$self->{value} = $reader->readlen($self->{content_len});
|
|
return $self->{value};
|
|
} else {
|
|
return $reader->readlen($self->{content_len});
|
|
}
|
|
}
|
|
}
|
|
|
|
=method next_child($read_bin)
|
|
|
|
Builtin iterator; reads and returns the next child element.
|
|
Always returns undef if the type isn't C<sub>.
|
|
|
|
Returns undef at the end of the iterator and resets itself to
|
|
point to the first element; so calling L</next_child($read_bin)>
|
|
after the iterator returned C<undef> will return the first child.
|
|
|
|
The optional C<$read_bin> parameter has the children elements
|
|
not delay-load their value if their type is C<binary>.
|
|
|
|
If all children elements have already been read, return
|
|
each element in-order as would be given by
|
|
L</all_children($recurse,$read_bin)>.
|
|
|
|
=cut
|
|
sub next_child {
|
|
my ($self, $read_bin) = @_;
|
|
return unless $self->{type} eq 'sub';
|
|
|
|
if ($self->{_all_children_read}) {
|
|
my $idx = $self->{_last_child} ||= 0;
|
|
if ($idx == @{$self->{value}}) {
|
|
# reset the iterator, returning undef once
|
|
$self->{_last_child} = 0;
|
|
return;
|
|
}
|
|
my $ret = $self->{value}->[$idx];
|
|
|
|
++$idx;
|
|
$self->{_last_child} = $idx;
|
|
return $ret;
|
|
}
|
|
|
|
my $len = defined $self->{remaining_len}
|
|
? $self->{remaining_len}
|
|
: $self->{content_len};
|
|
|
|
if ($len == 0) {
|
|
# we've read all children; switch into $self->{value} iteration mode
|
|
$self->{_all_children_read} = 1;
|
|
# return undef since the iterator will reset
|
|
return;
|
|
}
|
|
|
|
$self->{pos_offset} ||= 0;
|
|
my $pos = $self->{data_pos};
|
|
my $reader = $self->{reader} or croak "The associated reader has been deleted";
|
|
$reader->setpos($pos);
|
|
$reader->{fh}->seek($self->{pos_offset}, 1) if $pos;
|
|
|
|
my $chld = $reader->read_element($read_bin);
|
|
return undef unless defined $chld;
|
|
$self->{pos_offset} += $chld->{full_len};
|
|
|
|
$self->{remaining_len} = $len - $chld->{full_len};
|
|
|
|
if ($self->{remaining_len} < 0) {
|
|
croak "Child elements consumed $self->{remaining_len} more bytes than parent $self->{name} contained";
|
|
}
|
|
|
|
$chld->{depth} = $self->{depth} + 1;
|
|
$self->{value} ||= [];
|
|
|
|
push @{$self->{value}}, $chld;
|
|
|
|
return $chld;
|
|
}
|
|
|
|
=method all_children($recurse,$read_bin)
|
|
|
|
Calls L</populate_children($recurse,$read_bin)> on self
|
|
and returns an arrayref with the children nodes.
|
|
|
|
Both C<$recurse> and C<$read_bin> are optional and default
|
|
to false.
|
|
|
|
=cut
|
|
sub all_children {
|
|
my ($self, $recurse, $read_bin) = @_;
|
|
$self->populate_children($recurse, $read_bin);
|
|
return $self->{value};
|
|
}
|
|
|
|
=method children_by_name($name)
|
|
|
|
Searches in the already read children elements for all
|
|
elements with the EBML name C<$name>. Returns an array
|
|
containing all found elements. On scalar context,
|
|
returns only the first element found.
|
|
|
|
Croaks if the element's C<type> isn't C<sub>.
|
|
|
|
=cut
|
|
sub children_by_name {
|
|
my ($self, $name) = @_;
|
|
return unless defined wantarray; # don't do work if work isn't wanted
|
|
croak "Element can't have children" unless $self->{type} eq 'sub';
|
|
|
|
my @found = grep { $_->{name} eq $name } @{$self->{value}};
|
|
return @found if wantarray; # list
|
|
return shift @found if defined wantarray; # scalar
|
|
}
|
|
|
|
=method populate_children($recurse,$read_bin)
|
|
|
|
Populates the internal array of children elements, that is,
|
|
requests that the associated L<Matroska::Parser::Reader> reads
|
|
all children elements. Returns itself.
|
|
|
|
Returns false if the element's C<type> isn't C<sub>.
|
|
|
|
If C<$recurse> is provided and is true, the method will call
|
|
itself in the children elements with the same parameters it
|
|
received; this will build a full EBML tree.
|
|
|
|
If C<$read_bin> is provided and is true, disables delay-loading
|
|
of the contents of C<binary>-type nodes, reading the contents
|
|
to memory.
|
|
|
|
If both C<$recurse> and C<$read_bin> are true, entire EBML trees
|
|
can be loaded without requiring seeks, thus behaving correctly
|
|
on unseekable streams. If C<$read_bin> is false, the entire EBML
|
|
tree is still loaded, but calling L</get_value> on C<binary>-type
|
|
nodes will produce an error on unseekable streams.
|
|
|
|
=cut
|
|
sub populate_children {
|
|
my ($self, $recurse, $read_bin) = @_;
|
|
|
|
return unless $self->{type} eq 'sub';
|
|
|
|
if (@{$self->{value}} && $recurse) {
|
|
# only recurse
|
|
foreach (@{$self->{value}}) {
|
|
$_->populate_children($recurse, $read_bin);
|
|
}
|
|
return $self;
|
|
}
|
|
|
|
while (my $chld = $self->next_child($read_bin)) {
|
|
$chld->populate_children($recurse, $read_bin) if $recurse;
|
|
}
|
|
|
|
return $self;
|
|
}
|
|
|
|
1;
|