summaryrefslogtreecommitdiff
path: root/lib/File
diff options
context:
space:
mode:
authorexiftool <exiftool@users.sourceforge.net>2018-10-09 12:24:48 -0400
committerexiftool <exiftool@users.sourceforge.net>2018-10-09 12:24:48 -0400
commit4d55678e0776c97648f15ca3635cd73ab2eb1924 (patch)
tree9c1edae34b34945898720eab78a181b75bf60142 /lib/File
parent5297201750bfe371010ee437f1d31aeec4ab49ab (diff)
Update to 11.13
Diffstat (limited to 'lib/File')
-rw-r--r--lib/File/RandomAccess.pm54
-rw-r--r--lib/File/RandomAccess.pod19
2 files changed, 66 insertions, 7 deletions
diff --git a/lib/File/RandomAccess.pm b/lib/File/RandomAccess.pm
index 42ea3b89..c689f56b 100644
--- a/lib/File/RandomAccess.pm
+++ b/lib/File/RandomAccess.pm
@@ -16,6 +16,7 @@
# 11/26/2008 - P. Harvey Fixed bug in ReadLine when reading from a
# scalar with a multi-character newline
# 01/24/2009 - PH Protect against reading too much at once
+# 10/04/2018 - PH Added NoBuffer option
#
# Notes: Calls the normal file i/o routines unless SeekTest() fails, in
# which case the file is buffered in memory to allow random access.
@@ -36,13 +37,14 @@ require 5.002;
require Exporter;
use vars qw($VERSION @ISA @EXPORT_OK);
-$VERSION = '1.10';
+$VERSION = '1.11';
@ISA = qw(Exporter);
sub Read($$$);
# constants
my $CHUNK_SIZE = 8192; # size of chunks to read from file (must be power of 2)
+my $SKIP_SIZE = 65536; # size to skip when fast-forwarding over sequential data
my $SLURP_CHUNKS = 16; # read this many chunks at a time when slurping
#------------------------------------------------------------------------------
@@ -60,6 +62,7 @@ sub new($$;$)
# string i/o
$self = {
BUFF_PT => $filePt,
+ BASE => 0,
POS => 0,
LEN => length($$filePt),
TESTED => -1,
@@ -71,8 +74,9 @@ sub new($$;$)
$self = {
FILE_PT => $filePt, # file pointer
BUFF_PT => \$buff, # reference to file data
- POS => 0, # current position in file
- LEN => 0, # data length
+ BASE => 0, # location of start of buffer in file
+ POS => 0, # current position in buffer
+ LEN => 0, # length of data in buffer
TESTED => 0, # 0=untested, 1=passed, -1=failed (requires buffering)
};
bless $self, $class;
@@ -118,7 +122,7 @@ sub Tell($)
my $self = shift;
my $rtnVal;
if ($self->{TESTED} < 0) {
- $rtnVal = $self->{POS};
+ $rtnVal = $self->{POS} + $self->{BASE};
} else {
$rtnVal = tell($self->{FILE_PT});
}
@@ -141,9 +145,11 @@ sub Seek($$;$)
if ($self->{TESTED} < 0) {
my $newPos;
if ($whence == 0) {
- $newPos = $num; # from start of file
+ $newPos = $num - $self->{BASE}; # from start of file
} elsif ($whence == 1) {
$newPos = $num + $self->{POS}; # relative to current position
+ } elsif ($self->{NoBuffer} and $self->{FILE_PT}) {
+ $newPos = -1; # (can't seek relative to end if no buffering)
} else {
$self->Slurp(); # read whole file into buffer
$newPos = $num + $self->{LEN}; # relative to end of file
@@ -192,6 +198,8 @@ sub Read($$$)
}
# read through our buffer if necessary
if ($self->{TESTED} < 0) {
+ # purge old data before reading in NoBuffer mode
+ $self->Purge() or return 0 if $self->{NoBuffer};
my $buff;
my $newPos = $self->{POS} + $len;
# number of bytes to read from file
@@ -244,6 +252,7 @@ sub ReadLine($$)
if ($self->{TESTED} < 0) {
my ($num, $buff);
+ $self->Purge() or return 0 if $self->{NoBuffer};
my $pos = $self->{POS};
if ($fp) {
# make sure we have some data after the current position
@@ -311,9 +320,39 @@ sub Slurp($)
}
}
+#------------------------------------------------------------------------------
+# Purge internal buffer [internal use only]
+# Inputs: 0) reference to RandomAccess object
+# Returns: 1 on success, or 0 if current buffer position is negative
+# Notes: This is called only in NoBuffer mode
+sub Purge($)
+{
+ my $self = shift;
+ return 1 unless $self->{FILE_PT};
+ return 0 if $self->{POS} < 0; # error if we can't read from here
+ if ($self->{POS} > $CHUNK_SIZE) {
+ my $purge = $self->{POS} - ($self->{POS} % $CHUNK_SIZE);
+ if ($purge >= $self->{LEN}) {
+ # read up to current position in 64k chunks, discarding as we go
+ while ($self->{POS} > $self->{LEN}) {
+ $self->{BASE} += $self->{LEN};
+ $self->{POS} -= $self->{LEN};
+ ${$self->{BUFF_PT}} = '';
+ $self->{LEN} = read($self->{FILE_PT}, ${$self->{BUFF_PT}}, $SKIP_SIZE);
+ last if $self->{LEN} < $SKIP_SIZE;
+ }
+ } elsif ($purge > 0) {
+ ${$self->{BUFF_PT}} = substr ${$self->{BUFF_PT}}, $purge;
+ $self->{BASE} += $purge;
+ $self->{POS} -= $purge;
+ $self->{LEN} -= $purge;
+ }
+ }
+ return 1;
+}
#------------------------------------------------------------------------------
-# set binary mode
+# Set binary mode
# Inputs: 0) reference to RandomAccess object
sub BinMode($)
{
@@ -322,7 +361,7 @@ sub BinMode($)
}
#------------------------------------------------------------------------------
-# close the file and free the buffer
+# Close the file and free the buffer
# Inputs: 0) reference to RandomAccess object
sub Close($)
{
@@ -370,6 +409,7 @@ sub Close($)
# reset the buffer
my $emptyBuff = '';
$self->{BUFF_PT} = \$emptyBuff;
+ $self->{BASE} = 0;
$self->{LEN} = 0;
$self->{POS} = 0;
}
diff --git a/lib/File/RandomAccess.pod b/lib/File/RandomAccess.pod
index 860f8a3a..d3a26f37 100644
--- a/lib/File/RandomAccess.pod
+++ b/lib/File/RandomAccess.pod
@@ -215,6 +215,25 @@ Nothing.
=back
+=head1 OPTIONS
+
+=over 4
+
+=item B<NoBuffer>
+
+Avoid buffering sequential files.
+
+ $raf->{NoBuffer} = 1;
+
+When this option is set, old data is purged from the internal buffer before
+a read operation on a sequential file. In this mode, memory requirements
+may be significantly reduced when reading sequential files, but seeking
+backward is limited to within the size of the internal buffer (which will be
+at least as large as the last returned data block), and seeking relative to
+the end of file is not allowed.
+
+=back
+
=head1 AUTHOR
Copyright 2003-2018 Phil Harvey (phil at owl.phy.queensu.ca)