summaryrefslogtreecommitdiff
path: root/bin/perload
diff options
context:
space:
mode:
authorrmanfredi <rmanfredi@190e5f8e-a817-0410-acf6-e9863daed9af>2006-08-24 12:32:52 +0000
committerrmanfredi <rmanfredi@190e5f8e-a817-0410-acf6-e9863daed9af>2006-08-24 12:32:52 +0000
commit8bfc5756fb68e0b13d7e7c0073ad5b9a4790d1b6 (patch)
treedee05e98bc53766d609ef2a3a07a5672627d812c /bin/perload
Moving project to sourceforge.
git-svn-id: https://dist.svn.sourceforge.net/svnroot/dist/trunk/dist@1 190e5f8e-a817-0410-acf6-e9863daed9af
Diffstat (limited to 'bin/perload')
-rwxr-xr-xbin/perload648
1 files changed, 648 insertions, 0 deletions
diff --git a/bin/perload b/bin/perload
new file mode 100755
index 0000000..2354790
--- /dev/null
+++ b/bin/perload
@@ -0,0 +1,648 @@
+: # feed this into perl
+'/bin/true' && eval 'exec perl -S $0 "$@"'
+ if $running_under_some_shell;
+'di';
+'ig00';
+
+#
+# This perl script is its own manual page [generated by wrapman]
+#
+
+# $Id: perload,v 3.0.1.1 1994/10/29 15:45:36 ram Exp ram $
+#
+# Copyright (c) 1991-1997, 2004-2006, Raphael Manfredi
+#
+# You may redistribute only under the terms of the Artistic Licence,
+# as specified in the README file that comes with the distribution.
+# You may reuse parts of this distribution only within the terms of
+# that same Artistic Licence; a copy of which may be found at the root
+# of the source tree for dist 4.0.
+#
+# $Log: perload,v $
+# Revision 3.0.1.1 1994/10/29 15:45:36 ram
+# patch36: added minimal support for perl5 dataloading
+#
+# Revision 3.0 1993/08/18 12:04:06 ram
+# Baseline for dist 3.0 netwide release.
+#
+
+# Replace each function definition in a loading section by two stubs and
+# reject the definition into the DATA part of the script if in a dataload
+# section or into a FILE if in an autoload section.
+
+$in_load = 0; # In a loading section
+$autoload = ''; # Name of autoloaded file
+$has_invocation_stub = 0; # True if we detect a #! stub
+$current_package = 'main'; # Current package
+$init_emitted = 0; # True when dataloading stamp was emitted
+$in_function = 0;
+
+require 'getopt.pl';
+&Getopt;
+
+while (<>) {
+ if ($. == 1 && /^(:|#).*perl/) { # Invocation stub
+ $has_invocation_stub = 1;
+ print;
+ next;
+ }
+ if ($. <= 3 && $has_invocation_stub) {
+ print;
+ next;
+ }
+ if (/^\s*$/) {
+ &flush_comment;
+ print unless $in_function;
+ print if $in_function && !$in_load;
+ if ($in_function && $in_load) {
+ push(@Data, "\n") unless $autoload;
+ $Auto{$autoload} .= "\n" if $autoload;
+ }
+ next;
+ }
+ if (/^\s*;?#/) {
+ if (/#\s*perload on/i) { # Enter a loading section
+ print unless /:$/;
+ $in_load = 1;
+ next;
+ }
+ if (/#\s*perload off/i) { # End a loading section
+ print unless /:$/;
+ $in_load = 0;
+ next;
+ }
+ if (/#\s*autoload (\S+)/i) { # Enter autoloading section
+ print unless /:$/;
+ push(@autoload, $autoload); # Directives may be nested
+ $autoload = $1;
+ $in_load += 2;
+ next;
+ }
+ if (/#\s*offload/i) { # End autoloading section
+ print unless /:$/;
+ $autoload = pop(@autoload); # Revert to previously active file
+ $in_load -= 2;
+ next;
+ }
+ &emit_init unless $init_emitted;
+ push(@Comment, $_) unless $in_function;
+ print if $in_function && !$in_load;
+ next unless $in_function;
+ push(@Data, $_) unless $autoload;
+ $Auto{$autoload} .= $_ if $autoload;
+ next;
+ }
+ &emit_init unless $init_emitted;
+ /^package (\S+)\s*;/ && ($current_package = $1);
+ unless ($in_load) {
+ &flush_comment;
+ print;
+ next;
+ }
+ # We are in a loading section
+ if (/^sub\s+([\w']+)\s*\{(.*)/) {
+ die "line $.: function $1 defined within another function.\n"
+ if $in_function;
+ # Silently ignore one-line functions
+ if (/\}/) {
+ &flush_comment;
+ print;
+ next;
+ }
+ $comment = $2;
+ $in_function = 1;
+ $function = $1;
+ ($fn_package, $fn_basename) = $function =~ /^(\w+)'(\w+)/;
+ unless ($fn_package) {
+ $fn_package = $current_package;
+ $fn_basename = $function;
+ }
+ # Keep leading function comment
+ foreach (@Comment) {
+ push(@Data, $_) unless $autoload;
+ $Auto{$autoload} .= $_ if $autoload;
+ }
+ @Comment = ();
+ # Change package context for correct compilation: the name is visible
+ # within the original function package while the body of the function
+ # is compiled within the current package.
+ $declaration = "sub $fn_package" . "'load_$fn_basename {$comment\n";
+ $package_context = "\tpackage $current_package;\n";
+ if ($autoload) {
+ $Auto{$autoload} .= $declaration . $package_context;
+ } else {
+ push(@Data, $declaration, $package_context);
+ }
+ # Emit stubs
+ print "sub $fn_package", "'$fn_basename";
+ print " { &auto_$fn_package", "'$fn_basename; }\n";
+ print "sub auto_$fn_package", "'$fn_basename { ";
+ print '&main\'dataload' unless $autoload;
+ print '&main\'autoload(' . "'$autoload'" . ', @_)' if $autoload;
+ print "; }\n";
+ next;
+ }
+ unless ($in_function) {
+ &flush_comment;
+ print;
+ next;
+ }
+ # We are in a loading section and inside a function body
+ push(@Data, $_) unless $autoload;
+ $Auto{$autoload} .= $_ if $autoload;
+ $in_function = 0 if /^\}/;
+ if (/^\}/) {
+ push(@Data, "\n") unless $autoload;
+ $Auto{$autoload} .= "\n" if $autoload;
+ }
+}
+
+@auto = keys %Auto;
+if (@auto > 0) {
+ print &q(<<'EOC');
+:# Load the calling function from file and call it. This function is called
+:# only once per file to be loaded.
+:sub main'autoload {
+: local($__file__) = shift(@_);
+: local($__packname__) = (caller(1))[3];
+: $__packname__ =~ s/::/'/;
+: local($__rpackname__) = $__packname__;
+: local($__saved__) = $@;
+: $__rpackname__ =~ s/^auto_//;
+: &perload'load_from_file($__file__);
+: $__rpackname__ =~ s/'/'load_/;
+: $@ = $__saved__; # Restore value $@ had on entrance
+: &$__rpackname__(@_); # Call newly loaded function
+:}
+:
+:# Load file and compile it, substituing the second stub function with the
+:# loaded ones. Location of the file uses the @AUTO array.
+:sub perload'load_from_file {
+: package perload;
+: local($file) = @_; # File to be loaded
+: local($body) = ' ' x 1024; # Pre-extent
+: local($load) = ' ' x 256; # Loading operations
+: # Avoid side effects by protecting special variables which will be
+: # changed by the autoloading operation.
+: local($., $_, $@);
+: $body = '';
+: $load = '';
+: &init_auto unless defined(@'AUTO); # Make sure we have a suitable @AUTO
+: &locate_file unless -f "$file"; # Locate file if relative path
+: open(FILE, $file) ||
+: die "Can't load $'__rpackname__ from $file: $!\n";
+: while (<FILE>) {
+: $load .= '*auto_' . $1 . '\'' . $2 . '= *' . $1 . '\'' . "load_$2;\n"
+: if (/^sub\s+(\w+)'load_(\w+)\s*\{/);
+: $body .= $_;
+: }
+: close FILE;
+EOC
+ if ($opt_t) {
+ print &q(<<'EOC');
+: # Untaint body when running setuid
+: $body =~ /^([^\0]*)/;
+: # No need to untaint $load, as it was built using trusted variables
+: eval $1 . $load;
+EOC
+ } else {
+ print &q(<<'EOC');
+: eval $body . $load;
+EOC
+ }
+ print &q(<<'EOC');
+: chop($@) && die "$@, while parsing code of $file.\n";
+:}
+:
+:# Initialize the @AUTO array. Attempt defining it by using the AUTOLIB
+:# environment variable if set, otherwise look in auto/ first, then in the
+:# current directory.
+:sub perload'init_auto {
+: if (defined $ENV{'AUTOLIB'} && $ENV{'AUTOLIB'}) {
+: @AUTO = split(':', $ENV{'AUTOLIB'});
+: } else {
+: @AUTO = ('auto', '.');
+: }
+:}
+:
+:# Locate to-be-loaded file held in $file by looking through the @AUTO array.
+:# This variable, defined in 'load_from_file', is modified as a side effect.
+:sub perload'locate_file {
+: package perload;
+: local($fullpath);
+: foreach $dir (@'AUTO) {
+: $fullpath = $dir . '/' . $file;
+: last if -f "$fullpath";
+: $fullpath = '';
+: }
+: $file = $fullpath if $fullpath; # Update var from 'load_from_file'
+:}
+:
+EOC
+}
+
+if (@Data > 0) {
+ print &q(<<'EOC');
+:# Load the calling function from DATA segment and call it. This function is
+:# called only once per routine to be loaded.
+:sub main'dataload {
+: local($__packname__) = (caller(1))[3];
+: $__packname__ =~ s/::/'/;
+: local($__rpackname__) = $__packname__;
+: local($__at__) = $@;
+: $__rpackname__ =~ s/^auto_//;
+: &perload'load_from_data($__rpackname__);
+: local($__fun__) = "$__rpackname__";
+: $__fun__ =~ s/'/'load_/;
+: eval "*$__packname__ = *$__fun__;"; # Change symbol table entry
+: die $@ if $@; # Should not happen
+: $@ = $__at__; # Restore value $@ had on entrance
+: &$__fun__; # Call newly loaded function
+:}
+:
+:# Load function name given as argument, fatal error if not existent
+:sub perload'load_from_data {
+: package perload;
+: local($pos) = $Datapos{$_[0]}; # Offset within DATA
+: # Avoid side effects by protecting special variables which will be changed
+: # by the dataloading operation.
+: local($., $_, $@);
+: $pos = &fetch_function_code unless $pos;
+: die "Function $_[0] not found in data section.\n" unless $pos;
+: die "Cannot seek to $pos into data section.\n"
+: unless seek(main'DATA, $pos, 0);
+: local($/) = "\n}";
+: local($body) = scalar(<main'DATA>);
+: local($*) = 1;
+: die "End of file found while loading $_[0].\n" unless $body =~ /^\}$/;
+EOC
+ if ($opt_t) {
+ print &q(<<'EOC');
+: # Untaint body when running setuid
+: $body =~ /^([^\0]*)/;
+: # Now we may safely eval it without getting an insecure dependency
+: eval $1; # Load function into perl space
+EOC
+ } else {
+ print &q(<<'EOC');
+: eval $body; # Load function into perl space
+EOC
+ }
+ print &q(<<'EOC');
+: chop($@) && die "$@, while parsing code of $_[0].\n";
+:}
+:
+EOC
+ print &q(<<'EOC') unless $opt_o;
+:# Parse text after the END token and record defined loadable functions (i.e.
+:# those whose name starts with load_) into the %Datapos array. Such function
+:# definitions must be left adjusted. Stop as soon as the function we want
+:# has been found.
+:sub perload'fetch_function_code {
+: package perload;
+: local($pos) = tell main'DATA;
+: local($in_function) = 0;
+: local($func_name);
+: local($., $_);
+: while (<main'DATA>) {
+: if (/^sub\s+(\w+)'load_(\w+)\s*\{/) {
+: die "DATA line $.: function $1'$2 defined within $func_name.\n"
+: if $in_function;
+: $func_name = $1 . '\'' . $2;
+: $Datapos{$func_name} = $pos;
+: $in_function = 1;
+: next;
+: }
+: $in_function = 0 if /^\}/;
+: next if $in_function;
+: return $pos if $func_name eq $_[0];
+: $pos = tell main'DATA;
+: }
+: 0; # Function not found
+:}
+:
+EOC
+ print &q(<<'EOC') if $opt_o;
+:# This function is called only once, and fills in the %Datapos array with
+:# the offset of each of the dataloaded routines held in the data section.
+:sub perload'fetch_function_code {
+: package perload;
+: local($start) = 0;
+: local($., $_);
+: while (<main'DATA>) { # First move to start of offset table
+: next if /^#/;
+: last if /^$/ && ++$start > 2; # Skip two blank line after end token
+: }
+: $start = tell(main'DATA); # Offsets in table are relative to here
+: local($key, $value);
+: while (<main'DATA>) { # Load the offset table
+: last if /^$/; # Ends with a single blank line
+: ($key, $value) = split(' ');
+: $Datapos{$key} = $value + $start;
+: }
+: $Datapos{$_[0]}; # All that pain to get this offset...
+:}
+:
+EOC
+ print &q(<<'EOC');
+:#
+:# The perl compiler stops here.
+:#
+:
+:__END__
+:
+:#
+:# Beyond this point lie functions we may never compile.
+:#
+:
+EOC
+ # Option -o directs us to optimize the function location by emitting an
+ # offset table, which lists all the position within DATA for each possible
+ # dataloaded routine.
+ if ($opt_o) {
+ print &q(<<'EOC');
+:#
+:# DO NOT CHANGE A IOTA BEYOND THIS COMMENT!
+:# The following table lists offsets of functions within the data section.
+:# Should modifications be needed, change original code and rerun perload
+:# with the -o option to regenerate a proper offset table.
+:#
+:
+EOC
+ $trailing_message = &q(<<'EOC');
+:
+:#
+:# End of offset table and beginning of dataloading section.
+:#
+:
+EOC
+ $pos = 0; # Offset relative to this point (start of table)
+ foreach (@Data) {
+ $Datapos{"$1\'$2"} = $pos - $now
+ if /^sub\s+(\w+)'load_(\w+)\s*\{/; # } for vi
+ $pos += length;
+ }
+ @poskeys = keys %Datapos; # Array of routine names (fully qualified)
+
+ # Write out a formatted table, each entry stored on $entry bytes and
+ # formatted with the $format string.
+ ($entry, $format) = &get_format(*poskeys);
+
+ # The total size occupied by the table is the size of one item times
+ # the number of items plus the final trailing message at the end of
+ # the table.
+ $table_size = $entry * @poskeys + length($trailing_message);
+
+ # Output formatted table
+ foreach (sort @poskeys) {
+ printf($format, $_, $table_size + $Datapos{$_});
+ }
+ print $trailing_message;
+ }
+
+ # Output code for each dataloaded function
+ foreach (@Data) {
+ print;
+ }
+ print &q(<<'EOC');
+:#
+:# End of dataloading section.
+:#
+:
+EOC
+}
+
+if (@auto > 0) {
+ mkdir('auto',0755) unless -d 'auto';
+ foreach $file (@auto) {
+ unless (open(AUTO, ">auto/$file")) {
+ warn "Can't create auto/$file: $!\n";
+ next;
+ }
+ print AUTO &q(<<'EOC');
+:# This file was generated by perload
+:
+EOC
+ print AUTO $Auto{$file};
+ close AUTO;
+ }
+}
+
+# Compute optimum format for routine offset table, returning both the size of
+# each entry and the formating string for printf.
+sub get_format {
+ local(*names) = @_;
+ local($name_len) = 0;
+ local($max_len) = 0;
+ foreach (@names) {
+ $name_len = length;
+ $max_len = $name_len if $name_len > $max_len;
+ }
+ # The size of each entry (preceded by one tab, followed by 12 chars)
+ $name_len = $max_len + 1 + 12;
+ ($name_len, "\t%${max_len}s %10d\n");
+}
+
+sub emit_init {
+ print &q(<<'EOC');
+:#
+:# This perl program uses dynamic loading [generated by perload]
+:#
+:
+:$ENV{LC_ALL} = 'C';
+:
+EOC
+ $init_emitted = 1;
+}
+
+sub flush_comment {
+ print @Comment if @Comment > 0;
+ @Comment = ();
+}
+
+sub q {
+ local($_) = @_;
+ local($*) = 1;
+ s/^://g;
+ $_;
+}
+
+#
+# These next few lines are legal in both perl and nroff.
+#
+
+.00; # finish .ig
+
+'di \" finish diversion--previous line must be blank
+.nr nl 0-1 \" fake up transition to first page again
+.nr % 0 \" start at page 1
+'; __END__ \" the perl compiler stops here
+
+'''
+''' From here on it's a standard manual page.
+'''
+
+.TH PERLOAD 1 "June 20, 1992"
+.AT 3
+.SH NAME
+perload \- builds up autoloaded and dataloaded perl scripts
+.SH SYNOPSIS
+.B perload
+[ \fB\-ot\fR ]
+[ \fIfile\fR ]
+.SH DESCRIPTION
+.I Perload
+takes a perl script as argument (or from stdin if no argument is supplied)
+and prints out on stdout an equivalent script set-up to perform autoloading
+or dataloading. The translation is directed by special comments within the
+original script. Using dynamic loading can drastically improve start-up
+performances, both in time and in memory, as perl does not need to compile
+the whole script nor store its whole compiled form in memory.
+.PP
+.I Autoloading
+delays compilation of some functions until they are needed. The code for these
+functions is loaded dynamically at run-time. The atomicity of loading is a
+file, which means that putting more than one function into a file will cause
+all these functions to be loaded and compiled as soon as one among them is
+needed.
+.PP
+.I Dataloading
+is a form of autoloading where no extra file are needed. The script carries
+all the functions whose compilation is to be delayed in its data segment
+(in the \fIperl\fR sense, i.e. they are accessible via the DATA filehandle).
+The scripts parses the data segment and extracts only the code for the needed
+subroutine, which means granularity is better than with autloading.
+.PP
+It is possible for a single script to use both autoloading and dataloading at
+the same time. However, it should be noted that a script using only dataloading
+is self contained and can be moved or shared accross different platforms without
+fear. On the contrary, a script using only autoloading relies on some externally
+provided files. Sharing this script among different platforms requires sharing
+of these external files. The script itself cannot be redistributed without
+also giving the extra files holding the autoloaded functions.
+.PP
+The major drawback with dataloading is that the DATA filehandle cannot be used
+for anything else and may result in code duplication when two scripts could
+share the same pieces of code. Autoloading appears as the perfect solution in
+this case since two scripts may freely share the same functions without
+actually duplicating them on the disk (hence saving some precious disk blocks
+:-).
+.SH CRITERIA
+Functions to be dataloaded or autoloaded must meet the following layout
+criteria:
+.TP 5
+\-
+They must not be one-line functions like \fIsub sorter { $a <=> $b }\fR.
+Those functions are simply output verbatim, as they are already so
+small that it would not be worth to dynamically load them,
+.TP
+\-
+The first line must be of the form \fIsub routine_name {\fR, with an optional
+comment allowed after the '{'.
+.TP
+\-
+The function definition must end with a single '}' character left aligned.
+.TP
+\-
+Package directives outside any function must be left aligned.
+.PP
+All the above restrictions should not be source of a problem if "standard"
+writing style is used. There are also some name restrictions: the package
+name \fIperload\fR is reserved, as is the \fI@AUTO\fR array when autoloading
+is used. Packages must not start with \fIauto_\fR, as this is prepended to
+user's package names when building the stubs. Furthermore, the subroutines
+names \fImain'autoload\fR and
+\fImain'dataload\fR must not be used by the original script. Again, these
+should not cause any grief.
+.SH DIRECTIVES
+The translation performed by
+.I Perload
+is driven by some special comment directives placed directly within the code.
+Ending those directives with a ':' character will actually prevent them from
+being output into the produced script. Case is irrelevant for all the directives
+and the comment need not be left-aligned, although it must be the first
+non-space item on the line.
+.PP
+The following directives are available:
+.TP 10
+# Perload ON
+Turns on the \fIperload\fR processing. Any function definition which meets
+the criteria listed in the previous section will be replaced by two stubs and
+its actual definition will be rejected into the data segment (default) or a
+file when inside an autoloading section.
+.TP
+# Perload OFF
+Turns off any processing. The script is written as-is on the standard output.
+.TP
+# Autoload \fIpath\fR
+Requests autoloading from file \fIpath\fR, which may be an absolute path or
+a relative path. The file will be located at run-time using the @AUTO array
+if a non-absolute path is supplied or if the file does not exist as listed.
+Autoloading directives may be nested.
+.TP
+# Offload \fIpath\fR
+The argument is not required. The directive ends the previous autoloading
+directive (the inmost one). This does not turn off the \fIperload\fR processing
+though. The \fIpath\fR name is optional here (in fact, it has only a comment
+value).
+.SH OPTIONS
+Perload accepts only two options. Using \fB\-o\fR is meaningful only when
+dataloading is used. It outputs an offset table which lists the relative
+offset of the dataloaded functions within the data section. This will spare
+perl the run-time parsing needed to locate the function, and results in an good
+speed gain. However, it has one major drawback: it prevents people from
+actually modifying the source beyond the start of the table. But anything
+before can be freely edited, which is particulary useful when tailoring the
+script.
+.PP
+This option should not be used when editing of functions within the data
+section is necessary for whatever reason. When \fB\-o\fR is used, any
+change in the dataloaded function must be committed by re-running perload
+on the original script.
+.PP
+The other option \fB\-t\fR is to be used when producing a script which is
+going to run setuid. The body of the loaded function is untainted before being
+fed to eval, which slightly slows down loading (the first time the function is
+called), but avoids either an insecure dependency report or weird warnings from
+taintperl stating something is wrong (which is the behaviour with 4.0 PL35).
+.SH FILES
+.TP 10
+auto
+the subdirectory where all produced autoloaded files are written.
+.SH ENVIRONMENT
+No environment variables are used by \fIperload\fR. However, the autoloaded
+version of the script pays attention to the \fIAUTOLIB\fR variable as a colon
+separated set of directories where the to-be-loaded files are to be found
+when a non-absolute path was specified. If the \fIAUTOLIB\fR variable is not
+set, the default value 'auto:.' is used (i.e. look first in the auto/
+subdirectory, then in the current directory.
+.SH CAVEAT
+Special care is required when using an autoloading script, especially when
+executed by the super-user: it would be very easy for someone to leave a
+special version of a routine to be loaded, in the hope the super-user (or
+another suitable target) executes the autoloaded version of the script with
+some \fIad hoc\fR changes...
+.PP
+The directory holding the to-be-loaded files should therefore be protected
+against unauthorized access, and no file should have write permission on them.
+The directory itself should not be world-writable either, or someone might
+substitute his own version.
+It should also be considered wise to manually set the @AUTO variable to a
+suitable value within the script itself.
+.PP
+The \fB\-o\fR option uses \fIperl\fR's special variable \fI$/\fR with a
+multi-character value. I suspect this did not work with versions of \fIperl\fR
+prior to 4.0, so any script using this optimized form of dataloading will not
+be 100% backward compatible.
+.SH AUTHOR
+Raphael Manfredi <ram@acri.fr>
+.SH CREDITS
+Valuable input came from Wayne H. Scott <wscott@ecn.purdue.edu>. He is
+merely the author of the optimizing offset table (\fB\-o\fR option).
+.PP
+.I Perload
+is based on an article from Tom Christiansen <tchrist@convex.com>,
+.I Autoloading in Perl,
+explaining the concept of dataloading and giving a basic implementation.
+.SH "SEE ALSO"
+perl(1).