diff options
author | rmanfredi <rmanfredi@190e5f8e-a817-0410-acf6-e9863daed9af> | 2006-08-24 12:32:52 +0000 |
---|---|---|
committer | rmanfredi <rmanfredi@190e5f8e-a817-0410-acf6-e9863daed9af> | 2006-08-24 12:32:52 +0000 |
commit | 8bfc5756fb68e0b13d7e7c0073ad5b9a4790d1b6 (patch) | |
tree | dee05e98bc53766d609ef2a3a07a5672627d812c /bin/perload |
Moving project to sourceforge.
git-svn-id: https://dist.svn.sourceforge.net/svnroot/dist/trunk/dist@1 190e5f8e-a817-0410-acf6-e9863daed9af
Diffstat (limited to 'bin/perload')
-rwxr-xr-x | bin/perload | 648 |
1 files changed, 648 insertions, 0 deletions
diff --git a/bin/perload b/bin/perload new file mode 100755 index 0000000..2354790 --- /dev/null +++ b/bin/perload @@ -0,0 +1,648 @@ +: # feed this into perl +'/bin/true' && eval 'exec perl -S $0 "$@"' + if $running_under_some_shell; +'di'; +'ig00'; + +# +# This perl script is its own manual page [generated by wrapman] +# + +# $Id: perload,v 3.0.1.1 1994/10/29 15:45:36 ram Exp ram $ +# +# Copyright (c) 1991-1997, 2004-2006, Raphael Manfredi +# +# You may redistribute only under the terms of the Artistic Licence, +# as specified in the README file that comes with the distribution. +# You may reuse parts of this distribution only within the terms of +# that same Artistic Licence; a copy of which may be found at the root +# of the source tree for dist 4.0. +# +# $Log: perload,v $ +# Revision 3.0.1.1 1994/10/29 15:45:36 ram +# patch36: added minimal support for perl5 dataloading +# +# Revision 3.0 1993/08/18 12:04:06 ram +# Baseline for dist 3.0 netwide release. +# + +# Replace each function definition in a loading section by two stubs and +# reject the definition into the DATA part of the script if in a dataload +# section or into a FILE if in an autoload section. + +$in_load = 0; # In a loading section +$autoload = ''; # Name of autoloaded file +$has_invocation_stub = 0; # True if we detect a #! stub +$current_package = 'main'; # Current package +$init_emitted = 0; # True when dataloading stamp was emitted +$in_function = 0; + +require 'getopt.pl'; +&Getopt; + +while (<>) { + if ($. == 1 && /^(:|#).*perl/) { # Invocation stub + $has_invocation_stub = 1; + print; + next; + } + if ($. <= 3 && $has_invocation_stub) { + print; + next; + } + if (/^\s*$/) { + &flush_comment; + print unless $in_function; + print if $in_function && !$in_load; + if ($in_function && $in_load) { + push(@Data, "\n") unless $autoload; + $Auto{$autoload} .= "\n" if $autoload; + } + next; + } + if (/^\s*;?#/) { + if (/#\s*perload on/i) { # Enter a loading section + print unless /:$/; + $in_load = 1; + next; + } + if (/#\s*perload off/i) { # End a loading section + print unless /:$/; + $in_load = 0; + next; + } + if (/#\s*autoload (\S+)/i) { # Enter autoloading section + print unless /:$/; + push(@autoload, $autoload); # Directives may be nested + $autoload = $1; + $in_load += 2; + next; + } + if (/#\s*offload/i) { # End autoloading section + print unless /:$/; + $autoload = pop(@autoload); # Revert to previously active file + $in_load -= 2; + next; + } + &emit_init unless $init_emitted; + push(@Comment, $_) unless $in_function; + print if $in_function && !$in_load; + next unless $in_function; + push(@Data, $_) unless $autoload; + $Auto{$autoload} .= $_ if $autoload; + next; + } + &emit_init unless $init_emitted; + /^package (\S+)\s*;/ && ($current_package = $1); + unless ($in_load) { + &flush_comment; + print; + next; + } + # We are in a loading section + if (/^sub\s+([\w']+)\s*\{(.*)/) { + die "line $.: function $1 defined within another function.\n" + if $in_function; + # Silently ignore one-line functions + if (/\}/) { + &flush_comment; + print; + next; + } + $comment = $2; + $in_function = 1; + $function = $1; + ($fn_package, $fn_basename) = $function =~ /^(\w+)'(\w+)/; + unless ($fn_package) { + $fn_package = $current_package; + $fn_basename = $function; + } + # Keep leading function comment + foreach (@Comment) { + push(@Data, $_) unless $autoload; + $Auto{$autoload} .= $_ if $autoload; + } + @Comment = (); + # Change package context for correct compilation: the name is visible + # within the original function package while the body of the function + # is compiled within the current package. + $declaration = "sub $fn_package" . "'load_$fn_basename {$comment\n"; + $package_context = "\tpackage $current_package;\n"; + if ($autoload) { + $Auto{$autoload} .= $declaration . $package_context; + } else { + push(@Data, $declaration, $package_context); + } + # Emit stubs + print "sub $fn_package", "'$fn_basename"; + print " { &auto_$fn_package", "'$fn_basename; }\n"; + print "sub auto_$fn_package", "'$fn_basename { "; + print '&main\'dataload' unless $autoload; + print '&main\'autoload(' . "'$autoload'" . ', @_)' if $autoload; + print "; }\n"; + next; + } + unless ($in_function) { + &flush_comment; + print; + next; + } + # We are in a loading section and inside a function body + push(@Data, $_) unless $autoload; + $Auto{$autoload} .= $_ if $autoload; + $in_function = 0 if /^\}/; + if (/^\}/) { + push(@Data, "\n") unless $autoload; + $Auto{$autoload} .= "\n" if $autoload; + } +} + +@auto = keys %Auto; +if (@auto > 0) { + print &q(<<'EOC'); +:# Load the calling function from file and call it. This function is called +:# only once per file to be loaded. +:sub main'autoload { +: local($__file__) = shift(@_); +: local($__packname__) = (caller(1))[3]; +: $__packname__ =~ s/::/'/; +: local($__rpackname__) = $__packname__; +: local($__saved__) = $@; +: $__rpackname__ =~ s/^auto_//; +: &perload'load_from_file($__file__); +: $__rpackname__ =~ s/'/'load_/; +: $@ = $__saved__; # Restore value $@ had on entrance +: &$__rpackname__(@_); # Call newly loaded function +:} +: +:# Load file and compile it, substituing the second stub function with the +:# loaded ones. Location of the file uses the @AUTO array. +:sub perload'load_from_file { +: package perload; +: local($file) = @_; # File to be loaded +: local($body) = ' ' x 1024; # Pre-extent +: local($load) = ' ' x 256; # Loading operations +: # Avoid side effects by protecting special variables which will be +: # changed by the autoloading operation. +: local($., $_, $@); +: $body = ''; +: $load = ''; +: &init_auto unless defined(@'AUTO); # Make sure we have a suitable @AUTO +: &locate_file unless -f "$file"; # Locate file if relative path +: open(FILE, $file) || +: die "Can't load $'__rpackname__ from $file: $!\n"; +: while (<FILE>) { +: $load .= '*auto_' . $1 . '\'' . $2 . '= *' . $1 . '\'' . "load_$2;\n" +: if (/^sub\s+(\w+)'load_(\w+)\s*\{/); +: $body .= $_; +: } +: close FILE; +EOC + if ($opt_t) { + print &q(<<'EOC'); +: # Untaint body when running setuid +: $body =~ /^([^\0]*)/; +: # No need to untaint $load, as it was built using trusted variables +: eval $1 . $load; +EOC + } else { + print &q(<<'EOC'); +: eval $body . $load; +EOC + } + print &q(<<'EOC'); +: chop($@) && die "$@, while parsing code of $file.\n"; +:} +: +:# Initialize the @AUTO array. Attempt defining it by using the AUTOLIB +:# environment variable if set, otherwise look in auto/ first, then in the +:# current directory. +:sub perload'init_auto { +: if (defined $ENV{'AUTOLIB'} && $ENV{'AUTOLIB'}) { +: @AUTO = split(':', $ENV{'AUTOLIB'}); +: } else { +: @AUTO = ('auto', '.'); +: } +:} +: +:# Locate to-be-loaded file held in $file by looking through the @AUTO array. +:# This variable, defined in 'load_from_file', is modified as a side effect. +:sub perload'locate_file { +: package perload; +: local($fullpath); +: foreach $dir (@'AUTO) { +: $fullpath = $dir . '/' . $file; +: last if -f "$fullpath"; +: $fullpath = ''; +: } +: $file = $fullpath if $fullpath; # Update var from 'load_from_file' +:} +: +EOC +} + +if (@Data > 0) { + print &q(<<'EOC'); +:# Load the calling function from DATA segment and call it. This function is +:# called only once per routine to be loaded. +:sub main'dataload { +: local($__packname__) = (caller(1))[3]; +: $__packname__ =~ s/::/'/; +: local($__rpackname__) = $__packname__; +: local($__at__) = $@; +: $__rpackname__ =~ s/^auto_//; +: &perload'load_from_data($__rpackname__); +: local($__fun__) = "$__rpackname__"; +: $__fun__ =~ s/'/'load_/; +: eval "*$__packname__ = *$__fun__;"; # Change symbol table entry +: die $@ if $@; # Should not happen +: $@ = $__at__; # Restore value $@ had on entrance +: &$__fun__; # Call newly loaded function +:} +: +:# Load function name given as argument, fatal error if not existent +:sub perload'load_from_data { +: package perload; +: local($pos) = $Datapos{$_[0]}; # Offset within DATA +: # Avoid side effects by protecting special variables which will be changed +: # by the dataloading operation. +: local($., $_, $@); +: $pos = &fetch_function_code unless $pos; +: die "Function $_[0] not found in data section.\n" unless $pos; +: die "Cannot seek to $pos into data section.\n" +: unless seek(main'DATA, $pos, 0); +: local($/) = "\n}"; +: local($body) = scalar(<main'DATA>); +: local($*) = 1; +: die "End of file found while loading $_[0].\n" unless $body =~ /^\}$/; +EOC + if ($opt_t) { + print &q(<<'EOC'); +: # Untaint body when running setuid +: $body =~ /^([^\0]*)/; +: # Now we may safely eval it without getting an insecure dependency +: eval $1; # Load function into perl space +EOC + } else { + print &q(<<'EOC'); +: eval $body; # Load function into perl space +EOC + } + print &q(<<'EOC'); +: chop($@) && die "$@, while parsing code of $_[0].\n"; +:} +: +EOC + print &q(<<'EOC') unless $opt_o; +:# Parse text after the END token and record defined loadable functions (i.e. +:# those whose name starts with load_) into the %Datapos array. Such function +:# definitions must be left adjusted. Stop as soon as the function we want +:# has been found. +:sub perload'fetch_function_code { +: package perload; +: local($pos) = tell main'DATA; +: local($in_function) = 0; +: local($func_name); +: local($., $_); +: while (<main'DATA>) { +: if (/^sub\s+(\w+)'load_(\w+)\s*\{/) { +: die "DATA line $.: function $1'$2 defined within $func_name.\n" +: if $in_function; +: $func_name = $1 . '\'' . $2; +: $Datapos{$func_name} = $pos; +: $in_function = 1; +: next; +: } +: $in_function = 0 if /^\}/; +: next if $in_function; +: return $pos if $func_name eq $_[0]; +: $pos = tell main'DATA; +: } +: 0; # Function not found +:} +: +EOC + print &q(<<'EOC') if $opt_o; +:# This function is called only once, and fills in the %Datapos array with +:# the offset of each of the dataloaded routines held in the data section. +:sub perload'fetch_function_code { +: package perload; +: local($start) = 0; +: local($., $_); +: while (<main'DATA>) { # First move to start of offset table +: next if /^#/; +: last if /^$/ && ++$start > 2; # Skip two blank line after end token +: } +: $start = tell(main'DATA); # Offsets in table are relative to here +: local($key, $value); +: while (<main'DATA>) { # Load the offset table +: last if /^$/; # Ends with a single blank line +: ($key, $value) = split(' '); +: $Datapos{$key} = $value + $start; +: } +: $Datapos{$_[0]}; # All that pain to get this offset... +:} +: +EOC + print &q(<<'EOC'); +:# +:# The perl compiler stops here. +:# +: +:__END__ +: +:# +:# Beyond this point lie functions we may never compile. +:# +: +EOC + # Option -o directs us to optimize the function location by emitting an + # offset table, which lists all the position within DATA for each possible + # dataloaded routine. + if ($opt_o) { + print &q(<<'EOC'); +:# +:# DO NOT CHANGE A IOTA BEYOND THIS COMMENT! +:# The following table lists offsets of functions within the data section. +:# Should modifications be needed, change original code and rerun perload +:# with the -o option to regenerate a proper offset table. +:# +: +EOC + $trailing_message = &q(<<'EOC'); +: +:# +:# End of offset table and beginning of dataloading section. +:# +: +EOC + $pos = 0; # Offset relative to this point (start of table) + foreach (@Data) { + $Datapos{"$1\'$2"} = $pos - $now + if /^sub\s+(\w+)'load_(\w+)\s*\{/; # } for vi + $pos += length; + } + @poskeys = keys %Datapos; # Array of routine names (fully qualified) + + # Write out a formatted table, each entry stored on $entry bytes and + # formatted with the $format string. + ($entry, $format) = &get_format(*poskeys); + + # The total size occupied by the table is the size of one item times + # the number of items plus the final trailing message at the end of + # the table. + $table_size = $entry * @poskeys + length($trailing_message); + + # Output formatted table + foreach (sort @poskeys) { + printf($format, $_, $table_size + $Datapos{$_}); + } + print $trailing_message; + } + + # Output code for each dataloaded function + foreach (@Data) { + print; + } + print &q(<<'EOC'); +:# +:# End of dataloading section. +:# +: +EOC +} + +if (@auto > 0) { + mkdir('auto',0755) unless -d 'auto'; + foreach $file (@auto) { + unless (open(AUTO, ">auto/$file")) { + warn "Can't create auto/$file: $!\n"; + next; + } + print AUTO &q(<<'EOC'); +:# This file was generated by perload +: +EOC + print AUTO $Auto{$file}; + close AUTO; + } +} + +# Compute optimum format for routine offset table, returning both the size of +# each entry and the formating string for printf. +sub get_format { + local(*names) = @_; + local($name_len) = 0; + local($max_len) = 0; + foreach (@names) { + $name_len = length; + $max_len = $name_len if $name_len > $max_len; + } + # The size of each entry (preceded by one tab, followed by 12 chars) + $name_len = $max_len + 1 + 12; + ($name_len, "\t%${max_len}s %10d\n"); +} + +sub emit_init { + print &q(<<'EOC'); +:# +:# This perl program uses dynamic loading [generated by perload] +:# +: +:$ENV{LC_ALL} = 'C'; +: +EOC + $init_emitted = 1; +} + +sub flush_comment { + print @Comment if @Comment > 0; + @Comment = (); +} + +sub q { + local($_) = @_; + local($*) = 1; + s/^://g; + $_; +} + +# +# These next few lines are legal in both perl and nroff. +# + +.00; # finish .ig + +'di \" finish diversion--previous line must be blank +.nr nl 0-1 \" fake up transition to first page again +.nr % 0 \" start at page 1 +'; __END__ \" the perl compiler stops here + +''' +''' From here on it's a standard manual page. +''' + +.TH PERLOAD 1 "June 20, 1992" +.AT 3 +.SH NAME +perload \- builds up autoloaded and dataloaded perl scripts +.SH SYNOPSIS +.B perload +[ \fB\-ot\fR ] +[ \fIfile\fR ] +.SH DESCRIPTION +.I Perload +takes a perl script as argument (or from stdin if no argument is supplied) +and prints out on stdout an equivalent script set-up to perform autoloading +or dataloading. The translation is directed by special comments within the +original script. Using dynamic loading can drastically improve start-up +performances, both in time and in memory, as perl does not need to compile +the whole script nor store its whole compiled form in memory. +.PP +.I Autoloading +delays compilation of some functions until they are needed. The code for these +functions is loaded dynamically at run-time. The atomicity of loading is a +file, which means that putting more than one function into a file will cause +all these functions to be loaded and compiled as soon as one among them is +needed. +.PP +.I Dataloading +is a form of autoloading where no extra file are needed. The script carries +all the functions whose compilation is to be delayed in its data segment +(in the \fIperl\fR sense, i.e. they are accessible via the DATA filehandle). +The scripts parses the data segment and extracts only the code for the needed +subroutine, which means granularity is better than with autloading. +.PP +It is possible for a single script to use both autoloading and dataloading at +the same time. However, it should be noted that a script using only dataloading +is self contained and can be moved or shared accross different platforms without +fear. On the contrary, a script using only autoloading relies on some externally +provided files. Sharing this script among different platforms requires sharing +of these external files. The script itself cannot be redistributed without +also giving the extra files holding the autoloaded functions. +.PP +The major drawback with dataloading is that the DATA filehandle cannot be used +for anything else and may result in code duplication when two scripts could +share the same pieces of code. Autoloading appears as the perfect solution in +this case since two scripts may freely share the same functions without +actually duplicating them on the disk (hence saving some precious disk blocks +:-). +.SH CRITERIA +Functions to be dataloaded or autoloaded must meet the following layout +criteria: +.TP 5 +\- +They must not be one-line functions like \fIsub sorter { $a <=> $b }\fR. +Those functions are simply output verbatim, as they are already so +small that it would not be worth to dynamically load them, +.TP +\- +The first line must be of the form \fIsub routine_name {\fR, with an optional +comment allowed after the '{'. +.TP +\- +The function definition must end with a single '}' character left aligned. +.TP +\- +Package directives outside any function must be left aligned. +.PP +All the above restrictions should not be source of a problem if "standard" +writing style is used. There are also some name restrictions: the package +name \fIperload\fR is reserved, as is the \fI@AUTO\fR array when autoloading +is used. Packages must not start with \fIauto_\fR, as this is prepended to +user's package names when building the stubs. Furthermore, the subroutines +names \fImain'autoload\fR and +\fImain'dataload\fR must not be used by the original script. Again, these +should not cause any grief. +.SH DIRECTIVES +The translation performed by +.I Perload +is driven by some special comment directives placed directly within the code. +Ending those directives with a ':' character will actually prevent them from +being output into the produced script. Case is irrelevant for all the directives +and the comment need not be left-aligned, although it must be the first +non-space item on the line. +.PP +The following directives are available: +.TP 10 +# Perload ON +Turns on the \fIperload\fR processing. Any function definition which meets +the criteria listed in the previous section will be replaced by two stubs and +its actual definition will be rejected into the data segment (default) or a +file when inside an autoloading section. +.TP +# Perload OFF +Turns off any processing. The script is written as-is on the standard output. +.TP +# Autoload \fIpath\fR +Requests autoloading from file \fIpath\fR, which may be an absolute path or +a relative path. The file will be located at run-time using the @AUTO array +if a non-absolute path is supplied or if the file does not exist as listed. +Autoloading directives may be nested. +.TP +# Offload \fIpath\fR +The argument is not required. The directive ends the previous autoloading +directive (the inmost one). This does not turn off the \fIperload\fR processing +though. The \fIpath\fR name is optional here (in fact, it has only a comment +value). +.SH OPTIONS +Perload accepts only two options. Using \fB\-o\fR is meaningful only when +dataloading is used. It outputs an offset table which lists the relative +offset of the dataloaded functions within the data section. This will spare +perl the run-time parsing needed to locate the function, and results in an good +speed gain. However, it has one major drawback: it prevents people from +actually modifying the source beyond the start of the table. But anything +before can be freely edited, which is particulary useful when tailoring the +script. +.PP +This option should not be used when editing of functions within the data +section is necessary for whatever reason. When \fB\-o\fR is used, any +change in the dataloaded function must be committed by re-running perload +on the original script. +.PP +The other option \fB\-t\fR is to be used when producing a script which is +going to run setuid. The body of the loaded function is untainted before being +fed to eval, which slightly slows down loading (the first time the function is +called), but avoids either an insecure dependency report or weird warnings from +taintperl stating something is wrong (which is the behaviour with 4.0 PL35). +.SH FILES +.TP 10 +auto +the subdirectory where all produced autoloaded files are written. +.SH ENVIRONMENT +No environment variables are used by \fIperload\fR. However, the autoloaded +version of the script pays attention to the \fIAUTOLIB\fR variable as a colon +separated set of directories where the to-be-loaded files are to be found +when a non-absolute path was specified. If the \fIAUTOLIB\fR variable is not +set, the default value 'auto:.' is used (i.e. look first in the auto/ +subdirectory, then in the current directory. +.SH CAVEAT +Special care is required when using an autoloading script, especially when +executed by the super-user: it would be very easy for someone to leave a +special version of a routine to be loaded, in the hope the super-user (or +another suitable target) executes the autoloaded version of the script with +some \fIad hoc\fR changes... +.PP +The directory holding the to-be-loaded files should therefore be protected +against unauthorized access, and no file should have write permission on them. +The directory itself should not be world-writable either, or someone might +substitute his own version. +It should also be considered wise to manually set the @AUTO variable to a +suitable value within the script itself. +.PP +The \fB\-o\fR option uses \fIperl\fR's special variable \fI$/\fR with a +multi-character value. I suspect this did not work with versions of \fIperl\fR +prior to 4.0, so any script using this optimized form of dataloading will not +be 100% backward compatible. +.SH AUTHOR +Raphael Manfredi <ram@acri.fr> +.SH CREDITS +Valuable input came from Wayne H. Scott <wscott@ecn.purdue.edu>. He is +merely the author of the optimizing offset table (\fB\-o\fR option). +.PP +.I Perload +is based on an article from Tom Christiansen <tchrist@convex.com>, +.I Autoloading in Perl, +explaining the concept of dataloading and giving a basic implementation. +.SH "SEE ALSO" +perl(1). |