summaryrefslogtreecommitdiff
path: root/lib/IO/Async/ChildManager.pm
blob: 48ac108420c51f8891642f7b781b1d508b86b6ec (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
#  You may distribute under the terms of either the GNU General Public License
#  or the Artistic License (the same terms as Perl itself)
#
#  (C) Paul Evans, 2007-2014 -- leonerd@leonerd.org.uk

package IO::Async::ChildManager;

use strict;
use warnings;

our $VERSION = '0.67';

# Not a notifier

use IO::Async::Stream;

use IO::Async::OS;

use Carp;
use Scalar::Util qw( weaken );

use POSIX qw( _exit dup dup2 nice );

use constant LENGTH_OF_I => length( pack( "I", 0 ) );

=head1 NAME

C<IO::Async::ChildManager> - facilitates the execution of child processes

=head1 SYNOPSIS

This object is used indirectly via an C<IO::Async::Loop>:

 use IO::Async::Loop;

 my $loop = IO::Async::Loop->new;

 ...

 $loop->run_child(
    command => "/bin/ps",

    on_finish => sub {
       my ( $pid, $exitcode, $stdout, $stderr ) = @_;
       my $status = ( $exitcode >> 8 );
       print "ps [PID $pid] exited with status $status\n";
    },
 );

 $loop->open_child(
    command => [ "/bin/ping", "-c4", "some.host" ],

    stdout => {
       on_read => sub {
          my ( $stream, $buffref, $eof ) = @_;
          while( $$buffref =~ s/^(.*)\n// ) {
             print "PING wrote: $1\n";
          }
          return 0;
       },
    },

    on_finish => sub {
       my ( $pid, $exitcode ) = @_;
       my $status = ( $exitcode >> 8 );
       ...
    },
 );

 my ( $pipeRd, $pipeWr ) = IO::Async::OS->pipepair;
 $loop->spawn_child(
    command => "/usr/bin/my-command",

    setup => [
       stdin  => [ "open", "<", "/dev/null" ],
       stdout => $pipeWr,
       stderr => [ "open", ">>", "/var/log/mycmd.log" ],
       chdir  => "/",
    ]

    on_exit => sub {
       my ( $pid, $exitcode ) = @_;
       my $status = ( $exitcode >> 8 );
       print "Command exited with status $status\n";
    },
 );

 $loop->spawn_child(
    code => sub {
       do_something; # executes in a child process
       return 1;
    },

    on_exit => sub {
       my ( $pid, $exitcode, $dollarbang, $dollarat ) = @_;
       my $status = ( $exitcode >> 8 );
       print "Child process exited with status $status\n";
       print " OS error was $dollarbang, exception was $dollarat\n";
    },
 );

=head1 DESCRIPTION

This module extends the functionality of the containing C<IO::Async::Loop> to
manage the execution of child processes. It acts as a central point to store
PID values of currently-running children, and to call the appropriate
continuation handler code when the process terminates. It provides useful
wrapper methods that set up filehandles and other child process details, and
to capture the child process's STDOUT and STDERR streams.

=cut

# Writing to variables of $> and $) have tricky ways to obtain error results
sub setuid
{
   my ( $uid ) = @_;

   $> = $uid; my $saved_errno = $!;
   $> == $uid and return 1;

   $! = $saved_errno;
   return undef;
}

sub setgid
{
   my ( $gid ) = @_;

   $) = $gid; my $saved_errno = $!;
   $) == $gid and return 1;

   $! = $saved_errno;
   return undef;
}

sub setgroups
{
   my @groups = @_;

   my $gid = $)+0;
   # Put the primary GID as the first group in the supplementary list, because
   # some operating systems ignore this position, expecting it to indeed be
   # the primary GID.
   # See
   #   https://rt.cpan.org/Ticket/Display.html?id=65127
   @groups = grep { $_ != $gid } @groups;

   $) = "$gid $gid " . join " ", @groups; my $saved_errno = $!;

   # No easy way to detect success or failure. Just check that we have all and
   # only the right groups
   my %gotgroups = map { $_ => 1 } split ' ', "$)";

   $! = $saved_errno;
   $gotgroups{$_}-- or return undef for @groups;
   keys %gotgroups or return undef;

   return 1;
}

# Internal constructor
sub new
{
   my $class = shift;
   my ( %params ) = @_;

   my $loop = delete $params{loop} or croak "Expected a 'loop'";

   my $self = bless {
      loop => $loop,
   }, $class;

   weaken( $self->{loop} );

   return $self;
}

=head1 METHODS

When active, the following methods are available on the containing C<Loop>
object.

=cut

=head2 $pid = $loop->spawn_child( %params )

This method creates a new child process to run a given code block or command.
The C<%params> hash takes the following keys:

=over 8

=item command => ARRAY or STRING

Either a reference to an array containing the command and its arguments, or a
plain string containing the command. This value is passed into perl's
C<exec> function.

=item code => CODE

A block of code to execute in the child process. It will be called in scalar
context inside an C<eval> block.

=item setup => ARRAY

A reference to an array which gives file descriptors to set up in the child
process before running the code or command. See below.

=item on_exit => CODE

A continuation to be called when the child processes exits. It will be invoked
in the following way:

 $on_exit->( $pid, $exitcode, $dollarbang, $dollarat )

The second argument is passed the plain perl C<$?> value.

=back

Exactly one of the C<command> or C<code> keys must be specified.

If the C<command> key is used, the given array or string is executed using the
C<exec> function. 

If the C<code> key is used, the return value will be used as the C<exit(2)>
code from the child if it returns (or 255 if it returned C<undef> or thows an
exception).

 Case          | ($exitcode >> 8)       | $dollarbang | $dollarat
 --------------+------------------------+-------------+----------
 exec succeeds | exit code from program |     0       |    ""
 exec fails    |         255            |     $!      |    ""
 $code returns |     return value       |     $!      |    ""
 $code dies    |         255            |     $!      |    $@

It is usually more convenient to use the C<open_child> method in simple cases
where an external program is being started in order to interact with it via
file IO, or even C<run_child> when only the final result is required, rather
than interaction while it is running.

=cut

sub spawn_child
{
   my $self = shift;
   my %params = @_;

   my $command = delete $params{command};
   my $code    = delete $params{code};
   my $setup   = delete $params{setup};
   my $on_exit = delete $params{on_exit};

   if( %params ) {
      croak "Unrecognised options to spawn: " . join( ",", keys %params );
   }

   defined $command and defined $code and 
      croak "Cannot pass both 'command' and 'code' to spawn";

   defined $command or defined $code or
      croak "Must pass one of 'command' or 'code' to spawn";

   my @setup = defined $setup ? $self->_check_setup_and_canonicise( $setup ) : ();

   my $loop = $self->{loop};

   my ( $readpipe, $writepipe );

   {
      # Ensure it's FD_CLOEXEC - this is a bit more portable than manually
      # fiddling with F_GETFL and F_SETFL (e.g. MSWin32)
      local $^F = -1;

      ( $readpipe, $writepipe ) = IO::Async::OS->pipepair or croak "Cannot pipe() - $!";
   }

   if( defined $command ) {
      my @command = ref( $command ) ? @$command : ( $command );

      $code = sub {
         no warnings;
         exec( @command );
         return;
      };
   }

   my $kid = $loop->fork( 
      code => sub {
         # Child
         close( $readpipe );
         $self->_spawn_in_child( $writepipe, $code, \@setup );
      },
   );

   # Parent
   close( $writepipe );
   return $self->_spawn_in_parent( $readpipe, $kid, $on_exit );
}

=head2 C<setup> array

This array gives a list of file descriptor operations to perform in the child
process after it has been C<fork(2)>ed from the parent, before running the code
or command. It consists of name/value pairs which are ordered; the operations
are performed in the order given.

=over 8

=item fdI<n> => ARRAY

Gives an operation on file descriptor I<n>. The first element of the array
defines the operation to be performed:

=over 4

=item [ 'close' ]

The file descriptor will be closed.

=item [ 'dup', $io ]

The file descriptor will be C<dup2(2)>ed from the given IO handle.

=item [ 'open', $mode, $file ]

The file descriptor will be opened from the named file in the given mode. The
C<$mode> string should be in the form usually given to the C<open> function;
such as '<' or '>>'.

=item [ 'keep' ]

The file descriptor will not be closed; it will be left as-is.

=back

A non-reference value may be passed as a shortcut, where it would contain the
name of the operation with no arguments (i.e. for the C<close> and C<keep>
operations).

=item IO => ARRAY

Shortcut for passing C<fdI<n>>, where I<n> is the fileno of the IO
reference. In this case, the key must be a reference that implements the
C<fileno> method. This is mostly useful for

 $handle => 'keep'

=item fdI<n> => IO

A shortcut for the C<dup> case given above.

=item stdin => ...

=item stdout => ...

=item stderr => ...

Shortcuts for C<fd0>, C<fd1> and C<fd2> respectively.

=item env => HASH

A reference to a hash to set as the child process's environment.

Note that this will entirely set a new environment, completely replacing the
existing one. If you want to simply add new keys or change the values of some
keys without removing the other existing ones, you can simply copy C<%ENV>
into the hash before setting new keys:

 env => {
    %ENV,
    ANOTHER => "key here",
 }

=item nice => INT

Change the child process's scheduling priority using C<POSIX::nice>.

=item chdir => STRING

Change the child process's working directory using C<chdir>.

=item setuid => INT

=item setgid => INT

Change the child process's effective UID or GID.

=item setgroups => ARRAY

Change the child process's groups list, to those groups whose numbers are
given in the ARRAY reference.

On most systems, only the privileged superuser change user or group IDs.
C<IO::Async> will B<NOT> check before detaching the child process whether
this is the case.

If setting both the primary GID and the supplementary groups list, it is
suggested to set the primary GID first. Moreover, some operating systems may
require that the supplementary groups list contains the primary GID.

=back

If no directions for what to do with C<stdin>, C<stdout> and C<stderr> are
given, a default of C<keep> is implied. All other file descriptors will be
closed, unless a C<keep> operation is given for them.

If C<setuid> is used, be sure to place it after any other operations that
might require superuser privileges, such as C<setgid> or opening special
files.

=cut

sub _check_setup_and_canonicise
{
   my $self = shift;
   my ( $setup ) = @_;

   ref $setup eq "ARRAY" or croak "'setup' must be an ARRAY reference";

   return () if !@$setup;

   my @setup;

   my $has_setgroups;

   foreach my $i ( 0 .. $#$setup / 2 ) {
      my ( $key, $value ) = @$setup[$i*2, $i*2 + 1];

      # Rewrite stdin/stdout/stderr
      $key eq "stdin"  and $key = "fd0";
      $key eq "stdout" and $key = "fd1";
      $key eq "stderr" and $key = "fd2";

      # Rewrite other filehandles
      ref $key and eval { $key->fileno; 1 } and $key = "fd" . $key->fileno;

      if( $key =~ m/^fd(\d+)$/ ) {
         my $fd = $1;
         my $ref = ref $value;

         if( !$ref ) {
            $value = [ $value ];
         }
         elsif( $ref eq "ARRAY" ) {
            # Already OK
         }
         elsif( $ref eq "GLOB" or eval { $value->isa( "IO::Handle" ) } ) {
            $value = [ 'dup', $value ];
         }
         else {
            croak "Unrecognised reference type '$ref' for file descriptor $fd";
         }

         my $operation = $value->[0];
         grep { $_ eq $operation } qw( open close dup keep ) or 
            croak "Unrecognised operation '$operation' for file descriptor $fd";
      }
      elsif( $key eq "env" ) {
         ref $value eq "HASH" or croak "Expected HASH reference for 'env' setup key";
      }
      elsif( $key eq "nice" ) {
         $value =~ m/^\d+$/ or croak "Expected integer for 'nice' setup key";
      }
      elsif( $key eq "chdir" ) {
         # This isn't a purely watertight test, but it does guard against
         # silly things like passing a reference - directories such as
         # ARRAY(0x12345) are unlikely to exist
         -d $value or croak "Working directory '$value' does not exist";
      }
      elsif( $key eq "setuid" ) {
         $value =~ m/^\d+$/ or croak "Expected integer for 'setuid' setup key";
      }
      elsif( $key eq "setgid" ) {
         $value =~ m/^\d+$/ or croak "Expected integer for 'setgid' setup key";
         $has_setgroups and carp "It is suggested to 'setgid' before 'setgroups'";
      }
      elsif( $key eq "setgroups" ) {
         ref $value eq "ARRAY" or croak "Expected ARRAY reference for 'setgroups' setup key";
         m/^\d+$/ or croak "Expected integer in 'setgroups' array" for @$value;
         $has_setgroups = 1;
      }
      else {
         croak "Unrecognised setup operation '$key'";
      }

      push @setup, $key => $value;
   }

   return @setup;
}

sub _spawn_in_parent
{
   my $self = shift;
   my ( $readpipe, $kid, $on_exit ) = @_;

   my $loop = $self->{loop};

   # We need to wait for both the errno pipe to close, and for waitpid
   # to give us an exit code. We'll form two closures over these two
   # variables so we can cope with those happening in either order

   my $dollarbang;
   my ( $dollarat, $length_dollarat );
   my $exitcode;
   my $pipeclosed = 0;

   $loop->add( IO::Async::Stream->new(
      notifier_name => "statuspipe,kid=$kid",

      read_handle => $readpipe,

      on_read => sub {
         my ( $self, $buffref, $eof ) = @_;

         if( !defined $dollarbang ) {
            if( length( $$buffref ) >= 2 * LENGTH_OF_I ) {
               ( $dollarbang, $length_dollarat ) = unpack( "II", $$buffref );
               substr( $$buffref, 0, 2 * LENGTH_OF_I, "" );
               return 1;
            }
         }
         elsif( !defined $dollarat ) {
            if( length( $$buffref ) >= $length_dollarat ) {
               $dollarat = substr( $$buffref, 0, $length_dollarat, "" );
               return 1;
            }
         }

         if( $eof ) {
            $dollarbang = 0  if !defined $dollarbang;
            if( !defined $length_dollarat ) {
               $length_dollarat = 0;
               $dollarat = "";
            }

            $pipeclosed = 1;

            if( defined $exitcode ) {
               local $! = $dollarbang;
               $on_exit->( $kid, $exitcode, $!, $dollarat );
            }
         }

         return 0;
      }
   ) );

   $loop->watch_child( $kid => sub { 
      ( my $kid, $exitcode ) = @_;

      if( $pipeclosed ) {
         local $! = $dollarbang;
         $on_exit->( $kid, $exitcode, $!, $dollarat );
      }
   } );

   return $kid;
}

sub _spawn_in_child
{
   my $self = shift;
   my ( $writepipe, $code, $setup ) = @_;

   my $exitvalue = eval {
      # Map of which handles will be in use by the end
      my %fd_in_use = ( 0 => 1, 1 => 1, 2 => 1 ); # Keep STDIN, STDOUT, STDERR

      # Count of how many times we'll need to use the current handles.
      my %fds_refcount = %fd_in_use;

      # To dup2() without clashes we might need to temporarily move some handles
      my %dup_from;

      my $max_fd = 0;
      my $writepipe_clashes = 0;

      if( @$setup ) {
         # The writepipe might be in the way of a setup filedescriptor. If it
         # is we'll have to dup2 it out of the way then close the original.
         foreach my $i ( 0 .. $#$setup/2 ) {
            my ( $key, $value ) = @$setup[$i*2, $i*2 + 1];
            $key =~ m/^fd(\d+)$/ or next;
            my $fd = $1;

            $max_fd = $fd if $fd > $max_fd;
            $writepipe_clashes = 1 if $fd == fileno $writepipe;

            my ( $operation, @params ) = @$value;

            $operation eq "close" and do {
               delete $fd_in_use{$fd};
               delete $fds_refcount{$fd};
            };

            $operation eq "dup" and do {
               $fd_in_use{$fd} = 1;

               my $fileno = fileno $params[0];
               # Keep a count of how many times it will be dup'ed from so we
               # can close it once we've finished
               $fds_refcount{$fileno}++;

               $dup_from{$fileno} = $fileno;
            };

            $operation eq "keep" and do {
               $fds_refcount{$fd} = 1;
            };
         }
      }

      foreach ( IO::Async::OS->potentially_open_fds ) {
         next if $fds_refcount{$_};
         next if $_ == fileno $writepipe;
         POSIX::close( $_ );
      }

      if( @$setup ) {
         if( $writepipe_clashes ) {
            $max_fd++;

            dup2( fileno $writepipe, $max_fd ) or die "Cannot dup2(writepipe to $max_fd) - $!\n";
            undef $writepipe;
            open( $writepipe, ">&=$max_fd" ) or die "Cannot fdopen($max_fd) as writepipe - $!\n";
         }

         foreach my $i ( 0 .. $#$setup/2 ) {
            my ( $key, $value ) = @$setup[$i*2, $i*2 + 1];

            if( $key =~ m/^fd(\d+)$/ ) {
               my $fd = $1;
               my( $operation, @params ) = @$value;

               $operation eq "dup"   and do {
                  my $from = fileno $params[0];

                  if( $from != $fd ) {
                     if( exists $dup_from{$fd} ) {
                        defined( $dup_from{$fd} = dup( $fd ) ) or die "Cannot dup($fd) - $!";
                     }

                     my $real_from = $dup_from{$from};

                     POSIX::close( $fd );
                     dup2( $real_from, $fd ) or die "Cannot dup2($real_from to $fd) - $!\n";
                  }

                  $fds_refcount{$from}--;
                  if( !$fds_refcount{$from} and !$fd_in_use{$from} ) {
                     POSIX::close( $from );
                     delete $dup_from{$from};
                  }
               };

               $operation eq "open"  and do {
                  my ( $mode, $filename ) = @params;
                  open( my $fh, $mode, $filename ) or die "Cannot open('$mode', '$filename') - $!\n";

                  my $from = fileno $fh;
                  dup2( $from, $fd ) or die "Cannot dup2($from to $fd) - $!\n";

                  close $fh;
               };
            }
            elsif( $key eq "env" ) {
               %ENV = %$value;
            }
            elsif( $key eq "nice" ) {
               nice( $value ) or die "Cannot nice($value) - $!";
            }
            elsif( $key eq "chdir" ) {
               chdir( $value ) or die "Cannot chdir('$value') - $!";
            }
            elsif( $key eq "setuid" ) {
               setuid( $value ) or die "Cannot setuid('$value') - $!";
            }
            elsif( $key eq "setgid" ) {
               setgid( $value ) or die "Cannot setgid('$value') - $!";
            }
            elsif( $key eq "setgroups" ) {
               setgroups( @$value ) or die "Cannot setgroups() - $!";
            }
         }
      }

      $code->();
   };

   my $writebuffer = "";
   $writebuffer .= pack( "I", $!+0 );
   $writebuffer .= pack( "I", length( $@ ) ) . $@;

   syswrite( $writepipe, $writebuffer );

   return $exitvalue;
}

=head1 AUTHOR

Paul Evans <leonerd@leonerd.org.uk>

=cut

0x55AA;