#!/usr/bin/perl -w # dgit repos policy hook script for Debian # # Copyright (C) 2015-2019 Ian Jackson # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . use strict; use Debian::Dgit::Infra; # must precede Debian::Dgit; - can change @INC! use Debian::Dgit qw(:DEFAULT :policyflags); setup_sigwarn(); use POSIX; use JSON; use File::Temp qw(tempfile); use DBI; use IPC::Open2; use Data::Dumper; use Debian::Dgit::Policy::Debian; initdebug('%'); enabledebuglevel $ENV{'DGIT_DRS_DEBUG'}; END { $? = 127; } # deliberate exit uses _exit our $distro = shift @ARGV // die "need DISTRO"; our $repos = shift @ARGV // die "need DGIT-REPOS-DIR"; our $dgitlive = shift @ARGV // die "need DGIT-LIVE-DIR"; our $distrodir = shift @ARGV // die "need DISTRO-DIR"; our $action = shift @ARGV // die "need ACTION"; our $publicmode = 02775; our $new_upload_propagation_slop = 3600*4 + 100;# fixme config; our $poldbh; our $pkg; our $pkgdir; our ($pkg_exists,$pkg_secret); our $stderr; our ($version,$suite,$tagname); our %deliberately; # We assume that it is not possible for NEW to have a version older # than sid. # Whenever pushing, we check for # source-package-local tainted history # global tainted history # can be overridden by --deliberately except for an admin prohib taint # # ALL of the following apply only if history is secret: # # if NEW has no version, or a version which is not in our history[1] # (always) # check all suites # if any suite's version is in our history[1], publish our history # otherwise discard our history, # tainting --deliberately-include-questionable-history # # if NEW has a version which is in our history[1] # (on push only) # require explicit specification of one of # --deliberately-include-questionable-history # --deliberately-not-fast-forward # (latter will taint old NEW version --d-i-q-h) # (otherwise) # leave it be # # [1] looking for the relevant git tag for the version number and not # caring what that tag refers to. # # When we are doing a push to a fresh repo, any version will do: in # this case, this is the first dgit upload of an existing package, # and we trust that the uploader hasn't included in their git # history any previous non-dgit uploads. # # A wrinkle: if we approved a push recently, we treat NEW as having # a version which is in our history. This is because the package may # still be being uploaded. (We record this using the timestamp of the # package's git repo directory.) # We aim for the following invariants and properties: # # - .dsc of published dgit package will have corresponding publicly # visible dgit-repo (soon) # # - when a new package is rejected we help maintainer avoid # accidentally including bad objects in published dgit history # # - .dsc of NEW dgit package has corresponding dgit-repo but not # publicly readable sub apiquery ($) { my ($subpath) = @_; local $/=undef; my $dgit = "$dgitlive/dgit"; $dgit = "dgit" if !stat_exists $dgit; my $cmd = "$dgit -d$distro \$DGIT_TEST_OPTS"; $cmd .= " -".("D" x $debuglevel) if $debuglevel; $cmd .= " archive-api-query $subpath"; printdebug "apiquery $cmd\n"; $!=0; $?=0; my $json = `$cmd`; defined $json && !$? or die "$subpath $! $?"; my $r = decode_json $json; my $d = new Data::Dumper([$r], [qw(r)]); printdebug "apiquery $subpath | ", $d->Dump() if $debuglevel>=2; return $r; } sub vsn_in_our_history ($) { my ($vsn) = @_; # Eventually, when we withdraw support for old-format (DEP-14 # namespace) tags, we will need to change this to only look # for debiantag_new. See the commit # "Tag change: Update dgit-repos-policy-debian" # (reverting which is a good start for that change). my @tagrefs = map { "refs/tags/".$_ } debiantags $vsn, $distro; printdebug " checking history vsn=$vsn tagrefs=@tagrefs\n"; open F, "-|", qw(git for-each-ref), @tagrefs; $_ = ; close F; return 1 if defined && m/\S/; die "$pkg tagrefs @tagrefs $? $!" if $?; return 0; } sub specific_suite_has_suitable_vsn ($$) { my ($suite, $vsn_check) = @_; # tests $vsn_check->($version) my $in_suite = apiquery "dsc_in_suite/$suite/$pkg"; foreach my $entry (@$in_suite) { my $vsn = $entry->{version}; die "$pkg ?" unless defined $vsn; printdebug " checking history found suite=$suite vsn=$vsn\n"; return 1 if $vsn_check->($vsn); } return 0; } sub new_has_vsn_in_our_history () { return specific_suite_has_suitable_vsn('new', \&vsn_in_our_history); } sub good_suite_has_suitable_vsn ($) { my ($vsn_check) = @_; # as for specific_suite_has_specific_vsn my $suites = apiquery "suites"; foreach my $suitei (@$suites) { my $suite = $suitei->{name}; die unless defined $suite; next if $suite =~ m/\bnew$/; return 1 if specific_suite_has_suitable_vsn($suite, $vsn_check); } return 0; } sub statpackage () { $pkgdir = "$repos/$pkg.git"; if (!stat_exists $pkgdir) { printdebug "statpackage $pkg => ENOENT\n"; $pkg_exists = 0; } else { $pkg_exists = 1; $pkg_secret = !!(~(stat _)[2] & 05); printdebug "statpackage $pkg => exists, secret=$pkg_secret.\n"; } } sub getpackage () { die "need PACKAGE" unless @ARGV >= 1; $pkg = shift @ARGV; die unless $pkg =~ m/^$package_re$/; statpackage(); } sub add_taint ($$) { my ($refobj, $reason) = @_; printdebug "TAINTING $refobj\n", (map { "\%| $_" } split "\n", $reason), "\n"; my $tf = new File::Temp or die $!; print $tf "$refobj^0\n" or die $!; flush $tf or die $!; seek $tf,0,0 or die $!; my $gcfpid = open GCF, "-|"; defined $gcfpid or die $!; if (!$gcfpid) { open STDIN, "<&", $tf or die $!; exec 'git', 'cat-file', '--batch'; die $!; } close $tf or die $!; $_ = ; defined $_ or die; m/^(\w+) (\w+) (\d+)\n/ or die "$_ ?"; my $gitobjid = $1; my $gitobjtype = $2; my $bytes = $3; my $gitobjdata; if ($gitobjtype eq 'commit' or $gitobjtype eq 'tag') { $!=0; read GCF, $gitobjdata, $bytes == $bytes or die "$gitobjid $bytes $!"; } close GCF; $poldbh->do("INSERT INTO taints". " (package, gitobjid, gitobjtype, gitobjdata, time, comment)". " VALUES (?,?,?,?,?,?)", {}, $pkg, $gitobjid, $gitobjtype, $gitobjdata, time, $reason); my $taint_id = $poldbh->last_insert_id(undef,undef,"taints","taint_id"); die unless defined $taint_id; $poldbh->do("INSERT INTO taintoverrides". " (taint_id, deliberately)". " VALUES (?, '--deliberately-include-questionable-history')", {}, $taint_id); } sub add_taint_by_tag ($$) { my ($tagname,$refobjid) = @_; add_taint($refobjid, "tag $tagname referred to this object in git tree but all". " previously pushed versions were found to have been". " removed from NEW (ie, rejected) (or never arrived)"); } sub check_package () { # This is not read-only, but it is safe to call within a # policy-client-query, because it will do one of the following: # # * Do nothing much. # * Find that the package has been ACCEPTed, make it not secret, # and mirror it. # * Find that the package has been REJECTed and add some taints # to the db. This may be rolled back, but that's OK because # future calls will discover the same. # # (This is a consequence of this function being idempotent, # cron-callable, and correct in the sense that it doesn't make # wrongly-sequenced updates to both the DB and the FS.) # # An (often useful) side-effect is to chdir to the package repo. return 0 unless $pkg_exists; return 0 unless $pkg_secret; printdebug "check_package\n"; chdir $pkgdir or die "$pkgdir $!"; stat '.' or die "$pkgdir $!"; my $mtime = ((stat _)[9]); my $age = time - $mtime; printdebug "check_package age=$age\n"; if (good_suite_has_suitable_vsn(\&vsn_in_our_history)) { chmod $publicmode, "." or die $!; $pkg_secret = 0; eval { my $mirror_hook = "$distrodir/mirror-hook"; if (stat_exists $mirror_hook) { my @mirror_cmd = ($mirror_hook, $distrodir, "updated-hook", $pkg); debugcmd " (mirror)",@mirror_cmd; system @mirror_cmd and failedcmd @mirror_cmd; } }; if (length $@) { chomp $@; print STDERR "policy hook: warning:". " failed to mirror publication of \`$pkg':". " $@\n"; } return 0; } return 0 if $age < $new_upload_propagation_slop; return 0 if new_has_vsn_in_our_history(); printdebug "check_package secret, deleted, tainting\n"; git_for_each_ref('refs/tags', sub { my ($objid,$objtype,$fullrefname,$tagname) = @_; add_taint_by_tag($tagname,$objid); }); return FRESHREPO; } sub action_check_package () { getpackage(); return check_package(); } sub getpushinfo () { die unless @ARGV >= 4; $version = shift @ARGV; $suite = shift @ARGV; $tagname = shift @ARGV; my $delibs = shift @ARGV; foreach my $delib (split /\,/, $delibs) { $deliberately{$delib} = 1; } } sub deliberately ($) { return $deliberately{"--deliberately-$_[0]"}; } sub package_questionable_head_msg () { # This lets us reuse some of the same code, and the same message, # for checking during actual push, and for pre-push taint check. # # In principle it might be nicer to unify this code, with some # kind of super realistic dry run push function. However, that # dry run function would have to involve sending the client's git # objects to the server. We wouldn't want to do that twice, so it # would have to be cached somehow, but we don't want to allow # un-signed things to lurk about on the server. (The super dry # run mode would have to involve the server tolerating an unsigned # tag, or something, but that would be OK in principle.) The dry # run mode would have to be careful about what subsets of the # actions it took. # # So instead we apply ad-hoc checks, with separate implementations # for the push path, and the pre-push checks. The tainted-objects # policy-client-query exists to support this: it allows the # efficient reimplementation of the tainted objects check, without # transferring the objects to the server. return undef unless $pkg_exists; return undef unless $pkg_secret; # TODO: ideally this would be translated at the client end but # that would involve marking it i_ here (which is part of # dgit-intrastructure.deb) and then having the string appear in # the po for dgit.deb. return <= 1; my $freshrepo = shift @ARGV; my $initq = $poldbh->prepare(<execute($pkg); my @objscatcmd = qw(git); push @objscatcmd, qw(--git-dir), $freshrepo if length $freshrepo; push @objscatcmd, qw(cat-file --batch); debugcmd '|',@objscatcmd if $debuglevel>=2; my @taintids; my $chkinput = tempfile(); while (my $taint = $initq->fetchrow_hashref()) { push @taintids, $taint->{taint_id}; print $chkinput $taint->{gitobjid}, "\n" or die $!; printdebug '|> ', $taint->{gitobjid}, "\n" if $debuglevel>=2; } flush $chkinput or die $!; seek $chkinput,0,0 or die $!; my $checkpid = open CHKOUT, "-|" // die $!; if (!$checkpid) { open STDIN, "<&", $chkinput or die $!; delete $ENV{GIT_ALTERNATE_OBJECT_DIRECTORIES}; # ^ recent versions of git set this in the environment of # receive hooks. This can cause us to see things which # the user is trying to abolish. exec @objscatcmd or die $!; } my ($taintinfoq,$overridesanyq,$untaintq,$overridesq); my $overridesstmt = <; die "($taintid @objscatcmd) $!" unless defined $_; printdebug "|< ", $_ if $debuglevel>=2; next if m/^\w+ missing$/; die "($taintid @objscatcmd) $_ ?" unless m/^(\w+) (\w+) (\d+)\s/; my ($objid,$objtype,$nbytes) = ($1,$2,$3); my $drop; (read CHKOUT, $drop, $nbytes) == $nbytes or die "($taintid @objscatcmd) $!"; $!=0; $_ = ; die "($taintid @objscatcmd) $!" unless defined $_; die "($taintid @objscatcmd) $_ ?" if m/\S/; $taintinfoq ||= $poldbh->prepare(<execute($taintid); my $ti = $taintinfoq->fetchrow_hashref(); die "($taintid)" unless $ti; printdebug "SQL overrides: @overridesv $taintid /\n$overridesstmt\n"; $overridesq ||= $poldbh->prepare($overridesstmt); $overridesq->execute(@overridesv, $taintid); my ($ovwhy) = $overridesq->fetchrow_array(); my $ovstatus; if (!defined $ovwhy) { $overridesanyq ||= $poldbh->prepare(<execute($taintid); my ($ovany) = $overridesanyq->fetchrow_array(); $ovwhy = $ovany ? '' : undef; $mustreject = 1; } else { $untaintq ||= $poldbh->prepare(<execute($taintid); } $stderr .= tainted_objects_message({ %$ti, gitobjid => $objid, gitobjtype => $objtype, }, $ovstatus, \%hinted); } close CHKOUT; if ($mustreject) { $stderr .= <> 1)); printdebug sprintf "chmod %#o (was %#o) %s\n", $newmode, $oldmode, $freshrepo; chmod $newmode, $freshrepo or die $!; utime undef, undef, $freshrepo or die $!; } } return 0; } sub action_check_list () { opendir L, "$repos" or die "$repos $!"; while (defined (my $dent = readdir L)) { next unless $dent =~ m/^($package_re)\.git$/; $pkg = $1; statpackage(); next unless $pkg_exists; next unless $pkg_secret; print "$pkg\n" or die $!; } closedir L or die $!; close STDOUT or die $!; return 0; } sub polclqu_tainted_objects () { check_package(); my $suite = shift @ARGV // die "need SUITE"; my $topq = $poldbh->selectall_arrayref(< {} }, $pkg); foreach my $row (@$topq) { my $delibs = $poldbh->selectall_arrayref(<{taint_id}); $row->{overrides} = [ map { $_->[0] } @$delibs ]; delete $row->{taint_id}; # remove any undef entries, for nice json foreach my $k (keys %$row) { defined $row->{$k} or delete $row->{$k}; } } if (defined(my $questionable_head_msg = package_questionable_head_msg())) { # We would reject this in push. We need to arrange that the # client will detect this. The object tainting mechanism can # be (ab)used for this: we tell the client that HEAD is tainted. my $head = git_rev_parse(server_ref($suite).'~0'); push @$topq, { gitobjtype => 'commit', gitobjid => $head, comment => 'Package is in NEW, need a --deliberately', hint => $questionable_head_msg, # If the client was told -d-i-q-h, then they can go ahead - # our push will be placated. If the client was told -d-n-f-f # then presumably they aren't sending the old HEAD, so there # won't be a tainted object reachable from their head - so # -d-n-f-f isn't listed here. After all, this is for controlling # when the client will regard this as a blocking problem, not # for advising the user about overriding options. overrides => [qw(--deliberately-include-questionable-history)] }; } print encode_json $topq, "\n" or die $!; } my $fn_name; if ($action eq 'policy-client-query') { getpackage(); my $query = shift @ARGV // die "need QUERY-OP"; $fn_name = "polclqu_$query"; } else { $fn_name = "action_$action"; } $fn_name =~ y/-/_/; my $fn = ${*::}{$fn_name}; if (!$fn) { printdebug "dgit-repos-policy-debian: unknown $fn_name\n"; _exit 0; } my $sleepy=0; my $rcode; my $db_busy_exception= 'Debian::Dgit::Policy::Debian::DB_BUSY'; my @orgargv = @ARGV; for (;;) { @ARGV = @orgargv; eval { poldb_setup(poldb_path($repos), sub { $poldbh->{HandleError} = sub { return 0 unless $poldbh->err == 5; # SQLITE_BUSY, not in .pm :-( die bless { }, $db_busy_exception; }; eval ($ENV{'DGIT_RPD_TEST_DBLOOP_HOOK'}//''); die $@ if length $@; # used by tests/tests/debpolicy-dbretry }); $stderr = ''; $rcode = $fn->(); if ($action eq 'policy-client-query') { close STDOUT or die $!; _exit 0; } else { die unless defined $rcode; $poldbh->commit; printdebug "poldbh commit\n"; } }; last unless length $@; die $@ unless ref $@ eq $db_busy_exception; die if $sleepy >= 20; $sleepy++; print STDERR "[policy database busy, retrying (${sleepy}s)]\n"; eval { $poldbh->rollback; }; } print STDERR $stderr or die $!; flush STDERR or die $!; _exit $rcode;