summaryrefslogtreecommitdiff
path: root/dgit
diff options
context:
space:
mode:
authorIan Jackson <ijackson@chiark.greenend.org.uk>2016-08-14 17:55:44 +0100
committerIan Jackson <ijackson@chiark.greenend.org.uk>2016-09-06 00:31:18 +0100
commitc226efe2ca372f479fc70f88a90e9bc0a9fab7b3 (patch)
tree60bc29dc9c7ff7a98de491090f9ff75c2652404e /dgit
parent80ffc69e9d5cc652a611e84df5d11d80d8995ff6 (diff)
dgit: New fetch algorithm - try to tolerate in-archive copies
Big comment explains. Amongst other things: * Introduce the mergeinfo concept * Abolish fetchspec in favour of lrfetchref * Synthesize a pseudo-merge linking the archive's .dsc with the dgit server's git branch, as appropriate. * Introduce mergeinfo_version. I have tried to ensure that where the import done by previous versions of dgit would be correct, the new import is the same. Signed-off-by: Ian Jackson <ijackson@chiark.greenend.org.uk>
Diffstat (limited to 'dgit')
-rwxr-xr-xdgit320
1 files changed, 274 insertions, 46 deletions
diff --git a/dgit b/dgit
index 53ccd72..f038da3 100755
--- a/dgit
+++ b/dgit
@@ -155,6 +155,7 @@ sub lrref () { return "refs/remotes/$remotename/".server_branch($csuite); }
sub rrref () { return server_ref($csuite); }
sub lrfetchrefs () { return "refs/dgit-fetch/$csuite"; }
+sub lrfetchref () { return lrfetchrefs.'/'.server_branch($csuite); }
sub stripepoch ($) {
my ($vsn) = @_;
@@ -196,11 +197,6 @@ sub no_such_package () {
exit 4;
}
-sub fetchspec () {
- local $csuite = '*';
- return "+".rrref().":".lrref();
-}
-
sub changedir ($) {
my ($newdir) = @_;
printdebug "CD $newdir\n";
@@ -1220,9 +1216,11 @@ sub get_archive_dsc () {
my $fmt = getfield $dsc, 'Format';
fail "unsupported source format $fmt, sorry" unless $format_ok{$fmt};
$dsc_checked = !!$digester;
+ printdebug "get_archive_dsc: Version ".(getfield $dsc, 'Version')."\n";
return;
}
$dsc = undef;
+ printdebug "get_archive_dsc: nothing in archive, returning undef\n";
}
sub check_for_git ();
@@ -1292,7 +1290,7 @@ sub create_remote_git_repo () {
}
}
-our ($dsc_hash,$lastpush_hash);
+our ($dsc_hash,$lastpush_mergeinput);
our $ud = '.git/dgit/unpack';
@@ -1471,7 +1469,8 @@ sub check_for_vendor_patches () {
"distro being accessed");
}
-sub generate_commit_from_dsc () {
+sub generate_commits_from_dsc () {
+ # See big comment in fetch_from_archive, below.
prep_ud();
changedir $ud;
@@ -1520,10 +1519,16 @@ $changes
# imported from the archive
END
close C or die $!;
- my $outputhash = make_commit qw(../commit.tmp);
+ my $rawimport_hash = make_commit qw(../commit.tmp);
my $cversion = getfield $clogp, 'Version';
+ my $rawimport_mergeinput = {
+ Commit => $rawimport_hash,
+ Info => "Import of source package",
+ };
+ my @output = ($rawimport_mergeinput);
progress "synthesised git commit from .dsc $cversion";
- if ($lastpush_hash) {
+ if ($lastpush_mergeinput) {
+ my $lastpush_hash = $lastpush_mergeinput->{Commit};
runcmd @git, qw(reset -q --hard), $lastpush_hash;
runcmd qw(sh -ec), 'dpkg-parsechangelog >>../changelogold.tmp';
my $oldclogp = parsecontrol('../changelogold.tmp','previous changelog');
@@ -1531,18 +1536,10 @@ END
my $vcmp =
version_compare($oversion, $cversion);
if ($vcmp < 0) {
- # git upload/ is earlier vsn than archive, use archive
- open C, ">../commit2.tmp" or die $!;
- print C <<END or die $!;
-tree $tree
-parent $lastpush_hash
-parent $outputhash
-author $authline
-committer $authline
-
+ @output = ($rawimport_mergeinput, $lastpush_mergeinput,
+ { Message => <<END, ReverseParents => 1 });
Record $package ($cversion) in archive suite $csuite
END
- $outputhash = make_commit qw(../commit2.tmp);
} elsif ($vcmp > 0) {
print STDERR <<END or die $!;
@@ -1550,18 +1547,14 @@ Version actually in archive: $cversion (older)
Last allegedly pushed/uploaded: $oversion (newer or same)
$later_warning_msg
END
- $outputhash = $lastpush_hash;
+ @output = $lastpush_mergeinput;
} else {
- $outputhash = $lastpush_hash;
+ @output = $lastpush_mergeinput;
}
}
changedir '../../../..';
- runcmd @git, qw(update-ref -m),"dgit fetch import $cversion",
- 'DGIT_ARCHIVE', $outputhash;
- cmdoutput @git, qw(log -n2), $outputhash;
- # ... gives git a chance to complain if our commit is malformed
rmtree($ud);
- return $outputhash;
+ return @output;
}
sub complete_file_from_dsc ($$) {
@@ -1610,10 +1603,9 @@ sub ensure_we_have_orig () {
}
sub git_fetch_us () {
- my @specs = (fetchspec());
- push @specs,
+ my @specs =
map { "+refs/$_/*:".lrfetchrefs."/$_/*" }
- qw(tags heads);
+ qw(tags heads), $branchprefix;
runcmd_ordryrun_local @git, qw(fetch -p -n -q), access_giturl(), @specs;
my %here;
@@ -1639,6 +1631,21 @@ sub git_fetch_us () {
});
}
+sub mergeinfo_getclogp ($) {
+ my ($mi) = @_;
+ # Ensures that $mi->{Clogp} exists and returns it
+ return $mi->{Clogp} if $mi->{Clogp};
+ my $mclog = ".git/dgit/clog-$mi->{Commit}";
+ mkpath '.git/dgit';
+ runcmd shell_cmd "exec >$mclog", @git, qw(cat-file blob),
+ "$mi->{Commit}:debian/changelog";
+ $mi->{Clogp} = parsechangelog("-l$mclog");
+}
+
+sub mergeinfo_version ($) {
+ return getfield( (mergeinfo_getclogp $_[0]), 'Version' );
+}
+
sub fetch_from_archive () {
# ensures that lrref() is what is actually in the archive,
# one way or another
@@ -1660,17 +1667,108 @@ sub fetch_from_archive () {
progress "no version available from the archive";
}
- $lastpush_hash = git_get_ref(lrref());
+ # If the archive's .dsc has a Dgit field, there are three
+ # relevant git commitids we need to choose between and/or merge
+ # together:
+ # 1. $dsc_hash: the Dgit field from the archive
+ # 2. $lastpush_hash: the suite branch on the dgit git server
+ # 3. $lastfetch_hash: our local tracking branch for the suite
+ #
+ # These may all be distinct and need not be in any fast forward
+ # relationship:
+ #
+ # If the dsc was pushed to this suite, then the server suite
+ # branch will have been updated; but it might have been pushed to
+ # a different suite and copied by the archive. Conversely a more
+ # recent version may have been pushed with dgit but not appeared
+ # in the archive (yet).
+ #
+ # $lastfetch_hash may be awkward because archive imports
+ # (particularly, imports of Dgit-less .dscs) are performed only as
+ # needed on individual clients, so different clients may perform a
+ # different subset of them - and these imports are only made
+ # public during push. So $lastfetch_hash may represent a set of
+ # imports different to a subsequent upload by a different dgit
+ # client.
+ #
+ # Our approach is as follows:
+ #
+ # As between $dsc_hash and $lastpush_hash: if $lastpush_hash is a
+ # descendant of $dsc_hash, then it was pushed by a dgit user who
+ # had based their work on $dsc_hash, so we should prefer it.
+ # Otherwise, $dsc_hash was installed into this suite in the
+ # archive other than by a dgit push, and (necessarily) after the
+ # last dgit push into that suite (since a dgit push would have
+ # been descended from the dgit server git branch); thus, in that
+ # case, we prefer the archive's version (and produce a
+ # pseudo-merge to overwrite the dgit server git branch).
+ #
+ # (If there is no Dgit field in the archive's .dsc then
+ # generate_commits_from_dsc uses the version numbers to decide
+ # whether the suite branch or the archive is newer. If the suite
+ # branch is newer it ignores the archive's .dsc; otherwise it
+ # generates an import of the .dsc, and produces a pseudo-merge to
+ # overwrite the suite branch with the archive contents.)
+ #
+ # The outcome of that part of the algorithm is the `public view',
+ # and is the same for all dgit clients: it does not depend on any
+ # unpublished history in the local tracking branch.
+ #
+ # As between the public view and the local tracking branch: The
+ # local tracking branch is only updated by dgit fetch, and
+ # whenever dgit fetch runs it includes the public view in the
+ # local tracking branch. Therefore if the public view is not
+ # descended from the local tracking branch, the local tracking
+ # branch must contain history which was imported from the archive
+ # but never pushed; and, its tip is now out of date. So, we make
+ # a pseudo-merge to overwrite the old imports and stitch the old
+ # history in.
+ #
+ # Finally: we do not necessarily reify the public view (as
+ # described above). This is so that we do not end up stacking two
+ # pseudo-merges. So what we actually do is figure out the inputs
+ # to any public view pseudo-merge and put them in @mergeinputs.
+
+ my @mergeinputs;
+ # $mergeinputs[]{Commit}
+ # $mergeinputs[]{Info}
+ # $mergeinputs[0] is the one whose tree we use
+ # (@mergeinputs is in the order we use in the actual commit)
+ #
+ # Also:
+ # $mergeinputs[]{Message} is a commit message to use
+ # $mergeinputs[]{ReverseParents} if def specifies that parent
+ # list should be in opposite order
+ # Such an entry has no Commit or Info. It applies only when found
+ # in the last entry. (This ugliness is to support making
+ # identical imports to previous dgit versions.)
+
+ my $lastpush_hash = git_get_ref(lrfetchref());
printdebug "previous reference hash=$lastpush_hash\n";
- my $hash;
+ $lastpush_mergeinput = $lastpush_hash && {
+ Commit => $lastpush_hash,
+ Info => "dgit suite branch on dgit git server",
+ };
+
+ my $lastfetch_hash = git_get_ref(lrref());
+ printdebug "fetch_from_archive: lastfetch=$lastfetch_hash\n";
+ my $lastfetch_mergeinput = $lastfetch_hash && {
+ Commit => $lastfetch_hash,
+ Info => "dgit client's archive history view",
+ };
+
+ my $dsc_mergeinput = $dsc_hash && {
+ Commit => $dsc_hash,
+ Info => "Dgit field in .dsc from archive",
+ };
+
if (defined $dsc_hash) {
fail "missing remote git history even though dsc has hash -".
- " could not find ref ".lrref().
- " (should have been fetched from ".access_giturl()."#".rrref().")"
+ " could not find ref ".rref()." at ".access_giturl()
unless $lastpush_hash;
- $hash = $dsc_hash;
ensure_we_have_orig();
if ($dsc_hash eq $lastpush_hash) {
+ @mergeinputs = $dsc_mergeinput
} elsif (is_fast_fwd($dsc_hash,$lastpush_hash)) {
print STDERR <<END or die $!;
@@ -1679,16 +1777,35 @@ Commit referred to by archive: $dsc_hash
Last allegedly pushed/uploaded: $lastpush_hash
$later_warning_msg
END
- $hash = $lastpush_hash;
+ @mergeinputs = ($lastpush_mergeinput);
} else {
- fail "git head (".lrref()."=$lastpush_hash) is not a ".
- "descendant of archive's .dsc hash ($dsc_hash)";
+ # Archive has .dsc which is not a descendant of the last dgit
+ # push. This can happen if the archive moves .dscs about.
+ # Just follow its lead.
+ if (is_fast_fwd($lastpush_hash,$dsc_hash)) {
+ progress "archive .dsc names newer git commit";
+ @mergeinputs = ($dsc_mergeinput);
+ } else {
+ progress "archive .dsc names other git commit, fixing up";
+ @mergeinputs = ($dsc_mergeinput, $lastpush_mergeinput);
+ }
}
} elsif ($dsc) {
- $hash = generate_commit_from_dsc();
+ @mergeinputs = generate_commits_from_dsc();
+ # We have just done an import. Now, our import algorithm might
+ # have been improved. But even so we do not want to generate
+ # a new different import of the same package. So if the
+ # version numbers are the same, just use our existing version.
+ # If the version numbers are different, the archive has changed
+ # (perhaps, rewound).
+ if ($lastfetch_mergeinput &&
+ !version_compare( (mergeinfo_version $lastfetch_mergeinput),
+ (mergeinfo_version $mergeinputs[0]) )) {
+ @mergeinputs = ($lastfetch_mergeinput);
+ }
} elsif ($lastpush_hash) {
# only in git, not in the archive yet
- $hash = $lastpush_hash;
+ @mergeinputs = ($lastpush_mergeinput);
print STDERR <<END or die $!;
Package not found in the archive, but has allegedly been pushed using dgit.
@@ -1707,12 +1824,123 @@ END
}
return 0;
}
- printdebug "current hash=$hash\n";
- if ($lastpush_hash) {
- fail "not fast forward on last upload branch!".
- " (archive's version left in DGIT_ARCHIVE)"
- unless is_fast_fwd($lastpush_hash, $hash);
+
+ if ($lastfetch_hash &&
+ !grep {
+ my $h = $_->{Commit};
+ $h and is_fast_fwd($lastfetch_hash, $h);
+ # If true, one of the existing parents of this commit
+ # is a descendant of the $lastfetch_hash, so we'll
+ # be ff from that automatically.
+ } @mergeinputs
+ ) {
+ # Otherwise:
+ push @mergeinputs, $lastfetch_mergeinput;
+ }
+
+ printdebug "fetch mergeinfos:\n";
+ foreach my $mi (@mergeinputs) {
+ if ($mi->{Info}) {
+ printdebug " commit $mi->{Commit} $mi->{Info}\n";
+ } else {
+ printdebug sprintf " ReverseParents=%d Message=%s",
+ $mi->{ReverseParents}, $mi->{Message};
+ }
+ }
+
+ my $compat_info= pop @mergeinputs
+ if $mergeinputs[$#mergeinputs]{Message};
+
+ @mergeinputs = grep { defined $_->{Commit} } @mergeinputs;
+
+ my $hash;
+ if (@mergeinputs > 1) {
+ # here we go, then:
+ my $tree_commit = $mergeinputs[0]{Commit};
+
+ my $tree = cmdoutput @git, qw(cat-file commit), $tree_commit;
+ $tree =~ m/\n\n/; $tree = $`;
+ $tree =~ m/^tree (\w+)$/m or die "$dsc_hash tree ?";
+ $tree = $1;
+
+ # We use the changelog author of the package in question as the
+ # author of this pseudo-merge. This is (roughly) correct if
+ # this commit is simply representing a non-dgit upload.
+ # (Roughly because it does not record sponsorship - but we
+ # don't have sponsorship info because that's in the .changes,
+ # which isn't in the archive.)
+ #
+ # But, it might be that we are representing archive history
+ # updates (including in-archive copies). These are not really
+ # the responsibility of the person who created the .dsc, but
+ # there is no-one whose name we should better use. (The
+ # author of the .dsc-named commit is clearly worse.)
+
+ my $useclogp = mergeinfo_getclogp $mergeinputs[0];
+ my $author = clogp_authline $useclogp;
+ my $cversion = getfield $useclogp, 'Version';
+
+ my $mcf = ".git/dgit/mergecommit";
+ open MC, ">", $mcf or die "$mcf $!";
+ print MC <<END or die $!;
+tree $tree
+END
+
+ my @parents = grep { $_->{Commit} } @mergeinputs;
+ @parents = reverse @parents if $compat_info->{ReverseParents};
+ print MC <<END or die $! foreach @parents;
+parent $_->{Commit}
+END
+
+ print MC <<END or die $!;
+author $author
+committer $author
+
+END
+
+ if (defined $compat_info->{Message}) {
+ print MC $compat_info->{Message} or die $!;
+ } else {
+ print MC <<END or die $!;
+Record $package ($cversion) in archive suite $csuite
+
+Record that
+END
+ my $message_add_info = sub {
+ my ($mi) = (@_);
+ my $mversion = mergeinfo_version $mi;
+ printf MC " %-20s %s\n", $mversion, $mi->{Info}
+ or die $!;
+ };
+
+ $message_add_info->($mergeinputs[0]);
+ print MC <<END or die $!;
+should be treated as descended from
+END
+ $message_add_info->($_) foreach @mergeinputs[1..$#mergeinputs];
+ }
+
+ close MC or die $!;
+ $hash = make_commit $mcf;
+ } else {
+ $hash = $mergeinputs[0]{Commit};
}
+ progress "fetch hash=$hash\n";
+
+ my $chkff = sub {
+ my ($lasth, $what) = @_;
+ return unless $lasth;
+ die "$lasth $hash $what ?" unless is_fast_fwd($lasth, $hash);
+ };
+
+ $chkff->($lastpush_hash, 'dgit repo server tip (last push)');
+ $chkff->($lastfetch_hash, 'local tracking tip (last fetch)');
+
+ runcmd @git, qw(update-ref -m), "dgit fetch $csuite",
+ 'DGIT_ARCHIVE', $hash;
+ cmdoutput @git, qw(log -n2), $hash;
+ # ... gives git a chance to complain if our commit is malformed
+
if (defined $skew_warning_vsn) {
mkpath '.git/dgit';
printdebug "SKEW CHECK WANT $skew_warning_vsn\n";
@@ -1732,7 +1960,8 @@ We were able to obtain only $got_vsn
END
}
}
- if ($lastpush_hash ne $hash) {
+
+ if ($lastfetch_hash ne $hash) {
my @upd_cmd = (@git, qw(update-ref -m), 'dgit fetch', lrref(), $hash);
if (act_local()) {
cmdoutput @upd_cmd;
@@ -1808,7 +2037,6 @@ sub clone ($) {
runcmd @git, qw(init -q);
my $giturl = access_giturl(1);
if (defined $giturl) {
- set_local_git_config "remote.$remotename.fetch", fetchspec();
open H, "> .git/HEAD" or die $!;
print H "ref: ".lref()."\n" or die $!;
close H or die $!;