#!/usr/bin/perl -w
#
# bagger.pl - convert a [mostly] linear git history into a "bag of patches"
# style mess where independent patches are applied in parallel.
#
use strict;
use Data::Dumper;
use Carp;

my $EOL = "\033[0K"; # delete to end of line

{   package Set;

    sub new {
        my $class = shift;
        my $self = { 'set' => {} };
        $self->{'set'}->{$_} = 1 for @_; # add given keys
        return bless($self, $class);
    }

    sub has {
        my ($self, $key) = @_;
        return defined( $self->{'set'}->{$key} );
    }

    sub add {
        my ($self, $key) = @_;
        $self->{'set'}->{$key} = 1;
        return undef;
    }

    sub del {
        my ($self, $key) = @_;
        delete $self->{'set'}->{$key};
        return undef;
    }

    sub add_set {
        my ($self, $other) = @_;
        $self->{'set'}->{$_} = 1 for keys %{ $other->{'set'} };
        return undef;
    }

    sub del_set {
        my ($self, $other) = @_;
        delete $self->{'set'}->{$_} for keys %{ $other->{'set'} };
        return undef;
    }

    sub to_list {
        my ($self) = @_;
        return sort { $a <=> $b } keys %{ $self->{set} };
    }

    sub to_string {
        my ($self) = @_;
        return join(",", $self->to_list);
    }

    sub is_single {
        my ($self) = @_;
        return (scalar keys %{ $self->{set} }) == 1;
    }

    1; }

# Hash mapping branch number to Set of its deps.
my %deps = ( 1 => Set->new(0) );

# Add a new dependency.
sub add_dep ($$) {
    my ($new_branch, $needs_old) = @_;
    $deps{$new_branch} = Set->new unless defined $deps{$new_branch};
    $deps{$new_branch}->add($needs_old);
    $deps{$new_branch}->add_set( $deps{$needs_old} );
    return undef;
}

# Remove dependency chains in the given set.
sub cull_deps ($) {
    my ($s) = @_;
    $s->del_set( $deps{$_} ) for $s->to_list;
}

my $context = 3; # number of lines of context when generating patches
my ($src, $dst);

# Turn branch number into name, preferably one that can't be
# mistaken for a hex encoded checksum.
#
sub b_name ($) { my ($num) = @_; return "b_$num"; }

# Check out the named branch.
sub checkout ($) {
    my ($b) = @_;
    system("(cd $dst && git checkout -q $b)");
    confess unless $? == 0;
}

# Returns 1 if the named branch exists in destination.
sub branch_exists ($) {
    my ($b_name) = @_;
    system("(cd $dst && git rev-parse --verify $b_name >/dev/null 2>&1)");
    return ($? == 0);
}

# Create a new branch from an existing one.
sub branch_off ($$) {
    my ($from_branch, $new_branch) = @_;
    if (!branch_exists($new_branch)) {
        system("(cd $dst && git checkout -q $from_branch -b $new_branch)");
        confess unless $? == 0;
    } else {
        # Messier.
        checkout($from_branch);

        system("(cd $dst && git branch -D $new_branch >/dev/null)");
        confess unless $? == 0;

        system("(cd $dst && git checkout -b $new_branch 2>/dev/null)");
        confess unless $? == 0;
    }
}

# Return the parent of the given revision in the source.
sub src_parent ($) {
    my ($curr) = @_;
    my $out = `(cd $src && git rev-parse --verify ${curr}^)`;
    confess unless $? == 0;
    chomp $out;
    return $out;
}

# Returns true if the given patch can be applied to current state of $dst.
sub can_apply_patch ($) {
    my ($patch) = @_;
    open F, "| (cd $dst && git apply --check 2>/dev/null)" or confess;
    print F $patch;
    close F;
    return ($? == 0);
}

# Actually apply the patch via "am" subcommand.
sub apply_patch ($) {
    my ($patch) = @_;
    open F, "| (cd $dst && git am 2>/dev/null)" or confess;
    print F $patch;
    close F;
    confess unless $? == 0;
}

# Create a new branch as a merge of given numbered branches.
sub merge {
    my $new_branch = shift;
    confess unless scalar @_ > 0;
    my $first = shift;

    branch_off(b_name($first), $new_branch);
    return if scalar @_ == 0; # nothing to merge
    my $branches = join(" ", map { b_name($_) } @_);
    system("(cd $dst && git merge -q $branches >/dev/null)");
    confess "*** MERGE FAILURE ***" unless $? == 0;
}

# Try merge of given set, return 1 on success.
sub try_merge ($$$$) {
    my ($branch_num, $parent_set, $patch, $trycache) = @_;

    my $s = Set->new( $parent_set->to_list ); # copy
    cull_deps($s);
    return 0 if $s->is_single; # skip non-merges

    # Check cache.
    my $key = $s->to_string;
    return $trycache->{$key} if defined $trycache->{$key};

    my $range = $s->to_string;
    my $maxlen = 78 - length("Trying - failure");
    $range = "..." . substr($range, -($maxlen-3)) if length $range > $maxlen;
    print "Trying $range - $EOL";

    my $new_branch = b_name($branch_num);
    merge($new_branch, reverse $s->to_list); # git needs latest branch first
    my $result = can_apply_patch($patch);

    print $result ? "SUCCESS\n" : "failure$EOL\r";
    $trycache->{$key} = $result;
    return $result;
}

# --- execution starts here ---

die "usage: $0 src_repo dst_repo\n" unless scalar @ARGV == 2;
($src, $dst) = @ARGV;
die "source dir \"$src\" doesn't exist" unless -d $src;
die "output dir \"$dst\" already exists" if -d $dst;

print "==> Initializing dest repo.\n";
system("(mkdir $dst && cd $dst && git init)");
die unless $? == 0;

print "==> Getting a list of revisions.\n";
my @revs;
{
    my $opts = "--no-merges --cherry-pick --topo-order";
    my $log = `(cd $src && git rev-list $opts HEAD)`;
    die unless $? == 0;
    unshift @revs, $_ for split "\n", $log;
    #print Dumper(\@revs);
}
print "There are ", scalar @revs, " revs.\n";

print "==> Creating the empty commit.\n";
{
    my $b0 = "refs/heads/".b_name(0);
    open F, "| (cd $dst && git fast-import --quiet)" or die;
    print F <<EOF;
reset $b0
commit $b0
mark :1
author The Universe <root> 1238294157 +1100
committer The Universe <root> 1238294157 +1100
data 7
Empty.

EOF
    close F;
    die unless $? == 0;
}

print "==> Importing the first commit.\n";
{
    my $b0 = "refs/heads/".b_name(0);
    my $b1 = "refs/heads/".b_name(1);
    my $first = $revs[0];
    my $export = `(cd $src && git fast-export $first)`;
    die unless $? == 0;

    my $import = "";
    my $is_commit = 0;
    while ($export ne "") {
        (my $line, $export) = split "\n", $export, 2;
        $line =~ s{\A(reset|commit) \(null\)}{$1 $b1};
        $is_commit = 1 if $line =~ /\Acommit/;
        $import .= $line . "\n";

        # Skip over data chunks.
        my ($data_len) = $line =~ /\Adata (\d+)/;
        if (defined $data_len) {
            my $data = substr($export, 0, $data_len);
            $export = substr($export, $data_len);
            $import .= $data;
            # Add parent
            $import .= "from $b0\n" if $is_commit;
        }
    }
    #print Dumper(\$import);

    open F, "| (cd $dst && git fast-import --quiet)" or die;
    print F $import;
    close F;
    die unless $? == 0;
}

# This is the business end.
my $branch_num = 2;
my $num_revs = scalar @revs;
foreach my $i (0..$num_revs-2) {
    print "==> [", ($i+2), "/$num_revs] Importing commit ";
    my $b = $revs[$i+1];
    my $a = src_parent($b);
    my $range = substr($a,0,6)."..".substr($b,0,6);
    print "($range)\n";

    # Grab patch from src repo.
    my $patch = `(cd $src && git format-patch -k -U$context --stdout $a..$b)`;
    die unless $? == 0;
    #print Dumper(\$patch);

    # Try it on top of every branch.
    my $success = 0;
    foreach my $try_branch (0..$branch_num-1) {
        checkout(b_name($try_branch));
        if (can_apply_patch($patch)) {
            print "Succeeded against branch $try_branch\n";
            branch_off(b_name($try_branch), b_name($branch_num));
            apply_patch($patch);

            add_dep($branch_num, $try_branch);
            # move on
            $branch_num++;
            $success = 1;
            last;
        }
        # else try the next one
        print "Failed against branch $try_branch$EOL\r";
    }
    next if $success;

    # That failed, try merge combinations.
    # Count up from 1 to the last branch and try ranged merges.
    #
    my $trycache = {}; # hash of tried sets, keyed on Set->to_string
    foreach my $max (1..$branch_num-1) {
        my $s = Set->new(1..$max);

        next unless try_merge($branch_num, $s, $patch, $trycache);
        # else we succeeded.

        print "Trying to cut down the range.\n";
        # From the greatest to the smallest, try to remove each branch.
        foreach (reverse (1..$max-1)) {
            $s->del($_);
            # and put it back if the merge fails:
            $s->add($_) unless try_merge($branch_num, $s, $patch, $trycache);
        }

        print "Reduced to: ", $s->to_string, "\n";
        try_merge($branch_num, $s, $patch, {}) or die; # force merge (no cache)
        apply_patch($patch);

        # create dependencies
        add_dep($branch_num, $_) for $s->to_list;

        # move on
        $branch_num++;
        $success = 1;
        last;
    }
    next if $success;

    die "Couldn't apply $range against any branch";
}

print "==> Master merge.\n";
{
    my $s = Set->new(1..$branch_num-1);
    cull_deps($s);
    merge("master", $s->to_list);
}

print "==> Done!\n";

# vim:set ts=4 sw=4 et:
