| package Git::SVN::Fetcher; |
| use vars qw/@ISA $_ignore_regex $_include_regex $_preserve_empty_dirs |
| $_placeholder_filename @deleted_gpath %added_placeholder |
| $repo_id/; |
| use strict; |
| use warnings $ENV{GIT_PERL_FATAL_WARNINGS} ? qw(FATAL all) : (); |
| use SVN::Delta; |
| use Carp qw/croak/; |
| use File::Basename qw/dirname/; |
| use Git qw/command command_oneline command_noisy command_output_pipe |
| command_input_pipe command_close_pipe |
| command_bidi_pipe command_close_bidi_pipe |
| get_record/; |
| BEGIN { |
| @ISA = qw(SVN::Delta::Editor); |
| } |
| |
| # file baton members: path, mode_a, mode_b, pool, fh, blob, base |
# Construct a fetcher (an SVN::Delta::Editor subclass) for one git-svn remote.
#
# Arguments:
#   $class       - class to bless the editor into
#   $git_svn     - object describing the remote; this constructor reads its
#                  {last_commit} and {repo_id} members and calls its
#                  tmp_index_do() method
#   $switch_path - optional path, handed on to _mark_empty_symlinks()
#
# Side effects: sets the package global $repo_id and, depending on the
# [svn-remote "..."] configuration, $_preserve_empty_dirs,
# $_placeholder_filename and %added_placeholder.
#
# Returns the new editor object.
sub new {
    my ($class, $git_svn, $switch_path) = @_;
    my $self = SVN::Delta::Editor->new;
    bless $self, $class;
    if (exists $git_svn->{last_commit}) {
        $self->{c} = $git_svn->{last_commit};
        $self->{empty_symlinks} =
            _mark_empty_symlinks($git_svn, $switch_path);
    }

    # some options are read globally, but can be overridden locally
    # per [svn-remote "..."] section.  Command-line options will *NOT*
    # override options set in an [svn-remote "..."] section
    $repo_id = $git_svn->{repo_id};
    my $section = "svn-remote.$repo_id";

    $self->{ignore_regex} =
        eval { command_oneline('config', '--get', "$section.ignore-paths") };
    $self->{include_regex} =
        eval { command_oneline('config', '--get', "$section.include-paths") };

    my $preserve = eval {
        command_oneline('config', '--get', '--bool',
                        "$section.preserve-empty-dirs");
    };
    if ($preserve && $preserve eq 'true') {
        $_preserve_empty_dirs = 1;
        my $name = eval {
            command_oneline('config', '--get',
                            "$section.placeholder-filename");
        };
        # Only override the global placeholder name when the section really
        # provides one; otherwise an unset key would replace it with undef
        # and add_placeholder_file() would build the path "$dir/".
        $_placeholder_filename = $name if defined $name && length $name;
    }

    # Load the list of placeholder files added during previous invocations.
    my $placeholder_key = "$section.added-placeholder";
    my $probe = eval { command_oneline('config', '--get-all', $placeholder_key) };
    if ($_preserve_empty_dirs && $probe) {
        # command() prints errors to stderr, so we only call it if
        # command_oneline() succeeded.
        my @placeholders = command('config', '--get-all', $placeholder_key);
        $added_placeholder{ dirname($_) } = $_ foreach @placeholders;
    }

    $self->{empty}       = {};
    $self->{dir_prop}    = {};
    $self->{file_prop}   = {};
    $self->{absent_dir}  = {};
    $self->{absent_file} = {};
    require Git::IndexInfo;
    $self->{gii} = $git_svn->tmp_index_do(sub { Git::IndexInfo->new });
    $self->{pathnameencoding} = Git::config('svn.pathnameencoding');
    return $self;
}
| |
# Collect the paths of the last fetched commit that git stores as empty
# regular files (mode 100644) but that carry the svn:special property in
# Subversion.
#
# this uses the Ra object, so it must be called before do_{switch,update},
# not inside them (when the Git::SVN::Fetcher object is passed) to
# do_{switch,update}
#
# Arguments:
#   $git_svn     - object providing last_rev_commit(), path() and ra()
#   $switch_path - optional prefix used instead of $git_svn->path
#
# Returns a hash reference mapping each such path to 1.  The hash is empty
# when svn.brokenSymlinkWorkaround is unset or false, or when there is no
# previous revision/commit to inspect.
sub _mark_empty_symlinks {
    my ($git_svn, $switch_path) = @_;
    my $enabled = Git::config_bool('svn.brokenSymlinkWorkaround');
    return {} unless $enabled;

    my %ret;
    my ($rev, $cmt) = $git_svn->last_rev_commit;
    return {} unless ($rev && $cmt);

    # allow the warning to be printed for each revision we fetch to
    # ensure the user sees it.  The user can also disable the workaround
    # on the repository even while git svn is running and the next
    # revision fetched will skip this expensive function.
    my $printed_warning;
    chomp(my $empty_blob = `git hash-object -t blob --stdin < /dev/null`);
    my ($ls, $ctx) = command_output_pipe(qw/ls-tree -r -z/, $cmt);
    my $pfx = defined($switch_path) ? $switch_path : $git_svn->path;
    $pfx .= '/' if length($pfx);

    # A lexical record variable is used on purpose: assigning to the global
    # $_ here would overwrite whatever the caller had in $_ (which may be
    # aliased to the caller's own data inside a foreach/map).
    while (defined(my $entry = get_record($ls, "\0"))) {
        # Keep only empty regular-file blobs; what is left is the path.
        $entry =~ s/\A100644 blob $empty_blob\t//o or next;
        unless ($printed_warning) {
            print STDERR "Scanning for empty symlinks, ",
                         "this may take a while if you have ",
                         "many empty files\n",
                         "You may disable this with `",
                         "git config svn.brokenSymlinkWorkaround ",
                         "false'.\n",
                         "This may be done in a different ",
                         "terminal without restarting ",
                         "git svn\n";
            $printed_warning = 1;
        }
        my $path = $entry;
        my (undef, $props) =
            $git_svn->ra->get_file($pfx.$path, $rev, undef);
        if ($props->{'svn:special'}) {
            $ret{$path} = 1;
        }
    }
    command_close_pipe($ls, $ctx);
    return \%ret;
}
| |
# Tell whether a path lies inside (or is) a ".git" directory, i.e. whether
# one of its slash-separated components is exactly ".git".
# Returns a true value on a match and a false value otherwise.
sub in_dot_git {
    my ($path) = @_;
    return $path =~ m{(?:^|/)\.git(?:/|$)};
}
| |
# Decide whether a Subversion path should be skipped by the fetcher.
#
# return value: 0 -- don't ignore, 1 -- ignore
# This will also check whether the path is explicitly included
#
# Order of evaluation:
#   1. anything inside a ".git" directory is ignored;
#   2. a match against the per-remote ignore regex ignores the path;
#   3. a match against the per-remote or global include regex keeps it;
#   4. if any include regex is defined but none matched, the path is ignored;
#   5. otherwise the global ignore regex decides.
sub is_path_ignored {
    my ($self, $path) = @_;
    return 1 if in_dot_git($path);

    my $local_ignore  = $self->{ignore_regex};
    my $local_include = $self->{include_regex};

    return 1 if defined($local_ignore) && $path =~ m!$local_ignore!;
    return 0 if defined($local_include) && $path =~ m!$local_include!;
    return 0 if defined($_include_regex) && $path =~ m!$_include_regex!;

    # An include list exists but this path matched none of it.
    return 1 if defined($local_include) || defined($_include_regex);

    return 0 unless defined($_ignore_regex);
    # No /o here: that flag would freeze the first value of the global
    # pattern for the rest of the process, so a later change to
    # $_ignore_regex would be silently disregarded.
    return 1 if $path =~ m!$_ignore_regex!;
    return 0;
}
| |
# Remember a leading path prefix that git_path() must remove.
# For a non-empty $path, stores in $self->{path_strip} a compiled regex that
# matches $path at the start of a string when followed by "/" or the end of
# the string.  An empty $path leaves $self->{path_strip} untouched.
sub set_path_strip {
    my ($self, $path) = @_;
    length $path
        and $self->{path_strip} = qr/^\Q$path\E(\/|$)/;
}
| |
# Editor callback for the root of the edit.
# Returns the root directory baton: a hash reference whose only member,
# "path", is the empty string.
sub open_root {
    my %baton = (path => '');
    return \%baton;
}
| |
# Editor callback for an existing directory.
# Only $path is used; the parent baton ($pb) and revision ($rev) are
# accepted but not consulted.
# Returns a directory baton: a hash reference holding that path under "path".
sub open_directory {
    my ($self, $path, $pb, $rev) = @_;
    my %baton = (path => $path);
    return \%baton;
}
| |
# Translate a Subversion path into the path used on the git side.
#
# If $self->{pathnameencoding} is set, the path is converted in place from
# UTF-8 to that encoding.  If $self->{path_strip} is set, the matching prefix
# is removed; failing to match is fatal.
#
# Returns the translated path.
# Dies with "Failed to strip path ..." when the strip regex does not match.
sub git_path {
    my ($self, $path) = @_;

    my $encoding = $self->{pathnameencoding};
    if ($encoding) {
        require Encode;
        Encode::from_to($path, 'UTF-8', $encoding);
    }

    return $path unless $self->{path_strip};

    unless ($path =~ s!$self->{path_strip}!!) {
        die "Failed to strip path '$path' ($self->{path_strip})\n";
    }
    return $path;
}
| |
# Editor callback: remove a file or a whole directory from the index.
#
# Arguments:
#   $path - Subversion path being deleted
#   $rev, $pb - accepted for the callback signature, not used here
#
# Ignored paths and paths whose git path is empty are skipped.  Otherwise
# the entry (or every entry below it, if it is a tree in commit
# $self->{c}) is removed through $self->{gii}, a "D" line is printed for
# each removal unless $::_q is set, the git path is appended to
# @deleted_gpath (unless a placeholder is registered for the parent
# directory of $path) and $self->{empty}{$path} is set to 0.
#
# Always returns undef.
sub delete_entry {
    my ($self, $path, $rev, $pb) = @_;
    return undef if $self->is_path_ignored($path);

    my $gpath = $self->git_path($path);
    return undef if ($gpath eq '');

    # remove entire directories.
    my $listing = command('ls-tree', '-z', $self->{c}, "./$gpath");
    my ($tree) = ($listing =~ /\A040000 tree ($::oid)\t\Q$gpath\E\0/);
    if ($tree) {
        my ($ls, $ctx) = command_output_pipe(qw/ls-tree -r --name-only -z/,
                                             $tree);
        # Read into a lexical rather than the global $_, so that the
        # caller's $_ (possibly aliased to its own data) is left alone.
        while (defined(my $name = get_record($ls, "\0"))) {
            my $rmpath = "$gpath/$name";
            $self->{gii}->remove($rmpath);
            print "\tD\t$rmpath\n" unless $::_q;
        }
        print "\tD\t$gpath/\n" unless $::_q;
        command_close_pipe($ls, $ctx);
    } else {
        $self->{gii}->remove($gpath);
        print "\tD\t$gpath\n" unless $::_q;
    }
    # Don't add to @deleted_gpath if we're deleting a placeholder file.
    push @deleted_gpath, $gpath unless $added_placeholder{dirname($path)};
    $self->{empty}->{$path} = 0;
    undef;
}
| |
# Editor callback for modifying an existing file.
#
# Unless the path is ignored, its mode and blob id are looked up in commit
# $self->{c}; a missing entry is fatal.  A 100644 entry that is recorded in
# $self->{empty_symlinks} is treated as mode 120000.
#
# Returns the file baton (hash reference) with path, mode_a, mode_b, blob,
# pool and action 'M'.  For ignored paths mode and blob stay undefined.
sub open_file {
    my ($self, $path, $pb, $rev) = @_;
    my ($mode, $blob);

    unless ($self->is_path_ignored($path)) {
        my $gpath = $self->git_path($path);
        my $entry = command('ls-tree', '-z', $self->{c}, "./$gpath");
        ($mode, $blob) =
            ($entry =~ /\A(\d{6}) blob ($::oid)\t\Q$gpath\E\0/);

        defined $mode && defined $blob
            or die "$path was not found in commit $self->{c} (r$rev)\n";

        $mode = '120000'
            if $mode eq '100644' && $self->{empty_symlinks}->{$path};
    }

    return {
        path   => $path,
        mode_a => $mode,
        mode_b => $mode,
        blob   => $blob,
        pool   => SVN::Pool->new,
        action => 'M',
    };
}
| |
# Editor callback for a newly added file.
#
# Unless the path is ignored, the parent directory stops being flagged in
# $self->{empty}, the file starts out with mode 100644, and a placeholder
# previously registered for the parent directory is dropped (and deleted
# from the index, unless the new file is that very placeholder path).
#
# Returns the file baton (hash reference) with path, mode_a, mode_b, pool
# and action 'A'.  For ignored paths the modes stay undefined.
sub add_file {
    my ($self, $path, $pb, $cp_path, $cp_rev) = @_;
    my $mode;

    unless ($self->is_path_ignored($path)) {
        my ($parent, $leaf) = ($path =~ m#^(.*?)/?([^/]+)$#);
        delete $self->{empty}->{$parent};
        $mode = '100644';

        my $placeholder = $added_placeholder{$parent};
        if ($placeholder) {
            # Remove our placeholder file, if we created one.
            if ($path ne $placeholder) {
                delete_entry($self, $placeholder);
            }
            delete $added_placeholder{$parent};
        }
    }

    return {
        path   => $path,
        mode_a => $mode,
        mode_b => $mode,
        pool   => SVN::Pool->new,
        action => 'A',
    };
}
| |
# Editor callback for a newly added directory.
#
# Ignored paths only get a baton.  Otherwise:
#   * if the directory maps to the top of the git tree (empty git path),
#     every entry of commit $self->{c} is removed from the index first;
#   * the parent directory is no longer flagged in $self->{empty} and the
#     new directory is flagged as empty (value 1);
#   * a placeholder registered for the parent directory is deleted.
#
# Returns the directory baton: { path => $path }.
sub add_directory {
    my ($self, $path, $cp_path, $cp_rev) = @_;
    return { path => $path } if $self->is_path_ignored($path);

    my $gpath = $self->git_path($path);
    if ($gpath eq '') {
        my ($ls, $ctx) = command_output_pipe(qw/ls-tree -r --name-only -z/,
                                             $self->{c});
        # Read into a lexical rather than the global $_, so that the
        # caller's $_ (possibly aliased to its own data) is left alone.
        while (defined(my $entry = get_record($ls, "\0"))) {
            $self->{gii}->remove($entry);
            print "\tD\t$entry\n" unless $::_q;
            # NOTE(review): this records $gpath (the empty string in this
            # branch) once per removed entry, not the entry itself --
            # kept as is; confirm whether that is intended.
            push @deleted_gpath, $gpath;
        }
        command_close_pipe($ls, $ctx);
        $self->{empty}->{$path} = 0;
    }

    # Only the parent directory part of the match is needed.
    my ($parent) = ($path =~ m#^(.*?)/?([^/]+)$#);
    delete $self->{empty}->{$parent};
    $self->{empty}->{$path} = 1;

    if ($added_placeholder{$parent}) {
        # Remove our placeholder file, if we created one.
        delete_entry($self, $added_placeholder{$parent});
        delete $added_placeholder{$parent};
    }

    return { path => $path };
}
| |
# Editor callback: record a property change on a directory.
# Unless the directory is ignored, stores $value under
# $self->{dir_prop}{<directory path>}{$prop}.  Always returns undef.
sub change_dir_prop {
    my ($self, $db, $prop, $value) = @_;
    my $dir = $db->{path};

    unless ($self->is_path_ignored($dir)) {
        my $props = ($self->{dir_prop}->{$dir} ||= {});
        $props->{$prop} = $value;
    }
    return undef;
}
| |
# Editor callback: note a directory that is reported as absent.
# Unless $path is ignored, appends it to the list kept in
# $self->{absent_dir} under the parent baton's path.  Always returns undef.
sub absent_directory {
    my ($self, $path, $pb) = @_;

    unless ($self->is_path_ignored($path)) {
        my $children = ($self->{absent_dir}->{$pb->{path}} ||= []);
        push @$children, $path;
    }
    return undef;
}
| |
# Editor callback: note a file that is reported as absent.
# Unless $path is ignored, appends it to the list kept in
# $self->{absent_file} under the parent baton's path.  Always returns undef.
sub absent_file {
    my ($self, $path, $pb) = @_;

    unless ($self->is_path_ignored($path)) {
        my $children = ($self->{absent_file}->{$pb->{path}} ||= []);
        push @$children, $path;
    }
    return undef;
}
| |
# Editor callback: apply a property change to a file baton.
#
#   svn:special    - sets mode_b to 120000 when the value is defined,
#                    100644 otherwise
#   svn:executable - sets mode_b to 100755 / 100644 the same way, but
#                    leaves a mode_b of 120000 untouched
#   anything else  - recorded in $self->{file_prop}{<file path>}{$prop}
#
# Ignored paths are skipped.  Always returns undef.
sub change_file_prop {
    my ($self, $fb, $prop, $value) = @_;
    return undef if $self->is_path_ignored($fb->{path});

    if ($prop eq 'svn:special') {
        $fb->{mode_b} = defined $value ? 120000 : 100644;
    }
    elsif ($prop eq 'svn:executable') {
        unless ($fb->{mode_b} == 120000) {
            $fb->{mode_b} = defined $value ? 100755 : 100644;
        }
    }
    else {
        my $props = ($self->{file_prop}->{$fb->{path}} ||= {});
        $props->{$prop} = $value;
    }
    return undef;
}
| |
# Editor callback: set up the temp files for applying a text delta.
#
# Arguments:
#   $fb  - file baton; path, blob, mode_a and pool are read, fh and base
#          are stored into it
#   $exp - expected checksum of the base text, or undef to skip the check
#
# Returns undef for ignored paths, otherwise an array reference holding
# whatever SVN::TxDelta::apply() returns.
# Dies when the base blob cannot be read or its checksum does not match.
sub apply_textdelta {
    my ($self, $fb, $exp) = @_;
    return undef if $self->is_path_ignored($fb->{path});

    my $repo = $::_repository;

    # Find the first suffix whose delta temp file is not in use.
    my $suffix = 0;
    $suffix++ while $repo->temp_is_locked("svn_delta_${$}_$suffix");
    my $fh = $repo->temp_acquire("svn_delta_${$}_$suffix");
    # $fh gets auto-closed() by SVN::TxDelta::apply(),
    # (but $base does not,) so dup() it for reading in close_file
    open my $dup, '<&', $fh or croak $!;
    my $base = $repo->temp_acquire("git_blob_${$}_$suffix");
    # close_file may call temp_acquire on 'svn_hash', but because of the
    # call chain, if the temp_acquire call from close_file ends up being the
    # call that first creates the 'svn_hash' temp file, then the FileHandle
    # that's created as a result will end up in an SVN::Pool that we clear
    # in SVN::Ra::gs_fetch_loop_common.  Avoid that by making sure the
    # 'svn_hash' FileHandle is already created before close_file is called.
    my $warmup_fh = $repo->temp_acquire('svn_hash');
    $repo->temp_release($warmup_fh, 1);

    if ($fb->{blob}) {
        my $base_is_link = 0;
        if ($fb->{mode_a} eq '120000' &&
            ! $self->{empty_symlinks}->{$fb->{path}}) {
            print $base 'link ' or die "print $!\n";
            $base_is_link = 1;
        }

        # At most two passes: the second one only happens after a checksum
        # mismatch on a base that was written with the 'link ' prefix.
        while (1) {
            my $size = $repo->cat_blob($fb->{blob}, $base);
            die "Failed to read object $fb->{blob}" if ($size < 0);

            last unless defined $exp;

            seek $base, 0, 0 or croak $!;
            my $got = ::md5sum($base);
            last if $got eq $exp;

            my $err = "Checksum mismatch: ".
                      "$fb->{path} $fb->{blob}\n" .
                      "expected: $exp\n" .
                      " got: $got\n";
            die $err unless $base_is_link;

            warn $err,
                 "Retrying... (possibly ",
                 "a bad symlink from SVN)\n";
            $repo->temp_reset($base);
            $base_is_link = 0;
        }
    }

    seek $base, 0, 0 or croak $!;
    $fb->{fh}   = $fh;
    $fb->{base} = $base;
    return [ SVN::TxDelta::apply($base, $dup, undef, $fb->{path}, $fb->{pool}) ];
}
| |
# Editor callback: finish a file and record it in the index.
#
# Arguments:
#   $fb  - file baton (path, fh, base, blob, mode_b, pool, action)
#   $exp - expected checksum of the resulting text, or undef to skip it
#
# When the baton carries a file handle, the result is verified against
# $exp, symlink content (mode_b 120000) has its leading "link " marker
# removed by copying the remainder into the 'svn_hash' temp file, and the
# content is hashed into the object store.  Without a file handle the
# baton's existing blob is reused.  The entry is then written through
# $self->{gii} and, unless $::_q is set, an action line is printed.
#
# Always returns undef; ignored paths are skipped entirely.
sub close_file {
    my ($self, $fb, $exp) = @_;
    return undef if $self->is_path_ignored($fb->{path});

    my $path = $self->git_path($fb->{path});
    my $fh   = $fb->{fh};
    my $hash;

    if (!$fh) {
        $hash = $fb->{blob} or die "no blob information\n";
    }
    else {
        if (defined $exp) {
            seek($fh, 0, 0) or croak $!;
            my $got = ::md5sum($fh);
            $got eq $exp
                or die "Checksum mismatch: $path\n",
                       "expected: $exp\n got: $got\n";
        }

        if ($fb->{mode_b} == 120000) {
            sysseek($fh, 0, 0) or croak $!;
            my $nread = sysread($fh, my $magic, 5);

            if (!defined $nread) {
                croak "sysread: $!\n";
            }
            elsif ($nread == 0) {
                warn "$path has mode 120000",
                     " but it points to nothing\n",
                     "converting to an empty file with mode",
                     " 100644\n";
                $fb->{mode_b} = '100644';
            }
            elsif ($magic ne 'link ') {
                warn "$path has mode 120000",
                     " but is not a link\n";
            }
            else {
                # Copy everything after the marker into 'svn_hash'.
                my $link_fh = $::_repository->temp_acquire('svn_hash');
                my $chunk_len;
                while ($chunk_len = sysread($fh, my $chunk, 1024)) {
                    my $written = syswrite($link_fh, $chunk, $chunk_len);
                    defined($written) && $written == $chunk_len
                        or croak("write ",
                                 Git::temp_path($link_fh),
                                 ": $!\n");
                }
                defined $chunk_len or croak $!;

                # From here on the copy is the file to hash; the
                # original delta file is released right away.
                Git::temp_release($fh, 1);
                $fh = $link_fh;
            }
        }

        $hash = $::_repository->hash_and_insert_object(
            Git::temp_path($fh));
        $hash =~ /^$::oid$/ or die "not an object ID: $hash\n";

        Git::temp_release($fb->{base}, 1);
        Git::temp_release($fh, 1);
    }

    $fb->{pool}->clear;
    $self->{gii}->update($fb->{mode_b}, $hash, $path) or croak $!;
    print "\t$fb->{action}\t$path\n" if $fb->{action} && ! $::_q;
    return undef;
}
| |
# Editor callback: the edit was aborted.
# Copies the {nr} member of $self->{gii} into $self->{nr}, drops
# $self->{gii}, then delegates to the parent class with the remaining
# arguments and returns its result.
sub abort_edit {
    my ($self, @args) = @_;

    $self->{nr} = $self->{gii}->{nr};
    # Drop the index helper before handing over to the parent class.
    delete $self->{gii};

    $self->SUPER::abort_edit(@args);
}
| |
# Editor callback: the edit completed.
#
# When $_preserve_empty_dirs is set, a placeholder file is added to every
# directory considered empty and the placeholder list is saved.  Then the
# edit is marked as committable ({git_commit_ok}), the {nr} member of
# $self->{gii} is copied into $self->{nr}, $self->{gii} is dropped, and
# the parent class is called with the remaining arguments.
sub close_edit {
    my ($self, @args) = @_;

    if ($_preserve_empty_dirs) {
        # Any entry flagged as empty that also has an associated
        # dir_prop represents a newly created empty directory.
        my @empty_dirs =
            grep { exists $self->{dir_prop}->{$_} } keys %{$self->{empty}};

        # Search for directories that have become empty due to subsequent
        # file deletes.
        push @empty_dirs, $self->find_empty_directories();

        # Finally, add a placeholder file to each empty directory.
        for my $dir (@empty_dirs) {
            $self->add_placeholder_file($dir);
        }

        $self->stash_placeholder_list();
    }

    $self->{git_commit_ok} = 1;
    $self->{nr} = $self->{gii}->{nr};
    # Drop the index helper before handing over to the parent class.
    delete $self->{gii};

    $self->SUPER::close_edit(@args);
}
| |
# Find directories that lost all their entries through deletions.
#
# The candidates are the parent directories of everything recorded in
# @deleted_gpath (the top-level "." is never reported).  A candidate is
# reported when none of the names listed for it in commit $self->{c}
# remains once the deleted paths are removed.
#
# Returns the list of empty directories.
sub find_empty_directories {
    my ($self) = @_;
    my @empty_dirs;
    my %candidates = map { dirname($_) => 1 } @deleted_gpath;

    # If there have been any additions, there is no reason to check for
    # empty directories.  This scan looks only at the dir_prop/file_prop
    # tables, never at the candidate directory, so its result is the same
    # for every candidate: it is computed once here rather than once per
    # directory.
    # NOTE(review): the check probably was meant to be per directory
    # (compare dirname($path) with the candidate) -- confirm before changing.
    my $skip_added = 0;
  TABLE:
    foreach my $table (qw/dir_prop file_prop/) {
        foreach my $path (keys %{ $self->{$table} }) {
            next unless exists $self->{$table}->{dirname($path)};
            $skip_added = 1;
            last TABLE;
        }
    }
    return @empty_dirs if $skip_added;

    foreach my $dir (sort keys %candidates) {
        next if $dir eq ".";

        # Use `git ls-tree` to get the filenames of this directory
        # that existed prior to this particular commit.
        my $ls = command('ls-tree', '-z', '--name-only',
                         $self->{c}, "$dir/");
        my %files = map { $_ => 1 } split(/\0/, $ls);

        # Remove the filenames that were deleted during this commit.
        delete @files{@deleted_gpath};

        # Report the directory if there are no filenames left.
        push @empty_dirs, $dir unless %files;
    }
    @empty_dirs;
}
| |
# Add a placeholder file, named by $_placeholder_filename, to directory $dir.
# The content of a freshly acquired temp file is hashed into the object
# store and the result is written to the index with mode 100644.  The
# directory is then no longer flagged in $self->{empty}, and the
# placeholder path is remembered in %added_placeholder.
sub add_placeholder_file {
    my ($self, $dir) = @_;
    my $svn_path = "$dir/$_placeholder_filename";
    my $gpath    = $self->git_path($svn_path);

    my $tmp  = $::_repository->temp_acquire($gpath);
    my $hash = $::_repository->hash_and_insert_object(Git::temp_path($tmp));
    Git::temp_release($tmp, 1);
    $self->{gii}->update('100644', $hash, $gpath) or croak $!;

    # The directory should no longer be considered empty.
    delete $self->{empty}->{$dir};

    # Keep track of any placeholder files we create.
    $added_placeholder{$dir} = $svn_path;
}
| |
# Save the placeholder paths held in %added_placeholder to the
# "svn-remote.$repo_id.added-placeholder" configuration key: existing
# values are removed first, then one value is added per placeholder.
sub stash_placeholder_list {
    my ($self) = @_;
    my $key = "svn-remote.$repo_id.added-placeholder";

    # Only unset when something is stored already.
    my $existing = eval { command_oneline('config', '--get-all', $key) };
    if ($existing) {
        command_noisy('config', '--unset-all', $key);
    }

    command_noisy('config', '--add', $key, $_)
        for values %added_placeholder;
}
| |
| 1; |
| __END__ |
| |
| =head1 NAME |
| |
| Git::SVN::Fetcher - tree delta consumer for "git svn fetch" |
| |
| =head1 SYNOPSIS |
| |
| use SVN::Core; |
| use SVN::Ra; |
| use Git::SVN; |
| use Git::SVN::Fetcher; |
| use Git; |
| |
| my $gs = Git::SVN->find_by_url($url); |
| my $ra = SVN::Ra->new(url => $url); |
| my $editor = Git::SVN::Fetcher->new($gs); |
| my $reporter = $ra->do_update($SVN::Core::INVALID_REVNUM, '', |
| 1, $editor); |
| $reporter->set_path('', $old_rev, 0); |
| $reporter->finish_report; |
| my $tree = $gs->tmp_index_do(sub { command_oneline('write-tree') }); |
| |
| foreach my $path (keys %{$editor->{dir_prop}}) { |
| my $props = $editor->{dir_prop}{$path}; |
| foreach my $prop (keys %$props) { |
| print "property $prop at $path changed to $props->{$prop}\n"; |
| } |
| } |
| foreach my $path (keys %{$editor->{empty}}) { |
| my $action = $editor->{empty}{$path} ? 'added' : 'removed'; |
| print "empty directory $path $action\n"; |
| } |
| foreach my $path (keys %{$editor->{file_prop}}) { ... } |
| foreach my $parent (keys %{$editor->{absent_dir}}) { |
| my @children = @{$editor->{absent_dir}{$parent}}; |
| print "cannot fetch directory $parent/$_: not authorized?\n" |
| foreach @children; |
| } |
| foreach my $parent (keys %{$editor->{absent_file}}) { ... } |
| |
| =head1 DESCRIPTION |
| |
| This is a subclass of C<SVN::Delta::Editor>, which means it implements |
| callbacks to act as a consumer of Subversion tree deltas. This |
| particular implementation of those callbacks is meant to store |
| information about the resulting content which B<git svn fetch> could |
| use to populate new commits and new entries for F<unhandled.log>. |
| More specifically: |
| |
| =over |
| |
| =item * Additions, removals, and modifications of files are propagated |
| to git-svn's index file F<$GIT_DIR/svn/$refname/index> using |
| B<git update-index>. |
| |
| =item * Changes in Subversion path properties are recorded in the |
| C<dir_prop> and C<file_prop> fields (which are hashes). |
| |
| =item * Addition and removal of empty directories are indicated by |
| entries with value 1 and 0 respectively in the C<empty> hash. |
| |
| =item * Paths that are present but cannot be conveyed (presumably due |
| to permissions) are recorded in the C<absent_file> and |
| C<absent_dir> hashes. For each key, the corresponding value is |
| a list of paths under that directory that were present but |
| could not be conveyed. |
| |
| =back |
| |
| The interface is unstable. Do not use this module unless you are |
| developing git-svn. |
| |
| =head1 DEPENDENCIES |
| |
| L<SVN::Delta> from the Subversion perl bindings, |
| the core L<Carp> and L<File::Basename> modules, |
| and git's L<Git> helper module. |
| |
| C<Git::SVN::Fetcher> has not been tested using callers other than |
| B<git-svn> itself. |
| |
| =head1 SEE ALSO |
| |
| L<SVN::Delta>, |
| L<Git::SVN::Editor>. |
| |
| =head1 INCOMPATIBILITIES |
| |
| None reported. |
| |
| =head1 BUGS |
| |
| None. |