| #!/usr/bin/perl |
| # |
| # Copyright 2008-2009 Peter Krefting <peter@softwolves.pp.se> |
| # |
| # ------------------------------------------------------------------------ |
| # |
| # This program is free software; you can redistribute it and/or modify |
| # it under the terms of the GNU General Public License as published by |
| # the Free Software Foundation. |
| # |
| # This program is distributed in the hope that it will be useful, |
| # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| # GNU General Public License for more details. |
| # |
| # You should have received a copy of the GNU General Public License |
| # along with this program; if not, write to the Free Software |
| # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
| # |
| # ------------------------------------------------------------------------ |
| |
| =pod |
| |
| =head1 NAME |
| |
| import-directories - Import bits and pieces to Git. |
| |
| =head1 SYNOPSIS |
| |
| B<import-directories.perl> F<configfile> F<outputfile> |
| |
| =head1 DESCRIPTION |
| |
| Script to import arbitrary projects version controlled by the "copy the |
| source directory to a new location and edit it there"-version controlled |
| projects into version control. Handles projects with arbitrary branching |
| and version trees, taking a file describing the inputs and generating a |
| file compatible with the L<git-fast-import(1)> format. |
| |
| =head1 CONFIGURATION FILE |
| |
| =head2 Format |
| |
| The configuration file is based on the standard I<.ini> format. |
| |
| ; Comments start with semi-colons |
| [section] |
| key=value |
| |
| Please see below for information on how to escape special characters. |
| |
| =head2 Global configuration |
| |
| Global configuration is done in the B<[config]> section, which should be |
| the first section in the file. Configuration can be changed by |
| repeating configuration sections later on. |
| |
| [config] |
| ; configure conversion of CRLFs. "convert" means that all CRLFs |
| ; should be converted into LFs (suitable for the core.autocrlf |
| ; setting set to true in Git). "none" means that all data is |
| ; treated as binary. |
| crlf=convert |
| |
| =head2 Revision configuration |
| |
| Each revision that is to be imported is described in three |
| sections. Revisions should be defined in topological order, so |
| that a revision's parent has always been defined when a new revision |
| is introduced. All the sections for one revision must be defined |
| before defining the next revision. |
| |
| Each revision is assigned a unique numerical identifier. The |
| numbers do not need to be consecutive, nor monotonically |
| increasing. |
| |
| For instance, if your configuration file contains only the two |
| revisions 4711 and 42, where 4711 is the initial commit, the |
| only requirement is that 4711 is completely defined before 42. |
| |
| =pod |
| |
| =head3 Revision description section |
| |
| A section whose section name is just an integer gives meta-data |
| about the revision. |
| |
| [3] |
| ; author sets the author of the revisions |
| author=Peter Krefting <peter@softwolves.pp.se> |
| ; branch sets the branch that the revision should be committed to |
| branch=master |
| ; parent describes the revision that is the parent of this commit |
| ; (optional) |
| parent=1 |
| ; merges describes a revision that is merged into this commit |
| ; (optional; can be repeated) |
| merges=2 |
| ; selects one file to take the timestamp from |
| ; (optional; if unspecified, the most recent file from the .files |
| ; section is used) |
| timestamp=3/source.c |
| |
| =head3 Revision contents section |
| |
| A section whose section name is an integer followed by B<.files> |
| describe all the files included in this revision. If a file that |
| was available previously is not included in this revision, it will |
| be removed. |
| |
| If an on-disk revision is incomplete, you can point to files from |
| a previous revision. There are no restriction as to where the source |
| files are located, nor to the names of them. |
| |
| [3.files] |
| ; the key is the path inside the repository, the value is the path |
| ; as seen from the importer script. |
| source.c=ver-3.00/source.c |
| source.h=ver-2.99/source.h |
| readme.txt=ver-3.00/introduction to the project.txt |
| |
| File names are treated as byte strings (but please see below on |
| quoting rules), and should be stored in the configuration file in |
| the encoding that should be used in the generated repository. |
| |
| =head3 Revision commit message section |
| |
| A section whose section name is an integer followed by B<.message> |
| gives the commit message. This section is read verbatim, up until |
| the beginning of the next section. As such, a commit message may not |
| contain a line that begins with an opening square bracket ("[") and |
| ends with a closing square bracket ("]"), unless they are surrounded |
| by whitespace or other characters. |
| |
| [3.message] |
| Implement foobar. |
| ; trailing blank lines are ignored. |
| |
| =cut |
| |
| # Globals |
| use strict; |
| use warnings; |
| use integer; |
| my $crlfmode = 0; |
| my @revs; |
| my (%revmap, %message, %files, %author, %branch, %parent, %merges, %time, %timesource); |
| my $sectiontype = 0; |
| my $rev = 0; |
| my $mark = 1; |
| |
| # Check command line |
| if ($#ARGV < 1 || $ARGV[0] =~ /^--?h/) |
| { |
| exec('perldoc', $0); |
| exit 1; |
| } |
| |
| # Open configuration |
| my $config = $ARGV[0]; |
| open CFG, '<', $config or die "Cannot open configuration file \"$config\": "; |
| |
| # Open output |
| my $output = $ARGV[1]; |
| open OUT, '>', $output or die "Cannot create output file \"$output\": "; |
| binmode OUT; |
| |
| LINE: while (my $line = <CFG>) |
| { |
| $line =~ s/\r?\n$//; |
| next LINE if $sectiontype != 4 && $line eq ''; |
| next LINE if $line =~ /^;/; |
| my $oldsectiontype = $sectiontype; |
| my $oldrev = $rev; |
| |
| # Sections |
| if ($line =~ m"^\[(config|(\d+)(|\.files|\.message))\]$") |
| { |
| if ($1 eq 'config') |
| { |
| $sectiontype = 1; |
| } |
| elsif ($3 eq '') |
| { |
| $sectiontype = 2; |
| $rev = $2; |
| # Create a new revision |
| die "Duplicate rev: $line\n " if defined $revmap{$rev}; |
| print "Reading revision $rev\n"; |
| push @revs, $rev; |
| $revmap{$rev} = $mark ++; |
| $time{$revmap{$rev}} = 0; |
| } |
| elsif ($3 eq '.files') |
| { |
| $sectiontype = 3; |
| $rev = $2; |
| die "Revision mismatch: $line\n " unless $rev == $oldrev; |
| } |
| elsif ($3 eq '.message') |
| { |
| $sectiontype = 4; |
| $rev = $2; |
| die "Revision mismatch: $line\n " unless $rev == $oldrev; |
| } |
| else |
| { |
| die "Internal parse error: $line\n "; |
| } |
| next LINE; |
| } |
| |
| # Parse data |
| if ($sectiontype != 4) |
| { |
| # Key and value |
| if ($line =~ m"^\s*([^\s].*=.*[^\s])\s*$") |
| { |
| my ($key, $value) = &parsekeyvaluepair($1); |
| # Global configuration |
| if (1 == $sectiontype) |
| { |
| if ($key eq 'crlf') |
| { |
| $crlfmode = 1, next LINE if $value eq 'convert'; |
| $crlfmode = 0, next LINE if $value eq 'none'; |
| } |
| die "Unknown configuration option: $line\n "; |
| } |
| # Revision specification |
| if (2 == $sectiontype) |
| { |
| my $current = $revmap{$rev}; |
| $author{$current} = $value, next LINE if $key eq 'author'; |
| $branch{$current} = $value, next LINE if $key eq 'branch'; |
| $parent{$current} = $value, next LINE if $key eq 'parent'; |
| $timesource{$current} = $value, next LINE if $key eq 'timestamp'; |
| push(@{$merges{$current}}, $value), next LINE if $key eq 'merges'; |
| die "Unknown revision option: $line\n "; |
| } |
| # Filespecs |
| if (3 == $sectiontype) |
| { |
| # Add the file and create a marker |
| die "File not found: $line\n " unless -f $value; |
| my $current = $revmap{$rev}; |
| ${$files{$current}}{$key} = $mark; |
| my $time = &fileblob($value, $crlfmode, $mark ++); |
| |
| # Update revision timestamp if more recent than other |
| # files seen, or if this is the file we have selected |
| # to take the time stamp from using the "timestamp" |
| # directive. |
| if ((defined $timesource{$current} && $timesource{$current} eq $value) |
| || $time > $time{$current}) |
| { |
| $time{$current} = $time; |
| } |
| } |
| } |
| else |
| { |
| die "Parse error: $line\n "; |
| } |
| } |
| else |
| { |
| # Commit message |
| my $current = $revmap{$rev}; |
| if (defined $message{$current}) |
| { |
| $message{$current} .= "\n"; |
| } |
| $message{$current} .= $line; |
| } |
| } |
| close CFG; |
| |
| # Start spewing out data for git-fast-import |
| foreach my $commit (@revs) |
| { |
| # Progress |
| print OUT "progress Creating revision $commit\n"; |
| |
| # Create commit header |
| my $mark = $revmap{$commit}; |
| |
| # Branch and commit id |
| print OUT "commit refs/heads/", $branch{$mark}, "\nmark :", $mark, "\n"; |
| |
| # Author and timestamp |
| die "No timestamp defined for $commit (no files?)\n" unless defined $time{$mark}; |
| print OUT "committer ", $author{$mark}, " ", $time{$mark}, " +0100\n"; |
| |
| # Commit message |
| die "No message defined for $commit\n" unless defined $message{$mark}; |
| my $message = $message{$mark}; |
| $message =~ s/\n$//; # Kill trailing empty line |
| print OUT "data ", length($message), "\n", $message, "\n"; |
| |
| # Parent and any merges |
| print OUT "from :", $revmap{$parent{$mark}}, "\n" if defined $parent{$mark}; |
| if (defined $merges{$mark}) |
| { |
| foreach my $merge (@{$merges{$mark}}) |
| { |
| print OUT "merge :", $revmap{$merge}, "\n"; |
| } |
| } |
| |
| # Output file marks |
| print OUT "deleteall\n"; # start from scratch |
| foreach my $file (sort keys %{$files{$mark}}) |
| { |
| print OUT "M 644 :", ${$files{$mark}}{$file}, " $file\n"; |
| } |
| print OUT "\n"; |
| } |
| |
| # Create one file blob |
| sub fileblob |
| { |
| my ($filename, $crlfmode, $mark) = @_; |
| |
| # Import the file |
| print OUT "progress Importing $filename\nblob\nmark :$mark\n"; |
| open FILE, '<', $filename or die "Cannot read $filename\n "; |
| binmode FILE; |
| my ($size, $mtime) = (stat(FILE))[7,9]; |
| my $file; |
| read FILE, $file, $size; |
| close FILE; |
| $file =~ s/\r\n/\n/g if $crlfmode; |
| print OUT "data ", length($file), "\n", $file, "\n"; |
| |
| return $mtime; |
| } |
| |
| # Parse a key=value pair |
| sub parsekeyvaluepair |
| { |
| =pod |
| |
| =head2 Escaping special characters |
| |
| Key and value strings may be enclosed in quotes, in which case |
| whitespace inside the quotes is preserved. Additionally, an equal |
| sign may be included in the key by preceding it with a backslash. |
| For example: |
| |
| "key1 "=value1 |
| key2=" value2" |
| key\=3=value3 |
| key4=value=4 |
| "key5""=value5 |
| |
| Here the first key is "key1 " (note the trailing white-space) and the |
| second value is " value2" (note the leading white-space). The third |
| key contains an equal sign "key=3" and so does the fourth value, which |
| does not need to be escaped. The fifth key contains a trailing quote, |
| which does not need to be escaped since it is inside a surrounding |
| quote. |
| |
| =cut |
| my $pair = shift; |
| |
| # Separate key and value by the first non-quoted equal sign |
| my ($key, $value); |
| if ($pair =~ /^(.*[^\\])=(.*)$/) |
| { |
| ($key, $value) = ($1, $2) |
| } |
| else |
| { |
| die "Parse error: $pair\n "; |
| } |
| |
| # Unquote and unescape the key and value separately |
| return (&unescape($key), &unescape($value)); |
| } |
| |
| # Unquote and unescape |
| sub unescape |
| { |
| my $string = shift; |
| |
| # First remove enclosing quotes. Backslash before the trailing |
| # quote leaves both. |
| if ($string =~ /^"(.*[^\\])"$/) |
| { |
| $string = $1; |
| } |
| |
| # Second remove any backslashes inside the unquoted string. |
| # For later: Handle special sequences like \t ? |
| $string =~ s/\\(.)/$1/g; |
| |
| return $string; |
| } |
| |
| __END__ |
| |
| =pod |
| |
| =head1 EXAMPLES |
| |
| B<import-directories.perl> F<project.import> |
| |
| =head1 AUTHOR |
| |
| Copyright 2008-2009 Peter Krefting E<lt>peter@softwolves.pp.se> |
| |
| This program is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation. |
| |
| =cut |