#!/bin/bash

# Example script to deltafy an entire GIT repository based on the commit list.
# The most recent version of a file is the reference, and previous versions
# are stored as deltas against the best earlier version available, and so on
# for successive versions going back in time.  This way the increasing delta
# overhead is pushed towards older versions of any given file.
#
# The -d argument lets you limit the delta chain depth.
# If 0 is passed then everything is undeltafied.  Limiting the delta
# depth is meaningful for subsequent access performance to old revisions.
# A value of 16 might be a good compromise between performance and good
# space saving.  The current default is unbounded.
#
# The --max-behind=30 argument is passed to git-mkdelta so as to keep
# combinations and memory usage bounded a bit.  If you have lots of memory
# and CPU power you may remove it (or set it to 0) to let git-mkdelta find the
# best delta match regardless of the number of revisions for a given file.
# You can also make the value smaller to make it faster and less
# memory hungry.  A value of 5 ought to still give pretty good results.
# When set to 0 or omitted, the look behind is unbounded.  Note that
# git-mkdelta might die with a segmentation fault in that case if it
# runs out of memory.  Note that the GIT repository will still be consistent
# even if git-mkdelta dies unexpectedly.

# Abort the whole run as soon as any command fails.
set -e

# Optional "-d <depth>": forwarded to git-mkdelta as --max-depth=<depth>.
# max_depth stays empty by default so that no depth option is passed at all.
max_depth=
if [[ "${1-}" == "-d" ]]; then
  # Without this check a bare "-d" made "shift 2" fail and, because of
  # "set -e", the script died without telling the user why.
  if [[ $# -lt 2 || -z "$2" ]]; then
    printf 'usage: %s [-d <max-depth>]\n' "${0##*/}" >&2
    exit 1
  fi
  max_depth="--max-depth=$2"
  shift 2
fi

# One value, two uses: git-mkdelta's --max-behind window, and the number of
# revisions carried over from one batch to the next by the main loop.
overlap=30
max_behind="--max-behind=$overlap"

#######################################
# Hand one group of object ids to git-mkdelta.
# Globals:
#   list        (read) whitespace-separated object ids; empty => do nothing
#   curr_file   (read) label shown in the progress message
#   max_depth   (read) optional "--max-depth=N" option, may be empty
#   max_behind  (read) optional "--max-behind=N" option, may be empty
# Outputs:
#   "Processing <curr_file>" on stdout, followed by git-mkdelta's own output.
# Returns:
#   0 when list is empty, otherwise the exit status of xargs.
#######################################
process_list() {
  [[ -n "${list-}" ]] || return 0

  printf 'Processing %s\n' "$curr_file"

  # ${var:+"$var"} drops an empty option entirely instead of passing an
  # empty argument, while still protecting a non-empty value from word
  # splitting and globbing.
  printf '%s\n' "$list" |
    xargs git-mkdelta ${max_depth:+"$max_depth"} ${max_behind:+"$max_behind"} -v
}

# State shared with process_list.  Both are only ever filled in inside the
# pipeline subshells below, so they remain empty in the main shell.
curr_file=""
list=""

#######################################
# Deltafy the blobs of every modified path, one batch of revisions at a time.
# Globals:
#   overlap    (read) number of revisions carried over from the previous
#              batch, to account for the processing discontinuity
#   curr_file  (written, pipeline subshell only) path currently being grouped
#   list       (written, pipeline subshell only) object ids collected for it
# Input:
#   Commit ids on stdin, one per line, as printed by git-rev-list.
# Outputs:
#   Whatever process_list prints for each path group.
#######################################
deltafy_file_history() {
  # Revisions are batched into groups of 1000 to give the script a chance to
  # scale with repositories containing long revision lists.
  local -r batch_size=1000
  local -a revs=()
  local rev sha1 file
  local keep start fresh

  while true; do
    # Keep only the last $overlap revisions of the previous batch.  An array
    # is used because rebuilding a newline-joined string through $(...)
    # strips the trailing newline and glues the last kept revision to the
    # first new one.
    keep=$(( ${#revs[@]} < overlap ? ${#revs[@]} : overlap ))
    start=$(( ${#revs[@]} - keep ))
    revs=("${revs[@]:start}")

    fresh=0
    while (( fresh < batch_size )) && IFS= read -r rev; do
      revs+=("$rev")
      fresh=$(( fresh + 1 ))
    done
    # Nothing new was read: the overlap alone has already been processed.
    (( fresh > 0 )) || break

    # awk keeps modified ("M") entries only and emits "<new id> <path>" then
    # "<old id> <path>".  The path is cut out of the whole record rather than
    # taken from $6, so paths containing blanks stay intact.  The stable sort
    # on the path groups ids per path while preserving their input order, and
    # uniq drops ids repeated on adjacent lines.
    printf '%s\n' "${revs[@]}" |
      git-diff-tree -r -t --stdin |
      awk '/^:/ && $5 == "M" {
        path = $0
        sub(/^:[^ \t]*[ \t]+[^ \t]+[ \t]+[^ \t]+[ \t]+[^ \t]+[ \t]+[^ \t]+[ \t]+/, "", path)
        printf "%s %s\n%s %s\n", $4, path, $3, path
      }' |
      LC_ALL=C sort -s -k 2 | uniq |
      {
        curr_file=""
        list=""
        while read -r sha1 file; do
          if [[ "$file" == "$curr_file" ]]; then
            list="$list $sha1"
          else
            process_list
            curr_file="$file"
            list="$sha1"
          fi
        done
        # This group runs in a pipeline subshell, so the last path of the
        # batch has to be flushed here: its list is gone once the pipeline
        # ends, and a process_list call in the parent shell would see
        # nothing.
        process_list
      }

    # A short batch means git-rev-list is exhausted.
    (( fresh == batch_size )) || break
  done
}

git-rev-list HEAD | deltafy_file_history

#######################################
# Print the top-level tree id of every commit listed by git-rev-list HEAD,
# one id per line, in git-rev-list order.
# Outputs:
#   Tree ids on stdout.
#######################################
list_root_trees() {
  local commit
  git-rev-list HEAD |
    while read -r commit; do
      # Only the first line of the commit object is inspected.  With -n in
      # effect, "q" quits exactly like the GNU-only "Q" but is portable.
      # The "^" anchor keeps the substitution from touching a "tree " that
      # is not at the start of the line.
      git-cat-file commit "$commit" |
        sed -n 's/^tree //p;q'
    done
}

# The root trees are deltafied as one more group, labelled "root directory"
# in the progress message printed by process_list.
curr_file="root directory"
list="$(list_root_trees)"
process_list
