# Generate a reproducible archive from the content of a directory
#
# $1    : input directory
# $2    : leading component in archive
# $3    : ISO8601 date: YYYY-MM-DDThh:mm:ssZZ
# $4    : output file; it is written after changing into the input
#         directory, so pass an absolute path
# $5... : globs of filenames to exclude from the archive, suitable for
#         find's -path option, and relative to the input directory $1
#
# Returns non-zero, without creating an archive, when the date cannot be
# parsed, the temporary file cannot be created, or the input directory
# cannot be entered.
#
# Notes :
#   - the timestamp is internally rounded to the highest entire second
#     less than or equal to the timestamp (i.e. any sub-second fractional
#     part is ignored)
#   - must not be called with CWD as, or below, the input directory
#   - some temporary files are created in CWD, and removed at the end
#
# Example:
#   $ find /path/to/some/dir
#   /path/to/some/dir/
#   /path/to/some/dir/some-file
#   /path/to/some/dir/some-dir/
#   /path/to/some/dir/some-dir/some-other-file
#
#   $ mk_tar_gz /path/to/some/dir \
#       foo_bar-1.2.3 \
#       1970-01-01T00:00:00Z \
#       /path/to/foo.tar.gz \
#       '.git/*' '.svn/*'
#
#   $ tar tzf /path/to/foo.tar.gz
#   foo_bar-1.2.3/some-dir/some-other-file
#   foo_bar-1.2.3/some-file
#
mk_tar_gz() {
    local in_dir="${1}"
    local base_dir="${2}"
    local date="${3}"
    local out="${4}"
    shift 4
    local glob tmp pax_options
    local -a find_opts

    # Each remaining argument becomes one more "-or -path ./<glob>" term of
    # the exclusion expression; a leading "./" in the glob is tolerated.
    for glob; do
        find_opts+=( -or -path "./${glob#./}" )
    done

    # Drop sub-second precision to play nice with GNU tar's valid_timespec check
    date="$(date -d "${date}" -u +%Y-%m-%dT%H:%M:%S+00:00)" || return 1

    # Strip the per-file timestamps from the extended headers, and pin the
    # extended headers' own name and mtime (GNU tar's default name template
    # embeds the process ID, which would differ on every run).
    pax_options="delete=atime,delete=ctime,delete=mtime"
    pax_options+=",exthdr.name=%d/PaxHeaders/%f,exthdr.mtime={${date}}"

    # The temporary files live in the caller's CWD, i.e. outside of the
    # input directory, so that they do not end up in the archive. Bail out
    # if the name is empty: the work files would otherwise be created
    # inside the input directory.
    tmp="$(mktemp --tmpdir="$(pwd)")" || return 1

    # Without this check, a missing input directory would make find and tar
    # operate on the caller's CWD instead.
    if ! pushd "${in_dir}" >/dev/null; then
        rm -f "${tmp}"
        return 1
    fi

    # Establish list
    find . -not -type d -and -not \( -false "${find_opts[@]}" \) >"${tmp}.list"
    # Sort list for reproducibility
    LC_ALL=C sort <"${tmp}.list" >"${tmp}.sorted"

    # Create POSIX tarballs, since that's the format the most reproducible
    tar cf - --transform="s#^\./#${base_dir}/#S" \
        --numeric-owner --owner=0 --group=0 --mtime="${date}" \
        --format=posix --pax-option="${pax_options}" \
        -T "${tmp}.sorted" >"${tmp}.tar"

    # Compress the archive; -n keeps the original name and timestamp out of
    # the gzip header.
    gzip -6 -n <"${tmp}.tar" >"${out}"

    # Remove the file created by mktemp itself as well as the derived ones.
    rm -f "${tmp}"{,.list,.sorted,.tar}

    popd >/dev/null
}

# Keep this line and the following as last lines in this file.
# vim: ft=bash