##
# pack: Bare-Bones Version 
# 
# Synopsis: pack files along with all related symlinks into a tar archive on a USB stick
#
use File::Basename;
use File::Temp qw/ :mktemp  /;
use File::Find ();
use Getopt::Long;
use POSIX qw(strftime);
# set signal handlers
$SIG{INT}  = \&cleanup;
$SIG{QUIT} = \&cleanup;
#----------------------------------------------------------------------------------------
# CONFIGURATION
#----------------------------------------------------------------------------------------
$config  = "/etc/pack.list";                              # configuration: contains the file list

$flashdir=  ${ENV}{'VOL'} || "/media/sda1";               # set mount point
$tarfile = "$flashdir/actualBackup.2";                    # main archiving file

#----------------------------------------------------------------------------------------
our $DEBUG = ${ENV}{'debug'} || 0;                        # set to numerical value > 0
#----------------------------------------------------------------------------------------
$prog    = basename($0);                                  # program name
$tmpconf = "/tmp/${prog}-conf.tmp";                       # temporary $config
$tmpfile = "/tmp/${prog}-pack.tmp";                       # temporary file
$list    = "/tmp/${prog}-list.tmp";                       # temporary list
$log     = "/tmp/${prog}-$$.log";                         # logfile
$less    = ${ENV}{'PAGER'} || "less";                     # less is more


-e $config || warn "Config file $config missing!";        # sanity test
#----------------------------------------------------------------------------------------
# GLOBAL
use vars qw/*name/;
*name   = *File::Find::name;

our %KNOWN;                                               # links traversed before
our $days;                                                # number of days to consider for archive
our ($see,$unpack, $yes);                                 # global option names
our $hostname;                                            # hostname, used as archive label
#----------------------------------------------------------------------------------------
# determine if gdialog is available and whether X is up
our $dialog = ($ENV{'DISPLAY'} && (qx#which gdialog 2>/dev/null#))? "gdialog" : "dialog";
chomp($hostname = qx/hostname --fqdn/);                   # determine fully-qualified hostname
#----------------------------------------------------------------------------------------
# FUNCTIONS
#----------------------------------------------------------------------------------------
sub log {                                                 # for printing messages
 printf STDERR "\033[1;34;7m@_\033[m\n";
}
##########################################################################
# present error log if there
sub checklog {
  my $title = "" . ((@_)? "@_" : "Error Log");
  if ( -s $log ) {
  system("fmt -w 72 $log > $tmpfile;" .
         "$dialog --backtitle \"ERRORS as below, please redo.\"".
         "        --title \"$title\"  --textbox ${tmpfile} 10 72");
  return 0;                                               # return error
  }
  return 1;                                               # return ok
}
##########################################################################
# remove temporary files
sub cleanup {
  unlink  ($log, $list, $tmpfile, $tmpconf) unless $DEBUG > 1;
  exit ((@_)? 1 : 0);
}
##########################################################################
# simple error routine, checks for error log
# displays a warning box if no error log present
sub error {
  checklog("Error: @_") && system("$dialog --title \"Error\" --msgbox \"@_\" 7 35");
  cleanup 1;
}
##########################################################################
# unpack(): unpack the tarfile given as argument
##########################################################################
sub unpack {
  if ( @_ && -e $_[0] ) {                                 # if file present ...
    ::log "Unpacking $_[0] ...";
    if (system("tar -jxpPvf $_[0] 2>$log") == 0) {        # extract files
      ::log "Storing old archive file in /tmp ...";
      system "mv  $_[0] /tmp 2>>$log";
    }
  }                                                       # else: return quietly
}
##########################################################################
# readLink(): return arguments if these are not links
# otherwise return the list of links in order of their use
# ended by a non-link file
##########################################################################
sub readLink {
  my $link;
  foreach $file (@_) {                                    # repeat for all arguments
    $file =~ s#/$##;                                      # strip trailing slashes
    printf "$file\n";                                     # output filename
    until (! -l ($link=$file)) {                          # handle symlinks iteratively
       $file = readlink($link);
       (-e $file)  or  warn "!stuff $file";
       $file =~ m#^/#  or $file = sprintf("%s/$file", dirname($link)); # symlink to same directory
       printf "$file\n";                                  # output filename
    }
  }
}
##########################################################################
# readlink_recurse(): return argument if not a link, otherwise return the
#            list of links in order of their use ended by a non-link file
##########################################################################
sub readlink_recurse {
 my $file = shift;
 my $link;
 LINK: while( -l ($link=$file) ) {              # handle symlinks iteratively
    $file = readlink($link);
    $file =~ m#^/#  or $file = sprintf("%s/$file", dirname($link)); # symlink to same directory

    # CIRCLE PREVENTION UNIT: if there is a circularity in the symlinks
    # we would like to prevent calling getLinkedFiles again. Therefore, the
    # list KNOWN is maintained. If a symlinked directory is encountered and
    # at the same time found to be in $KNOWN, the control resumes at the
    # label `LINK',  and the recursive call to getLinkedFiles is skipped.
    if ( -d $file ) {
      for my $i (keys %KNOWN) {
        next LINK if $i eq $file                # seen before
                  || ( -l $file                 # pointer to existing subtree
                      && ($i =~ m#${file}.*#  || $file =~ m#${i}.*#));
      }
      getLinkedFiles($file);                    # otherwise: recurse
    } else {
      error "FATAL: $file is a broken link.\n" unless -e $file;
      printf "$file\n";                         # output filename
    }
  }
}
##########################################################################
# getLinkedFiles(): get both the symlink and the file it points to
##########################################################################
sub getLinkedFiles {
  foreach $i (@_) {                                       # repeat for all arguments

   $i =~ s#/$##;                                          # strip trailing slashes
   next  if ${KNOWN}{$i}++;                               #  skip known entries

   if ( -d $i && -l $i) {                                 # 1) DIRECTORIES THAT APPEAR AS SYMLINKS
      print "$i\n";                                       # record as visited
      ::log "Symlinked dir: $i"     if $DEBUG >4;
      readlink_recurse($i);

   } elsif (-d $i) {                                      # 2) PURE DIRECTORIES
       opendir(DIR, $i) || die "can't read directory $i: $!";
       my @files = grep(!/(\.\.?|\Q${i}\E)$/, readdir(DIR));
       closedir(DIR);

       # This alternative has the criterion:  all files in the directory are new
       my $dir_is_new = (($#files + 1) == grep(-e "$i/$_" && ((-M "$i/$_") <= 0), @files));

       if ($dir_is_new) {                                 # 2a) NEW DIRECTORY
        ::log "New dir: $i"     if $DEBUG >4;

        if ( (-M "$i" ) < 0 ) {                           # this prints to LIST, so wants a check
           ::log "ADDING DIRECTORY: $i"     if $DEBUG;    # the *entire* directory is added
           print  LIST "$i\n";                            # only add if this directory is newer
        } else {                                          # old directory: add directory contents
         @files = map { $_ = "$i/$_" } @files;            # prepend directory name
         getLinkedFiles( @files );                        #
        }
       } else {                                           # 2b) OLD DIRECTORY
         ::log "Old dir: $i"     if $DEBUG >4;
         foreach (@files) {
           if (-f "$i/$_" ) {                             # plain file: just print
              print "$i/$_\n";
           } else {
              getLinkedFiles("$i/$_");                    # recurse per each dir entry
           }
         }
       }
   } elsif ( -l $i) {                                     # 3) LINKS
      print "$i\n";                                       # record as visited
      readlink_recurse($i);                               # and recurse again

   } else  {                                              # 4) OTHER (PLAIN) FILES
      print "$i\n";
   }
  }
}
##########################################################################
# parse the configuration file and build the list via recursive subfunction calls
sub build_list  {
  # we are using the simpler variant of timestamps here where the user only
  # states the maximum age of files in terms of $days
  $days = 1 unless $days;                               # set default value

  # rewind script start time in seconds by the number of days
  $^T -= $days * 24 * 3600;

  ::log "Building list from configuration file ...";
  local @ARGV = ($config);                      # args for <>: override global ones
  # create temporary files
  open(TMPLIST,  ">$tmpfile") || error "Can't open temp list $tmpfile: $!";
  open(LIST,     ">$list")    || error "Can't open temp list $list: $!";
  select TMPLIST;                                         # redirect all output to this file

  # Main Processing Loop: read configuration file, decide how to parse, call subroutines
  while(<>) {
   s/#.*//g; s/^\s+//g;                                   # strip comments, leading white space
   s/>/> /;                                               # deal with missing white space
   next if m/^\s*$/;                                      # skip blank lines
   my @arr=split;

   if ($arr[0] =~ m/<\s*rec\s*>/i) {                      # (i)    RECURSIVE DESCENT
     shift @arr;                                          # remove first entry (command)
     getLinkedFiles(@arr);

   } elsif ($arr[0] =~ m/<\s*link\s*>/i) {                # (ii)   LINK ONLY
     shift @arr;                                          # remove first entry (command)
     readLink(@arr);

   } else {                                               # (iii)  FILE ONLY
     foreach (@arr) {
       if (m#[{\*]#) {
         $_ = qx/bash -c "echo $_"/;                      # let bash do the eval
         print STDERR "BASH: $_" if $DEBUG > 2;
         foreach (split) { print "$_\n"; }                # add each file
       } elsif ( -d ) {
          getLinkedFiles($_);                             # a single directory
       } else {
         printf "$_\n";                                   # a file or a link
       }
     }                                                    # end (foreach)
   }                                                      # end (if)
  }                                                       # end (while)
  close TMPLIST;
  select STDOUT;
  %KNOWN = ();                                            # truncate list

  # PROCESS TEMPORARY FILE, LINE BY LINE
  ::log "Selecting files modified during the last $days day(s) ...";
  open(TMPLIST,  "$tmpfile") || error "Can't open temp list $tmpfile: $!";

  while(<TMPLIST>) {                                      # build the list
    chomp;
    s#/[^/]+/(\.\.)+/(\S+)#/$2#g;                         # normalize file names that contain ".."
    error "FATAL:\n \"$_\"\n--$!\n" unless -e $_;         # check if file exists

    next if ( (-M ) >= 0 );                               # skip old files
    print LIST "$_\n" unless $KNOWN{$_}++;                # remove duplicates
  }
  close TMPLIST; close LIST;
  error "Your backup seems up-to date:\nSearch resulted in an empty list." unless %KNOWN;
}
#----------------------------------------------------------------------------------------
# SCRIPT PROPER
#----------------------------------------------------------------------------------------
# GET OPTIONS
Getopt::Long::Configure ("bundling");                     # enable bundling of single-character options
GetOptions (
    'days=i' => \$days,                                   # set the period of backup
    'help'   => \&usage,                                  # help
    'list'   => \$see,                                    # only check the list
    'unpack' => \$unpack,                                 # unpack
    'yes'    => \$yes,                                    # overwrite ok
) || die("Usage: $0 --days n| --list| --unpack [--yes]"); # display help message in case of error

# TEST FOR DEVICE
if ( defined $yes || defined $unpack || not defined $see ) {
  for $trial (1..3) {                                     # check: is USB device mounted
     if (qx#ls -l $flashdir 2>/dev/null# ) {
       $mounted="yes";
       last;
     }
     ::log "Trying to mount device $flashdir ... (attempt $trial)";
     sleep $trial;                                        # wait a second or two
  }
  $mounted || die "Your device does not seem to be mounted on $flashdir -- try again.\n";
}
#----------------------------------------------------------------------------------------
# PROCEED BY CASES
#----------------------------------------------------------------------------------------
if ( $see ) {                                             # VIEW FILE LIST
  build_list();
  system "$less $list";                                   # launch viewer
#----------------------------------------------------------------------------
 } elsif ( $unpack ) {                                    # UNPACK ARCHIVE
    if ( -e $tarfile ) {                                  # check if archive present
      # test first whether the archive was created on this host
      open(TAR, "tar jtf  $tarfile|") || die "Cannot read volume header of $tarfile: $!\n";
      chomp($label = <TAR>);                              # only read the first line: vol. header
      die "Refusing to unpack, archive was created on this host. Try the --yes option.\n"
              if ($label eq $hostname && ! defined $yes);
      ::log "Unpacking ...";
      ::unpack $tarfile;                                  # this does all the work

      checklog()					  # check for errors in logfile
    } else {
      error "$tarfile not present";                       # complain about missing file
    }
#----------------------------------------------------------------------------
 } else {                                                 # BUILD THE ARCHIVE
  if ( ! $yes && -e $tarfile ) {                          # warn before overwriting
   system("$dialog --title \"WARNING:\" --yesno \"Archive $tarfile already exists. Overwrite?\" 8 30");
   $? == 0 || exit;
  }
  build_list();                                           # construct the list

  # TAR OPERATION
  ::log "Creating backup archive file ...";
  system "tar -jcpPvf $tarfile --label $hostname --files-from=$list 2>$log";
  if ($? != 0) {                                          # something went wrong
    unlink $tarfile;
    error "Tar Problem!\nDeleting archive $tarfile";
  }
  printf "Transferred %.0f KB.\n", (-s $tarfile)/1024;    # display the total size

  ::log "Syncing ...";
  system "sync;sync;";                                    # try syncing flash disk

  ::log "Testing file integrity:";                        # verify the bzip2 file
  system "bzip2 -tvv $tarfile";
}
#----------------------------------------------------------------------------
cleanup();
