#!/bin/bash
#
# Script to unpack the databases downloaded from Storage Element
# to local scratch directory on the node. Used by HMMER and BLAST
# runtime environments. Handles tarred and gzipped databases.
#
# Usage: prepare_db database1.fasta.gz [database2.fasta.tar.gz]
#
# Either the BLASTDB or the HMMER_DB_DIR environment variable must be set
# and point to the directory to unpack the files into. If both are set,
# HMMER_DB_DIR takes precedence.
# 
# author: Olli Tourunen
#

# Check that there is at least one (non-empty) argument.
# The usage text is printed on an error path, so it goes to stderr.
# ${0##*/} yields the script's base name without spawning basename and
# without breaking when the script path contains whitespace.
if [[ -z "${1:-}" ]]; then
  echo "Usage: ${0##*/} file1[.tar].gz [file2[.tar].gz] etc." 1>&2
  exit 1
fi

# Select the directory to unpack into from the environment.
#
# Sets:
#   targetDir        - value of HMMER_DB_DIR if it is non-empty,
#                      otherwise the value of BLASTDB
#   targetDirVarName - name of the variable that was chosen
#                      (used only in log messages)
#
# Exits with status 1 if neither variable is set to a non-empty value.
# All diagnostics, including the warning, go to stderr so they do not mix
# with output written to stdout.
if [[ -n "${HMMER_DB_DIR:-}" ]]; then
  if [[ -n "${BLASTDB:-}" ]]; then
    echo "${0##*/} WARNING: both BLASTDB and HMMER_DB_DIR are set, choosing HMMER_DB_DIR" 1>&2
  fi
  targetDir=$HMMER_DB_DIR
  targetDirVarName="HMMER_DB_DIR"
elif [[ -n "${BLASTDB:-}" ]]; then
  targetDir=$BLASTDB
  targetDirVarName="BLASTDB"
else
  echo "${0##*/} ERROR: Neither BLASTDB nor HMMER_DB_DIR is set" 1>&2
  exit 1
fi

# Check that targetDir is an existing directory.
# The expansion is quoted: unquoted, a path containing whitespace makes the
# test command fail with a syntax error, which the `if` treats as "false",
# so the check would be skipped silently instead of reporting the problem.
if [[ ! -d "$targetDir" ]]; then
  echo "${0##*/} ERROR: $targetDirVarName does not point to a valid directory" 1>&2
  exit 1
fi

# Check that the argument files can be read.

#######################################
# Verify that every given path is a readable non-directory.
# Arguments: the input files passed to the script
# Outputs:   an error message on stderr for the first offending path
# Returns:   0 if all paths are fine; otherwise terminates the shell
#            with exit status 1
#######################################
check_input_files() {
  local candidate
  for candidate in "$@"; do
    # -r matches the wording of the message (an existing but unreadable
    # file must be rejected too); a directory can never be unpacked.
    if [[ -d "$candidate" || ! -r "$candidate" ]]; then
      echo "${0##*/} ERROR: File $candidate cannot be read" 1>&2
      exit 1
    fi
  done
}

# "$@" keeps each argument intact, even with whitespace or glob characters.
check_input_files "$@"

# Do the actual extraction.

#######################################
# Unpack a single database file into a directory.
#
# The handler is chosen from the file name suffix (case-insensitive, and
# the suffix must be preceded by a dot so that e.g. "guitar.gz" is not
# mistaken for a tarball):
#   *.tar.gz, *.tgz  -> extracted with tar (gzip-compressed)
#   *.tar            -> extracted with tar
#   anything else    -> decompressed with gunzip into a file named after
#                       the input with a trailing ".gz" removed
#
# Arguments: $1 - path of the file to unpack
#            $2 - directory to unpack into
# Outputs:   tar's verbose member listing on stdout
# Returns:   0 on success, non-zero if tar or gunzip failed
#######################################
unpack_db_file() {
  local archive=$1
  local dest_dir=$2
  local name=${archive##*/}
  local out_name

  case "$name" in
    *.[Tt][Aa][Rr].[Gg][Zz] | *.[Tt][Gg][Zz])
      tar -xzvf "$archive" -C "$dest_dir"
      ;;
    *.[Tt][Aa][Rr])
      tar -xvf "$archive" -C "$dest_dir"
      ;;
    *)
      # If none of the above matched, just try to gunzip the file.
      out_name=${name%.[Gg][Zz]}
      # A file called exactly ".gz" keeps its name, as basename would do.
      [[ -n "$out_name" ]] || out_name=$name
      if ! gunzip -c -- "$archive" > "$dest_dir/$out_name"; then
        # Do not leave a truncated database behind for later job steps.
        rm -f -- "$dest_dir/$out_name"
        return 1
      fi
      ;;
  esac
}

# Stop at the first file that fails to unpack, so that a broken or
# incomplete database is reported instead of being silently ignored.
for inputfile in "$@"; do
  if ! unpack_db_file "$inputfile" "$targetDir"; then
    echo "${0##*/} ERROR: Failed to unpack $inputfile into $targetDir" 1>&2
    exit 1
  fi
done