#!/usr/bin/perl
#

##########################################################################################################################
#                                                                                                                        #
# autoAug.pl                                                                                                             #
# train and run AUGUSTUS automatically                                                                                   #
#                                                                                                                        #
# usage:                                                                                                                 #
# autoAug.pl [OPTIONS] --genome=genome.fa --trainingset=trainingfile --species=speciesname --cdna=cdnafile               #
#                                                                                                                        #
##########################################################################################################################

use Getopt::Long;
use Cwd;

use File::Spec::Functions qw(rel2abs);
use File::Basename qw(dirname basename);

# Resolve the location of this script at compile time, so that the directory
# of the script can be added to the module search path before helpMod is loaded.
BEGIN {
    $0=rel2abs($0);                   # turn the script path into an absolute path
    our $directory = dirname($0);     # package variable, so that it is visible to "use lib" below
}
use lib $directory;                   # search the script's own directory for modules
use helpMod qw(find checkFile relToAbs uptodate);
use Term::ANSIColor qw(:constants);
use DBI;
use strict;

my $scriptPath=dirname($0);           # path of the directory in which this script is located

my $genome;                           # name of sequence file
my $trainingset;                      # name of training set file
my $species;                          # species name
my $hints;                            # hints file name
my $havehints=0;                      # have hints at all or not?
my $estali;                           # cDNA/EST alignment file (PSL), used for the UTR training
my $positionWD=cwd();                 # working superdirectory, defaults to where the program is called from
my $pasa='';                          # switch it on to create training set, est set, hints file with PASA
my $pasapolyAhints;                   # use pasa Poly A hints as hints for the prediction
my $fasta_cdna;                       # fasta file for PASA
my $verbose=2;                        # verbose level
my $singleCPU=0;                      # run everything sequentially without interruption
my $maxIntronLen = 100000;            # maximal length of an intron, used by PASA and BLAT
my $noninteractive;                   # parameter for autoAugPred.pl
my $cname="fe";                       # parameter for autoAugPred.pl:cluster name
my $nodeNum=20;                       # parameter for autoAugPred.pl
my $optrounds=1;                      # optimization rounds
my $useGMAPforPASA=0;                 # use GMAP instead of BLAT (only for PASA)
my $useexisting=0;                    # start with and change existing config, parameter and result files
my $utr=1;                            # default value: with "utr" if cDNA exists
my $flanking_DNA='';                  # length of flanking DNA, default value is min{ave. gene length, 10000}
my $help=0;                           # print usage
my $useexistingopt = "";              # becomes "--useexisting" if that option was given; passed on to autoAugTrain.pl
my $autoAugDir_abinitio;              # $rootDir/autoAugPred_abinitio
my $autoAugDir_hints;                 # $rootDir/autoAugPred_hints
my $autoAugDir_hints_utr;             # $rootDir/autoAugPred_hints_utr
my $autoAugDir_utr;                   # $rootDir/autoAugPred_utr
my $autoAugDir;                       # prediction directory currently in use, chosen in autoAug_prepareScripts
my $shells_path;
my $trainDir;                          # directory for creating the training set
my $index=0;                          # step index (option --index)
my $shellDir;
my $aug;
my $perlCmdString;                    # to store perl commands
my $cmdString;                        # to store shell commands



# Usage text. It is printed when the script is called without arguments or
# with --help, and it is appended to some of the error messages below.

my $usage =  <<_EOH_;

Function: train AUGUSTUS and run AUGUSTUS completly and automatically

Usage:

autoAug.pl [OPTIONS] --species=sname --genome=genome.fa --cdna=cdna.fa --trainingset=genesfile
autoAug.pl [OPTIONS] --species=sname --genome=genome.fa --cdna=cdna.fa --pasa
autoAug.pl [OPTIONS] --species=sname --genome=genome.fa --trainingset=genesfile [--estali=cdna.psl] [--hints=hints.gff]

--genome=fasta                      fasta file with DNA sequences for training
--trainingset=genesfile             genesfile contains training genes in Genbank, GFF or protein FASTA format
--species=sname                     species name as used by AUGUSTUS
--hints=hints.gff                   hints for gene predictions with AUGUSTUS
--estali=cdna.psl                   cDNA alignments in PSL format (as generated by BLAT and GMAP) are used to construct UTRs
--pasa                              use PASA to construct a training set
--cdna=cdna.fa                      a fasta file with cDNA sequences (ESTs, mRNA)
--pasapolyAhints                    use PASA polyA hints as hints for the prediction

options:
--useexisting                       use and change the present config and parameter files if they exist for 'species'
--verbose                           print more status info. Cumulative option, e.g. use -v -v -v to make this script very verbose
--noutr                             do not train and predict UTRs.
--workingdir=/path/to/wd/           In the working directory results and temporary files are stored.
                                    Default: current working directory
--singleCPU                         run the complete program sequentially instead of parallel execution of jobs on a cluster
--noninteractive                    bypass all manual interaction when using a SGE cluster
--cname=yourClusterName             cluster name, only use it wenn "noninteractive" default:fe
--index=i                           step index, default:0 
--optrounds=n                       optimization rounds - each meta parameter is optimized this often (default 1)
--maxIntronLen=n                    maximal length of an intron as used by PASA and BLAT, not by AUGUSTUS (default 100000)
--useGMAPforPASA                    use GMAP instead of BLAT in the PASA run
--help                              print this usage information
_EOH_
    
    ;

# Without any argument there is nothing to do: show the usage text and stop.
if (@ARGV==0) {print "$usage\n"; exit(0);}

# Parse the command line. GetOptions returns false when it met an unknown,
# ambiguous or malformed option (it reports the details on STDERR itself).
# Stop in that case instead of silently continuing with default values.
GetOptions( 'genome=s' => \$genome,
            'trainingset=s' => \$trainingset,
            'species=s' => \$species,
            'hints=s' => \$hints,
            'estali=s' => \$estali,
            'workingdir=s' => \$positionWD,
            'pasa!' => \$pasa,
            'singleCPU!' => \$singleCPU,
            'cdna=s' => \$fasta_cdna,
            'verbose+' => \$verbose,
            'noninteractive' => \$noninteractive,
            'cname=s' => \$cname,
            'index=i' => \$index,
            'optrounds=i' => \$optrounds,
            'maxIntronLen=i' => \$maxIntronLen,
            'useGMAPforPASA!' => \$useGMAPforPASA,
            'useexisting!' => \$useexisting,
            'utr!' => \$utr,
            'help!' => \$help,
            'pasapolyAhints!' => \$pasapolyAhints
        ) or die("Error: Could not parse the command line options.\n$usage\n");

if ($help) {print "$usage\n"; exit(0);}

             ############ make some regular checks ##############


# check upfront whether any common problems will occur later. So the user doesn't have to wait a long time to
# find out that some programs are not installed.
check_upfront();

# Validate the mandatory arguments before anything is created on disk. Otherwise a
# failed call leaves the directory autoAug behind and blocks the corrected second call.

# show error information and stop the program if $species not specified
die("Error: Need to specify the species!\n$usage") unless($species);

# check genome file
$genome = checkFile($genome, "fasta", $usage);

# directory structure:
# $positionWD = cwd  -  $rootDir = autoAug  -   = trainingSet
#                                           -   = autoAugTrain
#                                           -   = autoAugPred_abinitio
#                                           -   = autoAugPred_hints
#                                           -   = autoAugPred_hints_utr
#                                           -   = results

# Overwrite $positionWD with its absolute path BEFORE any path is derived from it.
# The script changes its working directory later, so a relative $rootDir would no
# longer point to the right place.
$positionWD=relToAbs($positionWD);
die("Error: Did not find the directory $positionWD! Please specify a valid one for \"workingdir\"! \n") unless (-d $positionWD);

# check the write permission of $positionWD before building of the work directory
die("Do not have write permission for $positionWD.\nPlease use command 'chmod' to reset permission " .
    "or specify another working directory\n") if (! -w $positionWD);

my $rootDir="$positionWD/autoAug";
die ("$rootDir already exists. Reuse with --useexisting or use another directory with --workingdir=dir")
    if (!$useexisting && -d $rootDir);

my $AUGUSTUS_CONFIG_PATH=$ENV{'AUGUSTUS_CONFIG_PATH'};

# check species directory
die("$AUGUSTUS_CONFIG_PATH/species/$species already exists. Choose another species name or delete this directory to start from scratch.\n")
    if (-d "$AUGUSTUS_CONFIG_PATH/species/$species" && !$useexisting && ($noninteractive || $index==0));

# all checks passed, now the work directory may be created
if (! -d $rootDir) {
    mkdir "$rootDir" or die ("Could not create directory $rootDir\n");
}
$autoAugDir_abinitio = "$rootDir/autoAugPred_abinitio";
$autoAugDir_hints = "$rootDir/autoAugPred_hints";
$autoAugDir_hints_utr = "$rootDir/autoAugPred_hints_utr";
$autoAugDir_utr = "$rootDir/autoAugPred_utr";

$useexistingopt = "--useexisting" if ($useexisting);

# translate the verbosity level into the -v options that are handed to the called scripts
my $verboseString;
if ($verbose==0) {
    $verboseString='';
} elsif ($verbose==1) {
    $verboseString="-v";
} elsif ($verbose==2) {
    $verboseString='-v -v';
} elsif ($verbose>2) {
    $verboseString='-v -v -v';
}
#print "First column: verbosity level x, only print this line if $verbose >= x \n\n\n";

# hints are available if a hints file, a cDNA file or cDNA alignments were specified
$havehints = (defined($hints) || defined($fasta_cdna) || defined($estali));
$fasta_cdna = checkFile($fasta_cdna, "fasta", $usage) if (defined($fasta_cdna));
$trainingset = checkFile($trainingset,"training genes", $usage) if($index==0 && defined($trainingset));
$hints = checkFile($hints,"hints", $usage) if (defined($hints));
$estali = checkFile($estali,"EST alignment", $usage) if (defined($estali));

# Main control flow. $index selects the step at which the script (re)starts;
# the preparation steps below are only run for step index 0.
training_set_dirs() if($index==0);

# With --pasa the training set is the file training.gb below $trainDir. It is
# constructed with PASA unless uptodate() reports it as current for the given
# genome and cDNA file.
if($pasa && $index==0){
    $trainingset = "$trainDir/training/training.gb";
    if (!uptodate([$genome,$fasta_cdna], [$trainingset])){
	construct_training_set();
    } else {
	print ("1 Skipping training set construction with PASA. Using existing file $trainingset.\n") if ($verbose>=1);
    }
}

# Create the genome summary and, if a cDNA file was given, alignments and hints.
# NOTE(review): this block is entered when only ONE of $hints/$estali is missing,
# and both variables are overwritten at its end - a file specified by the user
# with --hints or --estali is then replaced. Confirm that this is intended.
# NOTE(review): the else branch below is also taken when no cDNA file was given
# at all, in which case the two files assigned at the end need not exist.
if($index==0 && (!defined($hints) || !defined($estali))){
    if (!uptodate(["$rootDir/seq/genome.fa"],["$rootDir/seq/genome.summary", "$rootDir/seq/contigs.gff"])){
	prepare_genome();
    }
    
    if (defined($fasta_cdna) &&
	!uptodate([$fasta_cdna, "$rootDir/seq/genome.fa"],
		  ["$rootDir/cdna/cdna.psl", "$rootDir/cdna/cdna.f.psl", "$rootDir/hints/hints.E.gff"])){
	alignments_and_hints();
    } else {
	print "1 Using existing cDNA alignments and hints.\n" if ($verbose>=1);
    }
    $hints = "$rootDir/hints/hints.E.gff";
    $estali = "$rootDir/cdna/cdna.f.psl";
}

# Step 1: training without UTR models
autoTrain_no_utr() if ($noninteractive or $index==0);

if ($noninteractive){
    # noninteractive mode: all prediction runs are done in this single call
    autoAug_noninteractive("",""); # without hints
    autoAug_noninteractive("1","") if ($havehints); # with hints
    # NOTE(review): this branch tests defined($hints), the interactive branch below tests $havehints
    if ($utr && defined($hints)){
	autoTrain_with_utr();
	autoAug_noninteractive("1","1");
    }
} else {
    # interactive mode: one step per call, selected by $index.
    # With --singleCPU the index is advanced here, so that all steps run in this call.
    autoAug_prepareScripts("","") if ($index==0);
    $index++ if ($singleCPU);

    if($index==1){
	autoAug_continue("",""); # without hints
	autoAug_prepareScripts("1","") if ($havehints);# with hints
	$index++ if ($singleCPU);
    }
    if($index==2){
	autoAug_continue("1","") if ($havehints);# with hints
	if ($utr && ($havehints)){
	    autoTrain_with_utr();
	    autoAug_prepareScripts("1","1");
	}
	$index++ if ($singleCPU);
    }
    autoAug_continue("1","1") if ($index==3 && $utr && ($havehints));
}

# the final collect() call is made after the last step only
collect() if($noninteractive or $index==3);

                           ############### sub functions ##############


        ##################### construct training set with pasa ######################

sub training_set_dirs {
    # Build the directory structure for the training set construction (e.g. PASA)
    # below $rootDir and link the genome file to $rootDir/seq/genome.fa.
    #
    # Uses the file-level variables $rootDir, $genome and $verbose and sets $trainDir.
    # Changes the current working directory (to $rootDir/seq at the end).
    # Dies if a directory cannot be entered or created or if the link cannot be made.
    # Directories that already exist (e.g. with --useexisting) are kept.

    chdir $rootDir or die ("Could not change to directory $rootDir.\n");
    $trainDir="$rootDir/trainingSet";

    # directories directly below $rootDir (the current directory)
    foreach my $dir ($trainDir, "seq", "hints", "cdna"){
        next if (-d $dir);
        print "3 mkdir $dir\n" if ($verbose>=3);
        mkdir($dir) or die("\nError: Could not create directory $dir.\n");
    }

    # build subdirectory structure

    chdir "$trainDir" or die("\nError: Could not cd to directory $trainDir.\n");

    foreach my $dir ("gbrowse", "pasa", "training"){
        next if (-d $dir);
        print "3 mkdir $dir\n" if ($verbose>=3);
        mkdir($dir) or die("\nError: Could not create directory $trainDir/$dir.\n");
    }

    print "2 All necessary directories have been created unter $trainDir.\n" if ($verbose>=2);

    # build symbolic link for $genome

    print "3 cd $rootDir/seq\n" if ($verbose>=3);
    chdir "$rootDir/seq" or die("\nError: Could not cd to directory $rootDir/seq.\n");
    if (!uptodate([$genome], ["genome.fa"])){
        # -f replaces an outdated genome.fa, plain "ln -s" would fail if it exists.
        # The list form of system passes the file name unchanged (no shell involved).
        print "3 ln -fs $genome genome.fa\n" if($verbose>=3);
        system("ln", "-fs", $genome, "genome.fa")==0 or die("\nfailed to execute ln -fs $genome genome.fa\n");
    }
}

sub DropDataBase {
  # Drop the MySQL database $database on host $hostname if it exists.
  #
  # Parameters: host name, database name, user and password of a MySQL account.
  # Callers pass a fifth argument (a reference to a handle variable). It was never
  # used for anything and is ignored; it is still accepted so that calls stay valid.
  #
  # Dies if the connection or one of the statements fails (RaiseError is on).
  my ($hostname, $database, $user, $pass) = @_;

  # connect to the database "mysql" on the host, not to the one that is to be dropped
  my $dsn  = "DBI:mysql:database=mysql;host=$hostname";

  # Constructor and Connection
  my $dbh = DBI->connect( $dsn, $user, $pass, { 'RaiseError' => 1, 'AutoCommit' => 1 } )
      or die $DBI::errstr;

  # look for the database in the list of all databases
  my $sth = $dbh->prepare("SHOW DATABASES;");
  $sth->execute();
  my $dbexists = 0;
  while (my @db = $sth->fetchrow_array()) {
      $dbexists = 1 if ($db[0] eq $database); # db is array of length 1 holding dbname
  }
  $sth->finish();

  if ($dbexists) {
      print "1 Dropping MySQL database $database on host $hostname.\n" if ($verbose>=1);
      # quote the name, so that unusual characters in it cannot break the statement
      $dbh->do("DROP DATABASE " . $dbh->quote_identifier($database));
  } else {
      # level 3 message: the first column of a message is the verbosity level it belongs to
      print "3 MySQL database $database does not exist on $hostname.\n" if ($verbose>=3);
  }
  $dbh->disconnect();
}

# Helper of construct_training_set: set the parameter "stopCodonExcludedFromCDS"
# in the AUGUSTUS parameter file $cfgFile to $value ("true" or "false").
# The file is rewritten via a temporary file next to it. Dies on I/O errors.
sub _setStopCodonExcludedFromCDS {
    my ($cfgFile, $value) = @_;
    my $tmpFile = "$cfgFile.tmp";
    open(my $in, '<', $cfgFile) or die("\nCould not open $cfgFile: $!\n");
    open(my $out, '>', $tmpFile) or die("\nCould not open $tmpFile: $!\n");
    while (my $line = <$in>) {
        # replace only the value, keep the rest of the line (e.g. a comment)
        $line =~ s/^(\s*stopCodonExcludedFromCDS\s+)\S+/$1$value/;
        print $out $line;
    }
    close($in);
    close($out) or die("\nCould not write $tmpFile: $!\n");
    rename($tmpFile, $cfgFile) or die("\nCould not replace $cfgFile: $!\n");
}

sub construct_training_set{
    # Step 0: construct a training set of genes with PASA.
    #
    # Works in $trainDir/pasa and $trainDir/training:
    #   - runs seqclean and the PASA alignment assembly on the cDNA file
    #   - extracts the complete genes from the PASA training set candidates
    #   - converts them to Genbank format (trainingSetComplete.gb)
    #   - runs etraining to find problematic genes and writes training.gb without them
    #
    # Uses the file-level variables $trainDir, $rootDir, $genome, $fasta_cdna, $species,
    # $maxIntronLen, $useGMAPforPASA, $useexisting, $AUGUSTUS_CONFIG_PATH and $verbose.
    # Requires the environment variable PASAHOME. Changes the current working
    # directory (to $trainDir/training at the end). Dies if a step fails.

    print "\n\n1 ####### Step 0: Creating training set with genes using PASA #######\n\n" if ($verbose>=1);

    my $PASAHOME=$ENV{'PASAHOME'};
    die("Error: The environment variable PASAHOME is undefined.\n") unless $PASAHOME;

    # run seqclean

    print "3 cd $trainDir/pasa\n" if ($verbose>=3);
    chdir "$trainDir/pasa" or die ("Cannot change directory to $trainDir/pasa\n");

    if (!uptodate([$fasta_cdna], ["transcripts.fasta"])){
        print "3 ln -fs $fasta_cdna transcripts.fasta\n" if ($verbose>=3);
        system("ln", "-fs", $fasta_cdna, "transcripts.fasta")==0
            or die ("failed to execute: ln -fs $fasta_cdna transcripts.fasta\n");
    }

    if (!uptodate(["transcripts.fasta"], ["transcripts.fasta.clean"])){
        $perlCmdString="perl $PASAHOME/seqclean/seqclean/seqclean transcripts.fasta 1>seqclean.stdout 2>seqclean.stderr";
        print "2 Running $perlCmdString ..." if ($verbose>=2);
        system("$perlCmdString")==0 or die ("failed to execute: $perlCmdString\n");
        print " Finished!\n" if ($verbose>=2);
    } else {
        print ("2 Skipping seqclean. Using existing transcripts.fasta.clean.\n") if ($verbose>=2);
    }

    # set appropriate values in file "alignAssembly.config"
    my $pasaDBname = "PASA$species";
    $pasaDBname =~ s/\./_/g; # replace "." by "_" in species name for MySQL database because it is not allowed there

    my $configTemplate = "$PASAHOME/pasa_conf/pasa.alignAssembly.Template.txt";
    if (!uptodate([$configTemplate], ["alignAssembly.config"])){
        print "3 Setting appropriate values in alignAssembly.config\n" if ($verbose>=3);
        # fill in the template and write the result to a temporary file first
        open(my $template, '<', $configTemplate) or die ("Cannot open file $configTemplate!\n");
        open(my $config, '>', "alignAssembly.config.tmp") or die("\nCannot open file alignAssembly.config.tmp\n");
        while(my $line = <$template>){
            $line =~ s/<__MYSQLDB__>/$pasaDBname/;
            $line =~ s/<__MAX_INTRON_LENGTH__>/$maxIntronLen/;
            $line =~ s/<__MIN_PERCENT_ALIGNED__>/0.8/;
            $line =~ s/<__MIN_AVG_PER_ID__>/0.9/;
            print $config $line;
        }
        close($template);
        close($config) or die("\nCould not write alignAssembly.config.tmp: $!\n");
        rename("alignAssembly.config.tmp", "alignAssembly.config")
            or die("\nCould not create alignAssembly.config: $!\n");

        $cmdString="chmod a+x alignAssembly.config";
        print "3 $cmdString\n" if ($verbose>=3);
        system("$cmdString")==0 or die ("failed to execute: $cmdString\n");
        print "3 Adjusted alignAssembly.config\n" if ($verbose>=3);
    } else {
        print ("2 Using existing alignAssembly.config.\n") if ($verbose>=2);
    }

    # executing the Alignment Assembly

    if (!uptodate([$genome, "alignAssembly.config", "transcripts.fasta", "transcripts.fasta.clean"],
                  ["trainingSetCandidates.gff", "pasa_asmbls_to_training_set.stdout"])){
        print "3 ln -fs $genome genome.fasta\n" if ($verbose>=3);
        system("ln", "-fs", $genome, "genome.fasta")==0 or die("\nfailed to execute ln -fs $genome genome.fasta\n");

        my $gmapoption = "";
        $gmapoption = "--USE_GMAP" if ($useGMAPforPASA);

        print "3 Reading MySQL variables from $PASAHOME/pasa_conf/\n" if ($verbose>=3);
        open(my $pasaConf, '<', "$PASAHOME/pasa_conf/conf.txt") or die("\nCould not open $PASAHOME/pasa_conf/conf.txt!\n");
        my $MYSQLSERVER;
        my $MYSQL_RO_USER;
        my $MYSQL_RO_PASSWORD;
        my $MYSQL_RW_USER;
        my $MYSQL_RW_PASSWORD;
        while(<$pasaConf>){
            $MYSQLSERVER=$1       if /MYSQLSERVER=(.*)/;
            $MYSQL_RO_USER=$1     if /MYSQL_RO_USER=(.*)/;
            $MYSQL_RO_PASSWORD=$1 if /MYSQL_RO_PASSWORD=(.*)/;
            $MYSQL_RW_USER=$1     if /MYSQL_RW_USER=(.*)/;
            $MYSQL_RW_PASSWORD=$1 if /MYSQL_RW_PASSWORD=(.*)/;
        }
        close($pasaConf);
        print "0 Warning: MYSQL_RO_PASSWORD is empty!\n" if (! $MYSQL_RO_PASSWORD);

        # a database left over from a previous run would make PASA fail
        my $dbh;
        if ($useexisting) {
            DropDataBase($MYSQLSERVER, $pasaDBname, $MYSQL_RW_USER, $MYSQL_RW_PASSWORD, \$dbh);
        }

        $perlCmdString="perl $PASAHOME/scripts/Launch_PASA_pipeline.pl -c alignAssembly.config -C -R -g genome.fasta "
            ."-t transcripts.fasta.clean -T -u transcripts.fasta $gmapoption 1>Launch_PASA_pipeline.stdout 2>Launch_PASA_pipeline.stderr";

        print "2 Executing the Alignment Assembly: $perlCmdString ..." if ($verbose>=2);
        my $abortString;
        $abortString = "\nFailed to execute, possible reasons could be:\n";
        $abortString.= "1. There is already a database named \"$pasaDBname\" in your mysql host.\n";
        $abortString.= "2. The software \"slclust\" is not installed correctly, try to install it";
        $abortString.= " again (see the details in the PASA documentation).\n";
        $abortString.= "Inspect $trainDir/pasa/Launch_PASA_pipeline.stderr for PASA error messages.\n";

        system("$perlCmdString")==0 or die ("$abortString");
        print " Finished\n" if ($verbose>=2);

        $perlCmdString="perl $PASAHOME/scripts/pasa_asmbls_to_training_set.dbi -M \"$pasaDBname:$MYSQLSERVER\" -p "
            ."\"$MYSQL_RO_USER:$MYSQL_RO_PASSWORD\" -g genome.fasta 1>pasa_asmbls_to_training_set.stdout 2>pasa_asmbls_to_training_set.stderr";

        print "2 Running $perlCmdString ..." if ($verbose>=2);
        system("$perlCmdString")==0 or die ("failed to execute: $perlCmdString\n");
        print " Finished\n" if ($verbose>=2);

        print ("2 Cleaning up after PASA ...\n") if ($verbose>=2);
        my @filesToDelete=("output.assembly_building.out" ,
                           "output.tophits" ,
                           "output.tophits.btab" ,
                           "blat_validations" ,
                           "BLAT_DIR" ,
                           "output.alignment_assemblies.out" ,
                           "output.subclusters.out");
        foreach my $file (@filesToDelete) {
            print "3 Deleting $file\n" if ($verbose>=3);
            system("rm", "-rf", $file); # best effort, a failure here is not fatal
        }
        #dropping pasa database
        DropDataBase($MYSQLSERVER, $pasaDBname, $MYSQL_RW_USER, $MYSQL_RW_PASSWORD, \$dbh);
    } else {
        print ("2 Skipping PASA training set creation. Using existing trainingSetCandidates.gff.\n") if ($verbose>=2);
    }

    # All following files are created in the directory "training". Change to it in any
    # case, so that the steps below do not depend on whether files are reused.
    print "3 cd ../training\n" if ($verbose>=3);
    chdir "../training" or die ("Could not change directory to training!\n");

    # old PASA version (at least before January 2011, probably older) produce different output files than new PASA version:
    my $candidatesGff = (-e "../pasa/trainingSetCandidates.gff") ?
        "../pasa/trainingSetCandidates.gff" : "../pasa/trainingSetCandidates.gff3";
    my $candidatesSeq = (-e "../pasa/trainingSetCandidates.fasta") ?
        "../pasa/trainingSetCandidates.fasta" : "../pasa/trainingSetCandidates.cds";

    # find complete genes in candidate training file
    if (!uptodate([$candidatesGff], ["trainingSetComplete.gff"])){
        # The one-liner turns each header of a complete gene into "name$". The "$"
        # anchors the name at the end of the line when the list is used with grep -f.
        $cmdString = 'grep complete ' . $candidatesSeq . ' | perl -pe \'s/>(\S+).*/$1\$/\'';
        print "3 $cmdString 1> pasa.complete.lst\n" if ($verbose>=3);
        system("$cmdString 1> pasa.complete.lst")==0 or die("\nfailed to execute $cmdString\n");
        if (! -e "pasa.complete.lst" || -z "pasa.complete.lst"){
            die ("PASA has not constructed any complete training gene. Training aborted because of insufficient data.\n");
        }
        $cmdString="grep -f pasa.complete.lst $candidatesGff >trainingSetComplete.temp.gff";
        print "2 Running \"$cmdString\" ..." if ($verbose>=2);
        system("$cmdString")==0 or die("\nfailed to execute $cmdString\n");
        print " Finished!\n" if ($verbose>=2);

        # sort trainingSetComplete.temp.gff for gff2gbSmallDNA.pl later

        $cmdString='cat trainingSetComplete.temp.gff | perl -pe \'s/\t\S*(asmbl_\d+).*/\t$1/\' | sort '
            .'-n -k 4 | sort -s -k 9 | sort -s -k 1,1 > trainingSetComplete.gff';

        print "2 Running \"$cmdString\" ..." if ($verbose >=2);
        system("$cmdString")==0 or die("\nfailed to execute $cmdString\n");
        print " Finished!\n" if ($verbose >=2);
    }

    # calculate the average gene length
    open(my $candidates, '<', $candidatesGff) or die("\nCould not open $candidatesGff\n");
    my $sum=0;
    my $n=0;
    while(my $line = <$candidates>){
        next unless ($line =~ /\tgene\t/);
        # columns 4 and 5 of a GFF line hold start and end of the feature
        my @field = split(' ', $line);
        $sum += $field[4]-$field[3]+1;
        $n++;
    }
    close($candidates);
    die("Error: Found no gene in $trainDir/pasa. Cannot compute the average gene length.\n") if ($n==0);
    print "1 Average gene length in the training set is " . sprintf ("%.2f", ($sum/$n)) . "\n" if ($verbose >=1);

    # set flanking DNA
    # NOTE(review): this is a local variable, the file-level $flanking_DNA is not changed
    # here (as before). Confirm whether other steps expect the file-level one to be set.

    my $flanking_DNA = int($sum/$n);
    $flanking_DNA = 10000 if ($flanking_DNA > 10000);
    $flanking_DNA = 1000 if ($flanking_DNA < 1000);
    print "2 The length of flanking DNA is set as $flanking_DNA accordingly.\n" if ($verbose>=2);

    # convert file format from gff to gb

    my $string=find("gff2gbSmallDNA.pl");
    print "3 Found script $string.\n" if ($verbose>=3);

    $perlCmdString="perl $string trainingSetComplete.gff $rootDir/seq/genome.fa $flanking_DNA "
        ."trainingSetComplete.gb 1>gff2gbSmallDNA.stdout 2>gff2gbSmallDNA.stderr";

    print "3 $perlCmdString\n" if ($verbose>=3);
    system("$perlCmdString")==0 or die ("failed to execute: $perlCmdString\n");

    # let etraining find problematic genbank entries

    # count the number of entries in trainingSetComplete.gb

    my $num_TSC=`grep -c ^LOCUS trainingSetComplete.gb`;
    $num_TSC = 0 unless (defined($num_TSC) && $num_TSC =~ /^\d+/);
    $num_TSC += 0;
    print "1 The training set trainingSetComplete.gb contains $num_TSC entries\n" if ($verbose>=1);
    die("Error: The training set trainingSetComplete.gb is empty. Training aborted because of insufficient data.\n")
        if ($num_TSC==0);

    # set "stopCodonExcludedFromCDS" to true

    print "2 Now trying to find out whether the CDS in the training set contain or exclude the stop codon.\n" if ($verbose >=2);
    my $genericCfg="$AUGUSTUS_CONFIG_PATH/species/generic/generic_parameters.cfg";
    _setStopCodonExcludedFromCDS($genericCfg, "true");
    print "3 Set value of \"stopCodonExcludedFromCDS\" in generic_parameters.cfg to \"true\"\n" if ($verbose>=3);

    # first try with etraining
    # (trainingSetComplete.gb was written to the current directory, $trainDir/training,
    # so it does not have to be moved)
    $cmdString="etraining --species=generic trainingSetComplete.gb 1>train.out 2>train.err";
    print "3 Running \"$cmdString\" ... " if ($verbose>=3);
    system("$cmdString")==0 or die("\nfailed to execute: $cmdString\n");
    print " Finished!\n" if ($verbose>=3);
    print "3 train.out and train.err have been made under $trainDir/training.\n" if ($verbose>=3);

    # set "stopCodonExcludedFromCDS" to false and run etraining again if necessary
    my $err_stopCodonExcludedFromCDS=`grep -c "exon doesn't end in stop codon" train.err`;
    $err_stopCodonExcludedFromCDS = 0 unless ($err_stopCodonExcludedFromCDS =~ /^\d+/);
    my $err_rate=$err_stopCodonExcludedFromCDS/$num_TSC;
    print "3 Error rate caused by \"exon doesn't end in stop codon\" is $err_rate\n" if ($verbose>=3);
    if($err_rate>=0.5){
        print "3 The appropriate value for \"stopCodonExcludedFromCDS\" seems to be \"false\".\n" if ($verbose>=3);
        # the parameter file is in the generic species directory, not in the script directory
        _setStopCodonExcludedFromCDS($genericCfg, "false");
        print "3 Setted value of \"stopCodonExcludedFromCDS\" in generic_parameters.cfg to \"false\"\n" if ($verbose>=3);
        print "3 Try etraining again: $cmdString ..." if ($verbose>=3);
        system("$cmdString")==0 or die("\nfailed to execute: $cmdString\n");
        print " Finished!\n" if ($verbose>=3);
        print "3 train.out and train.err have been made again under $trainDir/training.\n" if ($verbose>=3);
        print "2 Stop codons seem to be contained by CDS. Setting stopCodonExcludedFromCDS to false\n" if ($verbose>=2);
    }
    else{
        print "2 Stop codons seem to be exluded from CDS. Setting stopCodonExcludedFromCDS to true\n" if ($verbose>=2);
    }

    print "1 Now filtering problematic genes from training set...\n" if ($verbose>=1);

    # extract badlist: the names of all sequences for which etraining reported a problem
    print "3 Extracting the names of problematic sequences from train.err to badlist\n" if ($verbose>=3);
    open(my $trainErr, '<', "train.err") or die("\nCould not open train.err\n");
    open(my $badlist, '>', "badlist") or die("\nCould not open badlist\n");
    my $bad_num=0;
    while(my $line = <$trainErr>){
        if ($line =~ /in sequence (\S+):/){
            print $badlist "$1\n";
            $bad_num++;
        }
    }
    close($trainErr);
    close($badlist) or die("\nCould not write badlist: $!\n");

    # check whether only a small fraction of all entries created a problem,
    # output a warning if it is at least one half
    print "3 The number of all entries that created a problem is $bad_num\n" if ($verbose>=3);
    my $frac=$bad_num/$num_TSC;
    if($frac>=0.5){
        print "3 Warning: The fraction of all entries that created a problem is $frac\n" if ($verbose>=3);
    }

    # create file training.gb without erroneous genes
    $string=find("filterGenes.pl");
    print "3 Found script $string.\n" if ($verbose>=3);
    $perlCmdString="perl $string badlist trainingSetComplete.gb > training.gb";
    print "3 Running $perlCmdString ..." if ($verbose>=3);
    system("$perlCmdString")==0 or die("\nfailed to execute: $perlCmdString!\n");
    print " Finished!\n" if ($verbose>=3);

    print "\n1 ####### Finished step 0 at " .(scalar localtime()) .
        ". All files are stored in $trainDir #######\n" if ($verbose>=1);
}


sub prepare_genome{
    # Writes genome.summary (base content of genome.fa) and, derived from it,
    # contigs.gff (one contig line per sequence for gbrowse) in $rootDir/seq.
    # Uses $rootDir and $verbose, changes the current working directory.
    my $showDetails = ($verbose>=3);
    my $seqDir = "$rootDir/seq";

    print "3 cd $seqDir\n" if $showDetails;
    unless (chdir $seqDir) {
        die ("Could not change directory to ../seq\n");
    }

    # step 1: summary of the genome
    my $summarizeScript = find("summarizeACGTcontent.pl");
    $perlCmdString = "perl $summarizeScript genome.fa > genome.summary";
    print "3 Running $perlCmdString ..." if $showDetails;
    if (system($perlCmdString) != 0) {
        die("\nfailed to execute: $perlCmdString!\n");
    }

    # step 2: contigs file for gbrowse
    $cmdString='cat genome.summary | grep "bases." | perl -pe \'s/(\d+)\sbases.\s+(\S*) BASE.*/$2\tassembly\tcontig\t1\t$1\t.\t.\t.\tContig $2/\' > contigs.gff';
    print "3 Running $cmdString ..." if $showDetails;
    if (system($cmdString) != 0) {
        die("\nfailed to execute: $cmdString!\n");
    }
    print " Finished!\n" if $showDetails;
}

sub alignments_and_hints{
    # Align the cDNA sequences to the genome and create hints from the alignments:
    #   - BLAT cdna.fa against ../seq/genome.fa            -> $rootDir/cdna/cdna.psl
    #   - filter the alignments with pslCDnaFilter          -> $rootDir/cdna/cdna.f.psl
    #     (if that fails, the unfiltered alignments are used instead)
    #   - create a gbrowse file and the hints               -> $rootDir/hints/hints.E.gff
    #   - with --pasapolyAhints append the PASA polyA hints to the hints file
    #
    # Uses the file-level variables $rootDir, $fasta_cdna, $maxIntronLen, $pasapolyAhints,
    # $positionWD and $verbose. Sets $estali. Changes the current working directory
    # and returns to $positionWD at the end. Dies if a mandatory step fails.

    # BLAT cdna files. find blat, pslCDnaFilter minId(???)
    print "3 cd $rootDir/cdna\n" if ($verbose>=3);
    chdir "$rootDir/cdna" or die ("Could not change directory to $rootDir/cdna\n");
    if (!uptodate([$fasta_cdna], ["cdna.fa"])){
        system("ln", "-fs", $fasta_cdna, "cdna.fa")==0 or die ("failed to execute: ln -fs $fasta_cdna cdna.fa\n");
    }
    # blat
    # maxIntron=5000 to be determined
    if (!uptodate(["../seq/genome.fa", "cdna.fa"], ["cdna.psl"])){
        print "1 Aligning cDNA to genome with BLAT...\n" if ($verbose>=1);
        $cmdString="blat -noHead  -minIdentity=80 -maxIntron=$maxIntronLen ../seq/genome.fa cdna.fa cdna.psl 1>blat.stdout 2>blat.stderr";
        print "3 $cmdString ..." if ($verbose>=3);

        my $abortString = "\nProgram aborted. Possibly \"BLAT\" is not installed or not in your PATH\n";

        system("$cmdString")==0 or die("$abortString");
        print "Finished!\n" if ($verbose>=3);

        # show the output of BLAT
        if($verbose>=2){
            open(my $blatOut, '<', "blat.stdout") or die ("Cannot open blat.stdout!\n");
            while(defined (my $line=<$blatOut>)){
                print '2'." $line";
            }
            close($blatOut);
        }
    } else {
        print "1 Reusing existing BLAT alignment.\n" if ($verbose>=1);
    }

    # pslCDnaFilter
    $cmdString="pslCDnaFilter -minId=0.9 -localNearBest=0.005 -ignoreNs -bestOverlap "
        ."cdna.psl cdna.f.psl 1>pslCDnaFilter.stdout 2>pslCDnaFilter.stderr";
    print "3 $cmdString\n" if ($verbose>=3);

    if (system("$cmdString") != 0) {
        print "WARNING: Could not successfully find and run pslCDnaFilter. Please install this program.\n";
        print "Will continue anyways with unfiltered alignments. Expect worse results.\n";
        # -f: replace a cdna.f.psl that is left over from the failed or from an earlier run,
        # otherwise the link is not made and the old file is used without notice
        system("ln -fs cdna.psl cdna.f.psl")==0 or die("\nfailed to execute: ln -fs cdna.psl cdna.f.psl\n");
    }
    # create gbrowse files
    my $string=find("blat2gbrowse.pl");
    print "3 Found script $string.\n" if ($verbose>=3);
    $perlCmdString="perl $string --source=CDNA cdna.f.psl cdna.gbrowse";
    print "3 Running $perlCmdString ..." if ($verbose>=3);
    system("$perlCmdString")==0 or die("\nFailed to execute: $perlCmdString!\n");
    print " Finished!\n" if ($verbose>=3);

    # create hints
    print "1 Creating hints from cDNA alignments....\n" if ($verbose>=1);
    chdir "../hints" or die("\nCould not change directory to ../hints\n");
    $string=find("blat2hints.pl");
    $perlCmdString="perl $string --in=../cdna/cdna.f.psl --out=hints.E.gff --minintronlen=35 --trunkSS 1>blat2hints.stdout 2>blat2hints.stderr";
    print "2 Running $perlCmdString ..." if ($verbose>=2);
    system("$perlCmdString")==0 or die("\nfailed to execute: $perlCmdString!\n");
    print " Finished!\n" if ($verbose>=2);

    if ($pasapolyAhints) {
        chdir "../trainingSet" or die ("\nCould not change directory to ../trainingSet\n");
        my $pasapolyAfile=checkFile("pasa/output.polyAsites.fasta");
        if (defined $pasapolyAfile) {
            print "2 Converting $pasapolyAfile into a hintfile\n" if ($verbose>=2);
            $string=find("pasapolyA2hints.pl");
            $perlCmdString="perl $string $pasapolyAfile > pasa/output.polyAsites.gff";
            print "2 Running $perlCmdString ..." if ($verbose>=2);
            system("$perlCmdString")==0 or die("\nfailed to execute: $perlCmdString!\n");
            print " Finished!\n" if ($verbose>=2);
            my $pasapolyAhintfile=checkFile("pasa/output.polyAsites.gff");
            if (defined $pasapolyAhintfile) {
                print "2 Appending PASA-polyA-hint file to the cDNA hint file\n" if ($verbose>=2);
                # write to a temporary file first, the hints file is also an input of cat
                $perlCmdString="cat ../hints/hints.E.gff $pasapolyAhintfile > ../hints/hints.E.gff.temp";
                print "3 Running $perlCmdString.\n" if ($verbose>=3);
                system("$perlCmdString")==0 or die("\nfailed to execute: $perlCmdString!\n");
                rename("../hints/hints.E.gff.temp","../hints/hints.E.gff")
                    or die("\nCould not rename ../hints/hints.E.gff.temp to ../hints/hints.E.gff: $!\n");
            }
        }
    }

    chdir $positionWD or die("\nCould not change directory to $positionWD\n");
    $estali="$rootDir/cdna/cdna.f.psl";
}


         ####################### train AUGUSTUS without UTR #########################


sub autoTrain_no_utr{
    # Step 1 of the pipeline: train AUGUSTUS without UTR models by running
    # autoAugTrain.pl from the script directory.
    #
    # Takes no arguments. Reads the file-level settings $trainingset, $species,
    # $genome, $rootDir, $useexistingopt, $verboseString and $optrounds, and
    # overwrites $trainingset with the value returned by checkFile().
    # Dies if autoAugTrain.pl does not exit with status 0.

    print "\n1 ####### Step 1: Training AUGUSTUS (no UTR models) #######\n" if ($verbose>=1);

    # checkFile comes from helpMod; presumably it validates the file and
    # returns a usable path -- confirm against helpMod.
    $trainingset   =   checkFile($trainingset, "training", $usage);

    # run autoAugTrain.pl
    $perlCmdString="perl $scriptPath/autoAugTrain.pl -t=$trainingset -s=$species $useexistingopt -g=$genome -w=$rootDir $verboseString --opt=$optrounds";
    print "\n2 $perlCmdString\n" if ($verbose>=2);
    # Report the command itself: $! is only meaningful when system() could not
    # start the program at all, not when the child exits with an error.
    system("$perlCmdString")==0 or die ("failed to execute: $perlCmdString\n");

    print "\n1 ####### Finished step 1 at " .(scalar localtime()) .
        ". All files are stored in $rootDir/autoAugTrain #######\n" if ($verbose>=1);
}


         ###################### prepare scripts for AUGUSTUS ######################


sub autoAug_prepareScripts{
    # Runs autoAugPred.pl to prepare the AUGUSTUS job scripts for one of the
    # prediction modes, then (unless --singleCPU is set) prints the autoAug.pl
    # command line the user should run once those jobs have finished.
    #
    # Arguments (positional):
    #   1. hints switch - true to predict with hints
    #   2. utr switch   - true to predict with UTR
    #
    # Side effects: sets the file-level $autoAugDir to the directory matching
    # the selected mode and overwrites the file-level $perlCmdString.
    # Dies if autoAugPred.pl does not exit with status 0.

    my $hints_switch=shift;         # for AUGUSTUS with hints
    my $utr_switch=shift;           # for AUGUSTUS with UTR

    if($verbose>=1){
        my $string="Preparing scripts for AUGUSTUS";
        print "\n\n1 ";
        print "####### Step 2: $string without hints and UTR #######"      if (!$hints_switch && !$utr_switch);
        print "####### Step 4: $string with hints, without UTR #######"    if ( $hints_switch && !$utr_switch);
        print "####### Step 7: $string with hints and UTR #######"         if ( $hints_switch &&  $utr_switch);
        print "\n";
    }

    $autoAugDir = $autoAugDir_abinitio  if (!$hints_switch && !$utr_switch);
    $autoAugDir = $autoAugDir_hints     if ($hints_switch && !$utr_switch);
    $autoAugDir = $autoAugDir_hints_utr if ($hints_switch && $utr_switch);
    $autoAugDir = $autoAugDir_utr       if (!$hints_switch && $utr_switch);

    # Optional command line fragments. They are initialised explicitly because
    # "my $x = ... if (cond)" has undefined behaviour in Perl (see perlsyn).
    my $hintsString = $hints_switch ? "--hints=$hints" : '';
    my $utrString   = $utr_switch   ? "--utr"          : '';

    $perlCmdString = "perl $scriptPath/autoAugPred.pl -g=$genome --species=$species -w=$rootDir $utrString " .
        "$verboseString $hintsString $useexistingopt";
    $perlCmdString .= " --singleCPU" if ($singleCPU);
    print "2 $perlCmdString" if ($verbose>=2);
    # Report the command: $! carries no information when the child merely
    # exits with a non-zero status.
    system("$perlCmdString")==0 or die("\nfailed to execute: $perlCmdString\n");

    # No step number is assigned for the combination "UTR without hints".
    my $stepNum;
    $stepNum=2 if (!$hints_switch && !$utr_switch);
    $stepNum=4 if ( $hints_switch && !$utr_switch);
    $stepNum=7 if ( $hints_switch &&  $utr_switch);
    print "\n1 ####### Finished step $stepNum. The scripts are stored in $autoAugDir/shells #######\n" if ($verbose>=1);

    # --estali is only suggested right after the ab initio step (index 1):
    # a placeholder when no alignment file is known, the real file with PASA.
    my $estString = '';
    $estString = "--estali=your.cdna.psl" if ($index==1 && !defined($estali));
    $estString = "--estali=$estali"           if ($index==1 && $pasa);

    my $pasaString = $pasa ? "--pasa" : '';

    # show prompt

    my $sum=$index+1;               # index to pass to the follow-up call
    if (!$singleCPU) {
        print "\n\nWhen above jobs are finished, continue by running the command\n";
        print "autoAug.pl --species=$species --genome=$genome --useexisting "
            . "--hints=$hints $estString $verboseString $pasaString --index=$sum\n\n";
    }
}


       ########################### deal with results of AUGUSTUS ############################


sub autoAug_continue{
    # Resumes a prediction run by calling autoAugPred.pl with --continue,
    # after the jobs prepared earlier have finished.
    #
    # Arguments (positional):
    #   1. hints switch - true when the run used hints
    #   2. utr switch   - true when the run used UTR
    #
    # Side effects: changes the working directory to $positionWD, overwrites
    # the file-level $perlCmdString, sets $estali when $pasa is on, and sets
    # $aug to the hints prediction file when $index is 2.
    # Dies if autoAugPred.pl does not exit with status 0.

    my $hints_switch=shift;         # for AUGUSTUS with hints
    my $utr_switch=shift;           # for AUGUSTUS with UTR

    if($verbose>=1){
        my $string="Continue to predict genome structure with AUGUSTUS";
        print "\n1 ";
        print "####### Step 3: $string without hints, no UTR #######"       if (!$hints_switch && !$utr_switch);
        print "####### Step 5: $string with hints, no UTR #######"          if ( $hints_switch && !$utr_switch);
        print "####### Step 8: $string with hints, containing UTR #######"  if ( $hints_switch &&  $utr_switch);
        print "\n";
    }

    # Initialised explicitly: "my $x = ... if (cond)" has undefined behaviour
    # in Perl (see perlsyn).
    my $hintsString = $hints_switch ? "--hints=$hints" : '';
    my $utrString   = $utr_switch   ? " --utr"         : '';

    $estali="$rootDir/cdna/cdna.f.psl" if ($pasa);

    # The result directory is selected by the pipeline index, not the switches.
    my $mainDir;
    $mainDir = "$autoAugDir_abinitio"  if ($index==1);
    $mainDir = "$autoAugDir_hints"     if ($index==2);
    $mainDir = "$autoAugDir_utr"       if ($index==3);

    my $shellDir = "$mainDir/shells";


    $perlCmdString = "perl $scriptPath/autoAugPred.pl --species=$species --genome=$rootDir/seq/genome.fa --continue --workingdir=$rootDir $verboseString $hintsString $utrString $useexistingopt";
    $perlCmdString .= " --singleCPU" if ($singleCPU);
    my $abortString = "\nError executing\n$perlCmdString\n";
    print "3 $perlCmdString\n" if ($verbose >= 3);
    chdir $positionWD;
    system("$perlCmdString")==0 or die ("$abortString");

    $aug = "$shellDir/../predictions/augustus.gff" if($index==2);

    my $stepNum;
    $stepNum=3 if (!$hints_switch && !$utr_switch);
    $stepNum=5 if ( $hints_switch && !$utr_switch);
    $stepNum=8 if ( $hints_switch &&  $utr_switch);

    print "\n1 ####### Finished step $stepNum. All files are stored in $mainDir #######\n" if ($verbose>=1);

}






       ################## run AUGUSTUS completely automatically ##################


sub autoAug_noninteractive{
    # Predicts genes on the whole sequence in one go by calling
    # autoAugPred.pl with --noninteractive.
    #
    # Arguments (positional):
    #   1. hints switch - true to predict with hints
    #   2. utr switch   - true to predict with UTR
    #
    # Overwrites the file-level $perlCmdString.
    # Dies if autoAugPred.pl does not exit with status 0.

    my $hints_switch=shift;       # for AUGUSTUS with hints
    my $utr_switch=shift;         # for AUGUSTUS with UTR

    # Initialised explicitly: "my $x = ... if (cond)" has undefined behaviour
    # in Perl (see perlsyn).
    my $hintsString = $hints_switch ? "--hints=$hints" : '';
    my $utrString   = $utr_switch   ? "--utr"          : '';

    # Human readable description of the mode, used in progress messages only.
    # It stays unset for the combination "UTR without hints".
    my $string;
    $string="ab initio (without hints and utr)" if(!$hints_switch && !$utr_switch);
    $string="with hints" if($hints_switch && !$utr_switch);
    $string="with hints and utr" if($hints_switch && $utr_switch);

    print "\n\n1 ####### Now predicting genes $string in the whole sequence...#######\n" if ($verbose>=1);
    $perlCmdString="perl $scriptPath/autoAugPred.pl -g=$genome --species=$species $hintsString $utrString --noninteractive --cname=$cname -w=$rootDir $verboseString $useexistingopt";
    print "2 $perlCmdString ...\n" if ($verbose>1);
    system("$perlCmdString")==0 or die ("failed to execute: $perlCmdString!\n");

    print "\n####### Finished predicting genes $string #######\n";
}






      ################# training AUGUSTUS with UTR ####################


sub autoTrain_with_utr{
    # Trains the UTR models by running autoAugTrain.pl with --utr, using the
    # alignment file $estali and the hints-based prediction
    # $autoAugDir_hints/predictions/augustus.gff.
    #
    # Takes no arguments. Overwrites the file-level $perlCmdString.
    # Dies if autoAugTrain.pl does not exit with status 0.

    # The step is numbered 8 in noninteractive mode and 6 otherwise.
    my $stepNum = $noninteractive ? 8 : 6;

    print "\n1 ####### Step $stepNum: Training AUGUSTUS with UTR #######\n" if ($verbose>=1);

    my $augString="--aug=$autoAugDir_hints/predictions/augustus.gff";

    $perlCmdString="perl $scriptPath/autoAugTrain.pl -g=$genome -s=$species --utr -e=$estali $augString -w=$rootDir $verboseString --opt=$optrounds $useexistingopt";
    print "\n2 $perlCmdString\n" if ($verbose>=2);
    # Report the command itself: $! is only meaningful when system() could not
    # start the program at all, not when the child exits with an error.
    system("$perlCmdString")==0 or die ("failed to execute: $perlCmdString\n");

    print "\n1 ####### Finished step $stepNum, all files are stored in $rootDir/training/utr #######\n" if ($verbose>=1);

}


      ########################### collect all important files in one directory #######################


sub collect{
    # Final step: gathers the important output files under $rootDir/results,
    # in the subdirectories gbrowse, hints, predictions, seq, genes and config.
    # Files are copied or symlinked depending on the kind of file.
    #
    # Takes no arguments. Reads file-level settings ($rootDir, $genome,
    # $fasta_cdna, $hints, $pasa, $havehints, $species, $useexisting, the
    # $autoAugDir_* directories and $AUGUSTUS_CONFIG_PATH) and overwrites the
    # file-level $cmdString. Changes the working directory and leaves it in
    # the results/genes directory.
    #
    # Exits with status 1 if the results directory already exists and
    # --useexisting was not given; dies if a required command fails.

    # The step is numbered 9 in noninteractive mode and 7 otherwise.
    my $stepNum = $noninteractive ? 9 : 7;

    print "\n1 ####### Step $stepNum: Collecting important files #######\n" if ($verbose>=1);

    my $summary_dir = "$rootDir/results";
    if (!$useexisting && -d $summary_dir){
        print STDERR "Directory $summary_dir already exists. Use --useexisting or move it.\n";
        exit(1);
    }
    system("mkdir -p $summary_dir")==0 or die("\nCould not create directory $summary_dir.\n");

    # build subdir structure

    chdir "$summary_dir" or die("\nError: cannot change directory to $summary_dir!\n");
    # mkdir results are deliberately not checked: with --useexisting the
    # directories may already exist. The chdir checks below catch real failures.
    for(("gbrowse", "hints","predictions","seq", "genes", "config")){mkdir "$_"}
    print "3 All necessary diretories have been created unter $summary_dir.\n" if ($verbose>=3);

    # collect gbrowse files
    print "3 cd gbrowse\n" if ($verbose>=3);
    chdir "gbrowse" or die("\nError: cannot change directory to $summary_dir/gbrowse!\n");
    system ("ln -sf $genome genome.fa") if (!uptodate([$genome], ["genome.fa"]));


    $cmdString = "cp $rootDir/seq/contigs.gff contigs.gff";
    system("$cmdString")==0 or die("\nfailed to execute: $cmdString\n");

    if (-f "$rootDir/cdna/cdna.gbrowse"){
        $cmdString = "ln -sf $rootDir/cdna/cdna.gbrowse cdna.gbrowse";
        system("$cmdString")==0 or die("\nfailed to execute: $cmdString\n");
    }

    if (defined($fasta_cdna) && -f $fasta_cdna){
        $cmdString = "ln -sf $fasta_cdna cdna.fa";
        system("$cmdString")==0 or die("\nfailed to execute: $cmdString\n");
    }

    # Each pair is [source file, name inside results/gbrowse]; sources that do
    # not exist are skipped.
    foreach my $pair (["$autoAugDir_abinitio/gbrowse/augustus.abinitio.gbrowse", "augustus.abinitio.gbrowse"],
                      ["$autoAugDir_hints/gbrowse/augustus.E.gbrowse", "augustus.E.gbrowse"],
                      ["$autoAugDir_utr/gbrowse/augustus.UTR.gbrowse", "augustus.UTR.gbrowse"],
                      ["$rootDir/autoAugTrain/gbrowse/utr.train.gbrowse", "utr.train.gbrowse"]){
        if (-f $pair->[0]){
            $cmdString = "cp $pair->[0]  $pair->[1]";
            print "3 $cmdString\n" if ($verbose>=3);
            system("$cmdString")==0 or die ("Could not execute $cmdString");
        }
    }

    # collect the hints file
    print "3 cd ../hints\n" if ($verbose>=3);
    chdir "../hints" or die("\nError: cannot change directory to $summary_dir/hints!\n");
    if($pasa){
        $cmdString="ln -sf $rootDir/hints/hints.E.gff hints.E.gff";
        system("$cmdString")==0 or die("\nfailed to execute: $cmdString\n");
        print "3 $cmdString\n" if ($verbose>=3);
    } elsif ($havehints) {
        $cmdString="ln -sf  $hints hints.E.gff";
        system("$cmdString")==0 or die("\nfailed to execute: $cmdString\n");
        print "3 $cmdString\n" if ($verbose>=3);
    }

    # collect prediction files
    print "3 cd ../predictions\n" if ($verbose >= 3);
    chdir "../predictions" or die("\nError: cannot change directory to $summary_dir/predictions!\n");

    # Each pair is [source file, name inside results/predictions]; sources that
    # do not exist are skipped.
    foreach my $pair (["$autoAugDir_abinitio/predictions/augustus.gff", "augustus.abinitio.gff"],
                      ["$autoAugDir_abinitio/predictions/augustus.aa", "augustus.abinitio.aa"],
                      ["$autoAugDir_hints/predictions/augustus.gff", "augustus.hints.gff"],
                      ["$autoAugDir_hints/predictions/augustus.aa", "augustus.hints.aa"],
                      ["$autoAugDir_utr/predictions/augustus.gff", "augustus.utr.hints.gff"],
                      ["$autoAugDir_utr/predictions/augustus.aa", "augustus.utr.hints.aa"]){
        if (-f $pair->[0]){
            $cmdString = "cp $pair->[0]  $pair->[1]";
            print "3 $cmdString\n" if ($verbose>=3);
            system("$cmdString")==0 or die ("Could not execute $cmdString");
        }
    }

    # make a link for genome.fa
    chdir "../seq" or die("\nError: cannot change directory to $summary_dir/seq!\n");
    # Only build and run the link command when it is needed. Running
    # $cmdString unconditionally would re-execute the previous cp command in
    # this directory whenever the link is already up to date. -f replaces an
    # outdated link, as in the gbrowse directory above.
    if (!uptodate([$genome], ["genome.fa"])){
        $cmdString="ln -sf $genome genome.fa";
        system("$cmdString")==0 or die("\nfailed to execute: $cmdString\n");
        print "3 $cmdString\n" if ($verbose>=3);
    }

    # collect config files
    my $configDir="$AUGUSTUS_CONFIG_PATH/species/$species";
    print '3 cd ../config'."\n" if ($verbose>=3);
    chdir "../config" or die("\nError: cannot change directory to $summary_dir/config!\n");
    # rm -f: the exit status of this command line is that of rm, so without -f
    # the step would die whenever there are no *.orig* files to remove.
    $cmdString = "cp $configDir/* . ; rm -f *.orig*;";
    print "3 $cmdString\n" if ($verbose>=3);
    system("$cmdString")==0 or die ("failed to execute: $cmdString\n");

    # collect files with gb format

    print "3 cd ../genes\n" if ($verbose>=3);
    chdir "../genes" or die("\nError: cannot change directory to $summary_dir/genes!\n");

    # List the GenBank files below autoAugTrain in the scratch file tempgbn.
    # NOTE(review): the exit status of each pipeline is that of grep, which is
    # non-zero when it selects no line -- confirm that each list is never empty.
    foreach my $findCmd ("find $rootDir/autoAugTrain -name \"*.gb\" | grep -v tmp_opt_ > tempgbn",
                         "find $rootDir/autoAugTrain -name \"*.gb.*\" | grep -v .gb.lst >> tempgbn"){
        system("$findCmd")==0 or die("\nfailed to execute: $findCmd\n");
        print "3 $findCmd\n" if ($verbose>=3);
    }

    open(my $gbList, '<', "tempgbn") or die ("Cannot open the file \"tempgbn\"!\n");
    while(defined (my $i=<$gbList>)){
        # Split an absolute path into directory part and file name. Lines that
        # do not match are skipped, so the captures are never stale.
        next unless ($i =~ /^(\/.*\/)(.*)\n$/);
        my ($srcDir, $fileName) = ($1, $2);
        if(-f "$fileName"){
            # a file of that name was already linked: keep both
            $cmdString="ln -fs $srcDir$fileName $fileName"."_another";
            system("$cmdString")==0 or die("\nfailed to execute: $cmdString\n");
            print "3 $cmdString\n" if ($verbose>=3);
        }
        else{
            $cmdString="ln -s $srcDir$fileName $fileName";
            system("$cmdString")==0 or die("\nfailed to execute: $cmdString\n");
            print "3 $cmdString\n" if ($verbose>=3);
        }
    }
    close($gbList);
    print "3 rm tempgbn\n" if ($verbose>=3);
    unlink("tempgbn") or die("failed to remove tempgbn: $!\n");

    print "\n1 ####### Finished step $stepNum. All files are stored in $summary_dir #######\n" if ($verbose>=1);
    print "\n1 ####### Done autoAug.pl #######\n" if ($verbose>=1);
    print "" . (scalar localtime()) . "\n" if ($verbose>=1);
}


# check upfront whether any common problems will occur later. So the user doesn't have to wait a long time to
# find out that some programs are not installed.
# TODO: put more checks in here
sub check_upfront{
    # Verifies the environment before any long-running work starts:
    # required environment variables, the augustus binary, blat (when a cDNA
    # fasta file was given), gmap (when GMAP is used for PASA) and two helper
    # scripts. Dies or exits with status 1 on the first problem found.
    if ($verbose>=2) {
        print "2 checking for installed programs ... ";
    }

    unless ($ENV{'AUGUSTUS_CONFIG_PATH'}) {
        die("Error: The environment variable AUGUSTUS_CONFIG_PATH is not defined.\n");
    }
    if ($pasa && !defined($ENV{'PASAHOME'})) {
        die("Error: The environment variable PASAHOME is undefined.\n");
    }

    # The augustus binary is expected in src/ next to the config directory.
    my $augustusBin = "$ENV{'AUGUSTUS_CONFIG_PATH'}/../src/augustus";
    my $augustusStatus = system("$augustusBin > /dev/null 2> /dev/null");
    if ($augustusStatus != 0){
        # Distinguish a missing binary from one that exists but cannot run.
        my $complaint = (-f $augustusBin)
            ? "Error: $augustusBin not executable on this machine.\n"
            : "Error: augustus executable not found at $augustusBin.\n";
        print STDERR $complaint;
        exit (1);
    }

    # blat is only looked up when a cDNA fasta file was supplied.
    if (defined($fasta_cdna) && system("which blat > /dev/null") != 0){
        print STDERR "Error: blat not installed. Please install first.\n";
        exit (1);
    }

    # gmap is only looked up when PASA is to be run with GMAP.
    if ($useGMAPforPASA && $pasa && system("which gmap > /dev/null") != 0){
        print STDERR "Error: 'gmap' not installed. Install GMAP first or use BLAT.\n";
        exit(1);
    }

    # find() comes from helpMod; presumably it aborts when a script cannot be
    # located -- confirm against helpMod.
    for my $helperScript ("gff2gbSmallDNA.pl", "summarizeACGTcontent.pl") {
        find($helperScript);
    }

    if ($verbose>=2) {
        print "ok.\n";
    }
}
