Commit 13d5f2d4 authored by Nicolas Delhomme

updated the pipeline

parent bf69cb31
Pipeline #53 skipped
...@@ -18,7 +18,7 @@ set -x ...@@ -18,7 +18,7 @@ set -x
## are we on UPPMAX ## are we on UPPMAX
if [ ! -z $SLURM_SUBMIT_DIR ]; then if [ ! -z $SLURM_SUBMIT_DIR ]; then
module load bioinfo-tools module load bioinfo-tools
module load FastQC/0.10.1 module load FastQC/0.11.5
## echo "Running on UPPMAX" ## echo "Running on UPPMAX"
else else
## echo "Running locally" ## echo "Running locally"
......
...@@ -4,8 +4,16 @@ ...@@ -4,8 +4,16 @@
#SBATCH -t 0-01:00:00 #SBATCH -t 0-01:00:00
#SBATCH --mail-type=ALL #SBATCH --mail-type=ALL
## Try the module system first (bioinfo-tools, then fastQvalidator). If either
## load fails, fall back to requiring fastQValidator to be reachable through
## the PATH, and stop with an error when it is not.
if ! { module load bioinfo-tools && module load fastQvalidator; }; then
  hash fastQValidator 2>/dev/null || {
    echo "fastQValidator was not found in your path" 1>&2
    exit 1
  }
fi
usage() { usage() {
echo "usage: `basename $0` <fastq> echo "usage: `basename $0` <fastq>
Run fastQValidator on a FASTQ file. Prints output on stdout and Run fastQValidator on a FASTQ file. Prints output on stdout and
exits with a non-zero exit status if the input file does not exits with a non-zero exit status if the input file does not
...@@ -16,6 +24,8 @@ ARGUMENTS: ...@@ -16,6 +24,8 @@ ARGUMENTS:
NOTES: NOTES:
fastQValidator must lie in your PATH" 1>&2 fastQValidator must lie in your PATH" 1>&2
exit 1
} }
## stop on error ## stop on error
...@@ -23,20 +33,13 @@ set -e ...@@ -23,20 +33,13 @@ set -e
## check ## check
if [ $# != 1 ]; then if [ $# != 1 ]; then
echo "The argument should be one fastq filename" 1>&2 echo "This function takes one argument: a fastq filename" 1>&2
usage usage
exit 1
fi fi
if [ ! -f $1 ]; then if [ ! -f $1 ]; then
echo "The fastq filename you provided does not exist" 1>&2 echo "The fastq filename you provided does not exist" 1>&2
usage usage
exit 1
fi
if ! hash fastQValidator 2>/dev/null; then
echo "fastQValidator was not found in your path" 1>&2
exit 1
fi fi
## we print 1000 errors, should be enough ## we print 1000 errors, should be enough
......
...@@ -20,7 +20,9 @@ echo >&2 \ ...@@ -20,7 +20,9 @@ echo >&2 \
for the P. trichocarpa gene exon gff3 file for the P. trichocarpa gene exon gff3 file
-s is the protocol stranded? -s is the protocol stranded?
default to FALSE default to FALSE
-a are we counting antisense transcripts?
default to FALSE, only active in combination with -s
-t Chose attribute to count in the gff3 file default is exon
Note: Note:
BAM file are expected to be sorted by position BAM file are expected to be sorted by position
Only HTSeq 0.6+ version(s) are supported Only HTSeq 0.6+ version(s) are supported
...@@ -28,24 +30,11 @@ echo >&2 \ ...@@ -28,24 +30,11 @@ echo >&2 \
exit 1 exit 1
} }
## Are we on UPPMAX? echo Loading modules
if [ ! -z $SLURM_SUBMIT_DIR ]; then module load bioinfo-tools htseq
## load the modules
echo Loading modules
module load python/2.7.6
module load bioinfo-tools
module load samtools/0.1.19
else
htseq=`which htseq-count`
if [ "$?" -ne 0 ]; then
echo "error: you need to install htseq or add it to your path"
exit 1
fi
fi
## check the version ## check the version
isVersion6=`htseq-count --help | grep "version 0.6" | wc -l` if [ `htseq-count --help | grep -c "version 0.6"` -ne 1 ]; then
if [ $isVersion6 != 1 ]; then
echo Only HTSeq version 0.6+ are supported echo Only HTSeq version 0.6+ are supported
usage usage
fi fi
...@@ -53,13 +42,17 @@ fi ...@@ -53,13 +42,17 @@ fi
## options ## options
IDATTR="Parent" IDATTR="Parent"
stranded=0 stranded=0
antisense=0
t="exon"
## get the options ## get the options
while getopts i:s option while getopts ai:st: option
do do
case "$option" in case "$option" in
a) antisense=1;;
i) IDATTR=$OPTARG;; i) IDATTR=$OPTARG;;
s) stranded=1;; s) stranded=1;;
t) t=$OPTARG;;
\?) ## unknown flag \?) ## unknown flag
usage;; usage;;
esac esac
...@@ -67,12 +60,6 @@ done ...@@ -67,12 +60,6 @@ done
shift `expr $OPTIND - 1` shift `expr $OPTIND - 1`
## we get two dir and two files as input ## we get two dir and two files as input
if [ $# == 4 ]; then
echo "This function arguments have changed!"
usage
fi
if [ $# != 3 ]; then if [ $# != 3 ]; then
echo "This function takes one directory, one bam and one gff3 file as arguments" echo "This function takes one directory, one bam and one gff3 file as arguments"
usage usage
...@@ -84,28 +71,33 @@ if [ ! -d $1 ]; then ...@@ -84,28 +71,33 @@ if [ ! -d $1 ]; then
fi fi
if [ ! -f $2 ]; then if [ ! -f $2 ]; then
echo "The third argument needs to be an existing bam file" echo "The second argument needs to be an existing bam file"
usage usage
fi fi
nam=`basename ${2//.bam/}` nam=`basename ${2//.bam/}`
if [ ! -f $3 ]; then if [ ! -f $3 ]; then
echo "The forth argument needs to be an existing gff3 file" echo "The third argument needs to be an existing gff3 file"
usage usage
fi fi
## sort by id if [ $t == "CDS" ]; then
## samtools sort -n $3 $2/${nam}-byname echo "Warning: the CDS option require the CDS feature to be capital in you gff3 file"
fi
## get the count table ## get the count table
if [ $stranded == 0 ]; then if [ $stranded == 0 ]; then
if [ $antisense == 1 ]; then
echo "The antisense only works in conjunction with the -s option" >&2
fi
## since we are not using strand specific, go for the union ## since we are not using strand specific, go for the union
htseq-count -f bam -r pos -m union -s no -t exon -i $IDATTR $2 $3 > $1/$nam.txt htseq-count -f bam -r pos -m union -s no -t $t -i $IDATTR $2 $3 > $1/$nam.txt
else else
htseq-count -f bam -r pos -m intersection-nonempty -s reverse -t exon -i $IDATTR $2 $3 > $1/$nam.txt ## normal counting
if [ $antisense == 0 ]; then
htseq-count -f bam -r pos -m intersection-nonempty -s reverse -t $t -i $IDATTR $2 $3 > $1/$nam.txt
else
htseq-count -f bam -r pos -m intersection-nonempty -s yes -t $t -i $IDATTR $2 $3 > $1/$nam.txt
fi
fi fi
## clean
## rm $2/${nam}-byname.bam
This diff is collapsed.
#!/bin/bash -l #!/bin/bash -l
#SBATCH -p node
## THINK OF --outStd SAM --outSAMunmapped Within to write SAM directly and keep all reads. That does not affect any of the log file to be generated #SBATCH -n 16
## but consider if we want that when reporting the Chimeric SAM (i.e. for merging the files, we would not want the reads to be part of the SAM... #SBATCH -t 0-12:00:00
## the good thing with outputting to SAM is that it can be readily piped into samtools -bs - | samtools sort - filename
#SBATCH -p core
#SBATCH -n 8
#SBATCH -t 0-02:00:00
#SBATCH --mail-type=ALL #SBATCH --mail-type=ALL
################# # -p node is needed to accept the -C memory configuration
## Build geneModel
#################
## TODO extract that to its own script
##usage sbatch -p devel -t 1:00:00 runSTAR.sh genome.fa
#/home/davidsu/bin/STAR --runMode genomeGenerate --genomeDir $1 --genomeFastaFiles $2 --sjdbOverhang 99 --sjdbGTFfile $3 --runThreadN 8
#exit;
## stop on error and be verbose in the output ## stop on error and be verbose in the output
set -e -x set -e -x
## load the modules
module load bioinfo-tools star/2.4.0f1 samtools
## exec ## vars
STAR=
### tool sanity
if [ ! -z $SLURM_SUBMIT_DIR ]; then
module load bioinfo-tools
module load samtools/0.1.19
module load star/2.3.0e
STAR=`which STAR`
else
STAR=`which STAR`
if [ $? != 0 ]; then
echo "please install STAR before running this script or add it to your PATH"
exit 1
fi
if [ ! -f $STAR -a ! -x $STAR ]; then
echo "your STAR does not appear to be an executable file"
exit 1
fi
samtools=`which samtools`
if [ $? != 0 ]; then
echo "please install samtools before running this script or add it to your PATH"
exit 1
fi
if [ ! -f $samtools -a ! -x $samtools ]; then
echo "your samtools does not appear to be an executable file"
exit 1
fi
fi
##########
# Run star
##########
INTRONMAX=11000 INTRONMAX=11000
OUT_DIR=`pwd` GFF=
GFF=1
SINGLE=0 SINGLE=0
PROC=8 PROC=16
FORMAT="gtf"
LIMIT=10000000000
## additional options for STAR
OPTIONS="--outSAMstrandField intronMotif --readFilesCommand zcat --outSAMmapqUnique 254 --quantMode TranscriptomeSAM --outFilterMultimapNmax 100 --outReadsUnmapped Fastx --chimSegmentMin 1 --outSAMtype BAM SortedByCoordinate --outWigType bedGraph"
## usage ## usage
usage(){ usage(){
echo >&2 \ echo >&2 \
" "
Usage: runSTAR.sh [option] <fwd file> <rv file> <genome dir> <gene model gff3> [--] [additional STAR arguments] Usage: $0 [option] <out dir> <genome dir> <fwd file> <rv file> [--] [additional STAR arguments]
Options: Options:
-e STAR executable -f the gtf/gff3 file format (default gtf)
-g if there is no gff file -g the path to a gtf/gff3 file
-m max intron length -l the BAM sorting memory limit ($LIMIT)
-o outdir -m the max intron length ($INTRONMAX)
-p number of threads to be used (default: 8) -p number of threads to be used (default: 16)
-s if there is no reverse file -q set for Illumina +64 Phred score
-s if there is no reverse
-n no default option
Notes: Notes:
The number of arguments is only 3 when -s is set.
-- is a special argument that stop the command line scanning for the script options. -- is a special argument that stop the command line scanning for the script options.
It is necessary if you want to precised additional - non-default - STAR arguments. It is necessary if you want to precised additional - non-default - STAR arguments.
When the format is gff3, the exon-transcript relationship assumes a 'Parent' keylink.
" "
exit 1 exit 1
} }
## get the options ## get the options
while getopts e:gm:o:sp: option while getopts f:g:l:m:np:qs option
do do
case "$option" in case "$option" in
e) STAR=$OPTARG;; f) FORMAT=$OPTARG;;
g) GFF=0;; g) GFF=$OPTARG;;
m) INTRONMAX=$OPTARG;; l) LIMIT=$OPTARG;;
o) OUT_DIR=$OPTARG;; m) INTRONMAX=$OPTARG;;
n) OPTIONS="";;
p) PROC=$OPTARG;; p) PROC=$OPTARG;;
q) OPTIONS="$OPTIONS --outQSconversionAdd -31";;
s) SINGLE=1;; s) SINGLE=1;;
\?) ## unknown flag \?) ## unknown flag
usage;; usage;;
esac esac
done done
shift `expr $OPTIND - 1` shift `expr $OPTIND - 1`
## check the arguments ## update the options
if [ ! -z $STAR -a ! -f $STAR -a ! -x $STAR ]; then ## dirty if loop to accomodate for v2.3.*
echo "your STAR does not appear to be an executable file" if [ "$OPTIONS" != "" ]; then
exit 1 OPTIONS="$OPTIONS --limitBAMsortRAM $LIMIT"
fi fi
## check the arguments
echo "Parsing the arguments"
ARGS=4 ARGS=4
if [ $SINGLE == 1 ]; then if [ $SINGLE == 1 ]; then
let "ARGS = $ARGS - 1" let "ARGS = $ARGS - 1"
FIND=".f*q.gz" FIND=".f*.gz"
else else
FIND="_1.f*q.gz" FIND="_[1,2].f*q.gz"
fi
if [ $GFF == 0 ]; then
let "ARGS = $ARGS - 1"
fi fi
## check the number of args
if [ $# -lt $ARGS ]; then if [ $# -lt $ARGS ]; then
echo "This script needs 2 arguments without GFF and for SE data; 3 for either and 4 for none of these two conditions." echo "This script needs 3 or 4 arguments for SE or PE data, respectively."
usage usage
fi fi
## The first positional argument is the output directory; it is only recorded
## here, not checked.
outdir=$1
shift

## The next argument must be an existing genome directory.
## The expansion is quoted on purpose: unquoted, an empty argument turns the
## test into `[ ! -d ]`, which evaluates to false and would silently accept an
## empty genome path.
if [ ! -d "$1" ]; then
  echo "The genome directory: $1 does not exist"
  usage
else
  genome=$1
  shift
fi
## Check if the first file exists
if [ ! -f $1 ]; then if [ ! -f $1 ]; then
echo "The forward fastq file: $1 does not exist" echo "The forward fastq file: $1 does not exist"
usage usage
else else
in1=$1 fwd=$1
shift shift
fi fi
## Check if the second file exists
if [ $SINGLE == 0 ]; then if [ $SINGLE == 0 ]; then
if [ ! -f $1 ]; then if [ ! -f $1 ]; then
echo "The reverse fastq file: $1 does not exist" echo "The reverse fastq file: $1 does not exist"
usage usage
else else
in2=$1 rev=$1
shift shift
fi fi
fi fi
if [ ! -d $1 ]; then ## if gff is set check if it exists
echo "The genome directory: $1 does not exist" if [ ! -z $GFF ] && [ ! -f $GFF ] ; then
usage echo "The gene model gtf/gff3 file: $GFF does not exists"
usage
else else
genome=$1 if [ ! -z $GFF ]; then
shift OPTIONS="--sjdbGTFfile $GFF $OPTIONS"
fi
fi fi
if [ $GFF == 1 ]; then ## if format is set
if [ ! -f $1 ]; then case $FORMAT in
echo "The gene model gff3 file: $1 does not exists" gff3)
usage OPTIONS=" $OPTIONS --sjdbGTFtagExonParentTranscript Parent"
else ;;
gff3=$1 gff)
shift OPTIONS=" $OPTIONS --sjdbGTFtagExonParentTranscript Parent"
fi ;;
fi gtf);;
#nothing to do
*)
echo "There are only 2 supported format, gtf or gff3"
usage;;
esac
## do we have more arguments ## do we have more arguments
if [ $# != 0 ]; then if [ $# != 0 ]; then
...@@ -164,34 +152,67 @@ if [ $# != 0 ]; then ...@@ -164,34 +152,67 @@ if [ $# != 0 ]; then
shift shift
fi fi
## output name ## create the output dir
uz3=$OUT_DIR/`basename ${in1//$FIND/}` echo "Processing"
## `mkdir -p` succeeds when the directory already exists, so no separate
## existence test is needed.
mkdir -p "$outdir"

## output prefix: the forward read file name with the $FIND suffix pattern
## removed, placed in the output directory. $FIND stays unquoted inside the
## substitution so that it is matched as a pattern, not as a literal string.
bnam=$(basename "${fwd//$FIND/}")
fnam=$outdir/$bnam

## reads handed to STAR: the forward file, plus the reverse file unless
## SINGLE is 1
reads=("$fwd")
if [ "$SINGLE" != 1 ]; then
  reads+=("$rev")
fi

## start STAR
## $OPTIONS is deliberately left unquoted: it is a space-separated option
## string that relies on word splitting. "$@" is quoted so that each remaining
## command line argument reaches STAR as exactly one word.
echo "Aligning"
STAR --genomeDir "$genome" --readFilesIn "${reads[@]}" --runThreadN "$PROC" \
  --alignIntronMax "$INTRONMAX" --outFileNamePrefix "$fnam" $OPTIONS "$@"

## save the log
echo "Logging"
mkdir -p "${fnam}_logs"
mv "${fnam}"Log.* "${fnam}_logs"

## save the junctions
mkdir -p "${fnam}_junctions"
mv "${fnam}"SJ* "${fnam}_junctions"
mv "${fnam}Chimeric.out.junction" "${fnam}_junctions"

## save the bedGraph signal files
echo "Wiggling"
mkdir -p "${fnam}_bedgraphs"
mv "${fnam}"Signal.*.bg "${fnam}_bedgraphs"
## start star ## rename the output
if [ $SINGLE == 1 -a $GFF == 0 ]; then echo "Renaming"
$STAR --genomeDir $genome --readFilesIn $in1 --runThreadN $PROC --alignIntronMax $INTRONMAX --outSAMstrandField intronMotif --readFilesCommand zcat --outFileNamePrefix $uz3 $@ mv ${fnam}Aligned.sortedByCoord.out.bam ${fnam}_STAR.bam
if [ $SINGLE == 0 ]; then
mv ${fnam}Unmapped.out.mate1 ${fnam}_Unmapped_1.fq
mv ${fnam}Unmapped.out.mate2 ${fnam}_Unmapped_2.fq
else else
if [ $SINGLE == 1 -o $GFF == 0 ]; then mv ${fnam}Unmapped.out.mate1 ${fnam}_Unmapped.fq
if [ $GFF == 0 ]; then
$STAR --genomeDir $genome --readFilesIn $in1 $in2 --runThreadN $PROC --alignIntronMax $INTRONMAX --outSAMstrandField intronMotif --readFilesCommand zcat --outFileNamePrefix $uz3 $@
else
$STAR --genomeDir $genome --readFilesIn $in1 --runThreadN $PROC --alignIntronMax $INTRONMAX --outSAMstrandField intronMotif --sjdbGTFfile $gff3 --readFilesCommand zcat --outFileNamePrefix $uz3 $@
fi
else
$STAR --genomeDir $genome --readFilesIn $in1 $in2 --runThreadN $PROC --alignIntronMax $INTRONMAX --outSAMstrandField intronMotif --sjdbGTFfile $gff3 --readFilesCommand zcat --outFileNamePrefix $uz3 $@
fi
fi fi
## save the logs mv ${fnam}Aligned.toTranscriptome.out.bam ${fnam}_STAR_Transcriptome.bam
mkdir -p ${uz3}_logs
mv ${uz3}Log.* ${uz3}_logs ## compress files (we would only need 2 CPUS, but what if PROC is set to 1)
mv ${uz3}SJ* ${uz3}_logs find $outdir -name "${bnam}_Unmapped*.fq" -print0 | xargs -P $PROC -0 -I {} gzip -f {}
## sort the transcriptome bam and rename
samtools sort -@ 16 -n ${fnam}_STAR_Transcriptome.bam ${fnam}_STAR_Transcriptome.sorted
rm ${fnam}_STAR_Transcriptome.bam
mv ${fnam}_STAR_Transcriptome.sorted.bam ${fnam}_STAR_Transcriptome.bam
## convert sam to bam ## convert the chimeric sam to bam
samtools view -Sb ${uz3}Aligned.out.sam | samtools sort - ${uz3}_STAR samtools view -Sb ${fnam}Chimeric.out.sam | samtools sort -@ 16 - ${fnam}_STAR_Chimeric
samtools index ${uz3}_STAR.bam
## clean ## index the BAMs
rm ${uz3}Aligned.out.sam echo "Indexing"
printf "%s\0%s" ${fnam}_STAR.bam ${fnam}_STAR_Chimeric.bam | xargs -P $PROC -0 -I {} samtools index {}
## TODO modify to print SAM to stdout to pipe into samtools, add the mate1,2 generation and compress these ## cleanup
echo "Cleaning"
rm ${fnam}Chimeric.out.sam
rm -rf ${fnam}_STARtmp/
...@@ -16,16 +16,11 @@ set -e ...@@ -16,16 +16,11 @@ set -e
set -x set -x
## check the options if any ## check the options if any
KEEP=0 KEEP=1
useMtSSU=1 useMtSSU=0
UNPAIRED=0 UNPAIRED=0
PROC=16 PROC=16
DBS=
## local run
## replaced by checking for the SORTMERNADIR - see below
## if [ -z $SLURM_SUBMIT_DIR ]; then
## SLURM_SUBMIT_DIR=`pwd`
## fi
## usage ## usage
usage(){ usage(){
...@@ -34,24 +29,73 @@ echo >&2 \ ...@@ -34,24 +29,73 @@ echo >&2 \
Usage: runSortmerna.sh [option] <out dir> <tmp dir> <forward fastq.gz> <reverse fastq.gz> Usage: runSortmerna.sh [option] <out dir> <tmp dir> <forward fastq.gz> <reverse fastq.gz>
Options: Options:
-k keep the rRNA -d define your dbs (semi-colon separated)
-m do not run against mtSSU -k drop the rRNA (only for v1.9, default to keep them)
-p number of threads to be used (default $PROC) -m run against mtSSU in addition (only for v1.9)
-p number of threads to be used (default $PROC)
-u single end data (in that case only the forward fastq is needed) -u single end data (in that case only the forward fastq is needed)
Note: Note:
1) The SORTMERNADIR environment variable needs to be set 1) The SORTMERNADIR environment variable needs to be set
2) Only SortMeRna version 1.9 is supported 2) Only SortMeRna version 1.9 and 2.x are supported (2.x is default)
3) -m is not applicable if -d is set
" "
exit 1 exit 1
} }
## load the module
module load bioinfo-tools

## Probing with `module avail` was abandoned: it does not work on uppmax -
## umea returns an empty result while uppmax is verbose.
## avail=$( module avail sortmerna 2>&1 > /dev/null)
## avail=`echo $avail | tr -d [:blank:]`
## if [ ! -z $avail ]; then
##    module load sortmerna
##    sortmerna --version
##fi

## record the SORTMERNADIR if it exists, so that it can be restored after the
## module load below (empty when it was not set)
STOREENV=${SORTMERNADIR:-}

## try to load the module, otherwise look for the tool in the PATH
module load sortmerna || {
  echo "No sortmerna as module"

  ## then check for availability
  ## NOTE(review): `set -e` appears to be enabled earlier in this script; the
  ## `|| tool=` keeps a failed lookup from aborting before the error message
  ## below is printed - confirm.
  tool=$(command -v sortmerna 2>/dev/null) || tool=
  if [ -n "$tool" ] && [ -f "$tool" ] && [ -x "$tool" ]; then
    echo "sortmerna available"
  else
    echo "ERROR: INSTALL SortMeRna"
    usage
  fi
}

## restore the env if it existed
## (quoted so that a value containing whitespace is still restored)
if [ -n "$STOREENV" ]; then
  export SORTMERNADIR="$STOREENV"
fi

## check for sortmerna version
## The version banner is captured once. is1dot9 / is2dotx keep their original
## meaning: 0 when the version does not match, non-zero (a byte count)
## otherwise. `grep -F` matches the dots literally instead of as wildcards,
## and the pipelines end in `wc -c` so their exit status stays 0 on no match.
sortmerna_version=$(sortmerna --version 2>&1) || true
is1dot9=$(printf '%s\n' "$sortmerna_version" | grep version | grep -F "1.9" | wc -c)
is2dotx=$(printf '%s\n' "$sortmerna_version" | grep -F "version 2." | wc -c)
if [ "$is1dot9" -eq 0 ] && [ "$is2dotx" -eq 0 ]; then
  echo "Only version 1.9 and 2.x are supported"
  usage
fi
## get the options ## get the options
while getopts kmp:u option while getopts d:kmp:u option
do do
case "$option" in case "$option" in
k) KEEP=1;; d) DBS=$OPTARG;;
m) useMtSSU=0;; k) KEEP=0;;
m) useMtSSU=1;;
p) PROC=$OPTARG;; p) PROC=$OPTARG;;
u) UNPAIRED=1;; u) UNPAIRED=1;;
\?) ## unknown flag \?) ## unknown flag
...@@ -73,23 +117,46 @@ if [ -z $SORTMERNADIR ]; then ...@@ -73,23 +117,46 @@ if [ -z $SORTMERNADIR ]; then
usage usage
fi fi
## set the dbs ## set the default dbs
db5s=$SORTMERNADIR/rRNA_databases/rfam-5s-database-id98.fasta if [ ! -z $DBS ]; then
db58s=$SORTMERNADIR/rRNA_databases/rfam-5.8s-database-id98.fasta dbs=${DBS//;/ }
db16s=$SORTMERNADIR/rRNA_databases/silva-bac-16s-database-id85.fasta dbNum=`echo $DBS | awk -F";" '{print NF}'`
db18s=$SORTMERNADIR/rRNA_databases/silva-euk-18s-database-id95.fasta else
db23s=$SORTMERNADIR/rRNA_databases/silva-bac-23s-database-id98.fasta if [ $is2dotx != 0 ]; then
db28s=$SORTMERNADIR/rRNA_databases/silva-euk-28s-database-id98.fasta db5s=$SORTMERNADIR/rRNA_databases/rfam-5s-database-id98.fasta,$SORTMERNADIR/automata/rfam-5s-database-id98
dbNum=6 db58s=$SORTMERNADIR/rRNA_databases/rfam-5.8s-database-id98.fasta,$SORTMERNADIR/automata/rfam-5.8s-database-id98
dbs="$db5s $db58s $db16s $db18s $db23s $db28s" db16sa=$SORTMERNADIR/rRNA_databases/silva-arc-16s-id95.fasta,$SORTMERNADIR/automata/silva-arc-16s-database-id95
if [ $useMtSSU == 1 ]; then db16s=$SORTMERNADIR/rRNA_databases/silva-bac-16s-id90.fasta,$SORTMERNADIR/automata/silva-bac-16s-database-id90
db18s=$SORTMERNADIR/rRNA_databases/silva-euk-18s-id95.fasta,$SORTMERNADIR/automata/silva-euk-18s-database-id95
db23sa=$SORTMERNADIR/rRNA_databases/silva-arc-23s-id98.fasta,$SORTMERNADIR/automata/silva-arc-23s-database-id98
db23s=$SORTMERNADIR/rRNA_databases/silva-bac-23s-id98.fasta,$SORTMERNADIR/automata/silva-bac-23s-database-id98
db28s=$SORTMERNADIR/rRNA_databases/silva-euk-28s-id98.fasta,$SORTMERNADIR/automata/silva-euk-28s-database-id98
dbs="$db5s:$db58s:$db16sa:$db16s:$db18s:$db23sa:$db23s:$db28s"
#if [ ! -f $SORTMERNADIR/automata/rfam-5s-database-id98.stats ]; then