Commit 13d5f2d4 authored by Nicolas Delhomme's avatar Nicolas Delhomme

updated the pipeline

parent bf69cb31
Pipeline #53 skipped
......@@ -18,7 +18,7 @@ set -x
## are we on UPPMAX
if [ ! -z $SLURM_SUBMIT_DIR ]; then
module load bioinfo-tools
module load FastQC/0.10.1
module load FastQC/0.11.5
## echo "Running on UPPMAX"
else
## echo "Running locally"
......
......@@ -4,8 +4,16 @@
#SBATCH -t 0-01:00:00
#SBATCH --mail-type=ALL
## Try the environment modules first; when either load fails, fall back to
## a fastQValidator that is already on the PATH and abort if there is none.
if ! { module load bioinfo-tools && module load fastQvalidator; }; then
  hash fastQValidator 2>/dev/null || {
    echo "fastQValidator was not found in your path" 1>&2
    exit 1
  }
fi
usage() {
echo "usage: `basename $0` <fastq>
echo "usage: `basename $0` <fastq>
Run fastQValidator on a FASTQ file. Prints output on stdout and
exits with a non-zero exit status if the input file does not
......@@ -16,6 +24,8 @@ ARGUMENTS:
NOTES:
fastQValidator must lie in your PATH" 1>&2
exit 1
}
## stop on error
......@@ -23,20 +33,13 @@ set -e
## check
if [ $# != 1 ]; then
echo "The argument should be one fastq filename" 1>&2
echo "This function takes one argument: a fastq filename" 1>&2
usage
exit 1
fi
if [ ! -f $1 ]; then
echo "The fastq filename you provided does not exist" 1>&2
usage
exit 1
fi
if ! hash fastQValidator 2>/dev/null; then
echo "fastQValidator was not found in your path" 1>&2
exit 1
fi
## we print 1000 errors, should be enough
......
......@@ -20,7 +20,9 @@ echo >&2 \
for the P. trichocarpa gene exon gff3 file
-s is the protocol stranded?
default to FALSE
-a are we counting antisense transcripts?
default to FALSE, only active in combination with -s
-t Chose attribute to count in the gff3 file default is exon
Note:
BAM file are expected to be sorted by position
Only HTSeq 0.6+ version(s) are supported
......@@ -28,24 +30,11 @@ echo >&2 \
exit 1
}
## Are we on UPPMAX?
if [ ! -z $SLURM_SUBMIT_DIR ]; then
## load the modules
echo Loading modules
module load python/2.7.6
module load bioinfo-tools
module load samtools/0.1.19
else
htseq=`which htseq-count`
if [ "$?" -ne 0 ]; then
echo "error: you need to install htseq or add it to your path"
exit 1
fi
fi
echo Loading modules
module load bioinfo-tools htseq
## check the version
isVersion6=`htseq-count --help | grep "version 0.6" | wc -l`
if [ $isVersion6 != 1 ]; then
if [ `htseq-count --help | grep -c "version 0.6"` -ne 1 ]; then
echo Only HTSeq version 0.6+ are supported
usage
fi
......@@ -53,13 +42,17 @@ fi
## options
IDATTR="Parent"
stranded=0
antisense=0
t="exon"
## get the options
while getopts i:s option
while getopts ai:st: option
do
case "$option" in
a) antisense=1;;
i) IDATTR=$OPTARG;;
s) stranded=1;;
t) t=$OPTARG;;
\?) ## unknown flag
usage;;
esac
......@@ -67,12 +60,6 @@ done
shift `expr $OPTIND - 1`
## we get two dir and two files as input
if [ $# == 4 ]; then
echo "This function arguments have changed!"
usage
fi
if [ $# != 3 ]; then
echo "This function takes one directory, one bam and one gff3 file as arguments"
usage
......@@ -84,28 +71,33 @@ if [ ! -d $1 ]; then
fi
if [ ! -f $2 ]; then
echo "The third argument needs to be an existing bam file"
echo "The second argument needs to be an existing bam file"
usage
fi
nam=`basename ${2//.bam/}`
if [ ! -f $3 ]; then
echo "The forth argument needs to be an existing gff3 file"
echo "The third argument needs to be an existing gff3 file"
usage
fi
## sort by id
## samtools sort -n $3 $2/${nam}-byname
if [ $t == "CDS" ]; then
echo "Warning: the CDS option require the CDS feature to be capital in you gff3 file"
fi
## get the count table
if [ $stranded == 0 ]; then
if [ $antisense == 1 ]; then
echo "The antisense only works in conjunction with the -s option" >&2
fi
## since we are not using strand specific, go for the union
htseq-count -f bam -r pos -m union -s no -t exon -i $IDATTR $2 $3 > $1/$nam.txt
htseq-count -f bam -r pos -m union -s no -t $t -i $IDATTR $2 $3 > $1/$nam.txt
else
htseq-count -f bam -r pos -m intersection-nonempty -s reverse -t exon -i $IDATTR $2 $3 > $1/$nam.txt
## normal counting
if [ $antisense == 0 ]; then
htseq-count -f bam -r pos -m intersection-nonempty -s reverse -t $t -i $IDATTR $2 $3 > $1/$nam.txt
else
htseq-count -f bam -r pos -m intersection-nonempty -s yes -t $t -i $IDATTR $2 $3 > $1/$nam.txt
fi
fi
## clean
## rm $2/${nam}-byname.bam
This diff is collapsed.
#!/bin/bash -l
## THINK OF --outStd SAM --outSAMunmapped Within to write SAM directly and keep all reads. That does not affect any of the log file to be generated
## but consider if we want that when reporting the Chimeric SAM (i.e. for merging the files, we would not want the reads to be part of the SAM...
## the good thing with outputting to SAM is that it can be readily piped into samtools -bs - | samtools sort - filename
#SBATCH -p core
#SBATCH -n 8
#SBATCH -t 0-02:00:00
#SBATCH -p node
#SBATCH -n 16
#SBATCH -t 0-12:00:00
#SBATCH --mail-type=ALL
#################
## Build geneModel
#################
## TODO extract that to its own script
##usage sbatch -p devel -t 1:00:00 runSTAR.sh genome.fa
#/home/davidsu/bin/STAR --runMode genomeGenerate --genomeDir $1 --genomeFastaFiles $2 --sjdbOverhang 99 --sjdbGTFfile $3 --runThreadN 8
#exit;
# -p node is needed to accept the -C memory configuration
## stop on error and be verbose in the output
set -e -x
## load the modules
module load bioinfo-tools star/2.4.0f1 samtools
## exec
STAR=
### tool sanity
if [ ! -z $SLURM_SUBMIT_DIR ]; then
module load bioinfo-tools
module load samtools/0.1.19
module load star/2.3.0e
STAR=`which STAR`
else
STAR=`which STAR`
if [ $? != 0 ]; then
echo "please install STAR before running this script or add it to your PATH"
exit 1
fi
if [ ! -f $STAR -a ! -x $STAR ]; then
echo "your STAR does not appear to be an executable file"
exit 1
fi
samtools=`which samtools`
if [ $? != 0 ]; then
echo "please install samtools before running this script or add it to your PATH"
exit 1
fi
if [ ! -f $samtools -a ! -x $samtools ]; then
echo "your samtools does not appear to be an executable file"
exit 1
fi
fi
##########
# Run star
##########
## vars
INTRONMAX=11000
OUT_DIR=`pwd`
GFF=1
GFF=
SINGLE=0
PROC=8
PROC=16
FORMAT="gtf"
LIMIT=10000000000
## additional options for STAR
OPTIONS="--outSAMstrandField intronMotif --readFilesCommand zcat --outSAMmapqUnique 254 --quantMode TranscriptomeSAM --outFilterMultimapNmax 100 --outReadsUnmapped Fastx --chimSegmentMin 1 --outSAMtype BAM SortedByCoordinate --outWigType bedGraph"
## usage
usage(){
echo >&2 \
"
Usage: runSTAR.sh [option] <fwd file> <rv file> <genome dir> <gene model gff3> [--] [additional STAR arguments]
Usage: $0 [option] <out dir> <genome dir> <fwd file> <rv file> [--] [additional STAR arguments]
Options:
-e STAR executable
-g if there is no gff file
-m max intron length
-o outdir
-p number of threads to be used (default: 8)
-s if there is no reverse file
-f the gtf/gff3 file format (default gtf)
-g the path to a gtf/gff3 file
-l the BAM sorting memory limit ($LIMIT)
-m the max intron length ($INTRONMAX)
-p number of threads to be used (default: 16)
-q set for Illumina +64 Phred score
-s if there is no reverse
-n no default option
Notes:
The number of arguments is only 3 when -s is set.
-- is a special argument that stop the command line scanning for the script options.
It is necessary if you want to precised additional - non-default - STAR arguments.
When the format is gff3, the exon-transcript relationship assumes a 'Parent' keylink.
"
exit 1
}
## get the options
while getopts e:gm:o:sp: option
while getopts f:g:l:m:np:qs option
do
case "$option" in
e) STAR=$OPTARG;;
g) GFF=0;;
m) INTRONMAX=$OPTARG;;
o) OUT_DIR=$OPTARG;;
f) FORMAT=$OPTARG;;
g) GFF=$OPTARG;;
l) LIMIT=$OPTARG;;
m) INTRONMAX=$OPTARG;;
n) OPTIONS="";;
p) PROC=$OPTARG;;
q) OPTIONS="$OPTIONS --outQSconversionAdd -31";;
s) SINGLE=1;;
\?) ## unknown flag
\?) ## unknown flag
usage;;
esac
done
shift `expr $OPTIND - 1`
## check the arguments
if [ ! -z $STAR -a ! -f $STAR -a ! -x $STAR ]; then
echo "your STAR does not appear to be an executable file"
exit 1
## update the options
## dirty if loop to accommodate v2.3.*
if [ "$OPTIONS" != "" ]; then
OPTIONS="$OPTIONS --limitBAMsortRAM $LIMIT"
fi
## check the arguments
echo "Parsing the arguments"
ARGS=4
if [ $SINGLE == 1 ]; then
let "ARGS = $ARGS - 1"
FIND=".f*q.gz"
FIND=".f*.gz"
else
FIND="_1.f*q.gz"
fi
if [ $GFF == 0 ]; then
let "ARGS = $ARGS - 1"
FIND="_[1,2].f*q.gz"
fi
## check the number of args
if [ $# -lt $ARGS ]; then
echo "This script needs 2 arguments without GFF and for SE data; 3 for either and 4 for none of these two conditions."
echo "This script needs 3 or 4 arguments for SE or PE data, respectively."
usage
fi
## get the out dir
outdir=$1
shift
## check the genome dir
if [ ! -d $1 ]; then
echo "The genome directory: $1 does not exist"
usage
else
genome=$1
shift
fi
## Check if the first file exists
if [ ! -f $1 ]; then
echo "The forward fastq file: $1 does not exist"
usage
usage
else
in1=$1
shift
fwd=$1
shift
fi
## Check if the second file exists
if [ $SINGLE == 0 ]; then
if [ ! -f $1 ]; then
echo "The reverse fastq file: $1 does not exist"
usage
else
in2=$1
shift
rev=$1
shift
fi
fi
if [ ! -d $1 ]; then
echo "The genome directory: $1 does not exist"
usage
## if gff is set check if it exists
if [ ! -z $GFF ] && [ ! -f $GFF ] ; then
echo "The gene model gtf/gff3 file: $GFF does not exists"
usage
else
genome=$1
shift
if [ ! -z $GFF ]; then
OPTIONS="--sjdbGTFfile $GFF $OPTIONS"
fi
fi
if [ $GFF == 1 ]; then
if [ ! -f $1 ]; then
echo "The gene model gff3 file: $1 does not exists"
usage
else
gff3=$1
shift
fi
fi
## Translate the annotation format into the matching STAR option.
## gff3 and gff both append the option that names 'Parent' as the
## exon -> transcript tag; gtf needs nothing extra; anything else is rejected.
case "$FORMAT" in
  gff3 | gff)
    OPTIONS=" $OPTIONS --sjdbGTFtagExonParentTranscript Parent"
    ;;
  gtf)
    ## nothing to do
    ;;
  *)
    echo "There are only 2 supported format, gtf or gff3"
    usage
    ;;
esac
## do we have more arguments
if [ $# != 0 ]; then
......@@ -164,34 +152,67 @@ if [ $# != 0 ]; then
shift
fi
## output name
uz3=$OUT_DIR/`basename ${in1//$FIND/}`
## create the output dir
echo "Processing"
if [ ! -d $outdir ]; then
mkdir -p $outdir
fi
## output prefix
bnam=`basename ${fwd//$FIND/}`
fnam=$outdir/$bnam
## start STAR
echo "Aligning"
if [ $SINGLE == 1 ]; then
STAR --genomeDir $genome --readFilesIn $fwd --runThreadN $PROC --alignIntronMax $INTRONMAX --outFileNamePrefix $fnam $OPTIONS $@
else
STAR --genomeDir $genome --readFilesIn $fwd $rev --runThreadN $PROC --alignIntronMax $INTRONMAX --outFileNamePrefix $fnam $OPTIONS $@
fi
## save the log
echo "Logging"
mkdir -p ${fnam}_logs
mv ${fnam}Log.* ${fnam}_logs
## save the junctions
mkdir -p ${fnam}_junctions
mv ${fnam}SJ* ${fnam}_junctions
mv ${fnam}Chimeric.out.junction ${fnam}_junctions
## save the wig
echo "Wiggling"
mkdir -p ${fnam}_bedgraphs
mv ${fnam}Signal.*.bg ${fnam}_bedgraphs
## start star
if [ $SINGLE == 1 -a $GFF == 0 ]; then
$STAR --genomeDir $genome --readFilesIn $in1 --runThreadN $PROC --alignIntronMax $INTRONMAX --outSAMstrandField intronMotif --readFilesCommand zcat --outFileNamePrefix $uz3 $@
## rename the output
echo "Renaming"
mv ${fnam}Aligned.sortedByCoord.out.bam ${fnam}_STAR.bam
if [ $SINGLE == 0 ]; then
mv ${fnam}Unmapped.out.mate1 ${fnam}_Unmapped_1.fq
mv ${fnam}Unmapped.out.mate2 ${fnam}_Unmapped_2.fq
else
if [ $SINGLE == 1 -o $GFF == 0 ]; then
if [ $GFF == 0 ]; then
$STAR --genomeDir $genome --readFilesIn $in1 $in2 --runThreadN $PROC --alignIntronMax $INTRONMAX --outSAMstrandField intronMotif --readFilesCommand zcat --outFileNamePrefix $uz3 $@
else
$STAR --genomeDir $genome --readFilesIn $in1 --runThreadN $PROC --alignIntronMax $INTRONMAX --outSAMstrandField intronMotif --sjdbGTFfile $gff3 --readFilesCommand zcat --outFileNamePrefix $uz3 $@
fi
else
$STAR --genomeDir $genome --readFilesIn $in1 $in2 --runThreadN $PROC --alignIntronMax $INTRONMAX --outSAMstrandField intronMotif --sjdbGTFfile $gff3 --readFilesCommand zcat --outFileNamePrefix $uz3 $@
fi
mv ${fnam}Unmapped.out.mate1 ${fnam}_Unmapped.fq
fi
## save the logs
mkdir -p ${uz3}_logs
mv ${uz3}Log.* ${uz3}_logs
mv ${uz3}SJ* ${uz3}_logs
mv ${fnam}Aligned.toTranscriptome.out.bam ${fnam}_STAR_Transcriptome.bam
## compress files (we would only need 2 CPUS, but what if PROC is set to 1)
find $outdir -name "${bnam}_Unmapped*.fq" -print0 | xargs -P $PROC -0 -I {} gzip -f {}
## sort the transcriptome bam and rename
samtools sort -@ 16 -n ${fnam}_STAR_Transcriptome.bam ${fnam}_STAR_Transcriptome.sorted
rm ${fnam}_STAR_Transcriptome.bam
mv ${fnam}_STAR_Transcriptome.sorted.bam ${fnam}_STAR_Transcriptome.bam
## convert sam to bam
samtools view -Sb ${uz3}Aligned.out.sam | samtools sort - ${uz3}_STAR
samtools index ${uz3}_STAR.bam
## convert the chimeric sam to bam
samtools view -Sb ${fnam}Chimeric.out.sam | samtools sort -@ 16 - ${fnam}_STAR_Chimeric
## clean
rm ${uz3}Aligned.out.sam
## index the BAMs
echo "Indexing"
printf "%s\0%s" ${fnam}_STAR.bam ${fnam}_STAR_Chimeric.bam | xargs -P $PROC -0 -I {} samtools index {}
## TODO modify to print SAM to stdout to pipe into samtools, add the mate1,2 generation and compress these
## cleanup
echo "Cleaning"
rm ${fnam}Chimeric.out.sam
rm -rf ${fnam}_STARtmp/
......@@ -16,16 +16,11 @@ set -e
set -x
## check the options if any
KEEP=0
useMtSSU=1
KEEP=1
useMtSSU=0
UNPAIRED=0
PROC=16
## local run
## replaced by checking for the SORTMERNADIR - see below
## if [ -z $SLURM_SUBMIT_DIR ]; then
## SLURM_SUBMIT_DIR=`pwd`
## fi
DBS=
## usage
usage(){
......@@ -34,24 +29,73 @@ echo >&2 \
Usage: runSortmerna.sh [option] <out dir> <tmp dir> <forward fastq.gz> <reverse fastq.gz>
Options:
-k keep the rRNA
-m do not run against mtSSU
-p number of threads to be used (default $PROC)
-d define your dbs (semi-colon separated)
-k drop the rRNA (only for v1.9, default to keep them)
-m run against mtSSU in addition (only for v1.9)
-p number of threads to be used (default $PROC)
-u single end data (in that case only the forward fastq is needed)
Note:
1) The SORTMERNADIR environment variable needs to be set
2) Only SortMeRna version 1.9 is supported
2) Only SortMeRna version 1.9 and 2.x are supported (2.x is default)
3) -m is not applicable if -d is set
"
exit 1
}
## load the module
module load bioinfo-tools
## Does not work on uppmax - umea has an empty result
## while uppmax is verbose.
## avail=$( module avail sortmerna 2>&1 > /dev/null)
## avail=`echo $avail | tr -d [:blank:]`
## if [ ! -z $avail ]; then
## module load sortmerna
## sortmerna --version
##fi
## Remember any SORTMERNADIR the caller already set so that it can be put
## back after the module load below (presumably because the module may
## redefine it - confirm against the site's sortmerna module file).
STOREENV=
if [ -n "${SORTMERNADIR:-}" ]; then
  STOREENV=$SORTMERNADIR
fi

## Prefer the environment module; otherwise accept a sortmerna executable
## found on the PATH, and print the usage if neither is available.
module load sortmerna || {
  echo "No sortmerna as module"
  ## A failed lookup must not abort the script when errexit (set -e) is
  ## active, otherwise the installation hint below would never be printed.
  tool=$(which sortmerna 2>/dev/null) || tool=
  ## Quoted tests: a path containing whitespace must not split into words.
  if [ -n "$tool" ] && [ -f "$tool" ] && [ -x "$tool" ]; then
    echo "sortmerna available"
  else
    echo "ERROR: INSTALL SortMeRna"
    usage
  fi
}

## Restore the caller's SORTMERNADIR if one was recorded above
if [ -n "$STOREENV" ]; then
  export SORTMERNADIR=$STOREENV
fi
## Detect which SortMeRna release is installed. Each variable receives the
## byte count of the matching version text, so 0 means "not this release".
is1dot9=$(sortmerna --version 2>&1 | grep version | grep 1.9 | wc -c)
is2dotx=$(sortmerna --version 2>&1 | grep "version 2." | wc -c)
## Show the usage unless at least one supported release was recognised
if [ $is1dot9 == 0 ]; then
  if [ $is2dotx == 0 ]; then
    echo "Only version 1.9 and 2.x are supported"
    usage
  fi
fi
## get the options
while getopts kmp:u option
while getopts d:kmp:u option
do
case "$option" in
k) KEEP=1;;
m) useMtSSU=0;;
d) DBS=$OPTARG;;
k) KEEP=0;;
m) useMtSSU=1;;
p) PROC=$OPTARG;;
u) UNPAIRED=1;;
\?) ## unknown flag
......@@ -73,23 +117,46 @@ if [ -z $SORTMERNADIR ]; then
usage
fi
## set the dbs
db5s=$SORTMERNADIR/rRNA_databases/rfam-5s-database-id98.fasta
db58s=$SORTMERNADIR/rRNA_databases/rfam-5.8s-database-id98.fasta
db16s=$SORTMERNADIR/rRNA_databases/silva-bac-16s-database-id85.fasta
db18s=$SORTMERNADIR/rRNA_databases/silva-euk-18s-database-id95.fasta
db23s=$SORTMERNADIR/rRNA_databases/silva-bac-23s-database-id98.fasta
db28s=$SORTMERNADIR/rRNA_databases/silva-euk-28s-database-id98.fasta
dbNum=6
dbs="$db5s $db58s $db16s $db18s $db23s $db28s"
if [ $useMtSSU == 1 ]; then
## set the default dbs
if [ ! -z $DBS ]; then
dbs=${DBS//;/ }
dbNum=`echo $DBS | awk -F";" '{print NF}'`
else
if [ $is2dotx != 0 ]; then
db5s=$SORTMERNADIR/rRNA_databases/rfam-5s-database-id98.fasta,$SORTMERNADIR/automata/rfam-5s-database-id98
db58s=$SORTMERNADIR/rRNA_databases/rfam-5.8s-database-id98.fasta,$SORTMERNADIR/automata/rfam-5.8s-database-id98
db16sa=$SORTMERNADIR/rRNA_databases/silva-arc-16s-id95.fasta,$SORTMERNADIR/automata/silva-arc-16s-database-id95
db16s=$SORTMERNADIR/rRNA_databases/silva-bac-16s-id90.fasta,$SORTMERNADIR/automata/silva-bac-16s-database-id90
db18s=$SORTMERNADIR/rRNA_databases/silva-euk-18s-id95.fasta,$SORTMERNADIR/automata/silva-euk-18s-database-id95
db23sa=$SORTMERNADIR/rRNA_databases/silva-arc-23s-id98.fasta,$SORTMERNADIR/automata/silva-arc-23s-database-id98
db23s=$SORTMERNADIR/rRNA_databases/silva-bac-23s-id98.fasta,$SORTMERNADIR/automata/silva-bac-23s-database-id98
db28s=$SORTMERNADIR/rRNA_databases/silva-euk-28s-id98.fasta,$SORTMERNADIR/automata/silva-euk-28s-database-id98
dbs="$db5s:$db58s:$db16sa:$db16s:$db18s:$db23sa:$db23s:$db28s"
#if [ ! -f $SORTMERNADIR/automata/rfam-5s-database-id98.stats ]; then
# echo "No indexes found, creating indexes in folder $SORTMERNADIR/automata"
# indexdb_rna --ref $dbs
#fi
else
db5s=$SORTMERNADIR/rRNA_databases/rfam-5s-database-id98.fasta
db58s=$SORTMERNADIR/rRNA_databases/rfam-5.8s-database-id98.fasta
db16sa=$SORTMERNADIR/rRNA_databases/silva-arc-16s-database-id95.fasta
db16s=$SORTMERNADIR/rRNA_databases/silva-bac-16s-database-id85.fasta
db18s=$SORTMERNADIR/rRNA_databases/silva-euk-18s-database-id95.fasta
db23sa=$SORTMERNADIR/rRNA_databases/silva-arc-23s-database-id98.fasta
db23s=$SORTMERNADIR/rRNA_databases/silva-bac-23s-database-id98.fasta
db28s=$SORTMERNADIR/rRNA_databases/silva-euk-28s-database-id98.fasta
dbNum=8
dbs="$db5s $db58s $db16sa $db16s $db18s $db23sa $db23s $db28s"
fi
## Add the mtSSU
if [ $is1dot9 != 0 ] && [ $useMtSSU == 1 ]; then
mtSSU=$SORTMERNADIR/rRNA_databases/mtSSU_UCLUST-95-identity.fasta
dbs="$db5s $db58s $db16s $db18s $db23s $db28s $mtSSU"
dbNum=7
dbs="$dbs $mtSSU"
dbNum=9
fi
fi
##
echo Checking
......@@ -147,19 +214,15 @@ fi
## interleave them
fm=`basename ${3//.f*q.gz/}`
if [ $UNPAIRED == 0 ]; then
isVersion9=`sortmerna --version | grep "version 1.9" | wc -l`
if [ $isVersion9 != 1 ]; then
echo Only SortMeRna version 1.9 is supported
usage
else
merge-paired-reads.sh $2/$f1 $2/$f2 $2/$fm
fi
merge-paired-reads.sh $2/$f1 $2/$f2 $2/$fm
fi
##
if [ $UNPAIRED == 0 ]; then
echo Pre-cleaning
rm -f $2/$f1 $2/$f2
else
echo "TODO: Cleaning needs implementing for single end sequencing"
fi
##
......@@ -173,28 +236,45 @@ else
fi
## check the options
opt=
if [ $KEEP -eq 1 ]; then
opt="--bydbs --accept $2/${fo}_rRNA"
opt="-a $PROC"
if [ $KEEP == 1 ] && [ $is1dot9 != 0 ]; then
opt="$opt --bydbs --accept $2/${fo}_rRNA"
fi
## run
if [ $UNPAIRED == 0 ]; then
sortmerna -n $dbNum --db $dbs --I $2/$fm --other $2/$fo --log $1/$fo -a $PROC -v --paired-in $opt
if [ $is2dotx != 0 ]; then
sortmerna --ref $dbs --reads $2/$fm --other $2/$fo --log --paired_in --fastx $opt --sam --num_alignments 1 --aligned $2/${fo}_rRNA
else
sortmerna -n $dbNum --db $dbs --I $2/$fm --other $2/$fo --log $1/$fo --paired-in $opt
fi
else
sortmerna -n $dbNum --db $dbs --I $2/$f1 --other $1/$fo --log $1/$fo -a $PROC -v $opt
if [ $is2dotx != 0 ]; then
sortmerna --ref $dbs --reads $2/$f1 --other $1/$fo --log $opt --sam --fastx --num_alignments 1 --aligned $2/${fo}_rRNA
else
sortmerna -n $dbNum --db $dbs --I $2/$f1 --other $1/$fo --log $1/$fo $opt
fi
fi
## deinterleave it
if [ $UNPAIRED == 0 ]; then
## sortmerna get confused by dots in the filenames
if [ ! -f $2/$fo.fastq ]; then
mv $2/$fo.* $2/$fo.fastq
mv $2/$fo.* $2/$fo.fastq
fi
unmerge-paired-reads.sh $2/$fo.fastq $1/${fo}_1.fq $1/${fo}_2.fq
fi
## rm the tmp
## cleanup
echo Post-Cleaning
if [ $is2dotx != 0 ]; then
## mv the rRNA, fastq and log back
mv $2/${fo}_rRNA.* $1
fi
## rm the tmp
if [ $UNPAIRED == 0 ]; then
rm -f $2/$fm $2/$fo.fastq
else
......@@ -202,7 +282,7 @@ else
fi
## deinterleave the rest if needed
if [ $KEEP -eq 1 ]; then
if [ $KEEP == 1 ]; then
if [ $UNPAIRED == 0 ]; then
find $2 -name "${fo}_rRNA*" -print0 | xargs