diff --git a/tutorial/easyRNASeq/Drosophila_melanogaster.BDGP5.77-synthetic-transcripts.rda b/tutorial/easyRNASeq/Drosophila_melanogaster.BDGP5.77-synthetic-transcripts.rda new file mode 100644 index 0000000000000000000000000000000000000000..9cdafc265299d455c6932ca22fbcff7df65d39b1 Binary files /dev/null and b/tutorial/easyRNASeq/Drosophila_melanogaster.BDGP5.77-synthetic-transcripts.rda differ diff --git a/tutorial/easyRNASeq/Drosophila_melanogaster.BDGP5.77.with-chr.gtf.gz b/tutorial/easyRNASeq/Drosophila_melanogaster.BDGP5.77.with-chr.gtf.gz new file mode 100644 index 0000000000000000000000000000000000000000..dc000d3e990ed690fd438c3c9bf40bcf932fea56 Binary files /dev/null and b/tutorial/easyRNASeq/Drosophila_melanogaster.BDGP5.77.with-chr.gtf.gz differ diff --git a/tutorial/easyRNASeq/synthetic-transcript-creation-example.R b/tutorial/easyRNASeq/synthetic-transcript-creation-example.R new file mode 100644 index 0000000000000000000000000000000000000000..c28d7e09d363019a72d6fda7b3ca3a48d5825178 --- /dev/null +++ b/tutorial/easyRNASeq/synthetic-transcript-creation-example.R @@ -0,0 +1,64 @@ +#' --- +#' title: "Synthetic transcripts generation example" +#' author: "Nicolas Delhomme" +#' date: "`r Sys.Date()`" +#' output: +#' html_document: +#' toc: true +#' number_sections: true +#' --- +#' +#' # Setup +#' Load the libraries +library(easyRNASeq) +suppressPackageStartupMessages(library(IRanges)) +suppressPackageStartupMessages(library(genomeIntervals)) +library(pander) + +#' Source an helper file +source("https://microasp.upsc.se/root/upscb-public/raw/master/src/R/createSyntheticTranscripts.R") + +#' # Process +#' ## Synthetic transcripts creation +#' This function takes a gtf or gff3 _filename_ as input. +#' +#' The _input_ parameter defines the file format (default to gff3). +#' +#' The _feature_ parameter defines which feature to look for in the provided +#' file. Commonly mRNA for gff3 and transcript for gtf. It defaults to mRNA. 
+#' Several parameter can ge given as argument. +#' +#' The _output_ paramter defines the type of object that is returned. +#' It can generate a **Genome_intervals** or a __GRanges__ class of objects. +#' The former can be saved as a gff3 using the writeGff3 function from the +#' genomeIntervals package (loaded). The latter can be saved as an RData object +#' and/or be used directly in the construction of an AnnotParam. +gAnnot <- createSyntheticTranscripts( + filename="~/Box Sync/Projects/easyRNASeq/Drosophila_melanogaster.BDGP5.77.with-chr.gtf.gz", + input="gtf", + feature="transcript", + output="GRanges") + +#' ## Export +#' Save the object for later re-use +save(gAnnot, file="Drosophila_melanogaster.BDGP5.77-synthetic-transcripts.rda") + +#' ## Summarization +#' ### Set the params +param <- RnaSeqParam(annotParam=AnnotParam(datasource=gAnnot), + bamParam=BamParam(paired=FALSE)) + +#' ### Get the BAM files +bamFiles <- getBamFileList(filenames= + dir(system.file(package="RnaSeqTutorial","extdata"), + pattern="[A,T].*.bam$", + full.names=TRUE)) + +#' ### Run +sexp <- simpleRNASeq(bamFiles=bamFiles,param=param,verbose=TRUE) + +#' ### Check +pander(colSums(assay(sexp))) + +#' # Session Info +sessionInfo() diff --git a/tutorial/easyRNASeq/synthetic-transcript-creation-example.html b/tutorial/easyRNASeq/synthetic-transcript-creation-example.html new file mode 100644 index 0000000000000000000000000000000000000000..73a01badad8f9a0d38ab0be7b3f306e14756a426 --- /dev/null +++ b/tutorial/easyRNASeq/synthetic-transcript-creation-example.html @@ -0,0 +1,363 @@ + + + + + + + + + + + + + + +Synthetic transcripts generation example + + + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ +
+ +
+

1 Setup

+

Load the libraries

+
library(easyRNASeq)
+library(pander)
+

Source a helper file

+
source("https://microasp.upsc.se/root/upscb-public/raw/master/src/R/createSyntheticTranscripts.R")
+
+
+

2 Process

+
+

2.1 Synthetic transcripts creation

+

This function takes a gtf or gff3 filename as input.

+

The input parameter defines the file format (default to gff3).

+

The feature parameter defines which feature to look for in the provided file. Commonly mRNA for gff3 and transcript for gtf. It defaults to mRNA. Several parameters can be given as arguments.

+

The output parameter defines the type of object that is returned. It can generate a Genome_intervals or a GRanges class of objects. The former can be saved as a gff3 using the writeGff3 function from the genomeIntervals package (loaded). The latter can be saved as an RData object and/or be used directly in the construction of an AnnotParam.

+
gAnnot <- createSyntheticTranscripts(
+  filename="~/Box Sync/Projects/easyRNASeq/Drosophila_melanogaster.BDGP5.77.with-chr.gtf.gz",
+  input="gtf",
+  feature="transcript",
+  output="GRanges")
+
## Loading required package: genomeIntervals
+## Loading required package: intervals
+## 
+## Attaching package: 'intervals'
+## 
+## The following objects are masked from 'package:easyRNASeq':
+## 
+##     reduce, type
+## 
+## Loading required package: BiocGenerics
+## Loading required package: parallel
+## 
+## Attaching package: 'BiocGenerics'
+## 
+## The following objects are masked from 'package:parallel':
+## 
+##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
+##     clusterExport, clusterMap, parApply, parCapply, parLapply,
+##     parLapplyLB, parRapply, parSapply, parSapplyLB
+## 
+## The following object is masked from 'package:stats':
+## 
+##     xtabs
+## 
+## The following objects are masked from 'package:base':
+## 
+##     anyDuplicated, append, as.data.frame, as.vector, cbind,
+##     colnames, do.call, duplicated, eval, evalq, Filter, Find, get,
+##     intersect, is.unsorted, lapply, Map, mapply, match, mget,
+##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
+##     rbind, Reduce, rep.int, rownames, sapply, setdiff, sort,
+##     table, tapply, union, unique, unlist, unsplit
+## 
+## Loading required package: S4Vectors
+## Loading required package: stats4
+## Loading required package: IRanges
+## 
+## Attaching package: 'IRanges'
+## 
+## The following objects are masked from 'package:intervals':
+## 
+##     expand, reduce
+
+
+

2.2 Export

+

Save the object for later re-use

+
save(gAnnot, file="Drosophila_melanogaster.BDGP5.77-synthetic-transcripts.rda")
+
+
+

2.3 Summarization

+
+

2.3.1 Set the params

+
param <- RnaSeqParam(annotParam=AnnotParam(datasource=gAnnot),
+                     bamParam=BamParam(paired=FALSE))
+
+
+

2.3.2 Get the BAM files

+
bamFiles <- getBamFileList(filenames=
+                             dir(system.file(package="RnaSeqTutorial","extdata"),
+                                 pattern="[A,T].*.bam$",
+                                 full.names=TRUE))
+
+
+

2.3.3 Run

+
sexp <- simpleRNASeq(bamFiles=bamFiles,param=param,verbose=TRUE)
+
## ==========================
+## simpleRNASeq version 2.4.5
+## ==========================
+## Creating a SummarizedExperiment.
+## ==========================
+## Processing the alignments.
+## ==========================
+## Pre-processing 4 BAM files.
+## Validating the BAM files.
+## Extracted 15 reference sequences information.
+## Extracting parameter from ACACTG.bam
+## Extracting parameter from ACTAGC.bam
+## Extracting parameter from ATGGCT.bam
+## Extracting parameter from TTGCGA.bam
+## Found 4 single-end BAM files.
+## Found 0 paired-end BAM files.
+## Bam file: ACACTG.bam has reads of length 30bp
+## Bam file: ACTAGC.bam has reads of length 30bp
+## Bam file: ATGGCT.bam has reads of length 30bp
+## Bam file: TTGCGA.bam has reads of length 30bp
+
## Warning in FUN(X[[i]], ...): Bam file: ACACTG.bam is considered unstranded.
+
## Warning in FUN(X[[i]], ...): Bam file: ACACTG.bam Strandedness could not
+## be determined using 18615 regions spanning 1300192 bp on either strand at a
+## 90% cutoff; 73.67 percent appear to be stranded.
+
## Warning in FUN(X[[i]], ...): Bam file: ACTAGC.bam is considered unstranded.
+
## Warning in FUN(X[[i]], ...): Bam file: ACTAGC.bam Strandedness could not
+## be determined using 14772 regions spanning 1023473 bp on either strand at a
+## 90% cutoff; 76.6 percent appear to be stranded.
+
## Warning in FUN(X[[i]], ...): Bam file: ATGGCT.bam is considered unstranded.
+
## Warning in FUN(X[[i]], ...): Bam file: ATGGCT.bam Strandedness could not
+## be determined using 18462 regions spanning 1280337 bp on either strand at a
+## 90% cutoff; 74.26 percent appear to be stranded.
+
## Warning in FUN(X[[i]], ...): Bam file: TTGCGA.bam is considered unstranded.
+
## Warning in FUN(X[[i]], ...): Bam file: TTGCGA.bam Strandedness could not
+## be determined using 19659 regions spanning 1381886 bp on either strand at a
+## 90% cutoff; 73.37 percent appear to be stranded.
+
## Streaming ACACTG.bam
+## Read 56643 reads
+## Streaming ACTAGC.bam
+## Read 42698 reads
+## Streaming ATGGCT.bam
+## Read 55414 reads
+## Streaming TTGCGA.bam
+## Read 60740 reads
+## Bam file: ACACTG.bam has 56643 reads.
+## Bam file: ACTAGC.bam has 42698 reads.
+## Bam file: ATGGCT.bam has 55414 reads.
+## Bam file: TTGCGA.bam has 60740 reads.
+## ==========================
+## Processing the annotation
+## ==========================
+## Validating the annotation source
+## No validation performed at that stage
+## Fetching the annotation
+## Using the provided annotation as such
+## ==========================
+## Sanity checking
+## ==========================
+## ==========================
+## Creating the count table
+## ==========================
+## Using 1 CPU core
+## Streaming ACACTG.bam
+## The data is single-end
+## The data is unstranded; overlapping features will be ignored.
+## The summarization is by 'read' and the mode is: Union.
+## Processing 56643 reads
+## Done with ACACTG.bam
+## Streaming ACTAGC.bam
+## The data is single-end
+## The data is unstranded; overlapping features will be ignored.
+## The summarization is by 'read' and the mode is: Union.
+## Processing 42698 reads
+## Done with ACTAGC.bam
+## Streaming ATGGCT.bam
+## The data is single-end
+## The data is unstranded; overlapping features will be ignored.
+## The summarization is by 'read' and the mode is: Union.
+## Processing 55414 reads
+## Done with ATGGCT.bam
+## Streaming TTGCGA.bam
+## The data is single-end
+## The data is unstranded; overlapping features will be ignored.
+## The summarization is by 'read' and the mode is: Union.
+## Processing 60740 reads
+## Done with TTGCGA.bam
+## ==========================
+## Returning a
+##       SummarizedExperiment
+## ==========================
+
+
+

2.3.4 Check

+
pander(colSums(assay(sexp)))
+ ++++++ + + + + + + + + + + + + + + + + +
ACACTG.bamACTAGC.bamATGGCT.bamTTGCGA.bam
55027414305385758946
+
+
+
+
+

3 Session Info

+
sessionInfo()
+
## R version 3.2.1 (2015-06-18)
+## Platform: x86_64-apple-darwin13.4.0 (64-bit)
+## Running under: OS X 10.10.4 (Yosemite)
+## 
+## locale:
+## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
+## 
+## attached base packages:
+## [1] stats4    parallel  stats     graphics  grDevices utils     datasets 
+## [8] methods   base     
+## 
+## other attached packages:
+## [1] IRanges_2.2.5          S4Vectors_0.6.1        genomeIntervals_1.24.1
+## [4] BiocGenerics_0.14.0    intervals_0.15.0       pander_0.5.2          
+## [7] easyRNASeq_2.4.5      
+## 
+## loaded via a namespace (and not attached):
+##  [1] Rcpp_0.11.6             formatR_1.2            
+##  [3] RColorBrewer_1.1-2      futile.logger_1.4.1    
+##  [5] GenomeInfoDb_1.4.1      XVector_0.8.0          
+##  [7] bitops_1.0-6            futile.options_1.0.0   
+##  [9] tools_3.2.1             zlibbioc_1.14.0        
+## [11] biomaRt_2.24.0          digest_0.6.8           
+## [13] annotate_1.46.0         evaluate_0.7           
+## [15] RSQLite_1.0.0           lattice_0.20-31        
+## [17] DBI_0.3.1               yaml_2.1.13            
+## [19] DESeq_1.20.0            hwriter_1.3.2          
+## [21] genefilter_1.50.0       stringr_1.0.0          
+## [23] knitr_1.10.5            Biostrings_2.36.1      
+## [25] locfit_1.5-9.1          LSD_3.0                
+## [27] grid_3.2.1              Biobase_2.28.0         
+## [29] AnnotationDbi_1.30.1    XML_3.98-1.3           
+## [31] survival_2.38-3         BiocParallel_1.2.7     
+## [33] rmarkdown_0.7           limma_3.24.12          
+## [35] latticeExtra_0.6-26     edgeR_3.10.2           
+## [37] geneplotter_1.46.0      lambda.r_1.1.7         
+## [39] magrittr_1.5            Rsamtools_1.20.4       
+## [41] htmltools_0.2.6         splines_3.2.1          
+## [43] GenomicAlignments_1.4.1 GenomicRanges_1.20.5   
+## [45] ShortRead_1.26.0        xtable_1.7-4           
+## [47] stringi_0.5-5           RCurl_1.95-4.7
+
+ + +
+ + + + + + + +