From 71603ade084df0e1ef968680536d607fd325fca4 Mon Sep 17 00:00:00 2001 From: Nicolas Delhomme Date: Tue, 7 Jul 2015 10:27:43 +0200 Subject: [PATCH] Extended the gitignore and finalised the synth. trx. tut. --- .gitignore | 1 + .../synthetic-transcript-creation-example.R | 14 ++++- ...synthetic-transcript-creation-example.html | 62 +++++-------------- 3 files changed, 28 insertions(+), 49 deletions(-) diff --git a/.gitignore b/.gitignore index 88dc072..9cf9d3a 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ .Rproj.user .Rhistory .RData +.DS_Store diff --git a/tutorial/easyRNASeq/synthetic-transcript-creation-example.R b/tutorial/easyRNASeq/synthetic-transcript-creation-example.R index c28d7e0..bf75097 100644 --- a/tutorial/easyRNASeq/synthetic-transcript-creation-example.R +++ b/tutorial/easyRNASeq/synthetic-transcript-creation-example.R @@ -18,6 +18,18 @@ library(pander) #' Source an helper file source("https://microasp.upsc.se/root/upscb-public/raw/master/src/R/createSyntheticTranscripts.R") +#' Download the annotation file (gtf) from: +#' +#' https://microasp.upsc.se/root/upscb-public/raw/master/tutorial/easyRNASeq/Drosophila_melanogaster.BDGP5.77.with-chr.gtf.gz +#' +#' Note that the annotation file differ from that you would get from FlyBase; +#' I have had to edit the annotation file to add chr in front of the chromosome +#' names and to change the mitochondria name to chrM; since the data in the BAM +#' file I used later on follow these conventions. +library(curl) +curl_download("https://microasp.upsc.se/root/upscb-public/raw/master/tutorial/easyRNASeq/Drosophila_melanogaster.BDGP5.77.with-chr.gtf.gz", + "Drosophila_melanogaster.BDGP5.77.with-chr.gtf.gz") + #' # Process #' ## Synthetic transcripts creation #' This function takes a gtf or gff3 _filename_ as input. @@ -34,7 +46,7 @@ source("https://microasp.upsc.se/root/upscb-public/raw/master/src/R/createSynthe #' genomeIntervals package (loaded). 
The latter can be saved as an RData object #' and/or be used directly in the construction of an AnnotParam. gAnnot <- createSyntheticTranscripts( - filename="~/Box Sync/Projects/easyRNASeq/Drosophila_melanogaster.BDGP5.77.with-chr.gtf.gz", + filename="Drosophila_melanogaster.BDGP5.77.with-chr.gtf.gz", input="gtf", feature="transcript", output="GRanges") diff --git a/tutorial/easyRNASeq/synthetic-transcript-creation-example.html b/tutorial/easyRNASeq/synthetic-transcript-creation-example.html index 73a01ba..69b2e31 100644 --- a/tutorial/easyRNASeq/synthetic-transcript-creation-example.html +++ b/tutorial/easyRNASeq/synthetic-transcript-creation-example.html @@ -10,7 +10,7 @@ - + Synthetic transcripts generation example @@ -64,7 +64,7 @@ img {
@@ -88,9 +88,17 @@ img {

1 Setup

Load the libraries

library(easyRNASeq)
+suppressPackageStartupMessages(library(IRanges))
+suppressPackageStartupMessages(library(genomeIntervals))
 library(pander)

Source a helper file

source("https://microasp.upsc.se/root/upscb-public/raw/master/src/R/createSyntheticTranscripts.R")
+

Download the annotation file (gtf) from:

+

_https://microasp.upsc.se/root/upscb-public/raw/master/tutorial/easyRNASeq/Drosophila_melanogaster.BDGP5.77.with-chr.gtf.gz_

+

Note that the annotation file differs from the one you would get from FlyBase; I have had to edit the annotation file to add chr in front of the chromosome names and to change the mitochondria name to chrM, since the data in the BAM file I use later on follows these conventions.

+
library(curl)
+curl_download("https://microasp.upsc.se/root/upscb-public/raw/master/tutorial/easyRNASeq/Drosophila_melanogaster.BDGP5.77.with-chr.gtf.gz",
+              "Drosophila_melanogaster.BDGP5.77.with-chr.gtf.gz")

2 Process

@@ -101,52 +109,10 @@ library(pander)

The feature parameter defines which feature to look for in the provided file. Commonly mRNA for gff3 and transcript for gtf. It defaults to mRNA. Several parameters can be given as arguments.

The output parameter defines the type of object that is returned. It can generate a Genome_intervals or a GRanges class of objects. The former can be saved as a gff3 using the writeGff3 function from the genomeIntervals package (loaded). The latter can be saved as an RData object and/or be used directly in the construction of an AnnotParam.

gAnnot <- createSyntheticTranscripts(
-  filename="~/Box Sync/Projects/easyRNASeq/Drosophila_melanogaster.BDGP5.77.with-chr.gtf.gz",
+  filename="Drosophila_melanogaster.BDGP5.77.with-chr.gtf.gz",
   input="gtf",
   feature="transcript",
   output="GRanges")
-
## Loading required package: genomeIntervals
-## Loading required package: intervals
-## 
-## Attaching package: 'intervals'
-## 
-## The following objects are masked from 'package:easyRNASeq':
-## 
-##     reduce, type
-## 
-## Loading required package: BiocGenerics
-## Loading required package: parallel
-## 
-## Attaching package: 'BiocGenerics'
-## 
-## The following objects are masked from 'package:parallel':
-## 
-##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
-##     clusterExport, clusterMap, parApply, parCapply, parLapply,
-##     parLapplyLB, parRapply, parSapply, parSapplyLB
-## 
-## The following object is masked from 'package:stats':
-## 
-##     xtabs
-## 
-## The following objects are masked from 'package:base':
-## 
-##     anyDuplicated, append, as.data.frame, as.vector, cbind,
-##     colnames, do.call, duplicated, eval, evalq, Filter, Find, get,
-##     intersect, is.unsorted, lapply, Map, mapply, match, mget,
-##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
-##     rbind, Reduce, rep.int, rownames, sapply, setdiff, sort,
-##     table, tapply, union, unique, unlist, unsplit
-## 
-## Loading required package: S4Vectors
-## Loading required package: stats4
-## Loading required package: IRanges
-## 
-## Attaching package: 'IRanges'
-## 
-## The following objects are masked from 'package:intervals':
-## 
-##     expand, reduce

2.2 Export

@@ -306,9 +272,9 @@ library(pander) ## [8] methods base ## ## other attached packages: -## [1] IRanges_2.2.5 S4Vectors_0.6.1 genomeIntervals_1.24.1 -## [4] BiocGenerics_0.14.0 intervals_0.15.0 pander_0.5.2 -## [7] easyRNASeq_2.4.5 +## [1] curl_0.9 pander_0.5.2 genomeIntervals_1.24.1 +## [4] intervals_0.15.0 IRanges_2.2.5 S4Vectors_0.6.1 +## [7] BiocGenerics_0.14.0 easyRNASeq_2.4.5 ## ## loaded via a namespace (and not attached): ## [1] Rcpp_0.11.6 formatR_1.2 -- 2.22.0