Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
U
UPSCb-public
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Administrator
UPSCb-public
Commits
c419cb63
Commit
c419cb63
authored
Jul 06, 2015
by
Nicolas Delhomme
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added gtf support
parent
02dfdfbc
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
62 additions
and
28 deletions
+62
-28
createSyntheticTranscripts.R
src/R/createSyntheticTranscripts.R
+62
-28
No files found.
src/R/createSyntheticTranscripts.R
View file @
c419cb63
"createSyntheticTranscripts"
<-
function
(
gff3
,
output
=
c
(
"Genome_intervals"
,
"GRanges"
),
features
=
c
(
"mRNA"
,
"tRNA"
,
"miRNA"
))
{
require
(
genomeIntervals
)
require
(
S
4
Vectors
)
"createSyntheticTranscripts"
<-
function
(
filename
,
input
=
c
(
"gff3"
,
"gtf"
),
features
=
c
(
"mRNA"
,
"miRNA"
,
"tRNA"
,
"transcript"
),
output
=
c
(
"Genome_intervals"
,
"GRanges"
))
{
#' load libraries
stopifnot
(
require
(
genomeIntervals
))
stopifnot
(
require
(
S
4
Vectors
))
stopifnot
(
require
(
IRanges
))
stopifnot
(
require
(
easyRNASeq
))
#' first check
stopifnot
(
file.exists
(
filename
))
#' get the values
input
<-
match.arg
(
input
)
features
<-
match.arg
(
features
,
several.ok
=
TRUE
)
output
<-
match.arg
(
output
)
gff
<-
readGff3
(
gff3
)
#' define some global variables
relation
<-
switch
(
input
,
"gff3"
=
list
(
ID
=
"ID"
,
Parent
=
"Parent"
),
"gtf"
=
list
(
ID
=
"transcript_id"
,
Parent
=
"gene_id"
))
#' read the gff3/gtf file
dat
<-
readGff3
(
filename
)
## get the gene <-> pacid map
#' If gtf, reformat the attributes and drop the double quotes
if
(
input
==
"gtf"
){
dat
$
gffAttributes
<-
gsub
(
"\""
,
""
,
easyRNASeq
:::
.convertGffToGtfAttributes
(
dat
$
gffAttributes
))
}
## get the gene <-> mRNA/transcript map
# These are the mRNA IDs and their parents (genes)
sel
<-
gff
$
type
%in%
features
idMap
<-
data.frame
(
type
=
gff
[
sel
]
$
type
,
getGffAttribute
(
gff
[
sel
],
"ID"
),
getGffAttribute
(
gff
[
sel
],
"Parent"
))
sel
<-
dat
$
type
%in%
features
# That step would not be necessary for gtf, but it is easier to implement in a
# similar way for both formats
idMap
<-
data.frame
(
type
=
dat
[
sel
]
$
type
,
getGffAttribute
(
dat
[
sel
],
relation
$
ID
),
getGffAttribute
(
dat
[
sel
],
relation
$
Parent
))
## extract the exons and group by gene ID
sel
<-
gff
$
type
==
"exon"
sel
<-
dat
$
type
==
"exon"
## we can drop multiple Parents (i.e. comma-separated Parent values), as we are
## collapsing them anyway
mRnaID
<-
sub
(
",.*"
,
""
,
getGffAttribute
(
gff
[
sel
],
"Parent"
))
mRnaID
<-
sub
(
",.*"
,
""
,
getGffAttribute
(
dat
[
sel
],
switch
(
input
,
"gff3"
=
relation
$
Parent
,
"gtf"
=
relation
$
ID
)))
## avoid unwanted features
rngs
<-
IRanges
::
IRanges
(
start
=
gff
[
sel
,
1
],
end
=
gff
[
sel
,
2
])[
mRnaID
%in%
idMap
$
ID
]
rngs
<-
IRanges
(
start
=
dat
[
sel
,
1
],
end
=
dat
[
sel
,
2
])[
mRnaID
%in%
idMap
[,
relation
$
ID
]
]
## create a set of synthetic exons
rngList
<-
IRanges
::
reduce
(
IRanges
::
split
(
rngs
,
idMap
[
match
(
mRnaID
[
mRnaID
%in%
idMap
$
ID
],
idMap
$
ID
),
"Parent"
]))
idMap
[
match
(
mRnaID
[
mRnaID
%in%
idMap
[,
relation
$
ID
]
],
idMap
[,
relation
$
ID
]),
relation
$
Parent
]))
## export the gene, exon and features as gff3
## create the new gff object
## select the gene
sel
<-
gff
$
type
==
"gene"
sel
<-
dat
$
type
==
"gene"
## create the gene gff
geneID
<-
getGffAttribute
(
gff
[
sel
],
"ID"
)
geneGff
<-
gff
[
sel
][
geneID
%in%
idMap
$
Parent
]
geneID
<-
getGffAttribute
(
dat
[
sel
],
switch
(
input
,
"gff3"
=
relation
$
ID
,
"gtf"
=
relation
$
Parent
))
geneGff
<-
dat
[
sel
][
geneID
%in%
idMap
[,
relation
$
Parent
]]
if
(
input
==
"gtf"
){
geneGff
$
gffAttributes
<-
sub
(
relation
$
Parent
,
"ID"
,
geneGff
$
gffAttributes
)
}
## create gffs for each feature
featureGff
<-
Reduce
(
c
,
lapply
(
features
,
function
(
f
)
{
fGff
<-
gff
[
sel
][
geneID
%in%
idMap
$
Parent
[
idMap
$
type
==
f
]]
f.sel
<-
geneID
%in%
idMap
[,
relation
$
Parent
][
idMap
$
type
==
f
]
fGff
<-
dat
[
sel
][
f.sel
]
fGff
$
type
<-
f
fGff
$
gffAttributes
<-
paste
(
paste
(
sub
(
";"
,
".0;"
,
fGff
$
gffAttributes
),
"0;Parent="
,
sep
=
"."
),
geneID
[
geneID
%in%
idMap
$
Parent
[
idMap
$
type
==
f
]],
sep
=
""
)
fGff
$
gffAttributes
<-
paste
(
"ID="
,
getGffAttribute
(
fGff
,
relation
$
Parent
),
".0;Parent="
,
getGffAttribute
(
fGff
,
relation
$
Parent
),
sep
=
""
)
fGff
}))
## create the exon gff
rngList
<-
rngList
[
match
(
geneID
[
geneID
%in%
idMap
$
Parent
],
names
(
rngList
))]
rngList
<-
rngList
[
match
(
geneID
[
geneID
%in%
idMap
[,
relation
$
Parent
]
],
names
(
rngList
))]
exonNumber
<-
elementLengths
(
rngList
)
exonGff
<-
gff
[
rep
(
which
(
sel
)[
geneID
%in%
idMap
$
Parent
],
exonNumber
)]
exonGff
<-
dat
[
rep
(
which
(
sel
)[
geneID
%in%
idMap
[,
relation
$
Parent
]
],
exonNumber
)]
exonGff
[,
1
]
<-
IRanges
::
unlist
(
start
(
rngList
))
exonGff
[,
2
]
<-
IRanges
::
unlist
(
end
(
rngList
))
exonID
<-
sapply
(
exonNumber
,
":"
,
1
)
sel
<-
geneGff
$
strand
==
"+"
exonID
[
sel
]
<-
sapply
(
exonID
[
sel
],
rev
)
ID
<-
getGffAttribute
(
exonGff
,
"ID"
)
ID
<-
getGffAttribute
(
exonGff
,
switch
(
input
,
"gff3"
=
relation
$
ID
,
"gtf"
=
relation
$
Parent
)
)
exonGff
$
gffAttributes
<-
paste0
(
"ID="
,
paste
(
ID
,
"exon"
,
unlist
(
exonID
,
use.names
=
FALSE
),
sep
=
"."
),
";Name="
,
paste
(
ID
,
"exon"
,
unlist
(
exonID
,
use.names
=
FALSE
),
sep
=
"."
),
";Parent="
,
paste
(
ID
,
"0"
,
sep
=
"."
))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment