Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ biocViews: Infrastructure, DataImport, Genetics, Sequencing, RNASeq, SNP,
URL: https://bioconductor.org/packages/GenomicAlignments
Video: https://www.youtube.com/watch?v=2KqBSbkfhRo , https://www.youtube.com/watch?v=3PK_jx44QTs
BugReports: https://github.com/Bioconductor/GenomicAlignments/issues
Version: 1.35.0
Version: 1.35.2
License: Artistic-2.0
Encoding: UTF-8
Authors@R: c(
Expand Down
143 changes: 141 additions & 2 deletions R/GAlignmentsList-class.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,26 @@
setClass("GAlignmentsList",
contains="CompressedRangesList",
representation(
strandMode="integer", # single integer (0L, 1L, or 2L)
unlistData="GAlignments",
elementMetadata="DataFrame"
),
prototype(
strandMode=1L,
elementType="GAlignments",
elementMetadata=new("DFrame")
)
)

### Formal API:
### strandMode(x) - indicates how to infer the strand of a pair from the
### strand of the first and last alignments in the pair:
### 0: strand of the pair is always *;
### 1: strand of the pair is strand of its first alignment;
### 2: strand of the pair is strand of its last alignment.
### These modes are equivalent to 'strandSpecific' equal 0, 1,
### and 2, respectively, for the featureCounts() function
### defined in the Rsubread package.
### names(x) - NULL or character vector.
### length(x) - single integer. Nb of alignments in 'x'.
### seqnames(x) - 'factor' Rle of the same length as 'x'.
Expand All @@ -24,6 +34,7 @@ setClass("GAlignmentsList",
### rname(x) <- value - same as 'seqnames(x) <- value'.
### cigar(x) - character vector of the same length as 'x'.
### strand(x) - 'factor' Rle of the same length as 'x' (levels: +, -, *).
### obeys strandMode(x) (see above).
### qwidth(x) - integer vector of the same length as 'x'.
### start(x), end(x), width(x) - integer vectors of the same length as 'x'.
### njunc(x) - integer vector of the same length as 'x'.
Expand Down Expand Up @@ -57,10 +68,50 @@ setClass("GAlignmentsList",
### same length and class as 'x' (endomorphism).
###

### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### updateObject()
###
### Internal representation of GAlignmentsList objects has changed in
### GenomicAlignments 1.35.1 (Bioc 3.17).
###

.get_GAlignmentsList_version <- function(object)
{
if (.hasSlot(object, "strandMode")) "current" else "< 1.35.1"
}

setMethod("updateObject", "GAlignmentsList",
function(object, ..., verbose=FALSE)
{
## elementType slot.
version <- .get_GAlignmentsList_version(object)
if (version == "current") {
if (verbose)
message("[updateObject] Internal representation of ",
class(object), " object is current.\n",
"[updateObject] Nothing to update.")
} else {
if (verbose)
message("[updateObject] ", class(object), " object uses ",
"internal representation from\n",
"[updateObject] GenomicAlignments ", version, ". ",
"Updating it ...")
object@strandMode <- new(class(object))@elementType
}

callNextMethod()
}
)


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Getters.
###

setMethod("strandMode", "GAlignmentsList",
function(x) x@strandMode
)

setMethod("seqnames", "GAlignmentsList",
function(x) relist(seqnames(unlist(x, use.names=FALSE)), x)
)
Expand All @@ -74,7 +125,26 @@ setMethod("cigar", "GAlignmentsList",
)

setMethod("strand", "GAlignmentsList",
function(x) relist(strand(unlist(x, use.names=FALSE)), x)
function(x)
{
ga <- unlist(x, use.names=FALSE)
ga_mcols <- mcols(ga, use.names=FALSE)
s <- strand(unlist(x, use.names=FALSE))
if (strandMode(x) == 0L)
s <- Rle(strand("*"), length(ga))
else {
if (is.null(ga_mcols$flag))
warning("Flag information missing in GAlignmentsList object. Strand information might not be accurate.")
else {
mask_first_mate <- bamFlagTest(ga_mcols$flag, "isFirstMateRead")
if (strandMode(x) == 1L)
s[!mask_first_mate] <- invertStrand(s[!mask_first_mate])
else ## assuming strandMode(x) == 2L
s[mask_first_mate] <- invertStrand(s[mask_first_mate])
}
}
relist(s, x)
}
)

setMethod("qwidth", "GAlignmentsList",
Expand Down Expand Up @@ -127,6 +197,23 @@ setReplaceMethod("seqnames", "GAlignmentsList",
GenomicRanges:::set_CompressedGenomicRangesList_seqnames
)

setReplaceMethod("strandMode", "GAlignmentsList",
function(x, value)
{
x@strandMode <- .normarg_strandMode_replace_value(value)
x
}
)

setMethod("invertStrand", "GAlignmentsList",
function(x)
{
strand_mode <- strandMode(x)
if (strand_mode != 0L)
strandMode(x) <- 3L - strand_mode
x
}
)

### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Validity.
Expand All @@ -136,6 +223,7 @@ setReplaceMethod("seqnames", "GAlignmentsList",
{
## TDB: Currently known pitfalls are caught by
## GAlignments validity.
.valid.GAlignmentPairs.strandMode(x)
}

setValidity2("GAlignmentsList", .valid.GAlignmentsList)
Expand Down Expand Up @@ -269,8 +357,13 @@ setAs("GAlignmentPairs", "GAlignmentsList",
pbe <- PartitioningByEnd()
else
pbe <- PartitioningByEnd(seq(2, 2*length(from), 2), names=names(from))
mcols_gal <- cbind(mate_status=factor(rep("mated", length(from)),
levels=c("mated", "ambiguous", "unmated")),
mcols(from))
new("GAlignmentsList",
strandMode=strandMode(from),
unlistData=unlist(from, use.names=FALSE),
elementMetadata=mcols_gal,
partitioning=pbe)
}
)
Expand All @@ -283,12 +376,17 @@ setAs("GAlignmentsList", "GAlignmentPairs",
first <- c(TRUE, FALSE)
last <- c(FALSE, TRUE)
ga_mcols <- mcols(ga, use.names=FALSE)
if (!is.null(ga_mcols$flag)) {
first <- bamFlagTest(ga_mcols$flag, "isFirstMateRead")
last <- !first
}
isProperPair <- if (!is.null(ga_mcols$flag)) {
bamFlagTest(ga_mcols$flag[first], "isProperPair")
} else {
TRUE
}
GAlignmentPairs(ga[first], ga[last],
strandMode=strandMode(from),
isProperPair=isProperPair,
names=names(ga)[first])
}
Expand Down Expand Up @@ -319,6 +417,47 @@ setMethod("relistToClass", "GAlignments",

setMethod("show", "GAlignmentsList",
function(object)
GenomicRanges:::show_GenomicRangesList(object)
showGAlignmentsList(object)
)

## adapted from GenomicRanges:::show_GenomicRangesList
## to show the right strand according to strandMode
showGAlignmentsList <- function(x)
{
lx <- length(x)
nc <- ncol(mcols(x, use.names=FALSE))
cat(class(x), " object with ",
lx, " ", ifelse(lx == 1L, "pair", "pairs"),
", strandMode=", strandMode(x),
", and ",
nc, " metadata ", ifelse(nc == 1L, "column", "columns"),
":\n", sep="")
x_len <- length(x)
cumsumN <- end(PartitioningByEnd(x))
N <- tail(cumsumN, 1)
if (x_len == 0L) {
cat("<0 elements>\n")
}
else if (x_len <= 3L || (x_len <= 5L && N <= 20L)) {
y <- x
strand(y) <- strand(y) ## overwrite original strand w/ one
show(as.list(y))
}
else {
if (cumsumN[[3L]] <= 20L) {
showK <- 3L
}
else if (cumsumN[[2L]] <= 20L) {
showK <- 2L
}
else {
showK <- 1L
}
y <- x[seq_len(showK)]
strand(y) <- strand(y) ## overwrite original strand w/ one
show(as.list(y))
diffK <- x_len - showK
cat("...\n", "<", diffK, " more element", ifelse(diffK ==
1L, "", "s"), ">\n", sep="")
}
}
35 changes: 31 additions & 4 deletions R/findOverlaps-methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,15 @@ setMethod("findOverlaps", c("GAlignmentsList", "Vector"),
select = c("all", "first", "last", "arbitrary"),
ignore.strand = FALSE)
{
hits <- findOverlaps(grglist(unlist(query, use.names = FALSE)),
## to take into account the right (real) strand, when ignore.strand=FALSE,
## we duplicate GAligbnmentsList object in memory and update the original
## strand with the real one, according to the strandMode parameter
## TODO: it may be worth investigating alternative ways to avoid duplicating
## a presumably large GAlignmentsList object
query2 <- query
if (!ignore.strand)
strand(query2) <- strand(query2) ## overwrite original strand w/ real one
hits <- findOverlaps(grglist(unlist(query2, use.names=FALSE)),
subject, maxgap = maxgap, minoverlap = minoverlap,
type = match.arg(type), select = match.arg(select),
ignore.strand = ignore.strand)
Expand All @@ -112,7 +120,15 @@ setMethod("findOverlaps", c("Vector", "GAlignmentsList"),
select = c("all", "first", "last", "arbitrary"),
ignore.strand = FALSE)
{
hits <- findOverlaps(query, grglist(unlist(subject, use.names = FALSE)),
## to take into account the right (real) strand, when ignore.strand=FALSE,
## we duplicate GAligbnmentsList object in memory and update the original
## strand with the real one, according to the strandMode parameter
## TODO: it may be worth investigating alternative ways to avoid duplicating
## a presumably large GAlignmentsList object
subject2 <- subject
if (!ignore.strand)
strand(subject2) <- strand(subject2) ## overwrite original strand w/ real one
hits <- findOverlaps(query, grglist(unlist(subject2, use.names = FALSE)),
maxgap = maxgap, minoverlap = minoverlap,
type = match.arg(type), select = match.arg(select),
ignore.strand = ignore.strand)
Expand All @@ -130,8 +146,19 @@ setMethod("findOverlaps", c("GAlignmentsList", "GAlignmentsList"),
select = c("all", "first", "last", "arbitrary"),
ignore.strand = FALSE)
{
hits <- findOverlaps(grglist(unlist(query, use.names = FALSE)),
grglist(unlist(subject, use.names = FALSE)),
## to take into account the right (real) strand, when ignore.strand=FALSE,
## we duplicate GAligbnmentsList object in memory and update the original
## strand with the real one, according to the strandMode parameter
## TODO: it may be worth investigating alternative ways to avoid duplicating
## a presumably large GAlignmentsList object
query2 <- query
subject2 <- subject
if (!ignore.strand) {
strand(query2) <- strand(query2) ## overwrite original strand w/ real one
strand(subject2) <- strand(subject2) ## overwrite original strand w/ real one
}
hits <- findOverlaps(grglist(unlist(query2, use.names = FALSE)),
grglist(unlist(subject2, use.names = FALSE)),
maxgap = maxgap, minoverlap = minoverlap,
type = match.arg(type),
select = match.arg(select),
Expand Down
22 changes: 14 additions & 8 deletions R/readGAlignments.R
Original file line number Diff line number Diff line change
Expand Up @@ -290,19 +290,23 @@ setMethod("readGAlignmentPairs", "character",

setGeneric("readGAlignmentsList", signature="file",
function(file, index=file, use.names=FALSE, param=ScanBamParam(),
with.which_label=FALSE)
with.which_label=FALSE, strandMode=1)
standardGeneric("readGAlignmentsList")
)

.matesFromBam <- function(file, use.names, param, what0, with.which_label)
.matesFromBam <- function(file, use.names, param, what0, with.which_label,
strandMode)
{
bamcols <- .load_bamcols_from_BamFile(file, param, what0,
with.which_label=with.which_label)
seqlengths <- .load_seqlengths_from_BamFile(file, levels(bamcols$rname))
gal <- GAlignments(seqnames=bamcols$rname, pos=bamcols$pos,
cigar=bamcols$cigar, strand=bamcols$strand,
seqlengths=seqlengths)
gal <- .bindExtraData(gal, use.names=FALSE, param, bamcols,
flag0 <- scanBamFlag()
what0 <- "flag"
param2 <- .normargParam(param, flag0, what0)
gal <- .bindExtraData(gal, use.names=FALSE, param2, bamcols,
with.which_label=with.which_label)
if (asMates(file)) {
f <- factor(bamcols$groupid)
Expand All @@ -315,35 +319,37 @@ setGeneric("readGAlignmentsList", signature="file",
}
if (use.names)
names(gal) <- unique(splitAsList(bamcols$qname, bamcols$groupid))
strandMode(gal) <- strandMode
gal
}

.readGAlignmentsList.BamFile <- function(file, index=file,
use.names=FALSE, param=ScanBamParam(),
with.which_label=FALSE)
with.which_label=FALSE, strandMode=1)
{
if (!isTRUEorFALSE(use.names))
stop("'use.names' must be TRUE or FALSE")
if (!asMates(file))
bamWhat(param) <- setdiff(bamWhat(param),
c("groupid", "mate_status"))
what0 <- c("rname", "strand", "pos", "cigar", "groupid", "mate_status")
what0 <- c("rname", "strand", "pos", "cigar", "groupid", "mate_status", "flag")
if (use.names)
what0 <- c(what0, "qname")
.matesFromBam(file, use.names, param, what0, with.which_label)
.matesFromBam(file, use.names, param, what0, with.which_label, strandMode)
}

setMethod("readGAlignmentsList", "BamFile", .readGAlignmentsList.BamFile)

setMethod("readGAlignmentsList", "character",
function(file, index=file, use.names=FALSE, param=ScanBamParam(),
with.which_label=FALSE)
with.which_label=FALSE, strandMode=1)
{
bam <- .open_BamFile(file, index=index, asMates=TRUE, param=param)
on.exit(close(bam))
readGAlignmentsList(bam, character(0),
use.names=use.names, param=param,
with.which_label=with.which_label)
with.which_label=with.which_label,
strandMode=strandMode)
}
)

Expand Down
17 changes: 16 additions & 1 deletion inst/unitTests/test_readGAlignmentsList.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ test_readGAlignmentsList_mcols <- function()
bf <- BamFile(chr4, asMates=TRUE, yieldSize=100)
param <- ScanBamParam(tag=("NM"))
galist <- readGAlignmentsList(bf, param=param)
checkIdentical(colnames(mcols(unlist(galist))), "NM")
checkIdentical(colnames(mcols(unlist(galist))), c("flag", "NM"))
checkTrue(names(mcols(galist)) == "mate_status")

param <- ScanBamParam(tag=("FO"))
Expand Down Expand Up @@ -182,3 +182,18 @@ test_readGAlignmentsList_which <- function()
rng2 <- as.vector(mcols(unlist(target1[2]))$which_label)
checkTrue(all(rng2 %in% my_ROI_labels[4]))
}

text_readGAlignmentsList_findOverlaps <- function()
{
fl <- system.file("extdata", "ex1.bam", package="Rsamtools")
bf <- BamFile(fl, asMates=TRUE)
galist <- readGAlignmentsList(bf, strandMode=1L)
f <- GRanges(seqnames="seq1", IRanges(30, 250), strand="-")
ov <- findOverlaps(galist[1], f, ignore.strand=FALSE)
checkIdentical(0L, length(ov))

galist <- readGAlignmentsList(bf, strandMode=2L)
f <- GRanges(seqnames="seq1", IRanges(30, 250), strand="-")
ov <- findOverlaps(galist[1], f, ignore.strand=FALSE)
checkIdentical(1L, length(ov))
}
Loading