Skip to content

Commit

Permalink
Merge pull request #45 from LieberInstitute/gdev
Browse files Browse the repository at this point in the history
proposed v1.9.1 updates
  • Loading branch information
Nick-Eagles authored Oct 9, 2024
2 parents c8c8ab0 + fd68d34 commit 7a3ec1b
Show file tree
Hide file tree
Showing 38 changed files with 359 additions and 291 deletions.
6 changes: 6 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,10 @@
^README\.Rmd$
^\.github$
^data-raw$
^dcs04_data$
^dcs04_data/.*$
^codecov\.yml$
^\.Rhistory$
^\.Rdata$
^\.httr-oauth$
^\.DS_Store$
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@
.Rdata
.httr-oauth
.DS_Store
dcs04_data
inst/doc
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: qsvaR
Title: Generate Quality Surrogate Variable Analysis for Degradation Correction
Version: 1.9.0
Date: 2024-05-03
Version: 1.9.1
Date: 2024-09-16
Authors@R:
c(
person("Joshua", "Stolz", email = "jstolz80@gmail.com",
Expand All @@ -25,7 +25,7 @@ biocViews: Software, WorkflowStep, Normalization, BiologicalQuestion,
DifferentialExpression, Sequencing, Coverage
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.0
RoxygenNote: 7.3.2
Suggests:
BiocFileCache,
BiocStyle,
Expand Down
3 changes: 2 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
# Generated by roxygen2: do not edit by hand

export(DEqual)
export(check_tx_names)
export(getDegTx)
export(getPCs)
export(get_qsvs)
export(k_qsvs)
export(normalize_tx_names)
export(qSVA)
export(select_transcripts)
export(which_tx_names)
import(SummarizedExperiment)
import(ggplot2)
import(rlang)
Expand Down
85 changes: 60 additions & 25 deletions R/DEqual.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,22 @@
#' <https://doi.org/10.1016/j.neuron.2019.05.013>. This function compares your
#' t-statistics of interest computed on transcripts against the
#' t-statistics from degradation time adjusting for the six brain regions from
#' degradation experiment data used for determining `covComb_tx_deg`.
#' degradation experiment data used for determining `rse_tx`.
#'
#' @param DE a `data.frame()` with one column containing the t-statistics from
#' Differential Expression, typically generated with `limma::topTable()`.
#' The `rownames(DE)` should be transcript GENCODE IDs.
#' @param DE a `data.frame()` with a column "t" containing the t-statistics
#' from Differential Expression, typically generated with `limma::topTable()`.
#' `rownames(DE)` must have transcript Ensembl/Gencode IDs.
#' @param deg_tstats an optional `data.frame()` with a column "t" containing
#' t-statistics resulted from a degradation experiment. Default is the
#' internal `qsvaR::degradation_tstats` from the package authors.
#' @param show.legend logical (default TRUE) to show legend in the plot
#' @param show.cor specify where to show the correlation value. Can be one of
#' "caption", "corner-top", "corner-bottom", or "none".
#' @param font.size numeric value to set the base font size of the plot
#' @param cor.size numeric (default font.size/2) to set the font size for the
#' correlation text
#' @param cor.label character (default "cor: ") to set the text preceding the
#' correlation value
#'
#' @return a `ggplot` object of the DE t-statistic vs
#' the DE statistic from degradation
Expand All @@ -36,47 +47,71 @@
#'
#' ## Create the DEqual plot
#' DEqual(random_de)
DEqual <- function(DE) {
DEqual <- function(DE, deg_tstats = qsvaR::degradation_tstats, show.legend = TRUE,
show.cor = c('caption','corner-top','corner-bottom','none'),
font.size = 12, cor.size = font.size/2, cor.label = 'cor: ') {
## For R CMD check
DE_t <- degradation_t <- NULL

show.cor=rlang::arg_match(show.cor)
## Check input
# stopifnot("t" %in% colnames(DE))
# stopifnot(!is.null(rownames(DE)))

# Check if input is a dataframe
if (!is.data.frame(DE)) {
stop("The input to DEqual is not a dataframe.", call. = FALSE)
if (!is.data.frame(DE)) {
stop("The input to DEqual is not a dataframe.", call. = FALSE)
}

# Check if 't' is in the column names of DE
if (!("t" %in% colnames(DE))) {
stop("'t' is not a column in 'DE'.", call. = FALSE)
}

# Check if DE has non-null row names
if (is.null(rownames(DE))) {
stop("Row names of 'DE' are NULL.", call. = FALSE)
}

}
if (!missing(deg_tstats)) {
if (!is.data.frame(deg_tstats)) {
stop("The 'deg_tstats' parameter is not a dataframe.", call. = FALSE)
}
if (!("t" %in% colnames(deg_tstats))) {
stop("'t' is not a column in 'deg_tstats'.", call. = FALSE)
}
if (is.null(rownames(deg_tstats))) {
stop("Row names of 'deg_tstats' are NULL.", call. = FALSE)
}
if (!all(grepl("^ENST\\d+", rownames(deg_tstats)))) {
stop("The row names of 'deg_tstats' must be ENSEMBL or Gencode IDs (ENST...)", call. = FALSE)
}
}

## Locate common transcripts
deg_tstats = qsvaR::degradation_tstats
rownames(deg_tstats) = check_tx_names(rownames(DE),rownames(qsvaR::degradation_tstats),'rownames(DE)','qsvaR::degradation_tstats')
common = intersect(rownames(deg_tstats), rownames(DE))

whichTx <- which_tx_names(rownames(DE),rownames(deg_tstats))
common = qsvaR::normalize_tx_names(rownames(DE)[whichTx])
stopifnot(length(common) > 0)

rownames(deg_tstats) <- qsvaR::normalize_tx_names(rownames(deg_tstats))
## Create dataframe with common transcripts
common_data <- data.frame(
degradation_t = deg_tstats$t[match(common, rownames(deg_tstats))],
DE_t = DE$t[match(common, rownames(DE))]
degradation_t = deg_tstats[common, "t"],
DE_t = DE[whichTx, "t"]
)
cor_val <- signif(cor(common_data[, 1], common_data[, 2]), 2)
p <- ggplot(common_data, aes(x = DE_t, y = degradation_t)) +
xlab("DE t-statistic") +
ylab("Degradation t-statistic") +
geom_bin2d(bins = 70) +
geom_bin2d(bins = 70, show.legend = show.legend) +
scale_fill_continuous(type = "viridis") +
theme_bw() +
labs(caption = paste0("correlation: ", signif(cor(common_data[, 1], common_data[, 2]), 2)))
theme(text = element_text(size = font.size))
# labs(caption = paste0("correlation: ", cor_val)
if (show.cor != 'none') {
switch(show.cor,
'caption' = {
p <- p + labs(caption = paste0(cor.label, cor_val))
},
'corner-top' = {
p <- p + annotate("text", x = max(common_data$DE_t), y = max(common_data$degradation_t),
label = paste0(cor.label, cor_val), size = cor.size, hjust = 1, vjust = 1)
},
'corner-bottom' = {
p <- p + annotate("text", x = max(common_data$DE_t), y = min(common_data$degradation_t),
label = paste0(cor.label, cor_val), size = cor.size, hjust = 1, vjust = 0)
})
}
return(p)
}
14 changes: 0 additions & 14 deletions R/covComb_tx_deg-data.R

This file was deleted.

10 changes: 5 additions & 5 deletions R/degradation_tstats-data.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
#' Degradation time t-statistics
#'
#' These t-statistics are derived from the same data that was used for
#' [covComb_tx_deg]. They are the results from main model where we determined
#' the relationship with degradation time adjusting for the brain region (so
#' parallel degradation effects across brain regions). They are used for
#' plotting in `DEqual()`.
#' These t-statistics are derived from the degradation timepoints data
#' built into qsvaR. They are the results from multiple models where
#' we determined the association of transcripts with degradation time
#' adjusting for brain region (so parallel degradation effects across
#' brain regions). They are used for plotting in `DEqual()`.
#'
#' @name degradation_tstats
#' @docType data
Expand Down
41 changes: 26 additions & 15 deletions R/getDegTx.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@
#' These proportions were then added to our `model.matrix()` and the union of the top 1000 transcripts in the interaction model,
#' the main effect model, and the cell proportions model were used to generate this model of qSVs.
#'
#' @param assayname character string specifying the name of the assay desired in rse_tx
#' @param sig_transcripts A list of transcripts determined to have degradation signal in the qsva expanded paper.
#' @param assayname character string specifying the name of the assay desired in rse_tx
#' @param verbose specify if the function should report how many model transcripts were matched
#'
#' @return A
#' [RangedSummarizedExperiment-class][SummarizedExperiment::RangedSummarizedExperiment-class]
Expand All @@ -31,28 +32,38 @@
#' @import rlang
#'
#' @examples
#' getDegTx(covComb_tx_deg)
#' stopifnot(mean(rowMeans(assays(covComb_tx_deg)$tpm)) > 1)
getDegTx <- function(rse_tx, type = c("cell_component", "standard", "top1500"), sig_transcripts = select_transcripts(type), assayname = "tpm") {

type = arg_match(type)

#' degTx <- getDegTx(rse_tx, "standard")
getDegTx <- function(rse_tx, type = c("cell_component", "standard", "top1500"),
sig_transcripts = NULL, assayname = "tpm", verbose = TRUE) {

#type = arg_match(type)
if (is.null(sig_transcripts)) {
type = arg_match(type)
sig_transcripts <- select_transcripts(type)
} else {
type = "custom"
}
# Validate rse_tx is a RangedSummarizedExperiment object
if (!is(rse_tx, "RangedSummarizedExperiment")) {
stop("'rse_tx' must be a RangedSummarizedExperiment object.", call. = FALSE)
}

# Check if assayname is in assayNames
if (!assayname %in% assayNames(rse_tx)) {
stop(sprintf("'%s' is not in assayNames(rse_tx).", assayname), call. = FALSE)
}

# Check for validity and matching of tx names
sig_transcripts = check_tx_names(rownames(rse_tx), sig_transcripts, 'rownames(rse_tx)', 'sig_transcripts')

# Subset rse_tx to include sig_transcripts
rse_tx <- rse_tx[rownames(rse_tx) %in% sig_transcripts, , drop = FALSE]


# Check for validity and matching of tx names and return the tx subset indexes in rse_tx
wtx <- which_tx_names(rownames(rse_tx), sig_transcripts)
if (length(wtx) == 0) {
stop("No transcripts found in the '",type, "' degradation model transcripts" )
}

if (verbose) {
message(" '",type,"' degradation model transcripts found: ", length(wtx))
}
rse_tx <- rse_tx[wtx, , drop = FALSE]

# Check if the row means is greater than 1
if (mean(rowMeans(assays(rse_tx)[[assayname]])) < 1) {
warning("The transcripts selected are lowly expressed in your dataset. This can impact downstream analysis.")
Expand Down
2 changes: 1 addition & 1 deletion R/getPCs.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#' @importFrom stats prcomp
#' @import SummarizedExperiment
#' @examples
#' getPCs(covComb_tx_deg, "tpm")
#' getPCs(rse_tx, "tpm")
getPCs <- function(rse_tx, assayname = "tpm") {

# Validate rse_tx is a RangedSummarizedExperiment object
Expand Down
2 changes: 1 addition & 1 deletion R/get_qsvs.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#' @export
#'
#' @examples
#' qsv <- getPCs(covComb_tx_deg, "tpm")
#' qsv <- getPCs(rse_tx, "tpm")
#' get_qsvs(qsv, 2)
get_qsvs <- function(qsvPCs, k) {

Expand Down
6 changes: 3 additions & 3 deletions R/k_qsvs.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,18 @@
#' @import SummarizedExperiment
#' @examples
#' ## First we need to define a statistical model. We'll use the example
#' ## covComb_tx_deg data. Note that the model you'll use in your own data
#' ## rse_tx data. Note that the model you'll use in your own data
#' ## might look different from this model.
#' mod <- model.matrix(~ mitoRate + Region + rRNA_rate + totalAssignedGene + RIN,
#' data = colData(covComb_tx_deg)
#' data = colData(rse_tx)
#' )
#'
#' ## To ensure that the results are reproducible, you will need to set a
#' ## random seed with the set.seed() function. Internally, we are using
#' ## sva::num.sv() which needs a random seed to ensure reproducibility of the
#' ## results.
#' set.seed(20230621)
#' k_qsvs(covComb_tx_deg, mod, "tpm")
#' k_qsvs(rse_tx, mod, "tpm")
k_qsvs <- function(rse_tx, mod, assayname) {

# Validate rse_tx is a RangedSummarizedExperiment object
Expand Down
45 changes: 21 additions & 24 deletions R/qSVA.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
#' @param rse_tx A [RangedSummarizedExperiment-class][SummarizedExperiment::RangedSummarizedExperiment-class] object containing
#' the transcript data desired to be studied.
#' @param type a character string specifying which model you would
#' like to use when selecting a degradation matrix.
#' @param sig_transcripts A list of transcripts that are associated with
#' degradation signal. Use `select_transcripts()` to select sets of transcripts
#' identified by the qSVA expanded paper. Specifying a `character()` input of
#' ENSEMBL transcript IDs (or whatever values you have at `rownames(rse_tx)`)
#' obtained outside of `select_transcripts()` overrides
#' the user friendly `type` argument. That is, this argument provides more fine
#' tuning options for advanced users.
#' like to use from the sets of signature transcripts identified
#' by the qsvaR package. This can be omitted if a custom set of
#' transcripts is provided to sig_transcripts.
#' @param sig_transcripts A list of transcript IDs that are associated
#' with degradation signal. Specifying a `character()` input with ENSEMBL
#' transcript IDs (whose values should match entries in `rownames(rse_tx)`).
#' This argument provides a custom list of transcripts for adjusting
#' for degradation; this should be used instead of the `type` argument.
#' @param mod Model Matrix with necessary variables the you would
#' model for in differential expression
#' @param assayname character string specifying the name of
Expand All @@ -21,43 +21,40 @@
#'
#' @examples
#' ## First we need to define a statistical model. We'll use the example
#' ## covComb_tx_deg data. Note that the model you'll use in your own data
#' ## rse_tx data. Note that the model you'll use in your own data
#' ## might look different from this model.
#' mod <- model.matrix(~ mitoRate + Region + rRNA_rate + totalAssignedGene + RIN,
#' data = colData(covComb_tx_deg)
#' data = colData(rse_tx)
#' )
#'
#' ## To ensure that the results are reproducible, you will need to set a
#' ## random seed with the set.seed() function. Internally, we are using
#' ## sva::num.sv() which needs a random seed to ensure reproducibility of the
#' ## results.
#' set.seed(20230621)
#' qSVA(rse_tx = covComb_tx_deg, type = "cell_component", mod = mod, assayname = "tpm")
#' qSVA(rse_tx = rse_tx, type = "cell_component", mod = mod, assayname = "tpm")
#'
qSVA <-
function(rse_tx,
type = c("cell_component", "standard", "top1500"),
sig_transcripts = select_transcripts(type),
mod,
assayname) {
## We don't need to pass type to getDegTx() since it's not used internally
## once the sig_transcripts have been defined.

type = arg_match(type)

function(rse_tx, type = c("cell_component", "standard", "top1500"),
sig_transcripts = NULL, mod, assayname) {

if (is.null(sig_transcripts)) {
type = arg_match(type) # must be one of those in the list if sig_transcripts is NULL
}

# Validate rse_tx is a RangedSummarizedExperiment object
if (!is(rse_tx, "RangedSummarizedExperiment")) {
stop("'rse_tx' must be a RangedSummarizedExperiment object.", call. = FALSE)
}

# Check if assayname is in assayNames
if (!assayname %in% assayNames(rse_tx)) {
stop(sprintf("'%s' is not in assayNames(rse_tx).", assayname), call. = FALSE)
}

# Get the qSVs
DegTx <-
getDegTx(rse_tx, sig_transcripts = sig_transcripts, assayname = assayname)
getDegTx(rse_tx, type = type, sig_transcripts = sig_transcripts, assayname = assayname)
PCs <- getPCs(DegTx, assayname)
k <- k_qsvs(DegTx, mod = mod, assayname = assayname)
qSV <- get_qsvs(PCs, k)
Expand Down
Loading

0 comments on commit 7a3ec1b

Please sign in to comment.