fix duplicate rownames introduced in CellPhoneDB v5 (#90)

* hopefully this fixes #89 * Update plot_cpdb.R * Update utils.R * ok fix the chords * bump version * Update DESCRIPTION be consistent * Update plot_cpdb4.R * fix formatting * Revert "fix formatting" This reverts commit ce4ca40. * first attempt at fixing the names * Update plot_cpdb.R * Update plot_cpdb.Rd * Update utils.R
zktuong · Dec 6, 2023 · 8b249e8 · 8b249e8
1 parent 18cf3ac
commit 8b249e8
Show file tree

Hide file tree

Showing 8 changed files with 285 additions and 165 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: ktplots
 Title: Plot single-cell data dotplots
-Version: 2.0.0
+Version: 2.1.0
 Authors@R: person("Kelvin", "Tuong", email = c("z.tuong@uq.edu.au"), role = c("aut", "cre"))
 Description: Plotting tools for scData.
 License: MIT + file LICENSE

diff --git a/R/plot_cpdb.R b/R/plot_cpdb.R
@@ -29,6 +29,7 @@
 #' @param scale_alpha_by_interaction_scores Whether or not to filter values by the interaction score.
 #' @param scale_alpha_by_cellsign Whether or not to filter the transparency of interactions by the cellsign.
 #' @param filter_by_cellsign Filter out interactions with a 0 value cellsign.
+#' @param keep_id_cp_interaction Whether or not to keep the id_cp_interaction in the plot.
 #' @param ... passes arguments to grep for cell_type1 and cell_type2.
 #' @return ggplot dot plot object of cellphone db output
 #' @examples
@@ -53,7 +54,7 @@ plot_cpdb <- function(
     default_style = TRUE, highlight_col = "red", highlight_size = NULL, max_highlight_size = 2,
     special_character_regex_pattern = NULL, degs_analysis = FALSE, return_table = FALSE,
     exclude_interactions = NULL, min_interaction_score = 0, scale_alpha_by_interaction_scores = FALSE,
-    scale_alpha_by_cellsign = FALSE, filter_by_cellsign = FALSE, title = "", ...) {
+    scale_alpha_by_cellsign = FALSE, filter_by_cellsign = FALSE, title = "", keep_id_cp_interaction = FALSE, ...) {
   requireNamespace("SingleCellExperiment")
   requireNamespace("grDevices")
   if (is.null(special_character_regex_pattern)) {
@@ -80,11 +81,10 @@ plot_cpdb <- function(
   # ok front load a 'dictionary' here.
   if (col_start == DEFAULT_V5_COL_START) {
     v5tmp <- reshape2::melt(means_mat, id.vars = colnames(means_mat)[1:col_start])
-    special_sep <- paste0(rep(DEFAULT_SEP, 3), collapse = "")
-    row.names(v5tmp) <- paste0(
-      gsub("_", "-", v5tmp$interacting_pair), special_sep,
-      v5tmp$variable
-    )
+    row.names(v5tmp) <- paste0(v5tmp$id_cp_interaction, SPECIAL_SEP, gsub(
+      "_",
+      "-", v5tmp$interacting_pair
+    ), SPECIAL_SEP, v5tmp$variable)
     v5tmp <- v5tmp[, c("is_integrin", "directionality", "classification")]
   }
   cell_type1 <- .sub_pattern(cell_type = cell_type1, pattern = special_character_regex_pattern)
@@ -410,6 +410,12 @@ plot_cpdb <- function(
   if (return_table) {
     return(df)
   } else {
+    # change the label of Var1
+    if (keep_id_cp_interaction){
+      df$Var1 <- gsub(SPECIAL_SEP, "_", df$Var1)
+    } else {
+      df$Var1 <- gsub(paste0(".*", SPECIAL_SEP), "", df$Var1)
+    }
     if (!is.null(interaction_scores)) {
       requireNamespace("dplyr")
       df <- df %>%

diff --git a/R/plot_cpdb2.R b/R/plot_cpdb2.R
@@ -32,50 +32,64 @@
 #' @import ggrepel
 #' @export
 
-plot_cpdb2 <- function(scdata, cell_type1, cell_type2, celltype_key, means, pvals,
+plot_cpdb2 <- function(
+    scdata, cell_type1, cell_type2, celltype_key, means, pvals,
     deconvoluted, keep_significant_only = TRUE, splitby_key = NULL, standard_scale = TRUE,
     gene_symbol_mapping = NULL, frac = 0.1, remove_self = TRUE, desiredInteractions = NULL,
     interaction_grouping = NULL, edge_group_colors = NULL, node_group_colors = NULL,
     degs_analysis = FALSE, return_df = FALSE, plot_score_as_thickness = TRUE, ...) {
     if (class(scdata) == "Seurat") {
         stop("Sorry not supported. Please use a SingleCellExperiment object.")
     }
-    lr_interactions <- plot_cpdb(scdata = scdata, cell_type1 = cell_type1, cell_type2 = cell_type2,
+    lr_interactions <- plot_cpdb(
+        scdata = scdata, cell_type1 = cell_type1, cell_type2 = cell_type2,
         celltype_key = celltype_key, splitby_key = splitby_key, means = means, pvals = pvals,
         keep_significant_only = keep_significant_only, standard_scale = standard_scale,
-        return_table = TRUE, degs_analysis = degs_analysis, ...)
+        return_table = TRUE, degs_analysis = degs_analysis, ...
+    )
     requireNamespace("SummarizedExperiment")
     requireNamespace("SingleCellExperiment")
     if (is.null(splitby_key)) {
         if (any(lr_interactions[, 3] > 0)) {
             if (any(is.na(lr_interactions[, 3]))) {
-                lr_interactions <- lr_interactions[lr_interactions[, 3] > 0 & !is.na(lr_interactions[,
-                  3]), ]
+                lr_interactions <- lr_interactions[lr_interactions[, 3] > 0 & !is.na(lr_interactions[
+                    ,
+                    3
+                ]), ]
             } else {
                 lr_interactions <- lr_interactions[lr_interactions[, 3] > 0, ]
             }
         }
     }
-    subset_clusters <- unique(unlist(lapply(as.character(lr_interactions$group),
-        strsplit, DEFAULT_SEP)))
+    subset_clusters <- unique(unlist(lapply(
+        as.character(lr_interactions$group),
+        strsplit, DEFAULT_SEP
+    )))
     sce_subset <- scdata[, SummarizedExperiment::colData(scdata)[, celltype_key] %in%
         subset_clusters]
-    interactions <- means[, c("interacting_pair", "gene_a", "gene_b", "partner_a",
-        "partner_b", "receptor_a", "receptor_b")]
-    interactions$converted <- gsub("-", " ", interactions$interacting_pair)
-    interactions$converted <- gsub("_", "-", interactions$converted)
-    interactions_subset <- interactions[interactions$converted %in% lr_interactions$Var1,
-        ]
-    tm0 <- do.call(c, lapply(as.list(interactions_subset$interacting_pair), strsplit,
-        "_"))
+    interactions <- means[, c(
+        "id_cp_interaction", "interacting_pair", "gene_a", "gene_b", "partner_a",
+        "partner_b", "receptor_a", "receptor_b"
+    )]
+    interactions$use_interaction_name <- paste0(interactions$id_cp_interaction, SPECIAL_SEP, interactions$interacting_pair)
+    interactions$converted <- gsub("_", "-", interactions$use_interaction_name)
+    interactions_subset <- interactions[interactions$converted %in% lr_interactions$Var1, ]
+    tm0 <- do.call(c, lapply(
+        as.list(interactions_subset$use_interaction_name), strsplit,
+        "_"
+    ))
     if (any(lapply(tm0, length) > 2)) {
         complex_id <- which(lapply(tm0, length) > 2)
         interactions_subset_ <- interactions_subset[complex_id, ]
         simple_1 <- interactions_subset_$interacting_pair[grep("complex:", interactions_subset_$partner_b)]
-        partner_1 <- gsub("complex:", "", interactions_subset_$partner_b[grep("complex:",
-            interactions_subset_$partner_b)])
-        partner_2 <- gsub("complex:", "", interactions_subset_$partner_a[grep("complex:",
-            interactions_subset_$partner_a)])
+        partner_1 <- gsub("complex:", "", interactions_subset_$partner_b[grep(
+            "complex:",
+            interactions_subset_$partner_b
+        )])
+        partner_2 <- gsub("complex:", "", interactions_subset_$partner_a[grep(
+            "complex:",
+            interactions_subset_$partner_a
+        )])
         simple_2 <- interactions_subset_$interacting_pair[grep("complex:", interactions_subset_$partner_a)]
         for (i in seq_along(simple_1)) {
             simple_1[i] <- gsub(paste0(partner_1[i], "_|_", partner_1[i]), "", simple_1[i])
@@ -88,22 +102,29 @@ plot_cpdb2 <- function(scdata, cell_type1, cell_type2, celltype_key, means, pval
         for (i in seq_along(complex_id)) {
             tm0[[complex_id[i]]] <- tmplist[[i]]
         }
-        tm0 <- data.frame(t(matrix(unlist(tm0), 2, length(unlist(tm0))/2)))
+        tm0 <- data.frame(t(matrix(unlist(tm0), 2, length(unlist(tm0)) / 2)))
         colnames(tm0) <- c("id_a", "id_b")
         interactions_subset <- cbind(interactions_subset, tm0)
-        dictionary <- interactions_subset[, c("gene_a", "gene_b", "partner_a", "partner_b",
-            "id_a", "id_b", "receptor_a", "receptor_b")]
+        dictionary <- interactions_subset[, c(
+            "id_cp_interaction", "gene_a", "gene_b", "partner_a", "partner_b",
+            "id_a", "id_b", "receptor_a", "receptor_b"
+        )]
     } else {
-        tm0 <- data.frame(t(matrix(unlist(tm0), 2, length(unlist(tm0))/2)))
+        tm0 <- data.frame(t(matrix(unlist(tm0), 2, length(unlist(tm0)) / 2)))
         colnames(tm0) <- c("id_a", "id_b")
+        tm0$id_a <- gsub(paste0(".*", SPECIAL_SEP), "", tm0$id_a)
         interactions_subset <- cbind(interactions_subset, tm0)
-        dictionary <- interactions_subset[, c("gene_a", "gene_b", "partner_a", "partner_b",
-            "id_a", "id_b", "receptor_a", "receptor_b")]
+        dictionary <- interactions_subset[, c(
+            "id_cp_interaction", "gene_a", "gene_b", "partner_a", "partner_b",
+            "id_a", "id_b", "receptor_a", "receptor_b"
+        )]
     }
     if (!is.null(interaction_grouping)) {
         if ((class(interaction_grouping) == "data.frame")) {
-            interactions_subset$group <- interaction_grouping[, 2][match(interactions_subset$interacting_pair,
-                interaction_grouping[, 1])]
+            interactions_subset$group <- interaction_grouping[, 2][match(
+                interactions_subset$interacting_pair,
+                interaction_grouping[, 1]
+            )]
         }
     }
     # extract all the possible genes.
@@ -130,9 +151,9 @@ plot_cpdb2 <- function(scdata, cell_type1, cell_type2, celltype_key, means, pval
         for (n in names(sce_list)) {
             for (x in unique(meta[, celltype_key])) {
                 sce_list[[n]][[x]] <- sce_subset_tmp[, meta[, celltype_key] == x &
-                  meta[, splitby_key] == n]
+                    meta[, splitby_key] == n]
                 sce_list_alt[[n]][[x]] <- sce_subset[, meta[, celltype_key] == x &
-                  meta[, splitby_key] == n]
+                    meta[, splitby_key] == n]
             }
         }
         sce_list2 <- lapply(sce_list, function(y) {
@@ -177,8 +198,7 @@ plot_cpdb2 <- function(scdata, cell_type1, cell_type2, celltype_key, means, pval
     }
     rownames(sce_list2) <- humanreadablename
     rownames(sce_list3) <- humanreadablename
-    decon_subset <- deconvoluted[deconvoluted$complex_name %in% .findComplex(interactions_subset),
-        ]
+    decon_subset <- deconvoluted[deconvoluted$complex_name %in% .findComplex(interactions_subset), ]
     if (nrow(decon_subset) > 0) {
         # although multiple rows are returned, really it's the same value for
         # the same complex
@@ -192,14 +212,14 @@ plot_cpdb2 <- function(scdata, cell_type1, cell_type2, celltype_key, means, pval
             decon_subset_fraction <- lapply(decon_subset, function(x) {
                 x <- unique(x$gene_name)
                 test <- lapply(sce_list_alt, function(y) {
-                  return(lapply(y, .cellTypeFraction_complex, genes = z, gene_symbol_mapping = gene_symbol_mapping))
+                    return(lapply(y, .cellTypeFraction_complex, genes = z, gene_symbol_mapping = gene_symbol_mapping))
                 })
                 return(test)
             })
             decon_subset_fraction <- lapply(decon_subset_fraction, function(x) {
                 y <- lapply(x, function(z) do.call(cbind, z))
                 for (i in 1:length(y)) {
-                  colnames(y[[i]]) <- paste0(names(y[i]), "_", colnames(y[[i]]))
+                    colnames(y[[i]]) <- paste0(names(y[i]), "_", colnames(y[[i]]))
                 }
                 y <- do.call(cbind, y)
                 return(y)
@@ -208,7 +228,7 @@ plot_cpdb2 <- function(scdata, cell_type1, cell_type2, celltype_key, means, pval
             decon_subset_fraction <- lapply(decon_subset, function(x) {
                 z <- unique(x$gene_name)
                 test <- lapply(sce_list_alt, function(y) {
-                  return(.cellTypeFraction_complex(y, genes = z, gene_symbol_mapping = gene_symbol_mapping))
+                    return(.cellTypeFraction_complex(y, genes = z, gene_symbol_mapping = gene_symbol_mapping))
                 })
                 return(do.call(cbind, test))
             })
@@ -228,8 +248,10 @@ plot_cpdb2 <- function(scdata, cell_type1, cell_type2, celltype_key, means, pval
     # make a big fat edgelist
     if (!is.null(desiredInteractions)) {
         if (class(desiredInteractions) == "list") {
-            desiredInteractions_ <- c(desiredInteractions, lapply(desiredInteractions,
-                rev))
+            desiredInteractions_ <- c(desiredInteractions, lapply(
+                desiredInteractions,
+                rev
+            ))
             cell_type_grid <- as.data.frame(do.call(rbind, desiredInteractions_))
         } else if ((class(desiredInteractions) == "data.frame")) {
             cell_type_grid <- desiredInteractions
@@ -260,27 +282,33 @@ plot_cpdb2 <- function(scdata, cell_type1, cell_type2, celltype_key, means, pval
     dfx <- list()
     if (!is.null(splitby_key)) {
         for (i in unique(meta[, splitby_key])) {
-            dfx[[i]] <- .generateDf(ligand = ligand, sep = DEFAULT_SEP, receptor = receptor,
+            dfx[[i]] <- .generateDf(
+                ligand = ligand, sep = DEFAULT_SEP, receptor = receptor,
                 receptor_a = receptor_a, receptor_b = receptor_b, pair = pair, converted_pair = converted_pair,
                 producers = producers, receivers = receivers, cell_type_means = expr_df,
                 cell_type_fractions = fraction_df, sce = sce_subset, sce_alt = sce_list_alt,
-                gsm = gene_symbol_mapping, splitted = i)
+                gsm = gene_symbol_mapping, splitted = i
+            )
             dfx[[i]] <- dfx[[i]][dfx[[i]]$barcode %in% barcodes, ]
         }
     } else {
-        dfx[[1]] <- .generateDf(ligand = ligand, sep = DEFAULT_SEP, receptor = receptor,
+        dfx[[1]] <- .generateDf(
+            ligand = ligand, sep = DEFAULT_SEP, receptor = receptor,
             receptor_a = receptor_a, receptor_b = receptor_b, pair = pair, converted_pair = converted_pair,
             producers = producers, receivers = receivers, cell_type_means = expr_df,
             cell_type_fractions = fraction_df, sce = sce_subset, sce_alt = sce_list_alt,
-            gsm = gene_symbol_mapping)
+            gsm = gene_symbol_mapping
+        )
         dfx[[1]] <- dfx[[1]][dfx[[1]]$barcode %in% barcodes, ]
     }
     if (return_df) {
         return(dfx)
     } else {
         # set the bundled connections
-        df0 <- lapply(dfx, function(x) x[x$producer_fraction >= frac | x$receiver_fraction >=
-            frac, ])  # save this for later
+        df0 <- lapply(dfx, function(x) {
+            x[x$producer_fraction >= frac | x$receiver_fraction >=
+                frac, ]
+        }) # save this for later
         # now construct the hierachy
         gl <- list()
         if (!is.null(interaction_grouping)) {
@@ -293,31 +321,36 @@ plot_cpdb2 <- function(scdata, cell_type1, cell_type2, celltype_key, means, pval
         for (i in 1:length(dfx)) {
             if (!is.null(splitby_key)) {
                 if (nrow(dfx[[i]]) > 0 & nrow(df0[[i]]) > 0) {
-                  gl[[i]] <- .constructGraph(input_group = names(dfx)[i], sep = DEFAULT_SEP,
-                    el = dfx[[i]], el0 = df0[[i]], unique_id = cells_test, interactions_df = interactions_subset,
-                    plot_cpdb_out = lr_interactions, celltype_key = celltype_key,
-                    edge_group = edge_group, edge_group_colors = edge_group_colors,
-                    node_group_color = node_group_colors, plot_score_as_thickness = plot_score_as_thickness)
+                    gl[[i]] <- .constructGraph(
+                        input_group = names(dfx)[i], sep = DEFAULT_SEP,
+                        el = dfx[[i]], el0 = df0[[i]], unique_id = cells_test, interactions_df = interactions_subset,
+                        plot_cpdb_out = lr_interactions, celltype_key = celltype_key,
+                        edge_group = edge_group, edge_group_colors = edge_group_colors,
+                        node_group_color = node_group_colors, plot_score_as_thickness = plot_score_as_thickness
+                    )
                 } else {
-                  gl[[i]] <- NA
-                  cantplot <- c(cantplot, names(dfx)[i])
+                    gl[[i]] <- NA
+                    cantplot <- c(cantplot, names(dfx)[i])
                 }
             } else {
                 if (nrow(dfx[[i]]) > 0 & nrow(df0[[i]]) > 0) {
-                  gl[[i]] <- .constructGraph(input_group = NULL, sep = DEFAULT_SEP,
-                    el = dfx[[i]], el0 = df0[[i]], unique_id = cells_test, interactions_df = interactions_subset,
-                    plot_cpdb_out = lr_interactions, celltype_key = celltype_key,
-                    edge_group = edge_group, edge_group_colors = edge_group_colors,
-                    node_group_color = node_group_colors, plot_score_as_thickness = plot_score_as_thickness)
+                    gl[[i]] <- .constructGraph(
+                        input_group = NULL, sep = DEFAULT_SEP,
+                        el = dfx[[i]], el0 = df0[[i]], unique_id = cells_test, interactions_df = interactions_subset,
+                        plot_cpdb_out = lr_interactions, celltype_key = celltype_key,
+                        edge_group = edge_group, edge_group_colors = edge_group_colors,
+                        node_group_color = node_group_colors, plot_score_as_thickness = plot_score_as_thickness
+                    )
                 } else {
-                  gl[[i]] <- NA
-                  noplot <- TRUE
+                    gl[[i]] <- NA
+                    noplot <- TRUE
                 }
             }
         }
         if (length(cantplot) > 0) {
             cat("The following groups in splitby_key cannot be plotted due to missing/no significant interactions/celltypes",
-                sep = "\n")
+                sep = "\n"
+            )
             cat(cantplot, sep = "\n")
         }
         if (noplot) {
@@ -329,4 +362,4 @@ plot_cpdb2 <- function(scdata, cell_type1, cell_type2, celltype_key, means, pval
             return(gl[[1]])
         }
     }
-}
+}