From b993307dbe944a47aab9056e0e7df76510c98e0f Mon Sep 17 00:00:00 2001 From: gordon Date: Fri, 6 Mar 2020 09:38:56 -0600 Subject: [PATCH 1/6] new loadings plot option in plot_pca --- .Rbuildignore | 2 + .gitignore | 5 +++ DEP.Rproj | 17 ++++++++ R/plot_functions_explore.R | 86 +++++++++++++++++++++----------------- 4 files changed, 72 insertions(+), 38 deletions(-) create mode 100644 .Rbuildignore create mode 100644 .gitignore create mode 100644 DEP.Rproj diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000..91114bf --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1,2 @@ +^.*\.Rproj$ +^\.Rproj\.user$ diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c589fe2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +.Rproj.user +.Rhistory +.RData +.Ruserdata +/pca \ No newline at end of file diff --git a/DEP.Rproj b/DEP.Rproj new file mode 100644 index 0000000..21a4da0 --- /dev/null +++ b/DEP.Rproj @@ -0,0 +1,17 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source diff --git a/R/plot_functions_explore.R b/R/plot_functions_explore.R index 64a72ae..7c9b640 100644 --- a/R/plot_functions_explore.R +++ b/R/plot_functions_explore.R @@ -24,6 +24,9 @@ #' If \code{TRUE} (default) the PCA plot is produced. #' Otherwise (if \code{FALSE}), the data which the #' PCA plot is based on are returned. +#' @param loadings Logical(1), +#' If \code{TRUE} the PCA loadings plot is produced +#' instead of the PCA. #' @return A scatter plot (generated by \code{\link[ggplot2]{ggplot}}). #' @examples #' # Load example @@ -50,7 +53,7 @@ #' plot_pca(dep, indicate = "condition") #' @export plot_pca <- function(dep, x = 1, y = 2, indicate = c("condition", "replicate"), - label = FALSE, n = 500, point_size = 4, label_size = 3, plot = TRUE) { + label = FALSE, n = 500, point_size = 4, label_size = 3, plot = TRUE, loadings = FALSE) { if(is.integer(x)) x <- as.numeric(x) if(is.integer(y)) y <- as.numeric(y) if(is.integer(n)) n <- as.numeric(n) @@ -58,21 +61,23 @@ plot_pca <- function(dep, x = 1, y = 2, indicate = c("condition", "replicate"), if(is.integer(label_size)) label_size <- as.numeric(label_size) # Show error if inputs are not the required classes assertthat::assert_that(inherits(dep, "SummarizedExperiment"), - is.numeric(x), - length(x) == 1, - is.numeric(y), - length(y) == 1, - is.numeric(n), - length(n) == 1, - is.character(indicate), - is.logical(label), - is.numeric(point_size), - length(point_size) == 1, - is.numeric(label_size), - length(label_size) == 1, - is.logical(plot), - length(plot) == 1) - + is.numeric(x), + length(x) == 1, + is.numeric(y), + length(y) == 1, + is.numeric(n), + length(n) == 1, + is.character(indicate), + is.logical(label), + is.numeric(point_size), + length(point_size) == 1, + is.numeric(label_size), + length(label_size) == 1, + is.logical(plot), + length(plot) == 1, + is.logical(loadings), + length(loadings) == 1) + # Check for valid x and y values if(x > ncol(dep) | y > ncol(dep)) { stop(paste0("'x' and/or 'y' arguments are not valid\n", @@ -80,7 +85,7 @@ plot_pca <- function(dep, x = 1, y = 2, indicate = c("condition", "replicate"), ncol(dep), "."), call. = FALSE) } - + # Check for valid 'n' value if(n > nrow(dep)) { stop(paste0("'n' argument is not valid.\n", @@ -89,7 +94,7 @@ plot_pca <- function(dep, x = 1, y = 2, indicate = c("condition", "replicate"), "."), call. = FALSE) } - + # Check for valid 'indicate' columns <- colnames(colData(dep)) if(!is.null(indicate)) { @@ -99,43 +104,48 @@ plot_pca <- function(dep, x = 1, y = 2, indicate = c("condition", "replicate"), } if(any(!indicate %in% columns)) { stop(paste0("'", - paste0(indicate, collapse = "' and/or '"), - "' column(s) is/are not present in ", - deparse(substitute(dep)), - ".\nValid columns are: '", - paste(columns, collapse = "', '"), - "'."), - call. = FALSE) + paste0(indicate, collapse = "' and/or '"), + "' column(s) is/are not present in ", + deparse(substitute(dep)), + ".\nValid columns are: '", + paste(columns, collapse = "', '"), + "'."), + call. = FALSE) } } - + # Get the variance per protein and take the top n variable proteins var <- apply(assay(dep), 1, sd) df <- assay(dep)[order(var, decreasing = TRUE)[seq_len(n)],] - + # Calculate PCA pca <- prcomp(t(df), scale = FALSE) - pca_df <- pca$x %>% - data.frame() %>% - rownames_to_column() %>% - left_join(., data.frame(colData(dep)), by = c("rowname" = "ID")) - + if(loadings==FALSE) { + pca_df <- pca$x %>% + data.frame() %>% + rownames_to_column() %>% + left_join(., data.frame(colData(dep)), by = c("rowname" = "ID")) + } else { + pca_df <- pca$rotation %>% + data.frame() + } + # Calculate the percentage of variance explained percent <- round(100 * pca$sdev^2 / sum(pca$sdev^2), 1) - + # Make factors of indicate features for(feat in indicate) { pca_df[[feat]] <- as.factor(pca_df[[feat]]) } - - # Plot the PCA plot + + # Plot the PCA/loadings plot p <- ggplot(pca_df, aes(get(paste0("PC", x)), get(paste0("PC", y)))) + labs(title = paste0("PCA plot - top ", n, " variable proteins"), - x = paste0("PC", x, ": ", percent[x], "%"), - y = paste0("PC", y, ": ", percent[y], "%")) + + x = paste0("PC", x, ": ", percent[x], "%"), + y = paste0("PC", y, ": ", percent[y], "%")) + coord_fixed() + theme_DEP1() - + if(length(indicate) == 0) { p <- p + geom_point(size = point_size) } From f979e5c7f13f43ad145d8b7ffb111e5596a7aafb Mon Sep 17 00:00:00 2001 From: gtluu <46392631+gtluu@users.noreply.github.com> Date: Fri, 6 Mar 2020 09:40:01 -0600 Subject: [PATCH 2/6] Delete DEP.Rproj --- DEP.Rproj | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100644 DEP.Rproj diff --git a/DEP.Rproj b/DEP.Rproj deleted file mode 100644 index 21a4da0..0000000 --- a/DEP.Rproj +++ /dev/null @@ -1,17 +0,0 @@ -Version: 1.0 - -RestoreWorkspace: Default -SaveWorkspace: Default -AlwaysSaveHistory: Default - -EnableCodeIndexing: Yes -UseSpacesForTab: Yes -NumSpacesForTab: 2 -Encoding: UTF-8 - -RnwWeave: Sweave -LaTeX: pdfLaTeX - -BuildType: Package -PackageUseDevtools: Yes -PackageInstallArgs: --no-multiarch --with-keep.source From 9e2eae4c56594b8e1068786e1ff12aa842c280ea Mon Sep 17 00:00:00 2001 From: gtluu <46392631+gtluu@users.noreply.github.com> Date: Fri, 6 Mar 2020 09:40:25 -0600 Subject: [PATCH 3/6] Delete .Rbuildignore --- .Rbuildignore | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 .Rbuildignore diff --git a/.Rbuildignore b/.Rbuildignore deleted file mode 100644 index 91114bf..0000000 --- a/.Rbuildignore +++ /dev/null @@ -1,2 +0,0 @@ -^.*\.Rproj$ -^\.Rproj\.user$ From 5fc86d4991704757a741a1c19f85c9b0f457ecfd Mon Sep 17 00:00:00 2001 From: gtluu <46392631+gtluu@users.noreply.github.com> Date: Fri, 6 Mar 2020 09:41:01 -0600 Subject: [PATCH 4/6] Update .gitignore add files to ignore --- .gitignore | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index c589fe2..5c90ecf 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,6 @@ .Rhistory .RData .Ruserdata -/pca \ No newline at end of file +/pca +DEP.Rproj +.Rbuildignore From ee7d05cc508bb31c7e35c96d63a8db25d5547f30 Mon Sep 17 00:00:00 2001 From: gordon Date: Wed, 11 Mar 2020 14:07:38 -0500 Subject: [PATCH 5/6] add loadings plot option, fix loadings df, add testthat test for loadings plot --- .Rbuildignore | 2 - .gitignore | 5 -- DEP.Rproj | 17 ----- R/plot_functions_explore.R | 65 ++++++++++++------- .../testthat/test_8_plot_functions_explore.R | 1 + 5 files changed, 41 insertions(+), 49 deletions(-) delete mode 100644 .Rbuildignore delete mode 100644 .gitignore delete mode 100644 DEP.Rproj diff --git a/.Rbuildignore b/.Rbuildignore deleted file mode 100644 index 91114bf..0000000 --- a/.Rbuildignore +++ /dev/null @@ -1,2 +0,0 @@ -^.*\.Rproj$ -^\.Rproj\.user$ diff --git a/.gitignore b/.gitignore deleted file mode 100644 index c589fe2..0000000 --- a/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -.Rproj.user -.Rhistory -.RData -.Ruserdata -/pca \ No newline at end of file diff --git a/DEP.Rproj b/DEP.Rproj deleted file mode 100644 index 21a4da0..0000000 --- a/DEP.Rproj +++ /dev/null @@ -1,17 +0,0 @@ -Version: 1.0 - -RestoreWorkspace: Default -SaveWorkspace: Default -AlwaysSaveHistory: Default - -EnableCodeIndexing: Yes -UseSpacesForTab: Yes -NumSpacesForTab: 2 -Encoding: UTF-8 - -RnwWeave: Sweave -LaTeX: pdfLaTeX - -BuildType: Package -PackageUseDevtools: Yes -PackageInstallArgs: --no-multiarch --with-keep.source diff --git a/R/plot_functions_explore.R b/R/plot_functions_explore.R index 7c9b640..8e4db71 100644 --- a/R/plot_functions_explore.R +++ b/R/plot_functions_explore.R @@ -127,7 +127,9 @@ plot_pca <- function(dep, x = 1, y = 2, indicate = c("condition", "replicate"), left_join(., data.frame(colData(dep)), by = c("rowname" = "ID")) } else { pca_df <- pca$rotation %>% - data.frame() + data.frame() %>% + rownames_to_column() %>% + left_join(., data.frame(colData(dep)), by = c("rowname" = "ID")) } # Calculate the percentage of variance explained @@ -146,39 +148,52 @@ plot_pca <- function(dep, x = 1, y = 2, indicate = c("condition", "replicate"), coord_fixed() + theme_DEP1() - if(length(indicate) == 0) { - p <- p + geom_point(size = point_size) - } - if(length(indicate) == 1) { - p <- p + geom_point(aes(col = pca_df[[indicate[1]]]), - size = point_size) + - labs(col = indicate[1]) - } - if(length(indicate) == 2) { - p <- p + geom_point(aes(col = pca_df[[indicate[1]]], - shape = pca_df[[indicate[2]]]), - size = point_size) + + # Plot points for PCA plot or text for loadings plot + if (loadings == FALSE) { + if(length(indicate) == 0) { + p <- p + geom_point(size = point_size) + } + if(length(indicate) == 1) { + p <- p + geom_point(aes(col = pca_df[[indicate[1]]]), + size = point_size) + + labs(col = indicate[1]) + } + if(length(indicate) == 2) { + p <- p + geom_point(aes(col = pca_df[[indicate[1]]], + shape = pca_df[[indicate[2]]]), + size = point_size) + + labs(col = indicate[1], + shape = indicate[2]) + } + if(length(indicate) == 3) { + p <- p + geom_point(aes(col = pca_df[[indicate[1]]], + shape = pca_df[[indicate[2]]]), + size = point_size) + + facet_wrap(~pca_df[[indicate[3]]]) labs(col = indicate[1], shape = indicate[2]) + } + } else { + label <- TRUE } - if(length(indicate) == 3) { - p <- p + geom_point(aes(col = pca_df[[indicate[1]]], - shape = pca_df[[indicate[2]]]), - size = point_size) + - facet_wrap(~pca_df[[indicate[3]]]) - labs(col = indicate[1], - shape = indicate[2]) - } + if(label) { p <- p + geom_text(aes(label = rowname), size = label_size) } if(plot) { return(p) } else { - df <- pca_df %>% - select(rowname, paste0("PC", c(x, y)), match(indicate, colnames(pca_df))) - colnames(df)[1] <- "sample" - return(df) + if (loadings == FALSE) { + df <- pca_df %>% + select(rowname, paste0("PC", c(x, y)), match(indicate, colnames(pca_df))) + colnames(df)[1] <- "sample" + return(df) + } else { + df <- pca_df %>% + select(rowname, paste0("PC", c(x, y))) + colnames(df)[1] <- "sample" + return(df) + } } } diff --git a/tests/testthat/test_8_plot_functions_explore.R b/tests/testthat/test_8_plot_functions_explore.R index d3c4498..9a4fd46 100644 --- a/tests/testthat/test_8_plot_functions_explore.R +++ b/tests/testthat/test_8_plot_functions_explore.R @@ -15,6 +15,7 @@ test_that("plot_pca returns a ggplot object", { expect_is(plot_pca(test_sign, x = 1, y = 2, n = 100), "ggplot") expect_is(plot_pca(test_sign, x = 1, y = 2, n = 100, label = TRUE), "ggplot") expect_is(plot_pca(test_sign, x = 1, y = 2, n = 100, point_size = 2), "ggplot") + expect_is(plot_pca(test_sign, x = 1, y = 2, n = 100, loadings = TRUE), "ggplot") expect_is(plot_pca(test_sign, x = 1, y = 2, n = 100, indicate = "condition"), "ggplot") expect_is(plot_pca(test_sign, x = 1, y = 2, n = 100, indicate = c("label", "replicate", "condition")), "ggplot") }) From 8b5de39ab62904ae024a993a005cccdd8e23ee92 Mon Sep 17 00:00:00 2001 From: gtluu <46392631+gtluu@users.noreply.github.com> Date: Wed, 11 Mar 2020 14:09:44 -0500 Subject: [PATCH 6/6] Delete .gitignore --- .gitignore | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 .gitignore diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 5c90ecf..0000000 --- a/.gitignore +++ /dev/null @@ -1,7 +0,0 @@ -.Rproj.user -.Rhistory -.RData -.Ruserdata -/pca -DEP.Rproj -.Rbuildignore