Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 87 additions & 62 deletions R/plot_functions_explore.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@
#' If \code{TRUE} (default) the PCA plot is produced.
#' Otherwise (if \code{FALSE}), the data which the
#' PCA plot is based on are returned.
#' @param loadings Logical(1),
#' If \code{TRUE} the PCA loadings plot is produced
#' instead of the PCA.
#' @return A scatter plot (generated by \code{\link[ggplot2]{ggplot}}).
#' @examples
#' # Load example
Expand All @@ -50,37 +53,39 @@
#' plot_pca(dep, indicate = "condition")
#' @export
plot_pca <- function(dep, x = 1, y = 2, indicate = c("condition", "replicate"),
label = FALSE, n = 500, point_size = 4, label_size = 3, plot = TRUE) {
label = FALSE, n = 500, point_size = 4, label_size = 3, plot = TRUE, loadings = FALSE) {
if(is.integer(x)) x <- as.numeric(x)
if(is.integer(y)) y <- as.numeric(y)
if(is.integer(n)) n <- as.numeric(n)
if(is.integer(point_size)) point_size <- as.numeric(point_size)
if(is.integer(label_size)) label_size <- as.numeric(label_size)
# Show error if inputs are not the required classes
assertthat::assert_that(inherits(dep, "SummarizedExperiment"),
is.numeric(x),
length(x) == 1,
is.numeric(y),
length(y) == 1,
is.numeric(n),
length(n) == 1,
is.character(indicate),
is.logical(label),
is.numeric(point_size),
length(point_size) == 1,
is.numeric(label_size),
length(label_size) == 1,
is.logical(plot),
length(plot) == 1)

is.numeric(x),
length(x) == 1,
is.numeric(y),
length(y) == 1,
is.numeric(n),
length(n) == 1,
is.character(indicate),
is.logical(label),
is.numeric(point_size),
length(point_size) == 1,
is.numeric(label_size),
length(label_size) == 1,
is.logical(plot),
length(plot) == 1,
is.logical(loadings),
length(loadings) == 1)

# Check for valid x and y values
if(x > ncol(dep) | y > ncol(dep)) {
stop(paste0("'x' and/or 'y' arguments are not valid\n",
"Run plot_pca() with 'x' and 'y' <= ",
ncol(dep), "."),
call. = FALSE)
}

# Check for valid 'n' value
if(n > nrow(dep)) {
stop(paste0("'n' argument is not valid.\n",
Expand All @@ -89,7 +94,7 @@ plot_pca <- function(dep, x = 1, y = 2, indicate = c("condition", "replicate"),
"."),
call. = FALSE)
}

# Check for valid 'indicate'
columns <- colnames(colData(dep))
if(!is.null(indicate)) {
Expand All @@ -99,76 +104,96 @@ plot_pca <- function(dep, x = 1, y = 2, indicate = c("condition", "replicate"),
}
if(any(!indicate %in% columns)) {
stop(paste0("'",
paste0(indicate, collapse = "' and/or '"),
"' column(s) is/are not present in ",
deparse(substitute(dep)),
".\nValid columns are: '",
paste(columns, collapse = "', '"),
"'."),
call. = FALSE)
paste0(indicate, collapse = "' and/or '"),
"' column(s) is/are not present in ",
deparse(substitute(dep)),
".\nValid columns are: '",
paste(columns, collapse = "', '"),
"'."),
call. = FALSE)
}
}

# Get the variance per protein and take the top n variable proteins
var <- apply(assay(dep), 1, sd)
df <- assay(dep)[order(var, decreasing = TRUE)[seq_len(n)],]

# Calculate PCA
pca <- prcomp(t(df), scale = FALSE)
pca_df <- pca$x %>%
data.frame() %>%
rownames_to_column() %>%
left_join(., data.frame(colData(dep)), by = c("rowname" = "ID"))

if(loadings==FALSE) {
pca_df <- pca$x %>%
data.frame() %>%
rownames_to_column() %>%
left_join(., data.frame(colData(dep)), by = c("rowname" = "ID"))
} else {
pca_df <- pca$rotation %>%
data.frame() %>%
rownames_to_column() %>%
left_join(., data.frame(colData(dep)), by = c("rowname" = "ID"))
}

# Calculate the percentage of variance explained
percent <- round(100 * pca$sdev^2 / sum(pca$sdev^2), 1)

# Make factors of indicate features
for(feat in indicate) {
pca_df[[feat]] <- as.factor(pca_df[[feat]])
}

# Plot the PCA plot
# Plot the PCA/loadings plot
p <- ggplot(pca_df, aes(get(paste0("PC", x)), get(paste0("PC", y)))) +
labs(title = paste0("PCA plot - top ", n, " variable proteins"),
x = paste0("PC", x, ": ", percent[x], "%"),
y = paste0("PC", y, ": ", percent[y], "%")) +
x = paste0("PC", x, ": ", percent[x], "%"),
y = paste0("PC", y, ": ", percent[y], "%")) +
coord_fixed() +
theme_DEP1()

if(length(indicate) == 0) {
p <- p + geom_point(size = point_size)
}
if(length(indicate) == 1) {
p <- p + geom_point(aes(col = pca_df[[indicate[1]]]),
size = point_size) +
labs(col = indicate[1])
}
if(length(indicate) == 2) {
p <- p + geom_point(aes(col = pca_df[[indicate[1]]],
shape = pca_df[[indicate[2]]]),
size = point_size) +

# Plot points for PCA plot or text for loadings plot
if (loadings == FALSE) {
if(length(indicate) == 0) {
p <- p + geom_point(size = point_size)
}
if(length(indicate) == 1) {
p <- p + geom_point(aes(col = pca_df[[indicate[1]]]),
size = point_size) +
labs(col = indicate[1])
}
if(length(indicate) == 2) {
p <- p + geom_point(aes(col = pca_df[[indicate[1]]],
shape = pca_df[[indicate[2]]]),
size = point_size) +
labs(col = indicate[1],
shape = indicate[2])
}
if(length(indicate) == 3) {
p <- p + geom_point(aes(col = pca_df[[indicate[1]]],
shape = pca_df[[indicate[2]]]),
size = point_size) +
facet_wrap(~pca_df[[indicate[3]]])
labs(col = indicate[1],
shape = indicate[2])
}
} else {
label <- TRUE
}
if(length(indicate) == 3) {
p <- p + geom_point(aes(col = pca_df[[indicate[1]]],
shape = pca_df[[indicate[2]]]),
size = point_size) +
facet_wrap(~pca_df[[indicate[3]]])
labs(col = indicate[1],
shape = indicate[2])
}

if(label) {
p <- p + geom_text(aes(label = rowname), size = label_size)
}
if(plot) {
return(p)
} else {
df <- pca_df %>%
select(rowname, paste0("PC", c(x, y)), match(indicate, colnames(pca_df)))
colnames(df)[1] <- "sample"
return(df)
if (loadings == FALSE) {
df <- pca_df %>%
select(rowname, paste0("PC", c(x, y)), match(indicate, colnames(pca_df)))
colnames(df)[1] <- "sample"
return(df)
} else {
df <- pca_df %>%
select(rowname, paste0("PC", c(x, y)))
colnames(df)[1] <- "sample"
return(df)
}
}
}

Expand Down
1 change: 1 addition & 0 deletions tests/testthat/test_8_plot_functions_explore.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ test_that("plot_pca returns a ggplot object", {
expect_is(plot_pca(test_sign, x = 1, y = 2, n = 100), "ggplot")
expect_is(plot_pca(test_sign, x = 1, y = 2, n = 100, label = TRUE), "ggplot")
expect_is(plot_pca(test_sign, x = 1, y = 2, n = 100, point_size = 2), "ggplot")
expect_is(plot_pca(test_sign, x = 1, y = 2, n = 100, loadings = TRUE), "ggplot")
expect_is(plot_pca(test_sign, x = 1, y = 2, n = 100, indicate = "condition"), "ggplot")
expect_is(plot_pca(test_sign, x = 1, y = 2, n = 100, indicate = c("label", "replicate", "condition")), "ggplot")
})
Expand Down