#Title: Honors Thesis R Script
#Author: Woo Jeong

###Package Preparation
library(DESeq2)
library(annotables)
library(RColorBrewer)
library(ggrepel)
library(annotables)
library(dplyr)
library(tidyverse)
library(pheatmap)
library(ggpubr)
library(openxlsx)
library(writexl)
library(DEGreport)
library(VennDiagram)
library(org.Hs.eg.db)
library(DOSE)
library(pathview)
library(clusterProfiler)
library(AnnotationHub)
library(ensembldb)
library(tidyverse)

###Data Importation
data1 <- read.delim("Day0_1.txt", header=TRUE, row.names=1)
data2 <- read.delim("Day0_2.txt", header=TRUE, row.names=1)
data3 <- read.delim("Day2_1.txt", header=TRUE, row.names=1)
data4 <- read.delim("Day2_2.txt", header=TRUE, row.names=1)
data5 <- read.delim("Day4_1.txt", header=TRUE, row.names=1)
data6 <- read.delim("Day4_2.txt", header=TRUE, row.names=1)
data7 <- read.delim("Day8_1.txt", header=TRUE, row.names=1)
data8 <- read.delim("Day8_2.txt", header=TRUE, row.names=1)
data9 <- read.delim("Day14.before.sorting_1.txt", header=TRUE, row.names=1)
data10 <- read.delim("Day14.before.sorting_2.txt", header=TRUE, row.names=1)
data11 <- read.delim("Day14.after.sorting_1.txt", header=TRUE, row.names=1)
data12 <- read.delim("Day14.after.sorting_2.txt", header=TRUE, row.names=1)

###Raw Data & Metadata Generation
#merge data by column bind (cbind)
hpsc.ec.raw <- cbind(data1, data2, data3, data4, data5, data6, data7, data8, data9, data10, data11, data12)
#name the columns of the raw data
colnames(hpsc.ec.raw) <- c("D.0_1", "D.0_2", "D.2_1", "D.2_2", "D.4_1", "D.4_2", "D.8_1", "D.8_2", "D14.not.sorted_1", "D14.not.sorted_2", "D 14.sorted_1", "D14.sorted_2")
#metadata
timepoints <- factor(c("Day.0", "Day.0", "Day.2", "Day.2", "Day.4", "Day.4", "Day.8", "Day.8", "Day14.not.sorted", "Day14.not.sorted", "Day14.sorted", "Day14.sorted"))
meta.condition <- data.frame(timepoints=as.factor(timepoints))
#name the rows of the metadata as the columns of the raw data (timepoints)
rownames(meta.condition) <- colnames(hpsc.ec.raw)
#check if row(meta)=col(raw)?
all(rownames(meta.condition) == colnames(hpsc.ec.raw))

###Normalization
#DESeq2 Object Generation
hpsc.ec.object <- DESeqDataSetFromMatrix(countData = hpsc.ec.raw, colData = meta.condition, design = ~timepoints)
#normalization: calculation
hpsc.ec.object <- estimateSizeFactors(hpsc.ec.object)
sizeFactors(hpsc.ec.object)
#normalization: extraction
normalized_hpsc.ec.object_counts <- counts(hpsc.ec.object, normalized=TRUE)


###Fitting Raw Data to DESeq2 Generalized Linear Model
##Diagnostic 1: Mean-Variance Plot
#calculating the mean for each gene (each row)
mean_counts <- apply(hpsc.ec.raw[,1:12],1,mean)
#calculating the variance for each gene (each row)
variance_counts <- apply(hpsc.ec.raw[,1:12], 1, var)
#Dispersion: creating data frame with mean and var for each gene
df <- data.frame(mean_counts, variance_counts)
#Dispersion: ggplot
ggplot(df, aes(x=mean_counts, y=variance_counts)) +
  geom_point() +
  geom_abline(slope=1, intercept=0, color="blue") +
  geom_smooth(method="lm", formula=y~x, color="red") +
  scale_y_log10() +
  scale_x_log10() +
  xlab("Mean counts per gene") +
  ylab("Variance per gene")

##Diagnostic 2: Dispersion-Mean Plot
#Dispersion Calculation
hpsc.ec.object <- estimateDispersions(hpsc.ec.object)
#Plot Dispersion Estimates
plotDispEsts(hpsc.ec.object)

###Unsupervised Clustering Analysis
##Hierarchial Clustering Analysis 
#unsupervised clustering analysis: log transformation
vsd_hpsc.ec <- vst(hpsc.ec.object, blind=TRUE)
##Hierarchical clustering with correlation heatmaps
#extract the vst matrix from the object
vsd_mat_hpsc.ec <- assay(vsd_hpsc.ec)
#compute pairwise correlation values #showing correlation between each samples
vsd_cor_hpsc.ec <- cor(vsd_mat_hpsc.ec)
#plot heatmap
pheatmap(vsd_cor_hpsc.ec,
         cluster_rows = F,
         cluster_cols = F)

##Principal Component Analysis (PCA)
#plot PCA
plotPCA(vsd_hpsc.ec, intgroup="timepoints")
#PCA details
plotPCA(vsd_hpsc.ec, intgroup="timepoints", returnData = TRUE)


###Differential Expression (DE) Analysis by Wald Test
#full model
hpsc.ec.object <- DESeqDataSetFromMatrix(countData = hpsc.ec.raw, colData = meta.condition, design = ~timepoints)
##Day0 vs. Day2
#contrast: Day0 vs. Day2
hpsc.ec.object_res_Day2_0 <- results(hpsc.ec.object, contrast = c("timepoints", "Day.2", "Day.0"), alpha = 0.05, lfcThreshold = 1)
#summary
mcols(hpsc.ec.object_res_Day2_0)
summary(hpsc.ec.object_res_Day2_0)
#MA plot
plotMA(hpsc.ec.object_res_Day2_0, ylim = c(-8,8))
#shrink contrast
hpsc.ec.object_res_Day2_0_shrink <- lfcShrink(hpsc.ec.object, contrast = c("timepoints", "Day.2", "Day.0"), res=hpsc.ec.object_res_Day2_0)
#summary
mcols(hpsc.ec.object_res_Day2_0_shrink)
#shrink MA plot
plotMA(hpsc.ec.object_res_Day2_0_shrink, ylim = c(-8,8), main="Day0 vs Day2")
#DE genes summary
summary(hpsc.ec.object_res_Day2_0_shrink)

#DESeq2 shrink data frame
hpsc.ec.object_result_Day2_0 <- data.frame(hpsc.ec.object_res_Day2_0_shrink)
hpsc.ec.object_result_Day2_0$symbol <- rownames(hpsc.ec.object_result_Day2_0)

hpsc.ec.object_result_Day2_0 <- left_join(x=hpsc.ec.object_result_Day2_0,
                                          y=grch38[,c("ensgene", "symbol", "description")], 
                                          by = "symbol")

#DESeq2 shrink DE genes
hpsc.ec.object_result_Day2_0_sig_original <- subset(hpsc.ec.object_result_Day2_0, padj < 0.05)
hpsc.ec.object_result_Day2_0_sig_original <- subset(hpsc.ec.object_result_Day2_0_sig_original, abs(log2FoldChange) > 1)

#in order of padj
hpsc.ec.object_result_Day2_0_sig <- hpsc.ec.object_result_Day2_0_sig_original %>%
  arrange(padj)

#upregulated
hpsc.ec.object_result_Day2_0_upregulated <- hpsc.ec.object_result_Day2_0_sig %>%
  filter(log2FoldChange > 0) %>%
  arrange(desc(log2FoldChange))

#downregulated
hpsc.ec.object_result_Day2_0_downregulated <- hpsc.ec.object_result_Day2_0_sig %>%
  filter(log2FoldChange < 0) %>%
  arrange(desc(log2FoldChange))

#volcano data.frame from all results
volcano_Day2_0 <- data.frame(hpsc.ec.object_result_Day2_0) %>%
  mutate(threshold = padj < 0.05) %>%
  arrange(padj)

#volcano plot
ggplot(volcano_Day2_0) +
  geom_point(aes(x = log2FoldChange, y = -log10(padj), color = threshold)) +
  geom_text_repel(data=head(volcano_Day2_0, 50), aes(x=log2FoldChange, y=-log10(padj), label=symbol)) +
  xlab("log2 fold change") +
  ylab("-log10 adjusted p-value") +
  xlim(c(-10,10)) +
  ylim(c(0,200)) +
  ggtitle("Day0 vs Day2") +
  scale_color_manual(values=c("black", "red")) +
  theme(legend.position = "none",
        plot.title = element_text(size = rel(1.5), hjust = 0.5),
        axis.title = element_text(size = rel(1.25)))

#make a color of our selection
heat_colors <- brewer.pal(6, "YlOrRd")
###DESeq2 visualization - heatmap
heatmap_general_Day2_0 <- normalized_hpsc.ec.object_counts[hpsc.ec.object_result_Day2_0_sig$symbol,]
pheatmap(heatmap_general_Day2_0,
         cluster_rows = T,
         cluster_cols = F,
         show_rownames = F,
         show_colnames = T,
         scale = "row",
         main = "Day0 vs Day2",
         treeheight_row = 0,
         treeheight_col = 0,
         col = heat_colors,
         border_color= NA, 
         fontsize = 10, 
         fontsize_row = 10)

#subset normalized counts to significant genes
sig_norm_counts_hpsc.ec_Day2_0 <- normalized_hpsc.ec.object_counts[hpsc.ec.object_result_Day2_0_sig$symbol,c(1:4)]

#heatmap plot: general scheme
pheatmap(sig_norm_counts_hpsc.ec_Day2_0,
         cluster_rows = T,
         cluster_cols = F,
         show_rownames = F,
         show_colnames = T,
         scale = "row",
         main = "Day0 vs Day2",
         treeheight_row = 0,
         treeheight_col = 0,
         drop_levels = T)

#heatmap: top20 genes by padj
top_genes_Day2_0 <- sig_norm_counts_hpsc.ec_Day2_0[c(1:20),]

pheatmap(top_genes_Day2_0,
         cluster_rows = T,
         cluster_cols = F,
         show_rownames = T,
         show_colnames = T,
         scale = "row",
         main = "Day0 vs Day2",
         treeheight_row = 0,
         treeheight_col = 0,
         fontsize_row = 11)

#Expression plotL top20 genes by padj
top20_Day2_0 <- data.frame(sig_norm_counts_hpsc.ec_Day2_0)[1:20,] %>%
  rownames_to_column(var = "symbol")

top20_Day2_0 <- gather(top20_Day2_0,
                       key = "samplename",
                       value = "normalized_counts",
                       2:5)

top20_Day2_0 <- inner_join(top20_Day2_0,
                           rownames_to_column(meta.condition, var = "samplename"),
                           by = "samplename")

ggplot(top20_Day2_0) +
  geom_point(aes(x = symbol, y = normalized_counts, color = timepoints)) + 
  scale_y_log10() +
  xlab("Genes") +
  ylab("Normalized Counts") +
  ggtitle("Top 20 Significant DE Genes: Day0 vs Day2") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) + theme(plot.title = element_text(hjust = 0.5))

##DAY0 vs DAY4
#contrast:Day4&Day0
hpsc.ec.object_res_Day4_0 <- results(hpsc.ec.object, contrast = c("timepoints", "Day.4", "Day.0"), alpha = 0.05, lfcThreshold = 1)

#summary
mcols(hpsc.ec.object_res_Day4_0)
summary(hpsc.ec.object_res_Day4_0)
#MA plot
plotMA(hpsc.ec.object_res_Day4_0, ylim = c(-8,8))

#shrink contrast:Day4&0
hpsc.ec.object_res_Day4_0_shrink <- lfcShrink(hpsc.ec.object, contrast = c("timepoints", "Day.4", "Day.0"), res=hpsc.ec.object_res_Day4_0)

#summary
mcols(hpsc.ec.object_res_Day4_0_shrink)

#shrink MA plot
plotMA(hpsc.ec.object_res_Day4_0_shrink, ylim = c(-8,8), main="Day0 vs Day4")

#DE genes summary
summary(hpsc.ec.object_res_Day4_0_shrink)

#DESeq2 shrink data frame
hpsc.ec.object_result_Day4_0 <- data.frame(hpsc.ec.object_res_Day4_0_shrink)
hpsc.ec.object_result_Day4_0$symbol <- rownames(hpsc.ec.object_result_Day4_0)
hpsc.ec.object_result_Day4_0 <- left_join(x=hpsc.ec.object_result_Day4_0,
                                          y=grch38[,c("ensgene", "symbol", "description")], 
                                          by = "symbol")

#DESeq2 shrink DE genes
hpsc.ec.object_result_Day4_0_sig_original <- subset(hpsc.ec.object_result_Day4_0, padj < 0.05)
hpsc.ec.object_result_Day4_0_sig_original <- subset(hpsc.ec.object_result_Day4_0_sig_original, abs(log2FoldChange) > 1)

#in order of padj
hpsc.ec.object_result_Day4_0_sig <- hpsc.ec.object_result_Day4_0_sig_original %>%
  arrange(padj)

#upregulated
hpsc.ec.object_result_Day4_0_upregulated <- hpsc.ec.object_result_Day4_0_sig %>%
  filter(log2FoldChange > 0)

#downregulated
hpsc.ec.object_result_Day4_0_downregulated <- hpsc.ec.object_result_Day4_0_sig %>%
  filter(log2FoldChange < 0)

#volcano data.frame from all results
volcano_Day4_0 <- data.frame(hpsc.ec.object_result_Day4_0) %>%
  mutate(threshold = padj < 0.05) %>%
  arrange(padj)

#volcano plot
pdf("Volcano Plot Day0 vs. Day4.pdf")
ggplot(volcano_Day4_0) +
  geom_point(aes(x = log2FoldChange, y = -log10(padj), color = threshold)) +
  geom_text_repel(data=head(volcano_Day4_0, 30), aes(x=log2FoldChange, y=-log10(padj), label=symbol)) +
  xlab("log2 fold change") +
  ylab("-log10 adjusted p-value") +
  xlim(c(-10,10)) +
  ylim(c(0,200)) +
  ggtitle("Day0 vs Day4") +
  scale_color_manual(values=c("black", "red")) +
  theme(legend.position = "none",
        plot.title = element_text(size = rel(1.5), hjust = 0.5),
        axis.title = element_text(size = rel(1.25)))

#DESeq2 visualization - heatmap
heatmap_general_Day4_0 <- normalized_hpsc.ec.object_counts[hpsc.ec.object_result_Day4_0_sig$symbol,]
pheatmap(heatmap_general_Day4_0,
         cluster_rows = T,
         cluster_cols = F,
         show_rownames = F,
         show_colnames = T,
         scale = "row",
         main = "Day0 vs Day4",
         treeheight_row = 0,
         treeheight_col = 0,
         col = heat_colors)

#subset normalized counts to significant genes
sig_norm_counts_hpsc.ec_Day4_0 <- normalized_hpsc.ec.object_counts[hpsc.ec.object_result_Day4_0_sig$symbol,c(1:2,5:6)]

#heatmap plot: general scheme
pheatmap(sig_norm_counts_hpsc.ec_Day4_0,
         cluster_rows = T,
         cluster_cols = F,
         show_rownames = F,
         show_colnames = T,
         scale = "row",
         main = "Day0 vs Day4",
         treeheight_row = 0,
         treeheight_col = 0)

#topgenes
top_genes_Day4_0 <- sig_norm_counts_hpsc.ec_Day4_0[c(1:20),]

pheatmap(top_genes_Day4_0,
         cluster_rows = T,
         cluster_cols = F,
         show_rownames = T,
         show_colnames = T,
         scale = "row",
         main = "Day0 vs Day4",
         treeheight_row = 0,
         treeheight_col = 0)

#Expression plot
top20_Day4_0 <- data.frame(sig_norm_counts_hpsc.ec_Day4_0)[1:20,] %>%
  rownames_to_column(var = "symbol")

top20_Day4_0 <- gather(top20_Day4_0,
                       key = "samplename",
                       value = "normalized_counts",
                       2:5)

top20_Day4_0 <- inner_join(top20_Day4_0,
                           rownames_to_column(meta.condition, var = "samplename"),
                           by = "samplename")

ggplot(top20_Day4_0) +
  geom_point(aes(x = symbol, y = normalized_counts, color = timepoints)) + 
  scale_y_log10() +
  xlab("Genes") +
  ylab("Normalized Counts") +
  ggtitle("Top 20 Significant DE Genes: Day0 vs Day4") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) + theme(plot.title = element_text(hjust = 0.5))

##Day0 vs. Day8
#contrast:Day8&Day0
hpsc.ec.object_res_Day8_0 <- results(hpsc.ec.object, contrast = c("timepoints", "Day.8", "Day.0"), alpha = 0.05, lfcThreshold = 1)

#summary
mcols(hpsc.ec.object_res_Day8_0)
summary(hpsc.ec.object_res_Day8_0)
#MA plot
plotMA(hpsc.ec.object_res_Day8_0, ylim = c(-8,8))

#shrink contrast:Day4&0
hpsc.ec.object_res_Day8_0_shrink <- lfcShrink(hpsc.ec.object, contrast = c("timepoints", "Day.8", "Day.0"), res=hpsc.ec.object_res_Day8_0)

#summary
mcols(hpsc.ec.object_res_Day8_0_shrink)
summary(hpsc.ec.object_res_Day8_0_shrink)

#shrink MA plot
plotMA(hpsc.ec.object_res_Day8_0_shrink, ylim = c(-8,8), main="Day0 vs Day8")
abline(h=c(-1, 1),col="dodgerblue",lwd=2)

#DESeq2 shrink data frame
hpsc.ec.object_result_Day8_0 <- data.frame(hpsc.ec.object_res_Day8_0_shrink)
hpsc.ec.object_result_Day8_0$symbol <- rownames(hpsc.ec.object_result_Day8_0)

hpsc.ec.object_result_Day8_0 <- left_join(x=hpsc.ec.object_result_Day8_0,
                                          y=grch38[,c("ensgene", "symbol", "description")], 
                                          by = "symbol")

#DESeq2 shrink DE genes
hpsc.ec.object_result_Day8_0_sig_original <- subset(hpsc.ec.object_result_Day8_0, padj < 0.05)
hpsc.ec.object_result_Day8_0_sig_original <- subset(hpsc.ec.object_result_Day8_0_sig_original, abs(log2FoldChange) > 1)

#in order of padj
hpsc.ec.object_result_Day8_0_sig <- hpsc.ec.object_result_Day8_0_sig_original %>%
  arrange(padj)

#upregulated
hpsc.ec.object_result_Day8_0_upregulated <- hpsc.ec.object_result_Day8_0_sig %>%
  filter(log2FoldChange > 0)

#downregulated
hpsc.ec.object_result_Day8_0_downregulated <- hpsc.ec.object_result_Day8_0_sig %>%
  filter(log2FoldChange < 0)

#volcano data.frame from all results
volcano_Day8_0 <- data.frame(hpsc.ec.object_result_Day8_0) %>%
  mutate(threshold = padj < 0.05) %>%
  arrange(padj)

#volcano plot
pdf("Volcano Plot Day0 vs. Day8.pdf")
ggplot(volcano_Day8_0) +
  geom_point(aes(x = log2FoldChange, y = -log10(padj), color = threshold)) +
  geom_text_repel(data=head(volcano_Day8_0, 50), aes(x=log2FoldChange, y=-log10(padj), label=symbol)) +
  xlab("log2 fold change") +
  ylab("-log10 adjusted p-value") +
  xlim(c(-10,10)) +
  ylim(c(0,200)) +
  ggtitle("Day0 vs Day8") +
  scale_color_manual(values=c("black", "red")) +
  theme(legend.position = "none",
        plot.title = element_text(size = rel(1.5), hjust = 0.5),
        axis.title = element_text(size = rel(1.25)))

#heatmap
heatmap_general_Day8_0 <- normalized_hpsc.ec.object_counts[hpsc.ec.object_result_Day8_0_sig$symbol,]

pheatmap(heatmap_general_Day8_0,
         cluster_rows = T,
         cluster_cols = F,
         show_rownames = F,
         show_colnames = T,
         scale = "row",
         main = "Day0 vs Day8",
         treeheight_row = 0,
         treeheight_col = 0,
         col = heat_colors)

#subset normalized counts to significant genes
sig_norm_counts_hpsc.ec_Day8_0 <- normalized_hpsc.ec.object_counts[hpsc.ec.object_result_Day8_0_sig$symbol,c(1:2,7:8)]

#heatmap plot: general scheme
pheatmap(sig_norm_counts_hpsc.ec_Day8_0,
         cluster_rows = T,
         cluster_cols = F,
         show_rownames = F,
         show_colnames = T,
         scale = "row",
         main = "Day0 vs Day8",
         treeheight_row = 0,
         treeheight_col = 0)

#topgenes
top_genes_Day8_0 <- sig_norm_counts_hpsc.ec_Day8_0[c(1:50),]

pheatmap(top_genes_Day8_0,
         cluster_rows = T,
         cluster_cols = F,
         show_rownames = T,
         show_colnames = T,
         scale = "row",
         main = "Day0 vs Day8",
         treeheight_row = 0,
         treeheight_col = 0)

#Expression plot
top20_Day8_0 <- data.frame(sig_norm_counts_hpsc.ec_Day8_0)[1:40,] %>%
  rownames_to_column(var = "symbol")

top20_Day8_0 <- gather(top20_Day8_0,
                       key = "samplename",
                       value = "normalized_counts",
                       2:5)

top20_Day8_0 <- inner_join(top20_Day8_0,
                           rownames_to_column(meta.condition, var = "samplename"),
                           by = "samplename")

ggplot(top20_Day8_0) +
  geom_point(aes(x = symbol, y = normalized_counts, color = timepoints)) + 
  scale_y_log10() +
  xlab("Genes") +
  ylab("Normalized Counts") +
  ggtitle("Top 20 Significant DE Genes: Day0 vs Day8") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) + theme(plot.title = element_text(hjust = 0.5))

##DAY0 vs DAY14unsorted
#contrast:Day14unsorted vs. Day0
hpsc.ec.object_res_Day14.unsorted_0 <- results(hpsc.ec.object, contrast = c("timepoints", "Day14.not.sorted", "Day.0"), alpha = 0.05, lfcThreshold = 1)
#summary
mcols(hpsc.ec.object_res_Day14.unsorted_0)
summary(hpsc.ec.object_res_Day14.unsorted_0)
#MA plot
plotMA(hpsc.ec.object_res_Day14.unsorted_0, ylim = c(-8,8))
#shrink contrast:Day14.unsorted&0
hpsc.ec.object_res_Day14.unsorted_0_shrink <- lfcShrink(hpsc.ec.object, contrast = c("timepoints", "Day14.not.sorted", "Day.0"), res=hpsc.ec.object_res_Day14.unsorted_0)
#summary
mcols(hpsc.ec.object_res_Day14.unsorted_0_shrink)
#shrink MA plot
plotMA(hpsc.ec.object_res_Day14.unsorted_0_shrink, ylim = c(-8,8), main="Day0 vs Day14.not.sorted")
abline(h=c(-1, 1),col="dodgerblue",lwd=2)
#DE genes summary
summary(hpsc.ec.object_res_Day14.unsorted_0_shrink)

#DESeq2 shrink data frame
hpsc.ec.object_result_Day14.unsorted_0 <- data.frame(hpsc.ec.object_res_Day14.unsorted_0_shrink)
hpsc.ec.object_result_Day14.unsorted_0$symbol <- rownames(hpsc.ec.object_result_Day14.unsorted_0)

hpsc.ec.object_result_Day14.unsorted_0 <- left_join(x=hpsc.ec.object_result_Day14.unsorted_0,
                                                    y=grch38[,c("ensgene", "symbol", "description")], 
                                                    by = "symbol")

#DESeq2 shrink DE genes
hpsc.ec.object_result_Day14.unsorted_0_sig_original <- subset(hpsc.ec.object_result_Day14.unsorted_0, padj < 0.05)
hpsc.ec.object_result_Day14.unsorted_0_sig_original <- subset(hpsc.ec.object_result_Day14.unsorted_0_sig_original, abs(log2FoldChange) > 1)

#in order of padj
hpsc.ec.object_result_Day14.unsorted_0_sig <- hpsc.ec.object_result_Day14.unsorted_0_sig_original %>%
  arrange(padj)

#upregulated
hpsc.ec.object_result_Day14.unsorted_0_upregulated <- hpsc.ec.object_result_Day14.unsorted_0_sig %>%
  filter(log2FoldChange > 0)

#downregulated
hpsc.ec.object_result_Day14.unsorted_0_downregulated <- hpsc.ec.object_result_Day14.unsorted_0_sig %>%
  filter(log2FoldChange < 0)

#volcano data.frame from all results
volcano_Day14.unsorted_0 <- data.frame(hpsc.ec.object_result_Day14.unsorted_0) %>%
  mutate(threshold = padj < 0.05) %>%
  arrange(padj)

#volcano plot
ggplot(volcano_Day14.unsorted_0) +
  geom_point(aes(x = log2FoldChange, y = -log10(padj), color = threshold)) +
  geom_text_repel(data=head(volcano_Day14.unsorted_0, 50), aes(x=log2FoldChange, y=-log10(padj), label=symbol)) +
  xlab("log2 fold change") +
  ylab("-log10 adjusted p-value") +
  xlim(c(-10,10)) +
  ylim(c(0,200)) +
  ggtitle("Day0 vs Day14.unsorted") +
  scale_color_manual(values=c("black", "red")) +
  theme(legend.position = "none",
        plot.title = element_text(size = rel(1.5), hjust = 0.5),
        axis.title = element_text(size = rel(1.25)))

#DESeq2 visualization - heatmap
heatmap_general_Day14.unsorted_0 <- normalized_hpsc.ec.object_counts[hpsc.ec.object_result_Day14.unsorted_0_sig$symbol,]
pheatmap(heatmap_general_Day14.unsorted_0,
         cluster_rows = T,
         cluster_cols = F,
         show_rownames = F,
         show_colnames = T,
         scale = "row",
         main = "Day0 vs Day14.unsorted",
         treeheight_row = 0,
         treeheight_col = 0,
         col = heat_colors)

#subset normalized counts to significant genes
sig_norm_counts_hpsc.ec_Day14.unsorted_0 <- normalized_hpsc.ec.object_counts[hpsc.ec.object_result_Day14.unsorted_0_sig$symbol,c(1:2,9:10)]

#heatmap plot: general scheme
pheatmap(sig_norm_counts_hpsc.ec_Day14.unsorted_0,
         cluster_rows = T,
         cluster_cols = F,
         show_rownames = F,
         show_colnames = T,
         scale = "row",
         main = "Day0 vs Day14.unsorted",
         treeheight_row = 0,
         treeheight_col = 0)

#topgenes
top_genes_Day14.unsorted_0 <- sig_norm_counts_hpsc.ec_Day14.unsorted_0[c(1:50),]
pheatmap(top_genes_Day14.unsorted_0,
         cluster_rows = T,
         cluster_cols = F,
         show_rownames = T,
         show_colnames = T,
         scale = "row",
         main = "Day0 vs Day14.unsorted",
         treeheight_row = 0,
         treeheight_col = 0)

#Expression plot
top20_Day14.unsorted_0 <- data.frame(sig_norm_counts_hpsc.ec_Day14.unsorted_0)[1:20,] %>%
  rownames_to_column(var = "symbol")

top20_Day14.unsorted_0 <- gather(top20_Day14.unsorted_0,
                                 key = "samplename",
                                 value = "normalized_counts",
                                 2:5)

top20_Day14.unsorted_0 <- inner_join(top20_Day14.unsorted_0,
                                     rownames_to_column(meta.condition, var = "samplename"),
                                     by = "samplename")

ggplot(top20_Day14.unsorted_0) +
  geom_point(aes(x = symbol, y = normalized_counts, color = timepoints)) + 
  scale_y_log10() +
  xlab("Genes") +
  ylab("Normalized Counts") +
  ggtitle("Top 20 Significant DE Genes: Day0 vs Day14.unsorted") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) + theme(plot.title = element_text(hjust = 0.5))

##DAY0 vs DAY14.sorted
#contrast:Day14sorted vs Day0
hpsc.ec.object_res_Day14.sorted_0 <- results(hpsc.ec.object, contrast = c("timepoints", "Day14.sorted", "Day.0"), alpha = 0.05, lfcThreshold = 1)

#summary
mcols(hpsc.ec.object_res_Day14.sorted_0)
summary(hpsc.ec.object_res_Day14.sorted_0)
#MA plot
plotMA(hpsc.ec.object_res_Day14.sorted_0, ylim = c(-8,8))

#shrink contrast:Day4&0
hpsc.ec.object_res_Day14.sorted_0_shrink <- lfcShrink(hpsc.ec.object, contrast = c("timepoints", "Day14.sorted", "Day.0"), res=hpsc.ec.object_res_Day14.sorted_0)

#summary
mcols(hpsc.ec.object_res_Day14.sorted_0_shrink)

#shrink MA plot
plotMA(hpsc.ec.object_res_Day14.sorted_0_shrink, ylim = c(-8,8), main="Day0 vs Day14.sorted")
abline(h=c(-1, 1),col="dodgerblue",lwd=2)

#DE genes summary
summary(hpsc.ec.object_res_Day14.sorted_0_shrink)

#DESeq2 shrink data frame
hpsc.ec.object_result_Day14.sorted_0 <- data.frame(hpsc.ec.object_res_Day14.sorted_0_shrink)
hpsc.ec.object_result_Day14.sorted_0$symbol <- rownames(hpsc.ec.object_result_Day14.sorted_0)

hpsc.ec.object_result_Day14.sorted_0 <- left_join(x=hpsc.ec.object_result_Day14.sorted_0,
                                                  y=grch38[,c("ensgene", "symbol", "description")], 
                                                  by = "symbol")

#DESeq2 shrink DE genes
hpsc.ec.object_result_Day14.sorted_0_sig_original <- subset(hpsc.ec.object_result_Day14.sorted_0, padj < 0.05)
hpsc.ec.object_result_Day14.sorted_0_sig_original <- subset(hpsc.ec.object_result_Day14.sorted_0_sig_original, abs(log2FoldChange) > 1)

#in order of padj
hpsc.ec.object_result_Day14.sorted_0_sig <- hpsc.ec.object_result_Day14.sorted_0_sig_original %>%
  arrange(padj)

#upregulated
hpsc.ec.object_result_Day14.sorted_0_upregulated <- hpsc.ec.object_result_Day14.sorted_0_sig %>%
  filter(log2FoldChange > 0)

#downregulated
hpsc.ec.object_result_Day14.sorted_0_downregulated <- hpsc.ec.object_result_Day14.sorted_0_sig %>%
  filter(log2FoldChange < 0)

#export
write_xlsx(hpsc.ec.object_result_Day14.sorted_0_sig, "DE_Day0vs14.sorted.xlsx")
write_xlsx(hpsc.ec.object_result_Day14.sorted_0_upregulated, "DE_Day0vs14.sorted_Upregulate.xlsx")
write_xlsx(hpsc.ec.object_result_Day14.sorted_0_downregulated, "DE_Day0vs14.sorted_Downregulate.xlsx")

#volcano data.frame from all results
volcano_Day14.sorted_0 <- data.frame(hpsc.ec.object_result_Day14.sorted_0) %>%
  mutate(threshold = padj < 0.05) %>%
  arrange(padj)

#volcano plot
ggplot(volcano_Day14.sorted_0) +
  geom_point(aes(x = log2FoldChange, y = -log10(padj), color = threshold)) +
  geom_text_repel(data=head(volcano_Day14.sorted_0, 50), aes(x=log2FoldChange, y=-log10(padj), label=symbol)) +
  xlab("log2 fold change") +
  ylab("-log10 adjusted p-value") +
  xlim(c(-10,10)) +
  ylim(c(0,200)) +
  ggtitle("Day0 vs Day14.sorted") +
  scale_color_manual(values=c("black", "red")) +
  theme(legend.position = "none",
        plot.title = element_text(size = rel(1.5), hjust = 0.5),
        axis.title = element_text(size = rel(1.25)))

#DESeq2 visualization - heatmap
heatmap_general_Day14.sorted_0 <- normalized_hpsc.ec.object_counts[hpsc.ec.object_result_Day14.sorted_0_sig$symbol,]
pheatmap(heatmap_general_Day14.sorted_0,
         cluster_rows = T,
         cluster_cols = F,
         show_rownames = F,
         show_colnames = T,
         scale = "row",
         main = "Day0 vs Day14.sorted",
         treeheight_row = 0,
         treeheight_col = 0,
         col = heat_colors)

#subset normalized counts to significant genes
sig_norm_counts_hpsc.ec_Day14.sorted_0 <- normalized_hpsc.ec.object_counts[hpsc.ec.object_result_Day14.sorted_0_sig$symbol,c(1:2,11:12)]

#heatmap plot: general scheme
pheatmap(sig_norm_counts_hpsc.ec_Day14.sorted_0,
         cluster_rows = T,
         cluster_cols = F,
         show_rownames = F,
         show_colnames = T,
         scale = "row",
         main = "Day0 vs Day14.sorted",
         treeheight_row = 0,
         treeheight_col = 0)

#topgenes
top_genes_Day14.sorted_0 <- sig_norm_counts_hpsc.ec_Day14.sorted_0[c(1:50),]

pheatmap(top_genes_Day14.sorted_0,
         cluster_rows = T,
         cluster_cols = F,
         show_rownames = T,
         show_colnames = T,
         scale = "row",
         main = "Day0 vs Day14.sorted",
         treeheight_row = 0,
         treeheight_col = 0)

#Expression plot
top20_Day14.sorted_0 <- data.frame(sig_norm_counts_hpsc.ec_Day14.sorted_0)[1:20,] %>%
  rownames_to_column(var = "symbol")

top20_Day14.sorted_0 <- gather(top20_Day14.sorted_0,
                               key = "samplename",
                               value = "normalized_counts",
                               2:5)

top20_Day14.sorted_0 <- inner_join(top20_Day14.sorted_0,
                                   rownames_to_column(meta.condition, var = "samplename"),
                                   by = "samplename")

ggplot(top20_Day14.sorted_0) +
  geom_point(aes(x = symbol, y = normalized_counts, color = timepoints)) + 
  scale_y_log10() +
  xlab("Genes") +
  ylab("Normalized Counts") +
  ggtitle("Top 20 Significant DE Genes: Day0 vs Day14.sorted") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) + theme(plot.title = element_text(hjust = 0.5))


###GO Enrichment Analysis
##Venn Diagram
# area1: Day0 vs Day8 Upregulated
Day8_0_up <- hpsc.ec.object_result_Day8_0_upregulated %>%
  pull(symbol)

# area2: Day0 vs Day14.unsorted Upregulated length
Day14.unsorted_Day0_up <- hpsc.ec.object_result_Day14.unsorted_0_upregulated  %>% 
  pull(symbol)

# area3: Day0 vs Day14.sorted Upregulated length
Day14.sorted_Day0_up <- hpsc.ec.object_result_Day14.sorted_0_upregulated  %>% 
  pull(symbol)

#VENNDIAGRAM
GroupA_up <- Day8_0_up
GroupB_up <- Day14.unsorted_Day0_up
GroupC_up <- Day14.sorted_Day0_up

v1 <- venn.diagram(list(Day8=GroupA_up, Day14.unsorted=GroupB_up, Day14.sorted=GroupC_up), filename=NULL, fill=rainbow(3), lwd=2, cex=2, cat.cex=1.1)
grid.newpage()
grid.draw(v1)

#area1: Day0 vs Day8 downregulated
Day8_0_down <- hpsc.ec.object_result_Day8_0_downregulated %>%
  pull(symbol)

#area2: Day0 vs Day14.unsorted downregulated length
Day14.unsorted_Day0_down <- hpsc.ec.object_result_Day14.unsorted_0_downregulated  %>% 
  pull(symbol)

#area3: Day0 vs Day14.sorted downregulated length
Day14.sorted_Day0_down <- hpsc.ec.object_result_Day14.sorted_0_downregulated  %>% 
  pull(symbol)

#VENNDIAGRAM
GroupA_down <- Day8_0_down
GroupB_down <- Day14.unsorted_Day0_down
GroupC_down <- Day14.sorted_Day0_down

v2 <- venn.diagram(list(Day8=GroupA_down, Day14.unsorted=GroupB_down, Day14.sorted=GroupC_down), filename=NULL, fill=rainbow(3), lwd=2, cex=2, cat.cex=1.1)
grid.newpage()
grid.draw(v2)

###GO Enrichment Analysis based of DE Analysis by Wald Test
##Day8 Upregulated
#geneID for gene symbols
universe_idx_Day8_up <- grch38$symbol %in% hpsc.ec.object_result_Day8_0$symbol
universe_ids_Day8_up <- grch38[universe_idx_Day8_up,]
universe_non_duplicates_Day8 <- which(duplicated(universe_ids_Day8_up$symbol) == FALSE)
universe_ids_Day8_up <- universe_ids_Day8_up[universe_non_duplicates_Day8,]
allOE_genes_Day8_up <- as.character(universe_ids_Day8_up$ensgene)

#geneID for sig gene symbols
idx_Day8_up <- grch38$symbol %in% hpsc.ec.object_result_Day8_0_upregulated$symbol
ids_Day8_up <- grch38[idx_Day8_up, ]
non_duplicates_Day8 <- which(duplicated(ids_Day8_up$symbol) == FALSE)
ids_Day8_up <- ids_Day8_up[non_duplicates_Day8, ] 
sigOE_genes_Day8_up <- as.character(ids_Day8_up$ensgene)

#background dataset
sigOE_genes_Day8_up <- as.character(ids_Day8_up$ensgene)

#GO enrichment analysis 
ego_Day8_up <- enrichGO(gene = sigOE_genes_Day8_up,
                        universe = allOE_genes_Day8_up,
                        keyType = "ENSEMBL",
                        OrgDb = org.Hs.eg.db, 
                        ont = "BP", 
                        pAdjustMethod = "BH", 
                        qvalueCutoff = 0.05, 
                        readable = TRUE)

cluster_summary_Day8_up <- data.frame(ego_Day8_up)
write_xlsx(cluster_summary_Day8_up, "GO Enrichment Analysis Day0 vs. Day8.xlsx")

#dotplot
dotplot(ego_Day8_up, showCategory=50)

#emapplot
emapplot(ego_Day8_up, showCategory = 50)

#####Day14_unsorted Upregulated
#geneID for gene symbols
universe_idx_Day14unsorted_up <- grch38$symbol %in% hpsc.ec.object_result_Day14.unsorted_0$symbol
universe_ids_Day14unsorted_up <- grch38[universe_idx_Day14unsorted_up,]
universe_non_duplicates_Day14unsorted <- which(duplicated(universe_ids_Day14unsorted_up$symbol) == FALSE)
universe_ids_Day14unsorted_up <- universe_ids_Day14unsorted_up[universe_non_duplicates_Day14unsorted,]
allOE_genes_Day14unsorted_up <- as.character(universe_ids_Day14unsorted_up$ensgene)

#remove duplicate IDs
idx_Day14unsorted_up <- grch38$symbol %in% hpsc.ec.object_result_Day14.unsorted_0_upregulated$symbol
ids_Day14unsorted_up <- grch38[idx_Day14unsorted_up, ]
non_duplicates_Day14unsorted <- which(duplicated(ids_Day14unsorted_up$symbol) == FALSE)
ids_Day14unsorted_up <- ids_Day14unsorted_up[non_duplicates_Day14unsorted, ] 
sigOE_genes_Day14unsorted_up <- as.character(ids_Day14unsorted_up$ensgene)

#background dataset
sigOE_genes_Day14unsorted_up <- as.character(ids_Day14unsorted_up$ensgene)

#run GO enrichment analysis
ego_Day14unsorted_up <- enrichGO(gene = sigOE_genes_Day14unsorted_up,
                                 universe = allOE_genes_Day14unsorted_up,
                                 keyType = "ENSEMBL",
                                 OrgDb = org.Hs.eg.db, 
                                 ont = "BP", 
                                 pAdjustMethod = "BH", 
                                 qvalueCutoff = 0.05, 
                                 readable = TRUE)

cluster_summary_Day14unsorted_up <- data.frame(ego_Day14unsorted_up)
write_xlsx(cluster_summary_Day14unsorted_up, "GO Enrichment Analysis Day0 vs. Day14.unsorted.xlsx")

pdf("GO Enrichment Analysis Dot Plot Day0 vs. Day14.sorted.pdf",8,14)
dotplot(ego_Day14unsorted_up, showCategory=50)
dev.off()

pdf("GO Enrichment Analysis Enrichmap Day0 vs. Day14.unsorted.pdf", 24, 32)
emapplot(ego_Day14unsorted_up, showCategory = 50)
dev.off()

###Day14_sorted Upregulated
#gene symbol into IDs
universe_idx_Day14sorted_up <- grch38$symbol %in% hpsc.ec.object_result_Day14.sorted_0$symbol
universe_ids_Day14sorted_up <- grch38[universe_idx_Day14sorted_up,]
universe_non_duplicates_Day14sorted <- which(duplicated(universe_ids_Day14sorted_up$symbol) == FALSE)
universe_ids_Day14sorted_up <- universe_ids_Day14sorted_up[universe_non_duplicates_Day14sorted,]
allOE_genes_Day14sorted_up <- as.character(universe_ids_Day14sorted_up$ensgene)

#remove duplicates
idx_Day14sorted_up <- grch38$symbol %in% hpsc.ec.object_result_Day14.sorted_0_upregulated$symbol
ids_Day14sorted_up <- grch38[idx_Day14sorted_up, ]
non_duplicates_Day14sorted <- which(duplicated(ids_Day14sorted_up$symbol) == FALSE)
ids_Day14sorted_up <- ids_Day14sorted_up[non_duplicates_Day14sorted, ] 
sigOE_genes_Day14sorted_up <- as.character(ids_Day14sorted_up$ensgene)


#run GO enrichment analysis
ego_Day14sorted_up <- enrichGO(gene = sigOE_genes_Day14sorted_up,
                               universe = allOE_genes_Day14sorted_up,
                               keyType = "ENSEMBL",
                               OrgDb = org.Hs.eg.db, 
                               ont = "BP", 
                               pAdjustMethod = "BH", 
                               qvalueCutoff = 0.05, 
                               readable = TRUE)

cluster_summary_Day14sorted_up <- data.frame(ego_Day14sorted_up)
write_xlsx(cluster_summary_Day14sorted_up, "GO Enrichment Analysis Day0 vs. Day14.sorted.xlsx")

pdf("GO Enrichment Analysis Dot Plot Day0 vs. Day14.sorted.pdf",8,14)
dotplot(ego_Day14sorted_up, showCategory=50)
dev.off()

pdf("GO Enrichment Analysis Enrichmap Day0 vs. Day14.unsorted.pdf", 24, 32)
emapplot(ego_Day14sorted_up, showCategory = 50)
dev.off()


###Day8 DOWNregulated
#gene symbols into ID
universe_idx_Day8_down <- grch38$symbol %in% hpsc.ec.object_result_Day8_0$symbol
universe_ids_Day8_down <- grch38[universe_idx_Day8_down,]
universe_non_duplicates_Day8_down <- which(duplicated(universe_ids_Day8_down$symbol) == FALSE)
universe_ids_Day8_down <- universe_ids_Day8_down[universe_non_duplicates_Day8_down,]
allOE_genes_Day8_down <- as.character(universe_ids_Day8_down$ensgene)

#remove duplicate ID
idx_Day8_down <- grch38$symbol %in% hpsc.ec.object_result_Day8_0_downregulated$symbol
ids_Day8_down <- grch38[idx_Day8_down, ]
non_duplicates_Day8_down <- which(duplicated(ids_Day8_down$symbol) == FALSE)
ids_Day8_down <- ids_Day8_down[non_duplicates_Day8_down, ] 
sigOE_genes_Day8_down <- as.character(ids_Day8_down$ensgene)

#background dataset (all genes tested) for hypergeometric testing
sigOE_genes_Day8_down <- as.character(ids_Day8_down$ensgene)

#GO enrichment analysis 
ego_Day8_down <- enrichGO(gene = sigOE_genes_Day8_down,
                          universe = allOE_genes_Day8_down,
                          keyType = "ENSEMBL",
                          OrgDb = org.Hs.eg.db, 
                          ont = "BP", 
                          pAdjustMethod = "BH", 
                          qvalueCutoff = 0.05, 
                          readable = TRUE)

cluster_summary_Day8_down <- data.frame(ego_Day8_down)
write_xlsx(cluster_summary_Day8_down, "GO Enrichment Analysis Day0 vs. Day8 DOWN.xlsx")

pdf("GO Enrichment Analysis Dot Plot Day0 vs. Day8 DOWN.pdf",8,14)
dotplot(ego_Day8_down, showCategory=50)
dev.off()

pdf("GO Enrichment Analysis Enrichmap Day0 vs. Day8.pdf", 24, 32)
emapplot(ego_Day8_down, showCategory = 50)
dev.off()

##Day14_unsorted DOWNregulated
#gene symbol into ID
universe_idx_Day14unsorted_down <- grch38$symbol %in% hpsc.ec.object_result_Day14.unsorted_0$symbol
universe_ids_Day14unsorted_down <- grch38[universe_idx_Day14unsorted_down,]
universe_non_duplicates_Day14unsorted_down <- which(duplicated(universe_ids_Day14unsorted_down$symbol) == FALSE)
universe_ids_Day14unsorted_down <- universe_ids_Day14unsorted_down[universe_non_duplicates_Day14unsorted_down,]
allOE_genes_Day14unsorted_down <- as.character(universe_ids_Day14unsorted_down$ensgene)

#remove duplicate ID
idx_Day14unsorted_down <- grch38$symbol %in% hpsc.ec.object_result_Day14.unsorted_0_downregulated$symbol
ids_Day14unsorted_down <- grch38[idx_Day14unsorted_down, ]
non_duplicates_Day14unsorted_down <- which(duplicated(ids_Day14unsorted_down$symbol) == FALSE)
ids_Day14unsorted_down <- ids_Day14unsorted_down[non_duplicates_Day14unsorted_down, ] 
sigOE_genes_Day14unsorted_down <- as.character(ids_Day14unsorted_down$ensgene)

#background dataset (all genes tested) for hypergeometric testing
sigOE_genes_Day14unsorted_down <- as.character(ids_Day14unsorted_down$ensgene)

#GO Enrichment Analysis
ego_Day14unsorted_down <- enrichGO(gene = sigOE_genes_Day14unsorted_down,
                                   universe = allOE_genes_Day14unsorted_down,
                                   keyType = "ENSEMBL",
                                   OrgDb = org.Hs.eg.db, 
                                   ont = "BP", 
                                   pAdjustMethod = "BH", 
                                   qvalueCutoff = 0.05, 
                                   readable = TRUE)

cluster_summary_Day14unsorted_down <- data.frame(ego_Day14unsorted_down)
write_xlsx(cluster_summary_Day14unsorted_down, "GO Enrichment Analysis Day0 vs. Day14.unsorted Down.xlsx")

pdf("GO Enrichment Analysis Dot Plot Day0 vs. Day14.sorted DOWN.pdf",8,14)
dotplot(ego_Day14unsorted_down, showCategory=50)
dev.off()

pdf("GO Enrichment Analysis Enrichmap Day0 vs. Day14.unsorted DOWN.pdf", 24, 32)
emapplot(ego_Day14unsorted_down, showCategory = 50)
dev.off()

##Day14_sorted Downregulated
#gene symbols into ID
universe_idx_Day14sorted_down <- grch38$symbol %in% hpsc.ec.object_result_Day14.sorted_0$symbol
universe_ids_Day14sorted_down <- grch38[universe_idx_Day14sorted_down,]
universe_non_duplicates_Day14sorted_down <- which(duplicated(universe_ids_Day14sorted_down$symbol) == FALSE)
universe_ids_Day14sorted_down <- universe_ids_Day14sorted_down[universe_non_duplicates_Day14sorted_down,]
allOE_genes_Day14sorted_down <- as.character(universe_ids_Day14sorted_down$ensgene)

#remove duplicate ID
idx_Day14sorted_down <- grch38$symbol %in% hpsc.ec.object_result_Day14.sorted_0_downregulated$symbol
ids_Day14sorted_down <- grch38[idx_Day14sorted_down, ]
non_duplicates_Day14sorted_down <- which(duplicated(ids_Day14sorted_down$symbol) == FALSE)
ids_Day14sorted_down <- ids_Day14sorted_down[non_duplicates_Day14sorted_down, ] 
sigOE_genes_Day14sorted_down <- as.character(ids_Day14sorted_down$ensgene)


#GO Enrichment Analysis 
ego_Day14sorted_down <- enrichGO(gene = sigOE_genes_Day14sorted_down,
                                 universe = allOE_genes_Day14sorted_down,
                                 keyType = "ENSEMBL",
                                 OrgDb = org.Hs.eg.db, 
                                 ont = "BP", 
                                 pAdjustMethod = "BH", 
                                 qvalueCutoff = 0.05, 
                                 readable = TRUE)

cluster_summary_Day14sorted_down <- data.frame(ego_Day14sorted_down)
write_xlsx(cluster_summary_Day14sorted_down, "GO Enrichment Analysis Day0 vs. Day14.sorted Down.xlsx")

pdf("GO Enrichment Analysis Dot Plot Day0 vs. Day14.sorted DOWN.pdf",8,14)
dotplot(ego_Day14sorted_down, showCategory=50)
dev.off()

pdf("GO Enrichment Analysis Enrichmap Day0 vs. Day14.unsorted DOWN.pdf", 24, 32)
emapplot(ego_Day14sorted_down, showCategory = 50)
dev.off()