Dimensions of counts table, and design table:
design <- counts_design[counts_design$Ensembl == 'Empty',]
drops <- c("X","Ensembl")
counts<-counts_design[!counts_design$Ensembl == 'Empty',]
rownames(counts)<-counts$Ensembl
design <- design[ , !(names(design) %in% drops)]
counts <- counts[ , !(names(counts) %in% drops)]
design <- design[ , startsWith(names(design),"L_parva")]
counts <- counts[ , startsWith(names(counts),"L_parva")]
dim(counts)
## [1] 30466 9
# design cateogories (full)
species<-as.character(unlist(design[1,]))
nativephysiology<-as.character(unlist(design[2,]))
clade<-as.character(unlist(design[3,]))
condition<-as.character(unlist(design[5,]))
cols<-colnames(counts)
ExpDesign <- data.frame(row.names=cols,
condition=condition)
ExpDesign
The following shows the dimensions of the dataframe when we filter out genes with low counts.
2 samples must have a count of at least 0.1:
filter <- rownames(counts[rowSums(counts >= 2) >= 0.1,])
filtered_counts <- counts[filter,]
dim(filtered_counts)
## [1] 21573 9
Model results, dispersion plot, and mean variance plot.
resultsNames(dds)
## [1] "Intercept" "condition_15_ppt_vs_0.2_ppt"
## [3] "condition_transfer_vs_0.2_ppt"
plotDispEsts(dds)
vsd <- vst(dds, blind=FALSE)
meanSdPlot(assay(vsd))
plotPCA(vsd, intgroup=c("condition"))
plotPCAWithSampleNames(vsd,intgroup=c("condition"))
##
## Attaching package: 'genefilter'
## The following objects are masked from 'package:matrixStats':
##
## rowSds, rowVars
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 16.1489 10.9767 8.1032 6.91375 6.60877 6.08504
## Proportion of Variance 0.4116 0.1902 0.1036 0.07545 0.06894 0.05844
## Cumulative Proportion 0.4116 0.6018 0.7054 0.78088 0.84982 0.90827
## PC7 PC8 PC9
## Standard deviation 5.42896 5.35212 1.299e-14
## Proportion of Variance 0.04652 0.04521 0.000e+00
## Cumulative Proportion 0.95479 1.00000 1.000e+00
gene_id <- c("avpr2aa","slc24a5","CLDN4","aqp3","cftr","kcnj2a","polyamine-modulated factor 1-like","kcnj1a.6","sept2B","septin-2", "cipcb","clcn2c","zymogen granule membrane protein 16","atp1a1b","solute carrier family 24 member 2")
protein_id <- c("ENSFHEP00000000036","ENSFHEP00000001609","ENSFHEP00000003908","ENSFHEP00000006725","ENSFHEP00000008393","ENSFHEP00000009753","ENSFHEP00000013324",
"ENSFHEP00000015383","ENSFHEP00000015765","ENSFHEP00000016853","ENSFHEP00000017303","ENSFHEP00000019510","ENSFHEP00000025841",
"ENSFHEP00000031108","ENSFHEP00000034177")
res<-results(dds,contrast=c("condition","15_ppt","0.2_ppt"))
res_ordered <-as.data.frame(res[order(res$padj),])
res_filtered <-subset(res_ordered,res_ordered$padj<0.05)
id<-rownames(res_filtered)
res_filtered<-cbind(res_filtered,id)
plot(log2(res$baseMean), res$log2FoldChange,
col=ifelse(res$padj < 0.05, "red","gray67"),
main="L_parva (15_ppt vs. 0.2_ppt) (padj<0.05)",xlim=c(1,15),pch=20,cex=1)
abline(h=c(-1,1), col="blue")
resSig = res_ordered[rownames(res_ordered) %in% protein_id, ]
dim(resSig)
## [1] 15 6
genes<-rownames(resSig)
mygenes <- resSig[,]
baseMean_mygenes <- mygenes[,"baseMean"]
log2FoldChange_mygenes <- mygenes[,"log2FoldChange"]
text(log2(baseMean_mygenes),log2FoldChange_mygenes,labels=gene_id,pos=2,cex=0.60)
gene_id <- c("avpr2aa","slc24a5","CLDN4","aqp3","cftr","kcnj2a","polyamine-modulated factor 1-like","kcnj1a.6","sept2B","septin-2", "cipcb","clcn2c","zymogen granule membrane protein 16","atp1a1b","solute carrier family 24 member 2")
protein_id <- c("ENSFHEP00000000036","ENSFHEP00000001609","ENSFHEP00000003908","ENSFHEP00000006725","ENSFHEP00000008393","ENSFHEP00000009753","ENSFHEP00000013324",
"ENSFHEP00000015383","ENSFHEP00000015765","ENSFHEP00000016853","ENSFHEP00000017303","ENSFHEP00000019510","ENSFHEP00000025841",
"ENSFHEP00000031108","ENSFHEP00000034177")
res<-results(dds,contrast=c("condition","transfer","0.2_ppt"))
res_ordered <-as.data.frame(res[order(res$padj),])
res_filtered <-subset(res_ordered,res_ordered$padj<0.05)
id<-rownames(res_filtered)
res_filtered<-cbind(res_filtered,id)
plot(log2(res$baseMean), res$log2FoldChange,
col=ifelse(res$padj < 0.05, "red","gray67"),
main="L_parva (transfer vs. 0.2_ppt) (padj<0.05)",xlim=c(1,15),pch=20,cex=1)
abline(h=c(-1,1), col="blue")
resSig = res_ordered[rownames(res_ordered) %in% protein_id, ]
dim(resSig)
## [1] 15 6
genes<-rownames(resSig)
mygenes <- resSig[,]
baseMean_mygenes <- mygenes[,"baseMean"]
log2FoldChange_mygenes <- mygenes[,"log2FoldChange"]
text(log2(baseMean_mygenes),log2FoldChange_mygenes,labels=gene_id,pos=2,cex=0.60)
gene_id <- c("avpr2aa","slc24a5","CLDN4","aqp3","cftr","kcnj2a","polyamine-modulated factor 1-like","kcnj1a.6","sept2B","septin-2", "cipcb","clcn2c","zymogen granule membrane protein 16","atp1a1b","solute carrier family 24 member 2")
protein_id <- c("ENSFHEP00000000036","ENSFHEP00000001609","ENSFHEP00000003908","ENSFHEP00000006725","ENSFHEP00000008393","ENSFHEP00000009753","ENSFHEP00000013324",
"ENSFHEP00000015383","ENSFHEP00000015765","ENSFHEP00000016853","ENSFHEP00000017303","ENSFHEP00000019510","ENSFHEP00000025841",
"ENSFHEP00000031108","ENSFHEP00000034177")
res<-results(dds,contrast=c("condition","transfer","15_ppt"))
res_ordered <-as.data.frame(res[order(res$padj),])
res_filtered <-subset(res_ordered,res_ordered$padj<0.05)
id<-rownames(res_filtered)
res_filtered<-cbind(res_filtered,id)
plot(log2(res$baseMean), res$log2FoldChange,
col=ifelse(res$padj < 0.05, "red","gray67"),
main="L_parva (transfer vs. 15_ppt) (padj<0.05)",xlim=c(1,15),pch=20,cex=1)
abline(h=c(-1,1), col="blue")
resSig = res_ordered[rownames(res_ordered) %in% protein_id, ]
dim(resSig)
## [1] 15 6
genes<-rownames(resSig)
mygenes <- resSig[,]
baseMean_mygenes <- mygenes[,"baseMean"]
log2FoldChange_mygenes <- mygenes[,"log2FoldChange"]
text(log2(baseMean_mygenes),log2FoldChange_mygenes,labels=gene_id,pos=2,cex=0.60)
tcounts <- t(log2((counts(dds[c("ENSFHEP00000000036"), ], normalized=TRUE, replaced=FALSE)+.5))) %>%
merge(colData(dds), ., by="row.names") %>%
gather(gene, expression, (ncol(.)-1+1):ncol(.))
C1<-ggplot(tcounts, aes(condition, expression,group=1)) +
geom_point() +
scale_x_discrete(limits=c('0.2_ppt','transfer','15_ppt')) +
stat_summary(fun.y="mean", geom="line") +
stat_summary(fun.data=mean_sdl, fun.args = list(mult=1),
geom="errorbar",width=0.2) +
theme_bw() +
theme(legend.position="none",panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(y="Expression (log2 normalized counts)")+
ggtitle("avpr2aa")
plot(C1)
tcounts <- t(log2((counts(dds[c("ENSFHEP00000001609"), ], normalized=TRUE, replaced=FALSE)+.5))) %>%
merge(colData(dds), ., by="row.names") %>%
gather(gene, expression, (ncol(.)-1+1):ncol(.))
C1<-ggplot(tcounts, aes(condition, expression,group=1)) +
geom_point() +
scale_x_discrete(limits=c('0.2_ppt','transfer','15_ppt')) +
stat_summary(fun.y="mean", geom="line") +
stat_summary(fun.data=mean_sdl, fun.args = list(mult=1),
geom="errorbar",width=0.2) +
theme_bw() +
theme(legend.position="none",panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(y="Expression (log2 normalized counts)")+
ggtitle("slc24a5")
plot(C1)
tcounts <- t(log2((counts(dds[c("ENSFHEP00000003908"), ], normalized=TRUE, replaced=FALSE)+.5))) %>%
merge(colData(dds), ., by="row.names") %>%
gather(gene, expression, (ncol(.)-1+1):ncol(.))
C1<-ggplot(tcounts, aes(condition, expression,group=1)) +
geom_point() +
scale_x_discrete(limits=c('0.2_ppt','transfer','15_ppt')) +
stat_summary(fun.y="mean", geom="line") +
stat_summary(fun.data=mean_sdl, fun.args = list(mult=1),
geom="errorbar",width=0.2) +
theme_bw() +
theme(legend.position="none",panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(y="Expression (log2 normalized counts)")+
ggtitle("CLDN4")
plot(C1)
tcounts <- t(log2((counts(dds[c("ENSFHEP00000006725"), ], normalized=TRUE, replaced=FALSE)+.5))) %>%
merge(colData(dds), ., by="row.names") %>%
gather(gene, expression, (ncol(.)-1+1):ncol(.))
C1<-ggplot(tcounts, aes(condition, expression,group=1)) +
geom_point() +
scale_x_discrete(limits=c('0.2_ppt','transfer','15_ppt')) +
stat_summary(fun.y="mean", geom="line") +
stat_summary(fun.data=mean_sdl, fun.args = list(mult=1),
geom="errorbar",width=0.2) +
theme_bw() +
theme(legend.position="none",panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(y="Expression (log2 normalized counts)")+
ggtitle("aqp3")
plot(C1)
tcounts <- t(log2((counts(dds[c("ENSFHEP00000008393"), ], normalized=TRUE, replaced=FALSE)+.5))) %>%
merge(colData(dds), ., by="row.names") %>%
gather(gene, expression, (ncol(.)-1+1):ncol(.))
C1<-ggplot(tcounts, aes(condition, expression,group=1)) +
geom_point() +
scale_x_discrete(limits=c('0.2_ppt','transfer','15_ppt')) +
stat_summary(fun.y="mean", geom="line") +
stat_summary(fun.data=mean_sdl, fun.args = list(mult=1),
geom="errorbar",width=0.2) +
theme_bw() +
theme(legend.position="none",panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(y="Expression (log2 normalized counts)")+
ggtitle("cftr")
plot(C1)
tcounts <- t(log2((counts(dds[c("ENSFHEP00000009753"), ], normalized=TRUE, replaced=FALSE)+.5))) %>%
merge(colData(dds), ., by="row.names") %>%
gather(gene, expression, (ncol(.)-1+1):ncol(.))
C1<-ggplot(tcounts, aes(condition, expression,group=1)) +
geom_point() +
scale_x_discrete(limits=c('0.2_ppt','transfer','15_ppt')) +
stat_summary(fun.y="mean", geom="line") +
stat_summary(fun.data=mean_sdl, fun.args = list(mult=1),
geom="errorbar",width=0.2) +
theme_bw() +
theme(legend.position="none",panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(y="Expression (log2 normalized counts)")+
ggtitle("kcnj2a")
plot(C1)
tcounts <- t(log2((counts(dds[c("ENSFHEP00000013324"), ], normalized=TRUE, replaced=FALSE)+.5))) %>%
merge(colData(dds), ., by="row.names") %>%
gather(gene, expression, (ncol(.)-1+1):ncol(.))
C1<-ggplot(tcounts, aes(condition, expression,group=1)) +
geom_point() +
scale_x_discrete(limits=c('0.2_ppt','transfer','15_ppt')) +
stat_summary(fun.y="mean", geom="line") +
stat_summary(fun.data=mean_sdl, fun.args = list(mult=1),
geom="errorbar",width=0.2) +
theme_bw() +
theme(legend.position="none",panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(y="Expression (log2 normalized counts)")+
ggtitle("polyamine-modulated factor 1-like")
plot(C1)
tcounts <- t(log2((counts(dds[c("ENSFHEP00000015383"), ], normalized=TRUE, replaced=FALSE)+.5))) %>%
merge(colData(dds), ., by="row.names") %>%
gather(gene, expression, (ncol(.)-1+1):ncol(.))
C1<-ggplot(tcounts, aes(condition, expression,group=1)) +
geom_point() +
scale_x_discrete(limits=c('0.2_ppt','transfer','15_ppt')) +
stat_summary(fun.y="mean", geom="line") +
stat_summary(fun.data=mean_sdl, fun.args = list(mult=1),
geom="errorbar",width=0.2) +
theme_bw() +
theme(legend.position="none",panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(y="Expression (log2 normalized counts)")+
ggtitle("kcnj1a.6")
plot(C1)
tcounts <- t(log2((counts(dds[c("ENSFHEP00000015765"), ], normalized=TRUE, replaced=FALSE)+.5))) %>%
merge(colData(dds), ., by="row.names") %>%
gather(gene, expression, (ncol(.)-1+1):ncol(.))
C1<-ggplot(tcounts, aes(condition, expression,group=1)) +
geom_point() +
scale_x_discrete(limits=c('0.2_ppt','transfer','15_ppt')) +
stat_summary(fun.y="mean", geom="line") +
stat_summary(fun.data=mean_sdl, fun.args = list(mult=1),
geom="errorbar",width=0.2) +
theme_bw() +
theme(legend.position="none",panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(y="Expression (log2 normalized counts)")+
ggtitle("sept2B")
plot(C1)
tcounts <- t(log2((counts(dds[c("ENSFHEP00000016853"), ], normalized=TRUE, replaced=FALSE)+.5))) %>%
merge(colData(dds), ., by="row.names") %>%
gather(gene, expression, (ncol(.)-1+1):ncol(.))
C1<-ggplot(tcounts, aes(condition, expression,group=1)) +
geom_point() +
scale_x_discrete(limits=c('0.2_ppt','transfer','15_ppt')) +
stat_summary(fun.y="mean", geom="line") +
stat_summary(fun.data=mean_sdl, fun.args = list(mult=1),
geom="errorbar",width=0.2) +
theme_bw() +
theme(legend.position="none",panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(y="Expression (log2 normalized counts)")+
ggtitle("septin-2")
plot(C1)
tcounts <- t(log2((counts(dds[c("ENSFHEP00000017303"), ], normalized=TRUE, replaced=FALSE)+.5))) %>%
merge(colData(dds), ., by="row.names") %>%
gather(gene, expression, (ncol(.)-1+1):ncol(.))
C1<-ggplot(tcounts, aes(condition, expression,group=1)) +
geom_point() +
scale_x_discrete(limits=c('0.2_ppt','transfer','15_ppt')) +
stat_summary(fun.y="mean", geom="line") +
stat_summary(fun.data=mean_sdl, fun.args = list(mult=1),
geom="errorbar",width=0.2) +
theme_bw() +
theme(legend.position="none",panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(y="Expression (log2 normalized counts)")+
ggtitle("cipcb")
plot(C1)
tcounts <- t(log2((counts(dds[c("ENSFHEP00000019510"), ], normalized=TRUE, replaced=FALSE)+.5))) %>%
merge(colData(dds), ., by="row.names") %>%
gather(gene, expression, (ncol(.)-1+1):ncol(.))
C1<-ggplot(tcounts, aes(condition, expression,group=1)) +
geom_point() +
scale_x_discrete(limits=c('0.2_ppt','transfer','15_ppt')) +
stat_summary(fun.y="mean", geom="line") +
stat_summary(fun.data=mean_sdl, fun.args = list(mult=1),
geom="errorbar",width=0.2) +
theme_bw() +
theme(legend.position="none",panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(y="Expression (log2 normalized counts)")+
ggtitle("clcn2c")
plot(C1)
tcounts <- t(log2((counts(dds[c("ENSFHEP00000025841"), ], normalized=TRUE, replaced=FALSE)+.5))) %>%
merge(colData(dds), ., by="row.names") %>%
gather(gene, expression, (ncol(.)-1+1):ncol(.))
C1<-ggplot(tcounts, aes(condition, expression,group=1)) +
geom_point() +
scale_x_discrete(limits=c('0.2_ppt','transfer','15_ppt')) +
stat_summary(fun.y="mean", geom="line") +
stat_summary(fun.data=mean_sdl, fun.args = list(mult=1),
geom="errorbar",width=0.2) +
theme_bw() +
theme(legend.position="none",panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(y="Expression (log2 normalized counts)")+
ggtitle("zymogen granule membrane protein 16")
plot(C1)
tcounts <- t(log2((counts(dds[c("ENSFHEP00000031108"), ], normalized=TRUE, replaced=FALSE)+.5))) %>%
merge(colData(dds), ., by="row.names") %>%
gather(gene, expression, (ncol(.)-1+1):ncol(.))
C1<-ggplot(tcounts, aes(condition, expression,group=1)) +
geom_point() +
scale_x_discrete(limits=c('0.2_ppt','transfer','15_ppt')) +
stat_summary(fun.y="mean", geom="line") +
stat_summary(fun.data=mean_sdl, fun.args = list(mult=1),
geom="errorbar",width=0.2) +
theme_bw() +
theme(legend.position="none",panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(y="Expression (log2 normalized counts)")+
ggtitle("atp1a1b")
plot(C1)
tcounts <- t(log2((counts(dds[c("ENSFHEP00000034177"), ], normalized=TRUE, replaced=FALSE)+.5))) %>%
merge(colData(dds), ., by="row.names") %>%
gather(gene, expression, (ncol(.)-1+1):ncol(.))
C1<-ggplot(tcounts, aes(condition, expression,group=1)) +
geom_point() +
scale_x_discrete(limits=c('0.2_ppt','transfer','15_ppt')) +
stat_summary(fun.y="mean", geom="line") +
stat_summary(fun.data=mean_sdl, fun.args = list(mult=1),
geom="errorbar",width=0.2) +
theme_bw() +
theme(legend.position="none",panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(y="Expression (log2 normalized counts)")+
ggtitle("solute carrier family 24 member 2")
plot(C1)
After filtering for low expression (where rowSum is greater than or equal to 1):
Number of significant genes between conditions 15ppt vs. 0.2ppt, padj <0.05:
sig <- subset(counts_table_stats, counts_table_stats$`padj-15ppt-v-0.2ppt`<= 0.05)
dim(sig)
## [1] 1081 29
sig_id <- sig$row
counts_table <- counts(dds,normalized=TRUE)
counts_sig <- counts_table[rownames(counts_table) %in% sig_id,]
id <- sig_id
d<-as.matrix(counts_sig)
hr <- hclust(as.dist(1-cor(t(d), method="pearson")), method="complete")
mycl <- cutree(hr, h=max(hr$height/1.5))
clusterCols <- rainbow(length(unique(mycl)))
myClusterSideBar <- clusterCols[mycl]
myheatcol <- greenred(75)
heatmap.2(d, main="L_parva, padj<0.05",
Rowv=as.dendrogram(hr),
cexRow=0.75,cexCol=0.8,srtCol= 90,
adjCol = c(NA,0),offsetCol=2.5,
Colv=NA, dendrogram="row",
scale="row", col=myheatcol,
density.info="none",
trace="none", RowSideColors= myClusterSideBar)