1 Load counts and design:

Dimensions of counts table, and design table:

design <- counts_design[counts_design$Ensembl == 'Empty',]
drops <- c("X","Ensembl")
counts<-counts_design[!counts_design$Ensembl == 'Empty',]
rownames(counts)<-counts$Ensembl
design <- design[ , !(names(design) %in% drops)]
counts <- counts[ , !(names(counts) %in% drops)]
design <- design[ , startsWith(names(design),"L_parva")]
counts <- counts[ , startsWith(names(counts),"L_parva")]
dim(counts)
## [1] 30466     9
# design cateogories (full)
species<-as.character(unlist(design[1,]))
nativephysiology<-as.character(unlist(design[2,]))
clade<-as.character(unlist(design[3,]))
condition<-as.character(unlist(design[5,]))
cols<-colnames(counts)
ExpDesign <- data.frame(row.names=cols,
                        condition=condition)
ExpDesign

2 Filtering counts

The following shows the dimensions of the dataframe when we filter out genes with low counts.

2 samples must have a count of at least 0.1:

filter <- rownames(counts[rowSums(counts >= 2) >= 0.1,])
filtered_counts <- counts[filter,]
dim(filtered_counts)
## [1] 21573     9

3 DESeq

Model results, dispersion plot, and mean variance plot.

resultsNames(dds)
## [1] "Intercept"                     "condition_15_ppt_vs_0.2_ppt"  
## [3] "condition_transfer_vs_0.2_ppt"
plotDispEsts(dds)

vsd <- vst(dds, blind=FALSE)
meanSdPlot(assay(vsd))

4 PCA

plotPCA(vsd, intgroup=c("condition"))

plotPCAWithSampleNames(vsd,intgroup=c("condition"))
## 
## Attaching package: 'genefilter'
## The following objects are masked from 'package:matrixStats':
## 
##     rowSds, rowVars
## Importance of components:
##                            PC1     PC2    PC3     PC4     PC5     PC6
## Standard deviation     16.1489 10.9767 8.1032 6.91375 6.60877 6.08504
## Proportion of Variance  0.4116  0.1902 0.1036 0.07545 0.06894 0.05844
## Cumulative Proportion   0.4116  0.6018 0.7054 0.78088 0.84982 0.90827
##                            PC7     PC8       PC9
## Standard deviation     5.42896 5.35212 1.299e-14
## Proportion of Variance 0.04652 0.04521 0.000e+00
## Cumulative Proportion  0.95479 1.00000 1.000e+00

5 MA plot, 15_ppt vs. 0.2_ppt

gene_id <- c("avpr2aa","slc24a5","CLDN4","aqp3","cftr","kcnj2a","polyamine-modulated factor 1-like","kcnj1a.6","sept2B","septin-2", "cipcb","clcn2c","zymogen granule membrane protein 16","atp1a1b","solute carrier family 24 member 2")
protein_id <- c("ENSFHEP00000000036","ENSFHEP00000001609","ENSFHEP00000003908","ENSFHEP00000006725","ENSFHEP00000008393","ENSFHEP00000009753","ENSFHEP00000013324",
                "ENSFHEP00000015383","ENSFHEP00000015765","ENSFHEP00000016853","ENSFHEP00000017303","ENSFHEP00000019510","ENSFHEP00000025841",
                "ENSFHEP00000031108","ENSFHEP00000034177")
res<-results(dds,contrast=c("condition","15_ppt","0.2_ppt"))
res_ordered <-as.data.frame(res[order(res$padj),])
res_filtered <-subset(res_ordered,res_ordered$padj<0.05)
id<-rownames(res_filtered)
res_filtered<-cbind(res_filtered,id)
plot(log2(res$baseMean), res$log2FoldChange, 
     col=ifelse(res$padj < 0.05, "red","gray67"),
     main="L_parva (15_ppt vs. 0.2_ppt) (padj<0.05)",xlim=c(1,15),pch=20,cex=1)
abline(h=c(-1,1), col="blue")
resSig = res_ordered[rownames(res_ordered) %in% protein_id, ]
dim(resSig)
## [1] 15  6
genes<-rownames(resSig)
mygenes <- resSig[,]
baseMean_mygenes <- mygenes[,"baseMean"]
log2FoldChange_mygenes <- mygenes[,"log2FoldChange"]
text(log2(baseMean_mygenes),log2FoldChange_mygenes,labels=gene_id,pos=2,cex=0.60)

6 MA plot, transfer vs. 0.2_ppt

gene_id <- c("avpr2aa","slc24a5","CLDN4","aqp3","cftr","kcnj2a","polyamine-modulated factor 1-like","kcnj1a.6","sept2B","septin-2", "cipcb","clcn2c","zymogen granule membrane protein 16","atp1a1b","solute carrier family 24 member 2")
protein_id <- c("ENSFHEP00000000036","ENSFHEP00000001609","ENSFHEP00000003908","ENSFHEP00000006725","ENSFHEP00000008393","ENSFHEP00000009753","ENSFHEP00000013324",
                "ENSFHEP00000015383","ENSFHEP00000015765","ENSFHEP00000016853","ENSFHEP00000017303","ENSFHEP00000019510","ENSFHEP00000025841",
                "ENSFHEP00000031108","ENSFHEP00000034177")
res<-results(dds,contrast=c("condition","transfer","0.2_ppt"))
res_ordered <-as.data.frame(res[order(res$padj),])
res_filtered <-subset(res_ordered,res_ordered$padj<0.05)
id<-rownames(res_filtered)
res_filtered<-cbind(res_filtered,id)
plot(log2(res$baseMean), res$log2FoldChange, 
     col=ifelse(res$padj < 0.05, "red","gray67"),
     main="L_parva (transfer vs. 0.2_ppt) (padj<0.05)",xlim=c(1,15),pch=20,cex=1)
abline(h=c(-1,1), col="blue")
resSig = res_ordered[rownames(res_ordered) %in% protein_id, ]
dim(resSig)
## [1] 15  6
genes<-rownames(resSig)
mygenes <- resSig[,]
baseMean_mygenes <- mygenes[,"baseMean"]
log2FoldChange_mygenes <- mygenes[,"log2FoldChange"]
text(log2(baseMean_mygenes),log2FoldChange_mygenes,labels=gene_id,pos=2,cex=0.60)

7 MA plot, transfer vs. 15_ppt

gene_id <- c("avpr2aa","slc24a5","CLDN4","aqp3","cftr","kcnj2a","polyamine-modulated factor 1-like","kcnj1a.6","sept2B","septin-2", "cipcb","clcn2c","zymogen granule membrane protein 16","atp1a1b","solute carrier family 24 member 2")
protein_id <- c("ENSFHEP00000000036","ENSFHEP00000001609","ENSFHEP00000003908","ENSFHEP00000006725","ENSFHEP00000008393","ENSFHEP00000009753","ENSFHEP00000013324",
                "ENSFHEP00000015383","ENSFHEP00000015765","ENSFHEP00000016853","ENSFHEP00000017303","ENSFHEP00000019510","ENSFHEP00000025841",
                "ENSFHEP00000031108","ENSFHEP00000034177")
res<-results(dds,contrast=c("condition","transfer","15_ppt"))
res_ordered <-as.data.frame(res[order(res$padj),])
res_filtered <-subset(res_ordered,res_ordered$padj<0.05)
id<-rownames(res_filtered)
res_filtered<-cbind(res_filtered,id)
plot(log2(res$baseMean), res$log2FoldChange, 
     col=ifelse(res$padj < 0.05, "red","gray67"),
     main="L_parva (transfer vs. 15_ppt) (padj<0.05)",xlim=c(1,15),pch=20,cex=1)
abline(h=c(-1,1), col="blue")
resSig = res_ordered[rownames(res_ordered) %in% protein_id, ]
dim(resSig)
## [1] 15  6
genes<-rownames(resSig)
mygenes <- resSig[,]
baseMean_mygenes <- mygenes[,"baseMean"]
log2FoldChange_mygenes <- mygenes[,"log2FoldChange"]
text(log2(baseMean_mygenes),log2FoldChange_mygenes,labels=gene_id,pos=2,cex=0.60)

8 Salinity-responsive genes of interest

8.1 avpr2aa

tcounts <- t(log2((counts(dds[c("ENSFHEP00000000036"), ], normalized=TRUE, replaced=FALSE)+.5))) %>% 
  merge(colData(dds), ., by="row.names") %>% 
  gather(gene, expression, (ncol(.)-1+1):ncol(.))

C1<-ggplot(tcounts, aes(condition, expression,group=1)) +
  geom_point() + 
  scale_x_discrete(limits=c('0.2_ppt','transfer','15_ppt')) +
  stat_summary(fun.y="mean", geom="line") +
  stat_summary(fun.data=mean_sdl, fun.args = list(mult=1), 
               geom="errorbar",width=0.2) +
  theme_bw() +
  theme(legend.position="none",panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(y="Expression (log2 normalized counts)")+
  ggtitle("avpr2aa")
plot(C1)

8.2 slc24a5

tcounts <- t(log2((counts(dds[c("ENSFHEP00000001609"), ], normalized=TRUE, replaced=FALSE)+.5))) %>% 
  merge(colData(dds), ., by="row.names") %>% 
  gather(gene, expression, (ncol(.)-1+1):ncol(.))

C1<-ggplot(tcounts, aes(condition, expression,group=1)) +
  geom_point() + 
  scale_x_discrete(limits=c('0.2_ppt','transfer','15_ppt')) +
  stat_summary(fun.y="mean", geom="line") +
  stat_summary(fun.data=mean_sdl, fun.args = list(mult=1), 
               geom="errorbar",width=0.2) +
  theme_bw() +
  theme(legend.position="none",panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(y="Expression (log2 normalized counts)")+
  ggtitle("slc24a5")
plot(C1)

8.3 CLDN4

tcounts <- t(log2((counts(dds[c("ENSFHEP00000003908"), ], normalized=TRUE, replaced=FALSE)+.5))) %>% 
  merge(colData(dds), ., by="row.names") %>% 
  gather(gene, expression, (ncol(.)-1+1):ncol(.))

C1<-ggplot(tcounts, aes(condition, expression,group=1)) +
  geom_point() + 
  scale_x_discrete(limits=c('0.2_ppt','transfer','15_ppt')) +
  stat_summary(fun.y="mean", geom="line") +
  stat_summary(fun.data=mean_sdl, fun.args = list(mult=1), 
               geom="errorbar",width=0.2) +
  theme_bw() +
  theme(legend.position="none",panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(y="Expression (log2 normalized counts)")+
  ggtitle("CLDN4")
plot(C1)

8.4 aqp3

tcounts <- t(log2((counts(dds[c("ENSFHEP00000006725"), ], normalized=TRUE, replaced=FALSE)+.5))) %>% 
  merge(colData(dds), ., by="row.names") %>% 
  gather(gene, expression, (ncol(.)-1+1):ncol(.))

C1<-ggplot(tcounts, aes(condition, expression,group=1)) +
  geom_point() + 
  scale_x_discrete(limits=c('0.2_ppt','transfer','15_ppt')) +
  stat_summary(fun.y="mean", geom="line") +
  stat_summary(fun.data=mean_sdl, fun.args = list(mult=1), 
               geom="errorbar",width=0.2) +
  theme_bw() +
  theme(legend.position="none",panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(y="Expression (log2 normalized counts)")+
  ggtitle("aqp3")
plot(C1)

8.5 cftr

tcounts <- t(log2((counts(dds[c("ENSFHEP00000008393"), ], normalized=TRUE, replaced=FALSE)+.5))) %>% 
  merge(colData(dds), ., by="row.names") %>% 
  gather(gene, expression, (ncol(.)-1+1):ncol(.))

C1<-ggplot(tcounts, aes(condition, expression,group=1)) +
  geom_point() + 
  scale_x_discrete(limits=c('0.2_ppt','transfer','15_ppt')) +
  stat_summary(fun.y="mean", geom="line") +
  stat_summary(fun.data=mean_sdl, fun.args = list(mult=1), 
               geom="errorbar",width=0.2) +
  theme_bw() +
  theme(legend.position="none",panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(y="Expression (log2 normalized counts)")+
  ggtitle("cftr")
plot(C1)

8.6 kcnj2a

tcounts <- t(log2((counts(dds[c("ENSFHEP00000009753"), ], normalized=TRUE, replaced=FALSE)+.5))) %>% 
  merge(colData(dds), ., by="row.names") %>% 
  gather(gene, expression, (ncol(.)-1+1):ncol(.))

C1<-ggplot(tcounts, aes(condition, expression,group=1)) +
  geom_point() + 
  scale_x_discrete(limits=c('0.2_ppt','transfer','15_ppt')) +
  stat_summary(fun.y="mean", geom="line") +
  stat_summary(fun.data=mean_sdl, fun.args = list(mult=1), 
               geom="errorbar",width=0.2) +
  theme_bw() +
  theme(legend.position="none",panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(y="Expression (log2 normalized counts)")+
  ggtitle("kcnj2a")
plot(C1)

8.7 polyamine-modulated factor 1-like

tcounts <- t(log2((counts(dds[c("ENSFHEP00000013324"), ], normalized=TRUE, replaced=FALSE)+.5))) %>% 
  merge(colData(dds), ., by="row.names") %>% 
  gather(gene, expression, (ncol(.)-1+1):ncol(.))

C1<-ggplot(tcounts, aes(condition, expression,group=1)) +
  geom_point() + 
  scale_x_discrete(limits=c('0.2_ppt','transfer','15_ppt')) +
  stat_summary(fun.y="mean", geom="line") +
  stat_summary(fun.data=mean_sdl, fun.args = list(mult=1), 
               geom="errorbar",width=0.2) +
  theme_bw() +
  theme(legend.position="none",panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(y="Expression (log2 normalized counts)")+
  ggtitle("polyamine-modulated factor 1-like")
plot(C1)

8.8 kcnj1a.6

tcounts <- t(log2((counts(dds[c("ENSFHEP00000015383"), ], normalized=TRUE, replaced=FALSE)+.5))) %>% 
  merge(colData(dds), ., by="row.names") %>% 
  gather(gene, expression, (ncol(.)-1+1):ncol(.))

C1<-ggplot(tcounts, aes(condition, expression,group=1)) +
  geom_point() + 
  scale_x_discrete(limits=c('0.2_ppt','transfer','15_ppt')) +
  stat_summary(fun.y="mean", geom="line") +
  stat_summary(fun.data=mean_sdl, fun.args = list(mult=1), 
               geom="errorbar",width=0.2) +
  theme_bw() +
  theme(legend.position="none",panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(y="Expression (log2 normalized counts)")+
  ggtitle("kcnj1a.6")
plot(C1)

8.9 sept2B

tcounts <- t(log2((counts(dds[c("ENSFHEP00000015765"), ], normalized=TRUE, replaced=FALSE)+.5))) %>% 
  merge(colData(dds), ., by="row.names") %>% 
  gather(gene, expression, (ncol(.)-1+1):ncol(.))

C1<-ggplot(tcounts, aes(condition, expression,group=1)) +
  geom_point() + 
  scale_x_discrete(limits=c('0.2_ppt','transfer','15_ppt')) +
  stat_summary(fun.y="mean", geom="line") +
  stat_summary(fun.data=mean_sdl, fun.args = list(mult=1), 
               geom="errorbar",width=0.2) +
  theme_bw() +
  theme(legend.position="none",panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(y="Expression (log2 normalized counts)")+
  ggtitle("sept2B")
plot(C1)

8.10 septin-2

tcounts <- t(log2((counts(dds[c("ENSFHEP00000016853"), ], normalized=TRUE, replaced=FALSE)+.5))) %>% 
  merge(colData(dds), ., by="row.names") %>% 
  gather(gene, expression, (ncol(.)-1+1):ncol(.))

C1<-ggplot(tcounts, aes(condition, expression,group=1)) +
  geom_point() + 
  scale_x_discrete(limits=c('0.2_ppt','transfer','15_ppt')) +
  stat_summary(fun.y="mean", geom="line") +
  stat_summary(fun.data=mean_sdl, fun.args = list(mult=1), 
               geom="errorbar",width=0.2) +
  theme_bw() +
  theme(legend.position="none",panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(y="Expression (log2 normalized counts)")+
  ggtitle("septin-2")
plot(C1)

8.11 cipcb

tcounts <- t(log2((counts(dds[c("ENSFHEP00000017303"), ], normalized=TRUE, replaced=FALSE)+.5))) %>% 
  merge(colData(dds), ., by="row.names") %>% 
  gather(gene, expression, (ncol(.)-1+1):ncol(.))

C1<-ggplot(tcounts, aes(condition, expression,group=1)) +
  geom_point() + 
  scale_x_discrete(limits=c('0.2_ppt','transfer','15_ppt')) +
  stat_summary(fun.y="mean", geom="line") +
  stat_summary(fun.data=mean_sdl, fun.args = list(mult=1), 
               geom="errorbar",width=0.2) +
  theme_bw() +
  theme(legend.position="none",panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(y="Expression (log2 normalized counts)")+
  ggtitle("cipcb")
plot(C1)

8.12 clcn2c

tcounts <- t(log2((counts(dds[c("ENSFHEP00000019510"), ], normalized=TRUE, replaced=FALSE)+.5))) %>% 
  merge(colData(dds), ., by="row.names") %>% 
  gather(gene, expression, (ncol(.)-1+1):ncol(.))

C1<-ggplot(tcounts, aes(condition, expression,group=1)) +
  geom_point() + 
  scale_x_discrete(limits=c('0.2_ppt','transfer','15_ppt')) +
  stat_summary(fun.y="mean", geom="line") +
  stat_summary(fun.data=mean_sdl, fun.args = list(mult=1), 
               geom="errorbar",width=0.2) +
  theme_bw() +
  theme(legend.position="none",panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(y="Expression (log2 normalized counts)")+
  ggtitle("clcn2c")
plot(C1)

8.13 zymogen granule membrane protein 16

tcounts <- t(log2((counts(dds[c("ENSFHEP00000025841"), ], normalized=TRUE, replaced=FALSE)+.5))) %>% 
  merge(colData(dds), ., by="row.names") %>% 
  gather(gene, expression, (ncol(.)-1+1):ncol(.))

C1<-ggplot(tcounts, aes(condition, expression,group=1)) +
  geom_point() + 
  scale_x_discrete(limits=c('0.2_ppt','transfer','15_ppt')) +
  stat_summary(fun.y="mean", geom="line") +
  stat_summary(fun.data=mean_sdl, fun.args = list(mult=1), 
               geom="errorbar",width=0.2) +
  theme_bw() +
  theme(legend.position="none",panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(y="Expression (log2 normalized counts)")+
  ggtitle("zymogen granule membrane protein 16")
plot(C1)

8.14 atp1a1b

tcounts <- t(log2((counts(dds[c("ENSFHEP00000031108"), ], normalized=TRUE, replaced=FALSE)+.5))) %>% 
  merge(colData(dds), ., by="row.names") %>% 
  gather(gene, expression, (ncol(.)-1+1):ncol(.))

C1<-ggplot(tcounts, aes(condition, expression,group=1)) +
  geom_point() + 
  scale_x_discrete(limits=c('0.2_ppt','transfer','15_ppt')) +
  stat_summary(fun.y="mean", geom="line") +
  stat_summary(fun.data=mean_sdl, fun.args = list(mult=1), 
               geom="errorbar",width=0.2) +
  theme_bw() +
  theme(legend.position="none",panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(y="Expression (log2 normalized counts)")+
  ggtitle("atp1a1b")
plot(C1)

8.15 solute carrier family 24 member 2

tcounts <- t(log2((counts(dds[c("ENSFHEP00000034177"), ], normalized=TRUE, replaced=FALSE)+.5))) %>% 
  merge(colData(dds), ., by="row.names") %>% 
  gather(gene, expression, (ncol(.)-1+1):ncol(.))

C1<-ggplot(tcounts, aes(condition, expression,group=1)) +
  geom_point() + 
  scale_x_discrete(limits=c('0.2_ppt','transfer','15_ppt')) +
  stat_summary(fun.y="mean", geom="line") +
  stat_summary(fun.data=mean_sdl, fun.args = list(mult=1), 
               geom="errorbar",width=0.2) +
  theme_bw() +
  theme(legend.position="none",panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(y="Expression (log2 normalized counts)")+
  ggtitle("solute carrier family 24 member 2")
plot(C1)

After filtering for low expression (where rowSum is greater than or equal to 1):

9 Significant genes

Number of significant genes between conditions 15ppt vs. 0.2ppt, padj <0.05:

sig <- subset(counts_table_stats, counts_table_stats$`padj-15ppt-v-0.2ppt`<= 0.05)
dim(sig)
## [1] 1081   29
sig_id <- sig$row
counts_table <- counts(dds,normalized=TRUE)
counts_sig <- counts_table[rownames(counts_table) %in% sig_id,]

10 Heatmap

id <- sig_id
d<-as.matrix(counts_sig)
hr <- hclust(as.dist(1-cor(t(d), method="pearson")), method="complete")
mycl <- cutree(hr, h=max(hr$height/1.5))
clusterCols <- rainbow(length(unique(mycl)))
myClusterSideBar <- clusterCols[mycl]
myheatcol <- greenred(75)
heatmap.2(d, main="L_parva, padj<0.05",
          Rowv=as.dendrogram(hr),
          cexRow=0.75,cexCol=0.8,srtCol= 90,
          adjCol = c(NA,0),offsetCol=2.5, 
          Colv=NA, dendrogram="row", 
          scale="row", col=myheatcol, 
          density.info="none", 
          trace="none", RowSideColors= myClusterSideBar)