1 Introduction

A mixed model with random effects was chosen for this multifactor experiment, and analyzed using the limma package in R. This package implements a linear modeling approach and empirical Bayes statistics. The limma package with the voom method estimates the mean-variance relationship of the log-counts, generates a precision weight for each observation and enters these into the limma empirical Bayes analysis pipeline.

In this model, clade (levels = Clade1, Clade2, Clade3), physiology (levels = Marine, Freshwater), and experimental condition (levels = 15ppt, 0.2ppt) are fixed effects while species (levels = 14) is considered a random effect.

The raw counts file, generated with NumReads from the salmon (version 0.12.0) quantification tool and summarized with the tximport Bioconductor package (version 1.10.1) in R, can be downloaded from an osf repository with this link, then imported into the R framework.

Samples from species with low numbers of replicates were dropped from the raw counts table (F. zebrinus, F. nottii, F. sciadicus). The raw counts table has the following dimensions (genes x samples).

#dim(counts_design)
#[1] 31595   130

# -----------------------
# Format design and counts matrix
# Drop columns with no data
# -----------------------

design <- counts_design[counts_design$Ensembl == 'Empty',]
#design$type <- c("species","native_salinity","clade","group","condition")
drops <- c("X","Ensembl",
           "F_zebrinus_BW_1.quant","F_zebrinus_BW_2.quant",
           "F_zebrinus_FW_1.quant","F_zebrinus_FW_2.quant",
           "F_nottii_FW_1.quant","F_nottii_FW_2.quant",
           "F_sciadicus_BW_1.quant","F_sciadicus_FW_1.quant","F_sciadicus_FW_2.quant")
transfer_drops <- c("F_sciadicus_transfer_1.quant","F_rathbuni_transfer_1.quant","F_rathbuni_transfer_2.quant","F_rathbuni_transfer_3.quant",
                    "F_grandis_transfer_1.quant","F_grandis_transfer_2.quant","F_grandis_transfer_3.quant",
                    "F_notatus_transfer_1.quant","F_notatus_transfer_2.quant","F_notatus_transfer_3.quant",
                    "F_parvapinis_transfer_1.quant","F_parvapinis_transfer_2.quant",
                    "L_goodei_transfer_1.quant","L_goodei_transfer_2.quant","L_goodei_transfer_3.quant",
                    "F_olivaceous_transfer_1.quant","F_olivaceous_transfer_2.quant",
                    "L_parva_transfer_1.quant","L_parva_transfer_2.quant","L_parva_transfer_3.quant",
                    "F_heteroclitusMDPP_transfer_1.quant","F_heteroclitusMDPP_transfer_2.quant","F_heteroclitusMDPP_transfer_3.quant",
                    "F_similis_transfer_1.quant","F_similis_transfer_2.quant","F_similis_transfer_3.quant",
                    "F_diaphanus_transfer_1.quant","F_diaphanus_transfer_2.quant",
                    "F_chrysotus_transfer_1.quant","F_chrysotus_transfer_2.quant",
                    "A_xenica_transfer_1.quant","A_xenica_transfer_2.quant","A_xenica_transfer_3.quant" ,
                    "F_catanatus_transfer_1.quant","F_catanatus_transfer_2.quant",
                    "F_heteroclitusMDPL_transfer_1.quant","F_heteroclitusMDPL_transfer_2.quant","F_heteroclitusMDPL_transfer_3.quant")
counts<-counts_design[!counts_design$Ensembl == 'Empty',]
rownames(counts)<-counts$Ensembl
design <- design[ , !(names(design) %in% drops)]
counts <- counts[ , !(names(counts) %in% drops)]
design <- design[ , !(names(design) %in% transfer_drops)]
counts <- counts[ , !(names(counts) %in% transfer_drops)]
#dim(design)
#[1]  5 81
dim(counts)

## [1] 31590    81

gene.names<-rownames(counts)
design[] <- lapply( design, factor)

2 Sample Design Matrix

A matrix was generated using the following model with fixed effects:

~physiology*condition*clade

The random effect of species will be taken into account later.

# --------------------
# design cateogories
# --------------------

species<-as.character(unlist(design[1,]))
physiology<-as.character(unlist(design[2,]))
clade<-as.character(unlist(design[3,]))
condition<-as.character(unlist(design[5,]))
condition_physiology<-as.vector(paste(condition,physiology,sep="."))
condition_physiology_clade <- as.vector(paste(condition_physiology,clade,sep="."))
condition_physiology_clade <- as.vector(paste("group",condition_physiology_clade,sep=""))
cols<-colnames(counts)
ExpDesign <- data.frame(row.names=cols,
                        condition=condition,
                        physiology = physiology,
                        clade = clade,
                        species = species,
                        sample=cols)
ExpDesign

design = model.matrix( ~physiology*condition*clade, ExpDesign)
colnames(design)

##  [1] "(Intercept)"                            
##  [2] "physiologyM"                            
##  [3] "condition15_ppt"                        
##  [4] "cladeClade2"                            
##  [5] "cladeClade3"                            
##  [6] "physiologyM:condition15_ppt"            
##  [7] "physiologyM:cladeClade2"                
##  [8] "physiologyM:cladeClade3"                
##  [9] "condition15_ppt:cladeClade2"            
## [10] "condition15_ppt:cladeClade3"            
## [11] "physiologyM:condition15_ppt:cladeClade2"
## [12] "physiologyM:condition15_ppt:cladeClade3"

# check rank of matrix
#Matrix::rankMatrix( design )
#dim(design)

3 Filtering and Normalization

Genes with low expression across samples were dropped from the analysis using a conservative approach. The function filterByExpr was used on the raw counts matrix. For each condition_physiology group (regardless of species), each sample must have a minium count of 10, and a group minium total count of 100. This reduced the counts table to the following dimensions (genes x samples):

gene.names<-rownames(counts)
counts<-as.matrix(as.data.frame(sapply(counts, as.numeric)))
rownames(counts)<-gene.names
#class(counts)

keep<-filterByExpr(counts,design = design,group=condition_physiology,min.count = 10, min.total.count = 100)
counts.filt <- counts[keep,]
dim(counts.filt)

## [1] 21468    81

#write.csv(counts.filt,"../../../21k_counts_filt_30April2019.csv")

4 Genes of Interest

After filtration, we check the counts matrix for the presence of several genes of interest. These genes have demonstrated responsiveness to salinity change from past studies.

gene	Funhe	Ensembl
zymogen granule membrane protein 16	Funhe2EKm029929	ENSFHEP00000007220.1
zymogen granule membrane protein 16	Funhe2EKm029931	ENSFHEP00000025841
solute carrier family 12 member 3-like (removed)	Funhe2EKm006896	ENSFHEP00000009214
chloride channel, voltage-sensitive 2 (clcn2), transcript variant X2 (removed)	Funhe2EKm024148	ENSFHEP00000019510
ATP-sensitive inward rectifier potassium channel 1	Funhe2EKm001965	ENSFHEP00000015383
inward rectifier potassium channel 2	Funhe2EKm023780	ENSFHEP00000009753
aquaporin-3		ENSFHEP00000006725
cftr	Funhe2EKm024501	ENSFHEP00000008393
polyamine-modulated factor 1-like	Funhe2EKm031049	ENSFHEP00000013324
sodium/potassium/calcium exchanger 2	Funhe2EKm025497	ENSFHEP00000034177
septin-2B isoform X2		ENSFHEP00000015765
CLOCK-interacting pacemaker-like	Funhe2EKm026846	ENSFHEP00000017303
vasopressin V2 receptor-like	Funhe2EKm026721	ENSFHEP00000000036
sodium/potassium-transporting ATPase subunit beta-1-interacting protein 1	Funhe2EKm001174	ENSFHEP00000031108
septin-2	Funhe2EKm012182	ENSFHEP00000016853
otopetrin-2	Funhe2EKm035427	ENSFHEP00000026411
claudin 8	Funhe2EKm037718	ENSFHEP00000006282
claudin 4	Funhe2EKm007149	ENSFHEP00000003908

If the Ensembl ID displays below, the gene is present in the whole data set and has not been filtered.

# ---------------------------
# Andrew Whitehead's genes of interest:
# ---------------------------

# zymogen granule membrane protein 16
# Funhe2EKm029929
# ENSFHEP00000007220.1
countsfilt <- counts.filt
countsfilt$row <- rownames(countsfilt)
goi <- countsfilt$row[countsfilt$row == "ENSFHEP00000007220.1"]
goi

## [1] "ENSFHEP00000007220.1"

# zymogen granule membrane protein 16
# Funhe2EKm029931
# ENSFHEP00000025841
goi <- countsfilt$row[countsfilt$row == "ENSFHEP00000025841"]
goi

## [1] "ENSFHEP00000025841"

# solute carrier family 12 member 3-like (removed) 
# Funhe2EKm006896
# ENSFHEP00000009214
goi <- countsfilt$row[countsfilt$row == "ENSFHEP00000009214"]
goi

## [1] "ENSFHEP00000009214"

# chloride channel, voltage-sensitive 2 (clcn2), transcript variant X2 (removed)
# Funhe2EKm024148
# ENSFHEP00000019510
goi <- countsfilt$row[countsfilt$row == "ENSFHEP00000019510"]
goi

## [1] "ENSFHEP00000019510"

# ATP-sensitive inward rectifier potassium channel 1 
# Funhe2EKm001965
# ENSFHEP00000015383
goi <- countsfilt$row[countsfilt$row == "ENSFHEP00000015383"]
goi

## [1] "ENSFHEP00000015383"

# inward rectifier potassium channel 2
# Funhe2EKm023780
# ENSFHEP00000009753
goi <- countsfilt$row[countsfilt$row == "ENSFHEP00000009753"]

# --------------------------------
# other salinity genes of interest
# --------------------------------
# ============================================
# aquaporin-3
# ENSFHEP00000006725
goi <- countsfilt$row[countsfilt$row == "ENSFHEP00000006725"]
goi

## [1] "ENSFHEP00000006725"

# cftr
# Funhe2EKm024501
# ENSFHEP00000008393
goi <- countsfilt$row[countsfilt$row == "ENSFHEP00000008393"]
goi

## [1] "ENSFHEP00000008393"

# polyamine-modulated factor 1-like
# Funhe2EKm031049
# ENSFHEP00000013324
goi <- countsfilt$row[countsfilt$row == "ENSFHEP00000013324"]
goi

## [1] "ENSFHEP00000013324"

# sodium/potassium/calcium exchanger 2
# ENSFHEP00000034177
# Funhe2EKm025497
goi <- countsfilt$row[countsfilt$row == "ENSFHEP00000034177"]
goi

## character(0)

# septin-2B isoform X2
# ENSFHEP00000015765
goi <- countsfilt$row[countsfilt$row == "ENSFHEP00000015765"]
goi

## [1] "ENSFHEP00000015765"

# CLOCK-interacting pacemaker-like
# ENSFHEP00000017303
# Funhe2EKm026846
goi <- countsfilt$row[countsfilt$row == "ENSFHEP00000017303"]
goi

## [1] "ENSFHEP00000017303"

# vasopressin V2 receptor-like
# Funhe2EKm026721
# ENSFHEP00000000036
goi <- countsfilt$row[countsfilt$row == "ENSFHEP00000000036"]
goi

## [1] "ENSFHEP00000000036"

# sodium/potassium-transporting ATPase subunit beta-1-interacting protein 1
# ENSFHEP00000031108
# Funhe2EKm001174
goi <- countsfilt$row[countsfilt$row == "ENSFHEP00000031108"]
goi

## [1] "ENSFHEP00000031108"

# septin-2
# Funhe2EKm012182
# ENSFHEP00000016853
goi <- countsfilt$row[countsfilt$row == "ENSFHEP00000016853"]
goi

## [1] "ENSFHEP00000016853"

# otopetrin-2
# Funhe2EKm035427
# ENSFHEP00000026411
goi <- countsfilt$row[countsfilt$row == "ENSFHEP00000026411"]
goi

## [1] "ENSFHEP00000026411"

# claudin 8
# Funhe2EKm037718
# ENSFHEP00000006282
goi <- countsfilt$row[countsfilt$row == "ENSFHEP00000006282"]
goi

## [1] "ENSFHEP00000006282"

# claudin 4
# ENSFHEP00000003908
# Funhe2EKm007149
goi <- countsfilt$row[countsfilt$row == "ENSFHEP00000003908"]
goi

## [1] "ENSFHEP00000003908"

all_goi<-c("ENSFHEP00000007220.1","ENSFHEP00000025841","ENSFHEP00000019510",
           "ENSFHEP00000015383","ENSFHEP00000009753","ENSFHEP00000006725","ENSFHEP00000008393",
           "ENSFHEP00000013324","ENSFHEP00000001609","ENSFHEP00000013324","ENSFHEP00000034177",
           "ENSFHEP00000015765","ENSFHEP00000017303","ENSFHEP00000000036","ENSFHEP00000031108",
           "ENSFHEP00000016853","ENSFHEP00000003908")

5 Exploratory Plots

Log counts before normalization:

# log counts before DE
boxplot(log(counts.filt+1), las = 2, main = "")

Log counts after cpm normalization

# ---------------

# DE analysis

# ---------------

genes = DGEList(count = counts.filt, group = condition_physiology_clade)
genes = calcNormFactors( genes )

# write normalized counts
dir <- "~/Documents/UCDavis/Whitehead/"
tmp <- as.data.frame(cpm(genes))
tmp$Ensembl <- rownames(tmp)
tmp <- dplyr::select(tmp, Ensembl, everything())
#write.csv(tmp, file = file.path(dir, "normalized_counts.csv"), quote = F, row.names = F)

vobj = voom( genes, design, plot=TRUE)

lcpm <- cpm(genes$counts, log = TRUE)

# log counts after DE

boxplot(lcpm, las = 2, main = "")

plot(colSums(t(lcpm)))

Voom weights:

vwts <- voomWithQualityWeights(genes, design=design, normalization="quantile", plot=TRUE)

The random effects of species are taken into account with the duplicateCorrelation function, which estimates the correlation between species. This reflects the between-species variability. The higher the correlation (0-1.0), the higher the variability between species.

corfit <- duplicateCorrelation(vobj,design,block=ExpDesign$species)

corfit$consensus

## [1] 0.758966

#[1] 0.758966

5.0.1 PCA of un-normalized expression vs. limma-voom log cpm normalized

Un-normalized log counts

x <- data.matrix(genes)
dim(x)

## [1] 21468    81

x <- x+1
log_x<-log(x)
names<-colnames(log_x)
pca = prcomp(t(log_x))
summary(pca)

## Importance of components:
##                            PC1      PC2      PC3      PC4      PC5
## Standard deviation     82.3397 70.46780 63.70790 62.13699 61.07070
## Proportion of Variance  0.1232  0.09024  0.07375  0.07016  0.06777
## Cumulative Proportion   0.1232  0.21344  0.28719  0.35735  0.42513
##                             PC6      PC7      PC8      PC9     PC10
## Standard deviation     60.37768 56.47475 55.63547 54.15081 53.62872
## Proportion of Variance  0.06624  0.05796  0.05625  0.05329  0.05226
## Cumulative Proportion   0.49137  0.54933  0.60557  0.65886  0.71112
##                           PC11     PC12     PC13     PC14     PC15
## Standard deviation     52.2958 51.04201 45.46807 41.26500 19.26591
## Proportion of Variance  0.0497  0.04734  0.03757  0.03094  0.00674
## Cumulative Proportion   0.7608  0.80816  0.84573  0.87667  0.88342
##                            PC16     PC17     PC18    PC19     PC20
## Standard deviation     16.23883 14.92378 13.72871 12.8511 12.22137
## Proportion of Variance  0.00479  0.00405  0.00342  0.0030  0.00271
## Cumulative Proportion   0.88821  0.89225  0.89568  0.8987  0.90139
##                            PC21     PC22     PC23     PC24     PC25
## Standard deviation     12.05614 11.78337 11.58730 11.45017 11.41091
## Proportion of Variance  0.00264  0.00252  0.00244  0.00238  0.00237
## Cumulative Proportion   0.90404  0.90656  0.90900  0.91138  0.91375
##                            PC26     PC27     PC28     PC29     PC30
## Standard deviation     11.17743 11.12899 10.97378 10.91720 10.77980
## Proportion of Variance  0.00227  0.00225  0.00219  0.00217  0.00211
## Cumulative Proportion   0.91602  0.91827  0.92046  0.92262  0.92473
##                            PC31     PC32     PC33     PC34     PC35
## Standard deviation     10.62179 10.58322 10.52957 10.46736 10.38039
## Proportion of Variance  0.00205  0.00204  0.00201  0.00199  0.00196
## Cumulative Proportion   0.92678  0.92882  0.93083  0.93283  0.93478
##                            PC36     PC37    PC38     PC39     PC40
## Standard deviation     10.34727 10.29678 10.2170 10.18013 10.10653
## Proportion of Variance  0.00195  0.00193  0.0019  0.00188  0.00186
## Cumulative Proportion   0.93673  0.93866  0.9405  0.94244  0.94429
##                            PC41   PC42    PC43    PC44    PC45    PC46
## Standard deviation     10.02890 9.9653 9.91271 9.84890 9.80171 9.69293
## Proportion of Variance  0.00183 0.0018 0.00179 0.00176 0.00175 0.00171
## Cumulative Proportion   0.94612 0.9479 0.94971 0.95147 0.95322 0.95493
##                           PC47    PC48    PC49    PC50    PC51    PC52
## Standard deviation     9.65210 9.59175 9.56323 9.52095 9.40335 9.33365
## Proportion of Variance 0.00169 0.00167 0.00166 0.00165 0.00161 0.00158
## Cumulative Proportion  0.95662 0.95829 0.95995 0.96160 0.96321 0.96479
##                           PC53    PC54    PC55    PC56   PC57    PC58
## Standard deviation     9.31391 9.21468 9.16155 9.13598 9.0845 9.01272
## Proportion of Variance 0.00158 0.00154 0.00153 0.00152 0.0015 0.00148
## Cumulative Proportion  0.96637 0.96791 0.96943 0.97095 0.9725 0.97393
##                           PC59    PC60    PC61    PC62    PC63   PC64
## Standard deviation     8.95928 8.89391 8.75183 8.70351 8.60786 8.4511
## Proportion of Variance 0.00146 0.00144 0.00139 0.00138 0.00135 0.0013
## Cumulative Proportion  0.97538 0.97682 0.97821 0.97959 0.98094 0.9822
##                           PC65    PC66    PC67    PC68    PC69    PC70
## Standard deviation     8.36601 8.32499 8.19265 8.15319 8.08437 7.96402
## Proportion of Variance 0.00127 0.00126 0.00122 0.00121 0.00119 0.00115
## Cumulative Proportion  0.98351 0.98477 0.98599 0.98719 0.98838 0.98953
##                           PC71    PC72    PC73   PC74    PC75    PC76
## Standard deviation     7.93466 7.87773 7.83077 7.7876 7.67938 7.60013
## Proportion of Variance 0.00114 0.00113 0.00111 0.0011 0.00107 0.00105
## Cumulative Proportion  0.99068 0.99181 0.99292 0.9940 0.99509 0.99614
##                           PC77    PC78    PC79    PC80      PC81
## Standard deviation     7.57061 7.44751 7.14685 6.95443 1.249e-13
## Proportion of Variance 0.00104 0.00101 0.00093 0.00088 0.000e+00
## Cumulative Proportion  0.99719 0.99819 0.99912 1.00000 1.000e+00

fac = factor(physiology)
colours = function(vec){
  cols=rainbow(length(unique(vec)))
  return(cols[as.numeric(as.factor(vec))])}
#mar.default <- c(5,4,4,2) + 0.1
#par(mar = mar.default + c(0, 4, 0, 0)) 
plot(pca$x[,1:2], 
     col=colours(clade), 
     pch = c(16, 2, 9)[as.numeric(as.factor(physiology))],
     cex=2,
     xlab="PC1",
     ylab="PC2",
     cex.lab=2,
     cex.axis = 2)
legend(140,100,legend=c("Clade 1","Clade 2","Clade 3"),col=rainbow(length(unique(clade))),cex=0.75, pch=19)
legend(140,-67,legend=c("Freshwater","Marine"),cex=0.75,pch=c(16, 2, 9))

#legend(-75,50,legend=c("Clade 1","Clade 2","Clade 3"),col=rainbow(length(unique(clade))),cex=0.75, pch=19)
#legend(-75,25,legend=c("Freshwater","Marine"),cex=0.75,pch=c(16, 2, 9))

CPM-normalized log counts

x <- data.matrix(cpm(genes))
dim(x)

## [1] 21468    81

x <- x+1
log_x<-log(x)
names<-colnames(log_x)
pca = prcomp(t(log_x))
summary(pca)

## Importance of components:
##                            PC1      PC2      PC3     PC4      PC5      PC6
## Standard deviation     48.6654 44.53062 42.97722 41.0499 39.67087 39.45871
## Proportion of Variance  0.1027  0.08603  0.08013  0.0731  0.06827  0.06755
## Cumulative Proportion   0.1027  0.18877  0.26890  0.3420  0.41027  0.47782
##                             PC7     PC8      PC9     PC10    PC11     PC12
## Standard deviation     38.06462 37.7123 36.73365 36.26002 35.7674 35.07016
## Proportion of Variance  0.06286  0.0617  0.05854  0.05704  0.0555  0.05336
## Cumulative Proportion   0.54068  0.6024  0.66091  0.71795  0.7734  0.82681
##                            PC13    PC14     PC15     PC16     PC17    PC18
## Standard deviation     30.74866 15.8475 12.30488 11.53589 10.03307 9.24181
## Proportion of Variance  0.04102  0.0109  0.00657  0.00577  0.00437 0.00371
## Cumulative Proportion   0.86782  0.8787  0.88529  0.89106  0.89543 0.89913
##                           PC19    PC20   PC21    PC22    PC23    PC24
## Standard deviation     8.62704 8.25760 8.0268 7.94057 7.77719 7.69003
## Proportion of Variance 0.00323 0.00296 0.0028 0.00274 0.00262 0.00257
## Cumulative Proportion  0.90236 0.90532 0.9081 0.91085 0.91347 0.91604
##                           PC25    PC26    PC27    PC28    PC29    PC30
## Standard deviation     7.42919 7.39519 7.25650 7.08385 7.06522 6.96936
## Proportion of Variance 0.00239 0.00237 0.00228 0.00218 0.00217 0.00211
## Cumulative Proportion  0.91843 0.92080 0.92309 0.92527 0.92743 0.92954
##                           PC31    PC32   PC33    PC34    PC35    PC36
## Standard deviation     6.87536 6.85467 6.7898 6.74624 6.65733 6.64807
## Proportion of Variance 0.00205 0.00204 0.0020 0.00197 0.00192 0.00192
## Cumulative Proportion  0.93159 0.93363 0.9356 0.93760 0.93952 0.94144
##                           PC37    PC38    PC39    PC40    PC41    PC42
## Standard deviation     6.60885 6.54262 6.52852 6.50744 6.45851 6.41235
## Proportion of Variance 0.00189 0.00186 0.00185 0.00184 0.00181 0.00178
## Cumulative Proportion  0.94334 0.94519 0.94704 0.94888 0.95069 0.95247
##                           PC43    PC44    PC45    PC46    PC47    PC48
## Standard deviation     6.34137 6.23549 6.21619 6.18682 6.16092 6.12525
## Proportion of Variance 0.00174 0.00169 0.00168 0.00166 0.00165 0.00163
## Cumulative Proportion  0.95422 0.95590 0.95758 0.95924 0.96089 0.96252
##                           PC49    PC50    PC51    PC52    PC53    PC54
## Standard deviation     6.08510 6.04849 5.96420 5.95173 5.90230 5.84461
## Proportion of Variance 0.00161 0.00159 0.00154 0.00154 0.00151 0.00148
## Cumulative Proportion  0.96412 0.96571 0.96725 0.96879 0.97030 0.97178
##                           PC55    PC56    PC57    PC58    PC59    PC60
## Standard deviation     5.81771 5.74468 5.71253 5.56658 5.54543 5.46102
## Proportion of Variance 0.00147 0.00143 0.00142 0.00134 0.00133 0.00129
## Cumulative Proportion  0.97325 0.97468 0.97610 0.97744 0.97878 0.98007
##                           PC61    PC62    PC63    PC64    PC65    PC66
## Standard deviation     5.35294 5.29980 5.28837 5.20784 5.18028 5.07110
## Proportion of Variance 0.00124 0.00122 0.00121 0.00118 0.00116 0.00112
## Cumulative Proportion  0.98131 0.98253 0.98374 0.98492 0.98609 0.98720
##                           PC67    PC68    PC69    PC70    PC71    PC72
## Standard deviation     5.01085 4.90135 4.84854 4.83996 4.75664 4.71223
## Proportion of Variance 0.00109 0.00104 0.00102 0.00102 0.00098 0.00096
## Cumulative Proportion  0.98829 0.98933 0.99035 0.99137 0.99235 0.99331
##                           PC73    PC74    PC75    PC76    PC77    PC78
## Standard deviation     4.66055 4.59076 4.46244 4.45147 4.38650 4.27280
## Proportion of Variance 0.00094 0.00091 0.00086 0.00086 0.00083 0.00079
## Cumulative Proportion  0.99426 0.99517 0.99603 0.99689 0.99773 0.99852
##                           PC79    PC80      PC81
## Standard deviation     4.14812 4.11138 6.012e-14
## Proportion of Variance 0.00075 0.00073 0.000e+00
## Cumulative Proportion  0.99927 1.00000 1.000e+00

fac = factor(physiology)
colours = function(vec){
  cols=rainbow(length(unique(vec)))
  return(cols[as.numeric(as.factor(vec))])}
#mar.default <- c(5,4,4,2) + 0.1
#par(mar = mar.default + c(0, 4, 0, 0)) 
plot(pca$x[,1:2], 
     col=colours(clade), 
     pch = c(16, 2, 9)[as.numeric(as.factor(physiology))],
     cex=2,
     xlab="PC1",
     ylab="PC2",
     cex.lab=2,
     cex.axis = 2)
#legend(140,100,legend=c("Clade 1","Clade 2","Clade 3"),col=rainbow(length(unique(clade))),cex=0.75, pch=19)
#legend(140,-67,legend=c("Freshwater","Marine"),cex=0.75,pch=c(16, 2, 9))
legend(-75,50,legend=c("Clade 1","Clade 2","Clade 3"),col=rainbow(length(unique(clade))),cex=0.75, pch=19)
legend(-75,25,legend=c("Freshwater","Marine"),cex=0.75,pch=c(16, 2, 9))

5.0.2 Individuals clustered by overall expression

counts_round<-round(counts.filt,digits=0)
dds <- DESeqDataSetFromMatrix(countData = counts_round,colData = ExpDesign,design = design)

## converting counts to integer mode

rld <- vst(dds, blind = FALSE,fitType='local')
sampleDists <- dist(t(assay(rld)))
df <- as.data.frame(colData(dds)[,c("physiology","condition","clade")])
sampleDistMatrix <- as.matrix( sampleDists )
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
pheatmap(sampleDistMatrix,
         clustering_distance_rows = sampleDists,
         clustering_distance_cols = sampleDists,
         col = colors, annotation = df, show_rownames=F)

5.0.3 Individuals clustered by Top 100 genes

5.0.4 Individuals clustered by top 50 gene expression

5.0.5 PCA for overall expression

cowplot::plot_grid( plotPCA(rld, intgroup="condition"),
                    plotPCA(rld, intgroup="physiology"),
                    plotPCA(rld, intgroup="clade"),
                    plotPCA(rld, intgroup=c("clade","physiology","condition")),
                           align="c", ncol=2)

sessionInfo()

## R version 3.5.2 (2018-12-20)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.3
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] data.table_1.12.2           emmeans_1.3.4              
##  [3] kableExtra_1.1.0            biomaRt_2.38.0             
##  [5] vsn_3.50.0                  lattice_0.20-38            
##  [7] gplots_3.0.1.1              edgeR_3.24.3               
##  [9] limma_3.38.3                DESeq2_1.22.2              
## [11] SummarizedExperiment_1.12.0 DelayedArray_0.8.0         
## [13] BiocParallel_1.16.6         matrixStats_0.54.0         
## [15] Biobase_2.42.0              GenomicRanges_1.34.0       
## [17] GenomeInfoDb_1.18.2         IRanges_2.16.0             
## [19] S4Vectors_0.20.1            BiocGenerics_0.28.0        
## [21] ggrepel_0.8.0               knitr_1.22                 
## [23] tidyr_0.8.3                 dplyr_0.8.0.1              
## [25] RColorBrewer_1.1-2          cowplot_0.9.4              
## [27] pheatmap_1.0.12             gtools_3.8.1               
## [29] ggplot2_3.1.1               MASS_7.3-51.4              
## 
## loaded via a namespace (and not attached):
##  [1] colorspace_1.4-1       estimability_1.3       htmlTable_1.13.1      
##  [4] XVector_0.22.0         base64enc_0.1-3        rstudioapi_0.10       
##  [7] affyio_1.52.0          bit64_0.9-7            AnnotationDbi_1.44.0  
## [10] mvtnorm_1.0-10         xml2_1.2.0             splines_3.5.2         
## [13] geneplotter_1.60.0     Formula_1.2-3          jsonlite_1.6          
## [16] annotate_1.60.1        cluster_2.0.8          BiocManager_1.30.4    
## [19] readr_1.3.1            compiler_3.5.2         httr_1.4.0            
## [22] backports_1.1.4        assertthat_0.2.1       Matrix_1.2-17         
## [25] lazyeval_0.2.2         acepack_1.4.1          htmltools_0.3.6       
## [28] prettyunits_1.0.2      tools_3.5.2            coda_0.19-2           
## [31] gtable_0.3.0           glue_1.3.1             GenomeInfoDbData_1.2.0
## [34] affy_1.60.0            Rcpp_1.0.1             gdata_2.18.0          
## [37] preprocessCore_1.44.0  xfun_0.6               stringr_1.4.0         
## [40] rvest_0.3.2            statmod_1.4.30         XML_3.98-1.19         
## [43] zlibbioc_1.28.0        scales_1.0.0           hms_0.4.2             
## [46] yaml_2.2.0             memoise_1.1.0          gridExtra_2.3         
## [49] rpart_4.1-13           latticeExtra_0.6-28    stringi_1.4.3         
## [52] RSQLite_2.1.1          genefilter_1.64.0      checkmate_1.9.1       
## [55] caTools_1.17.1.2       rlang_0.3.4            pkgconfig_2.0.2       
## [58] bitops_1.0-6           evaluate_0.13          purrr_0.3.2           
## [61] labeling_0.3           htmlwidgets_1.3        bit_1.1-14            
## [64] tidyselect_0.2.5       plyr_1.8.4             magrittr_1.5          
## [67] R6_2.4.0               Hmisc_4.2-0            DBI_1.0.0             
## [70] pillar_1.3.1           foreign_0.8-71         withr_2.1.2           
## [73] survival_2.44-1.1      RCurl_1.95-4.12        nnet_7.3-12           
## [76] tibble_2.1.1           crayon_1.3.4           KernSmooth_2.23-15    
## [79] rmarkdown_1.12         progress_1.2.0         locfit_1.5-9.1        
## [82] grid_3.5.2             blob_1.1.1             digest_0.6.18         
## [85] webshot_0.5.1          xtable_1.8-3           munsell_0.5.0         
## [88] viridisLite_0.3.0

DEanalysis_kfish_osmotic_limma_QC

Lisa Johnson

2019-08-04