library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
[30m── [1mAttaching packages[22m ──────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──[39m
[30m[32m✓[30m [34mggplot2[30m 3.3.5 [32m✓[30m [34mpurrr [30m 0.3.4
[32m✓[30m [34mtibble [30m 3.1.3 [32m✓[30m [34mdplyr [30m 1.0.7
[32m✓[30m [34mtidyr [30m 1.1.3 [32m✓[30m [34mstringr[30m 1.4.0
[32m✓[30m [34mreadr [30m 1.4.0 [32m✓[30m [34mforcats[30m 0.5.1[39m
[30m── [1mConflicts[22m ─────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31mx[30m [34mdplyr[30m::[32mfilter()[30m masks [34mstats[30m::filter()
[31mx[30m [34mdplyr[30m::[32mlag()[30m masks [34mstats[30m::lag()[39m
library(phyloseq)
Registered S3 method overwritten by 'data.table':
method from
print.data.table
library(phangorn)
Loading required package: ape
library(readr)
library(ape)
library(vegan)
Loading required package: permute
Loading required package: lattice
This is vegan 2.5-7
Attaching package: ‘vegan’
The following objects are masked from ‘package:phangorn’:
diversity, treedist
library(RColorBrewer)
library(microbiome)
microbiome R package (microbiome.github.com)
Copyright (C) 2011-2020 Leo Lahti,
Sudarshan Shetty et al. <microbiome.github.io>
Attaching package: ‘microbiome’
The following object is masked from ‘package:vegan’:
diversity
The following object is masked from ‘package:phangorn’:
diversity
The following object is masked from ‘package:ggplot2’:
alpha
The following object is masked from ‘package:base’:
transform
library(compositions)
Welcome to compositions, a package for compositional data analysis.
Find an intro with "? compositions"
Attaching package: ‘compositions’
The following object is masked from ‘package:ape’:
balance
The following objects are masked from ‘package:stats’:
cor, cov, dist, var
The following objects are masked from ‘package:base’:
%*%, norm, scale, scale.default
library(SpiecEasi)
Attaching package: ‘SpiecEasi’
The following objects are masked from ‘package:compositions’:
alr, clr
library(otuSummary)
library(psych)
Attaching package: ‘psych’
The following objects are masked from ‘package:SpiecEasi’:
cor2cov, shannon
The following objects are masked from ‘package:compositions’:
ellipses, pairwisePlot
The following object is masked from ‘package:microbiome’:
alpha
The following objects are masked from ‘package:ggplot2’:
%+%, alpha
library(Matrix)
Attaching package: ‘Matrix’
The following objects are masked from ‘package:SpiecEasi’:
tril, triu
The following objects are masked from ‘package:tidyr’:
expand, pack, unpack
library(igraph)
Attaching package: ‘igraph’
The following object is masked from ‘package:SpiecEasi’:
make_graph
The following object is masked from ‘package:compositions’:
normalize
The following object is masked from ‘package:microbiome’:
diversity
The following object is masked from ‘package:vegan’:
diversity
The following object is masked from ‘package:permute’:
permute
The following object is masked from ‘package:phangorn’:
diversity
The following objects are masked from ‘package:ape’:
edges, mst, ring
The following objects are masked from ‘package:dplyr’:
as_data_frame, groups, union
The following objects are masked from ‘package:purrr’:
compose, simplify
The following object is masked from ‘package:tidyr’:
crossing
The following object is masked from ‘package:tibble’:
as_data_frame
The following objects are masked from ‘package:stats’:
decompose, spectrum
The following object is masked from ‘package:base’:
union
library(plotly)
Registered S3 methods overwritten by 'htmltools':
method from
print.html tools:rstudio
print.shiny.tag tools:rstudio
print.shiny.tag.list tools:rstudio
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
Attaching package: ‘plotly’
The following object is masked from ‘package:igraph’:
groups
The following object is masked from ‘package:ggplot2’:
last_plot
The following object is masked from ‘package:stats’:
filter
The following object is masked from ‘package:graphics’:
layout
library(egg)
Loading required package: gridExtra
Attaching package: ‘gridExtra’
The following object is masked from ‘package:dplyr’:
combine
library(ggvegan)
# Helper functions from J. Cram https://biovcnet.github.io/_pages/NetworkScience_SparCC.nb
# Identity helper: hands its argument back unchanged.
pass <- function(x) {
  x
}
# Keep the lower triangle (and diagonal) of a correlation matrix.
# Every cell strictly above the diagonal is replaced by NA; the input's
# dimensions and dimnames are retained.
get_lower_tri <- function(cormat) {
  above_diagonal <- upper.tri(cormat)
  replace(cormat, above_diagonal, NA)
}
# Keep the upper triangle (and diagonal) of a correlation matrix.
# Every cell strictly below the diagonal is replaced by NA; the input's
# dimensions and dimnames are retained.
get_upper_tri <- function(cormat) {
  below_diagonal <- lower.tri(cormat)
  replace(cormat, below_diagonal, NA)
}
# Reorder a correlation matrix by hierarchical clustering so that variables
# with similar correlation profiles end up next to each other.
#
# cormat: square correlation matrix (coefficients r between variables).
# Returns cormat with its rows and columns permuted into the hclust() leaf
# order.
reorder_cormat <- function(cormat) {
  # Use correlation between variables as distance: (1 - r) / 2 maps r = 1 to
  # distance 0 and r = -1 to distance 1
  dd <- as.dist((1 - cormat) / 2)
  hc <- hclust(dd)
  # Evaluate the subset as the last expression (not as an assignment) so the
  # result is returned visibly, matching reorder_cor_and_p()
  cormat[hc$order, hc$order, drop = FALSE]
}
# Reorder a correlation matrix and its matching p-value matrix with one shared
# permutation, taken from hierarchical clustering of the correlations.
#
# cormat: square correlation matrix.
# pmat:   p-value matrix laid out like cormat.
# Returns list(r = reordered cormat, p = reordered pmat).
reorder_cor_and_p <- function(cormat, pmat) {
  # (1 - r) / 2 turns a correlation into a distance for hclust()
  leaf_order <- hclust(as.dist((1 - cormat) / 2))$order
  list(
    r = cormat[leaf_order, leaf_order],
    p = pmat[leaf_order, leaf_order]
  )
}
# Report versions of packages
sessionInfo()
R version 4.0.2 (2020-06-22)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Catalina 10.15.7
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] ggvegan_0.1-0 egg_0.4.5 gridExtra_2.3 plotly_4.9.2.2 igraph_1.2.6 Matrix_1.3-0 psych_2.1.3
[8] otuSummary_0.1.1 SpiecEasi_1.1.1 compositions_2.0-0 microbiome_1.10.0 RColorBrewer_1.1-2 vegan_2.5-7 lattice_0.20-41
[15] permute_0.9-5 phangorn_2.5.5 ape_5.4-1 phyloseq_1.32.0 forcats_0.5.1 stringr_1.4.0 dplyr_1.0.7
[22] purrr_0.3.4 readr_1.4.0 tidyr_1.1.3 tibble_3.1.3 ggplot2_3.3.5 tidyverse_1.3.1
loaded via a namespace (and not attached):
[1] Rtsne_0.15 VGAM_1.1-5 colorspace_2.0-2 ellipsis_0.3.2 XVector_0.28.0 fs_1.5.0 rstudioapi_0.13
[8] ggrepel_0.9.1 fansi_0.5.0 lubridate_1.7.10 xml2_1.3.2 codetools_0.2-18 splines_4.0.2 mnormt_2.0.2
[15] robustbase_0.93-6 knitr_1.30 ade4_1.7-16 jsonlite_1.7.2 broom_0.7.9 cluster_2.1.0 dbplyr_2.1.1
[22] compiler_4.0.2 httr_1.4.2 backports_1.2.1 assertthat_0.2.1 lazyeval_0.2.2 cli_3.0.1 htmltools_0.5.1.1
[29] prettyunits_1.1.1 tools_4.0.2 gtable_0.3.0 glue_1.4.2 reshape2_1.4.4 fastmatch_1.1-0 Rcpp_1.0.7
[36] Biobase_2.48.0 cellranger_1.1.0 vctrs_0.3.8 Biostrings_2.56.0 multtest_2.44.0 nlme_3.1-151 iterators_1.0.13
[43] tensorA_0.36.2 xfun_0.24 rvest_1.0.1 lifecycle_1.0.0 DEoptimR_1.0-8 zlibbioc_1.34.0 MASS_7.3-53
[50] scales_1.1.1 hms_1.1.0 parallel_4.0.2 biomformat_1.16.0 rhdf5_2.32.4 huge_1.3.4.1 stringi_1.7.3
[57] S4Vectors_0.26.1 foreach_1.5.1 BiocGenerics_0.34.0 shape_1.4.6 rlang_0.4.11 pkgconfig_2.0.3 Rhdf5lib_1.10.1
[64] htmlwidgets_1.5.3 tidyselect_1.1.1 plyr_1.8.6 magrittr_2.0.1 R6_2.5.0 IRanges_2.22.2 generics_0.1.0
[71] DBI_1.1.1 pillar_1.6.2 haven_2.3.1 withr_2.4.2 mgcv_1.8-33 survival_3.2-7 bayesm_3.1-4
[78] modelr_0.1.8 pulsar_0.3.7 crayon_1.4.1 utf8_1.2.2 tmvnsim_1.0-2 progress_1.2.2 grid_4.0.2
[85] readxl_1.3.1 data.table_1.13.4 reprex_2.0.1 digest_0.6.27 stats4_4.0.2 munsell_0.5.0 glmnet_4.1-1
[92] viridisLite_0.4.0 quadprog_1.5-8
Metadata:
metadata <- read_csv("Metadata.csv")
[36m──[39m [1m[1mColumn specification[1m[22m [36m─────────────────────────────────────────────────────────────────────────────────────────[39m
cols(
.default = col_double(),
`Sample Name` = [31mcol_character()[39m,
Replicate = [31mcol_character()[39m,
Type = [31mcol_character()[39m,
SizeFraction = [31mcol_character()[39m,
Season = [31mcol_character()[39m,
OxCond = [31mcol_character()[39m
)
[36mℹ[39m Use [38;5;235m[48;5;253m[38;5;235m[48;5;253m`spec()`[48;5;253m[38;5;235m[49m[39m for the full column specifications.
Import SRA table and match SRA IDs with sample IDs in metadata file
SRARunTable <- read_csv("sra_data/SraRunTable.txt")
[36m──[39m [1m[1mColumn specification[1m[22m [36m─────────────────────────────────────────────────────────────────────────────────────────[39m
cols(
.default = col_character(),
AvgSpotLen = [32mcol_double()[39m,
Bases = [32mcol_double()[39m,
Bytes = [32mcol_double()[39m,
ReleaseDate = [34mcol_datetime(format = "")[39m,
Depth_m = [32mcol_double()[39m,
CH4_uM = [32mcol_double()[39m,
H2S_Um = [32mcol_double()[39m,
Oxygen_uM = [32mcol_double()[39m,
Particulate_Sulfur_uM = [32mcol_double()[39m,
salinity = [32mcol_double()[39m,
Temperature_degree_C = [32mcol_double()[39m,
TZVS_uM = [32mcol_double()[39m
)
[36mℹ[39m Use [38;5;235m[48;5;253m[38;5;235m[48;5;253m`spec()`[48;5;253m[38;5;235m[49m[39m for the full column specifications.
metadata <- left_join(metadata, SRARunTable, by = 'Sample Name')
DADA2 results:
# Import count table. The first column has no header (read_tsv names it 'X1'); it is turned into row names below
count_table <- read_tsv(file="dada2_export/ASVs_counts.tsv")
Missing column names filled in: 'X1' [1]
[36m──[39m [1m[1mColumn specification[1m[22m [36m─────────────────────────────────────────────────────────────────────────────────────────[39m
cols(
.default = col_double(),
X1 = [31mcol_character()[39m
)
[36mℹ[39m Use [38;5;235m[48;5;253m[38;5;235m[48;5;253m`spec()`[48;5;253m[38;5;235m[49m[39m for the full column specifications.
# And specify that the first column of data are rownames
count_table <- column_to_rownames(count_table, var = colnames(count_table)[1])
# Import taxonomy of ASVs
taxonomy <- read_tsv(file="dada2_export/ASVs_taxonomy.tsv")
Missing column names filled in: 'X1' [1]
[36m──[39m [1m[1mColumn specification[1m[22m [36m─────────────────────────────────────────────────────────────────────────────────────────[39m
cols(
X1 = [31mcol_character()[39m,
Kingdom = [31mcol_character()[39m,
Supergroup = [31mcol_character()[39m,
Division = [31mcol_character()[39m,
Class = [31mcol_character()[39m,
Order = [31mcol_character()[39m,
Family = [31mcol_character()[39m,
Genus = [31mcol_character()[39m,
Species = [31mcol_character()[39m
)
# And specify that the first column of data are rownames
taxonomy <- column_to_rownames(taxonomy, var = colnames(taxonomy)[1])
# Use rarecurve() from the vegan package. It is given a plain data frame here,
# so coerce the count table with as.data.frame() first
count_table_df <- as.data.frame(count_table)
# Plot one rarefaction curve per sample. The table is transposed with t()
# before plotting.
# NOTE: this call draws every sample at once, which is slow to render; subset
# the columns (e.g. 5-10 samples at a time) for a quicker look
rarecurve(t(count_table_df), step = 100, cex = 0.5, ylab = "ASVs", label = TRUE)
# Keep only ASVs whose total count across all samples is greater than 1
count_table_no_singletons <- filter(count_table, rowSums(count_table) > 1)
# retains all ASVs (out of 14176)
and change sample names from NCBI ID to our internal sample IDs
# The columns of count_table_no_singletons are named by NCBI SRA run
# accession. Relabel them with our internal sample IDs
key <- SRARunTable %>% select(Run, 'Sample Name')
# Look each run accession up in the key and swap in the matching sample name.
# Renaming in place keeps the counts numeric and the ASV row names intact, so
# there is no need to pass the table through a character matrix and re-parse it
count_table_2 <- count_table_no_singletons
colnames(count_table_2) <-
  key[["Sample Name"]][match(colnames(count_table_2), key[["Run"]])]
[36m──[39m [1m[1mColumn specification[1m[22m [36m─────────────────────────────────────────────────────────────────────────────────────────[39m
cols(
.default = col_double()
)
[36mℹ[39m Use [38;5;235m[48;5;253m[38;5;235m[48;5;253m`spec()`[48;5;253m[38;5;235m[49m[39m for the full column specifications.
This process takes a LONG time, so run it once and save an .RData object. In the DADA2 tools there are no options to build a tree (unlike in QIIME 2), but we can build one here using DECIPHER and phangorn
(Based on https://f1000research.com/articles/5-1492/v2)
Make an alignment using tools from Decipher (Note- alignment step takes several hours. Commented out for now. Only need to run once)
## import fasta
# fas <- "dada2_export/ASVs.fa"
# seqs <- readDNAStringSet(fas)
# seqs
#
# # perform the alignment
# aligned <- AlignSeqs(seqs) # automatically detects and uses all cores
#
# # view the alignment in a browser (optional)
# BrowseSeqs(aligned, highlight=0)
#
# # write out aligned sequence file
# writeXStringSet(aligned, file="ASVs.aligned.fasta")
Use the phangorn package to build the tree. Here we build a neighbor-joining tree and then pass it to pml() for the maximum-likelihood step. (This also takes a while to run, so it is commented out for now.)
# phang.align <- phyDat(as(aligned, "matrix"), type="DNA") # convert to phyDat format
# dm <- dist.ml(phang.align) # calculate pairwise distance matrix
# treeNJ <- NJ(dm) # perform neighbor-joining tree method
# fit = pml(treeNJ, data=phang.align) # compute internal max likelihood
Since the step above takes a long time, save all variables up to this point in environment as RData object
save.image("EnvironmentBackups/CariacoEuks_postanalysis_vars_upto_tree.RData")
Re-load
load("EnvironmentBackups/CariacoEuks_postanalysis_vars_upto_tree.RData")
Here we will do ordinations using the phyloseq package, which first requires making phyloseq objects out of each of our input data tables (in the last tutorial, I imported the tree using phyloseq so it is already a phyloseq object)
# Wrap each input table in its phyloseq component class
ASV <- otu_table(count_table_2, taxa_are_rows = TRUE)
TAX <- tax_table(as.matrix(taxonomy))
META <- sample_data(data.frame(metadata, row.names = metadata$`Sample Name`))
# `fit` is not created by any live code in this script: the pml() call that
# produces it is commented out, so it is presumably restored by the load() of
# the saved .RData environment -- confirm the backup contains it
TREE <- phy_tree(fit$tree)
First check that the inputs are in compatible formats by checking for ASV names with the phyloseq function, taxa_names
head(taxa_names(TAX))
[1] "ASV_1" "ASV_2" "ASV_3" "ASV_4" "ASV_5" "ASV_6"
head(taxa_names(ASV))
[1] "ASV_1" "ASV_2" "ASV_3" "ASV_4" "ASV_5" "ASV_6"
head(taxa_names(TREE))
[1] "ASV_1" "ASV_2" "ASV_3" "ASV_4" "ASV_5" "ASV_6"
And check sample names were also detected
head(sample_names(ASV))
[1] "AE3a103A" "AE3b103A" "AE1b900AM" "AE3a103B" "AE3b103B" "AE3a198B"
head(sample_names(META))
[1] "AE3a103A" "AE3b103A" "AE3a198A" "AE3b198A" "AE3a234A" "AE3b234A"
And make the phyloseq object
ps <- phyloseq(ASV, TAX, META , TREE)
Check some features of the phyloseq object
rank_names(ps)
[1] "Kingdom" "Supergroup" "Division" "Class" "Order" "Family" "Genus" "Species"
table(tax_table(ps)[, "Supergroup"], exclude = NULL)
Alveolata Amoebozoa Apusozoa Archaeplastida Excavata Hacrobia Opisthokonta
8880 9 45 108 9 395 768
Rhizaria Stramenopiles <NA>
2405 1086 471
unique(tax_table(ps)[, "Supergroup"])
Taxonomy Table: [10 taxa by 1 taxonomic ranks]:
Supergroup
ASV_1 "Alveolata"
ASV_2 "Rhizaria"
ASV_6 "Stramenopiles"
ASV_18 "Opisthokonta"
ASV_78 "Hacrobia"
ASV_148 "Archaeplastida"
ASV_193 NA
ASV_557 "Apusozoa"
ASV_1114 "Amoebozoa"
ASV_2665 "Excavata"
Filter out those ambiguous Supergroup annotations, losing 471 ASVs
ps <- subset_taxa(ps, !is.na(Supergroup) & !Supergroup %in% c("", "NA"))
table(tax_table(ps)[, "Supergroup"], exclude = NULL)
Alveolata Amoebozoa Apusozoa Archaeplastida Excavata Hacrobia Opisthokonta
8880 9 45 108 9 395 768
Rhizaria Stramenopiles
2405 1086
Check out the Division names
table(tax_table(ps)[, "Division"], exclude = NULL)
Apicomplexa Apusomonadidae Centroheliozoa Cercozoa Chlorophyta
29 26 40 246 64
Choanoflagellida Ciliophora Cryptophyta Dinoflagellata Discoba
54 407 50 8330 1
Foraminifera Fungi Haptophyta Hilomonadea Katablepharidophyta
2 57 215 17 2
Lobosa Mesomycetozoa Metamonada Metazoa Ochrophyta
9 17 8 561 453
Opalozoa Opisthokonta_X Perkinsea Picozoa Pseudofungi
216 14 5 61 72
Radiolaria Rhodophyta Sagenista Stramenopiles_X Streptophyta
2155 4 186 61 38
Telonemia <NA>
27 278
Filter out any with “NA” as Division
ps <- subset_taxa(ps, !is.na(Division) & !Division %in% c(""))
table(tax_table(ps)[, "Division"], exclude = NULL)
Apicomplexa Apusomonadidae Centroheliozoa Cercozoa Chlorophyta
29 26 40 246 64
Choanoflagellida Ciliophora Cryptophyta Dinoflagellata Discoba
54 407 50 8330 1
Foraminifera Fungi Haptophyta Hilomonadea Katablepharidophyta
2 57 215 17 2
Lobosa Mesomycetozoa Metamonada Metazoa Ochrophyta
9 17 8 561 453
Opalozoa Opisthokonta_X Perkinsea Picozoa Pseudofungi
216 14 5 61 72
Radiolaria Rhodophyta Sagenista Stramenopiles_X Streptophyta
2155 4 186 61 38
Telonemia
27
After the above, 13,427 ASVs remain from the original 14,176
Eliminate the libraries that didn’t have many sequences, AE3a198A, AE3b314A, AE2a200A, AE2b900AN, AE2a200B, AE2a267B, AE2a900BN
# Logical mask over the samples of ps (despite the variable's name, these are
# samples, not taxa): TRUE for every sample that is not in the exclusion list
taxa_to_keep <- !sample_names(ps) %in% c("AE3a198A","AE3b314A","AE2a200A","AE2b900AN","AE2a200B","AE2a267B","AE2a900BN")
# Keep only the samples flagged TRUE
ps <- prune_samples(taxa_to_keep, ps)
41 samples remain, and still 13,427 ASVs
Check the rarefaction curves again to make sure those low-sequencing-effort samples have been removed
# Rarefaction curves for the samples that remain in ps (table transposed first)
rarecurve(t(otu_table(ps)), step = 100, cex = 0.5, ylab = "ASVs", label = TRUE)
We have to do this because you may have removed the root of your tree when pruning. (I found this handy function from here, which picks the longest branch to root from.)
# first define function from link above to find furthest outgroup
# Pick the tip to root an unrooted tree on: the tip at the end of the longest
# terminal branch.
#
# tree.unrooted: a tree in ape's "phylo" layout. Only $edge (two-column matrix
#                of parent/child node numbers), $edge.length and $tip.label
#                are read.
# Returns the label (character scalar) of the chosen tip.
# Stops with an error if the tree carries no branch lengths.
#
# Implemented in base R only, so calling it no longer attaches magrittr,
# data.table or ape as a side effect.
pick_new_outgroup <- function(tree.unrooted) {
  if (is.null(tree.unrooted$edge.length)) {
    stop("tree has no branch lengths; cannot pick the longest terminal branch",
         call. = FALSE)
  }
  n_tips <- length(tree.unrooted$tip.label)
  child_node <- tree.unrooted$edge[, 2]
  # Tips are numbered 1..n_tips in the phylo format, so an edge is terminal
  # exactly when its child node number is a tip number. The rows of $edge are
  # NOT guaranteed to list the terminal edges first or in tip-label order, so
  # select them by node number instead of taking the first n_tips rows.
  is_terminal <- child_node <= n_tips
  longest <- which.max(tree.unrooted$edge.length[is_terminal])
  # Take the longest terminal branch as outgroup, labelled via its own tip node
  tree.unrooted$tip.label[child_node[is_terminal][longest]]
}
# then run on my phyloseq tree
my.tree <- phy_tree(ps)
out.group <- pick_new_outgroup(my.tree)
Loading required package: magrittr
Attaching package: ‘magrittr’
The following object is masked from ‘package:purrr’:
set_names
The following object is masked from ‘package:tidyr’:
extract
Loading required package: data.table
data.table 1.13.4 using 1 threads (see ?getDTthreads). Latest news: r-datatable.com
**********
This installation of data.table has not detected OpenMP support. It should still work but in single-threaded mode.
This is a Mac. Please read https://mac.r-project.org/openmp/. Please engage with Apple and ask them for support. Check r-datatable.com for updates, and our Mac instructions here: https://github.com/Rdatatable/data.table/wiki/Installation. After several years of many reports of installation problems on Mac, it's time to gingerly point out that there have been no similar problems on Windows or Linux.
**********
Attaching package: ‘data.table’
The following objects are masked from ‘package:dplyr’:
between, first, last
The following object is masked from ‘package:purrr’:
transpose
out.group
[1] "ASV_10740"
# Then use this outgroup to root the tree
new.tree1 <- ape::root(my.tree, outgroup=out.group, resolve.root=TRUE)
phy_tree(ps) <- new.tree1
# Check if tree is binary (dichotomous not multichotomous)
is.binary.tree(phy_tree(ps))
[1] TRUE
# If false, would have to run
# new.tree2 <- ape::multi2di(new.tree1)
# phy_tree(ps) <- new.tree2
# phy_tree(ps)
Check overall how the phyla are distributed among samples. Phyloseq makes this easy
# First agglomerate the ASVs at the Division rank using the phyloseq function
# tax_glom
DivisionGlommed <- tax_glom(ps, "Division")
# There are many Divisions here, so make a custom color palette by
# interpolating from an existing one in RColorBrewer: one color per distinct
# Division value found in ps
colourCount <- length(table(tax_table(ps)[, "Division"], exclude = NULL))
getPalette <- colorRampPalette(brewer.pal(11, "Spectral"))
DivisionPalette <- getPalette(colourCount)
# and plot
plot_bar(DivisionGlommed, x = "Sample", fill = "Division") +
  scale_fill_manual(values = DivisionPalette)
Plot compositional (relative abundances) instead of absolute abundance using microbiome::transform
ps_ra <- microbiome::transform(ps, transform = "compositional")
(otu_table(ps_ra))[1:5,1:5]
OTU Table: [5 taxa and 5 samples]
taxa are rows
AE3a103A AE3b103A AE1b900AM AE3a103B AE3b103B
ASV_1 4.046390e-04 0.000105531 2.462054e-05 0.000000e+00 2.400346e-05
ASV_2 0.000000e+00 0.000000000 3.132963e-02 0.000000e+00 5.600807e-05
ASV_3 6.674871e-03 0.014117702 2.265089e-02 3.696079e-03 1.055352e-02
ASV_4 1.244014e-03 0.001524337 1.231027e-05 4.769134e-05 6.720968e-04
ASV_5 2.675299e-05 0.000000000 0.000000e+00 7.948557e-06 1.040150e-04
# Then agglomerate the relative-abundance ASVs at the Division rank using the
# phyloseq function tax_glom
DivisionGlommed_RA <- tax_glom(ps_ra, "Division")
# and plot, reusing the Division palette
Division_barplot <- plot_bar(DivisionGlommed_RA, x = "Sample", fill = "Division") +
  scale_fill_manual(values = DivisionPalette) +
  theme(legend.text = element_text(size = 10))
Division_barplot
# export
ggsave("Figures/Division_barplot.eps", Division_barplot, width = 15, height = 5, units = "in")
Lots of dinoflagellates and radiolaria. Makes sense. But the above is the distribution from all samples. Next make plots that indicate distributions across environmental gradients. Calculate averages and use bubble plots
Get average relative abundances from sample replicates
# Relative abundances as a plain data frame, converted once: ASVs in rows,
# samples in columns
ra_df <- data.frame(otu_table(ps_ra))
# Samples that make up each replicate group. Groups listing a single sample
# lost their other replicate when the low-depth libraries were pruned
replicate_samples <- list(
  "103A"  = c("AE3a103A", "AE3b103A"),
  "198A"  = "AE3b198A",                    # Sample AE3a198A was removed
  "234A"  = c("AE3a234A", "AE3b234A"),
  "295A"  = c("AE3a295A", "AE3b295A"),
  "314A"  = "AE3a314A",                    # Sample AE3b314A was removed
  "900AM" = c("AE3a900AM", "AE1b900AM"),
  "103B"  = c("AE3a103B", "AE3b103B"),
  "198B"  = c("AE3a198B", "AE3b198B"),
  "234B"  = c("AE3a234B", "AE3b234B"),
  "295B"  = c("AE3a295B", "AE3b295B"),
  "314B"  = c("AE3a314B", "AE3b314B"),
  "900BM" = c("AE3a900BM", "AE1b900BM"),
  "143A"  = c("AE2a143A", "AE2b143A"),
  "200A"  = "AE2b200A",                    # AE2a200A was removed
  "237A"  = c("AE2a237A", "AE2b237A"),
  "247A"  = c("AE2a247A", "AE2b247A"),
  "267A"  = c("AE2a267A", "AE2b267A"),
  "900AN" = "AE2a900AN",                   # AE2b900AN was removed
  "143B"  = c("AE2a143B", "AE2b143B"),
  "200B"  = "AE2b200B",                    # AE2a200B was removed
  "237B"  = c("AE2a237B", "AE2b237B"),
  "247B"  = c("AE2a247B", "AE2b247B"),
  "267B"  = "AE2b267B",                    # AE2a267B was removed
  "900BN" = "AE2b900BN"                    # AE2a900BN was removed
)
# One column of per-ASV means for each replicate group. drop = FALSE keeps
# single-sample groups as data frames so rowMeans() still applies; cbind()
# carries the ASV IDs over as row names and the group names as column names
otu_table_mean_ra <- as.data.frame(
  do.call(
    cbind,
    lapply(replicate_samples, function(samples) {
      rowMeans(ra_df[, samples, drop = FALSE], na.rm = TRUE)
    })
  )
)
# Put the columns in the order the replicate groups appear in the metadata
otu_table_mean_ra <- otu_table_mean_ra[, unique(metadata$Replicate)]
otu_table_mean_ra
Make into new phyloseq object
metadata2 <- unique(select(metadata,!c('Sample Name',Type,colnames(SRARunTable))))
META2 <- sample_data(data.frame(metadata2, row.names = metadata2$Replicate))
ps_ra_mean <- phyloseq(otu_table(otu_table_mean_ra, taxa_are_rows = TRUE), TAX, TREE, META2)
# First aglomerate the ASVs at the phylum level using the phyloseq function, tax_glom
ps_ra_mean_division <- tax_glom(ps_ra_mean, "Division")
# and check by bar plotting
plot_bar(ps_ra_mean_division, x = "Sample", fill = "Division") +
scale_fill_manual(values = DivisionPalette)
Extract mean relative abundance, glommed by division, from the phyloseq object and pair it to taxonomic data
division_df <- data.frame(otu_table(ps_ra_mean_division))
colnames(division_df) <- colnames(otu_table(ps_ra_mean_division))
division_df$ASV <- rownames(division_df)
otu_table_mean_ra <- left_join(division_df, as_tibble(taxonomy, rownames = "ASV"), by = "ASV")
otu_table_mean_ra
Some manual curating for plotting
# Make a new column that has Supergroup and Division in the same column
otu_table_mean_ra$SupergroupDivision <- paste(otu_table_mean_ra$Supergroup, otu_table_mean_ra$Division)
otu_table_mean_ra
Pivot longer
otu_table_mean_ra <- pivot_longer(otu_table_mean_ra, cols = unique(metadata$Replicate), names_to = "Replicate", values_to = "Mean_RA")
otu_table_mean_ra
Join metadata
otu_table_mean_ra <- left_join(otu_table_mean_ra, unique(select(metadata, c("Replicate", "Depth", "SizeFraction", "Season", "OxCond", "Fluorescence", "BeamAtt", "O2", "Temp", "Salinity", "H2S", "ParticulateS", "TZVS", "CH4", "NO3", "NO2", "NH4", "PO4", "Chemoautotrophy", "BNP", "MicroAbun(x10^8 L^-1)", "FlagAbun(x10^5 L-1)", "VLP(x10^8 L-1)"))), by = "Replicate")
# Replace zeroes in RA with NA (better for plotting)
otu_table_mean_ra$Mean_RA[otu_table_mean_ra$Mean_RA == 0] <- NA
otu_table_mean_ra
# reorder some factors to make them plot in the order I want
otu_table_mean_ra$OxCond <- factor(otu_table_mean_ra$OxCond, levels = c("Oxycline", "ShallowAnoxic", "Euxinic"))
otu_table_mean_ra$SizeFraction <- factor(otu_table_mean_ra$SizeFraction, levels = c("PA", "FL"))
# Bubble plot of mean relative abundance per Supergroup/Division against depth,
# one panel per Season x SizeFraction combination, colored by redox condition
euk_divisions_bubbleplot_color <- ggplot(
  otu_table_mean_ra,
  aes(
    x = as.character(Depth),
    # order the taxa on the y axis by their summed mean relative abundance
    y = reorder(SupergroupDivision, Mean_RA, function(x) sum(x, na.rm = TRUE)),
    color = OxCond
  )
) +
  geom_point(aes(size = Mean_RA)) +
  facet_wrap(Season ~ SizeFraction, scales = "free_x", drop = TRUE, ncol = 4) +
  # Only one size scale is set. An earlier scale_size(range = c(1, 15)) was
  # removed: ggplot2 replaced it with this scale anyway (and printed a
  # "Scale for 'size' is already present" message), so it had no effect
  scale_size_area(breaks = c(0, .25, .5, .75, 1), max_size = 6) +
  xlab("Depth") +
  ylab("") +
  labs(size = "Relative Abundance", color = "Redox Condition") +
  scale_color_manual(values = c("blue", "red", "brown4")) +
  theme_bw() +
  theme(
    axis.text = element_text(size = 8),
    axis.text.x = element_text(size = 8, angle = 45, hjust = 1),
    axis.title = element_text(size = 8),
    legend.title = element_text(size = 8),
    legend.text = element_text(size = 8),
    strip.text = element_text(size = 8),
    legend.margin = margin(0, 0, 0, 2),
    legend.box.margin = margin(-10, -10, -10, -10),
    plot.margin = grid::unit(c(0, 0, 0, 0), "mm")
  )
Scale for 'size' is already present. Adding another scale for 'size', which will replace the existing scale.
euk_divisions_bubbleplot_color
Save figure
# set explicit panel size so they will be consistent for all figures
euk_divisions_bubbleplot_color <- set_panel_size(euk_divisions_bubbleplot_color, width = unit(22, "mm"), height = unit(100, "mm"))
Removed 249 rows containing missing values (geom_point).
ggsave(filename = "Figures/euk_divisions_bubbleplot_color.eps", plot = euk_divisions_bubbleplot_color, units = c("mm"), width = 180, height = 125, dpi = 300)
Filter to only Alveolates; glom by order
keeptaxa <- taxa_names(ps_ra_mean)[(as.data.frame(tax_table(ps_ra_mean))$Supergroup %in% c("Alveolata"))]
ps_ra_mean_alveolates <- prune_taxa(keeptaxa, ps_ra_mean)
ps_ra_mean_alveolate_orders <- tax_glom(ps_ra_mean_alveolates, "Order")
aveloates_df <- data.frame(otu_table(ps_ra_mean_alveolate_orders))
colnames(aveloates_df) <- colnames(otu_table(ps_ra_mean_alveolate_orders))
aveloates_df$ASV <- rownames(aveloates_df)
otu_table_mean_ra <- left_join(aveloates_df, as_tibble(taxonomy, rownames = "ASV"), by = "ASV")
otu_table_mean_ra
Some manual curating for plotting
# Make a new column that has descriptive taxonomy
otu_table_mean_ra$Descriptive <- paste(otu_table_mean_ra$Division, otu_table_mean_ra$Class, otu_table_mean_ra$Order)
otu_table_mean_ra
Pivot longer
otu_table_mean_ra <- pivot_longer(otu_table_mean_ra, cols = unique(metadata$Replicate), names_to = "Replicate", values_to = "Mean_RA")
otu_table_mean_ra
Join metadata
otu_table_mean_ra <- left_join(otu_table_mean_ra, unique(select(metadata, c("Replicate", "Depth", "SizeFraction", "Season", "OxCond", "Fluorescence", "BeamAtt", "O2", "Temp", "Salinity", "H2S", "ParticulateS", "TZVS", "CH4", "NO3", "NO2", "NH4", "PO4", "Chemoautotrophy", "BNP", "MicroAbun(x10^8 L^-1)", "FlagAbun(x10^5 L-1)", "VLP(x10^8 L-1)"))), by = "Replicate")
# Replace zeroes in RA with NA (better for plotting)
otu_table_mean_ra$Mean_RA[otu_table_mean_ra$Mean_RA == 0] <- NA
otu_table_mean_ra
Shorten some labels to make space in plot
# Long "Division Class Order" labels and the shorter text to plot instead
short_labels <- c(
  "Ciliophora Cyclotrichium_like_organism Cyclotrichium_like_organism_X" = "Cilio. Cyclotrichium_like Cyclotrichium_like",
  "Apicomplexa Gregarinomorphea Gregarines_GRE2" = "Apicom. Gregarinomorphea GRE2",
  "Apicomplexa Gregarinomorphea Eugregarinorida" = "Apicom. Gregarinomorphea Eugregarinorida",
  "Apicomplexa Coccidiomorphea Agamococcidiorida" = "Apicom. Coccidiomorphea Agamococcidiorida",
  "Dinoflagellata Ellobiophyceae Thalassomycetales" = "Dino. Ellobiophyceae Thalassomycetales",
  "Ciliophora Oligohymenophorea Scuticociliatia_1" = "Cilio. Oligohymenophorea Scuticociliatia_1",
  "Apicomplexa Apicomplexa_X Apicomplexa_XX" = "Apicom. Apicomplexa_X Apicomplexa_XX"
)
# Only the Descriptive column holds these pasted labels, so replace there
# instead of comparing every cell of the table against each string (that mask
# also picks up NA wherever Mean_RA was set to NA). %in% never yields NA
needs_short <- otu_table_mean_ra$Descriptive %in% names(short_labels)
otu_table_mean_ra$Descriptive[needs_short] <-
  unname(short_labels[otu_table_mean_ra$Descriptive[needs_short]])
otu_table_mean_ra
# reorder some factors to make them plot in the order I want
otu_table_mean_ra$OxCond <- factor(otu_table_mean_ra$OxCond, levels = c("Oxycline", "ShallowAnoxic", "Euxinic"))
otu_table_mean_ra$SizeFraction <- factor(otu_table_mean_ra$SizeFraction, levels = c("PA", "FL"))
# Bubble plot of mean relative abundance per alveolate Order (labelled
# "Division Class Order") against depth, one panel per Season x SizeFraction
# combination, colored by redox condition
alveolata_bubbleplot_color <- ggplot(
  otu_table_mean_ra,
  aes(
    x = as.character(Depth),
    # order the taxa on the y axis by their summed mean relative abundance
    y = reorder(Descriptive, Mean_RA, function(x) sum(x, na.rm = TRUE)),
    color = OxCond
  )
) +
  geom_point(aes(size = Mean_RA)) +
  facet_wrap(Season ~ SizeFraction, scales = "free_x", drop = TRUE, ncol = 4) +
  # Only one size scale is set. An earlier scale_size(range = c(1, 15)) was
  # removed: ggplot2 replaced it with this scale anyway (and printed a
  # "Scale for 'size' is already present" message), so it had no effect
  scale_size_area(breaks = c(0, .25, .5, .75, 1), max_size = 6) +
  xlab("Depth") +
  ylab("") +
  labs(size = "Relative Abundance", color = "Redox Condition") +
  scale_color_manual(values = c("blue", "red", "brown4")) +
  theme_bw() +
  theme(
    axis.text = element_text(size = 8),
    axis.text.x = element_text(size = 8, angle = 45, hjust = 1),
    axis.title = element_text(size = 8),
    legend.title = element_text(size = 8),
    legend.text = element_text(size = 8),
    strip.text = element_text(size = 8),
    legend.margin = margin(0, 0, 0, 2),
    legend.box.margin = margin(-10, -10, -10, -10),
    plot.margin = grid::unit(c(0, 0, 0, 0), "mm")
  )
Scale for 'size' is already present. Adding another scale for 'size', which will replace the existing scale.
alveolata_bubbleplot_color
Save figure
# set explicit panel size so they will be consistent for all figures
alveolata_bubbleplot_color <- set_panel_size(alveolata_bubbleplot_color, width = unit(20, "mm"), height = unit(125, "mm"))
Removed 724 rows containing missing values (geom_point).
# Write the figure to an EPS file (180 mm x 150 mm)
ggsave(
  filename = "Figures/alveolata_bubbleplot_color.eps",
  plot = alveolata_bubbleplot_color,
  width = 180,
  height = 150,
  units = "mm",
  dpi = 300
)
Filter to only Rhizaria; glom by order
# Keep only the taxa of ps_ra_mean whose Supergroup is Rhizaria
keeptaxa <- taxa_names(ps_ra_mean)[as.data.frame(tax_table(ps_ra_mean))$Supergroup %in% "Rhizaria"]
ps_ra_mean_rhizaria <- prune_taxa(keeptaxa, ps_ra_mean)
# Agglomerate the remaining taxa at the Order rank
ps_ra_mean_rhizaria_orders <- tax_glom(ps_ra_mean_rhizaria, taxrank = "Order")
# Abundance table as a data frame. The column names are copied back from the
# otu_table (presumably because data.frame() alters names that start with a
# digit, such as "103A" - TODO confirm).
rhizaria_df <- data.frame(otu_table(ps_ra_mean_rhizaria_orders))
colnames(rhizaria_df) <- colnames(otu_table(ps_ra_mean_rhizaria_orders))
rhizaria_df$ASV <- rownames(rhizaria_df)
# Add the taxonomy columns, matched on the ASV identifier
otu_table_mean_ra <- rhizaria_df %>%
  left_join(as_tibble(taxonomy, rownames = "ASV"), by = "ASV")
otu_table_mean_ra
Some manual curating for plotting
# Build one descriptive label per row: "<Division> <Class> <Order>"
otu_table_mean_ra$Descriptive <- with(otu_table_mean_ra, paste(Division, Class, Order))
otu_table_mean_ra
Pivot longer
# Long format: the Replicate columns become rows, with their values in Mean_RA
otu_table_mean_ra <- otu_table_mean_ra %>%
  pivot_longer(
    cols = unique(metadata$Replicate),
    names_to = "Replicate",
    values_to = "Mean_RA"
  )
otu_table_mean_ra
Join metadata
# Attach the de-duplicated, Replicate-level metadata columns, then turn zero
# abundances into NA (better for plotting)
otu_table_mean_ra <- otu_table_mean_ra %>%
  left_join(
    metadata %>%
      select(c(
        "Replicate", "Depth", "SizeFraction", "Season", "OxCond",
        "Fluorescence", "BeamAtt", "O2", "Temp", "Salinity", "H2S",
        "ParticulateS", "TZVS", "CH4", "NO3", "NO2", "NH4", "PO4",
        "Chemoautotrophy", "BNP", "MicroAbun(x10^8 L^-1)",
        "FlagAbun(x10^5 L-1)", "VLP(x10^8 L-1)"
      )) %>%
      unique(),
    by = "Replicate"
  ) %>%
  mutate(Mean_RA = na_if(Mean_RA, 0))
otu_table_mean_ra
Shorten some labels to make space in plot
# Shorten long taxon labels so they fit on the plot's y axis.
# Only the Descriptive column (the y-axis label) is recoded, instead of
# comparing every cell of every column against each label. Labels without an
# entry below are left unchanged.
# The "Novel-clade" replacement now uses plain ASCII hyphens throughout; it
# previously mixed in U+2212 minus signs, which the EPS (postscript) device
# may not be able to encode - NOTE(review): confirm against the saved figure.
otu_table_mean_ra$Descriptive <- dplyr::recode(
  otu_table_mean_ra$Descriptive,
  "Radiolaria Acantharea Arthracanthida-Symphyacanthida" = "Radiolaria Acantharea A-S",
  "Cercozoa Chlorarachniophyceae Chlorarachniophyceae_X" = "Cercozoa Chlor. Chlor._X",
  "Cercozoa Filosa-Thecofilosea Filosa-Thecofilosea_X" = "Cercozoa F-T F-T_X",
  "Cercozoa Filosa-Granofilosea Filosa-Granofilosea_X" = "Cercozoa F-G. F-G._X",
  "Cercozoa Chlorarachniophyceae Chlorarachnida" = "Cercozoa Chlor. Chlorarachnida",
  "Cercozoa Filosa-Imbricatea Thaumatomonadida" = "Cercozoa F-I Thaumatomonadida",
  "Cercozoa Filosa-Imbricatea Filosa-Imbricatea_X" = "Cercozoa F-I F-I_X",
  "Cercozoa Endomyxa-Phytomyxea Phagomyxida" = "Cercozoa E-P Phagomyxida",
  "Cercozoa Filosa-Sarcomonadea Cercomonadida" = "Cercozoa F-S Cercomonadida",
  "Cercozoa Novel-clade-10-12 Novel-clade-12" = "Cercozoa N-C-10-12 N-C-12",
  "Cercozoa Filosa-Thecofilosea Ventricleftida" = "Cercozoa F-T Ventricleftida",
  "Cercozoa Filosa-Imbricatea Marimonadida" = "Cercozoa F-I Marimonadida",
  "Cercozoa Filosa-Thecofilosea Cryomonadida" = "Cercozoa F-T Cryomonadida",
  "Cercozoa Filosa-Sarcomonadea Glissomonadida" = "Cercozoa F-S Glissomonadida",
  "Cercozoa Filosa-Imbricatea Euglyphida" = "Cercozoa F-I Euglyphida"
)
otu_table_mean_ra
# Set explicit factor levels so the colour legend and the facets follow this order
otu_table_mean_ra$OxCond <- factor(otu_table_mean_ra$OxCond, levels = c("Oxycline", "ShallowAnoxic", "Euxinic"))
otu_table_mean_ra$SizeFraction <- factor(otu_table_mean_ra$SizeFraction, levels = c("PA", "FL"))

# Bubble plot: one point per taxon label and depth, sized by Mean_RA and
# coloured by OxCond. Labels on the y axis are ordered by their summed Mean_RA
# (NAs ignored). Depth is converted to character so it is treated as discrete.
rhizaria_bubbleplot_color <- ggplot(
  otu_table_mean_ra,
  aes(
    x = as.character(Depth),
    y = reorder(Descriptive, Mean_RA, function(x) sum(x, na.rm = TRUE)),
    color = OxCond
  )
) +
  geom_point(aes(size = Mean_RA)) +
  facet_wrap(Season ~ SizeFraction, scales = "free_x", drop = TRUE, ncol = 4) +
  # Only one size scale is declared: ggplot2 keeps a single scale per
  # aesthetic, so an additional scale_size() would simply be replaced.
  scale_size_area(breaks = c(0, .25, .5, .75, 1), max_size = 6) +
  labs(x = "Depth", y = "", size = "Relative Abundance", color = "Redox Condition") +
  # Colours are matched to the OxCond levels in the order set above
  scale_color_manual(values = c("blue", "red", "brown4")) +
  theme_bw() +
  theme(
    axis.text = element_text(size = 8),
    axis.text.x = element_text(size = 8, angle = 45, hjust = 1),
    axis.title = element_text(size = 8),
    legend.title = element_text(size = 8),
    legend.text = element_text(size = 8),
    strip.text = element_text(size = 8),
    legend.margin = margin(0, 0, 0, 2),
    legend.box.margin = margin(-10, -10, -10, -10),
    plot.margin = grid::unit(c(0, 0, 0, 0), "mm")
  )
Scale for 'size' is already present. Adding another scale for 'size', which will replace the existing scale.
rhizaria_bubbleplot_color
Save figure
# Give every panel a fixed 20 mm x 100 mm size so panels match across figures
rhizaria_bubbleplot_color <- set_panel_size(
  rhizaria_bubbleplot_color,
  width = unit(20, "mm"),
  height = unit(100, "mm")
)
# Write the figure to an EPS file (180 mm x 125 mm)
ggsave(
  filename = "Figures/rhizaria_bubbleplot_color.eps",
  plot = rhizaria_bubbleplot_color,
  width = 180,
  height = 125,
  units = "mm",
  dpi = 300
)
Filter to only Opisthokonta; glom by order
There were 29 warnings (use warnings() to see them)
# Keep only the taxa of ps_ra_mean whose Supergroup is Opisthokonta
keeptaxa <- taxa_names(ps_ra_mean)[as.data.frame(tax_table(ps_ra_mean))$Supergroup %in% "Opisthokonta"]
ps_ra_mean_opisthokonta <- prune_taxa(keeptaxa, ps_ra_mean)
# Agglomerate the remaining taxa at the Order rank
ps_ra_mean_opisthokonta_orders <- tax_glom(ps_ra_mean_opisthokonta, taxrank = "Order")
# Abundance table as a data frame. The column names are copied back from the
# otu_table (presumably because data.frame() alters names that start with a
# digit, such as "103A" - TODO confirm).
opisthokonta_df <- data.frame(otu_table(ps_ra_mean_opisthokonta_orders))
colnames(opisthokonta_df) <- colnames(otu_table(ps_ra_mean_opisthokonta_orders))
opisthokonta_df$ASV <- rownames(opisthokonta_df)
# Add the taxonomy columns, matched on the ASV identifier
otu_table_mean_ra <- opisthokonta_df %>%
  left_join(as_tibble(taxonomy, rownames = "ASV"), by = "ASV")
otu_table_mean_ra
Some manual curating for plotting
# Build one descriptive label per row: "<Division> <Class> <Order>"
otu_table_mean_ra$Descriptive <- with(otu_table_mean_ra, paste(Division, Class, Order))
otu_table_mean_ra
Pivot longer
# Long format: the Replicate columns become rows, with their values in Mean_RA
otu_table_mean_ra <- otu_table_mean_ra %>%
  pivot_longer(
    cols = unique(metadata$Replicate),
    names_to = "Replicate",
    values_to = "Mean_RA"
  )
otu_table_mean_ra
Join metadata
# Attach the de-duplicated, Replicate-level metadata columns, then turn zero
# abundances into NA (better for plotting)
otu_table_mean_ra <- otu_table_mean_ra %>%
  left_join(
    metadata %>%
      select(c(
        "Replicate", "Depth", "SizeFraction", "Season", "OxCond",
        "Fluorescence", "BeamAtt", "O2", "Temp", "Salinity", "H2S",
        "ParticulateS", "TZVS", "CH4", "NO3", "NO2", "NH4", "PO4",
        "Chemoautotrophy", "BNP", "MicroAbun(x10^8 L^-1)",
        "FlagAbun(x10^5 L-1)", "VLP(x10^8 L-1)"
      )) %>%
      unique(),
    by = "Replicate"
  ) %>%
  mutate(Mean_RA = na_if(Mean_RA, 0))
otu_table_mean_ra
Shorten some labels to make space in plot
# Shorten long taxon labels so they fit on the plot's y axis.
# Only the Descriptive column (the y-axis label) is recoded, instead of
# comparing every cell of every column against each label. Labels without an
# entry below are left unchanged.
otu_table_mean_ra$Descriptive <- dplyr::recode(
  otu_table_mean_ra$Descriptive,
  "Choanoflagellida Choanoflagellatea Acanthoecida" = "Choanof. Ch. Acanthoecida",
  "Choanoflagellida Choanoflagellatea Craspedida" = "Choanof. Ch. Craspedida",
  "Choanoflagellida Choanoflagellatea Choanoflagellatea_X" = "Choanof. Ch. Choanoflagellatea_X",
  "Choanoflagellida Choanoflagellida_X Choanoflagellida_XX" = "Choanof. Choanof._X Choanof._XX",
  "Mesomycetozoa Ichthyosporea Ichthyosphonida" = "Mesomy. Ichthyosporea Ichthyosphonida",
  "Opisthokonta_X Opisthokonta_XX Opisthokonta_XXX" = "Opis._X Opis._XX Opis._XXX"
)
otu_table_mean_ra
# Set explicit factor levels so the colour legend and the facets follow this order
otu_table_mean_ra$OxCond <- factor(otu_table_mean_ra$OxCond, levels = c("Oxycline", "ShallowAnoxic", "Euxinic"))
otu_table_mean_ra$SizeFraction <- factor(otu_table_mean_ra$SizeFraction, levels = c("PA", "FL"))

# Bubble plot: one point per taxon label and depth, sized by Mean_RA and
# coloured by OxCond. Labels on the y axis are ordered by their summed Mean_RA
# (NAs ignored). Depth is converted to character so it is treated as discrete.
# NOTE(review): the object name is spelled "opithokonta" (sic); it is kept
# as-is because code outside this section may refer to it.
opithokonta_bubbleplot_color <- ggplot(
  otu_table_mean_ra,
  aes(
    x = as.character(Depth),
    y = reorder(Descriptive, Mean_RA, function(x) sum(x, na.rm = TRUE)),
    color = OxCond
  )
) +
  geom_point(aes(size = Mean_RA)) +
  facet_wrap(Season ~ SizeFraction, scales = "free_x", drop = TRUE, ncol = 4) +
  # Only one size scale is declared: ggplot2 keeps a single scale per
  # aesthetic, so an additional scale_size() would simply be replaced.
  scale_size_area(breaks = c(0, .1, .2, .3), max_size = 6) +
  labs(x = "Depth", y = "", size = "Relative Abundance", color = "Redox Condition") +
  # Colours are matched to the OxCond levels in the order set above
  scale_color_manual(values = c("blue", "red", "brown4")) +
  theme_bw() +
  theme(
    axis.text = element_text(size = 8),
    axis.text.x = element_text(size = 8, angle = 45, hjust = 1),
    axis.title = element_text(size = 8),
    legend.title = element_text(size = 8),
    legend.text = element_text(size = 8),
    strip.text = element_text(size = 8),
    legend.margin = margin(0, 0, 0, 2),
    legend.box.margin = margin(-10, -10, -10, -10),
    plot.margin = grid::unit(c(0, 0, 0, 0), "mm")
  )
Scale for 'size' is already present. Adding another scale for 'size', which will replace the existing scale.
opithokonta_bubbleplot_color
Save figure
# Give every panel a fixed 20 mm x 100 mm size so panels match across figures
opithokonta_bubbleplot_color <- set_panel_size(
  opithokonta_bubbleplot_color,
  width = unit(20, "mm"),
  height = unit(100, "mm")
)
Removed 447 rows containing missing values (geom_point).
# Write the figure to an EPS file (180 mm x 125 mm)
ggsave(
  filename = "Figures/opithokonta_bubbleplot_color.eps",
  plot = opithokonta_bubbleplot_color,
  width = 180,
  height = 125,
  units = "mm",
  dpi = 300
)
Filter to only Stramenopiles; glom by class (more meaningful than Order in this case)
There were 29 warnings (use warnings() to see them)
# Keep only the taxa of ps_ra_mean whose Supergroup is Stramenopiles
keeptaxa <- taxa_names(ps_ra_mean)[as.data.frame(tax_table(ps_ra_mean))$Supergroup %in% "Stramenopiles"]
ps_ra_mean_stramenopiles <- prune_taxa(keeptaxa, ps_ra_mean)
# Agglomerate the remaining taxa at the Class rank (not Order) for this group
ps_ra_mean_stramenopiles_classes <- tax_glom(ps_ra_mean_stramenopiles, taxrank = "Class")
# Abundance table as a data frame. The column names are copied back from the
# otu_table (presumably because data.frame() alters names that start with a
# digit, such as "103A" - TODO confirm).
stramenopiles_df <- data.frame(otu_table(ps_ra_mean_stramenopiles_classes))
colnames(stramenopiles_df) <- colnames(otu_table(ps_ra_mean_stramenopiles_classes))
stramenopiles_df$ASV <- rownames(stramenopiles_df)
# Add the taxonomy columns, matched on the ASV identifier
otu_table_mean_ra <- stramenopiles_df %>%
  left_join(as_tibble(taxonomy, rownames = "ASV"), by = "ASV")
otu_table_mean_ra
Some manual curating for plotting
# Build one descriptive label per row: "<Division> <Class>"
otu_table_mean_ra$Descriptive <- with(otu_table_mean_ra, paste(Division, Class))
otu_table_mean_ra
Pivot longer
# Long format: the Replicate columns become rows, with their values in Mean_RA
otu_table_mean_ra <- otu_table_mean_ra %>%
  pivot_longer(
    cols = unique(metadata$Replicate),
    names_to = "Replicate",
    values_to = "Mean_RA"
  )
otu_table_mean_ra
Join metadata
# Attach the de-duplicated, Replicate-level metadata columns, then turn zero
# abundances into NA (better for plotting)
otu_table_mean_ra <- otu_table_mean_ra %>%
  left_join(
    metadata %>%
      select(c(
        "Replicate", "Depth", "SizeFraction", "Season", "OxCond",
        "Fluorescence", "BeamAtt", "O2", "Temp", "Salinity", "H2S",
        "ParticulateS", "TZVS", "CH4", "NO3", "NO2", "NH4", "PO4",
        "Chemoautotrophy", "BNP", "MicroAbun(x10^8 L^-1)",
        "FlagAbun(x10^5 L-1)", "VLP(x10^8 L-1)"
      )) %>%
      unique(),
    by = "Replicate"
  ) %>%
  mutate(Mean_RA = na_if(Mean_RA, 0))
otu_table_mean_ra
Shorten some labels to make space in plot
# Shorten long taxon labels so they fit on the plot's y axis.
# Only the Descriptive column (the y-axis label) is recoded, instead of
# comparing every cell of every column against each label. Labels without an
# entry below are left unchanged.
# All replacements now use plain ASCII hyphens; several previously contained
# U+2212 minus signs (while "MAST-25" did not), which the EPS (postscript)
# device may not be able to encode - NOTE(review): confirm against the saved
# figure.
otu_table_mean_ra$Descriptive <- dplyr::recode(
  otu_table_mean_ra$Descriptive,
  "Stramenopiles_X Stramenopiles_XX" = "Strameno._X Strameno._XX",
  "Stramenopiles_X Stramenopiles_X-Group-7" = "Strameno._X Strameno._X-Group-7",
  "Stramenopiles_X MAST-21" = "Strameno._X MAST-21",
  "Stramenopiles_X MAST-25" = "Strameno._X MAST-25",
  "Stramenopiles_X Stramenopiles_X-Group-4" = "Strameno._X Strameno._X-Group-4",
  "Stramenopiles_X Stramenopiles_X-Group-6" = "Strameno._X Strameno._X-Group-6",
  "Stramenopiles_X Stramenopiles_X-Group-8" = "Strameno._X Strameno._X-Group-8"
)
otu_table_mean_ra
# Set explicit factor levels so the colour legend and the facets follow this order
otu_table_mean_ra$OxCond <- factor(otu_table_mean_ra$OxCond, levels = c("Oxycline", "ShallowAnoxic", "Euxinic"))
otu_table_mean_ra$SizeFraction <- factor(otu_table_mean_ra$SizeFraction, levels = c("PA", "FL"))

# Bubble plot: one point per taxon label and depth, sized by Mean_RA and
# coloured by OxCond. Labels on the y axis are ordered by their summed Mean_RA
# (NAs ignored). Depth is converted to character so it is treated as discrete.
stramenopiles_bubbleplot_color <- ggplot(
  otu_table_mean_ra,
  aes(
    x = as.character(Depth),
    y = reorder(Descriptive, Mean_RA, function(x) sum(x, na.rm = TRUE)),
    color = OxCond
  )
) +
  geom_point(aes(size = Mean_RA)) +
  facet_wrap(Season ~ SizeFraction, scales = "free_x", drop = TRUE, ncol = 4) +
  # Only one size scale is declared: ggplot2 keeps a single scale per
  # aesthetic, so an additional scale_size() would simply be replaced.
  scale_size_area(breaks = c(0, .1, .2, .3), max_size = 6) +
  labs(x = "Depth", y = "", size = "Relative Abundance", color = "Redox Condition") +
  # Colours are matched to the OxCond levels in the order set above
  scale_color_manual(values = c("blue", "red", "brown4")) +
  theme_bw() +
  theme(
    axis.text = element_text(size = 8),
    axis.text.x = element_text(size = 8, angle = 45, hjust = 1),
    axis.title = element_text(size = 8),
    legend.title = element_text(size = 8),
    legend.text = element_text(size = 8),
    strip.text = element_text(size = 8),
    legend.margin = margin(0, 0, 0, 2),
    legend.box.margin = margin(-10, -10, -10, -10),
    plot.margin = grid::unit(c(0, 0, 0, 0), "mm")
  )
Scale for 'size' is already present. Adding another scale for 'size', which will replace the existing scale.
stramenopiles_bubbleplot_color
Save figure
# Give every panel a fixed 22 mm x 115 mm size so panels match across figures
stramenopiles_bubbleplot_color <- set_panel_size(
  stramenopiles_bubbleplot_color,
  width = unit(22, "mm"),
  height = unit(115, "mm")
)
# Write the figure to an EPS file (180 mm x 150 mm)
ggsave(
  filename = "Figures/stramenopiles_bubbleplot_color.eps",
  plot = stramenopiles_bubbleplot_color,
  width = 180,
  height = 150,
  units = "mm",
  dpi = 300
)
# Shannon index of every sample in ps, computed on the transposed OTU table
# and transposed again into a one-row matrix (one column per sample)
shannons <- t(vegan::diversity(t(otu_table(ps)), index = "shannon"))
shannons
AE3a103A AE3b103A AE1b900AM AE3a103B AE3b103B AE3a198B AE3b198B AE3a234B AE3b234B AE3a295B AE3b295B AE3a314B
[1,] 4.871221 4.956114 2.916447 4.192101 5.048457 5.352167 5.143548 5.169616 4.959116 2.736109 3.53949 2.780448
AE3b198A AE3b314B AE3a900BM AE1b900BM AE2a143A AE2b143A AE2b200A AE2a237A AE2b237A AE2a247A AE3a234A AE2b247A
[1,] 4.391812 3.143426 3.137984 2.137569 3.083671 4.690686 3.128682 4.191647 4.308389 2.398659 5.334367 2.36533
AE2a267A AE2b267A AE2a900AN AE2a143B AE2b143B AE2b200B AE2a237B AE3b234A AE2b237B AE2a247B AE2b247B AE2b267B
[1,] 3.826925 3.929226 3.047765 4.962882 3.019449 4.772924 2.413723 4.62931 3.37624 2.595961 2.714695 4.361093
AE2b900BN AE3a295A AE3b295A AE3a314A AE3a900AM
[1,] 4.492629 3.07776 2.638438 4.522401 3.592396
# Average the Shannon index over the sequenced libraries of each Replicate.
# Each entry maps a Replicate label to the library columns of `shannons` that
# contribute to it; groups with a single library keep that library's value.
replicate_libraries <- list(
  "103A" = c("AE3a103A", "AE3b103A"),
  "198A" = "AE3b198A", # Sample AE3a198A was removed
  "234A" = c("AE3a234A", "AE3b234A"),
  "295A" = c("AE3a295A", "AE3b295A"),
  "314A" = "AE3a314A", # Sample AE3b314A was removed
  "900AM" = c("AE3a900AM", "AE1b900AM"),
  "103B" = c("AE3a103B", "AE3b103B"),
  "198B" = c("AE3a198B", "AE3b198B"),
  "234B" = c("AE3a234B", "AE3b234B"),
  "295B" = c("AE3a295B", "AE3b295B"),
  "314B" = c("AE3a314B", "AE3b314B"),
  "900BM" = c("AE3a900BM", "AE1b900BM"),
  "143A" = c("AE2a143A", "AE2b143A"),
  "200A" = "AE2b200A", # AE2a200A was removed
  "237A" = c("AE2a237A", "AE2b237A"),
  "247A" = c("AE2a247A", "AE2b247A"),
  "267A" = c("AE2a267A", "AE2b267A"),
  "900AN" = "AE2a900AN", # AE2b900AN was removed
  "143B" = c("AE2a143B", "AE2b143B"),
  "200B" = "AE2b200B", # AE2a200B was removed
  "237B" = c("AE2a237B", "AE2b237B"),
  "247B" = c("AE2a247B", "AE2b247B"),
  "267B" = "AE2b267B", # AE2a267B was removed
  "900BN" = "AE2b900BN" # AE2a900BN was removed
)
shannons_by_library <- data.frame(shannons)
# One column per Replicate; check.names = FALSE keeps labels such as "103A"
shannons_mean <- data.frame(
  lapply(
    replicate_libraries,
    function(libraries) {
      # drop = FALSE keeps a one-column selection as a data frame for rowMeans()
      rowMeans(shannons_by_library[, libraries, drop = FALSE], na.rm = TRUE)
    }
  ),
  check.names = FALSE
)
# Keep the Replicate columns in the order they appear in the metadata
shannons_mean <- shannons_mean[, unique(metadata$Replicate)]
shannons_mean
# Long format (one row per Replicate, value in Shannons), then attach the
# de-duplicated Replicate-level metadata columns
shannons_mean <- shannons_mean %>%
  pivot_longer(
    cols = unique(metadata$Replicate),
    names_to = "Replicate",
    values_to = "Shannons"
  ) %>%
  left_join(
    metadata %>%
      select(c(
        "Replicate", "Depth", "SizeFraction", "Season", "OxCond",
        "Fluorescence", "BeamAtt", "O2", "Temp", "Salinity", "H2S",
        "ParticulateS", "TZVS", "CH4", "NO3", "NO2", "NH4", "PO4",
        "Chemoautotrophy", "BNP", "MicroAbun(x10^8 L^-1)",
        "FlagAbun(x10^5 L-1)", "VLP(x10^8 L-1)"
      )) %>%
      unique(),
    by = "Replicate"
  )
shannons_mean

# Set explicit factor levels so the colour legend and the facets follow this order
shannons_mean$OxCond <- factor(shannons_mean$OxCond, levels = c("Oxycline", "ShallowAnoxic", "Euxinic"))
shannons_mean$SizeFraction <- factor(shannons_mean$SizeFraction, levels = c("PA", "FL"))

# Axis title with an italic H'
ytitle <- expression(paste("Shannon's Diversity Index (", italic("H'"), ")"))

# Profile plot of the Shannon index against depth. Depth is mapped to x,
# reversed, and the coordinates are flipped, so depth runs down the vertical
# axis of each Season x SizeFraction panel.
shannonsplot <- ggplot(shannons_mean, aes(x = Depth, y = Shannons, color = OxCond)) +
  geom_line(size = 1, color = "black", lty = "dotted") +
  geom_point(size = 3, shape = 16) +
  labs(y = ytitle, x = "Depth (m)", color = "Redox Condition") +
  scale_x_reverse(expand = c(0, 0)) +
  coord_flip(xlim = c(910, 100)) +
  facet_wrap(Season ~ SizeFraction, drop = TRUE, ncol = 4) +
  scale_color_manual(values = c("blue", "red", "brown4")) +
  theme_bw() +
  theme(
    legend.position = "right",
    axis.text = element_text(size = 8),
    axis.text.x = element_text(size = 8),
    axis.title = element_text(size = 8),
    legend.title = element_text(size = 8),
    legend.text = element_text(size = 8),
    strip.text = element_text(size = 8),
    legend.margin = margin(0, 0, 0, 2)
  )
shannonsplot
NA
NA
Export Plot
# Give every panel a fixed 22 mm x 60 mm size so panels match across figures
shannonsplot <- set_panel_size(
  shannonsplot,
  width = unit(22, "mm"),
  height = unit(60, "mm")
)
# Write the figure to an EPS file (180 mm x 80 mm)
ggsave(
  filename = "Figures/shannonsplot.eps",
  plot = shannonsplot,
  width = 180,
  height = 80,
  units = "mm",
  dpi = 300
)
McMurdie and Holmes (2013) filter out taxa that were not seen with more than 3 counts in at least 20% of the samples. Also add a pseudocount of 1 to all counts, so that later calculations (log, division, etc.) do not return errors due to zeroes.
# Prevalence filter: keep a taxon only if it has more than 3 counts in more
# than 20% of the samples; prune = TRUE returns the pruned phyloseq object
ps_filtered <- filter_taxa(
  ps,
  function(x) sum(x > 3) > (0.2 * length(x)),
  prune = TRUE
)
# Add a pseudocount of 1 to every count
ps_filtered <- transform_sample_counts(ps_filtered, function(x) x + 1)
# Relative-abundance version restricted to the same taxa (for plotting purposes)
ps_ra_filtered <- prune_taxa(taxa_names(ps_filtered), ps_ra)
# Print the unfiltered object to check its number of ASVs
ps
phyloseq-class experiment-level object
otu_table() OTU Table: [ 13427 taxa and 41 samples ]
sample_data() Sample Data: [ 41 samples by 66 sample variables ]
tax_table() Taxonomy Table: [ 13427 taxa by 8 taxonomic ranks ]
phy_tree() Phylogenetic Tree: [ 13427 tips and 13426 internal nodes ]
ps_filtered
phyloseq-class experiment-level object
otu_table() OTU Table: [ 979 taxa and 41 samples ]
sample_data() Sample Data: [ 41 samples by 66 sample variables ]
tax_table() Taxonomy Table: [ 979 taxa by 8 taxonomic ranks ]
phy_tree() Phylogenetic Tree: [ 979 tips and 978 internal nodes ]
ps_ra_filtered
phyloseq-class experiment-level object
otu_table() OTU Table: [ 979 taxa and 41 samples ]
sample_data() Sample Data: [ 41 samples by 66 sample variables ]
tax_table() Taxonomy Table: [ 979 taxa by 8 taxonomic ranks ]
phy_tree() Phylogenetic Tree: [ 979 tips and 978 internal nodes ]
Reduced from 13,427 to 979 ASVs
based on Coenen et al. tutorials for clustering. See repo
# Estimate covariance matrix for OTUs
# (computed as the uncentered cross-product of the filtered count table with its own transpose)
covariance_matrix <- as.matrix(otu_table(ps_filtered)) %*% t(otu_table(ps_filtered))
# %*% = matrix multiplication sign in R; used here to multiply OTU/ASV data matrix to itself to estimate covariance.
# NOTE(review): the compositions package loaded at the top of this file masks base `%*%` - confirm it behaves like the base operator on these objects.
# NOTE(review): if taxa are the rows of otu_table(ps_filtered) (as the t() calls elsewhere in this file suggest), this product is taxa x taxa (979 x 979) built from only 41 samples, so its rank is at most 41 and its determinant is 0 whatever the data look like - confirm the intended orientation.
# Evaluate determinant of covariance matrix
cov_determinant <- det(covariance_matrix)
cov_determinant
[1] 0
The determinant of the covariance matrix (what we just calculated) is equivalent to the product of the proportion of variance explained by every PCA axis. If the determinant is 0, that means there is an axis which explains 0 variance that we can’t separate from the other axes. This means the data need to be transformed to be suitable for PCA.
PCA is essentially a type of PCoA using the Euclidean distance matrix as input. When combined with a log-ratio transformation of the count table, this is deemed appropriate for compositional datasets.
First do a CLR, centered log ratio transformation of the absolute abundance data (after filtering), as suggested by Gloor et al. 2017 and check the determinant of this matrix. Compare it to the determinant without any transformation.
# Estimate covariance matrix for absolute abundance ASV table
# (same computation as in the previous chunk: count table times its own transpose)
covariance_matrix <- as.matrix(otu_table(ps_filtered)) %*% t(otu_table(ps_filtered))
# Evaluate determinant of covariance matrix
cov_determinant <- det(covariance_matrix)
# Estimate covariance matrix for CLR-transformed ASV table
# The CLR is applied to the transposed count table, so clr_asv_table_ps_filtered has the opposite orientation to otu_table(ps_filtered)
clr_asv_table_ps_filtered <- data.frame(compositions::clr(t(otu_table(ps_filtered))))
## Check new determinant of clr transformed table
# NOTE(review): because the CLR table is transposed relative to the count table, this product is taken over the other dimension than the one used for covariance_matrix above (presumably samples x samples here versus taxa x taxa there) - confirm that the two determinants are comparable.
new_covdet <- det(as.matrix(clr_asv_table_ps_filtered) %*% t(clr_asv_table_ps_filtered))
# Compare
cov_determinant # Determinant for the original (untransformed) count data
[1] 0
new_covdet # New
[1] 1.939146e+130
The determinant of the CLR-transformed table is not zero, so we can proceed with PCA of the CLR-transformed data.
Generate the PCA and visualize axes
# Principal Component Analysis (PCA) of the CLR-transformed ASV table
# (prcomp defaults: columns are centered, not scaled).
lograt_pca <- prcomp(clr_asv_table_ps_filtered)
# NOTE- this is equivalent to first making a Euclidean distance matrix using the CLR data table and then running a PCoA. A Euclidean distance matrix of a CLR-transformed data table = an Aitchison distance matrix. So this is equivalent to the compositional methods listed in Gloor et al.

# Proportion of the total variance carried by each PC axis, for the screeplot.
# The table is built with explicit column names rather than selecting the
# auto-generated (deparsed-expression) column name, which would silently break
# if the expression were reformatted.
pc_variance <- lograt_pca$sdev^2
lograt_variances <- data.frame(
  PCaxis = as.character(seq_along(pc_variance)), # axis number, as character
  PercVar = pc_variance / sum(pc_variance), # proportion (0-1), not percent
  stringsAsFactors = FALSE
)
head(lograt_variances)
# Screeplot: percentage of the total variance carried by each PC axis.
# PercVar holds proportions (0-1), so it is multiplied by 100 to match the
# "% Variance" axis label.
ggplot(lograt_variances, aes(x = as.numeric(PCaxis), y = PercVar * 100)) +
  geom_col(fill = "grey", color = "black") +
  theme_minimal() +
  theme(
    axis.title = element_text(color = "black", face = "bold", size = 10),
    axis.text.y = element_text(color = "black", face = "bold"),
    axis.text.x = element_blank()
  ) +
  labs(x = "PC axis", y = "% Variance", title = "Log-Ratio PCA Screeplot, CLR Transformation")
First two axes explain a decent proportion of variance: 24.8 + 13.4 = 38.2
Visualize the PCA
# Per-sample PC scores with the sample ID as a column, joined to the metadata;
# SizeFraction and OxCond are releveled so they plot in the desired order
pca_lograt_frame <- lograt_pca$x %>%
  data.frame() %>%
  rownames_to_column(var = "Sample Name") %>%
  left_join(metadata, by = "Sample Name") %>%
  mutate(
    SizeFraction = fct_relevel(SizeFraction, "PA", "FL"),
    OxCond = fct_relevel(OxCond, "Oxycline", "ShallowAnoxic", "Euxinic")
  )
pca_lograt_frame
# PCA biplot of the samples: colour = redox condition, shape = size fraction.
# Axis titles carry the percentage of variance of PC1 / PC2, read from rows 1
# and 2 of lograt_variances.
pca_lograt_plot <- ggplot(pca_lograt_frame, aes(x = PC1, y = PC2, color = OxCond)) +
  geom_point(aes(shape = SizeFraction), size = 4) +
  labs(
    x = paste0("PC1 ", round(lograt_variances$PercVar[1] * 100, 2), "%"),
    y = paste0("PC2 ", round(lograt_variances$PercVar[2] * 100, 2), "%"),
    title = "CLR-Euclidean PCA"
  ) +
  scale_color_manual(values = c("blue", "red", "brown4")) +
  coord_fixed(ratio = 1) +
  theme_bw()
pca_lograt_plot
Use vegan’s envfit to determine relationships between the ordination and environmental variables
# Metadata restricted to the samples present in ps_filtered (i.e. in the PCA),
# with SizeFraction and OxCond releveled so they plot in the desired order
metadata_ordinations <- metadata[metadata$`Sample Name` %in% sample_data(ps_filtered)$Sample.Name, ] %>%
  mutate(
    SizeFraction = fct_relevel(SizeFraction, "PA", "FL"),
    OxCond = fct_relevel(OxCond, "Oxycline", "ShallowAnoxic", "Euxinic")
  )

# Put the CLR table rows in the same sample order as the metadata, then redo
# the PCA on the reordered table
clr_asv_table_ps_filtered <- clr_asv_table_ps_filtered[metadata_ordinations$`Sample Name`, ]
lograt_pca <- prcomp(clr_asv_table_ps_filtered)

# Drop variables that don't make sense to test (eg. NCBI sample IDs, etc.),
# repetitive variables (eg. Particulate S and TZVS), and those that didn't work
# on both cruises (like fluorescence, beam attenuation, etc); then give a few
# variables shorter names for plotting.
# NOTE(review): `Sample Name` and `Type` are not removed here, so envfit fits
# them as factors; the recorded output shows r2 = 1 for both - confirm whether
# they should be excluded.
metadata_ordinations <- metadata_ordinations %>%
  select(
    -Replicate, -Fluorescence, -BeamAtt, -TZVS, -Run, -"Assay Type",
    -AvgSpotLen, -Bases, -BioProject, -BioSample, -BioSampleModel, -Bytes,
    -"Center Name", -Collection_Date, -Consent, -"DATASTORE filetype",
    -"DATASTORE provider", -"DATASTORE region", -Experiment,
    -geo_loc_name_country, -geo_loc_name_country_continent, -geo_loc_name,
    -Instrument, -isolation_source, -lat_lon, -"Library Name", -LibraryLayout,
    -LibrarySelection, -LibrarySource, -Organism, -Platform, -ReleaseDate,
    -samp_collect_device, -"SRA Study", -Depth_m, -replicate, -size_fraction,
    -CH4_uM, -H2S_Um, -oxygen, -Oxygen_uM, -Particulate_Sulfur_uM, -salinity,
    -Temperature_degree_C, -TZVS_uM
  ) %>%
  rename(
    PartS = ParticulateS,
    MicroAbun = "MicroAbun(x10^8 L^-1)",
    FlagAbun = "FlagAbun(x10^5 L-1)",
    VLPAbun = "VLP(x10^8 L-1)",
    Chemo = "Chemoautotrophy"
  )

# Fit the environmental variables onto the ordination (1000 permutations, fixed
# seed) and save the printed result to a text file
set.seed(10010)
pca_envfit <- envfit(lograt_pca, metadata_ordinations, permutations = 1000)
capture.output(pca_envfit, file = "stats_results/PCA_envfit_stat.txt")
pca_envfit
***VECTORS
PC1 PC2 r2 Pr(>r)
Depth -0.98577 -0.16812 0.1464 0.053946 .
O2 0.38556 0.92268 0.6792 0.000999 ***
Temp 0.50233 0.86468 0.4698 0.000999 ***
Salinity 0.38721 0.92199 0.4349 0.000999 ***
H2S -0.93149 0.36378 0.0622 0.284715
PartS -0.96687 -0.25527 0.2378 0.008991 **
CH4 -0.92338 0.38389 0.1143 0.099900 .
NO3 0.87658 0.48127 0.7438 0.000999 ***
NO2 0.65514 -0.75551 0.0489 0.378621
NH4 -0.92121 0.38907 0.0956 0.140859
PO4 -0.89466 -0.44674 0.3551 0.001998 **
Chemo -0.98789 -0.15514 0.2174 0.009990 **
BNP -0.50518 0.86301 0.1857 0.021978 *
MicroAbun -0.38662 0.92224 0.1379 0.064935 .
FlagAbun -0.88594 -0.46381 0.2942 0.001998 **
VLPAbun -0.34508 -0.93857 0.2224 0.011988 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Permutation: free
Number of permutations: 1000
***FACTORS:
Centroids:
PC1 PC2
Sample.NameAE1b900AM -29.8546 9.9007
Sample.NameAE1b900BM -10.2525 1.2934
Sample.NameAE2a143A 3.5890 13.6286
Sample.NameAE2a143B 44.7696 -15.1082
Sample.NameAE2a237A -13.4858 -29.6025
Sample.NameAE2a237B 13.6807 -42.2385
Sample.NameAE2a247A -26.4642 -14.7376
Sample.NameAE2a247B -13.9878 -7.3599
Sample.NameAE2a267A -22.8582 -4.8656
Sample.NameAE2a900AN -14.7861 1.3805
Sample.NameAE2b143A 5.5093 23.1323
Sample.NameAE2b143B 14.9987 11.5463
Sample.NameAE2b200A 15.0768 2.4830
Sample.NameAE2b200B 35.1975 -24.0981
Sample.NameAE2b237A -12.4369 -25.4440
Sample.NameAE2b237B 12.4311 -32.6782
Sample.NameAE2b247A -18.1375 -14.0838
Sample.NameAE2b247B -17.2418 -20.6486
Sample.NameAE2b267A -27.1824 0.6048
Sample.NameAE2b267B -22.0115 -7.5764
Sample.NameAE2b900BN -0.1983 -5.0092
Sample.NameAE3a103A -0.6935 58.4628
Sample.NameAE3a103B 24.8367 33.9070
Sample.NameAE3a198B 56.1304 4.2180
Sample.NameAE3a234A 20.4610 4.2756
Sample.NameAE3a234B 58.9877 -22.5333
Sample.NameAE3a295A -44.5164 -0.6909
Sample.NameAE3a295B -16.6237 -3.2697
Sample.NameAE3a314A -41.6693 9.1064
Sample.NameAE3a314B -21.9304 -2.6987
Sample.NameAE3a900AM -31.7213 18.2451
Sample.NameAE3a900BM -20.0520 3.1981
Sample.NameAE3b103A 3.4583 51.4401
Sample.NameAE3b103B 24.9704 35.9596
Sample.NameAE3b198A 30.4747 23.7773
Sample.NameAE3b198B 56.7512 -3.2900
Sample.NameAE3b234A 21.6594 11.7876
Sample.NameAE3b234B 52.9667 -20.3142
Sample.NameAE3b295A -28.3927 -6.7477
Sample.NameAE3b295B -29.9989 -12.2357
Sample.NameAE3b314B -31.4534 -3.1165
TypeAnoxMay1FL -21.9304 -2.6987
TypeAnoxMay1PA -41.6693 9.1064
TypeAnoxMay2FL -31.4534 -3.1165
TypeAnoxNov1PA -22.8582 -4.8656
TypeAnoxNov2FL -22.0115 -7.5764
TypeAnoxNov2PA -27.1824 0.6048
TypeDeepMay1FL -20.0520 3.1981
TypeDeepMay1PA -31.7213 18.2451
TypeDeepMay2FL -10.2525 1.2934
TypeDeepMay2PA -29.8546 9.9007
TypeDeepNov1PA -14.7861 1.3805
TypeDeepNov2FL -0.1983 -5.0092
TypeIntMay1FL -16.6237 -3.2697
TypeIntMay1PA -44.5164 -0.6909
TypeIntMay2FL -29.9989 -12.2357
TypeIntMay2PA -28.3927 -6.7477
TypeIntNov1FL -13.9878 -7.3599
TypeIntNov1PA -26.4642 -14.7376
TypeIntNov2FL -17.2418 -20.6486
TypeIntNov2PA -18.1375 -14.0838
TypeMicroOxMay1FL 56.1304 4.2180
TypeMicroOxMay2FL 56.7512 -3.2900
TypeMicroOxMay2PA 30.4747 23.7773
TypeMicroOxNov2FL 35.1975 -24.0981
TypeMicroOxNov2PA 15.0768 2.4830
TypeOxicMay1FL 24.8367 33.9070
TypeOxicMay1PA -0.6935 58.4628
TypeOxicMay2FL 24.9704 35.9596
TypeOxicMay2PA 3.4583 51.4401
TypeOxicNov1FL 44.7696 -15.1082
TypeOxicNov1PA 3.5890 13.6286
TypeOxicNov2FL 14.9987 11.5463
TypeOxicNov2PA 5.5093 23.1323
TypeSubOxMay1FL 58.9877 -22.5333
TypeSubOxMay1PA 20.4610 4.2756
TypeSubOxMay2FL 52.9667 -20.3142
TypeSubOxMay2PA 21.6594 11.7876
TypeSubOxNov1FL 13.6807 -42.2385
TypeSubOxNov1PA -13.4858 -29.6025
TypeSubOxNov2FL 12.4311 -32.6782
TypeSubOxNov2PA -12.4369 -25.4440
SizeFractionPA -10.5985 6.6026
SizeFractionFL 10.0938 -6.2882
SeasonMay 1.9790 8.6671
SeasonNov -2.2915 -10.0355
OxCondOxycline 22.3492 2.8243
OxCondShallowAnoxic -25.8906 -6.3086
OxCondEuxinic -17.8108 4.8348
Goodness of fit:
r2 Pr(>r)
Sample.Name 1.0000 1.000000
Type 1.0000 1.000000
SizeFraction 0.1190 0.008991 **
Season 0.0733 0.044955 *
OxCond 0.4424 0.000999 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Permutation: free
Number of permutations: 1000
# Variables significant at the p<0.01 level in the environmental fit:
#   vectors   - O2, Temp, Salinity, Particulate S, NO3, PO4, Chemoautotrophy, Flagellate Abundance
#   centroids - OxCond and SizeFraction

# Fit every CLR-transformed ASV onto the ordination (1000 permutations) and
# save the printed result to a text file.
# NOTE(review): no set.seed() precedes this call, so its permutation p-values
# depend on the RNG state left by earlier code - confirm that is intended.
pca_sppfit <- envfit(
  lograt_pca,
  clr_asv_table_ps_filtered,
  permutations = 1000
)
capture.output(pca_sppfit, file = "stats_results/PCA_sppfit_stat.txt")
pca_sppfit
***VECTORS
PC1 PC2 r2 Pr(>r)
ASV_1 -0.94879 -0.31590 0.6367 0.000999 ***
ASV_2 -0.95743 -0.28866 0.5478 0.000999 ***
ASV_3 -0.66103 -0.75036 0.4248 0.000999 ***
ASV_4 0.91119 0.41198 0.3182 0.000999 ***
ASV_5 -0.13944 -0.99023 0.5837 0.000999 ***
ASV_6 -0.72934 0.68415 0.3824 0.000999 ***
ASV_7 -0.89988 -0.43614 0.5795 0.000999 ***
ASV_8 -0.99973 -0.02323 0.3232 0.000999 ***
ASV_9 0.79352 0.60855 0.3634 0.000999 ***
ASV_10 0.35944 -0.93317 0.0182 0.693307
ASV_11 0.81570 0.57847 0.3390 0.000999 ***
ASV_12 -0.99954 -0.03042 0.2399 0.011988 *
ASV_13 -0.35551 0.93467 0.2923 0.000999 ***
ASV_14 -0.90892 -0.41696 0.5135 0.000999 ***
ASV_15 -0.54155 -0.84067 0.1551 0.038961 *
ASV_16 -0.12825 -0.99174 0.5570 0.000999 ***
ASV_17 0.85455 0.51936 0.4011 0.000999 ***
ASV_18 -0.93204 0.36236 0.4675 0.000999 ***
ASV_19 0.76290 0.64652 0.4054 0.000999 ***
ASV_20 0.74332 0.66893 0.3693 0.000999 ***
ASV_21 0.94112 0.33808 0.7068 0.000999 ***
ASV_22 -0.03575 0.99936 0.6102 0.000999 ***
ASV_23 -0.96906 0.24683 0.3277 0.001998 **
ASV_24 0.75409 0.65678 0.4173 0.000999 ***
ASV_25 -0.70979 -0.70441 0.1603 0.030969 *
ASV_26 0.77164 0.63606 0.3377 0.001998 **
ASV_27 -0.72076 -0.69319 0.1532 0.048951 *
ASV_28 -0.91095 -0.41251 0.3518 0.001998 **
ASV_29 -0.89941 -0.43710 0.5228 0.000999 ***
ASV_30 0.14112 -0.98999 0.6551 0.000999 ***
ASV_31 0.74503 0.66703 0.5581 0.000999 ***
ASV_32 0.61333 0.78983 0.0922 0.149850
ASV_33 0.96796 0.25110 0.5839 0.000999 ***
ASV_34 0.48068 0.87690 0.1288 0.070929 .
ASV_35 -0.96144 -0.27501 0.4856 0.000999 ***
ASV_36 0.17479 -0.98461 0.1455 0.050949 .
ASV_37 -0.57080 -0.82109 0.4490 0.000999 ***
ASV_38 -0.55821 -0.82970 0.3634 0.000999 ***
ASV_39 0.89786 -0.44029 0.7727 0.000999 ***
ASV_40 0.54765 0.83671 0.2576 0.000999 ***
ASV_41 0.85857 0.51269 0.4075 0.000999 ***
ASV_42 -0.03359 0.99944 0.6764 0.000999 ***
ASV_43 0.79848 0.60202 0.7293 0.000999 ***
ASV_44 0.91567 -0.40193 0.6031 0.000999 ***
ASV_45 0.58674 -0.80978 0.6323 0.000999 ***
ASV_47 0.76868 0.63964 0.4841 0.000999 ***
ASV_48 -0.01144 0.99993 0.0110 0.807193
ASV_49 -0.28654 0.95807 0.3425 0.001998 **
ASV_50 -0.95827 -0.28585 0.4911 0.000999 ***
ASV_51 -0.15109 0.98852 0.6496 0.000999 ***
ASV_52 -0.42255 -0.90634 0.4405 0.000999 ***
ASV_53 -0.85901 -0.51196 0.4958 0.000999 ***
ASV_54 0.99845 0.05559 0.5791 0.000999 ***
ASV_55 0.08476 0.99640 0.4400 0.000999 ***
ASV_56 0.18670 0.98242 0.4767 0.000999 ***
ASV_57 0.87367 -0.48653 0.6257 0.000999 ***
ASV_58 -0.33160 -0.94342 0.4368 0.000999 ***
ASV_59 0.87362 -0.48660 0.0266 0.581419
ASV_61 0.99530 -0.09684 0.8206 0.000999 ***
ASV_62 0.67265 0.73996 0.3537 0.000999 ***
ASV_63 -0.95850 -0.28510 0.3152 0.000999 ***
ASV_64 0.65845 -0.75262 0.8012 0.000999 ***
ASV_65 -0.99096 0.13417 0.4403 0.000999 ***
ASV_66 0.98308 0.18319 0.3301 0.001998 **
ASV_67 0.99228 0.12403 0.5932 0.000999 ***
ASV_68 -0.07256 0.99736 0.6304 0.000999 ***
ASV_69 0.92668 0.37584 0.6110 0.000999 ***
ASV_70 0.75387 -0.65702 0.3063 0.000999 ***
ASV_71 0.70881 -0.70539 0.1778 0.019980 *
ASV_72 0.62361 -0.78173 0.2473 0.003996 **
ASV_73 0.86964 0.49369 0.7019 0.000999 ***
ASV_75 -0.97001 0.24308 0.4272 0.000999 ***
ASV_76 0.94362 -0.33102 0.5229 0.000999 ***
ASV_77 -0.77761 0.62875 0.2168 0.008991 **
ASV_78 -0.92119 -0.38910 0.5809 0.000999 ***
ASV_79 -0.99411 -0.10833 0.5673 0.000999 ***
ASV_80 0.46163 0.88707 0.5916 0.000999 ***
ASV_81 -0.97935 -0.20218 0.3814 0.000999 ***
ASV_82 0.75332 -0.65765 0.7328 0.000999 ***
ASV_83 0.91717 0.39849 0.7793 0.000999 ***
ASV_84 -0.92403 -0.38233 0.3370 0.000999 ***
ASV_85 0.25193 0.96774 0.2175 0.007992 **
ASV_86 0.94191 -0.33587 0.6329 0.000999 ***
ASV_87 0.58716 -0.80947 0.5471 0.000999 ***
ASV_88 0.48617 0.87386 0.4838 0.000999 ***
ASV_89 -0.70961 -0.70460 0.1628 0.031968 *
ASV_90 0.94964 -0.31335 0.6680 0.000999 ***
ASV_91 -0.15797 -0.98744 0.4723 0.000999 ***
ASV_92 0.83089 -0.55643 0.1673 0.019980 *
ASV_93 0.15178 0.98841 0.2552 0.003996 **
ASV_94 0.36042 -0.93279 0.6799 0.000999 ***
ASV_95 -0.98471 -0.17421 0.4095 0.000999 ***
ASV_96 0.08413 0.99645 0.3041 0.001998 **
ASV_97 0.98829 0.15262 0.7834 0.000999 ***
ASV_99 0.81168 0.58410 0.4439 0.000999 ***
ASV_100 -0.97026 -0.24205 0.4647 0.000999 ***
ASV_101 0.57438 -0.81859 0.7301 0.000999 ***
ASV_102 0.69424 0.71974 0.6313 0.000999 ***
ASV_103 -0.90516 0.42508 0.5250 0.000999 ***
ASV_104 -0.49163 -0.87080 0.5500 0.000999 ***
ASV_105 -0.96963 -0.24457 0.2440 0.004995 **
ASV_106 -0.04813 0.99884 0.0125 0.783217
ASV_107 0.99655 0.08299 0.8209 0.000999 ***
ASV_108 0.90255 0.43059 0.3793 0.000999 ***
ASV_109 0.18840 -0.98209 0.5633 0.000999 ***
ASV_110 -0.99856 0.05364 0.3955 0.000999 ***
ASV_111 -0.79184 -0.61073 0.3650 0.000999 ***
ASV_113 0.78091 0.62464 0.4847 0.000999 ***
ASV_114 0.73484 0.67824 0.4835 0.000999 ***
ASV_115 0.09745 -0.99524 0.6475 0.000999 ***
ASV_116 0.93013 0.36723 0.7964 0.000999 ***
ASV_117 0.42481 0.90528 0.1028 0.111888
ASV_118 0.99069 -0.13612 0.1310 0.068931 .
ASV_119 -0.33032 -0.94387 0.4257 0.000999 ***
ASV_120 -0.75767 0.65263 0.1544 0.046953 *
ASV_122 0.98160 0.19096 0.8455 0.000999 ***
ASV_123 -0.02325 0.99973 0.1943 0.017982 *
ASV_125 0.42248 -0.90637 0.1769 0.030969 *
ASV_126 -0.48526 -0.87437 0.3396 0.000999 ***
ASV_127 -0.72493 0.68883 0.5072 0.000999 ***
ASV_129 0.85586 -0.51720 0.4291 0.000999 ***
ASV_130 0.67202 -0.74053 0.6871 0.000999 ***
ASV_131 -0.57484 -0.81826 0.5689 0.000999 ***
ASV_132 -0.93034 -0.36669 0.5766 0.000999 ***
ASV_133 -0.77173 0.63595 0.4962 0.000999 ***
ASV_134 0.98676 0.16221 0.2293 0.008991 **
ASV_135 0.65189 -0.75831 0.8054 0.000999 ***
ASV_136 0.34613 0.93819 0.3783 0.000999 ***
ASV_137 -0.98260 0.18572 0.5150 0.000999 ***
ASV_138 0.91768 0.39731 0.7674 0.000999 ***
ASV_139 -0.15862 0.98734 0.1217 0.084915 .
ASV_140 0.90861 -0.41765 0.5751 0.000999 ***
ASV_141 0.57542 -0.81786 0.1002 0.137862
ASV_142 0.79995 -0.60006 0.8416 0.000999 ***
ASV_143 0.79364 -0.60839 0.6547 0.000999 ***
ASV_144 0.45676 -0.88959 0.3956 0.000999 ***
ASV_145 0.53528 0.84468 0.4380 0.000999 ***
ASV_146 0.68811 -0.72560 0.7750 0.000999 ***
ASV_147 0.36469 -0.93113 0.6131 0.000999 ***
ASV_148 -0.90620 -0.42284 0.1703 0.038961 *
ASV_149 -0.13172 0.99129 0.4334 0.000999 ***
ASV_150 -0.94312 0.33246 0.4157 0.000999 ***
ASV_151 -0.20501 0.97876 0.5520 0.000999 ***
ASV_152 0.95131 -0.30823 0.0038 0.938062
ASV_153 0.41155 0.91139 0.3053 0.000999 ***
ASV_154 -0.98120 -0.19299 0.4128 0.000999 ***
ASV_155 0.99400 -0.10938 0.1196 0.089910 .
ASV_156 -0.03421 0.99941 0.5686 0.000999 ***
ASV_157 0.72295 -0.69090 0.6207 0.000999 ***
ASV_158 -0.53908 -0.84225 0.5685 0.000999 ***
ASV_159 -0.77708 0.62940 0.4091 0.000999 ***
ASV_161 0.67556 -0.73731 0.8036 0.000999 ***
ASV_162 0.80096 -0.59872 0.6939 0.000999 ***
ASV_163 -0.89141 0.45320 0.4723 0.000999 ***
ASV_164 -0.35116 0.93631 0.1788 0.016983 *
ASV_165 -0.60354 -0.79733 0.6197 0.000999 ***
ASV_167 0.90755 -0.41994 0.2492 0.002997 **
ASV_168 0.94770 0.31917 0.6323 0.000999 ***
ASV_169 0.96934 -0.24570 0.4338 0.000999 ***
ASV_170 0.98435 0.17620 0.2656 0.004995 **
ASV_171 -0.82003 0.57232 0.4193 0.000999 ***
ASV_172 -0.96491 0.26258 0.2857 0.001998 **
ASV_173 0.92938 -0.36913 0.5319 0.000999 ***
ASV_174 0.48785 0.87293 0.1821 0.023976 *
ASV_175 0.87786 0.47891 0.1631 0.022977 *
ASV_176 0.39528 0.91856 0.1710 0.032967 *
ASV_177 0.82178 -0.56981 0.3760 0.001998 **
ASV_178 0.82885 -0.55948 0.1752 0.022977 *
ASV_179 0.66025 -0.75105 0.7251 0.000999 ***
ASV_180 -0.49284 -0.87012 0.4924 0.000999 ***
ASV_181 -0.96710 -0.25439 0.4136 0.000999 ***
ASV_183 -0.56267 -0.82668 0.5489 0.000999 ***
ASV_184 0.02021 0.99980 0.3986 0.000999 ***
ASV_185 0.40544 0.91412 0.5613 0.000999 ***
ASV_186 0.80750 -0.58986 0.6950 0.000999 ***
ASV_187 -0.99940 0.03464 0.3257 0.001998 **
ASV_188 -0.90566 0.42400 0.3680 0.001998 **
ASV_189 -0.42612 -0.90467 0.4463 0.000999 ***
ASV_190 0.83403 -0.55172 0.2928 0.000999 ***
ASV_191 -0.53648 0.84391 0.0279 0.601399
ASV_192 0.99195 -0.12663 0.8316 0.000999 ***
ASV_194 -0.77532 0.63157 0.3241 0.001998 **
ASV_195 0.05248 0.99862 0.0819 0.188811
ASV_197 0.97344 -0.22895 0.7982 0.000999 ***
ASV_198 0.09697 0.99529 0.2453 0.006993 **
ASV_199 -0.90727 0.42056 0.4735 0.000999 ***
ASV_200 -0.57495 0.81819 0.2380 0.009990 **
ASV_201 0.27134 0.96248 0.2029 0.023976 *
ASV_202 0.82275 -0.56841 0.5077 0.000999 ***
ASV_203 -0.79991 0.60012 0.1394 0.055944 .
ASV_204 -0.49194 -0.87063 0.2146 0.008991 **
ASV_205 0.87207 -0.48938 0.5432 0.000999 ***
ASV_206 -0.30909 0.95103 0.3950 0.000999 ***
ASV_207 -0.98314 -0.18286 0.1984 0.011988 *
ASV_208 -0.76094 0.64882 0.4574 0.000999 ***
ASV_210 -0.98383 0.17912 0.5124 0.000999 ***
ASV_211 0.80921 0.58752 0.2800 0.000999 ***
ASV_212 -0.01332 0.99991 0.0049 0.904096
ASV_213 -0.70411 -0.71009 0.3093 0.001998 **
ASV_214 -0.51767 -0.85558 0.5341 0.000999 ***
[ reached getOption("max.print") -- omitted 779 rows ]
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Permutation: free
Number of permutations: 1000
Many of the typical variables that indicate redox condition are significant (O2, NO3, Particulate S, etc.), plus size fraction. There are many species that are significant.
Make individual envfit objects for all the vectors that will be plotted
# One single-variable envfit object per environmental vector, so each arrow
# can be added to a plot panel on its own
pca_envfit_O2       <- envfit(lograt_pca ~ O2,       data = metadata_ordinations, permutations = 1000)
pca_envfit_partS    <- envfit(lograt_pca ~ PartS,    data = metadata_ordinations, permutations = 1000)
pca_envfit_NO3      <- envfit(lograt_pca ~ NO3,      data = metadata_ordinations, permutations = 1000)
pca_envfit_PO4      <- envfit(lograt_pca ~ PO4,      data = metadata_ordinations, permutations = 1000)
pca_envfit_temp     <- envfit(lograt_pca ~ Temp,     data = metadata_ordinations, permutations = 1000)
pca_envfit_sal      <- envfit(lograt_pca ~ Salinity, data = metadata_ordinations, permutations = 1000)
pca_envfit_chemo    <- envfit(lograt_pca ~ Chemo,    data = metadata_ordinations, permutations = 1000)
pca_envfit_FlagAbun <- envfit(lograt_pca ~ FlagAbun, data = metadata_ordinations, permutations = 1000)
Next, trim the sppfit vegan object to include only those species with an r2 value greater than 0.60. I got this function from here. Later, when plotting, I can also trim by p-value.
#__FUNCTION: select.envfit__#
# Blank out the envfit vectors whose r2 falls below a threshold, so that only
# the strongly correlated ones are drawn. Selection is on r2 only; p-value
# filtering is done separately (e.g. with `p.max` when plotting).
#
# fit:      result of envfit(); reads fit$vectors$r and fit$vectors$arrows
# r.select: numeric, minimum r2 a vector needs in order to be kept
# returns:  `fit`, with the arrow coordinates of rejected vectors set to NA
#           (vectors with r2 exactly equal to r.select are kept)
select.envfit <- function(fit, r.select) {
  # which() ignores NA comparisons and returns integer(0) when there are no
  # vectors at all, so the assignment below is a safe no-op in both cases
  weak <- which(fit$vectors$r < r.select)
  fit$vectors$arrows[weak, ] <- NA
  fit
}
# Keep only the species vectors with r2 of at least 0.6; weaker ones become NA
pca_sppfit_trim <- select.envfit(fit = pca_sppfit, r.select = 0.6)
Complicated to plot vegan output in ggplot. Plot in base R
# Convert character columns in the metadata to factors.
# across(where(...)) replaces the superseded mutate_if() and lets dplyr
# evaluate the predicate per column instead of a precomputed sapply() mask.
metadata_ordinations <- metadata_ordinations %>%
  mutate(across(where(is.character), as.factor))
# Check the level order of OxCond (colvec is indexed by this factor)
with(as.data.frame(metadata_ordinations), levels(OxCond))
[1] "Oxycline" "ShallowAnoxic" "Euxinic"
# Check the level order of SizeFraction (shapevec is indexed by this factor)
with(as.data.frame(metadata_ordinations), levels(SizeFraction))
[1] "PA" "FL"
# Define colors and shapes for plot
# Both vectors are indexed by a factor in the plots, i.e. by level position:
#   colvec   follows levels(OxCond):       Oxycline, ShallowAnoxic, Euxinic
#   shapevec follows levels(SizeFraction): PA, FL
colvec <- c("blue", "red", "brown4")
shapevec <- c(16,17)
# Draw the four-panel PCA figure on whichever graphics device is current.
# The notebook preview and the EPS export both call this function, so the two
# figures cannot drift apart. Reads lograt_pca, lograt_variances,
# metadata_ordinations, colvec, shapevec, the pca_envfit_* objects and
# pca_sppfit_trim from the calling environment.
draw_pca_envfit_figure <- function() {
  site_scores <- scores(lograt_pca, display = "sites")
  point_col <- colvec[metadata_ordinations$OxCond]
  point_pch <- shapevec[metadata_ordinations$SizeFraction]
  pc1_label <- paste0('PC1 ', round(lograt_variances[1,2]*100,2),'%')
  pc2_label <- paste0('PC2 ', round(lograt_variances[2,2]*100,2),'%')

  # Scatter of the site scores; `...` carries the per-panel axis switches
  plot_sites <- function(xlab = "", ylab = "", ...) {
    plot(site_scores, col = point_col, pch = point_pch, cex = 1.5,
         cex.lab = .8, cex.axis = .8, xlab = xlab, ylab = ylab,
         xlim = c(-60, 60), ...)
  }

  # Add each envfit object's arrow(s) to the current panel
  add_vectors <- function(fits) {
    for (fit in fits) {
      plot(fit, p.max = 0.1, lwd = 2, col = "black", cex = 0.6)
    }
  }

  # Set up 2x2 panels
  op <- par(oma = c(0, 0, 0, 1), # Room for the title and legend
            mfrow = c(2, 2),
            mai = c(.65, .65, .1, 0))

  # Panel 1- Add first half of envfit vectors
  plot_sites(ylab = pc2_label, xaxt = 'n')
  add_vectors(list(pca_envfit_O2, pca_envfit_partS, pca_envfit_NO3, pca_envfit_PO4))
  title("A", line = -1, adj = 0.02)

  # Panel 2- Add rest of envfit vectors
  plot_sites(xaxt = 'n', yaxt = 'n')
  add_vectors(list(pca_envfit_temp, pca_envfit_sal, pca_envfit_FlagAbun, pca_envfit_chemo))
  title("B", line = -1, adj = 0.02)

  # Panel 3- Add spider lines indicating envfit centroids for Size Fraction
  plot_sites(xlab = pc1_label, ylab = pc2_label)
  ordispider(lograt_pca, metadata_ordinations$SizeFraction, lwd = 1.5,
             lty = c(1, 2), label = TRUE, cex = 0.6)
  title("C", line = -1, adj = 0.02)

  # Panel 4 -Add vectors indicating significant spp
  plot_sites(xlab = pc1_label, yaxt = 'n')
  plot(pca_sppfit_trim, p.max = 0.001, col = "black", cex = 0.6)
  # annotate the 5 clusters of ASVs in panel D
  text(x = c(0, 50, 48, -48, -48),
       y = c(45, 30, -35, -18, 10),
       labels = c("Cluster I", "Cluster II", "Cluster III", "Cluster IV", "Cluster V"),
       adj = 0.5, font = 2, cex = 0.8)
  title("D", line = -1, adj = 0.02)

  # Add legend
  par(op) # Leave the last plot
  legend_par <- par(usr = c(0, 1, 0, 1), # Reset the coordinates
                    xpd = NA)            # Allow plotting outside the plot region
  # Put usr/xpd back once the legend is drawn so they do not leak into
  # later plots on the same device
  on.exit(par(legend_par), add = TRUE)
  legend(-0.018, .57, c("PA", "FL", "Oxycline", "Shallow Anoxic", "Euxinic"),
         col = c("black", "black", "blue", "red", "brown4"),
         pch = c(16, 17, 15, 15, 15), box.col = NA, cex = .8, horiz = TRUE,
         x.intersp = c(0.3), text.width = c(0, 0.18, 0.18, 0.18, 0.2))
  invisible(NULL)
}

# Plot here in notebook
draw_pca_envfit_figure()

# Set up EPS and make plot
setEPS(width = 6, height = 6)
postscript("Figures/PCA_envfit.eps")
draw_pca_envfit_figure()
dev.off()
quartz_off_screen
2
For the manuscript, I want to discuss what these significant species are. Make a table:
# Per-species permutation p-values, one row per ASV (ID taken from the names)
fit_pvals <- data.frame(pvals = pca_sppfit$vectors$pvals) %>%
  rownames_to_column("ASVID")
# Per-species r2 values, same layout
fit_r2vals <- data.frame(r2vals = pca_sppfit$vectors$r) %>%
  rownames_to_column("ASVID")
# Vector scores joined with their stats; keep only species with
# p-val < 0.001 and r2 value > 0.6
fit_spp <- scores(pca_sppfit, display = "vectors") %>%
  as.data.frame() %>%
  rownames_to_column("ASVID") %>%
  full_join(fit_pvals, by = "ASVID") %>%
  full_join(fit_r2vals, by = "ASVID") %>%
  filter(pvals < 0.001, r2vals > 0.6)
# --> filters to 107 species
# Attach the ASV identifying information, then sort by PC2 to differentiate
# those above and below the PC2 = 0 axis
pca_sig_ASVs <- taxonomy %>%
  mutate(ASVID = rownames(taxonomy)) %>%
  right_join(fit_spp, by = "ASVID") %>%
  arrange(desc(PC2))
pca_sig_ASVs
# The vegan plot also rescales the species scores to fit the current plot, which is why the PC values in the table don't match what is seen in the plot. Get the multiplier used for that scaling
ordiArrowMul(lograt_pca, display = "species") #7.636856
[1] 7.636856
ordiArrowMul(pca_sppfit, display = "vectors") #0.8291121
[1] 0.8291121
# export as table (row names are dropped; the ASV IDs are kept in the ASVID column)
write.csv(pca_sig_ASVs, file="stats_results/pca_sig_ASVs.csv", row.names=FALSE)
Import
# Raw archaeal (AA) and bacterial (AB) count tables
arch_counts <- read_csv(file = "Suter_2018_count_tables/Cariaco_AA_updated_raw.csv")
bac_counts <- read_csv(file = "Suter_2018_count_tables/Cariaco_AB_updated_raw.csv")
Get sample names
# Sample columns are taken by position: columns 2-49 of the bacterial table
# and columns 2-47 of the archaeal table
bac_samples <- names(bac_counts)[2:49]
arch_samples <- names(arch_counts)[2:47]
bac_samples
arch_samples
Make separate taxonomy and count variables
# Archaea: counts = OTU ID + sample columns; taxonomy = everything else
# except the Sum column.
# all_of() makes it explicit that arch_samples is an external character
# vector, so a column that happens to be called "arch_samples" can never be
# picked up instead.
arch_OTU <- arch_counts[, c("#OTU ID", arch_samples)]
arch_taxonomy <- arch_counts %>%
  select(-all_of(arch_samples)) %>%
  select(-Sum)
arch_OTU
arch_taxonomy
# Bacteria: same split, additionally dropping the free-text notes column
bac_OTU <- bac_counts[, c("#OTU ID", bac_samples)]
bac_taxonomy <- bac_counts %>%
  select(-all_of(bac_samples)) %>%
  select(-Sum) %>%
  select(-"Interesting close relatives")
bac_OTU
bac_taxonomy
# Bacterial counts -> otu_table (taxa as rows, OTU IDs moved to the row names)
bac_OTU <- type_convert(as.data.frame(bac_OTU))
rownames(bac_OTU) <- bac_OTU[["#OTU ID"]]
bac_OTU <- bac_OTU[, names(bac_OTU) != "#OTU ID"]
bac_OTU <- otu_table(bac_OTU, taxa_are_rows = TRUE)

# Archaeal counts -> otu_table
arch_OTU <- type_convert(as.data.frame(arch_OTU))
rownames(arch_OTU) <- arch_OTU[["#OTU ID"]]
arch_OTU <- arch_OTU[, names(arch_OTU) != "#OTU ID"]
arch_OTU <- otu_table(arch_OTU, taxa_are_rows = TRUE)

# Bacterial taxonomy -> tax_table (character matrix, OTU IDs as row names)
bac_TAX <- type_convert(as.data.frame(bac_taxonomy))
rownames(bac_TAX) <- bac_TAX[["#OTU ID"]]
bac_TAX <- bac_TAX[, names(bac_TAX) != "#OTU ID"]
bac_TAX <- tax_table(as.matrix(bac_TAX))

# Archaeal taxonomy -> tax_table
arch_TAX <- type_convert(as.data.frame(arch_taxonomy))
rownames(arch_TAX) <- arch_TAX[["#OTU ID"]]
arch_TAX <- arch_TAX[, names(arch_TAX) != "#OTU ID"]
arch_TAX <- tax_table(as.matrix(arch_TAX))

# Sample metadata, with the "Sample Name" column as row names
META <- sample_data(data.frame(metadata, row.names = metadata[["Sample Name"]]))

# One phyloseq object per domain
ps_bac <- phyloseq(bac_OTU, bac_TAX, META)
ps_arch <- phyloseq(arch_OTU, arch_TAX, META)
Filter out the samples with low sequencing effort. These were previously identified for itags paper
# Bacteria: drop the samples with low sequencing effort.
# (taxa_to_keep_* are logical masks over samples, despite the name.)
low_effort_bac <- c("AB3a900A","AB2a200A","AB2b267A")
taxa_to_keep_b <- !(sample_names(ps_bac) %in% low_effort_bac)
ps_bac <- prune_samples(taxa_to_keep_b, ps_bac)
# Archaea: same, with its own list of low-effort samples
low_effort_arch <- c("AA2b900AN","AA2a247B","AA2a900BN","AA2b900BN")
taxa_to_keep_a <- !(sample_names(ps_arch) %in% low_effort_arch)
ps_arch <- prune_samples(taxa_to_keep_a, ps_arch)
First calculate the relative abundance of the bac and arch OTU tables
# Compositional (relative abundance) version of each object, followed by a
# look at the top-left 5 x 5 corner of its OTU table
ps_bac_ra <- microbiome::transform(ps_bac, transform = "compositional")
otu_table(ps_bac_ra)[1:5, 1:5]
ps_arch_ra <- microbiome::transform(ps_arch, transform = "compositional")
otu_table(ps_arch_ra)[1:5, 1:5]
Remove rows of glommed taxa from the full dataframe if their sum across all samples doesn’t exceed 5% (i.e. keep only taxa whose summed RA > 0.05)
# For each domain: sum the relative abundance of every taxon across samples,
# keep the taxa whose sum is above 0.05, and apply that same logical mask to
# both the relative-abundance object and the count object.

# Bacteria
x <- taxa_sums(ps_bac_ra)
keepTaxa <- x > 0.05
ps_bac_pruned <- prune_taxa(keepTaxa, ps_bac)
ps_bac_ra_pruned <- prune_taxa(keepTaxa, ps_bac_ra)
ps_bac_ra_pruned
ps_bac_pruned

# Archaea
x <- taxa_sums(ps_arch_ra)
keepTaxa <- x > 0.05
ps_arch_pruned <- prune_taxa(keepTaxa, ps_arch)
ps_arch_ra_pruned <- prune_taxa(keepTaxa, ps_arch_ra)
ps_arch_ra_pruned
ps_arch_pruned

# Eukaryotes
x <- taxa_sums(ps_ra)
keepTaxa <- x > 0.05
ps_euk_pruned <- prune_taxa(keepTaxa, ps)
ps_euk_ra_pruned <- prune_taxa(keepTaxa, ps_ra)
ps_euk_ra_pruned
ps_euk_pruned
Trimmed to 124 bacteria OTUs, 52 archaea OTUs, and 123 eukaryotic ASVs (299 total). Proceed with this dataset of the most abundant OTUs for correlations and network analyses…
To do the multi-domain analysis, the sample names from each phyloseq object must match. These currently have “B” for bacteria, A, E etc. Remove this letter from sample names so that “AE2a247B”, “AA2a247B”, “AB2a247B” all become just “Type” from the metadata sheet [IntNov1FL in this case- for Interface, November, rep 1, free-living].
Import my SampleKey
# SampleKey.csv maps each domain's sample IDs (columns SampleID_arch, SampleID_bac, SampleID_euk) to the shared sample "Type" label
samplekey <- read_csv("SampleKey.csv")
Change the sample names in the otu tables to sample “Type”
# Return the rows of `samplekey` that belong to the samples of `ps`, in the
# order of the columns of its OTU table. `id_col` is the name of the samplekey
# column holding that domain's sample IDs.
# Stops with an explicit message when a sample is missing from the key or its
# ID occurs more than once, instead of handing back a label vector of the
# wrong length (or one that pairs samples with the wrong Type).
samplekey_rows_for <- function(ps, id_col) {
  ps_ids <- colnames(otu_table(ps))
  key_ids <- samplekey[[id_col]]
  if (anyDuplicated(key_ids[key_ids %in% ps_ids]) > 0) {
    stop("Duplicated sample IDs in samplekey$", id_col, call. = FALSE)
  }
  idx <- match(ps_ids, key_ids)
  if (anyNA(idx)) {
    stop("Samples missing from samplekey$", id_col, ": ",
         paste(ps_ids[is.na(idx)], collapse = ", "), call. = FALSE)
  }
  samplekey[idx, ]
}

# Archaea: key rows for the archaeal samples that are present, in OTU-table
# column order; then replace the sample names of both pruned objects
samplekey_A <- samplekey_rows_for(ps_arch_ra_pruned, "SampleID_arch")
sample_names(ps_arch_ra_pruned) <- samplekey_A$Type
sample_names(ps_arch_pruned) <- samplekey_A$Type
# Bacteria
samplekey_B <- samplekey_rows_for(ps_bac_ra_pruned, "SampleID_bac")
sample_names(ps_bac_ra_pruned) <- samplekey_B$Type
sample_names(ps_bac_pruned) <- samplekey_B$Type
# Eukaryotes
samplekey_E <- samplekey_rows_for(ps_euk_ra_pruned, "SampleID_euk")
sample_names(ps_euk_ra_pruned) <- samplekey_E$Type
sample_names(ps_euk_pruned) <- samplekey_E$Type
Move all pruned OTU tables into one table by matching the sample Type; this will be used for SparCC. Make one table for the 3-domain analysis and one for the 2-domain analysis (bacteria and archaea only).
# Pruned count tables as plain data frames (taxa in rows, sample Types in columns)
bac_pruned_df <- data.frame(otu_table(ps_bac_pruned))
arch_pruned_df <- data.frame(otu_table(ps_arch_pruned))
euk_pruned_df <- data.frame(otu_table(ps_euk_pruned))
# Stack the domains; columns are matched by name, so a sample absent from a
# domain is filled with NA for that domain's rows
alldomains_df <- bind_rows(bac_pruned_df, arch_pruned_df, euk_pruned_df)
alldomains_df
twodomains_df <- bind_rows(bac_pruned_df, arch_pruned_df)
twodomains_df
Change row names from “denovoXXX” to meaningful names
# Build row labels of the form "<ID> <taxonomy levels>" for every taxon.
# The row slices below rely on alldomains_df being stacked as bacteria, then
# archaea, then eukaryotes.
# Copy the row names (OTU/ASV IDs) into an explicit ID column to join on
alldomains_df_full <- cbind(ID = rownames(alldomains_df), alldomains_df)
# NOTE(review): this first twodomains_df_full is overwritten further down by rbind(temp1, temp2)
twodomains_df_full <- cbind(ID = rownames(twodomains_df), twodomains_df)
# start with only first rows, which are bacteria. make one column of meaningful labels
temp1 <- left_join(alldomains_df_full[1:dim(otu_table(ps_bac_pruned))[1],], bac_taxonomy, by = c("ID" = "#OTU ID"))
temp1$New_ID <- paste(temp1$ID, temp1$"taxonomy-2", temp1$"taxonomy-3", temp1$"taxonomy-4")
# drop the joined taxonomy columns again (columns 2-11 of bac_taxonomy; presumably all of its non-ID columns, TODO confirm)
temp1 <- select(temp1,-colnames(bac_taxonomy[,2:11]))
# next rows are the archaea
# (rows n_bac + 1 to n_bac + n_arch; sum(a, b) is used here as plain addition)
temp2 <- left_join(alldomains_df_full[sum(dim(otu_table(ps_bac_pruned))[1],1):sum(dim(otu_table(ps_bac_pruned))[1],dim(otu_table(ps_arch_pruned))[1]),], arch_taxonomy, by = c("ID" = "#OTU ID"))
temp2$New_ID <- paste(temp2$ID, temp2$"taxonomy-2", temp2$"taxonomy-3")
# drop the joined taxonomy columns again (columns 2-9 of arch_taxonomy)
temp2 <- select(temp2,-colnames(arch_taxonomy[,2:9]))
# last rows are eukarya
# the eukaryote taxonomy keeps its ASV IDs in the row names; copy them into a column to join on
euk_taxonomy <- cbind("#ASV ID" = rownames(taxonomy), taxonomy)
temp3 <- left_join(alldomains_df_full[sum(dim(otu_table(ps_arch_pruned))[1], dim(otu_table(ps_bac_pruned))[1],1):sum(dim(otu_table(ps_arch_pruned))[1], dim(otu_table(ps_bac_pruned))[1],dim(otu_table(ps_euk_pruned))[1]),], euk_taxonomy, by = c("ID" = "#ASV ID"))
temp3$New_ID <- paste(temp3$ID, temp3$"Supergroup", temp3$"Division", temp3$"Class", temp3$"Order")
# drop the joined taxonomy columns again (columns 2-9 of euk_taxonomy)
temp3 <- select(temp3,-colnames(euk_taxonomy[,2:9]))
# combine back all 3 domains, with new names as row names in a dataframe
alldomains_df_full <- rbind(temp1, temp2, temp3)
alldomains_df_full <- data.frame(alldomains_df_full)
rownames(alldomains_df_full) <- alldomains_df_full$New_ID
# the helper columns are no longer needed once the labels are row names
alldomains_df_full <- select(alldomains_df_full, -c("ID","New_ID"))
# and make one for the 2-domain dataset
twodomains_df_full <- rbind(temp1, temp2)
twodomains_df_full <- data.frame(twodomains_df_full)
rownames(twodomains_df_full) <- twodomains_df_full$New_ID
twodomains_df_full <- select(twodomains_df_full, -c("ID","New_ID"))
Remove columns with NAs. These are samples for which the library for at least one domain didn’t work (can’t do correlations with missing values in columns)
# Keep only the sample columns without any NA, for each of the four tables
alldomains_df_full <- alldomains_df_full %>%
  select(where(~ !any(is.na(.x))))
alldomains_df_full
alldomains_df <- alldomains_df %>%
  select(where(~ !any(is.na(.x))))
alldomains_df
twodomains_df_full <- twodomains_df_full %>%
  select(where(~ !any(is.na(.x))))
twodomains_df_full
twodomains_df <- twodomains_df %>%
  select(where(~ !any(is.na(.x))))
twodomains_df
Similarly, make pruned datasets of the most abundant OTUs/ASVs in the oxycline, anoxic, and euxinic samples as separate datasets
Pull out samples and taxa from each redox regime
# Bacteria: names of the samples that are in ps_bac and labelled "Oxycline"
oxyclinetypes_bac <- metadata %>%
  filter(`Sample Name` %in% sample_names(ps_bac), OxCond == "Oxycline") %>%
  pull(`Sample Name`) %>%
  unique()
# Restrict the count and relative-abundance objects to those samples
ps_bac_oxycline <- prune_samples(oxyclinetypes_bac, ps_bac)
ps_bac_ra_oxycline <- prune_samples(oxyclinetypes_bac, ps_bac_ra)

# Archaea: same selection against ps_arch
oxyclinetypes_arch <- metadata %>%
  filter(`Sample Name` %in% sample_names(ps_arch), OxCond == "Oxycline") %>%
  pull(`Sample Name`) %>%
  unique()
ps_arch_oxycline <- prune_samples(oxyclinetypes_arch, ps_arch)
ps_arch_ra_oxycline <- prune_samples(oxyclinetypes_arch, ps_arch_ra)

# Eukaryotes: same selection against ps
oxyclinetypes_euk <- metadata %>%
  filter(`Sample Name` %in% sample_names(ps), OxCond == "Oxycline") %>%
  pull(`Sample Name`) %>%
  unique()
ps_euk_oxycline <- prune_samples(oxyclinetypes_euk, ps)
ps_euk_ra_oxycline <- prune_samples(oxyclinetypes_euk, ps_ra)
Filter out low abundance taxa from the oxycline samples. Use 5% as cutoff
# Within the oxycline samples only: keep the taxa whose relative abundance
# summed across samples is above 0.05, and apply the same logical mask to the
# count object and the relative-abundance object of each domain.

# Bacteria
x <- taxa_sums(ps_bac_ra_oxycline)
keepTaxa <- x > 0.05
ps_bac_oxycline_pruned <- prune_taxa(keepTaxa, ps_bac_oxycline)
ps_bac_ra_oxycline_pruned <- prune_taxa(keepTaxa, ps_bac_ra_oxycline)
ps_bac_ra_oxycline_pruned
ps_bac_oxycline_pruned

# Archaea
x <- taxa_sums(ps_arch_ra_oxycline)
keepTaxa <- x > 0.05
ps_arch_oxycline_pruned <- prune_taxa(keepTaxa, ps_arch_oxycline)
ps_arch_ra_oxycline_pruned <- prune_taxa(keepTaxa, ps_arch_ra_oxycline)
ps_arch_ra_oxycline_pruned
ps_arch_oxycline_pruned

# Eukaryotes
x <- taxa_sums(ps_euk_ra_oxycline)
keepTaxa <- x > 0.05
ps_euk_oxycline_pruned <- prune_taxa(keepTaxa, ps_euk_oxycline)
ps_euk_ra_oxycline_pruned <- prune_taxa(keepTaxa, ps_euk_ra_oxycline)
ps_euk_ra_oxycline_pruned
ps_euk_oxycline_pruned
79 bacteria, 36 archaea, 76 eukaryota remain
Change the sample names in the otu tables to “Type”
# Return the rows of `samplekey` that belong to the samples of `ps`, in the
# order of the columns of its OTU table. `id_col` is the name of the samplekey
# column holding that domain's sample IDs.
# Stops with an explicit message when a sample is missing from the key or its
# ID occurs more than once, instead of handing back a label vector of the
# wrong length (or one that pairs samples with the wrong Type).
samplekey_rows_for <- function(ps, id_col) {
  ps_ids <- colnames(otu_table(ps))
  key_ids <- samplekey[[id_col]]
  if (anyDuplicated(key_ids[key_ids %in% ps_ids]) > 0) {
    stop("Duplicated sample IDs in samplekey$", id_col, call. = FALSE)
  }
  idx <- match(ps_ids, key_ids)
  if (anyNA(idx)) {
    stop("Samples missing from samplekey$", id_col, ": ",
         paste(ps_ids[is.na(idx)], collapse = ", "), call. = FALSE)
  }
  samplekey[idx, ]
}

# Archaea: key rows for the oxycline archaeal samples, in OTU-table column
# order; then replace the sample names of both pruned oxycline objects
samplekey_A <- samplekey_rows_for(ps_arch_ra_oxycline_pruned, "SampleID_arch")
sample_names(ps_arch_ra_oxycline_pruned) <- samplekey_A$Type
sample_names(ps_arch_oxycline_pruned) <- samplekey_A$Type
# Bacteria
samplekey_B <- samplekey_rows_for(ps_bac_ra_oxycline_pruned, "SampleID_bac")
sample_names(ps_bac_ra_oxycline_pruned) <- samplekey_B$Type
sample_names(ps_bac_oxycline_pruned) <- samplekey_B$Type
# Eukaryotes
samplekey_E <- samplekey_rows_for(ps_euk_ra_oxycline_pruned, "SampleID_euk")
sample_names(ps_euk_ra_oxycline_pruned) <- samplekey_E$Type
sample_names(ps_euk_oxycline_pruned) <- samplekey_E$Type
Move all pruned otu tables into one table by matching the sample Type- will use this for SparCC
# Stack the three pruned oxycline count tables (bacteria, archaea, eukaryotes);
# columns are matched by sample Type
oxycline_tables <- list(
  data.frame(otu_table(ps_bac_oxycline_pruned)),
  data.frame(otu_table(ps_arch_oxycline_pruned)),
  data.frame(otu_table(ps_euk_oxycline_pruned))
)
alldomains_df_oxycline <- bind_rows(oxycline_tables)
alldomains_df_oxycline
Change row names from “denovoXXX” to meaningful names
# Build row labels of the form "<ID> <taxonomy levels>" for the oxycline table.
# The row slices below rely on alldomains_df_oxycline being stacked as
# bacteria, then archaea, then eukaryotes.
# Copy the row names (OTU/ASV IDs) into an explicit ID column to join on
alldomains_df_full_oxycline <- cbind(ID = rownames(alldomains_df_oxycline), alldomains_df_oxycline)
# start with only first rows, which are bacteria. make one column of meaningful labels
temp1 <- left_join(alldomains_df_full_oxycline[1:dim(otu_table(ps_bac_oxycline_pruned))[1],], bac_taxonomy, by = c("ID" = "#OTU ID"))
temp1$New_ID <- paste(temp1$ID, temp1$"taxonomy-2", temp1$"taxonomy-3", temp1$"taxonomy-4")
# drop the joined taxonomy columns again (columns 2-11 of bac_taxonomy; presumably all of its non-ID columns, TODO confirm)
temp1 <- select(temp1,-colnames(bac_taxonomy[,2:11]))
# next rows are the archaea
# (rows n_bac + 1 to n_bac + n_arch; sum(a, b) is used here as plain addition)
temp2 <- left_join(alldomains_df_full_oxycline[sum(dim(otu_table(ps_bac_oxycline_pruned))[1],1):sum(dim(otu_table(ps_bac_oxycline_pruned))[1],dim(otu_table(ps_arch_oxycline_pruned))[1]),], arch_taxonomy, by = c("ID" = "#OTU ID"))
temp2$New_ID <- paste(temp2$ID, temp2$"taxonomy-2", temp2$"taxonomy-3")
# drop the joined taxonomy columns again (columns 2-9 of arch_taxonomy)
temp2 <- select(temp2,-colnames(arch_taxonomy[,2:9]))
# last rows are eukarya
# the eukaryote taxonomy keeps its ASV IDs in the row names; copy them into a column to join on
euk_taxonomy <- cbind("#ASV ID" = rownames(taxonomy), taxonomy)
temp3 <- left_join(alldomains_df_full_oxycline[sum(dim(otu_table(ps_arch_oxycline_pruned))[1], dim(otu_table(ps_bac_oxycline_pruned))[1],1):sum(dim(otu_table(ps_arch_oxycline_pruned))[1], dim(otu_table(ps_bac_oxycline_pruned))[1],dim(otu_table(ps_euk_oxycline_pruned))[1]),], euk_taxonomy, by = c("ID" = "#ASV ID"))
temp3$New_ID <- paste(temp3$ID, temp3$"Supergroup", temp3$"Division", temp3$"Class", temp3$"Order")
# drop the joined taxonomy columns again (columns 2-9 of euk_taxonomy)
temp3 <- select(temp3,-colnames(euk_taxonomy[,2:9]))
# combine back all 3 domains, with new names as row names in a dataframe
alldomains_df_full_oxycline <- rbind(temp1, temp2, temp3)
alldomains_df_full_oxycline <- data.frame(alldomains_df_full_oxycline)
rownames(alldomains_df_full_oxycline) <- alldomains_df_full_oxycline$New_ID
# the helper columns are no longer needed once the labels are row names
alldomains_df_full_oxycline <- select(alldomains_df_full_oxycline, -c("ID","New_ID"))
alldomains_df_full_oxycline
Remove columns with NAs. These are samples for which the library for at least one domain didn’t work (can’t do correlations with missing values in columns)
# Keep only the sample columns that contain no NA, in both the labelled table
# and the raw-ID table
alldomains_df_full_oxycline <- select_if(
  alldomains_df_full_oxycline,
  ~ !any(is.na(.))
)
alldomains_df_full_oxycline

alldomains_df_oxycline <- select_if(
  alldomains_df_oxycline,
  ~ !any(is.na(.))
)
alldomains_df_oxycline
21 samples remain for correlation
Pull out samples from shallow anoxic regime
# Bacteria: names of the samples that are in ps_bac and whose OxCond is
# "ShallowAnoxic", as a plain unnamed vector
anoxictypes_bac <- metadata %>%
  filter(`Sample Name` %in% sample_names(ps_bac), OxCond == "ShallowAnoxic") %>%
  select("Sample Name") %>%
  unique() %>%
  c() %>%
  unlist(use.names = FALSE)
# Subset the count and relative-abundance objects to those samples
ps_bac_anoxic <- prune_samples(anoxictypes_bac, ps_bac)
ps_bac_ra_anoxic <- prune_samples(anoxictypes_bac, ps_bac_ra)

# Archaea: same selection against ps_arch
anoxictypes_arch <- metadata %>%
  filter(`Sample Name` %in% sample_names(ps_arch), OxCond == "ShallowAnoxic") %>%
  select("Sample Name") %>%
  unique() %>%
  c() %>%
  unlist(use.names = FALSE)
ps_arch_anoxic <- prune_samples(anoxictypes_arch, ps_arch)
ps_arch_ra_anoxic <- prune_samples(anoxictypes_arch, ps_arch_ra)

# Eukaryotes: same selection against ps (the eukaryote object)
anoxictypes_euk <- metadata %>%
  filter(`Sample Name` %in% sample_names(ps), OxCond == "ShallowAnoxic") %>%
  select("Sample Name") %>%
  unique() %>%
  c() %>%
  unlist(use.names = FALSE)
ps_euk_anoxic <- prune_samples(anoxictypes_euk, ps)
ps_euk_ra_anoxic <- prune_samples(anoxictypes_euk, ps_ra)
Filter out low abundance taxa from the shallow anoxic samples. Use 5% as cutoff
# Bacteria: keep taxa whose relative abundance summed over the anoxic samples
# exceeds 0.05. prune_taxa() takes the logical vector; the same mask is
# applied to the relative-abundance and the count object.
x <- taxa_sums(ps_bac_ra_anoxic)
keepTaxa <- x > 0.05
ps_bac_ra_anoxic_pruned <- prune_taxa(keepTaxa, ps_bac_ra_anoxic)
ps_bac_anoxic_pruned <- prune_taxa(keepTaxa, ps_bac_anoxic)
ps_bac_ra_anoxic_pruned
ps_bac_anoxic_pruned

# Archaea: same threshold
x <- taxa_sums(ps_arch_ra_anoxic)
keepTaxa <- x > 0.05
ps_arch_ra_anoxic_pruned <- prune_taxa(keepTaxa, ps_arch_ra_anoxic)
ps_arch_anoxic_pruned <- prune_taxa(keepTaxa, ps_arch_anoxic)
ps_arch_ra_anoxic_pruned
ps_arch_anoxic_pruned

# Eukaryotes: same threshold
x <- taxa_sums(ps_euk_ra_anoxic)
keepTaxa <- x > 0.05
ps_euk_ra_anoxic_pruned <- prune_taxa(keepTaxa, ps_euk_ra_anoxic)
ps_euk_anoxic_pruned <- prune_taxa(keepTaxa, ps_euk_anoxic)
ps_euk_ra_anoxic_pruned
ps_euk_anoxic_pruned
32 bacteria, 19 archaea, 37 eukaryota remain
Change the sample names in the otu tables to “Type”
# Archaea: keep only key rows whose archaeal ID is in the OTU table, order
# them like the OTU-table columns, then label the samples by Type in both the
# relative-abundance and the count objects
samplekey_A <- samplekey %>%
  filter(SampleID_arch %in% colnames(otu_table(ps_arch_ra_anoxic_pruned))) %>%
  arrange(factor(SampleID_arch, levels = colnames(otu_table(ps_arch_ra_anoxic_pruned))))
sample_names(ps_arch_ra_anoxic_pruned) <- samplekey_A$Type
sample_names(ps_arch_anoxic_pruned) <- samplekey_A$Type

# Bacteria: same procedure
samplekey_B <- samplekey %>%
  filter(SampleID_bac %in% colnames(otu_table(ps_bac_ra_anoxic_pruned))) %>%
  arrange(factor(SampleID_bac, levels = colnames(otu_table(ps_bac_ra_anoxic_pruned))))
sample_names(ps_bac_ra_anoxic_pruned) <- samplekey_B$Type
sample_names(ps_bac_anoxic_pruned) <- samplekey_B$Type

# Eukaryotes: same procedure
samplekey_E <- samplekey %>%
  filter(SampleID_euk %in% colnames(otu_table(ps_euk_ra_anoxic_pruned))) %>%
  arrange(factor(SampleID_euk, levels = colnames(otu_table(ps_euk_ra_anoxic_pruned))))
sample_names(ps_euk_ra_anoxic_pruned) <- samplekey_E$Type
sample_names(ps_euk_anoxic_pruned) <- samplekey_E$Type
Move all pruned otu tables into one table by matching the sample Type- will use this for SparCC
# Stack the three count tables (bacteria, then archaea, then eukaryotes).
# bind_rows() matches columns by name, so a sample Type missing from one
# domain is filled with NA for that domain's rows.
alldomains_df_anoxic <- bind_rows(
  data.frame(otu_table(ps_bac_anoxic_pruned)),
  data.frame(otu_table(ps_arch_anoxic_pruned)),
  data.frame(otu_table(ps_euk_anoxic_pruned))
)
alldomains_df_anoxic
Change row names from “denovoXXX” to meaningful names
# Keep the OTU/ASV identifier as an explicit ID column to join taxonomy on
alldomains_df_full_anoxic <- cbind(
  ID = rownames(alldomains_df_anoxic),
  alldomains_df_anoxic
)

# Bacteria occupy the first rows of the stacked table; build one readable label
temp1 <- alldomains_df_full_anoxic[1:nrow(otu_table(ps_bac_anoxic_pruned)), ] %>%
  left_join(bac_taxonomy, by = c("ID" = "#OTU ID"))
temp1$New_ID <- paste(
  temp1$ID,
  temp1[["taxonomy-2"]],
  temp1[["taxonomy-3"]],
  temp1[["taxonomy-4"]]
)
temp1 <- select(temp1, -colnames(bac_taxonomy[, 2:11]))

# Archaea follow directly after the bacterial rows
temp2 <- alldomains_df_full_anoxic[
  (nrow(otu_table(ps_bac_anoxic_pruned)) + 1):
    (nrow(otu_table(ps_bac_anoxic_pruned)) + nrow(otu_table(ps_arch_anoxic_pruned))),
] %>%
  left_join(arch_taxonomy, by = c("ID" = "#OTU ID"))
temp2$New_ID <- paste(temp2$ID, temp2[["taxonomy-2"]], temp2[["taxonomy-3"]])
temp2 <- select(temp2, -colnames(arch_taxonomy[, 2:9]))

# Eukaryotes are the remaining rows; their taxonomy table is keyed by row name
euk_taxonomy <- cbind("#ASV ID" = rownames(taxonomy), taxonomy)
temp3 <- alldomains_df_full_anoxic[
  (nrow(otu_table(ps_arch_anoxic_pruned)) + nrow(otu_table(ps_bac_anoxic_pruned)) + 1):
    (nrow(otu_table(ps_arch_anoxic_pruned)) + nrow(otu_table(ps_bac_anoxic_pruned)) +
      nrow(otu_table(ps_euk_anoxic_pruned))),
] %>%
  left_join(euk_taxonomy, by = c("ID" = "#ASV ID"))
temp3$New_ID <- paste(
  temp3$ID,
  temp3[["Supergroup"]],
  temp3[["Division"]],
  temp3[["Class"]],
  temp3[["Order"]]
)
temp3 <- select(temp3, -colnames(euk_taxonomy[, 2:9]))

# Re-stack the three domains and use the readable labels as row names
alldomains_df_full_anoxic <- rbind(temp1, temp2, temp3) %>%
  data.frame()
rownames(alldomains_df_full_anoxic) <- alldomains_df_full_anoxic$New_ID
alldomains_df_full_anoxic <- select(alldomains_df_full_anoxic, -c("ID", "New_ID"))
alldomains_df_full_anoxic
Remove columns with NAs. These are samples for which the library for at least one domain didn’t work (can’t do correlations with missing values in columns)
# Keep only the sample columns that contain no NA, in both the labelled table
# and the raw-ID table
alldomains_df_full_anoxic <- select_if(
  alldomains_df_full_anoxic,
  ~ !any(is.na(.))
)
alldomains_df_full_anoxic

alldomains_df_anoxic <- select_if(
  alldomains_df_anoxic,
  ~ !any(is.na(.))
)
alldomains_df_anoxic
11 samples remain for correlation
Pull out samples from the euxinic regime
# Bacteria: names of the samples that are in ps_bac and whose OxCond is
# "Euxinic", as a plain unnamed vector
euxinictypes_bac <- metadata %>%
  filter(`Sample Name` %in% sample_names(ps_bac), OxCond == "Euxinic") %>%
  select("Sample Name") %>%
  unique() %>%
  c() %>%
  unlist(use.names = FALSE)
# Subset the count and relative-abundance objects to those samples
ps_bac_euxinic <- prune_samples(euxinictypes_bac, ps_bac)
ps_bac_ra_euxinic <- prune_samples(euxinictypes_bac, ps_bac_ra)

# Archaea: same selection against ps_arch
euxinictypes_arch <- metadata %>%
  filter(`Sample Name` %in% sample_names(ps_arch), OxCond == "Euxinic") %>%
  select("Sample Name") %>%
  unique() %>%
  c() %>%
  unlist(use.names = FALSE)
ps_arch_euxinic <- prune_samples(euxinictypes_arch, ps_arch)
ps_arch_ra_euxinic <- prune_samples(euxinictypes_arch, ps_arch_ra)

# Eukaryotes: same selection against ps (the eukaryote object)
euxinictypes_euk <- metadata %>%
  filter(`Sample Name` %in% sample_names(ps), OxCond == "Euxinic") %>%
  select("Sample Name") %>%
  unique() %>%
  c() %>%
  unlist(use.names = FALSE)
ps_euk_euxinic <- prune_samples(euxinictypes_euk, ps)
ps_euk_ra_euxinic <- prune_samples(euxinictypes_euk, ps_ra)
Filter out low abundance taxa from the euxinic samples. Use 5% as cutoff
# Bacteria: keep taxa whose relative abundance summed over the euxinic samples
# exceeds 0.05. prune_taxa() takes the logical vector; the same mask is
# applied to the relative-abundance and the count object.
x <- taxa_sums(ps_bac_ra_euxinic)
keepTaxa <- x > 0.05
ps_bac_ra_euxinic_pruned <- prune_taxa(keepTaxa, ps_bac_ra_euxinic)
ps_bac_euxinic_pruned <- prune_taxa(keepTaxa, ps_bac_euxinic)
ps_bac_ra_euxinic_pruned
ps_bac_euxinic_pruned

# Archaea: same threshold
x <- taxa_sums(ps_arch_ra_euxinic)
keepTaxa <- x > 0.05
ps_arch_ra_euxinic_pruned <- prune_taxa(keepTaxa, ps_arch_ra_euxinic)
ps_arch_euxinic_pruned <- prune_taxa(keepTaxa, ps_arch_euxinic)
ps_arch_ra_euxinic_pruned
ps_arch_euxinic_pruned

# Eukaryotes: same threshold
x <- taxa_sums(ps_euk_ra_euxinic)
keepTaxa <- x > 0.05
ps_euk_ra_euxinic_pruned <- prune_taxa(keepTaxa, ps_euk_ra_euxinic)
ps_euk_euxinic_pruned <- prune_taxa(keepTaxa, ps_euk_euxinic)
ps_euk_ra_euxinic_pruned
ps_euk_euxinic_pruned
16 bacteria, 16 archaea, 20 eukaryota remain
Change the sample names in the otu tables to “Type”
# Archaea: keep only key rows whose archaeal ID is in the OTU table, order
# them like the OTU-table columns, then label the samples by Type in both the
# relative-abundance and the count objects
samplekey_A <- samplekey %>%
  filter(SampleID_arch %in% colnames(otu_table(ps_arch_ra_euxinic_pruned))) %>%
  arrange(factor(SampleID_arch, levels = colnames(otu_table(ps_arch_ra_euxinic_pruned))))
sample_names(ps_arch_ra_euxinic_pruned) <- samplekey_A$Type
sample_names(ps_arch_euxinic_pruned) <- samplekey_A$Type

# Bacteria: same procedure
samplekey_B <- samplekey %>%
  filter(SampleID_bac %in% colnames(otu_table(ps_bac_ra_euxinic_pruned))) %>%
  arrange(factor(SampleID_bac, levels = colnames(otu_table(ps_bac_ra_euxinic_pruned))))
sample_names(ps_bac_ra_euxinic_pruned) <- samplekey_B$Type
sample_names(ps_bac_euxinic_pruned) <- samplekey_B$Type

# Eukaryotes: same procedure
samplekey_E <- samplekey %>%
  filter(SampleID_euk %in% colnames(otu_table(ps_euk_ra_euxinic_pruned))) %>%
  arrange(factor(SampleID_euk, levels = colnames(otu_table(ps_euk_ra_euxinic_pruned))))
sample_names(ps_euk_ra_euxinic_pruned) <- samplekey_E$Type
sample_names(ps_euk_euxinic_pruned) <- samplekey_E$Type
Move all pruned otu tables into one table by matching the sample Type- will use this for SparCC
# Stack the three count tables (bacteria, then archaea, then eukaryotes).
# bind_rows() matches columns by name, so a sample Type missing from one
# domain is filled with NA for that domain's rows.
alldomains_df_euxinic <- bind_rows(
  data.frame(otu_table(ps_bac_euxinic_pruned)),
  data.frame(otu_table(ps_arch_euxinic_pruned)),
  data.frame(otu_table(ps_euk_euxinic_pruned))
)
alldomains_df_euxinic
Change row names from “denovoXXX” to meaningful names
# Keep the OTU/ASV identifier as an explicit ID column to join taxonomy on
alldomains_df_full_euxinic <- cbind(
  ID = rownames(alldomains_df_euxinic),
  alldomains_df_euxinic
)

# Bacteria occupy the first rows of the stacked table; build one readable label
temp1 <- alldomains_df_full_euxinic[1:nrow(otu_table(ps_bac_euxinic_pruned)), ] %>%
  left_join(bac_taxonomy, by = c("ID" = "#OTU ID"))
temp1$New_ID <- paste(
  temp1$ID,
  temp1[["taxonomy-2"]],
  temp1[["taxonomy-3"]],
  temp1[["taxonomy-4"]]
)
temp1 <- select(temp1, -colnames(bac_taxonomy[, 2:11]))

# Archaea follow directly after the bacterial rows
temp2 <- alldomains_df_full_euxinic[
  (nrow(otu_table(ps_bac_euxinic_pruned)) + 1):
    (nrow(otu_table(ps_bac_euxinic_pruned)) + nrow(otu_table(ps_arch_euxinic_pruned))),
] %>%
  left_join(arch_taxonomy, by = c("ID" = "#OTU ID"))
temp2$New_ID <- paste(temp2$ID, temp2[["taxonomy-2"]], temp2[["taxonomy-3"]])
temp2 <- select(temp2, -colnames(arch_taxonomy[, 2:9]))

# Eukaryotes are the remaining rows; their taxonomy table is keyed by row name
euk_taxonomy <- cbind("#ASV ID" = rownames(taxonomy), taxonomy)
temp3 <- alldomains_df_full_euxinic[
  (nrow(otu_table(ps_arch_euxinic_pruned)) + nrow(otu_table(ps_bac_euxinic_pruned)) + 1):
    (nrow(otu_table(ps_arch_euxinic_pruned)) + nrow(otu_table(ps_bac_euxinic_pruned)) +
      nrow(otu_table(ps_euk_euxinic_pruned))),
] %>%
  left_join(euk_taxonomy, by = c("ID" = "#ASV ID"))
temp3$New_ID <- paste(
  temp3$ID,
  temp3[["Supergroup"]],
  temp3[["Division"]],
  temp3[["Class"]],
  temp3[["Order"]]
)
temp3 <- select(temp3, -colnames(euk_taxonomy[, 2:9]))

# Re-stack the three domains and use the readable labels as row names
alldomains_df_full_euxinic <- rbind(temp1, temp2, temp3) %>%
  data.frame()
rownames(alldomains_df_full_euxinic) <- alldomains_df_full_euxinic$New_ID
alldomains_df_full_euxinic <- select(alldomains_df_full_euxinic, -c("ID", "New_ID"))
alldomains_df_full_euxinic
Remove columns with NAs. These are samples for which the library for at least one domain didn’t work (can’t do correlations with missing values in columns)
# Keep only the sample columns that contain no NA, in both the labelled table
# and the raw-ID table
alldomains_df_full_euxinic <- select_if(
  alldomains_df_full_euxinic,
  ~ !any(is.na(.))
)
alldomains_df_full_euxinic

alldomains_df_euxinic <- select_if(
  alldomains_df_euxinic,
  ~ !any(is.na(.))
)
alldomains_df_euxinic
4 samples remain for correlation
This is largely based on the BVCN tutorials. NOTE: the input for SparCC should be raw count data (after filtering out low-abundance ASVs). The function applies a log-ratio transformation to account for compositionality.
# Run SparCC on the transposed count table (the table is stored taxa x samples)
sparcctable_alldomains <- alldomains_df %>%
  t() %>%
  sparcc()
Put the taxon names (row names of the count table) back into the result tables
# Label rows and columns of the correlation and covariance matrices with the
# row names of alldomains_df_full
dimnames(sparcctable_alldomains$Cor) <- list(
  rownames(alldomains_df_full),
  rownames(alldomains_df_full)
)
dimnames(sparcctable_alldomains$Cov) <- list(
  rownames(alldomains_df_full),
  rownames(alldomains_df_full)
)
# Quick look at the top-left corner
sparcctable_alldomains$Cor[1:2, 1:2]
Plot correlation
# Reorder the correlation matrix, keep the upper triangle and reshape it to
# long format (Var1, Var2, value) for ggplot
plotableSparcc <- sparcctable_alldomains$Cor %>%
  reorder_cormat() %>%
  get_upper_tri() %>%
  reshape2::melt() %>%
  na.omit()

# Heatmap of the pairwise correlations
Sparcc_plot <- ggplot(plotableSparcc, aes(x = Var2, y = Var1, fill = value)) +
  geom_tile() +
  scale_fill_gradient2() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
Sparcc_plot
# ggsave("figures/sparcc_corr_alldomains.eps",Sparcc_plot, width = 35, height = 35, units = c("in"))
Calculate Sparcc p-values by bootstrapping- TAKES A LONG TIME
# tp0 <- proc.time()
# out2 <- sparccboot(t(alldomains_df), R = 1000, ncpus = 2)
# tp1 <- proc.time()
# tp1 - tp0
The above took ~14 hours to run 1000 iterations
Extract p-values
# Bootstrap p-values; out2 is the sparccboot() result from the commented-out
# run (restored from the saved environment)
outP <- pval.sparccboot(out2)
head(data.frame(outP$cors, outP$pvals))
cors <- outP$cors
pvals <- outP$pvals

# Rebuild full symmetric matrices from the upper-triangle vectors. The
# diagonal starts at 0.5 so that adding the transpose makes it 1.
sparCCpcors <- diag(
  0.5,
  nrow = nrow(sparcctable_alldomains$Cor),
  ncol = nrow(sparcctable_alldomains$Cor)
)
sparCCpcors[upper.tri(sparCCpcors, diag = FALSE)] <- cors
sparCCpcors <- sparCCpcors + t(sparCCpcors)

sparCCpval <- diag(
  0.5,
  nrow = nrow(sparcctable_alldomains$Cor),
  ncol = nrow(sparcctable_alldomains$Cor)
)
sparCCpval[upper.tri(sparCCpval, diag = FALSE)] <- pvals
sparCCpval <- sparCCpval + t(sparCCpval)

# Label both matrices with the row names of alldomains_df_full
dimnames(sparCCpcors) <- list(rownames(alldomains_df_full), rownames(alldomains_df_full))
dimnames(sparCCpval) <- list(rownames(alldomains_df_full), rownames(alldomains_df_full))
sparCCpcors[1:2, 1:2]
sparCCpval[1:2, 1:2]
Reorder for plotting
# Apply one common ordering to the correlation and p-value matrices
reordered_all_sparcc <- reorder_cor_and_p(sparCCpcors, sparCCpval)
reordered_sparccCor <- reordered_all_sparcc$r
reordered_sparccP <- reordered_all_sparcc$p

# Upper triangle of each matrix in long format
sparccCor_processed <- reordered_sparccCor %>%
  get_upper_tri() %>%
  reshape2::melt() %>%
  na.omit() %>%
  rename(cor = value)
sparccP_processed <- reordered_sparccP %>%
  get_upper_tri() %>%
  reshape2::melt() %>%
  na.omit() %>%
  rename(p = value)

# One row per taxon pair with its correlation, p-value and BH-adjusted
# p-value (false discovery rate)
SparccP <- sparccCor_processed %>%
  left_join(sparccP_processed, by = c("Var1", "Var2")) %>%
  # # remove self correlations
  # filter(Var1 != Var2) %>%
  mutate(fdr = p.adjust(p, method = "BH"))
And plot correlation with p-values. Circles mark relationships whose BH-adjusted p-value (FDR), based on bootstrapping, is below the threshold `fdrThresh` set in the code below
# FDR cutoff used to mark a pair as significant
fdrThresh <- 0.01
sparccOkP <- filter(SparccP, fdr < fdrThresh)

# Correlation heatmap; an open circle is drawn on every pair below the cutoff
SparccP_plot <- ggplot(SparccP, aes(x = Var2, y = Var1, fill = cor)) +
  geom_tile() +
  scale_fill_gradient2() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  geom_point(data = sparccOkP, shape = 1)
SparccP_plot
ggsave(
  "figures/sparcc_corr_alldomains_w_pvals.eps",
  SparccP_plot,
  width = 35,
  height = 35,
  units = "in"
)
Save environment again
# save.image("EnvironmentBackups/CariacoEuks_postanalysis_vars_upto_sparcc_bootstrap.RData")
Or load if coming back
# Restore the workspace saved after the SparCC bootstrap
load(file = "EnvironmentBackups/CariacoEuks_postanalysis_vars_upto_sparcc_bootstrap.RData")
Try the SpiecEasi method, which accounts for sparse data, as described in the SpiecEasi publication, the SpiecEasi GitHub, and BVCN lessons 1.2. This reduces the clumps (e.g. sparse relationships that are secondary or tertiary, not direct relationships).
Make functions from tutorial
# Convert a SpiecEasi fit into a long table of pairwise edge weights, in the
# same Var1 / Var2 / value layout used for the correlation heatmaps.
#
# se_out:   fitted object passed to getOptCov() and getRefit().
# sp.names: character vector of taxon labels, in the same order as the
#           rows/columns of the fitted matrices.
#
# Returns a data frame with one row per ordered pair (Var1, Var2) and a
# Weight column: the correlation-scaled weight for selected edges, 0 for pairs
# without an edge, and NA for pairs whose Var1 index is greater than their
# Var2 index (so only one triangle is drawn).
convertSEToTable <- function(se_out, sp.names) {
  # Scale the optimal covariance to a correlation matrix
  # (see the SpiecEasi documentation on pulling out weights for comparison)
  secor <- cov2cor(as.matrix(getOptCov(se_out)))
  # Keep weights only where the refit network has an edge, strictly above the
  # diagonal, as an (i, j, x) edge list
  elist <- summary(triu(secor * getRefit(se_out), k = 1))
  # Replace the row/column indices with taxon labels and build a pair key
  elist[, 1] <- sp.names[elist[, 1]]
  elist[, 2] <- sp.names[elist[, 2]]
  elist[, 4] <- paste(elist[, 1], elist[, 2])
  # All ordered pairs, keyed the same way, starting at weight 0
  full_e <- expand.grid(sp.names, sp.names)
  rownames(full_e) <- paste(full_e[, 1], full_e[, 2])
  full_e[, "Weight"] <- 0
  full_e[elist[, 4], "Weight"] <- elist[, 3]
  # Blank out the other triangle; seq_along() stays empty for empty input,
  # unlike 1:length()
  x <- expand.grid(seq_along(sp.names), seq_along(sp.names))
  full_e[x[, "Var1"] > x[, "Var2"], "Weight"] <- NA
  # FALSE rather than F: F is an ordinary variable that can be reassigned
  as.data.frame(full_e, stringsAsFactors = FALSE)
}
Follow the spieceasi documentation to find optimal parameters. Also, because I want to compare networks, this convo on using optimal parameters for different network comparisons is helpful.
Remove samples from the phyloseq objects that are not in all 3 domains and reorder samples so they are in same order in all 3 objects
# Samples present in the bacterial, archaeal and eukaryotic data sets
bac_arch_common <- intersect(
  sample_names(ps_bac_ra_pruned),
  sample_names(ps_arch_ra_pruned)
)
all_common <- intersect(bac_arch_common, sample_names(ps_euk_ra_pruned))

# Restrict the count objects to the shared samples
ps_bac_pruned_3domains <- prune_samples(all_common, ps_bac_pruned)
ps_arch_pruned_3domains <- prune_samples(all_common, ps_arch_pruned)
ps_euk_pruned_3domains <- prune_samples(all_common, ps_euk_pruned)

# ... and the relative-abundance objects
ps_bac_ra_pruned_3domains <- prune_samples(all_common, ps_bac_ra_pruned)
ps_arch_ra_pruned_3domains <- prune_samples(all_common, ps_arch_ra_pruned)
ps_euk_ra_pruned_3domains <- prune_samples(all_common, ps_euk_ra_pruned)

# Put the archaeal and eukaryotic count columns in the bacterial sample order
otu_table(ps_arch_pruned_3domains) <-
  otu_table(ps_arch_pruned_3domains)[, sample_names(ps_bac_ra_pruned_3domains)]
otu_table(ps_euk_pruned_3domains) <-
  otu_table(ps_euk_pruned_3domains)[, sample_names(ps_bac_ra_pruned_3domains)]

# Print the sample data of each object for a visual check
sample_data(ps_bac_pruned_3domains)
sample_data(ps_arch_pruned_3domains)
sample_data(ps_euk_pruned_3domains)

# Run SpiecEasi (glasso) on the three domains together; the seed is passed
# through pulsar.params
pargs <- list(seed = 10010)
se <- spiec.easi(
  list(ps_bac_pruned_3domains, ps_arch_pruned_3domains, ps_euk_pruned_3domains),
  method = "glasso",
  lambda.min.ratio = 1e-2,
  nlambda = 100,
  pulsar.params = pargs
)
getStability(se)
the above takes a while to run (20-30 mins). Using parameters above, the stability along the lambda path crosses the 0.05 threshold and the final stability value (0.044) is sufficiently close to 0.05
# Long table of edge weights, labelled with the row names of alldomains_df_full
tab.se <- convertSEToTable(se, sp.names = colnames(t(alldomains_df_full)))

# Heatmap of the edge weights
plot.se <- ggplot(tab.se, aes(x = Var1, y = Var2, fill = Weight)) +
  geom_tile() +
  scale_fill_gradient2() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
plot(plot.se)
ggsave(
  "figures/spieceasi_alldomains.eps",
  plot.se,
  width = 35,
  height = 35,
  units = "in"
)
Note- only the significant values above show up in the heatmap above (ie. there is no “p-value”)
Remove samples from the phyloseq objects that are not in both domains and reorder samples so they are in the same order in both objects
# Samples present in both the bacterial and the archaeal data sets
bac_arch_common <- intersect(
  sample_names(ps_bac_ra_pruned),
  sample_names(ps_arch_ra_pruned)
)

# Restrict the count and relative-abundance objects to the shared samples
ps_bac_pruned_2domains <- prune_samples(bac_arch_common, ps_bac_pruned)
ps_arch_pruned_2domains <- prune_samples(bac_arch_common, ps_arch_pruned)
ps_bac_ra_pruned_2domains <- prune_samples(bac_arch_common, ps_bac_ra_pruned)
ps_arch_ra_pruned_2domains <- prune_samples(bac_arch_common, ps_arch_ra_pruned)

# Put the archaeal count columns in the bacterial sample order. Index by the
# two-domain bacterial object: the three-domain object is additionally
# restricted to samples with a eukaryote library, so indexing by it could drop
# samples from the archaeal table and leave the two spiec.easi() inputs with
# different sample sets.
otu_table(ps_arch_pruned_2domains) <-
  otu_table(ps_arch_pruned_2domains)[, sample_names(ps_bac_pruned_2domains)]

# Print the sample data of each object for a visual check
sample_data(ps_bac_pruned_2domains)
sample_data(ps_arch_pruned_2domains)

# Run SpiecEasi (glasso) on bacteria + archaea; the seed is passed through
# pulsar.params
pargs <- list(seed = 10010)
se.2domains <- spiec.easi(
  list(ps_bac_pruned_2domains, ps_arch_pruned_2domains),
  method = "glasso",
  lambda.min.ratio = 1e-2,
  nlambda = 200,
  pulsar.params = pargs
)
getStability(se.2domains)
the above takes a while to run . Using parameters above, the stability along the lambda path crosses the 0.05 threshold and the final stability value (0.046) is close to 0.05
# Long table of edge weights, labelled with the row names of twodomains_df_full
tab.se <- convertSEToTable(se.2domains, sp.names = colnames(t(twodomains_df_full)))

# Heatmap of the edge weights
plot.se <- ggplot(tab.se, aes(x = Var1, y = Var2, fill = Weight)) +
  geom_tile() +
  scale_fill_gradient2() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
plot(plot.se)
ggsave(
  "figures/spieceasi_2domains.eps",
  plot.se,
  width = 35,
  height = 35,
  units = "in"
)
Note- only the significant values above show up in the heatmap above (ie. there is no “p-value”)
# Samples present in all three oxycline count objects
bac_arch_common <- intersect(
  sample_names(ps_bac_oxycline_pruned),
  sample_names(ps_arch_oxycline_pruned)
)
all_common <- intersect(bac_arch_common, sample_names(ps_euk_oxycline_pruned))

# Restrict each object (in place) to the shared samples
ps_bac_oxycline_pruned <- prune_samples(all_common, ps_bac_oxycline_pruned)
ps_arch_oxycline_pruned <- prune_samples(all_common, ps_arch_oxycline_pruned)
ps_euk_oxycline_pruned <- prune_samples(all_common, ps_euk_oxycline_pruned)

# Put the archaeal and eukaryotic columns in the bacterial sample order
otu_table(ps_arch_oxycline_pruned) <-
  otu_table(ps_arch_oxycline_pruned)[, sample_names(ps_bac_oxycline_pruned)]
otu_table(ps_euk_oxycline_pruned) <-
  otu_table(ps_euk_oxycline_pruned)[, sample_names(ps_bac_oxycline_pruned)]

# Print the sample data of each object for a visual check
sample_data(ps_bac_oxycline_pruned)
sample_data(ps_arch_oxycline_pruned)
sample_data(ps_euk_oxycline_pruned)

# Run SpiecEasi (glasso) on the oxycline samples
pargs <- list(seed = 10010)
se.oxycline <- spiec.easi(
  list(ps_bac_oxycline_pruned, ps_arch_oxycline_pruned, ps_euk_oxycline_pruned),
  method = "glasso",
  lambda.min.ratio = 5e-3,
  nlambda = 300,
  pulsar.params = pargs
)
getStability(se.oxycline)
the above takes a couple of minutes to run. Stability and stability along lambda path are very similar to the full dataset spieceasi object (se) with these parameters above. Continue with these.
# Long table of edge weights, labelled with the row names of
# alldomains_df_full_oxycline
tab.se.oxycline <- convertSEToTable(
  se.oxycline,
  sp.names = colnames(t(alldomains_df_full_oxycline))
)

# Heatmap of the edge weights
plot.se.oxycline <- ggplot(tab.se.oxycline, aes(x = Var1, y = Var2, fill = Weight)) +
  geom_tile() +
  scale_fill_gradient2() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
plot(plot.se.oxycline)
ggsave(
  "figures/spieceasi_alldomains_oxycline.eps",
  plot.se.oxycline,
  width = 35,
  height = 35,
  units = "in"
)
# Samples present in all three anoxic count objects
bac_arch_common <- intersect(
  sample_names(ps_bac_anoxic_pruned),
  sample_names(ps_arch_anoxic_pruned)
)
all_common <- intersect(bac_arch_common, sample_names(ps_euk_anoxic_pruned))

# Restrict each object (in place) to the shared samples
ps_bac_anoxic_pruned <- prune_samples(all_common, ps_bac_anoxic_pruned)
ps_arch_anoxic_pruned <- prune_samples(all_common, ps_arch_anoxic_pruned)
ps_euk_anoxic_pruned <- prune_samples(all_common, ps_euk_anoxic_pruned)

# Put the archaeal and eukaryotic columns in the bacterial sample order
otu_table(ps_arch_anoxic_pruned) <-
  otu_table(ps_arch_anoxic_pruned)[, sample_names(ps_bac_anoxic_pruned)]
otu_table(ps_euk_anoxic_pruned) <-
  otu_table(ps_euk_anoxic_pruned)[, sample_names(ps_bac_anoxic_pruned)]

# Print the sample data of each object for a visual check
sample_data(ps_bac_anoxic_pruned)
sample_data(ps_arch_anoxic_pruned)
sample_data(ps_euk_anoxic_pruned)

# Run SpiecEasi (glasso) on the anoxic samples
pargs <- list(seed = 10010)
se.anoxic <- spiec.easi(
  list(ps_bac_anoxic_pruned, ps_arch_anoxic_pruned, ps_euk_anoxic_pruned),
  method = "glasso",
  lambda.min.ratio = 1e-1,
  nlambda = 300,
  pulsar.params = pargs
)
getStability(se.anoxic)
the above takes a couple of minutes to run
# Long table of edge weights, labelled with the row names of
# alldomains_df_full_anoxic
tab.se.anoxic <- convertSEToTable(
  se.anoxic,
  sp.names = colnames(t(alldomains_df_full_anoxic))
)

# Heatmap of the edge weights
plot.se.anoxic <- ggplot(tab.se.anoxic, aes(x = Var1, y = Var2, fill = Weight)) +
  geom_tile() +
  scale_fill_gradient2() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
plot(plot.se.anoxic)
ggsave(
  "figures/spieceasi_alldomains_anoxic.eps",
  plot.se.anoxic,
  width = 35,
  height = 35,
  units = "in"
)
# Samples present in all three euxinic count objects
bac_arch_common <- intersect(
  sample_names(ps_bac_euxinic_pruned),
  sample_names(ps_arch_euxinic_pruned)
)
all_common <- intersect(bac_arch_common, sample_names(ps_euk_euxinic_pruned))

# Restrict each object (in place) to the shared samples
ps_bac_euxinic_pruned <- prune_samples(all_common, ps_bac_euxinic_pruned)
ps_arch_euxinic_pruned <- prune_samples(all_common, ps_arch_euxinic_pruned)
ps_euk_euxinic_pruned <- prune_samples(all_common, ps_euk_euxinic_pruned)

# Put the archaeal and eukaryotic columns in the bacterial sample order
otu_table(ps_arch_euxinic_pruned) <-
  otu_table(ps_arch_euxinic_pruned)[, sample_names(ps_bac_euxinic_pruned)]
otu_table(ps_euk_euxinic_pruned) <-
  otu_table(ps_euk_euxinic_pruned)[, sample_names(ps_bac_euxinic_pruned)]

# Print the sample data of each object for a visual check
sample_data(ps_bac_euxinic_pruned)
sample_data(ps_arch_euxinic_pruned)
sample_data(ps_euk_euxinic_pruned)

# Run SpiecEasi (glasso) on the euxinic samples
pargs <- list(seed = 10010)
se.euxinic <- spiec.easi(
  list(ps_bac_euxinic_pruned, ps_arch_euxinic_pruned, ps_euk_euxinic_pruned),
  method = "glasso",
  lambda.min.ratio = 1e-5,
  nlambda = 20,
  pulsar.params = pargs
)
getStability(se.euxinic)
I tried many parameters on the above but cannot get a satisfactory solution. There are just too few samples after quality filtering to do SpiecEasi on the euxinic depths only.
# Save the whole workspace after the SpiecEasi runs
save.image(file = "EnvironmentBackups/CariacoEuks_postanalysis_vars_upto_spieceasi.RData")
Or load if coming back
# Restore the workspace saved after the SpiecEasi runs
load(file = "EnvironmentBackups/CariacoEuks_postanalysis_vars_upto_spieceasi.RData")
Build networks from the SpiecEasi association matrices using iGraph
# Extract the adjacency matrix of the refit network from the three-domain
# SpiecEasi fit (se)
adj.mat <- getRefit(se)
# Tabulate the distinct values in the matrix (printed below)
table(as.numeric(adj.mat))
0 1
83721 5680
# Extract weighted adjacency: turn the optimal covariance estimate into a
# correlation matrix, then multiply by the refit matrix so that only the
# selected edges keep a (signed) weight
se.cor <- cov2cor(as.matrix(getOptCov(se)))
weighted.adj.mat <- se.cor * getRefit(se)

# Convert to graph objects
grph.unweighted <- adj2igraph(adj.mat)
grph <- adj2igraph(weighted.adj.mat)

# Put back in species names
V(grph)$name <- rownames(alldomains_df)
# V(grph)

# Make size of nodes proportional to degree (number of connections);
# the +1 avoids size-zero vertices and the /2 scales the nodes down
V(grph)$size <- (degree(grph) + 1) / 2

# Change width of edges to be proportional to their weights
E(grph)$width <- abs(E(grph)$weight) * 10

# Remove low-weight edges (you decide what threshold is right for your network):
# weight_threshold <- 0.07
# grph <- delete.edges(grph,which(abs(E(grph)$weight)<weight_threshold))

# Join taxonomy data of each node
# Convert graph to data frames (one for vertices, one for edges)
grph_df <- igraph::as_data_frame(grph, what = "both")

# Make formatted taxonomy table for each domain, keyed by a `name` column
ps_bac_pruned_tax_table <- as.data.frame(tax_table(ps_bac_pruned)) %>%
  mutate(name = rownames(tax_table(ps_bac_pruned)))
ps_arch_pruned_tax_table <- as.data.frame(tax_table(ps_arch_pruned)) %>%
  mutate(name = rownames(tax_table(ps_arch_pruned)))
ps_euk_pruned_tax_table <- as.data.frame(tax_table(ps_euk_pruned)) %>%
  mutate(name = rownames(tax_table(ps_euk_pruned)))

# Link graph vertices to the formatted taxonomy tables.
# The vertex rows are sliced as bacteria, then archaea, then eukaryotes;
# this assumes alldomains_df was built in that order -- TODO confirm.
bac_temp <- left_join(
  grph_df$vertices[seq_len(ntaxa(ps_bac_pruned)), ],
  ps_bac_pruned_tax_table,
  by = "name"
)
# Delete columns that don't match other tax tables
bac_temp <- select(bac_temp, -"taxonomy-9", -"Refined taxonomy")
arch_temp <- left_join(
  grph_df$vertices[ntaxa(ps_bac_pruned) + seq_len(ntaxa(ps_arch_pruned)), ],
  ps_arch_pruned_tax_table,
  by = "name"
)
euk_temp <- left_join(
  grph_df$vertices[
    ntaxa(ps_bac_pruned) + ntaxa(ps_arch_pruned) + seq_len(ntaxa(ps_euk_pruned)),
  ],
  ps_euk_pruned_tax_table,
  by = "name"
)

# Rename column names in euk table to match others
euk_temp <- euk_temp %>%
  rename(
    "taxonomy-1" = Kingdom, "taxonomy-2" = Supergroup,
    "taxonomy-3" = Division, "taxonomy-4" = Class,
    "taxonomy-5" = Order, "taxonomy-6" = Family,
    "taxonomy-7" = Genus, "taxonomy-8" = Species
  )

# Build full data frame with all 3 domains
all_temp <- rbind(bac_temp, arch_temp, euk_temp)

# Remake into graph, now carrying the taxonomy as vertex attributes
grph <- graph_from_data_frame(grph_df$edges,
  directed = FALSE,
  vertices = all_temp
)

# Make color palette for domain (kept as `dtype` for later chunks)
dtype <- c("red", "green", "blue", "yellow")
# Look colors up by domain name rather than by factor-level position, so the
# mapping cannot shift if a domain is absent from the graph
domain_palette <- setNames(
  dtype,
  c("Archaea", "Bacteria", "Eukaryota", "No blast hit")
)
# Make color vector
domain_color <- unname(domain_palette[as.character(V(grph)$"taxonomy-1")])
# check
domain_color
[1] "green" "green" "green" "green" "green" "green" "green" "green" "green" "green"
[11] "green" "green" "green" "green" "green" "green" "green" "green" "green" "green"
[21] "green" "green" "green" "green" "green" "green" "green" "green" "green" "green"
[31] "green" "green" "green" "green" "green" "green" "green" "green" "green" "green"
[41] "green" "green" "green" "green" "green" "green" "green" "green" "green" "green"
[51] "green" "green" "green" "green" "green" "green" "green" "green" "green" "green"
[61] "green" "green" "green" "green" "green" "green" "green" "green" "green" "green"
[71] "green" "green" "green" "green" "green" "green" "green" "green" "green" "green"
[81] "green" "green" "green" "green" "green" "green" "green" "green" "green" "green"
[91] "green" "green" "green" "green" "yellow" "green" "green" "green" "green" "green"
[101] "green" "green" "green" "green" "green" "green" "green" "green" "green" "green"
[111] "green" "green" "green" "green" "green" "green" "green" "green" "green" "green"
[121] "green" "green" "green" "green" "red" "red" "red" "red" "red" "red"
[131] "red" "red" "red" "red" "red" "red" "red" "red" "red" "red"
[141] "red" "red" "red" "red" "red" "red" "red" "red" "red" "red"
[151] "red" "red" "red" "red" "red" "red" "red" "red" "red" "red"
[161] "red" "red" "red" "red" "red" "red" "red" "red" "red" "red"
[171] "red" "red" "red" "red" "red" "red" "blue" "blue" "blue" "blue"
[181] "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue"
[191] "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue"
[201] "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue"
[211] "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue"
[221] "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue"
[231] "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue"
[241] "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue"
[251] "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue"
[261] "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue"
[271] "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue"
[281] "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue"
[291] "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue"
# Show the domain of every vertex as a factor (levels are sorted alphabetically)
as.factor(V(grph)$`taxonomy-1`)
[1] Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria
[8] Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria
[15] Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria
[22] Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria
[29] Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria
[36] Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria
[43] Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria
[50] Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria
[57] Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria
[64] Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria
[71] Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria
[78] Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria
[85] Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria
[92] Bacteria Bacteria Bacteria No blast hit Bacteria Bacteria Bacteria
[99] Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria
[106] Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria
[113] Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria
[120] Bacteria Bacteria Bacteria Bacteria Bacteria Archaea Archaea
[127] Archaea Archaea Archaea Archaea Archaea Archaea Archaea
[134] Archaea Archaea Archaea Archaea Archaea Archaea Archaea
[141] Archaea Archaea Archaea Archaea Archaea Archaea Archaea
[148] Archaea Archaea Archaea Archaea Archaea Archaea Archaea
[155] Archaea Archaea Archaea Archaea Archaea Archaea Archaea
[162] Archaea Archaea Archaea Archaea Archaea Archaea Archaea
[169] Archaea Archaea Archaea Archaea Archaea Archaea Archaea
[176] Archaea Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota
[183] Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota
[190] Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota
[197] Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota
[204] Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota
[211] Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota
[218] Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota
[225] Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota
[232] Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota
[239] Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota
[246] Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota
[253] Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota
[260] Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota
[267] Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota
[274] Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota
[281] Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota
[288] Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota
[295] Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota
Levels: Archaea Bacteria Eukaryota No blast hit
# Draw the full three-domain network on the active device, vertices colored
# by domain and labels suppressed
plot(
  grph,
  layout = layout_with_graphopt(grph),
  vertex.color = domain_color,
  vertex.label = NA
)
title(main = "SpiecEasi Network: All domains, Whole Water Column")
legend(
  x = "topright",
  legend = c("Archaea", "Bacteria", "Eukarya", "No Blast Hit"),
  fill = c("red", "green", "blue", "yellow"),
  border = NA,
  bty = "n"
)

# Draw the same figure again into an EPS file (the layout is recomputed)
setEPS()
postscript(
  file = "Figures/3domains_alldepths_spieceasi_network.eps",
  width = 5.5,
  height = 5
)
plot(
  grph,
  layout = layout_with_graphopt(grph),
  vertex.color = domain_color,
  vertex.label = NA
)
title(main = "SpiecEasi Network: All domains, Whole Water Column")
legend(
  x = "topright",
  legend = c("Archaea", "Bacteria", "Eukarya", "No Blast Hit"),
  fill = c("red", "green", "blue", "yellow"),
  border = NA,
  bty = "n"
)
dev.off()
quartz_off_screen
2
# Subset based on pos or neg edges: drop the edges of the opposite sign
grph.pos <- delete.edges(grph, which(E(grph)$weight < 0))
grph.neg <- delete.edges(grph, which(E(grph)$weight > 0))

# For each subsetted graph, remove those nodes that are no longer connected to anything
grph.pos <- delete.vertices(grph.pos, which(degree(grph.pos) == 0))
grph.neg <- delete.vertices(grph.neg, which(degree(grph.neg) == 0))

# Make color vector for each. Colors are looked up by domain name, so they
# still match the legend if a domain has no vertices left in a subgraph and
# do not depend on whichever `dtype` was assigned last.
domain_palette <- c(
  "Archaea" = "red", "Bacteria" = "green",
  "Eukaryota" = "blue", "No blast hit" = "yellow"
)
domain_color_pos <- unname(domain_palette[as.character(V(grph.pos)$"taxonomy-1")])
domain_color_neg <- unname(domain_palette[as.character(V(grph.neg)$"taxonomy-1")])

# Plot pos
plot(grph.pos,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.pos),
  vertex.color = domain_color_pos
)
title("SpiecEasi Network: All domains, Positive Edges, Whole Water Column")
legend("topright",
  bty = "n",
  legend = c("Archaea", "Bacteria", "Eukarya", "No Blast Hit"),
  fill = c("red", "green", "blue", "yellow"), border = NA
)

# Plot neg
plot(grph.neg,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.neg),
  vertex.color = domain_color_neg
)
title("SpiecEasi Network: All domains, Negative Edges, Whole Water Column")
legend("topright",
  bty = "n",
  legend = c("Archaea", "Bacteria", "Eukarya", "No Blast Hit"),
  fill = c("red", "green", "blue", "yellow"), border = NA
)

# Save the positive-edge plot
setEPS()
postscript(file = "Figures/3domains_alldepths_posedges_spieceasi_network.eps", width = 5.5, height = 5)
plot(grph.pos,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.pos),
  vertex.color = domain_color_pos
)
title("SpiecEasi Network: All domains, Positive Edges, Whole Water Column")
# Legend corrected to match the vertex colors (Archaea red, Bacteria green);
# it previously labeled Bacteria red / Archaea green and omitted "No Blast Hit"
legend("topright",
  bty = "n",
  legend = c("Archaea", "Bacteria", "Eukarya", "No Blast Hit"),
  fill = c("red", "green", "blue", "yellow"), border = NA
)
dev.off()
quartz_off_screen
2
# Save the negative-edge network of the full dataset as EPS
setEPS()
postscript(
  file = "Figures/3domains_alldepths_negedges_spieceasi_network.eps",
  width = 5.5,
  height = 5
)
plot(
  grph.neg,
  layout = layout_with_graphopt(grph.neg),
  vertex.color = domain_color_neg,
  vertex.label = NA
)
title(main = "SpiecEasi Network: All domains, Negative Edges, Whole Water Column")
legend(
  x = "topright",
  legend = c("Archaea", "Bacteria", "Eukarya", "No Blast Hit"),
  fill = c("red", "green", "blue", "yellow"),
  border = NA,
  bty = "n"
)
dev.off()
quartz_off_screen
2
Remove eukaryotes to see impact on network
# Extract the refit adjacency matrix from the two-domain SpiecEasi fit
adj.mat <- getRefit(se.2domains)
# Tabulate the matrix entries (0 = no edge, 1 = edge)
table(as.numeric(adj.mat))
0 1
28568 2408
# Extract weighted adjacency: correlation form of the optimal covariance,
# multiplied by the refit matrix so only selected edges keep a weight
se.cor <- cov2cor(as.matrix(getOptCov(se.2domains)))
weighted.adj.mat <- se.cor * getRefit(se.2domains)

# Convert to graph objects
grph.unweighted <- adj2igraph(adj.mat)
grph.2domains <- adj2igraph(weighted.adj.mat)

# Put back in species names
V(grph.2domains)$name <- rownames(twodomains_df)
# V(grph.2domains)

# Make size of nodes proportional to degree (number of connections);
# the +1 avoids size-zero vertices and the /2 scales the nodes down
V(grph.2domains)$size <- (degree(grph.2domains) + 1) / 2

# Color edges by connection (positive or negative)
# E(grph.2domains)$color <- custombluegreen
# E(grph.2domains)$color[E(grph.2domains)$weight<0] <- customreddishpurple

# Change width of edges to be proportional to their weights
E(grph.2domains)$width <- abs(E(grph.2domains)$weight) * 10

# Remove low-weight edges (you decide what threshold is right for your network):
# weight_threshold <- 0.07
# grph.2domains <- delete.edges(grph.2domains,which(abs(E(grph.2domains)$weight)<weight_threshold))

# Join taxonomy data of each node
# Convert graph to data frames (one for vertices, one for edges)
grph.2domains_df <- igraph::as_data_frame(grph.2domains, what = "both")

# Make formatted taxonomy table for each domain, keyed by a `name` column
ps_bac_pruned_2domains_tax_table <- as.data.frame(tax_table(ps_bac_pruned_2domains)) %>%
  mutate(name = rownames(tax_table(ps_bac_pruned_2domains)))
ps_arch_2domains_pruned_tax_table <- as.data.frame(tax_table(ps_arch_pruned_2domains)) %>%
  mutate(name = rownames(tax_table(ps_arch_pruned_2domains)))

# Link graph vertices to the formatted taxonomy tables.
# The vertex rows are sliced as bacteria first, then archaea; this assumes
# twodomains_df was built in that order -- TODO confirm.
bac_temp <- left_join(
  grph.2domains_df$vertices[seq_len(ntaxa(ps_bac_pruned_2domains)), ],
  ps_bac_pruned_2domains_tax_table,
  by = "name"
)
# Delete columns that don't match other tax tables
bac_temp <- select(bac_temp, -"taxonomy-9", -"Refined taxonomy")
arch_temp <- left_join(
  grph.2domains_df$vertices[
    ntaxa(ps_bac_pruned_2domains) + seq_len(ntaxa(ps_arch_pruned_2domains)),
  ],
  ps_arch_2domains_pruned_tax_table,
  by = "name"
)

# Build full data frame with both domains
all_temp <- rbind(bac_temp, arch_temp)

# Remake into graph, now carrying the taxonomy as vertex attributes
grph.2domains <- graph_from_data_frame(grph.2domains_df$edges,
  directed = FALSE,
  vertices = all_temp
)

# Make color palette for domain (kept as `dtype` for later chunks)
dtype <- c("red", "green", "yellow")
# Look colors up by domain name rather than by factor-level position
domain_palette <- setNames(dtype, c("Archaea", "Bacteria", "No blast hit"))
# Make color vector
domain_color_2domains <- unname(
  domain_palette[as.character(V(grph.2domains)$"taxonomy-1")]
)

# Plot
plot(grph.2domains,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.2domains),
  vertex.color = domain_color_2domains
)
title("SpiecEasi Network: Bacteria and Archaea, Whole Water Column")
legend("topright",
  bty = "n",
  legend = c("Archaea", "Bacteria", "No blast hit"),
  fill = c("red", "green", "yellow"), border = NA
)

# Save plot
setEPS()
postscript(file = "Figures/2domains_alldepths_spieceasi_network.eps", width = 5.5, height = 5)
plot(grph.2domains,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.2domains),
  vertex.color = domain_color_2domains
)
title("SpiecEasi Network: Bacteria and Archaea, Whole Water Column")
legend("topright",
  bty = "n",
  legend = c("Archaea", "Bacteria", "No blast hit"),
  fill = c("red", "green", "yellow"), border = NA
)
dev.off()
quartz_off_screen
2
# Subset based on pos or neg edges: drop the edges of the opposite sign
grph.2domains.pos <- delete.edges(grph.2domains, which(E(grph.2domains)$weight < 0))
grph.2domains.neg <- delete.edges(grph.2domains, which(E(grph.2domains)$weight > 0))

# For each subsetted graph, remove those nodes that are no longer connected to anything
grph.2domains.pos <- delete.vertices(grph.2domains.pos, which(degree(grph.2domains.pos) == 0))
grph.2domains.neg <- delete.vertices(grph.2domains.neg, which(degree(grph.2domains.neg) == 0))

# Make color vector for each. Colors are looked up by domain name, so they
# still match the legend if a domain has no vertices left in a subgraph.
domain_palette <- c("Archaea" = "red", "Bacteria" = "green", "No blast hit" = "yellow")
domain_color_2domains_pos <- unname(
  domain_palette[as.character(V(grph.2domains.pos)$"taxonomy-1")]
)
domain_color_2domains_neg <- unname(
  domain_palette[as.character(V(grph.2domains.neg)$"taxonomy-1")]
)

# Plot pos
plot(grph.2domains.pos,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.2domains.pos),
  vertex.color = domain_color_2domains_pos
)
title("SpiecEasi Network: Bacteria and Archaea, Positive Edges Only, Whole Water Column")
# Legend fill order corrected: Archaea are drawn red and Bacteria green
# (the fill was previously listed as green, red)
legend("topright",
  bty = "n",
  legend = c("Archaea", "Bacteria", "No Blast Hit"),
  fill = c("red", "green", "yellow"), border = NA
)

# Plot neg
plot(grph.2domains.neg,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.2domains.neg),
  vertex.color = domain_color_2domains_neg
)
title("SpiecEasi Network: Bacteria and Archaea, Negative Edges Only, Whole Water Column")
legend("topright",
  bty = "n",
  legend = c("Archaea", "Bacteria", "No Blast Hit"),
  fill = c("red", "green", "yellow"), border = NA
)

# Save the positive-edge plot
setEPS()
postscript(file = "Figures/2domains_alldepths_posedges_spieceasi_network.eps", width = 5.5, height = 5)
plot(grph.2domains.pos,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.2domains.pos),
  vertex.color = domain_color_2domains_pos
)
title("SpiecEasi Network: Bacteria and Archaea, Positive Edges Only, Whole Water Column")
legend("topright",
  bty = "n",
  legend = c("Archaea", "Bacteria", "No Blast Hit"),
  fill = c("red", "green", "yellow"), border = NA
)
dev.off()
quartz_off_screen
2
# Save the negative-edge network of the two-domain dataset as EPS
setEPS()
postscript(file = "Figures/2domains_alldepths_negedges_spieceasi_network.eps", width = 5.5, height = 5)
plot(grph.2domains.neg,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.2domains.neg),
  vertex.color = domain_color_2domains_neg
)
title("SpiecEasi Network: Bacteria and Archaea, Negative Edges Only, Whole Water Column")
# Legend fill order corrected: the vertex colors map Archaea to red and
# Bacteria to green (the fill was previously listed as green, red)
legend("topright",
  bty = "n",
  legend = c("Archaea", "Bacteria", "No Blast Hit"),
  fill = c("red", "green", "yellow"), border = NA
)
dev.off()
quartz_off_screen
2
# Extract the refit adjacency matrix from the oxycline SpiecEasi fit
adj.mat <- getRefit(se.oxycline)
# Tabulate the matrix entries (0 = no edge, 1 = edge)
table(as.numeric(adj.mat))
0 1
35241 1240
# Extract weighted adjacency: correlation form of the optimal covariance,
# multiplied by the refit matrix so only selected edges keep a weight
se.cor <- cov2cor(as.matrix(getOptCov(se.oxycline)))
weighted.adj.mat <- se.cor * getRefit(se.oxycline)

# Convert to graph objects
grph.unweighted.oxycline <- adj2igraph(adj.mat)
grph.oxycline <- adj2igraph(weighted.adj.mat)

# Put back in species names
V(grph.oxycline)$name <- rownames(alldomains_df_oxycline)
# V(grph.oxycline)

# Make size of nodes proportional to degree (number of connections);
# the +1 avoids size-zero vertices and the /2 scales the nodes down
V(grph.oxycline)$size <- (degree(grph.oxycline) + 1) / 2

# Color edges by connection (positive or negative)
# E(grph.oxycline)$color <- custombluegreen
# E(grph.oxycline)$color[E(grph.oxycline)$weight<0] <- customreddishpurple

# Change width of edges to be proportional to their weights
E(grph.oxycline)$width <- abs(E(grph.oxycline)$weight) * 10

# Remove low-weight edges (you decide what threshold is right for your network):
# weight_threshold <- 0.07
# grph.oxycline <- delete.edges(grph.oxycline,which(abs(E(grph.oxycline)$weight)<weight_threshold))

# Join taxonomy data of each node
# Convert graph to data frames (one for vertices, one for edges)
grph.oxycline_df <- igraph::as_data_frame(grph.oxycline, what = "both")

# Make formatted taxonomy table for each domain, keyed by a `name` column
ps_bac_oxycline_pruned_tax_table <- as.data.frame(tax_table(ps_bac_oxycline_pruned)) %>%
  mutate(name = rownames(tax_table(ps_bac_oxycline_pruned)))
ps_arch_oxycline_pruned_tax_table <- as.data.frame(tax_table(ps_arch_oxycline_pruned)) %>%
  mutate(name = rownames(tax_table(ps_arch_oxycline_pruned)))
ps_euk_oxycline_pruned_tax_table <- as.data.frame(tax_table(ps_euk_oxycline_pruned)) %>%
  mutate(name = rownames(tax_table(ps_euk_oxycline_pruned)))

# Link graph vertices to the formatted taxonomy tables.
# The vertex rows are sliced as bacteria, then archaea, then eukaryotes; this
# assumes alldomains_df_oxycline was built in that order -- TODO confirm.
bac_temp <- left_join(
  grph.oxycline_df$vertices[seq_len(ntaxa(ps_bac_oxycline_pruned)), ],
  ps_bac_oxycline_pruned_tax_table,
  by = "name"
)
# Delete columns that don't match other tax tables
bac_temp <- select(bac_temp, -"taxonomy-9", -"Refined taxonomy")
arch_temp <- left_join(
  grph.oxycline_df$vertices[
    ntaxa(ps_bac_oxycline_pruned) + seq_len(ntaxa(ps_arch_oxycline_pruned)),
  ],
  ps_arch_oxycline_pruned_tax_table,
  by = "name"
)
euk_temp <- left_join(
  grph.oxycline_df$vertices[
    ntaxa(ps_bac_oxycline_pruned) + ntaxa(ps_arch_oxycline_pruned) +
      seq_len(ntaxa(ps_euk_oxycline_pruned)),
  ],
  ps_euk_oxycline_pruned_tax_table,
  by = "name"
)

# Rename column names in euk table to match others
euk_temp <- euk_temp %>%
  rename(
    "taxonomy-1" = Kingdom, "taxonomy-2" = Supergroup,
    "taxonomy-3" = Division, "taxonomy-4" = Class,
    "taxonomy-5" = Order, "taxonomy-6" = Family,
    "taxonomy-7" = Genus, "taxonomy-8" = Species
  )

# Build full data frame with all 3 domains
all_temp <- rbind(bac_temp, arch_temp, euk_temp)

# Remake into graph, now carrying the taxonomy as vertex attributes
grph.oxycline <- graph_from_data_frame(grph.oxycline_df$edges,
  directed = FALSE,
  vertices = all_temp
)

# Make color palette for domain (kept as `dtype` for later chunks)
dtype <- c("red", "green", "blue")
# Look colors up by domain name rather than by factor-level position, so the
# legend below stays correct even if a domain is absent from this network
domain_palette <- setNames(dtype, c("Archaea", "Bacteria", "Eukaryota"))
# Make color vector
domain_color_oxycline <- unname(
  domain_palette[as.character(V(grph.oxycline)$"taxonomy-1")]
)

# Plot
plot(grph.oxycline,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.oxycline),
  vertex.color = domain_color_oxycline
)
title("SpiecEasi Network: All domains, Oxycline")
legend("topright",
  bty = "n",
  legend = c("Archaea", "Bacteria", "Eukarya"),
  fill = c("red", "green", "blue"), border = NA
)

# Save plot
setEPS()
postscript(file = "Figures/3domains_oxycline_spieceasi_network.eps", width = 5.5, height = 5)
plot(grph.oxycline,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.oxycline),
  vertex.color = domain_color_oxycline
)
title("SpiecEasi Network: All domains, Oxycline")
legend("topright",
  bty = "n",
  legend = c("Archaea", "Bacteria", "Eukarya"),
  fill = c("red", "green", "blue"), border = NA
)
dev.off()
quartz_off_screen
2
# Subset based on pos or neg edges: drop the edges of the opposite sign
grph.oxycline.pos <- delete.edges(grph.oxycline, which(E(grph.oxycline)$weight < 0))
grph.oxycline.neg <- delete.edges(grph.oxycline, which(E(grph.oxycline)$weight > 0))

# For each subsetted graph, remove those nodes that are no longer connected to anything
grph.oxycline.pos <- delete.vertices(grph.oxycline.pos, which(degree(grph.oxycline.pos) == 0))
grph.oxycline.neg <- delete.vertices(grph.oxycline.neg, which(degree(grph.oxycline.neg) == 0))

# Make color vector for each. Colors are looked up by domain name: a
# positional lookup on factor levels would shift every color if one domain
# had no vertices left in a subgraph, contradicting the fixed legend.
domain_palette <- c("Archaea" = "red", "Bacteria" = "green", "Eukaryota" = "blue")
domain_color_oxycline_pos <- unname(
  domain_palette[as.character(V(grph.oxycline.pos)$"taxonomy-1")]
)
domain_color_oxycline_neg <- unname(
  domain_palette[as.character(V(grph.oxycline.neg)$"taxonomy-1")]
)

# Plot pos
plot(grph.oxycline.pos,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.oxycline.pos),
  vertex.color = domain_color_oxycline_pos
)
title("SpiecEasi Network: All domains, Positive Edges only, Oxycline")
legend("topright",
  bty = "n",
  legend = c("Archaea", "Bacteria", "Eukarya"),
  fill = c("red", "green", "blue"), border = NA
)

# Plot neg
plot(grph.oxycline.neg,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.oxycline.neg),
  vertex.color = domain_color_oxycline_neg
)
title("SpiecEasi Network: All domains, Negative Edges Only, Oxycline")
legend("topright",
  bty = "n",
  legend = c("Archaea", "Bacteria", "Eukarya"),
  fill = c("red", "green", "blue"), border = NA
)

# Save the positive-edge plot
setEPS()
postscript(file = "Figures/3domains_oxycline_posedges_spieceasi_network.eps", width = 5.5, height = 5)
plot(grph.oxycline.pos,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.oxycline.pos),
  vertex.color = domain_color_oxycline_pos
)
title("SpiecEasi Network: All domains, Positive Edges Only, Oxycline")
legend("topright",
  bty = "n",
  legend = c("Archaea", "Bacteria", "Eukarya"),
  fill = c("red", "green", "blue"), border = NA
)
dev.off()
quartz_off_screen
2
# Save the negative-edge oxycline network as EPS
setEPS()
postscript(
  file = "Figures/3domains_oxycline_negedges_spieceasi_network.eps",
  width = 5.5,
  height = 5
)
plot(
  grph.oxycline.neg,
  layout = layout_with_graphopt(grph.oxycline.neg),
  vertex.color = domain_color_oxycline_neg,
  vertex.label = NA
)
title(main = "SpiecEasi Network: All domains, Negative Edges Only, Oxycline")
legend(
  x = "topright",
  legend = c("Archaea", "Bacteria", "Eukarya"),
  fill = c("red", "green", "blue"),
  border = NA,
  bty = "n"
)
dev.off()
quartz_off_screen
2
# Extract the refit adjacency matrix from the anoxic SpiecEasi fit
adj.mat <- getRefit(se.anoxic)
# Tabulate the matrix entries (0 = no edge, 1 = edge)
table(as.numeric(adj.mat))
0 1
7566 178
# Extract weighted adjacency: correlation form of the optimal covariance,
# multiplied by the refit matrix so only selected edges keep a weight
se.cor <- cov2cor(as.matrix(getOptCov(se.anoxic)))
weighted.adj.mat <- se.cor * getRefit(se.anoxic)

# Convert to graph objects
grph.unweighted.anoxic <- adj2igraph(adj.mat)
grph.anoxic <- adj2igraph(weighted.adj.mat)

# Put back in species names
V(grph.anoxic)$name <- rownames(alldomains_df_anoxic)
# V(grph.anoxic)

# Make size of nodes proportional to degree (number of connections);
# the +1 avoids size-zero vertices and the /2 scales the nodes down
V(grph.anoxic)$size <- (degree(grph.anoxic) + 1) / 2

# Color edges by connection (positive or negative)
# E(grph.anoxic)$color <- custombluegreen
# E(grph.anoxic)$color[E(grph.anoxic)$weight<0] <- customreddishpurple

# Change width of edges to be proportional to their weights
E(grph.anoxic)$width <- abs(E(grph.anoxic)$weight) * 10

# Remove low-weight edges (you decide what threshold is right for your network):
# weight_threshold <- 0.07
# grph.anoxic <- delete.edges(grph.anoxic,which(abs(E(grph.anoxic)$weight)<weight_threshold))

# Join taxonomy data of each node
# Convert graph to data frames (one for vertices, one for edges)
grph.anoxic_df <- igraph::as_data_frame(grph.anoxic, what = "both")

# Make formatted taxonomy table for each domain, keyed by a `name` column
ps_bac_anoxic_pruned_tax_table <- as.data.frame(tax_table(ps_bac_anoxic_pruned)) %>%
  mutate(name = rownames(tax_table(ps_bac_anoxic_pruned)))
ps_arch_anoxic_pruned_tax_table <- as.data.frame(tax_table(ps_arch_anoxic_pruned)) %>%
  mutate(name = rownames(tax_table(ps_arch_anoxic_pruned)))
ps_euk_anoxic_pruned_tax_table <- as.data.frame(tax_table(ps_euk_anoxic_pruned)) %>%
  mutate(name = rownames(tax_table(ps_euk_anoxic_pruned)))

# Link graph vertices to the formatted taxonomy tables.
# The vertex rows are sliced as bacteria, then archaea, then eukaryotes; this
# assumes alldomains_df_anoxic was built in that order -- TODO confirm.
bac_temp <- left_join(
  grph.anoxic_df$vertices[seq_len(ntaxa(ps_bac_anoxic_pruned)), ],
  ps_bac_anoxic_pruned_tax_table,
  by = "name"
)
# Delete columns that don't match other tax tables
bac_temp <- select(bac_temp, -"taxonomy-9", -"Refined taxonomy")
arch_temp <- left_join(
  grph.anoxic_df$vertices[
    ntaxa(ps_bac_anoxic_pruned) + seq_len(ntaxa(ps_arch_anoxic_pruned)),
  ],
  ps_arch_anoxic_pruned_tax_table,
  by = "name"
)
euk_temp <- left_join(
  grph.anoxic_df$vertices[
    ntaxa(ps_bac_anoxic_pruned) + ntaxa(ps_arch_anoxic_pruned) +
      seq_len(ntaxa(ps_euk_anoxic_pruned)),
  ],
  ps_euk_anoxic_pruned_tax_table,
  by = "name"
)

# Rename column names in euk table to match others
euk_temp <- euk_temp %>%
  rename(
    "taxonomy-1" = Kingdom, "taxonomy-2" = Supergroup,
    "taxonomy-3" = Division, "taxonomy-4" = Class,
    "taxonomy-5" = Order, "taxonomy-6" = Family,
    "taxonomy-7" = Genus, "taxonomy-8" = Species
  )

# Build full data frame with all 3 domains
all_temp <- rbind(bac_temp, arch_temp, euk_temp)

# Remake into graph, now carrying the taxonomy as vertex attributes
grph.anoxic <- graph_from_data_frame(grph.anoxic_df$edges,
  directed = FALSE,
  vertices = all_temp
)

# Make color palette for domain (kept as `dtype` for later chunks)
dtype <- c("red", "green", "blue")
# Look colors up by domain name rather than by factor-level position, so the
# legend below stays correct even if a domain is absent from this network
domain_palette <- setNames(dtype, c("Archaea", "Bacteria", "Eukaryota"))
# Make color vector
domain_color_anoxic <- unname(
  domain_palette[as.character(V(grph.anoxic)$"taxonomy-1")]
)

# Plot
plot(grph.anoxic,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.anoxic),
  vertex.color = domain_color_anoxic
)
title("SpiecEasi Network: All domains, Anoxic Layer")
legend("topright",
  bty = "n",
  legend = c("Archaea", "Bacteria", "Eukarya"),
  fill = c("red", "green", "blue"), border = NA
)

# Save plot
setEPS()
postscript(file = "Figures/3domains_anoxic_spieceasi_network.eps", width = 5.5, height = 5)
plot(grph.anoxic,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.anoxic),
  vertex.color = domain_color_anoxic
)
title("SpiecEasi Network: All domains, Anoxic")
legend("topright",
  bty = "n",
  legend = c("Archaea", "Bacteria", "Eukarya"),
  fill = c("red", "green", "blue"), border = NA
)
dev.off()
quartz_off_screen
2
# Subset based on pos or neg edges: drop the edges of the opposite sign
grph.anoxic.pos <- delete.edges(grph.anoxic, which(E(grph.anoxic)$weight < 0))
grph.anoxic.neg <- delete.edges(grph.anoxic, which(E(grph.anoxic)$weight > 0))

# For each subsetted graph, remove those nodes that are no longer connected to anything
grph.anoxic.pos <- delete.vertices(grph.anoxic.pos, which(degree(grph.anoxic.pos) == 0))
grph.anoxic.neg <- delete.vertices(grph.anoxic.neg, which(degree(grph.anoxic.neg) == 0))

# Make color vector for each. Colors are looked up by domain name: a
# positional lookup on factor levels would shift every color if one domain
# had no vertices left in a subgraph, contradicting the fixed legend.
domain_palette <- c("Archaea" = "red", "Bacteria" = "green", "Eukaryota" = "blue")
domain_color_anoxic_pos <- unname(
  domain_palette[as.character(V(grph.anoxic.pos)$"taxonomy-1")]
)
domain_color_anoxic_neg <- unname(
  domain_palette[as.character(V(grph.anoxic.neg)$"taxonomy-1")]
)

# Plot pos
plot(grph.anoxic.pos,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.anoxic.pos),
  vertex.color = domain_color_anoxic_pos
)
title("SpiecEasi Network: All domains, Positive Edges only, anoxic")
legend("topright",
  bty = "n",
  legend = c("Archaea", "Bacteria", "Eukarya"),
  fill = c("red", "green", "blue"), border = NA
)

# Plot neg
plot(grph.anoxic.neg,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.anoxic.neg),
  vertex.color = domain_color_anoxic_neg
)
title("SpiecEasi Network: All domains, Negative Edges Only, anoxic")
legend("topright",
  bty = "n",
  legend = c("Archaea", "Bacteria", "Eukarya"),
  fill = c("red", "green", "blue"), border = NA
)

# Save the positive-edge plot
setEPS()
postscript(file = "Figures/3domains_anoxic_posedges_spieceasi_network.eps", width = 5.5, height = 5)
plot(grph.anoxic.pos,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.anoxic.pos),
  vertex.color = domain_color_anoxic_pos
)
# NOTE(review): the saved figure is titled "Shallow Anoxic" while the
# on-screen version says "anoxic" -- confirm which label is intended
title("SpiecEasi Network: All domains, Positive Edges Only, Shallow Anoxic")
legend("topright",
  bty = "n",
  legend = c("Archaea", "Bacteria", "Eukarya"),
  fill = c("red", "green", "blue"), border = NA
)
dev.off()
quartz_off_screen
2
# Save the negative-edge anoxic network as EPS
setEPS()
postscript(
  file = "Figures/3domains_anoxic_negedges_spieceasi_network.eps",
  width = 5.5,
  height = 5
)
plot(
  grph.anoxic.neg,
  layout = layout_with_graphopt(grph.anoxic.neg),
  vertex.color = domain_color_anoxic_neg,
  vertex.label = NA
)
title(main = "SpiecEasi Network: All domains, Negative Edges Only, Shallow Anoxic")
legend(
  x = "topright",
  legend = c("Archaea", "Bacteria", "Eukarya"),
  fill = c("red", "green", "blue"),
  border = NA,
  bty = "n"
)
dev.off()
quartz_off_screen
2
# Eight-panel summary figure: one row per dataset, positive-edge network on
# the left and negative-edge network on the right.
# Panels are listed in drawing order (par(mfrow) fills row by row); row_label
# is the left-margin label drawn next to the first panel of each row.
network_panels <- list(
  list(graph = grph.pos, colors = domain_color_pos, row_label = "All depths"),
  list(graph = grph.neg, colors = domain_color_neg),
  list(graph = grph.oxycline.pos, colors = domain_color_oxycline_pos, row_label = "Oxycline"),
  list(graph = grph.oxycline.neg, colors = domain_color_oxycline_neg),
  list(graph = grph.anoxic.pos, colors = domain_color_anoxic_pos, row_label = "Anoxic"),
  list(graph = grph.anoxic.neg, colors = domain_color_anoxic_neg),
  list(graph = grph.2domains.pos, colors = domain_color_2domains_pos, row_label = "Prok Only"),
  list(graph = grph.2domains.neg, colors = domain_color_2domains_neg)
)

# Draw all panels plus the shared legend on the active graphics device.
# Each graph is laid out with a layout computed from that same graph; the
# previous code laid out the 2-domain negative network (panel 8) with the
# layout of the 2-domain positive network.
# Returns (invisibly) the par() values replaced just before the legend is drawn.
draw_network_panels <- function(panels) {
  # Set up in panels
  op <- par(
    oma = c(2, .5, .5, 0), # Room for the titles and legend
    mfrow = c(4, 2),
    mai = c(.15, .3, .15, .1)
  )
  for (i in seq_along(panels)) {
    panel <- panels[[i]]
    plot(panel$graph,
      vertex.label = NA,
      layout = layout_with_graphopt(panel$graph),
      vertex.color = panel$colors
    )
    # Column headers go in the outer margin, after the first two panels
    if (i == 1) {
      mtext("Positive", side = 3, outer = TRUE, line = -1, adj = 0.22, cex = .8)
    }
    if (i == 2) {
      mtext("Negative", side = 3, outer = TRUE, line = -1, adj = .8, cex = .8)
    }
    if (!is.null(panel$row_label)) {
      mtext(panel$row_label, side = 2, cex = .8, line = 1.5)
    }
  }
  # Add legend
  par(op) # Leave the last plot
  op <- par(
    usr = c(0, 1, 0, 1), # Reset the coordinates
    xpd = NA # Allow plotting outside the plot region
  )
  legend(.15, -0.04,
    c("Archaea", "Bacteria", "Eukarya", "No Blast Hit"),
    col = c("red", "green", "blue", "yellow"),
    pch = 16, box.col = NA, cex = .8, horiz = TRUE, x.intersp = 0.3
  )
  invisible(op)
}

# Draw on the current device
op <- draw_network_panels(network_panels)

# Save figure
# Set up EPS and make plot
setEPS(width = 6, height = 9)
postscript("Figures/Networks_pos_neg.eps")
op <- draw_network_panels(network_panels)
dev.off()
quartz_off_screen
2
the number of edges and how many are positive vs negative
# For each network: the number of edges, then the percentage of those edges
# whose weight is below zero (mean of the logical mask, times 100).
# total number of edges in full dataset network
length(E(grph)$weight)
[1] 2840
# percent of neg edges
100 * mean(E(grph)$weight < 0)
[1] 34.40141
# total number of edges in 2-domain dataset network
length(E(grph.2domains)$weight)
[1] 1204
# percent of neg edges
100 * mean(E(grph.2domains)$weight < 0)
[1] 34.5515
# total number of edges in oxycline network
length(E(grph.oxycline)$weight)
[1] 620
# percent of neg edges
100 * mean(E(grph.oxycline)$weight < 0)
[1] 37.74194
# total number of edges in anoxic network
length(E(grph.anoxic)$weight)
[1] 89
# percent of neg edges
100 * mean(E(grph.anoxic)$weight < 0)
[1] 34.83146
Declining number of total edges going from full dataset –> oxycline only –> anoxic only. But the percentage of negative associations is similar (34.4-37.7%). Most associations (~65%) in each network are positive.
The number of edges relative to the total number of possible edges (edge density, shown as a percentage)
# Edge density of each network, multiplied by 100 to read as a percentage
# full dataset
edge_density(grph)*100
[1] 6.374717
# 2 domains
edge_density(grph.2domains)*100
[1] 7.818182
# oxycline
edge_density(grph.oxycline)*100
[1] 3.416919
# anoxic
edge_density(grph.anoxic)*100
[1] 2.324974
The 2-domain network has the highest edge density (7.8%), followed by the full dataset (6.4%), then oxycline (3.4%), then anoxic (2.3%)
The size of the components, or “clumps,” in the network, and how many members in each
# For each network: `$no` is the number of connected components and `$csize`
# is the number of nodes in each component (size 1 = an isolated node).
# full dataset
components(grph)$no
[1] 27
components(grph)$csize
[1] 262 9 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 3 1 1 1 1 1 1
[25] 1 1 1
# 2 domains
components(grph.2domains)$no
[1] 30
components(grph.2domains)$csize
[1] 131 1 1 2 1 1 1 1 1 1 1 3 1 1 1 1 1 1 1 1 1 1 1 6
[25] 1 9 1 1 1 1
# oxycline
components(grph.oxycline)$no
[1] 32
components(grph.oxycline)$csize
[1] 144 1 2 1 1 1 1 1 1 1 1 1 1 1 2 2 11 1 2 1 1 1 1 1
[25] 1 1 1 1 3 1 1 1
# anoxic
components(grph.anoxic)$no
[1] 48
components(grph.anoxic)$csize
[1] 24 2 2 1 1 1 1 1 1 1 4 1 1 1 1 2 1 3 3 1 1 2 1 1 1 1 1 1 1 1 1 1
[33] 1 1 1 1 1 1 1 4 1 1 1 4 1 1 1 1
The anoxic network is most disjointed, with 48 clumps and the largest containing only 24 members. The next is oxycline, with 32 clumps and the largest with 144 members. Then the full dataset has only 27 clumps and the largest clump contains 262 members.
Path is the shortest distance between two nodes (fewest number of edges). Average path length of a network gives a sense of how connected every node is to another. Nodes in unconnected components of the network have “infinite” paths to nodes in other components. The function mean_distance
ignores the infinite paths and calculates the average of all other paths
# Average shortest-path length of each network
# full dataset
mean_distance(grph)
[1] 3.353247
# 2 domains
mean_distance(grph.2domains)
[1] 3.11972
# oxycline
mean_distance(grph.oxycline)
[1] 3.976057
# anoxic
mean_distance(grph.anoxic)
[1] 2.703947
The longest average path length is in the oxycline (3.98), followed by the whole dataset (3.35), the 2-domain dataset (3.12), and then anoxic (2.70). This means the nodes in the anoxic network are more closely associated with each other: even though the anoxic network has more disconnected components, as shown above, the nodes within each component are close to each other. The oxycline components have the longest average distances between nodes.
# For each signed network: convert the graph to its edge + vertex data frames,
# keep the vertex table, and tabulate the `taxonomy-5` column. The trailing
# comments record the counts of interest read off each table.

# Positive network- full dataset
grph.pos_df <- igraph::as_data_frame(grph.pos, what = "both")
grph.pos_df_vert <- grph.pos_df[["vertices"]]
# How many Syndiniales and Polycystinea?
as.data.frame(table(grph.pos_df_vert[["taxonomy-5"]]))
# 37 Dino-Group-II
# 3 Dino-Group-I
# 34 Spumellarida

# Negative network- full dataset
grph.neg_df <- igraph::as_data_frame(grph.neg, what = "both")
grph.neg_df_vert <- grph.neg_df[["vertices"]]
# How many Syndiniales and Polycystinea?
as.data.frame(table(grph.neg_df_vert[["taxonomy-5"]]))
# 18 Dino-Group-II
# 4 Dino-Group-I
# 17 Spumellarida

# Positive network- oxycline
grph.oxycline.pos_df <- igraph::as_data_frame(grph.oxycline.pos, what = "both")
grph.oxycline.pos_df_vert <- grph.oxycline.pos_df[["vertices"]]
# How many Syndiniales and Polycystinea?
as.data.frame(table(grph.oxycline.pos_df_vert[["taxonomy-5"]]))
# 34 Dino-Group-II
# 1 Dino-Group-I
# 24 Spumellarida

# Negative network- oxycline
grph.oxycline.neg_df <- igraph::as_data_frame(grph.oxycline.neg, what = "both")
grph.oxycline.neg_df_vert <- grph.oxycline.neg_df[["vertices"]]
# How many Syndiniales and Polycystinea?
as.data.frame(table(grph.oxycline.neg_df_vert[["taxonomy-5"]]))
# 24 Dino-Group-II
# 1 Dino-Group-I
# 9 Spumellarida

# Positive network- anoxic
grph.anoxic.pos_df <- igraph::as_data_frame(grph.anoxic.pos, what = "both")
grph.anoxic.pos_df_vert <- grph.anoxic.pos_df[["vertices"]]
# How many Syndiniales and Polycystinea?
as.data.frame(table(grph.anoxic.pos_df_vert[["taxonomy-5"]]))
# 0 Dino-Group-II
# 1 Dino-Group-I
# 8 Spumellarida

# Negative network- anoxic
grph.anoxic.neg_df <- igraph::as_data_frame(grph.anoxic.neg, what = "both")
grph.anoxic.neg_df_vert <- grph.anoxic.neg_df[["vertices"]]
# How many Syndiniales and Polycystinea?
as.data.frame(table(grph.anoxic.neg_df_vert[["taxonomy-5"]]))
# 0 Dino-Group-II
# 0 Dino-Group-I
# 0 Spumellarida
# Same recipe for every focal group below:
#   1. pick the focal group's rows from the vertex table,
#   2. select the edges attached to those nodes and build the edge-induced
#      subgraph,
#   3. list the non-focal nodes left in that subgraph, counted by taxonomy.
# NOTE(review): from() selects edges by their tail vertex. Confirm that on
# these graphs it returns every edge touching the focal nodes; .inc() would
# state that intent explicitly.

# Positive associations: Syndiniales
# Pull out names of Syndiniales and Spumellarida ASVs
grph.pos_df_vert_synd <- grph.pos_df_vert %>%
  filter(`taxonomy-4` == "Syndiniales")
# filter graph to include only edges connected to those nodes
grph.pos_synd_edges <- E(grph.pos)[from(grph.pos_df_vert_synd$name)] # get edges
grph.pos.synd_subgraph <- subgraph.edges(grph.pos, grph.pos_synd_edges) # filter graph
# get taxonomy of remaining nodes, removing the Syndiniales from table (eg. only connected nodes) and grouping by taxonomy
grph.pos_df_vert %>%
  filter(
    `name` %in% V(grph.pos.synd_subgraph)$name,
    !(`taxonomy-4` %in% "Syndiniales")
  ) %>%
  count(`taxonomy-2`, `taxonomy-3`, `taxonomy-4`, `taxonomy-5`,
        name = "count", sort = TRUE)

# Positive associations: Spumellarida
grph.pos_df_vert_spum <- grph.pos_df_vert %>%
  filter(`taxonomy-5` == "Spumellarida")
grph.pos_spum_edges <- E(grph.pos)[from(grph.pos_df_vert_spum$name)]
grph.pos.spum_subgraph <- subgraph.edges(grph.pos, grph.pos_spum_edges)
grph.pos_df_vert %>%
  filter(
    `name` %in% V(grph.pos.spum_subgraph)$name,
    !(`taxonomy-5` %in% "Spumellarida")
  ) %>%
  count(`taxonomy-2`, `taxonomy-3`, `taxonomy-4`, `taxonomy-5`,
        name = "count", sort = TRUE)

# Negative associations: Syndiniales
grph.neg_df_vert_synd <- grph.neg_df_vert %>%
  filter(`taxonomy-4` == "Syndiniales")
grph.neg_synd_edges <- E(grph.neg)[from(grph.neg_df_vert_synd$name)]
grph.neg.synd_subgraph <- subgraph.edges(grph.neg, grph.neg_synd_edges)
grph.neg_df_vert %>%
  filter(
    `name` %in% V(grph.neg.synd_subgraph)$name,
    !(`taxonomy-4` %in% "Syndiniales")
  ) %>%
  count(`taxonomy-2`, `taxonomy-3`, `taxonomy-4`, `taxonomy-5`,
        name = "count", sort = TRUE)

# Negative associations: Spumellarida
grph.neg_df_vert_spum <- grph.neg_df_vert %>%
  filter(`taxonomy-5` == "Spumellarida")
grph.neg_spum_edges <- E(grph.neg)[from(grph.neg_df_vert_spum$name)]
grph.neg.spum_subgraph <- subgraph.edges(grph.neg, grph.neg_spum_edges)
grph.neg_df_vert %>%
  filter(
    `name` %in% V(grph.neg.spum_subgraph)$name,
    !(`taxonomy-5` %in% "Spumellarida")
  ) %>%
  count(`taxonomy-2`, `taxonomy-3`, `taxonomy-4`, `taxonomy-5`,
        name = "count", sort = TRUE)

# Also check out Cariacotrichea to hypothesize about possible symbiosis partners
# Positive associations only
grph.pos_df_vert_cari <- grph.pos_df_vert %>%
  filter(`taxonomy-4` == "Cariacotrichea")
grph.pos_cari_edges <- E(grph.pos)[from(grph.pos_df_vert_cari$name)]
grph.pos.cari_subgraph <- subgraph.edges(grph.pos, grph.pos_cari_edges)
grph.pos_df_vert %>%
  filter(
    `name` %in% V(grph.pos.cari_subgraph)$name,
    !(`taxonomy-4` %in% "Cariacotrichea")
  ) %>%
  count(`taxonomy-2`, `taxonomy-3`, `taxonomy-4`, `taxonomy-5`,
        name = "count", sort = TRUE)
Calculate 5 parameters for each individual node (degree, strength, betweenness, closeness, and local clustering coefficient):
# Node-level measures for the full-dataset network.
# First change the weights of the edges (the strength of association) to absolute value. This won't work if negative edge weights are left with the negative signs
# NOTE: this overwrites the signed weights stored on `grph`; any summary that
# depends on the sign of the weights has to be computed before this line.
E(grph)$weight <- abs(E(grph)$weight)
# calculate parameters
# NOTE(review): igraph's path-based measures (betweenness, closeness) may
# interpret the `weight` attribute as a distance, so strong associations
# would count as "far apart" -- confirm this is the intended reading.
names <- V(grph)$name                        # node IDs
de <- degree(grph)                           # number of edges per node
st <- igraph::strength(grph)                 # per-node strength (replaces the deprecated alias graph.strength)
be <- betweenness(grph, normalized = TRUE)   # normalized betweenness
cc <- closeness(grph)                        # closeness centrality
At centrality.c:2617 :closeness centrality is not well-defined for disconnected graphs
# Local clustering coefficient (transitivity) of every node
l.cluster <- transitivity(grph, "local")
# assemble dataset and match full taxonomy
fulldateset_node_measures <- data.frame(
  ID = names,
  degree = de,
  strength = st,
  betweenness = be,
  closeness = cc,
  clustering_coefficient = l.cluster
)
# Number of rows in each domain's OTU table. The slices below take the first
# block of rows as bacteria, the next as archaea and the last as eukaryotes.
# NOTE(review): this relies on the graph's nodes being ordered
# bacteria -> archaea -> eukaryotes; confirm against where `grph` is built.
n_bac <- nrow(otu_table(ps_bac_pruned_3domains))
n_arch <- nrow(otu_table(ps_arch_pruned_3domains))
n_euk <- nrow(otu_table(ps_euk_pruned_3domains))
# Put back bac taxaonomy
temp1 <- left_join(
  fulldateset_node_measures[seq_len(n_bac), ],
  bac_taxonomy,
  by = c("ID" = "#OTU ID")
)
# delete "Taxonomy-9" and "refined Taxonomy" columns
temp1 <- select(temp1, -all_of(c("taxonomy-9", "Refined taxonomy")))
# Archaea: the n_arch rows that follow the bacterial rows
temp2 <- left_join(
  fulldateset_node_measures[n_bac + seq_len(n_arch), ],
  arch_taxonomy,
  by = c("ID" = "#OTU ID")
)
# Eukaryotes: the n_euk rows that follow the bacterial and archaeal rows
temp3 <- left_join(
  fulldateset_node_measures[n_bac + n_arch + seq_len(n_euk), ],
  euk_taxonomy,
  by = c("ID" = "#ASV ID")
)
# Rename col names to match those from Bac and Arch
temp3 <- temp3 %>%
  rename(
    "taxonomy-1" = Kingdom,
    "taxonomy-2" = Supergroup,
    "taxonomy-3" = Division,
    "taxonomy-4" = Class,
    "taxonomy-5" = Order,
    "taxonomy-6" = Family,
    "taxonomy-7" = Genus,
    "taxonomy-8" = Species
  )
# combine back all 3 domains, with new names as row names in a dataframe
fulldateset_node_measures <- rbind(temp1, temp2, temp3)
fulldateset_node_measures
Plot betweenness vs degree for each node. - Tipton et al. argue that nodes with high betweenness are “bottlenecks” or important connectors and nodes with high degree are “hubs” - Berry et al. argue that nodes with low betweenness, high degree, high closeness, and high transitivity are candidate keystone species - Add closeness to each node’s plotly label rather than plotting it, since it doesn’t vary much node-to-node
# replace NA in taxonomy with unidentified
# remove nodes with 0 betweenness (can't calculate log10 of 0)
# replace NaN clustering coefs with 0
# The NaN -> 0 step is done on the numeric column itself and the
# "unidentified" label is written only into character columns. Replacing every
# NA in the data frame with a string would also hit the NaN clustering
# coefficients and turn that numeric column into character.
# NOTE(review): assumes the taxonomy columns are character (not factor).
fulldateset_node_measures <- fulldateset_node_measures %>%
  mutate(
    clustering_coefficient = replace(
      clustering_coefficient,
      is.nan(clustering_coefficient),
      0
    ),
    across(where(is.character), ~ replace_na(.x, "unidentified"))
  ) %>%
  filter(betweenness != 0)
# Build one colour per distinct `taxonomy-3` value by interpolating the
# 12-colour "Paired" palette, then shuffle the colours (fixed seed) so that
# neighbouring groups are easier to tell apart on the plot.
set.seed(123)
mycolors <- sample(
  colorRampPalette(brewer.pal(12, "Paired"))(
    n_distinct(fulldateset_node_measures$`taxonomy-3`)
  )
)
# plot with plotly and so I can hover over points and determine which taxa they are
# The extra aesthetics (ID, taxonomy-2/4/5) are not drawn; they are mapped so
# that ggplotly() can show them in the hover tooltip.
p <- ggplot(
  fulldateset_node_measures,
  aes(
    x = degree,
    y = betweenness,
    ID = ID,
    shape = `taxonomy-1`,
    `taxonomy-2` = `taxonomy-2`,
    color = `taxonomy-3`,
    `taxonomy-4` = `taxonomy-4`,
    `taxonomy-5` = `taxonomy-5`
  )
) +
  geom_point(size = 4) +
  scale_y_continuous(trans = "log10") +
  scale_color_manual(values = mycolors) +
  # theme_bw() is a complete theme: it must come first, otherwise it resets
  # the legend.title setting below.
  theme_bw() +
  theme(legend.title = element_blank())
p
ggplotly(p, tooltip = c("ID", "taxonomy-2", "taxonomy-3", "taxonomy-4", "taxonomy-5"))
NA
Make static figure for manuscript
# Static betweenness-vs-degree scatter: colour = taxonomy-3, shape = taxonomy-1,
# log10 y axis.
p2 <- ggplot(
  fulldateset_node_measures,
  aes(x = degree, y = betweenness, shape = `taxonomy-1`, color = `taxonomy-3`)
) +
  geom_point(size = 4) +
  scale_y_continuous(trans = "log10") +
  scale_color_manual(values = mycolors, name = "") +
  scale_shape_manual(values = c(19, 17, 15, 18), name = "") +
  # theme_bw() is a complete theme: apply it first so the customisations in
  # theme() below are kept instead of being reset.
  theme_bw() +
  theme(
    legend.title = element_blank(),
    axis.text = element_text(size = 8),
    axis.text.x = element_text(size = 8, angle = 45, hjust = 1),
    axis.title = element_text(size = 8),
    legend.text = element_text(size = 8),
    strip.text = element_text(size = 8),
    legend.margin = margin(0, 0, 0, 2),
    legend.box.margin = margin(-10, -10, -10, -10),
    plot.margin = grid::unit(c(0, 0, 0, 0), "mm")
  )
p2
# NOTE(review): this path uses a lower-case "figures/" directory, while the
# network figure is written to "Figures/"; confirm which one is intended on
# case-sensitive file systems.
ggsave("figures/betweenness_vs_degree.eps", p2, width = 10, height = 6, units = "in")
# Per-node degree as a data frame (column `de`, node name in `name`), joined
# to the `all_temp` table by node name.
de_df <- as.data.frame(de) %>%
  mutate(name = rownames(.)) %>%
  left_join(all_temp, by = "name")

# Total degree per group at each taxonomic rank, largest first.
# taxonomy-1
de_df %>%
  group_by(`taxonomy-1`) %>%
  summarise(degreesum = sum(de), .groups = "drop") %>%
  arrange(desc(degreesum))
# taxonomy-2
de_df %>%
  group_by(`taxonomy-2`) %>%
  summarise(degreesum = sum(de), .groups = "drop") %>%
  arrange(desc(degreesum))
# taxonomy-3
de_df %>%
  group_by(`taxonomy-3`) %>%
  summarise(degreesum = sum(de), .groups = "drop") %>%
  arrange(desc(degreesum))
# taxonomy-4
de_df %>%
  group_by(`taxonomy-4`) %>%
  summarise(degreesum = sum(de), .groups = "drop") %>%
  arrange(desc(degreesum))
# taxonomy-5
de_df %>%
  group_by(`taxonomy-5`) %>%
  summarise(degreesum = sum(de), .groups = "drop") %>%
  arrange(desc(degreesum))
NA
# Checkpoint: write every object in the current workspace to an .RData file
save.image("EnvironmentBackups/CariacoEuks_postanalysis_vars_upto_nodelevelmeasures.RData")
Or load if coming back
# Restore the objects saved in the checkpoint file into the current workspace
load("EnvironmentBackups/CariacoEuks_postanalysis_vars_upto_nodelevelmeasures.RData")