library(phyloseq)
library(vegan)
library(ggplot2)
R version 3.4.3
Phyloseq version 1.22.3
Linux machine image 4.4.0-64-generic
## Sample data
soiltab <- read.table('data/soil.csv', sep = ';', row.names = 1, header = TRUE)
# Taxonomy table
load("data/taxotab.01.05.rdata")
# ASV table
load("data/seqtab.nochim.rdata")
ps <- phyloseq(otu_table(seqtab.nochim, taxa_are_rows=FALSE),
sample_data(soiltab),
tax_table(as.matrix(taxo$tax)))
ps
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 546 taxa and 74 samples ]
## sample_data() Sample Data: [ 74 samples by 8 sample variables ]
## tax_table() Taxonomy Table: [ 546 taxa by 7 taxonomic ranks ]
sample_data(ps)$horizon = factor(sample_data(ps)$horizon, levels = c('L', 'F', 'H', 'Ae', 'B'))
sample_data(ps)$forest = factor(sample_data(ps)$forest, levels = c('AS', 'Mixed', 'FG'))
#load(file = "data/ps.rdata")
#ps
ps.fungi = subset_taxa(ps, Kingdom=="k__Fungi")
ntaxa(ps)
## [1] 546
ntaxa(ps.fungi)
## [1] 524
Filtered 22 non-fungal taxa
ps.fungi.nosd = filter_taxa(ps.fungi, function(x) sum(x) > 2, TRUE)
ntaxa(ps.fungi.nosd)
## [1] 523
ps.fungi.nosd.log <- transform_sample_counts(ps.fungi.nosd, function(x) log(x+1))
ps.fungi.nosd.log.sp = tax_glom(ps.fungi.nosd.log, "Species", NArm = FALSE) # NArm = FALSE in order to keep ESV not taxonomically assigned at the species level
ps.fungi.nosd.hel = transform_sample_counts(ps.fungi.nosd, function(x) sqrt(x/sum(x))) # Hellinger transformation
ntaxa(ps.fungi.nosd.log)
## [1] 523
nsamples(ps.fungi.nosd.log)
## [1] 74
rank_names(ps.fungi.nosd.log)
## [1] "Kingdom" "Phylum" "Class" "Order" "Family" "Genus" "Species"
sample_variables(ps.fungi.nosd.log)
## [1] "sampleID" "plot" "block" "forest" "horizon" "Carbon"
## [7] "Nitrogen" "pH"
otu_table(ps.fungi.nosd.log)[1:2, 1:2]
## OTU Table: [2 taxa and 2 samples]
## taxa are columns
## ATGCGATAAGTAATGTGAATTGCAGAATTCAGTGAATCATCGAATCTTTGAACGCACCTTGCGCCCTTTGGTATTCCGAAGGGCATGCCTGTTTGAGTGTCATTAAATTATCAACCTTGCTCGCTTTTATTAGCTTGAGTTAGGCTTGGATGTGAGGGTTTTGCTGGCTTCCTTCAGTGGATGGTCTGCTCCCTTTGAATGCATTAGCGGGATCTCTTGTGGACCGTCACTTGGTGTGATAATTATCTATGCCTTGAGACTTTGAAACAAACTTATGAGAATCTGCTTATAACCGTCCTCACGGACAACTTTTGACAATTTGACCTCAAATCAGGTAGGACTACCCGCTGAACTTAA
## S10 0.000000
## S11 1.098612
## ATGCGATACGTAATGTGAATTGCAGAATTCAGTGAATCATCGAATCTTTGAACGCACATTGCACTCCTTGGTATTCCGAGGAGTATGCCTGTTTCAGTATCATGAGCACTCTCACACCTAACCTTTGGGTTTATGGCGTGGAATTGGAATGCGCCGACTGTCATGGTTGGCCCTTCTAAAATGTAGTTCTTGGCTGTCACCTAATACAGCAGTTTGGCCTAATAGTTTTGGCATTCATTGTCAAATCTTTGGCTAACATTTGCTCCAGGAGTCAGTCTTGATAATACAGAAAACTCATTCAAATTTTGATCTGAAATCAGGTAGGGCTACCCGCTGAACTTAA
## S10 0.000000
## S11 2.639057
tax_table(ps.fungi.nosd.log)[1:2, 1:2]
## Taxonomy Table: [2 taxa by 2 taxonomic ranks]:
## Kingdom
## ATGCGATAAGTAATGTGAATTGCAGAATTCAGTGAATCATCGAATCTTTGAACGCACCTTGCGCCCTTTGGTATTCCGAAGGGCATGCCTGTTTGAGTGTCATTAAATTATCAACCTTGCTCGCTTTTATTAGCTTGAGTTAGGCTTGGATGTGAGGGTTTTGCTGGCTTCCTTCAGTGGATGGTCTGCTCCCTTTGAATGCATTAGCGGGATCTCTTGTGGACCGTCACTTGGTGTGATAATTATCTATGCCTTGAGACTTTGAAACAAACTTATGAGAATCTGCTTATAACCGTCCTCACGGACAACTTTTGACAATTTGACCTCAAATCAGGTAGGACTACCCGCTGAACTTAA "k__Fungi"
## ATGCGATACGTAATGTGAATTGCAGAATTCAGTGAATCATCGAATCTTTGAACGCACATTGCACTCCTTGGTATTCCGAGGAGTATGCCTGTTTCAGTATCATGAGCACTCTCACACCTAACCTTTGGGTTTATGGCGTGGAATTGGAATGCGCCGACTGTCATGGTTGGCCCTTCTAAAATGTAGTTCTTGGCTGTCACCTAATACAGCAGTTTGGCCTAATAGTTTTGGCATTCATTGTCAAATCTTTGGCTAACATTTGCTCCAGGAGTCAGTCTTGATAATACAGAAAACTCATTCAAATTTTGATCTGAAATCAGGTAGGGCTACCCGCTGAACTTAA "k__Fungi"
## Phylum
## ATGCGATAAGTAATGTGAATTGCAGAATTCAGTGAATCATCGAATCTTTGAACGCACCTTGCGCCCTTTGGTATTCCGAAGGGCATGCCTGTTTGAGTGTCATTAAATTATCAACCTTGCTCGCTTTTATTAGCTTGAGTTAGGCTTGGATGTGAGGGTTTTGCTGGCTTCCTTCAGTGGATGGTCTGCTCCCTTTGAATGCATTAGCGGGATCTCTTGTGGACCGTCACTTGGTGTGATAATTATCTATGCCTTGAGACTTTGAAACAAACTTATGAGAATCTGCTTATAACCGTCCTCACGGACAACTTTTGACAATTTGACCTCAAATCAGGTAGGACTACCCGCTGAACTTAA "p__Basidiomycota"
## ATGCGATACGTAATGTGAATTGCAGAATTCAGTGAATCATCGAATCTTTGAACGCACATTGCACTCCTTGGTATTCCGAGGAGTATGCCTGTTTCAGTATCATGAGCACTCTCACACCTAACCTTTGGGTTTATGGCGTGGAATTGGAATGCGCCGACTGTCATGGTTGGCCCTTCTAAAATGTAGTTCTTGGCTGTCACCTAATACAGCAGTTTGGCCTAATAGTTTTGGCATTCATTGTCAAATCTTTGGCTAACATTTGCTCCAGGAGTCAGTCTTGATAATACAGAAAACTCATTCAAATTTTGATCTGAAATCAGGTAGGGCTACCCGCTGAACTTAA "p__Mucoromycota"
CHALLENGE
Pouvez-vous trouvez le nombre de phylum de champignons?
Can you find the number of fungal phylum?
plot_bar(ps, x = "Sample", y = "Abundance", fill = "Kingdom") +
geom_bar(stat="identity", color = "black") +
labs(x = "Sample ID", y = "Reads abundance")
plot_bar(ps.fungi, x = "Sample", y = "Abundance", fill = "Kingdom") +
geom_bar(stat="identity", color = "black") +
labs(x = "Sample ID", y = "Reads abundance of fungi")
plot_bar(ps.fungi, x = "Sample", y = "Abundance", fill = "Phylum") +
geom_bar(stat="identity") +
labs(x = "Sample ID", y = "Reads abundance of fungi")
facet_wrap(~forest)
plot_bar(ps.fungi, fill = "Phylum") +
geom_bar(stat="identity") +
facet_wrap(~forest, ncol = 3, scales = "free_x") +
labs(x = "Sample ID", y = "Reads abundance of fungi")
facet_wrap(horizon~forest)
plot_bar(ps.fungi, fill = "Phylum") +
geom_bar(stat="identity") +
facet_wrap(horizon~forest, ncol = 3, scales = "free_x") +
labs(x = "Sample ID", y = "Reads abundance of fungi")
ord.nmds.bray <- ordinate(ps.fungi.nosd.log.sp, method="NMDS", k = 2, try = 20, distance = "bray") # /!\ number of tries need to be high enough to reach convergence
## Wisconsin double standardization
## Run 0 stress 0.225241
## Run 1 stress 0.2223462
## ... New best solution
## ... Procrustes: rmse 0.05440405 max resid 0.306738
## Run 2 stress 0.2203151
## ... New best solution
## ... Procrustes: rmse 0.04550081 max resid 0.2865166
## Run 3 stress 0.2446921
## Run 4 stress 0.2253402
## Run 5 stress 0.2207518
## ... Procrustes: rmse 0.04898002 max resid 0.3214373
## Run 6 stress 0.2325877
## Run 7 stress 0.2307777
## Run 8 stress 0.2425666
## Run 9 stress 0.2187777
## ... New best solution
## ... Procrustes: rmse 0.04569631 max resid 0.2879466
## Run 10 stress 0.2188551
## ... Procrustes: rmse 0.04600953 max resid 0.2881654
## Run 11 stress 0.2272405
## Run 12 stress 0.2310672
## Run 13 stress 0.2221372
## Run 14 stress 0.2296952
## Run 15 stress 0.2181904
## ... New best solution
## ... Procrustes: rmse 0.04294399 max resid 0.2883536
## Run 16 stress 0.2148475
## ... New best solution
## ... Procrustes: rmse 0.02259523 max resid 0.185004
## Run 17 stress 0.2207538
## Run 18 stress 0.2355219
## Run 19 stress 0.2181883
## Run 20 stress 0.2269349
## *** No convergence -- monoMDS stopping criteria:
## 20: stress ratio > sratmax
plot_ordination(ps.fungi.nosd.log.sp, ord.nmds.bray, type="samples", color = "horizon")
plot_ordination(ps.fungi.nosd.log.sp, ord.nmds.bray, type="samples", color = "horizon") +
geom_point(size = 6, alpha = .5, show.legend = TRUE) +
scale_color_manual(name = "Horizon", values = c('darkgreen', 'sienna4', 'grey1', 'grey60', 'darkorange2')) +
labs(x = "Axis 1", y = "Axis 2", caption = paste("Stress =", round(ord.nmds.bray$stress, 2))) +
facet_wrap(~forest) +
stat_ellipse(type = "t", level = 0.8) + # 80% confidence interval ellispes
coord_fixed() +
theme(text = element_text(size=20), strip.text = element_text(size=20))
ord.nmds.bin <- ordinate(ps.fungi.nosd.log.sp, method="NMDS", k = 2, try = 20, distance = "bray", binary =TRUE)
## Wisconsin double standardization
## Run 0 stress 0.2034727
## Run 1 stress 0.2170128
## Run 2 stress 0.215945
## Run 3 stress 0.2169219
## Run 4 stress 0.2156148
## Run 5 stress 0.2177145
## Run 6 stress 0.2141023
## Run 7 stress 0.2170497
## Run 8 stress 0.2166483
## Run 9 stress 0.2189259
## Run 10 stress 0.2133106
## Run 11 stress 0.2127333
## Run 12 stress 0.2166672
## Run 13 stress 0.2034529
## ... New best solution
## ... Procrustes: rmse 0.01271852 max resid 0.05686887
## Run 14 stress 0.2178162
## Run 15 stress 0.2158355
## Run 16 stress 0.2183089
## Run 17 stress 0.2261992
## Run 18 stress 0.2182643
## Run 19 stress 0.2181284
## Run 20 stress 0.2202713
## *** No convergence -- monoMDS stopping criteria:
## 20: stress ratio > sratmax
plot_ordination(ps.fungi.nosd.log.sp, ord.nmds.bin, type="samples", color = "horizon") +
geom_point(size = 6, alpha = .5, show.legend = TRUE) +
scale_color_manual(name = "Horizon", values = c('darkgreen', 'sienna4', 'grey1', 'grey60', 'darkorange2')) +
labs(x = "Axis 1", y = "Axis 2", caption = paste("Stress =", round(ord.nmds.bin$stress, 2))) +
facet_wrap(~forest) +
stat_ellipse(type = "t", level = 0.8) + # 80% confidence interval ellispes
coord_fixed() +
theme(text = element_text(size=20), strip.text = element_text(size=20))
CHALLENGE
Pouvez-vous faire une analyse en composantes principales?
Can you run a principal component analysis?
metadata <- as(sample_data(ps.fungi.nosd.log.sp), "data.frame") # Export data
adonis(phyloseq::distance(physeq = ps.fungi.nosd.log.sp, method="bray") ~ horizon * forest, strata = metadata$block,# Horizon and forest effect with interaction and block as strata
data = metadata,
permutations = 99999)
##
## Call:
## adonis(formula = phyloseq::distance(physeq = ps.fungi.nosd.log.sp, method = "bray") ~ horizon * forest, data = metadata, permutations = 99999, strata = metadata$block)
##
## Blocks: strata
## Permutation: free
## Number of permutations: 99999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## horizon 4 6.8294 1.70736 7.6311 0.29952 1e-05 ***
## forest 2 0.9236 0.46181 2.0641 0.04051 0.00216 **
## horizon:forest 8 1.8480 0.23099 1.0324 0.08105 0.38681
## Residuals 59 13.2005 0.22374 0.57893
## Total 73 22.8015 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
betadisp <- betadisper(phyloseq::distance(physeq = ps.fungi.nosd.log.sp, method="bray"), metadata$forest)
permutest(betadisp, permutations = 99999)
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 99999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 2 0.00882 0.0044116 0.9854 99999 0.3799
## Residuals 71 0.31786 0.0044769
betadisp <- betadisper(phyloseq::distance(physeq = ps.fungi.nosd.log.sp, method="bray"), metadata$horizon)
permutest(betadisp, permutations = 99999)
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 99999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 4 0.13844 0.03461 5.1657 99999 0.00123 **
## Residuals 69 0.46229 0.00670
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(phyloseq::distance(ps.fungi.nosd.log.sp, method="bray", binary = TRUE) ~ horizon * forest, strata = metadata$block, # Horizon and forest effect with interaction and block as strata
data = metadata,
permutations = 99999)
##
## Call:
## adonis(formula = phyloseq::distance(ps.fungi.nosd.log.sp, method = "bray", binary = TRUE) ~ horizon * forest, data = metadata, permutations = 99999, strata = metadata$block)
##
## Blocks: strata
## Permutation: free
## Number of permutations: 99999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## horizon 4 6.8302 1.70756 8.8627 0.33451 1e-05 ***
## forest 2 0.7049 0.35247 1.8294 0.03452 0.01524 *
## horizon:forest 8 1.5161 0.18951 0.9836 0.07425 0.53206
## Residuals 59 11.3674 0.19267 0.55672
## Total 73 20.4186 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
betadisp <- betadisper(phyloseq::distance(ps.fungi.nosd.log.sp, method="bray", binary = TRUE), metadata$forest)
permutest(betadisp, permutations = 99999)
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 99999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 2 0.00669 0.0033460 0.5549 99999 0.5757
## Residuals 71 0.42809 0.0060294
betadisp <- betadisper(phyloseq::distance(ps.fungi.nosd.log.sp, method="bray", binary = TRUE), metadata$horizon)
permutest(betadisp, permutations = 99999)
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 99999
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 4 0.17097 0.042743 6.5332 99999 0.00022 ***
## Residuals 69 0.45143 0.006542
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ord.rda <- ordinate(ps.fungi.nosd.log.sp, formula = ps.fungi.nosd.log.sp ~ Carbon+Nitrogen+pH, method="CAP", distance = "bray") # Uses Capscale function of Vegan package
RsquareAdj(ord.rda)
## $r.squared
## [1] 0.2387887
##
## $adj.r.squared
## [1] 0.2061653
anova.cca(ord.rda)
## Permutation test for capscale under reduced model
## Permutation: free
## Number of permutations: 999
##
## Model: capscale(formula = OTU ~ Carbon + Nitrogen + pH, data = data, distance = distance)
## Df SumOfSqs F Pr(>F)
## Model 3 5.4447 7.1209 0.001 ***
## Residual 70 17.8409
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
vif.cca(ord.rda) # VIF < 10 should be fine
## Carbon Nitrogen pH
## 6.975395 7.163518 1.129545
CHALLENGE
Que feriez-vous pour exclure les données d’azote de la RDA?
What if you want to exclude Nitrogen data of the RDA?
plot_ordination(ps.fungi.nosd.log.sp, vegan::scores(ord.rda, scaling=1), type="sites", color = "horizon")
evals <- ord.rda$CCA$eig/sum(ord.rda$CCA$eig)
arrowmat = vegan::scores(ord.rda, display = "bp", scaling = 1) # Add the environmental variables as arrows
arrowdf <- data.frame(arrowmat) # Add labels
rownames(arrowdf) <- c("Carbon","Nitrogen","pH") # Add rownames
arrowdf <- data.frame(labels = c("Carbon","Nitrogen","pH"), arrowmat) # Make a data.frame
plot_ordination(ps.fungi.nosd.log.sp, vegan::scores(ord.rda, scaling = 1), type="sites", color = "horizon") +
coord_fixed(sqrt(evals[2] / evals[1])) +
geom_point(size = 8, alpha = .5) +
scale_color_manual(name = "Horizon", values = c('darkgreen', 'sienna4', 'grey1', 'grey60', 'darkorange2')) +
geom_segment(aes(xend = CAP1, yend = CAP2, x = 0, y = 0), size = .7, data = arrowdf, color = "black", arrow = arrow(length = unit(0.05, "npc"))) +
geom_text(aes(x = CAP1*1.15, y = CAP2*1.15, color = NULL, label = labels), size = 6, data = arrowdf, show.legend = FALSE) +
facet_wrap(~forest) +
theme(text = element_text(size=18), strip.text = element_text(size=20)) +
labs(x = paste("Constrained axis 1 (",round(evals[1]*100, 0),"%)"), y = paste("Constrained axis 2 (",round(evals[2]*100, 0),"%)"))