Load packages
library(mosaic)
library(dplyr)
library(ggplot2)
library(ggvis)
library(parallel)


AxB ANOVA

I’m going to use a dataset that comes with a standard R installation. The dataset is from a study on the effect of vitamin C on tooth growth in guinea pigs

len = tooth length

supp = the way the supplement was administered (OJ = orange juice) (VC = absorbic acid)

dose = amount of supplement in mg (levels are 0.5, 1.0, 2.0)

## Load data
data(ToothGrowth)

## The data has 60 observations on 3 variables:
head(ToothGrowth)
##    len supp dose
## 1  4.2   VC  0.5
## 2 11.5   VC  0.5
## 3  7.3   VC  0.5
## 4  5.8   VC  0.5
## 5  6.4   VC  0.5
## 6 10.0   VC  0.5
## This shows that the "dose" variable is numeric
## We want to change it to a factor variable
  ## where 0.5 = low, 1.0 = med, 2.0 = high
str(ToothGrowth)
## 'data.frame':    60 obs. of  3 variables:
##  $ len : num  4.2 11.5 7.3 5.8 6.4 10 11.2 11.2 5.2 7 ...
##  $ supp: Factor w/ 2 levels "OJ","VC": 2 2 2 2 2 2 2 2 2 2 ...
##  $ dose: num  0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ...
ToothGrowth$dose = factor(ToothGrowth$dose, levels=c(0.5,1.0,2.0),
                          labels=c("low","med","high"))

## Let's look at the sample data per cell
ToothGrowth %>%
  group_by(supp, dose) %>%
  summarize(n=n(), mean=mean(len), sd=sd(len))
## Source: local data frame [6 x 5]
## Groups: supp
## 
##   supp dose  n  mean       sd
## 1   OJ  low 10 13.23 4.459709
## 2   OJ  med 10 22.70 3.910953
## 3   OJ high 10 26.06 2.655058
## 4   VC  low 10  7.98 2.746634
## 5   VC  med 10 16.77 2.515309
## 6   VC high 10 26.14 4.797731

Here are the means and (standard deviations) for each cell. Note that each cell has a sample size of n=10.

…… | Orange Juice | .AbsorbAcid. | Total

—— | ——————- | —————– | ————————-

Low | 13.23 (4.460) | 07.98 (2.747) | 10.605 (4.4998)

Med | 22.70 (3.911) | 16.77 (2.515) | 19.735 (4.4154)

High | 26.06 (2.655) | 26.14 (4.798) | 26.100 (3.7742)

—— | ——————- | —————– | ————————-

Total | 20.66 (6.606) | 16.96 (8.266) | N=60, 18.813 (7.6493)

There’s an easier way to get these summary statistics, but it requires running the ANOVA first.

### Let's take a look at some visualizations of this data

## Density plots to investigate normality
densityplot(~len|dose*supp, data=ToothGrowth,
    main="Tooth Growth by Dose and Supplement",
   xlab="Tooth Growth", 
   layout=c(3,2))

## Let's also run a test to check for homogeneity of variances
## Bartlett's test (run separately for dose and supplement groups)
bartlett.test(len~dose, data=ToothGrowth)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  len by dose
## Bartlett's K-squared = 0.6655, df = 2, p-value = 0.717
bartlett.test(len~supp, data=ToothGrowth)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  len by supp
## Bartlett's K-squared = 1.4217, df = 1, p-value = 0.2331
## What do those relatively high p-values tell us?


## Boxplots to investigate group differences
bwplot(supp~len|dose, data=ToothGrowth,
    ylab="supplement", xlab="Tooth Growth", 
   main="Tooth Growth by Dose and Supplement", 
   layout=(c(1,3)))

## Let's take a look at a means plot to check for interaction
with(ToothGrowth, interaction.plot(x.factor=dose, trace.factor=supp,
    response=len, fun=mean, type="b", legend=T,
    ylab="Tooth Length", main="Interaction Plot",
    pch=c(1,19)))

** We’re now ready to run the AxB ANOVA**

## AxB ANOVA
# Note that it models the LEN as a function of
#   supp * dose.  This means our model is:
#  len ~ f(supp + dose + suppXdose)
aov.out = aov(len ~ supp * dose, data=ToothGrowth)

## Let's get a summary of the model
summary(aov.out)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## supp         1  205.4   205.4  15.572 0.000231 ***
## dose         2 2426.4  1213.2  92.000  < 2e-16 ***
## supp:dose    2  108.3    54.2   4.107 0.021860 *  
## Residuals   54  712.1    13.2                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Assuming we're using an alpha of 0.05, we have a significant
## interaction.  We can now split the data by dose and compare
## supplements using a Tukey test
TukeyHSD(aov.out, which=c("supp"), conf.level=.99)
##   Tukey multiple comparisons of means
##     99% family-wise confidence level
## 
## Fit: aov(formula = len ~ supp * dose, data = ToothGrowth)
## 
## $supp
##       diff       lwr       upr     p adj
## VC-OJ -3.7 -6.203448 -1.196552 0.0002312
## The p-value indicates the difference is statistically significant
## The confidence interval for the difference ranges from -1.2 to -6.2.
## We can plot this confidence interval
plot(TukeyHSD(aov.out, which=c("supp")))

## We could have also split our data by supplement and compare doses
TukeyHSD(aov.out, which=c("dose"), conf.level=.99)
##   Tukey multiple comparisons of means
##     99% family-wise confidence level
## 
## Fit: aov(formula = len ~ supp * dose, data = ToothGrowth)
## 
## $dose
##            diff       lwr       upr   p adj
## med-low   9.130  5.637681 12.622319 0.0e+00
## high-low 15.495 12.002681 18.987319 0.0e+00
## high-med  6.365  2.872681  9.857319 2.7e-06
plot(TukeyHSD(aov.out, which=c("dose")))

## Finally, we could have also run Tukey tests to compare all cell means
TukeyHSD(aov.out, conf.level=.99)
##   Tukey multiple comparisons of means
##     99% family-wise confidence level
## 
## Fit: aov(formula = len ~ supp * dose, data = ToothGrowth)
## 
## $supp
##       diff       lwr       upr     p adj
## VC-OJ -3.7 -6.203448 -1.196552 0.0002312
## 
## $dose
##            diff       lwr       upr   p adj
## med-low   9.130  5.637681 12.622319 0.0e+00
## high-low 15.495 12.002681 18.987319 0.0e+00
## high-med  6.365  2.872681  9.857319 2.7e-06
## 
## $`supp:dose`
##                  diff        lwr        upr     p adj
## VC:low-OJ:low   -5.25 -11.012826  0.5128257 0.0242521
## OJ:med-OJ:low    9.47   3.707174 15.2328257 0.0000046
## VC:med-OJ:low    3.54  -2.222826  9.3028257 0.2640208
## OJ:high-OJ:low  12.83   7.067174 18.5928257 0.0000000
## VC:high-OJ:low  12.91   7.147174 18.6728257 0.0000000
## OJ:med-VC:low   14.72   8.957174 20.4828257 0.0000000
## VC:med-VC:low    8.79   3.027174 14.5528257 0.0000210
## OJ:high-VC:low  18.08  12.317174 23.8428257 0.0000000
## VC:high-VC:low  18.16  12.397174 23.9228257 0.0000000
## VC:med-OJ:med   -5.93 -11.692826 -0.1671743 0.0073930
## OJ:high-OJ:med   3.36  -2.402826  9.1228257 0.3187361
## VC:high-OJ:med   3.44  -2.322826  9.2028257 0.2936430
## OJ:high-VC:med   9.29   3.527174 15.0528257 0.0000069
## VC:high-VC:med   9.37   3.607174 15.1328257 0.0000058
## VC:high-OJ:high  0.08  -5.682826  5.8428257 1.0000000
plot(TukeyHSD(aov.out))