Load packages
library(mosaic)
library(dplyr)
library(ggplot2)
library(ggvis)
library(parallel)


ANOVA assumptions

First, let’s take a look at the “comprehending ambiguous prose” study:

#### Load the data
ambiguous <- read.csv("http://www.bradthiessen.com/html5/data/ambiguousprose.csv")

#### Run an ANOVA modeling comprehension as a function of condition (treatments)
mod1 <- aov(Comprehension ~ Condition, data=ambiguous)

#### Summarize the model in ANOVA format
anova(mod1)
## Analysis of Variance Table
## 
## Response: Comprehension
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## Condition  2 35.053 17.5263  10.012 0.0002002 ***
## Residuals 54 94.526  1.7505                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
### Box plots
bwplot(~Comprehension|Condition, data=ambiguous, lwd=3,
    ylab="Condition", xlab="Comprehension", 
   main="Ambiguous Prose", 
   layout=(c(1,3)))

#### To check for normality, we could look at plots of the data
densityplot(~Comprehension|Condition, data=ambiguous, lwd=3,
    main="Comprehension by Condition",
   xlab="Comprehension score",
   layout=c(1,3))

#### or a Q-Q plot for each condition
after <- ambiguous %>%
  filter(Condition=="After")
qqnorm(after$Comprehension, ylab = "Comprehension"); qqline(ambiguous$Comprehension, ylab = "Comprehension", lwd=3)

before <- ambiguous %>%
  filter(Condition=="Before")
qqnorm(before$Comprehension, ylab = "Comprehension"); qqline(ambiguous$Comprehension, ylab = "Comprehension", lwd=3)

none <- ambiguous %>%
  filter(Condition=="None")
qqnorm(none$Comprehension, ylab = "Comprehension"); qqline(ambiguous$Comprehension, ylab = "Comprehension", lwd=3)

## Normality test
shapiro.test(ambiguous$Comprehension)
## 
##  Shapiro-Wilk normality test
## 
## data:  ambiguous$Comprehension
## W = 0.9433, p-value = 0.009909
## Bartlett's test (test for equal variances)
bartlett.test(Comprehension~Condition, data=ambiguous)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Comprehension by Condition
## Bartlett's K-squared = 0.2025, df = 2, p-value = 0.9037
## Post-hoc tests
## Tukey's HSD
TukeyHSD(mod1, conf.level=.95)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Comprehension ~ Condition, data = ambiguous)
## 
## $Condition
##                    diff        lwr        upr     p adj
## Before-After  1.7368421  0.7023389  2.7713453 0.0004836
## None-After    0.1578947 -0.8766084  1.1923979 0.9282350
## None-Before  -1.5789474 -2.6134505 -0.5444442 0.0015494
## We can plot this confidence interval
plot(TukeyHSD(mod1))

## We could also use t-tests with the Bonferroni adjustment
## to compare pairs of means
with(ambiguous, pairwise.t.test(Comprehension, Condition, p.adjust.method="bonferroni"))
## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  Comprehension and Condition 
## 
##        After  Before
## Before 0.0005 -     
## None   1.0000 0.0016
## 
## P value adjustment method: bonferroni
#### Load the data
diet <- read.csv("http://www.bradthiessen.com/html5/data/diet.csv")

#### Run an ANOVA modeling comprehension as a function of condition (treatments)
mod2 <- aov(WeightLoss ~ Diet, data=diet)

#### Summarize the model in ANOVA format
anova(mod2)
## Analysis of Variance Table
## 
## Response: WeightLoss
##           Df Sum Sq Mean Sq F value Pr(>F)
## Diet       3   77.6  25.866  0.5361 0.6587
## Residuals 89 4293.7  48.244
### Box plots
bwplot(~WeightLoss|Diet, data=diet, lwd=3,
    ylab="Weight Loss", xlab="Diet", 
   main="Diet and Weightloss", 
   layout=(c(1,4)))

#### To check for normality, we could look at plots of the data
densityplot(~WeightLoss|Diet, data=diet, lwd=3,
    main="Weight loss by diet",
   xlab="Pounds lost",
   layout=c(1,4))

#### or a Q-Q plot for each condition
atkins <- diet %>%
  filter(Diet=="Atkins")
qqnorm(atkins$WeightLoss, ylab = "Atkins"); qqline(atkins$WeightLoss, ylab = "Atkins", lwd=3)

ornish <- diet %>%
  filter(Diet=="Ornish")
qqnorm(ornish$WeightLoss, ylab = "Ornish"); qqline(ornish$WeightLoss, ylab = "Ornish", lwd=3)

wtwtchrs <- diet %>%
  filter(Diet=="WtWtchrs")
qqnorm(wtwtchrs$WeightLoss, ylab = "WtWtchrs"); qqline(wtwtchrs$WeightLoss, ylab = "WtWtchrs", lwd=3)

zone <- diet %>%
  filter(Diet=="Zone")
qqnorm(zone$WeightLoss, ylab = "Zone"); qqline(zone$WeightLoss, ylab = "Zone", lwd=3)

## Normality test
shapiro.test(diet$WeightLoss)
## 
##  Shapiro-Wilk normality test
## 
## data:  diet$WeightLoss
## W = 0.9558, p-value = 0.003198
## Bartlett's test (test for equal variances)
bartlett.test(WeightLoss ~ Diet, data=diet)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  WeightLoss by Diet
## Bartlett's K-squared = 7.235, df = 3, p-value = 0.06477
## This command creates a series of plots...
plot(mod2)

## Post-hoc tests
## Tukey's HSD
TukeyHSD(mod2, conf.level=.95)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = WeightLoss ~ Diet, data = diet)
## 
## $Diet
##                       diff       lwr      upr     p adj
## Ornish-Atkins    2.6409524 -3.041011 8.322916 0.6178040
## WtWtchrs-Atkins  0.6732601 -4.662346 6.008866 0.9874777
## Zone-Atkins      0.9655678 -4.370039 6.301174 0.9646520
## WtWtchrs-Ornish -1.9676923 -7.376586 3.441201 0.7765563
## Zone-Ornish     -1.6753846 -7.084278 3.733509 0.8491203
## Zone-WtWtchrs    0.2923077 -4.751511 5.336127 0.9987459
## We can plot this confidence interval
plot(TukeyHSD(mod2))

## We could also use t-tests with the Bonferroni adjustment
## to compare pairs of means
with(diet, pairwise.t.test(WeightLoss, Diet, p.adjust.method="bonferroni"))
## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  WeightLoss and Diet 
## 
##          Atkins Ornish WtWtchrs
## Ornish   1      -      -       
## WtWtchrs 1      1      -       
## Zone     1      1      1       
## 
## P value adjustment method: bonferroni