Load packages
library(mosaic)
library(dplyr)
library(ggplot2)
library(ggvis)
library(parallel)


ANOVA assumptions

First, let’s take a look at the “comprehending ambiguous prose” study:

#### Load the data
ambiguous <- read.csv("http://www.bradthiessen.com/html5/data/ambiguousprose.csv")

#### Run an ANOVA modeling comprehension as a function of condition (treatments)
mod1 <- aov(Comprehension ~ Condition, data=ambiguous)

#### Summarize the model in ANOVA format
anova(mod1)
## Analysis of Variance Table
## 
## Response: Comprehension
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## Condition  2 35.053 17.5263  10.012 0.0002002 ***
## Residuals 54 94.526  1.7505                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
### Box plots
bwplot(~Comprehension|Condition, data=ambiguous, lwd=3,
    ylab="Condition", xlab="Comprehension", 
   main="Ambiguous Prose", 
   layout=(c(1,3)))

#### To check for normality, we could look at plots of the data
densityplot(~Comprehension|Condition, data=ambiguous, lwd=3,
    main="Comprehension by Condition",
   xlab="Comprehension score",
   layout=c(1,3))

#### or a Q-Q plot for each condition
after <- ambiguous %>%
  filter(Condition=="After")
qqnorm(after$Comprehension, ylab = "Comprehension"); qqline(ambiguous$Comprehension, ylab = "Comprehension", lwd=3)

before <- ambiguous %>%
  filter(Condition=="Before")
qqnorm(before$Comprehension, ylab = "Comprehension"); qqline(ambiguous$Comprehension, ylab = "Comprehension", lwd=3)

none <- ambiguous %>%
  filter(Condition=="None")
qqnorm(none$Comprehension, ylab = "Comprehension"); qqline(ambiguous$Comprehension, ylab = "Comprehension", lwd=3)

## Normality test
shapiro.test(ambiguous$Comprehension)
## 
##  Shapiro-Wilk normality test
## 
## data:  ambiguous$Comprehension
## W = 0.9433, p-value = 0.009909
## Bartlett's test (test for equal variances)
bartlett.test(Comprehension~Condition, data=ambiguous)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Comprehension by Condition
## Bartlett's K-squared = 0.2025, df = 2, p-value = 0.9037
## Post-hoc tests
## Tukey's HSD
TukeyHSD(mod1, conf.level=.95)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Comprehension ~ Condition, data = ambiguous)
## 
## $Condition
##                    diff        lwr        upr     p adj
## Before-After  1.7368421  0.7023389  2.7713453 0.0004836
## None-After    0.1578947 -0.8766084  1.1923979 0.9282350
## None-Before  -1.5789474 -2.6134505 -0.5444442 0.0015494
## We can plot this confidence interval
plot(TukeyHSD(mod1))

## We could also use t-tests with the Bonferroni adjustment
## to compare pairs of means
with(ambiguous, pairwise.t.test(Comprehension, Condition, p.adjust.method="bonferroni"))
## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  Comprehension and Condition 
## 
##        After  Before
## Before 0.0005 -     
## None   1.0000 0.0016
## 
## P value adjustment method: bonferroni
#### Load the data
diet <- read.csv("http://www.bradthiessen.com/html5/data/diet.csv")

#### Run an ANOVA modeling comprehension as a function of condition (treatments)
mod2 <- aov(WeightLoss ~ Diet, data=diet)

#### Summarize the model in ANOVA format
anova(mod2)
## Analysis of Variance Table
## 
## Response: WeightLoss
##           Df Sum Sq Mean Sq F value Pr(>F)
## Diet       3   77.6  25.866  0.5361 0.6587
## Residuals 89 4293.7  48.244
### Box plots
bwplot(~WeightLoss|Diet, data=diet, lwd=3,
    ylab="Weight Loss", xlab="Diet", 
   main="Diet and Weightloss", 
   layout=(c(1,4)))