Load packages
library(mosaic)
library(dplyr)
library(ggplot2)
library(ggvis)
library(parallel)


Ambiguous prose

#### Load the data from the web
ambiguous <- read.csv("http://www.bradthiessen.com/html5/data/ambiguousprose.csv")
head(ambiguous)
##   Condition Comprehension
## 1     After             6
## 2     After             5
## 3     After             4
## 4     After             2
## 5     After             1
## 6     After             3
table(ambiguous$Condition, ambiguous$Comprehension)
##         
##          1 2 3 4 5 6 7
##   After  2 4 6 3 3 1 0
##   Before 0 1 2 3 5 7 1
##   None   1 4 5 6 2 1 0
ambiguous %>%
  group_by(Condition) %>%
  summarize(comprehension = mean(Comprehension), sd = sd(Comprehension), median=median(Comprehension), n = n())
## Source: local data frame [3 x 5]
## 
##   Condition comprehension       sd median  n
## 1     After      3.210526 1.397575      3 19
## 2    Before      4.947368 1.311220      5 19
## 3      None      3.368421 1.256562      3 19
ambiguous %>%
  summarize(comprehension = mean(Comprehension), sd = sd(Comprehension), median=median(Comprehension), n = n())
##   comprehension       sd median  n
## 1      3.842105 1.521154      4 57
dotPlot( ~ Comprehension | Condition, data = ambiguous, nint = 17,
          endpoints = c(-2, 10), layout = c(1,3), aspect = .35, cex=.7, 
          xlab = "Comprehension score (0-7)")

## Take some random shuffles and calculate means
mean(Comprehension ~ shuffle(Condition), data=ambiguous)
##    After   Before     None 
## 4.105263 3.421053 4.000000
##### MAD approach
xyplot(jitter(Comprehension) ~ Condition, data = ambiguous, alpha = 0.6, cex = 1.2, par.settings=simpleTheme(col="black"))

# Create our test statistic (mean absolute difference in means)
test.stat <- MAD(mean(Comprehension ~ Condition, data = ambiguous))
# Randomly shuffle the "weight" groups 100,000 times and calculate our test statistic
rtest.stats = do(100000) * MAD(mean(Comprehension ~ shuffle(Condition), data=ambiguous))

histogram(~result, data = rtest.stats, col="grey", xlim=c(-.2, 1.75), 
          xlab = "MAD values (assuming null hypothesis is true)", width=.07,
          main=paste("mean =", mean(rtest.stats$result, na.rm=T), "; std. dev =", sd(rtest.stats$result, na.rm=T)))

ladd(panel.abline(v=test.stat))

tally(~ (result >= test.stat), format="prop", data=rtest.stats)
## 
##    TRUE   FALSE 
## 0.00086 0.99914
#### Load the data from the web
recall <- read.csv("http://www.bradthiessen.com/html5/data/recall.csv")
head(recall)
##   Condition Recall
## 1     After      7
## 2     After      5
## 3     After      5
## 4     After      5
## 5     After      2
## 6     After      8
mean(recall$Recall ~ recall$Condition)
##    After   Before     None 
## 5.368421 8.263158 6.631579
sd(recall$Recall ~ recall$Condition)
##    After   Before     None 
## 1.460994 1.820931 2.005839
mean(recall$Recall)
## [1] 6.754386
sd(recall$Recall)
## [1] 2.115257
table(recall$Recall, recall$Condition)
##     
##      After Before None
##   2      1      0    0
##   3      1      0    2
##   4      0      1    0
##   5     11      1    4
##   6      2      1    3
##   7      2      2    3
##   8      2      4    3
##   9      0      5    3
##   10     0      4    1
##   11     0      1    0
dotPlot( ~ Recall | Condition, data = recall, nint = 1000, cex=.3, 
          endpoints = c(-1, 12), layout = c(1,3), aspect = .35, 
          xlab = "Comprehension score (0-11)")

#### Load the data
ambiguous <- read.csv("http://www.bradthiessen.com/html5/data/ambiguousprose.csv")

#### Run an ANOVA modeling comprehension as a function of condition (treatments)
mod1 <- aov(Comprehension ~ Condition, data=ambiguous)

#### Summarize the model in ANOVA format
anova(mod1)
## Analysis of Variance Table
## 
## Response: Comprehension
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## Condition  2 35.053 17.5263  10.012 0.0002002 ***
## Residuals 54 94.526  1.7505                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#### Load the diet data
diet <- read.csv("http://www.bradthiessen.com/html5/data/diet.csv")

diet %>%
  group_by(Diet) %>%
  summarize(n = n(), mean=mean(WeightLoss), sd=sd(WeightLoss))
## Source: local data frame [4 x 4]
## 
##       Diet  n     mean       sd
## 1   Atkins 21 3.919048 6.045215
## 2   Ornish 20 6.560000 9.291218
## 3 WtWtchrs 26 4.592308 5.392656
## 4     Zone 26 4.884615 6.915472
mean(diet$WeightLoss)
## [1] 4.945161
sd(diet$WeightLoss)
## [1] 6.893058
##### MAD approach
# Create our test statistic (mean absolute difference in means)
## (4/6) to make the average over the 6 comparisons; not the 4 groups
test.stat <- MAD(mean(WeightLoss ~ Diet, data = diet)) * (4/6)

# Randomly shuffle the "diet" groups 100,000 times and calculate our test statistic
rtest.stats = do(100000) * MAD(mean(WeightLoss ~ shuffle(Diet), data=diet)) * (4/6)

histogram(~result, data = rtest.stats, col="grey", xlim=c(-.2, 7), 
          xlab = "MAD values (assuming null hypothesis is true)", width=.07,
          main=paste("mean =", mean(rtest.stats$result, na.rm=T), "; std. dev =", sd(rtest.stats$result, na.rm=T)))

ladd(panel.abline(v=test.stat))

tally(~ (result >= test.stat), format="prop", data=rtest.stats)
## 
##    TRUE   FALSE 
## 0.61972 0.38028
#### Run an ANOVA 
mod1 <- aov(WeightLoss ~ Diet, data=diet)

#### Summarize the model in ANOVA format
anova(mod1)
## Analysis of Variance Table
## 
## Response: WeightLoss
##           Df Sum Sq Mean Sq F value Pr(>F)
## Diet       3   77.6  25.866  0.5361 0.6587
## Residuals 89 4293.7  48.244
# Permutation approach
mod2 <- lm(WeightLoss ~ Diet, data=diet)
anova(mod2)
## Analysis of Variance Table
## 
## Response: WeightLoss
##           Df Sum Sq Mean Sq F value Pr(>F)
## Diet       3   77.6  25.866  0.5361 0.6587
## Residuals 89 4293.7  48.244
teststat.obs = summary(mod2)$fstatistic[1]
rtest.stats = do(10000) * summary(
  lm(WeightLoss ~ shuffle(Diet), data=diet))$fstatistic[1]
histogram(~ value, xlim=c(-.2,7), groups=value >= test.stat, data=rtest.stats, width=.1)

ladd(panel.abline(v=test.stat))

tally(~ (value >= teststat.obs), format="prop", data=rtest.stats)
## 
##   TRUE  FALSE 
## 0.6563 0.3437