Load packages
library(mosaic)
library(dplyr)
library(ggplot2)
library(ggvis)
library(parallel)


AxS ANOVA: Optimism Example

## Load data
optimism <- read.csv("http://www.bradthiessen.com/html5/data/optimism.csv")

## Look at variable types
str(optimism)
## 'data.frame':    54 obs. of  3 variables:
##  $ subject : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ age     : int  16 16 16 16 16 16 16 16 16 16 ...
##  $ optimism: int  35 32 33 32 31 29 29 27 27 28 ...
## Change subject and age to factor variables
optimism$age <-as.factor(optimism$age)
optimism$subject <-as.factor(optimism$subject)

## Peek at data
head(optimism)
##   subject age optimism
## 1       1  16       35
## 2       2  16       32
## 3       3  16       33
## 4       4  16       32
## 5       5  16       31
## 6       6  16       29
## Summary of data
favstats(optimism ~ age, data=optimism)
##   .group min    Q1 median    Q3 max     mean       sd  n missing
## 1     16  13 24.00     27 30.50  35 25.88889 6.578833 18       0
## 2     18  13 25.25     29 31.75  39 26.94444 7.487026 18       0
## 3     20  12 19.00     24 27.75  32 23.38889 5.922429 18       0
## Some summary plots
densityplot(~optimism|age, data=optimism, lwd=2,
    main="Optimism by age",
   layout=c(1,3))

## Let's also run a test to check for homogeneity of variances
bartlett.test(optimism~age, data=optimism)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  optimism by age
## Bartlett's K-squared = 0.9167, df = 2, p-value = 0.6323
## Regular one-way ANOVA
aov.out <- lm(optimism ~ age, data=optimism)
anova(aov.out)
## Analysis of Variance Table
## 
## Response: optimism
##           Df  Sum Sq Mean Sq F value Pr(>F)
## age        2  120.04  60.019  1.3396  0.271
## Residuals 51 2285.00  44.804
## No significant differences among age means
## Check assumptions
plot(aov.out)

##############################################

# Subject means
mean(optimism ~ subject, data=optimism)
##        1        2        3        4        5        6        7        8 
## 35.33333 32.66667 31.00000 31.00000 30.00000 29.33333 29.00000 27.66667 
##        9       10       11       12       13       14       15       16 
## 27.33333 26.33333 25.66667 25.33333 24.00000 22.66667 17.00000 16.33333 
##       17       18 
## 13.66667 13.00000
sd(optimism$optimism)
## [1] 6.736324
## AxS ANOVA
# Take a look at profile plots (optimism by subject at each age)
interaction.plot(x.factor=optimism$age, trace.factor=optimism$subject,
                 response=optimism$optimism,
                 fun=mean, type="b", legend=F)

# Same plot using ggplot2
ggplot(data = optimism, aes(x = age, y = optimism, group = subject)) +
  ggtitle("Optimism by age") +
  ylab("optimism") + geom_line()

## Run the AxS ANOVA
## Specifying the error as subjects nested within age categories
axs <- aov(optimism ~ age + Error(subject/age), data=optimism)
summary(axs)
## 
## Error: subject
##           Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 17   2182   128.3               
## 
## Error: subject:age
##           Df Sum Sq Mean Sq F value   Pr(>F)    
## age        2  120.0   60.02   19.75 2.03e-06 ***
## Residuals 34  103.3    3.04                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1


AxS ANOVA: Grocery Example

## Load data
groceries <- read.csv("http://www.bradthiessen.com/html5/data/groceries.csv")

## Look at variable types
str(groceries)
## 'data.frame':    40 obs. of  3 variables:
##  $ price: num  1.17 1.77 1.49 0.65 1.58 3.13 2.09 0.62 5.89 4.46 ...
##  $ store: Factor w/ 4 levels "A","B","C","D": 1 1 1 1 1 1 1 1 1 1 ...
##  $ item : Factor w/ 10 levels "aspirin","bread",..: 7 9 8 4 2 3 5 10 6 1 ...
## We want store and item to be factor variables

## Peek at data
head(groceries)
##   price store     item
## 1  1.17     A  lettuce
## 2  1.77     A potatoes
## 3  1.49     A     milk
## 4  0.65     A     eggs
## 5  1.58     A    bread
## 6  3.13     A   cereal
## Summary of data
favstats(price ~ store, data=groceries)
##   .group  min     Q1 median     Q3  max  mean       sd  n missing
## 1      A 0.62 1.2500  1.675 2.8700 5.89 2.285 1.717933 10       0
## 2      B 0.65 1.6925  1.830 2.8575 5.99 2.465 1.707430 10       0
## 3      C 0.65 1.4150  1.940 2.7650 5.99 2.436 1.765296 10       0
## 4      D 0.69 1.3650  1.940 2.9400 6.99 2.626 1.987758 10       0
mean(price ~ item, data=groceries)
##           aspirin             bread            cereal              eggs 
##            4.8600            1.7650            3.0900            0.8550 
##       ground.beef laundry.detergent           lettuce              milk 
##            2.1375            6.2150            1.3825            1.6400 
##          potatoes       tomato.soup 
##            1.9325            0.6525
sd(price ~ item, data=groceries)
##           aspirin             bread            cereal              eggs 
##        0.29518356        0.15242484        0.07118052        0.21809784 
##       ground.beef laundry.detergent           lettuce              milk 
##        0.25500000        0.51881275        0.27097048        0.12909944 
##          potatoes       tomato.soup 
##        0.10843585        0.02872281
## Some summary plots
densityplot(~price|store, data=groceries, lwd=2,
    main="Price by store",
   layout=c(1,4))

## Let's also run a test to check for homogeneity of variances
bartlett.test(price~store, data=groceries)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  price by store
## Bartlett's K-squared = 0.2701, df = 3, p-value = 0.9655
## Regular one-way ANOVA
aov.out <- lm(price ~ store, data=groceries)
anova(aov.out)
## Analysis of Variance Table
## 
## Response: price
##           Df  Sum Sq Mean Sq F value Pr(>F)
## store      3   0.586  0.1953  0.0604 0.9803
## Residuals 36 116.407  3.2335
## No significant differences among age means
## Check assumptions
plot(aov.out)

##############################################

## AxS ANOVA
# Take a look at profile plots (optimism by subject at each age)
interaction.plot(x.factor=groceries$store, trace.factor=groceries$item,
                 response=groceries$price,
                 fun=mean, type="l", legend=F)

# Same plot using ggplot2
ggplot(data = groceries, aes(x = store, y = price, group = item)) +
  ggtitle("Price by store") +
  ylab("Price") + geom_line()

## Run the AxS ANOVA
## Specifying the error as subjects nested within age categories
axs <- aov(price ~ store + Error(item/store), data=groceries)
summary(axs)
## 
## Error: item
##           Df Sum Sq Mean Sq F value Pr(>F)
## Residuals  9  115.2    12.8               
## 
## Error: item:store
##           Df Sum Sq Mean Sq F value Pr(>F)  
## store      3 0.5859 0.19529   4.344 0.0127 *
## Residuals 27 1.2137 0.04495                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1