# Your Turn #8
# Input data
crimes <- c(328, 334, 372, 327)
# Get expectation
sum(crimes)/4
expected <- c(rep(340.25, 4))
# Calculate chi-square
chi <- (crimes - expected)^2 / expected
chi2 <- sum(chi)
# Get p-value
help(pchisq)
pchisq(chi2, df=3, lower.tail=FALSE)
library(mosaic)
xpchisq(chi2, df=3)
## Run chi-square test
probs <- c(1/4, 1/4, 1/4, 1/4)
chisq.test(crimes, p = probs, rescale.p=TRUE)
# Randomization-based
chisq.test(crimes, p = probs, rescale.p=TRUE, simulate.p.value = TRUE, B=5000)
# Why significant?
chi
# more in 3rd season
probabilities <- crimes/sum(crimes)
probabilities
probabilities[3]/probabilities[1]
# 13% more likely in 3rd season
# What happens to the p-value with smaller sample size?
crimesmall <- crimes/10
chisq.test(crimesmall, p = probs, rescale.p=TRUE)
# What???
##################################################################
### Test for normality
diet <- read.csv("http://www.bradthiessen.com/html5/data/diet.csv")
weight <- diet$WeightLoss
histogram(~weight, col="lightblue", border="white", width=3)
densityplot(~weight, lw=5)
qqnorm(weight, col="darkorchid", cex=1.1, pch=16); qqline(weight, col="darkcyan", lw=3)
shapiro.test(weight)
# Cut data into 6 bins
cuts <- cut(weight, breaks = c(-Inf, -4, 0, 4, 8, 12, Inf), labels=c("gained 4+ lbs", "gained 0-4 lbs ", "lost 0-4 lbs", "lost 4-8 lbs", "lost 8-12 lbs", "lost 12+ lbs"))
tally(~cuts, margins=TRUE)
# Calculate normal probabilities
# Weights ~ normal(mean, sd)
p1 <- pnorm(-4, mean=mean(weight), sd=sd(weight), lower.tail = TRUE) #P(x < -4)
p2 <- pnorm(0, mean=mean(weight), sd=sd(weight))-pnorm(-4, mean=mean(weight), sd=sd(weight))
p3 <- pnorm(4, mean=mean(weight), sd=sd(weight))-pnorm(0, mean=mean(weight), sd=sd(weight))
p4 <- pnorm(8, mean=mean(weight), sd=sd(weight))-pnorm(4, mean=mean(weight), sd=sd(weight))
p5 <- pnorm(12, mean=mean(weight), sd=sd(weight))-pnorm(8, mean=mean(weight), sd=sd(weight))
p6 <- pnorm(12, mean=mean(weight), sd=sd(weight), lower.tail = FALSE) #P(X > 12)
p1+p2+p3+p4+p5+p6
# Store all the probabilities as "probs"
probs <- c(p1, p2, p3, p4, p5, p6)
# I'll store those frequencies in a vector called "weightbins"
# This code does it automatically
weightbins <- c(length(cuts[cuts=="gained 4+ lbs"]),
length(cuts[cuts=="gained 0-4 lbs"]),
length(cuts[cuts=="lost 0-4 lbs"]),
length(cuts[cuts=="lost 4-8 lbs"]),
length(cuts[cuts=="lost 8-12 lbs"]),
length(cuts[cuts=="lost 12+ lbs"]))
# Now I run the chi-squared test
chisq.test(weightbins, p = probs)
# Next with randomization-based methods
chisq.test(weightbins, p = probs, simulate.p.value = TRUE, B=5000)
##################################################################
### Independence test
pain <- c(rep("reduced", 184), rep("not", 203),
rep("reduced", 171), rep("not", 216),
rep("reduced", 106), rep("not", 282))
treatment <- c(rep("verum", 387),
rep("sham", 387),
rep("traditional", 388))
acupuncture <- data.frame(treatment, pain)
tally(pain ~ treatment, data=acupuncture, margins=TRUE)
chisq.test(acupuncture$treatment, acupuncture$pain, correct=FALSE)
preducedsham = 171/387
preducedtraditional = 282/388
preducedverum = 203/387
preducedtraditional/preducedsham
##################################################################
### Race
race <- c(rep("white", 151),
rep("black", 63))
death <- c(rep("yes", 19), rep("no", 132),
rep("yes", 11), rep("no", 52))
deathpenalty <- data.frame(race, death)
tally(death ~ race, data=deathpenalty, margins=TRUE)
# P(11 or more black death | race has no impact)
help(phyper)
1 - phyper(10, 63, 151, 30, lower.tail=TRUE)
fisher.test(deathpenalty$race, deathpenalty$death, alternative="less")
chisq.test(deathpenalty$race, deathpenalty$death, correct=FALSE)