# Your Turn #8 # Input data crimes <- c(328, 334, 372, 327) # Get expectation sum(crimes)/4 expected <- c(rep(340.25, 4)) # Calculate chi-square chi <- (crimes - expected)^2 / expected chi2 <- sum(chi) # Get p-value help(pchisq) pchisq(chi2, df=3, lower.tail=FALSE) library(mosaic) xpchisq(chi2, df=3) ## Run chi-square test probs <- c(1/4, 1/4, 1/4, 1/4) chisq.test(crimes, p = probs, rescale.p=TRUE) # Randomization-based chisq.test(crimes, p = probs, rescale.p=TRUE, simulate.p.value = TRUE, B=5000) # Why significant? chi # more in 3rd season probabilities <- crimes/sum(crimes) probabilities probabilities[3]/probabilities[1] # 13% more likely in 3rd season # What happens to the p-value with smaller sample size? crimesmall <- crimes/10 chisq.test(crimesmall, p = probs, rescale.p=TRUE) # What??? ################################################################## ### Test for normality diet <- read.csv("http://www.bradthiessen.com/html5/data/diet.csv") weight <- diet$WeightLoss histogram(~weight, col="lightblue", border="white", width=3) densityplot(~weight, lw=5) qqnorm(weight, col="darkorchid", cex=1.1, pch=16); qqline(weight, col="darkcyan", lw=3) shapiro.test(weight) # Cut data into 6 bins cuts <- cut(weight, breaks = c(-Inf, -4, 0, 4, 8, 12, Inf), labels=c("gained 4+ lbs", "gained 0-4 lbs ", "lost 0-4 lbs", "lost 4-8 lbs", "lost 8-12 lbs", "lost 12+ lbs")) tally(~cuts, margins=TRUE) # Calculate normal probabilities # Weights ~ normal(mean, sd) p1 <- pnorm(-4, mean=mean(weight), sd=sd(weight), lower.tail = TRUE) #P(x < -4) p2 <- pnorm(0, mean=mean(weight), sd=sd(weight))-pnorm(-4, mean=mean(weight), sd=sd(weight)) p3 <- pnorm(4, mean=mean(weight), sd=sd(weight))-pnorm(0, mean=mean(weight), sd=sd(weight)) p4 <- pnorm(8, mean=mean(weight), sd=sd(weight))-pnorm(4, mean=mean(weight), sd=sd(weight)) p5 <- pnorm(12, mean=mean(weight), sd=sd(weight))-pnorm(8, mean=mean(weight), sd=sd(weight)) p6 <- pnorm(12, mean=mean(weight), sd=sd(weight), lower.tail = FALSE) #P(X > 12) p1+p2+p3+p4+p5+p6 # Store all the probabilities as "probs" probs <- c(p1, p2, p3, p4, p5, p6) # I'll store those frequencies in a vector called "weightbins" # This code does it automatically weightbins <- c(length(cuts[cuts=="gained 4+ lbs"]), length(cuts[cuts=="gained 0-4 lbs"]), length(cuts[cuts=="lost 0-4 lbs"]), length(cuts[cuts=="lost 4-8 lbs"]), length(cuts[cuts=="lost 8-12 lbs"]), length(cuts[cuts=="lost 12+ lbs"])) # Now I run the chi-squared test chisq.test(weightbins, p = probs) # Next with randomization-based methods chisq.test(weightbins, p = probs, simulate.p.value = TRUE, B=5000) ################################################################## ### Independence test pain <- c(rep("reduced", 184), rep("not", 203), rep("reduced", 171), rep("not", 216), rep("reduced", 106), rep("not", 282)) treatment <- c(rep("verum", 387), rep("sham", 387), rep("traditional", 388)) acupuncture <- data.frame(treatment, pain) tally(pain ~ treatment, data=acupuncture, margins=TRUE) chisq.test(acupuncture$treatment, acupuncture$pain, correct=FALSE) preducedsham = 171/387 preducedtraditional = 282/388 preducedverum = 203/387 preducedtraditional/preducedsham ################################################################## ### Race race <- c(rep("white", 151), rep("black", 63)) death <- c(rep("yes", 19), rep("no", 132), rep("yes", 11), rep("no", 52)) deathpenalty <- data.frame(race, death) tally(death ~ race, data=deathpenalty, margins=TRUE) # P(11 or more black death | race has no impact) help(phyper) 1 - phyper(10, 63, 151, 30, lower.tail=TRUE) fisher.test(deathpenalty$race, deathpenalty$death, alternative="less") chisq.test(deathpenalty$race, deathpenalty$death, correct=FALSE)