--- title: "Your turn - Lesson 10" author: "Linear Regression" output: html_document: css: http://www.bradthiessen.com/batlab2.css highlight: pygments theme: spacelab fig_width: 5.6 fig_height: 4 --- ***** **Author(s):** [Enter names of people working on these solutions] ***** ```{r message=FALSE, echo=FALSE} # Above, type your name in the "Author(s)" section # Load packages library(mosaic) library(ggvis) library(broom) library(ggplot2) ```
# NBA dataset

```{r}
# (a)
# Load data
nba <- read.csv("http://www.bradthiessen.com/html5/data/nba.csv")

# Examine first several rows
head(nba)

# Scatterplot of Wins against PtsAgainst with the least-squares line overlaid
nba %>%
  ggvis(x = ~PtsAgainst, y = ~Wins,
        fill := "steelblue", stroke := "steelblue",
        fillOpacity := .6, strokeOpacity := 1) %>%
  layer_points(size := 100) %>%
  layer_model_predictions(model = "lm", strokeWidth := 3, stroke := "red") %>%
  add_axis("x", title = "Points Against (average per game)", grid = FALSE) %>%
  scale_numeric("x", domain = c(85, 110), nice = FALSE, clamp = TRUE) %>%
  add_axis("y", title = "Wins", grid = FALSE) %>%
  scale_numeric("y", domain = c(10, 70), nice = FALSE, clamp = TRUE)
```

```{r}
# (b)
# Simple linear regression of Wins on PtsAgainst
model <- lm(Wins ~ PtsAgainst, data = nba)
model

# (d)
summary(model)

# (e)
anova(model)

# (g)
# Confidence interval for the mean response at PtsAgainst = 100
newdata <- data.frame(PtsAgainst = 100)
predict(model, newdata, interval = "confidence")

# (h)
# Prediction interval for a single new observation at PtsAgainst = 100
predict(model, newdata, interval = "prediction")

# This will plot the confidence and prediction intervals
# predict() without newdata returns fit/lwr/upr for every row of nba
d <- data.frame(nba, predict(model, interval = "prediction"))
ggplot(d, aes(x = PtsAgainst, y = Wins)) +
  geom_ribbon(aes(ymin = lwr, ymax = upr, fill = 'prediction'), alpha = 0.3) +
  geom_smooth(method = "lm", aes(fill = 'confidence'), alpha = 0.3) +
  geom_smooth(method = "lm", se = FALSE, color = 'blue') +
  geom_point() +
  scale_fill_manual('Interval', values = c('steelblue', 'red')) +
  ylab('Wins') +
  xlab('Points Against')

# (i)
mplot(model)

# (k)
# Randomization test for the slope.
# Take the observed slope from the fitted model instead of retyping a rounded
# value, so it always matches the data that was actually loaded.
test.slope <- coef(model)[["PtsAgainst"]]
rand.slopes <- do(10000) * lm(Wins ~ shuffle(PtsAgainst), data = nba)
# NOTE(review): the column name `PtsAgainst.` depends on how the installed
# mosaic version cleans the "shuffle(PtsAgainst)" coefficient name -- confirm
# with names(rand.slopes).
histogram(~PtsAgainst., data = rand.slopes,
          xlab = "Possible slopes assuming null hypothesis is true",
          groups = PtsAgainst. <= test.slope)  # Highlight values <= test statistic (the tail counted in the p-value)
ladd(panel.abline(v = test.slope))             # Add vertical line at test statistic
p <- tally(~rand.slopes$PtsAgainst. <= test.slope, format = "prop")
p["TRUE"]  # p-value: proportion of shuffled slopes at or below the observed slope

# (L)
# Bootstrap distribution of the slope
bstrap <- do(10000) * lm(Wins ~ PtsAgainst, data = resample(nba))
densityplot(~PtsAgainst, data = bstrap, plot.points = FALSE,
            col = "steelblue", lwd = 4)
# NOTE(review): positional cdata(p, vals, data) form -- confirm it matches the
# cdata() signature of the installed mosaic version.
cdata(0.95, PtsAgainst, data = bstrap)

# (m)
logLik(model)
AIC(model)
```