Loading packages

Let’s load some packages we’ll use in these assignments:

require(ggplot2)
require(dplyr)
require(mosaic)
require(parallel)



Assignment #18: Confidence Intervals


Questions 1-2: Coal
## Create dataset
# The 23 coal measurements, kept in their original order, stored as the
# single column `values` of a data frame.
coal <- data.frame(
  values = c(
    23.870, 23.730, 23.712, 23.760, 23.640, 23.850,
    23.840, 23.860, 23.940, 23.830, 23.877, 23.700,
    23.796, 23.727, 23.778, 23.740, 23.890, 23.780,
    23.678, 23.771, 23.860, 23.690, 23.800
  )
)

## Verify the mean
# Use the formula interface (~variable, data = ...) so that `values` is
# looked up as a column of `coal`; a bare `values` is not an object in the
# workspace.  This matches the bootstrap call further down.
mean(~values, data = coal)
## [1] 23.79
## Get other statistics
# Five-number summary plus mean, sd, n and the count of missing values
favstats(~values, data = coal)
##    min    Q1 median    Q3   max  mean      sd  n missing
##  23.64 23.73  23.78 23.86 23.94 23.79 0.07828 23       0
## 95% Confidence Interval - parametric approach
## We're given sigma = 0.1.  We'll use the same
## approach we used in the syntax for Activity #18...

# Enter values for the observed sample mean, population std. dev., sample size, and confidence level
# (formula interface: `values` is looked up as a column of `coal`)
observedMean <- mean(~values, data = coal)
sigma <- 0.1
n <- nrow(coal)
confidence <- 0.95
# Area left in EACH tail outside the interval (alpha/2 in the usual notation)
alpha <- (1 - confidence) / 2
# This calculates the standard error of the mean, using the given sigma
stdError <- sigma / sqrt(n)
# This calculates the confidence interval limits:
# observed mean -/+ (standard normal quantile at 1 - alpha) * standard error
lower <- observedMean - qnorm(1 - alpha) * stdError
upper <- observedMean + qnorm(1 - alpha) * stdError
paste("CI: (", lower, " , ", upper, ")")
## [1] "CI: ( 23.7469145345147  ,  23.8286506828766 )"
## 99% Confidence Interval - parametric approach
# Only the confidence level changes; observedMean, sigma and n are reused
# from the 95% calculation above.
confidence <- 0.99
alpha <- (1 - confidence) / 2
stdError <- sigma / sqrt(n)
# Standard normal quantile for the new level, and the resulting half-width
zCritical <- qnorm(1 - alpha)
marginOfError <- zCritical * stdError
lower <- observedMean - marginOfError
upper <- observedMean + marginOfError
paste("CI: (", lower, " , ", upper, ")")
## [1] "CI: ( 23.7340728549016  ,  23.8414923624897 )"
## Bootstrap method (99% confidence interval)
# Repeat 10,000 times: take a resample of `coal` and record the mean of
# `values`; the recorded means are collected in meanCoalValues.
# NOTE(review): no random seed is set in this chunk, so the bootstrap means
# (and the interval below) will presumably vary slightly from run to run.
meanCoalValues <- do(10000) * mean(~values, data = resample(coal))
# Density curve of the recorded means (column `result`), drawn as a thick
# dark-blue line with the individual points suppressed.
densityplot(~result, data=meanCoalValues, plot.points = FALSE, col="darkblue", lwd=4)

[Figure: density plot of the 10,000 bootstrap sample means]

# 99% interval read off the quantiles of the bootstrap means stored in
# meanCoalValues (method = "quantile"), rather than from a normal formula.
confint(meanCoalValues, level = 0.99, method = "quantile")
##     name lower upper level   method
## 1 result 23.75 23.83  0.99 quantile


Question 3: Unknown population standard deviation
## Create dataset
# The 22 measurements for Question 3, kept in their original order, stored
# as the single column `values` of a data frame.
coal2 <- data.frame(
  values = c(
    30.990, 31.030, 31.060, 30.921, 30.920,
    30.990, 31.024, 30.929, 31.050, 30.991,
    31.208, 31.330, 30.830, 30.810, 31.060,
    30.800, 31.091, 31.170, 31.026, 31.020,
    30.880, 31.125
  )
)

## Verify the mean and standard deviation
# Formula interface: `values` is looked up as a column of `coal2` rather
# than as an object in the workspace.
mean(~values, data = coal2)
## [1] 31.01
sd(~values, data = coal2)
## [1] 0.1294
## 95% Confidence Interval
# sigma is unknown here, so use a one-sample t interval.  The formula must
# name the column (`values`), and `data` belongs to t.test(), not confint();
# confint() then just extracts the estimate and interval from the test.
# (The label of the first column below may differ by mosaic version.)
confint(t.test(~values, data = coal2, conf.level = 0.95))
## mean of x     lower     upper     level 
##     31.01     30.95     31.07      0.95



Questions 4-7: Bootstrap Method - Computer crashes

See solutions in activity #18 syntax: http://www.bradthiessen.com/html5/stats/m300/activity18.html


Question 8: Euro - Confidence interval for a proportion
## Create dataset (1 = heads, 0 = tails)
# 110 tails followed by 140 heads, stored as the single column `heads`.
euro <- data.frame(heads = rep(c(0, 1), times = c(110, 140)))

## Get confidence interval
# `euro$heads == 1` is a logical vector that is TRUE for each head; the
# recorded output below shows it counted as 140 successes in 250 trials.
# No null value or confidence level is given, so the output reflects the
# defaults (p = 0.5, 95% interval).
# NOTE(review): passing a raw logical vector presumably relies on the
# binom.test() provided by mosaic rather than the one in stats -- confirm
# mosaic is loaded before this line runs.
binom.test(euro$heads == 1)
## 
##  Exact binomial test
## 
## data:  x
## number of successes = 140, number of trials = 250, p-value =
## 0.06642
## alternative hypothesis: true probability of success is not equal to 0.5
## 95 percent confidence interval:
##  0.4961 0.6225
## sample estimates:
## probability of success 
##                   0.56