Loading packages

Let’s load some packages we’ll use in these assignments:

require(ggplot2)
require(dplyr)
require(mosaic)
require(parallel)



Assignment #8: Minority Coaches


Likelihood of 6 or fewer minority coaches at 3rd base.

Here’s our dataset:

Observed Caucasian Minority Total
1st Base 15 15 30
3rd Base 34 6 30
Total 49 21 60

Our sample of 60 coaches includes 30 3rd base coaches. Under our null hypothesis, these 30 people would have coached 3rd base regardless of their race.

We can now simulate this study.

## Create 60 coaches (30 who will coach 3rd base)
##    3rd Base = 1   and   1st Base = 0
Coaches <- c(rep(1, 30), rep(0, 30))

## Let's select 21 of these people at random to "be minorities"
## We'll simulate this process 50,000 times
ThirdBase <- do(50000) * sample(Coaches, 21, replace=FALSE)

## Now, let's count the number of 3rd base coaches who were selected in each replication
ThirdBase$sum <- rowSums(ThirdBase)

## Let's create a histogram showing the number of people who improved
histogram( ~sum, width=1, main="Minority 3rd Base Coaches", v=6, col="lightgrey", data=ThirdBase)

plot of chunk unnamed-chunk-3

## Find the p-value -- P(6 or fewer 3rd base coaches | race has no effect)
## The answer should be close to 1.5%.
prop( ~(sum<=6), data=ThirdBase)
##   TRUE 
## 0.0139




Assignment #8b: Practice Problems


Rather than simulating each scenario, let’s just use R’s built-in probability distributions.

## Question #1:  Geometric - P(1st win on 3rd trial)
# Remember the first input is the number of trials BEFORE the first win
dgeom(2, .1, log=FALSE)
## [1] 0.081
## Question #2:  Binomial - P(X >= 15 | p=.75)
# Same as 1 - P(X < 14)
1-pbinom(14, 16, 0.750, lower.tail=TRUE, log.p=FALSE)
## [1] 0.06348
## Question #3:  Geometric - P(1st win on 1st or 2nd trial)
# P(X = 1) + P(X = 2)
dgeom(0, .75, log=FALSE) + dgeom(1, .75, log=FALSE)
## [1] 0.9375
## Question #4:  Hypergeometric - P(0, 1, 2, 3, or 4)
# P(X = 0)
dhyper(0, 10, 40, 4, log = FALSE)
## [1] 0.3968
# P(X = 1)
dhyper(1, 10, 40, 4, log = FALSE)
## [1] 0.429
# P(X = 2)
dhyper(2, 10, 40, 4, log = FALSE)
## [1] 0.1524
# P(X = 3)
dhyper(3, 10, 40, 4, log = FALSE)
## [1] 0.02084
# P(X = 4)
dhyper(4, 10, 40, 4, log = FALSE)
## [1] 0.0009119
## Question #5:  NegBin - P(X = 3 in 4 or fewer trials)
pnbinom(1, 3, .6, lower.tail = TRUE, log.p = FALSE)
## [1] 0.4752
## Question #6:  Poisson - P(X = 0 | lambda = .25)
dpois(x=0, lambda=.25)
## [1] 0.7788
# Part b: P(X <= 1 | lambda = .25) 
ppois(q=1, lambda=.25, lower.tail=TRUE, log.p=FALSE)
## [1] 0.9735
## Question #7:  Hypergeometric - P(X=0)
dhyper(0, 6, 10, 5, log = FALSE)
## [1] 0.05769
## Question #8:  Poisson - P(X <= 3 | lambda = 47/7)
ppois(q=3, lambda=47/7, lower.tail=TRUE, log.p=FALSE)
## [1] 0.09793
## Question #10:  Exponential - P(X <= 7 | lambda = 1/20)
pexp(7, rate=1/20, lower.tail=TRUE, log.p=FALSE)
## [1] 0.2953




Assignment #9a: Industrial Robot


Questions 2-3: Find area under curve; sketch cdf.
## Find the antiderivative
cdf <- antiD(-(x^4) + (6/5) ~ x)
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## Find definite integral from 0 to 1
cdf(x=1) - cdf(x=0)
## [1] 1
## Plot cdf
plotFun(cdf, x.lim=range(0,1))

plot of chunk unnamed-chunk-5


Questions 4-7: Find probabilities; median; mean
## 4. Find P(X<0.5)
cdf(x=0.5) - cdf(x=0)
## [1] 0.5938
## 5. Find P(X>0.75)
cdf(x=1) - cdf(x=0.75)
## [1] 0.1475
## 6. Find the median
# First, let's get a visual estimate
plotFun(cdf, x.lim=range(0,1))

plot of chunk unnamed-chunk-6

plotFun(0.5 ~ x, add=TRUE, col="black")

plot of chunk unnamed-chunk-6

# Now, let's find the solution (between 0.3 and 0.5)
# We set 0.5 = -(x^5 / 5) + (6x / 5)
# or 0 = -(x^5 / 5) + (6x / 5) - 0.5
# We tell the computer to look somewhere between 0.3 and 0.5
findZeros(-(x^5/5)+(6*x/5)-(0.5) ~ x, x.lim = range(0.3, 0.5))
##        x
## 1 0.4188
## 7. Find the expected value
# Find the definite integral from 0 to 1 of xf(x)
expectedIntegral <- antiD(-(x^5) + ((6 * x)/5) ~ x)
## Find definite integral from 0 to 1
expectedIntegral(x=1) - expectedIntegral(x=0)
## [1] 0.4333




Assignment #10: Fishing Problem


All questions appear in the comments.
## A:  Exponential - P(X > 60 | lambda = 1/30)
1-pexp(60, rate=1/30, lower.tail=TRUE, log.p=FALSE)
## [1] 0.1353
## B:  Exponential - P(X <10 | lambda = 1/30)
pexp(10, rate=1/30, lower.tail=TRUE, log.p=FALSE)
## [1] 0.2835
## C:  Exponential - P(20 < X < 40 | lambda = 1/30)
pexp(40, rate=1/30, lower.tail=TRUE, log.p=FALSE) -
  pexp(20, rate=1/30, lower.tail=TRUE, log.p=FALSE)
## [1] 0.2498
## D:  Normal - P(12 < X < 18 | lambda = 1/30)
xpnorm(18, mean = 15, sd = 3.5, plot=FALSE, verbose=FALSE) -
  xpnorm(12, mean = 15, sd = 3.5, plot=FALSE, verbose=FALSE)
## [1] 0.6086
## E:  Geometric - P(1st success on 1st, 2nd, or 3rd trial)
pgeom(2, .6086341, log=FALSE)
## [1] 0.9401
## F:  Binomial - P(4 successes in 4 trials)
dbinom(4, 4, 0.6086341)
## [1] 0.1372
## G:  Negative Binomial - P(4th win on 7th trial)
dnbinom(3, 4, .6086341)
## [1] 0.1645




Assignment #11: Normal Probability Practice


All questions appear in the comments.
## 1:  P(X < 10)
xpnorm(10, mean = 11.4, sd = 1.8)
## 
## If X ~ N(11.4,1.8), then 
## 
##  P(X <= 10) = P(Z <= -0.778) = 0.2184
##  P(X >  10) = P(Z >  -0.778) = 0.7816

plot of chunk unnamed-chunk-8

## [1] 0.2184
## 2:  P(X > 14)
xpnorm(14, mean = 11.4, sd = 1.8, lower.tail=FALSE)
## 
## If X ~ N(11.4,1.8), then 
## 
##  P(X <= 14) = P(Z <= 1.444) = 0.9257
##  P(X >  14) = P(Z >  1.444) = 0.0743

plot of chunk unnamed-chunk-8

## [1] 0.07431
## 3:  P(9 < X < 12)
xpnorm(12, mean = 11.4, sd = 1.8, verbose=FALSE, plot=FALSE) -
  xpnorm(9, mean = 11.4, sd = 1.8, verbose=FALSE, plot=FALSE)
## [1] 0.5393
# Plot curve for question #3
curve(dnorm(x,11.4,1.8), xlim=c(5, 18), col="steelblue", lwd=3)
cord.x <- c(9,seq(9,12,0.01),12) 
cord.y <- c(0,dnorm(seq(9, 12, 0.01), mean=11.4, sd=1.8),0) 
polygon(cord.x,cord.y,col='skyblue')

plot of chunk unnamed-chunk-9

## 4:  P(X>30)
xpnorm(30, mean = 11.4, sd = 1.8, lower.tail=FALSE, plot=FALSE, verbose=FALSE)
## [1] 2.49e-25
## 5:  P(11 < X < 12)
xpnorm(12, mean = 11.4, sd = 1.8, verbose=FALSE, plot=FALSE) -
  xpnorm(11, mean = 11.4, sd = 1.8, verbose=FALSE, plot=FALSE)
## [1] 0.2185
## 7:  95th percentile
xqnorm(.95, mean = 11.4, sd = 1.8)
##  P(X <= 14.3607365285126) = 0.95
##  P(X >  14.3607365285126) = 0.05

plot of chunk unnamed-chunk-10

## [1] 14.36
## 8:  5th percentile
xqnorm(.05, mean = 11.4, sd = 1.8)
##  P(X <= 8.43926347148735) = 0.05
##  P(X >  8.43926347148735) = 0.95

plot of chunk unnamed-chunk-10

## [1] 8.439
## 9:  97.5th percentile
xqnorm(.975, mean = 11.4, sd = 1.8)
##  P(X <= 14.9279351721721) = 0.975
##  P(X >  14.9279351721721) = 0.025

plot of chunk unnamed-chunk-10

## [1] 14.93