1) Create database with pitchRx data from a range of dates
## Create new database: db = MaxScherzer.sqlite3
db <- src_sqlite("MaxScherzer.sqlite3", create = TRUE)
## Scrape data from a range of dates
## Could also use Game ID with...
### GameData <- scrape(game.ids="gid_2011_05_07_detmlb_tormlb_1", suffix = "inning/inning_all.xml")
## This gets us 5 dataframes: atbat, action, pitch, po, runner
scrape(start = "2014-06-12", end = "2014-06-12", connect = db$con)
## http://gd2.mlb.com/components/game/mlb/year_2014/month_06/day_12/gid_2014_06_12_lanmlb_cinmlb_1/inning/inning_all.xml
## http://gd2.mlb.com/components/game/mlb/year_2014/month_06/day_12/gid_2014_06_12_sdnmlb_phimlb_1/inning/inning_all.xml
## http://gd2.mlb.com/components/game/mlb/year_2014/month_06/day_12/gid_2014_06_12_atlmlb_colmlb_1/inning/inning_all.xml
## http://gd2.mlb.com/components/game/mlb/year_2014/month_06/day_12/gid_2014_06_12_wasmlb_sfnmlb_1/inning/inning_all.xml
## http://gd2.mlb.com/components/game/mlb/year_2014/month_06/day_12/gid_2014_06_12_chnmlb_pitmlb_1/inning/inning_all.xml
## http://gd2.mlb.com/components/game/mlb/year_2014/month_06/day_12/gid_2014_06_12_tormlb_balmlb_1/inning/inning_all.xml
## http://gd2.mlb.com/components/game/mlb/year_2014/month_06/day_12/gid_2014_06_12_clemlb_bosmlb_1/inning/inning_all.xml
## http://gd2.mlb.com/components/game/mlb/year_2014/month_06/day_12/gid_2014_06_12_milmlb_nynmlb_1/inning/inning_all.xml
## http://gd2.mlb.com/components/game/mlb/year_2014/month_06/day_12/gid_2014_06_12_arimlb_houmlb_1/inning/inning_all.xml
## http://gd2.mlb.com/components/game/mlb/year_2014/month_06/day_12/gid_2014_06_12_detmlb_chamlb_1/inning/inning_all.xml
## http://gd2.mlb.com/components/game/mlb/year_2014/month_06/day_12/gid_2014_06_12_nyamlb_seamlb_1/inning/inning_all.xml
## NULL
2) Join atbat and pitch tables; select Max Scherzer; Query database
atbats <- tbl(db, 'atbat') %.%
filter(date >= '2014_06_12' & date <= '2014_06_12') %.%
filter(pitcher_name == 'Max Scherzer')
pitches <- tbl(db, 'pitch')
MaxScherzerJune12 <- collect(inner_join(pitches, atbats, by = c('num', 'gameday_link')))
MaxS <- collect(MaxScherzerJune12)
Slower way to do step 2…
## Select the atbat dataframe
# atbats <- tbl(db, 'atbat')
## Select the pitch table
# pitches <- tbl(db, 'pitch')
## Filter pitches thrown by Max Scherzer
# max <- filter(atbats, pitcher_name == 'Max Scherzer')
## Query the database to load this data
# dbSendQuery(db$con, 'CREATE INDEX pitcher_idx ON atbat(pitcher_name)')
## Join the atbat table with the pitches table
# MaxScherzerJune12 <- inner_join(pitches, max, by = c('num', 'gameday_link'))
## Query the database
# dbSendQuery(db$con, 'CREATE INDEX pitch_idx ON pitch(gameday_link, num)')
## Collect the joined tables into a dataframe
# MaxS <- collect(MaxScherzerJune12)
3) Visualize pitches
## Density - all pitches
strikeFX(MaxS, geom="tile", layer=facet_grid(.~stand))
## Warning: Removed 16 rows containing non-finite values (stat_density2d).
## Warning: Removed 12 rows containing non-finite values (stat_density2d).

## Just the called strikes
Maxstrikes <- subset(MaxS, des == "Called Strike")
strikeFX(Maxstrikes, geom="tile", layer=facet_grid(.~stand))

## Just the swinging strikes
Maxswingstrikes <- subset(MaxS, des == "Swinging Strike")
strikeFX(Maxswingstrikes, geom="tile", layer=facet_grid(.~stand))

## Just the balls
Maxballs <- subset(MaxS, des == "Ball")
strikeFX(Maxballs, geom="tile", layer=facet_grid(.~stand))
## Warning: Removed 4 rows containing non-finite values (stat_density2d).
## Warning: Removed 8 rows containing non-finite values (stat_density2d).

## The probability of a strike based on location
noswing <- subset(MaxS, des %in% c("Ball", "Called Strike"))
noswing$strike <- as.numeric(noswing$des %in% "Called Strike")
require(mgcv)
m1 <- bam(strike ~ s(px, pz, by=factor(stand)) +
factor(stand), data=noswing, family = binomial(link='logit'))
strikeFX(noswing, model=m1, layer=facet_grid(.~stand))

## Animated pitches (averaged by pitch type)
library(animation)
saveHTML(
animateFX(MaxS, avg.by = 'pitch_types', layer = list(theme_bw(), facet_grid(.~stand))),
img.name = "MaxS"
)