These are just some notes I’m bringing in to Professor Mario this afternoon!

Let’s start with the 2016-2017 regular season

Start Date: October 25, 2016

End Date: April 12, 2017

Last Game URL: http://stats.nba.com/players/advanced/#!?sort=GP&dir=-1&Season=2016-17&SeasonType=Regular%20Season&TeamID=1610612742&LastNGames=1

Factors to scrape:

DefRtg: Defensive Rating

The formula is: Defensive Rating = (Points Allowed / Opponent Possessions) × 100. The result is the number of points an individual player is expected to allow on defense per 100 possessions. This stat can be significantly influenced by the defense of a player’s teammates.

https://en.wikipedia.org/wiki/Defensive_rating

DREB%: Defensive Rebound Percentage

(-)CHARGES TAKEN

(+)DEFENSIVE FOULS

(-)BLOCKS

(-)STEALS

(-) SPEED

Issues:

- stats.nba.com doesn’t have all the stats I want

- The filter doesn’t work, so loop over the LastNGames parameter instead?

- How many seasons back do I go?

-faster way to look up and loop TEAM ID?

Lessons Learned:

Ctrl+Shift+C = comments out a block in R

IDEAL data set:

Capture

Scraper Code Progress:

library(rjson)

# Download the stats JSON and parse it into a nested R list.
# NOTE(review): this URL is the stats.nba.com web page (the query sits after
# the "#!" fragment), not a JSON endpoint, so fromJSON() will presumably fail
# to parse the response -- confirm and swap in the real stats API endpoint.
url <- "http://stats.nba.com/players/advanced/#!?sort=GP&dir=-1&Season=2016-17&SeasonType=Regular%20Season&TeamID=1610612742&LastNGames=1"
data <- fromJSON(file = url)
# TODO: loop TEAM ID to get all 30 teams
#   ATLANTA HAWKS (first) = 1610612737
#   DALLAS MAVERICKS = 1610612742
#   TORONTO RAPTORS = 1610612761
#   UTAH JAZZ (second to last) = 1610612762
#   WASHINGTON WIZARDS (last) = 1610612764
# TODO: loop LastNGames to get all 82 games
# TODO: loop to get the last 5 seasons

# Pre-allocate one slot per result row for the team ID and the team name,
# then fill both lists from the row set.
row_set <- data$resultSets[[1]]$rowSet
teamIDVector <- vector("list", length(row_set))
teamNameVector <- vector("list", length(row_set))
# seq_along() (rather than 1:length()) skips the loop when there are no rows.
for (i in seq_along(row_set)) {
  # NOTE(review): both fields are read from column 30, so teamName and teamID
  # end up identical. This looks like a copy-paste slip -- confirm the correct
  # column positions against data$resultSets[[1]]$headers.
  teamIDVector[[i]] <- row_set[[i]][[30]]
  teamNameVector[[i]] <- row_set[[i]][[30]]
}

# Flatten both lists into a two-column lookup table.
teamNameID <- data.frame(
  teamName = unlist(teamNameVector),
  teamID = unlist(teamIDVector)
)
# Drop rows whose name contains "@".
teamNameID <- teamNameID[!grepl("@", teamNameID$teamName), ]

# #Create vector equal to the size of the JSON data for gameIDs and game names then populate.
# gameIDVector = vector(“list”, length(data$resultSets[[1]]$rowSet))
# gameNameVector = vector(“list”, length(data$resultSets[[1]]$rowSet))
# for(i in 1:length(data$resultSets[[1]]$rowSet))
# {
# gameIDVector[[i]] = data$resultSets[[1]]$rowSet[[i]][[5]]
# gameNameVector[[i]] = data$resultSets[[1]]$rowSet[[i]][[7]]
# }
#
# gameNameID = data.frame(unlist(gameNameVector), unlist(gameIDVector))
# colnames(gameNameID) = c(‘gameName’, ‘gameID’)
# gameNameID <- gameNameID[!grepl(‘@’, gameNameID$gameName),]
# #Create pbp url’s
# gameNameID$gameURL = paste("http://stats.nba.com/stats/playbyplayv2?EndPeriod=10&EndRange=55800&GameID=", gameNameID$gameID, "&RangeType=2&Season=2014-15&SeasonType=Regular+Season&StartPeriod=1&StartRange=0", sep = "")
# gameNameID$gameName = gsub(” “, “”, gameNameID$gameName)
# #All games in HOMvsAWY format
# gameNameID$gameName = gsub(” “, “”, gameNameID$gameName)
# gameNameID$gameName = gsub(“vs.”, “vs”, gameNameID$gameName)
# gameNameID$gameName = paste(gameNameID$gameName, gameNameID$gameID, sep = ”)
#
# setwd(‘/Users/jessevo/Documents/NBA/net+/scrapes/pbps’)
# for (i in 1:length(gameNameID$gameName))
# #For each game played.
# {
# print(i)
# #Build filename from gameName and file extension
# fileName = paste(gameNameID$gameName[[i]], ‘.JSON’, sep =”)
# url = gameNameID$gameURL[[i]]
# gID = gameNameID$gameID[[i]]
# json_data = fromJSON(file=url)
# json_data = toJSON(json_data)
# write(json_data, file = fileName)
# }
# Sys.time()
#
# “””
# Scrape for Box Score – Traditional
# “””
# gameNameID$gameURL = paste("http://stats.nba.com/stats/boxscoretraditionalv2?EndPeriod=10&EndRange=28800&GameID=", gameNameID$gameID, "&RangeType=0&Season=2014-15&SeasonType=Regular+Season&StartPeriod=1&StartRange=0", sep = "")
# setwd(‘/Users/jessevo/Documents/NBA/net+/scrapes/boxscores/traditional’)
# for (i in 1:length(gameNameID$gameName))
# #For each game played.
# {
# print(i)
# #Build filename from gameName and file extension
# fileName = paste(gameNameID$gameName[[i]], ‘.JSON’, sep =”)
# url = gameNameID$gameURL[[i]]
# gID = gameNameID$gameID[[i]]
# json_data = fromJSON(file=url)
# json_data = toJSON(json_data)
# write(json_data, file = fileName)
# }
# Sys.time()
#
# “””
# Scrape for Box Score – Advanced
# “””
# gameNameID$gameURL = paste("http://stats.nba.com/stats/boxscoreadvancedv2?EndPeriod=10&EndRange=34800&GameID=", gameNameID$gameID, "&RangeType=0&Season=2014-15&SeasonType=Regular+Season&StartPeriod=1&StartRange=0", sep = "")
# setwd(‘/Users/jessevo/Documents/NBA/net+/scrapes/boxscores/advanced’)
# for (i in 1:length(gameNameID$gameName))
# #For each game played.
# {
# print(i)
# #Build filename from gameName and file extension
# fileName = paste(gameNameID$gameName[[i]], ‘.JSON’, sep =”)
# url = gameNameID$gameURL[[i]]
# gID = gameNameID$gameID[[i]]
# json_data = fromJSON(file=url)
# json_data = toJSON(json_data)
# write(json_data, file = fileName)
# }
# Sys.time()
#
# “””
# Scrape for Box Score – Misc
# “””
# gameNameID$gameURL = paste("http://stats.nba.com/stats/boxscoremiscv2?EndPeriod=10&EndRange=40800&GameID=", gameNameID$gameID, "&RangeType=0&Season=2015-16&SeasonType=Regular+Season&StartPeriod=1&StartRange=0", sep = "")
# setwd(‘/Users/jessevo/Documents/NBA/net+/scrapes/boxscores/misc’)
# for (i in 1:length(gameNameID$gameName))
# #For each game played.
# {
# print(i)
# #Build filename from gameName and file extension
# fileName = paste(gameNameID$gameName[[i]], ‘.JSON’, sep =”)
# url = gameNameID$gameURL[[i]]
# gID = gameNameID$gameID[[i]]
# json_data = fromJSON(file=url)
# json_data = toJSON(json_data)
# write(json_data, file = fileName)
# }
# Sys.time()
#
# “””
# Scrape for Box Score – Scoring
# “””
# gameNameID$gameURL = paste("http://stats.nba.com/stats/boxscorescoringv2?EndPeriod=10&EndRange=40800&GameID=", gameNameID$gameID, "&RangeType=0&Season=2015-16&SeasonType=Regular+Season&StartPeriod=1&StartRange=0", sep = "")
# setwd(‘/Users/jessevo/Documents/NBA/net+/scrapes/boxscores/scoring’)
# for (i in 1:length(gameNameID$gameName))
# #For each game played.
# {
# print(i)
# #Build filename from gameName and file extension
# fileName = paste(gameNameID$gameName[[i]], ‘.JSON’, sep =”)
# url = gameNameID$gameURL[[i]]
# gID = gameNameID$gameID[[i]]
# json_data = fromJSON(file=url)
# json_data = toJSON(json_data)
# write(json_data, file = fileName)
# }
# Sys.time()
#
# “””
# Scrape for Box Score – Usage
# “””
# gameNameID$gameURL = paste("http://stats.nba.com/stats/boxscoreusagev2?EndPeriod=10&EndRange=40800&GameID=", gameNameID$gameID, "&RangeType=0&Season=2015-16&SeasonType=Regular+Season&StartPeriod=1&StartRange=0", sep = "")
# setwd(‘/Users/jessevo/Documents/NBA/net+/scrapes/boxscores/usage’)
# for (i in 1:length(gameNameID$gameName))
# #For each game played.
# {
# print(i)
# #Build filename from gameName and file extension
# fileName = paste(gameNameID$gameName[[i]], ‘.JSON’, sep =”)
# url = gameNameID$gameURL[[i]]
# gID = gameNameID$gameID[[i]]
# json_data = fromJSON(file=url)
# json_data = toJSON(json_data)
# write(json_data, file = fileName)
# }
# Sys.time()
#
# “””
# Scrape for Box Score – Four Factors
# “””
# gameNameID$gameURL = paste("http://stats.nba.com/stats/boxscorefourfactorsv2?EndPeriod=10&EndRange=40800&GameID=", gameNameID$gameID, "&RangeType=0&Season=2015-16&SeasonType=Regular+Season&StartPeriod=1&StartRange=0", sep = "")
# setwd(‘/Users/jessevo/Documents/NBA/net+/scrapes/boxscores/fourfactors’)
# for (i in 1:length(gameNameID$gameName))
# #For each game played.
# {
# print(i)
# #Build filename from gameName and file extension
# fileName = paste(gameNameID$gameName[[i]], ‘.JSON’, sep =”)
# url = gameNameID$gameURL[[i]]
# gID = gameNameID$gameID[[i]]
# json_data = fromJSON(file=url)
# json_data = toJSON(json_data)
# write(json_data, file = fileName)
# }
# Sys.time()
#
# “””
# Scrape for Box Score – Player Tracking
# “””
# gameNameID$gameURL = paste("http://stats.nba.com/stats/boxscoreplayertrackv2?EndPeriod=10&EndRange=55800&GameID=", gameNameID$gameID, "&RangeType=2&Season=2015-16&SeasonType=Regular+Season&StartPeriod=1&StartRange=0", sep = "")
# setwd(‘/Users/jessevo/Documents/NBA/net+/scrapes/boxscores/playertrack’)
# for (i in 1:length(gameNameID$gameName))
# #For each game played.
# {
# print(i)
# #Build filename from gameName and file extension
# fileName = paste(gameNameID$gameName[[i]], ‘.JSON’, sep =”)
# url = gameNameID$gameURL[[i]]
# gID = gameNameID$gameID[[i]]
# json_data = fromJSON(file=url)
# json_data = toJSON(json_data)
# write(json_data, file = fileName)
# }
# Sys.time()

 

 

 

 

 

Advertisements