Use R on HDFS With High Availability
-
Install and load the following package:
# Install the httr HTTP client from CRAN, then attach it for this session.
install.packages("httr")
library(httr)
-
Run the following lines of code:
# Candidate WebHDFS base URLs to try, in order of preference.
uris <- c(
  "http://nn1.p01.client.saagie.io:50070/webhdfs/v1/",
  "http://nn1.p02.client.saagie.io:50070/webhdfs/v1/",
  "http://nn2.p01.client.saagie.io:50070/webhdfs/v1/",
  "http://nn2.p02.client.saagie.io:50070/webhdfs/v1/"
)

#' Find the WebHDFS URLs that currently respond.
#'
#' Sends a LISTSTATUS request to every candidate URL and keeps only those
#' that answer with HTTP status 200 within one second.
#'
#' @param uris Character vector of WebHDFS base URLs to probe.
#' @return The elements of `uris` that answered with status 200, in their
#'   original order; empty when none did.
getNN <- function(uris) {
  op <- "?op=LISTSTATUS" # Operation to try: list the contents of a folder.

  is_up <- vapply(uris, function(uri) {
    # Any error raised by the request (a timeout, for instance) is mapped to
    # status 400 so it is filtered out exactly like a bad HTTP status.
    status <- tryCatch(
      httr::status_code(httr::GET(paste0(uri, op), httr::timeout(1))),
      error = function(e) 400
    )
    isTRUE(status == 200)
  }, logical(1), USE.NAMES = FALSE)

  # Keep the working URLs and drop the rest.
  unlist(uris[is_up])
}

# Select the first working URL; this is NA when no URL responded.
first_valid <- getNN(uris)[1]