49 lines
2.1 KiB
R
49 lines
2.1 KiB
R
load_data <- function(translationFileDirName, callstrTranslation) {
  # Load the Crossref records that have been marked as either having or being
  # a translation.
  #
  # To avoid querying the database on every run, the downloaded data is cached
  # in the file translationFileDirName. Delete that file to force a fresh copy.
  #
  # Arguments:
  #   translationFileDirName  Path of the cache file (written with save(), read
  #                           back with load()).
  #   callstrTranslation      API query URL without the cursor parameter;
  #                           "&cursor=<cursor>" is appended for every page.
  #
  # Returns the object stored as `translationData`: the items of all pages
  # combined by rbind_pages().
  #
  # Crossref hands out larger datasets in multiple pages. The cursor tracks
  # which page comes next; it starts as "*" for the first page. Details on the
  # API can be found here:
  # https://github.com/CrossRef/rest-api-doc/blob/master/api_format.md
  #
  # NOTE(review): fromJSON() and rbind_pages() are not defined in this
  # function; presumably they come from jsonlite and must be attached by the
  # calling script -- confirm.

  if (file.exists(translationFileDirName)) {
    # Load into a private environment so that only the expected object is
    # picked up, whatever else the cache file may contain.
    cache <- new.env()
    load(translationFileDirName, envir = cache)
    if (!exists("translationData", envir = cache, inherits = FALSE)) {
      stop("Cache file does not contain 'translationData': ",
           translationFileDirName)
    }
    return(get("translationData", envir = cache, inherits = FALSE))
  }

  # The cached data file does not exist: download a full new dataset.
  translationDataPages <- list()
  noValues <- +Inf  # Total number of records; unknown until the first reply
  nextCursor <- "*"
  i <- 1            # Counts the number of pages the database gives out
  iVal <- 0         # Counts the number of records

  while (iVal < noValues) {
    # The cursor is a query parameter value, so reserved characters in it
    # (such as "+", "/" or "=") have to be percent-encoded as well.
    APIstr <- paste0(
      URLencode(callstrTranslation),
      "&cursor=",
      URLencode(nextCursor, reserved = TRUE)
    )
    response <- fromJSON(APIstr, flatten = TRUE)

    # The first three elements of the reply describe the transfer; identical()
    # also rejects a missing or malformed field instead of failing inside if().
    if (!identical(response[[1]], "ok")) {
      stop("Data transfer failed.")
    }
    if (!identical(response[[2]], "work-list")) {
      stop("Wrong kind of data.")
    }
    if (!identical(response[[3]], "1.0.0")) {
      stop("Data format version unknown")
    }

    # If the reply is right, the actual payload is its 4th element.
    payload <- response[[4]]
    nextCursor <- payload[["next-cursor"]]
    noValues <- payload[["total-results"]]
    noValPerPage <- payload[["items-per-page"]]

    # The records themselves are in the element "items".
    translationDataPages[[i]] <- payload[["items"]]
    iVal <- iVal + noValPerPage
    print(iVal)  # Progress: upper bound of the records requested so far
    i <- i + 1
  }

  # Combine the data from all pages into one object and cache it. The object
  # has to be called translationData, as the cache is read back by that name.
  translationData <- rbind_pages(translationDataPages)
  save(translationData, file = translationFileDirName)

  return(translationData)
}
|