The code used to analyze the information in the CrossRef Database on works that are or have translations.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
Analyze_Translations_CrossR.../load_update_data_API.R

179 lines
8.9 KiB

load_update_data_API <- function(APItranslationDirFileName, isTranslationData, hasTranslationData) {
  # Retrieve the CrossRef metadata of every DOI that is mentioned in the
  # translation relations of isTranslationData or hasTranslationData.
  # It is quite common for a record to state that it is a translation of x,
  # while record x is not marked as having a translation. So all mentioned
  # DOIs are simply looked up via the "works" API interface.
  #
  # Arguments:
  #   APItranslationDirFileName  Path of the cache file. If it exists it is
  #                              load()ed and has to contain the data frame
  #                              APIcache written by an earlier call. It is
  #                              (over)written at the end of this function.
  #   isTranslationData,
  #   hasTranslationData         Data frames with one record per row. The
  #                              columns "relation.is-translation-of" and
  #                              "relation.has-translation" hold, per record,
  #                              a table with the columns "id-type" and "id".
  #                              Records without such a table are skipped.
  #
  # Value:
  #   The data frame APIcache with the character columns publisher,
  #   short-container-title, container-title, DOI, title, language and source;
  #   one row per DOI. DOIs are stored in lower case. The column source is
  #   "API" for a downloaded record and "API - Data transfer failed." if the
  #   download did not work.
  #
  # Side effects: prints every URL that is requested, gives a warning for
  # failed transfers and stops if the API answers with an unexpected message
  # type or version.
  #
  # Requires fromJSON() (called with flatten=TRUE, presumably from the
  # jsonlite package) and put_data() to be available when the API is called.

  # Initialize
  # To only use the API when a record does not exist, we save all data at the
  # end of the function and only call the API if we do not have the
  # information yet.
  if (file.exists(APItranslationDirFileName)) {
    load(APItranslationDirFileName) # Creates the variable APIcache.
    publisher <- APIcache[["publisher"]]
    shortcontainertitle <- APIcache[["short-container-title"]]
    containertitle <- APIcache[["container-title"]]
    DOI <- tolower(APIcache[["DOI"]])
    title <- APIcache[["title"]]
    language <- APIcache[["language"]]
    source <- APIcache[["source"]]
    iDOI <- nrow(APIcache)
  } else {
    # Initial guess of the number of DOIs; the vectors grow when needed and
    # are cut to the number of DOIs actually found at the end.
    capacity <- 2 * (nrow(isTranslationData) + nrow(hasTranslationData))
    publisher <- character(capacity)
    shortcontainertitle <- character(capacity)
    containertitle <- character(capacity)
    DOI <- character(capacity)
    title <- character(capacity)
    language <- character(capacity)
    source <- character(capacity)
    iDOI <- 0
  }

  # Load (remaining) data
  datasets <- list(isTranslationData, hasTranslationData)
  relationNames <- c("relation.is-translation-of", "relation.has-translation")
  for (translationData in datasets) { # Loop over the two datasets (isTranslation and hasTranslation)
    for (relationName in relationNames) { # Loop over the two relation items of the record
      relationColumn <- translationData[[relationName]]
      # seq_len() makes a dataset without records a loop that is not executed;
      # 1:0 would run for i = 1 and i = 0.
      for (i in seq_len(nrow(translationData))) {
        translationDataRelation <- relationColumn[[i]]
        if (!is.list(translationDataRelation)) {
          next # This record does not have this relation.
        }
        # Exact matching with [[ ]]; $id could partially match another column.
        idTypes <- as.character(translationDataRelation[["id-type"]])
        ids <- as.character(translationDataRelation[["id"]])
        isUsableDOI <- !is.na(idTypes) & idTypes == "doi" & !is.na(ids) & nzchar(ids)

        for (querryDOI in tolower(ids[isUsableDOI])) {
          if (querryDOI %in% DOI) {
            next # Only download new records if the DOI is unknown.
          }
          iDOI <- iDOI + 1
          APIstr <- URLencode(paste0("http://api.crossref.org/works/", querryDOI))
          print(paste(i, APIstr))
          APItranslationData <- try(fromJSON(APIstr, flatten = TRUE), silent = TRUE)

          transferOkay <- !inherits(APItranslationData, "try-error") &&
            length(APItranslationData) >= 4 &&
            identical(APItranslationData[[1]], "ok")
          if (!transferOkay) {
            # This is sometimes because the record does not exist, but
            # sometimes also for other reasons.
            warning("Data transfer failed.")
            publisher[iDOI] <- ""
            shortcontainertitle[iDOI] <- ""
            containertitle[iDOI] <- ""
            DOI[iDOI] <- querryDOI
            title[iDOI] <- ""
            language[iDOI] <- ""
            source[iDOI] <- "API - Data transfer failed."
            next
          }

          if (APItranslationData[[2]] != "work") {
            stop("Wrong kind of data.")
          }
          if (APItranslationData[[3]] != "1.0.0") {
            stop("Data format version unknown")
          }
          # If the data is right, put the actual data in the 4th element into
          # a variable to simplify data addressing.
          record <- APItranslationData[[4]]

          # [[ ]] matches names exactly. With $ a record without "publisher"
          # could return another item whose name starts with "publisher".
          # The function put_data() is at the end of this file.
          publisher[iDOI] <- put_data(record[["publisher"]])
          shortcontainertitle[iDOI] <- put_data(record[["short-container-title"]])
          containertitle[iDOI] <- put_data(record[["container-title"]])
          title[iDOI] <- put_data(record[["title"]])
          language[iDOI] <- put_data(record[["language"]])

          # Use the requested DOI unless the record provides its own, so that
          # the record is always found again and not downloaded a second time.
          DOI[iDOI] <- querryDOI
          recordDOI <- record[["DOI"]]
          if (!is.null(recordDOI)) {
            if (length(recordDOI) == 0) {
              print(record)
              print("This record unexpectedly has no DOI.")
              warning("This record unexpectedly has no DOI.")
            } else {
              # Lower case, as all comparisons are made with lower case DOIs.
              DOI[iDOI] <- tolower(recordDOI[[1]])
            }
            if (length(recordDOI) > 1) {
              print(record)
              print("This record unexpectedly has multiple DOIs.")
              warning("This record unexpectedly has multiple DOIs.")
            }
          }
          source[iDOI] <- "API"
        } # End loop over all DOIs in a record
      } # End loop over all records of a dataset, i
    } # End loop over the two relations
  } # End loop over the two datasets

  # Remove the unused part of the preallocated vectors. seq_len(0) is empty,
  # whereas 1:0 would select the first element.
  used <- seq_len(iDOI)
  APIcache <- data.frame(
    publisher = publisher[used],
    shortcontainertitle = shortcontainertitle[used],
    containertitle = containertitle[used],
    DOI = DOI[used],
    title = title[used],
    language = language[used],
    source = source[used],
    stringsAsFactors = FALSE
  )
  names(APIcache)[2] <- "short-container-title"
  names(APIcache)[3] <- "container-title"
  save(APIcache, file = APItranslationDirFileName)
  return(APIcache)
} # End of function
# Comments:
## Gives an error "Resource not found.", but doi.org does forward to the right article:
# http://api.crossref.org/works/10.4467/2084395xwi.16.010.5900
# http://api.crossref.org/works/10.4467/20843844te.12.013.0476
# http://api.crossref.org/works/10.4467/20843844te.14.006.3083
# From the same journal, this DOI also goes wrong on doi.org.
# 10.4467/20843844te.14.006.3083
# It does not forward to the journal, but to:
# https://dx.doi.org/10.4467/www.ejournals.eu/Terminus/2014/Terminus-2014-2/art/4353/
# This page https://www.ejournals.eu/Terminus/2014/Terminus-2014-2/art/4353/ does exist (although maybe for this homepage system any page exists), but is empty.
# All from https://www.ejournals.eu/. Maybe the translations use another DOI registry?
# There is a similar case though from another homepage: http://api.crossref.org/works/10.32113/ijmdat_202010_270
# http://api.crossref.org/works/10.20948/prepr-2019-105 works, but somehow no info in the DOIcache.
# Next day, the system somehow downloaded these three:
# [1] "106 http://api.crossref.org/works/10.17537/2017.12.t1"
# [1] "913 http://api.crossref.org/works/10.17537/2017.12.t23"
# [1] "1475 http://api.crossref.org/works/10.17537/2017.12.t12"
# Broken everywhere now
# 10.4467/2084395xwi.14.023.2824 http://api.crossref.org/works/10.33048/smzh.2022.63.115
put_data <- function(x) {
  # Turn an item of an API record into a single value that can be stored in
  # one element of a vector:
  #   NULL or an item without elements -> ""
  #   one element                      -> the element itself, unchanged
  #   several elements                 -> the elements pasted together,
  #                                       separated by a space
  nElements <- length(x) # length(NULL) is 0, so NULL needs no separate case.
  if (nElements == 0) {
    return("")
  }
  if (nElements == 1) {
    return(x)
  }
  return(paste(x, collapse = " "))
}