The code used to analyze the information in the CrossRef Database on works that are or have translations.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
Analyze_Translations_CrossR.../initialize_DOI_cache.R

55 lines
2.7 KiB

initialize_DOI_cache <- function(hasTranslationData, isTranslationData, APItranslationData) {
  # Combine all datasets we have downloaded on translations into one DOI cache.
  #
  # The cache stacks, in this order, the records of hasTranslationData, the
  # records of isTranslationData and the records of APItranslationData (all
  # DOIs mentioned in the metadata of hasTranslation or isTranslation).
  # It is used later to generate tables combining originals with their
  # translations.
  #
  # Arguments:
  #   hasTranslationData, isTranslationData:
  #     must provide the columns publisher, `short-container-title`,
  #     `container-title`, DOI, title and language.
  #   APItranslationData:
  #     must provide the same columns plus a `source` column.
  #
  # Returns:
  #   A data.frame with the columns publisher, `short-container-title`,
  #   `container-title`, DOI, title, language and source. `source` is "has"
  #   for rows from hasTranslationData, "is" for rows from isTranslationData
  #   and the value of APItranslationData$source otherwise. Every non-missing
  #   DOI occurs once (its first occurrence in the stacking order is kept);
  #   rows keep the row names they had before de-duplication.
  #
  # Originally we wanted to save bandwidth by only downloading
  # APItranslationData for records we were missing in the others, but so much
  # was missing that in the end we downloaded all known DOIs. Maybe that makes
  # this combination no longer needed and we could have used
  # APItranslationData only.
  publisher <- c(hasTranslationData$publisher,
                 isTranslationData$publisher,
                 APItranslationData$publisher)
  shortcontainertitle <- c(hasTranslationData$`short-container-title`,
                           isTranslationData$`short-container-title`,
                           APItranslationData$`short-container-title`)
  containertitle <- c(hasTranslationData$`container-title`,
                      isTranslationData$`container-title`,
                      APItranslationData$`container-title`)
  DOI <- c(hasTranslationData$DOI,
           isTranslationData$DOI,
           APItranslationData$DOI)
  title <- c(hasTranslationData$title,
             isTranslationData$title,
             APItranslationData$title)
  language <- c(hasTranslationData$language,
                isTranslationData$language,
                APItranslationData$language)
  # The first two datasets carry no source column; label their rows by origin.
  source <- c(rep("has", length(hasTranslationData$DOI)),
              rep("is", length(isTranslationData$DOI)),
              APItranslationData$source)

  doiCache <- data.frame(publisher, shortcontainertitle, containertitle, DOI,
                         title, language, source, stringsAsFactors = FALSE)
  # Restore the hyphenated CrossRef field names, which are not valid R
  # variable names and therefore could not be used for the local vectors.
  names(doiCache)[2:3] <- c("short-container-title", "container-title")

  # Remove double/multiple entries, keeping the first occurrence of each DOI.
  # duplicated() does this in a single pass instead of rescanning the DOI
  # column once per unique DOI. Rows with a missing DOI are never treated as
  # duplicates of each other, so all of them are kept.
  isRepeat <- duplicated(doiCache$DOI) & !is.na(doiCache$DOI)
  doiCache[!isRepeat, , drop = FALSE]
}