The code used to analyze the information in the CrossRef Database on works that are or have translations.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
Analyze_Translations_CrossR.../write_table_translations.R

157 lines
7.7 KiB

# Writes a table pairing each record with every related work that is known by
# DOI, one pair per row, and returns the same pairs as a data frame.
#
# Arguments:
#   translationData          Data frame with one row per record. The columns
#                            read here are publisher, short-container-title,
#                            container-title, DOI, title and language.
#   translationDataRelation  List indexed like the rows of translationData.
#                            An element is either NULL (no relation) or an
#                            object with the fields `id-type` and `id`.
#   doiCache                 Passed unchanged to get_information_doi().
#   translationTableCon      Open, writable connection that receives the
#                            tab-separated table.
#   typeStr                  "is" if the related work is the original of the
#                            record, "has" if the record is the original.
#
# Returns:
#   A data frame with the columns translationID, publisherOriginal,
#   publisherTranslation, journalOriginal, journalTranslation, DOIoriginal,
#   DOItranslation, titleOriginal, titleTranslation, languageOriginal and
#   languageTranslation. It has zero rows if no DOI could be resolved.
#
# The lines written to translationTableCon always list the related work before
# the record, independent of typeStr; only the returned data frame is reordered.
#
# NOTE(review): get_information_doi() is defined outside this function. It is
# assumed to return a data frame with the columns publisher,
# short-container-title, container-title, DOI, title and language (zero rows
# when nothing is found) - confirm against its definition.
write_table_translations <- function(translationData, translationDataRelation, doiCache, translationTableCon, typeStr) {
  # Fail before anything is written if the direction is not one of the two
  # supported values.
  if (!is.character(typeStr) || length(typeStr) != 1L ||
      !(typeStr %in% c("is", "has"))) {
    stop("typeStr must be either \"is\" or \"has\"", call. = FALSE)
  }

  # Writes one tab-terminated field and returns the value to store in the
  # result vectors. Missing values (NULL or zero length) become an empty,
  # unquoted field and the empty string.
  writeField <- function(value) {
    if (length(value) == 0L) {
      writeLines("", con = translationTableCon, sep = "\t")
      return("")
    }
    writeLines(paste0("\"", value, "\""), con = translationTableCon, sep = "\t")
    value[[1L]]
  }

  noValues <- nrow(translationData)
  # Initial size only; the vectors are cut to length at the end and R extends
  # them automatically should a record set need more rows.
  noValues2 <- noValues * 5
  translationID <- vector(mode = "character", length = noValues2)
  publisherOne <- vector(mode = "character", length = noValues2)
  publisherTwo <- vector(mode = "character", length = noValues2)
  journalOne <- vector(mode = "character", length = noValues2)
  journalTwo <- vector(mode = "character", length = noValues2)
  DOIone <- vector(mode = "character", length = noValues2)
  DOItwo <- vector(mode = "character", length = noValues2)
  titleOne <- vector(mode = "character", length = noValues2)
  titleTwo <- vector(mode = "character", length = noValues2)
  languageOne <- vector(mode = "character", length = noValues2)
  languageTwo <- vector(mode = "character", length = noValues2)

  # Main loop.
  # One record can refer to several (or no) related works, so the output rows
  # need their own counter (iRow). "One" is the related work, "Two" the record.
  iRow <- 0L
  for (i in seq_len(noValues)) {
    relation <- translationDataRelation[[i]]
    if (is.null(relation)) {
      next
    }
    idTypes <- relation[["id-type"]]
    relatedIds <- relation[["id"]]

    # Visit every entry whose id-type is "doi", wherever it sits in the list.
    # which() also drops NA id-types and yields nothing if the field is absent.
    for (iWork in which(idTypes == "doi")) {
      work <- get_information_doi(relatedIds[iWork], doiCache)
      if (is.null(work) || !isTRUE(nrow(work) > 0)) {
        next # No information found for this DOI.
      }
      iRow <- iRow + 1L

      # Translation ID number (index of the record).
      translationID[iRow] <- writeField(format(i))

      # Publishers.
      publisherOne[iRow] <- writeField(work[["publisher"]])
      publisherTwo[iRow] <- writeField(translationData[["publisher"]][[i]])

      # Journal names, which CrossRef calls ((short-)container-title).
      journalOne[iRow] <- writeField(
        trimws(paste0(work[["short-container-title"]], " ",
                      work[["container-title"]]))
      )
      # For the record prefer the short title and fall back to the long one.
      recordJournal <- translationData[["short-container-title"]][[i]]
      if (is.null(recordJournal)) {
        recordJournal <- translationData[["container-title"]][[i]]
      }
      journalTwo[iRow] <- writeField(recordJournal)

      # DOIs.
      DOIone[iRow] <- writeField(work[["DOI"]])
      DOItwo[iRow] <- writeField(translationData[["DOI"]][[i]])

      # Article titles.
      titleOne[iRow] <- writeField(work[["title"]])
      titleTwo[iRow] <- writeField(translationData[["title"]][[i]])

      # Languages.
      languageOne[iRow] <- writeField(work[["language"]])
      languageTwo[iRow] <- writeField(translationData[["language"]][[i]])

      # End of the table row.
      writeLines("", con = translationTableCon, sep = "\n")
    }
  }

  # Cut vectors to length. seq_len() gives an empty index for iRow == 0, so an
  # empty result really has zero rows.
  keep <- seq_len(iRow)
  related <- list(publisher = publisherOne[keep], journal = journalOne[keep],
                  DOI = DOIone[keep], title = titleOne[keep],
                  language = languageOne[keep])
  record <- list(publisher = publisherTwo[keep], journal = journalTwo[keep],
                 DOI = DOItwo[keep], title = titleTwo[keep],
                 language = languageTwo[keep])

  # In the output table the original is first, the translation second. So for
  # the data of hasTranslation the order needs to be reversed.
  if (typeStr == "is") {
    original <- related
    translation <- record
  } else {
    original <- record
    translation <- related
  }

  data.frame(
    translationID = translationID[keep],
    publisherOriginal = original$publisher,
    publisherTranslation = translation$publisher,
    journalOriginal = original$journal,
    journalTranslation = translation$journal,
    DOIoriginal = original$DOI,
    DOItranslation = translation$DOI,
    titleOriginal = original$title,
    titleTranslation = translation$title,
    languageOriginal = original$language,
    languageTranslation = translation$language
  )
} # End of function