You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
88 lines
3.9 KiB
88 lines
3.9 KiB
# Summarise one set of CrossRef records: works whose metadata indicates they
# are a translation, or works whose metadata indicates they have a translation.
#
# Prints to the console:
#   * the number of works per publication year, most frequent year first;
#   * the average number of works per unique publisher;
#   * the average number of works per unique journal, once for all works and
#     once restricted to works published after 2020;
#   * the number of distinct languages and the (up to) six most frequent ones.
# With verbose = TRUE it additionally draws a histogram of the publication
# years and prints the (up to) ten most common publishers and journals, overall
# and for the periods after 2020, 2009-2020 and 2008.
#
# Note: CrossRef calls the journal name the "container-title".
#
# Args:
#   crossRefData: data frame of CrossRef records. The columns read are
#     `published.date-parts`, `publisher`, `container-title` and `language`.
#   dataTypeStr: label appended to the histogram axis text and to the verbose
#     headings.
#   verbose: set to TRUE to get the extra output described above.
#
# Returns:
#   0, invisibly (the value this function has always produced). The function
#   is called for its printed output.
analyse_single_source <- function(crossRefData, dataTypeStr, verbose = FALSE) {
  noValues <- nrow(crossRefData) # Number of works

  # Counts of `values`, most frequent first, limited to at most `n` entries.
  # Indexing with a bounded sequence (rather than 1:n) avoids NA padding when
  # there are fewer than `n` distinct values.
  topCounts <- function(values, n) {
    counts <- sort(table(values), decreasing = TRUE)
    counts[seq_len(min(n, length(counts)))]
  }

  # date-parts is an incomplete date list, which can contain either just a
  # year, a year and a month, or a year, month and day of month. Only the year
  # is used. Records without any date parts get NA instead of raising an error.
  publicationYearTmp <- crossRefData$"published.date-parts"
  publicationYears <- vapply(
    seq_len(noValues),
    function(i) {
      parts <- publicationYearTmp[[i]]
      if (length(parts) == 0L) NA_integer_ else as.integer(parts[1])
    },
    integer(1)
  )
  if (isTRUE(verbose)) {
    hist(
      publicationYears,
      main = "", ylab = "Count",
      xlab = paste("Publication year - ", dataTypeStr)
    )
  }
  print(sort(table(publicationYears), decreasing = TRUE))

  # Period masks. Works with an unknown year belong to no period.
  yearKnown <- !is.na(publicationYears)
  indexLate <- yearKnown & publicationYears > 2020
  indexMiddle <- yearKnown & publicationYears < 2021 & publicationYears > 2008
  index2008 <- yearKnown & publicationYears == 2008

  # Verbose helper: most common values overall and per period. `what` is the
  # word used in the heading ("publisher" or "journal").
  printMostCommon <- function(values, what) {
    periods <- list(
      "all" = rep(TRUE, noValues),
      "2021, 2022" = indexLate,
      "2009-2020" = indexMiddle,
      "2008" = index2008
    )
    for (label in names(periods)) {
      heading <- paste0("Most common ", what, " names - ", label, " - ")
      print(paste(heading, dataTypeStr))
      print(topCounts(values[periods[[label]]], 10L))
    }
  }

  # Publishers
  publisherTmp <- crossRefData$publisher
  noUniquePublishers <- length(unique(publisherTmp))
  articlesPerPublisher <- noValues / noUniquePublishers
  print(paste(
    "Articles per publisher:",
    format(articlesPerPublisher, digits = 3)
  ))
  if (isTRUE(verbose)) {
    printMostCommon(publisherTmp, "publisher")
  }

  # Journals
  journalTmp <- crossRefData$"container-title"
  noUniqueJournals <- length(unique(journalTmp))
  articlesPerJournal <- noValues / noUniqueJournals
  print(paste("Articles per journal:", format(articlesPerJournal, digits = 3)))

  # Same ratio restricted to works published after 2020. Previously this
  # figure was computed from all works and so repeated the overall ratio.
  noUniqueJournalsLate <- length(unique(journalTmp[indexLate]))
  articlesPerJournalLate <- sum(indexLate) / noUniqueJournalsLate
  print(paste(
    "Articles per journal after 2020:",
    format(articlesPerJournalLate, digits = 3)
  ))

  if (isTRUE(verbose)) {
    # Sometimes journals have multiple names. To simplify the analysis we only
    # look at the first; works without a journal name get "".
    firstJournalName <- vapply(
      seq_len(noValues),
      function(i) {
        titles <- journalTmp[[i]]
        if (length(titles) == 0L) "" else as.character(titles[1])
      },
      character(1)
    )
    printMostCommon(firstJournalName, "journal")
  }

  # Languages
  languageTmp <- crossRefData$language
  noLanguages <- length(unique(languageTmp))
  print(noLanguages)
  print(topCounts(languageTmp, 6L))

  invisible(0)
}