Data from the DOAJ database were downloaded on June 30, 2025. The data were filtered to include only entries that charge an APC and that were last updated between 2020 and 2025.

# Load the DOAJ journal metadata export (CSV snapshot dated 2025-06-30).
dat <- read.csv("journalcsv__doaj_20250630_1429_utf8.csv")

# Keep only the rows whose APC column is "Yes".
apcs <- dat %>%
  filter(APC == "Yes")

# Keep records whose last-updated timestamp begins with a year from 2020
# through 2025; a single anchored pattern covers all six years.
apcs_20to25 <- apcs %>%
  filter(str_detect(Last.updated.Date, "^202[0-5]"))

APCs reported in U.S. dollars (USD) were extracted first.

# Number of currency quotes per journal: APC.amount separates quotes with ";",
# so the count is the number of separators plus one.
apcs_20to25$numCurrencies <- 1 + str_count(apcs_20to25$APC.amount, ";")
# Printed so it can be checked against the five cost columns created below.
max(apcs_20to25$numCurrencies)

# Split APC.amount into one column per quote. Rows with fewer than five quotes
# are padded with NA on the right; any quote beyond the fifth is dropped.
apcs_sep <- apcs_20to25 %>%
  separate_wider_delim(
    APC.amount,
    delim = ";",
    names = paste0("cost", 1:5),
    too_few = "align_start",
    too_many = "drop"
  )

# USDvalue:   the first quote, reading left to right, that mentions "USD";
#             NA when no quote does.
# APC_Dollar: that quote with its " USD" suffix removed, parsed as a number.
apcs_sep <- apcs_sep %>%
  mutate(
    USDvalue = case_when(
      str_detect(cost1, "USD") ~ cost1,
      str_detect(cost2, "USD") ~ cost2,
      str_detect(cost3, "USD") ~ cost3,
      str_detect(cost4, "USD") ~ cost4,
      str_detect(cost5, "USD") ~ cost5
    ),
    APC_Dollar = as.numeric(gsub(" USD", "", USDvalue))
  )

# Number of journals that list no USD quote.
sum(is.na(apcs_sep$USDvalue))

# Row identifier stored as a regular column.
apcs_sep$ids <- row.names(apcs_sep)

Currency conversion rates were pulled from https://fiscaldata.treasury.gov/datasets/treasury-reporting-rates-exchange/treasury-reporting-rates-of-exchange using the rates with an effective date of 3/31/2025. Currencies were converted with conditional (if/else) logic keyed on the currency code of each amount, to ensure no errors were made in the conversion process.

# Unique currencies. For journals without a USD quote, the first listed quote
# (cost1) is the one that gets converted, so tabulate the currency labels in
# cost1 by stripping the digits from it.
apcs_sep$typesCurrencies <- gsub("[[:digit:]]+", "", apcs_sep$cost1)
table(apcs_sep$typesCurrencies)
nrow(table(apcs_sep$typesCurrencies))

# Conversion table: units of each currency per 1 USD (rates effective
# 3/31/2025). Every amount is DIVIDED by its rate to obtain USD. Keeping the
# rates in one table applies the same operation to all currencies; the earlier
# nested ifelse() chain multiplied CAD by its rate instead of dividing.
units_per_usd <- c(
  ARS = 1093,      # Argentine Peso
  AUD = 1.6,       # Australian Dollar
  BDT = 121,       # Bangladeshi Taka
  BRL = 5.758,     # Brazilian Real
  CAD = 1.435,     # Canadian Dollar
  CHF = 0.881,     # Swiss Franc
  CNY = 7.253,     # Chinese Yuan
  EGP = 50.52,     # Egyptian Pound
  EUR = 0.924,     # Euro
  GBP = 0.772,     # British Pound
  GHS = 15.45,     # Ghanaian Cedi
  IDR = 16525.94,  # Indonesian Rupiah
  INR = 85.417,    # Indian Rupee
  IQD = 1309,      # Iraqi Dinar
  IRR = 42000,     # Iranian Rial
  JPY = 149.36,    # Japanese Yen
  KRW = 1473.69,   # Korean Won
  KZT = 503.25,    # Kazakhstani Tenge
  MAD = 9.631,     # Moroccan Dirham
  MXN = 20.386,    # Mexican Peso
  MYR = 4.435,     # Malaysian Ringgit
  NGN = 1530,      # Nigerian Naira
  NOK = 10.538,    # Norwegian Krone
  NPR = 136.75,    # Nepalese Rupee
  OMR = 0.385,     # Omani Rial
  PHP = 57.239,    # Philippine Peso
  PKR = 280,       # Pakistani Rupee
  PLN = 3.867,     # Polish Zloty
  RON = 4.597,     # Romanian Leu
  RSD = 108.11,    # Serbian Dinar
  RUB = 84.85,     # Russian Ruble
  SEK = 10.02,     # Swedish Krona
  SGD = 1.342,     # Singapore Dollar
  SYP = 12000,     # Syrian Pound
  THB = 33.91,     # Thai Baht
  TRY = 37.934,    # Turkish Lira
  UAH = 41.356,    # Ukrainian Hryvnia
  UGX = 3655,      # Ugandan Shilling
  VND = 25565,     # Vietnamese Dong
  XAF = 605.6,     # Central African Franc
  XOF = 604.5,     # West African Franc (used Cote D'Ivoire rates)
  YER = 528,       # Yemeni Rial
  ZAR = 18.366     # South African Rand
)

# Currency code of the first listed quote; NA when cost1 is missing or its
# currency has no rate in the table.
code_pattern <- paste(names(units_per_usd), collapse = "|")
cost1_code <- str_extract(apcs_sep$cost1, code_pattern)

# Convert only journals that have no USD quote and whose cost1 currency has a
# rate. Parsing just these rows avoids coercion warnings from unrelated rows.
to_convert <- is.na(apcs_sep$USDvalue) & !is.na(cost1_code)
cost1_amount <- as.numeric(
  gsub(paste0(" (", code_pattern, ")"), "", apcs_sep$cost1[to_convert])
)
cost1_rate <- unname(units_per_usd[cost1_code[to_convert]])

# USDvalue remains a character column: rows that already hold a "<n> USD"
# quote are left untouched, converted rows receive the USD amount as text, and
# rows with no usable quote stay NA.
apcs_sep$USDvalue[to_convert] <- as.character(cost1_amount / cost1_rate)

# Final dataset: APC_Dollar is the numeric USD amount for every journal,
# whether it was quoted in USD originally or converted above.
fin <- apcs_sep
fin$APC_Dollar <- as.numeric(gsub(" USD", "", fin$USDvalue))

The final dataset was examined for outliers and missingness. Five entries lacked a currency conversion rate and were removed. One APC was unusually high: in the original data, this entry may have been labeled with the wrong currency, or the APC amount itself may have been reported incorrectly. For this reason, the value was removed from the analysis.

# Plot the distribution of converted APCs before any exclusions are applied.
hist(
  fin$APC_Dollar,
  main = "Converted Reported APC",
  xlab = "Cost (USD, 2025 Dollars)"
)

# Keep journals with a known APC below 10,000 USD; rows with a missing APC
# and rows at or above 10,000 are both dropped.
fin <- fin %>%
  filter(!is.na(APC_Dollar), APC_Dollar < 10000)

Summary statistics were calculated using this processed dataset.