simple_concordance_example.Rmd
This vignette demonstrates a concordance analysis looking at the concordance of just presence of a carbapenamase with susceptiblity or non-susceptibility as determined by phenotypic testing.
First get the relevant data sets
library(ghruR)
library(kableExtra)
ast_data <- ghruR::get_data_for_country(
country_value = "India",
type_value = "Antimicrobial Susceptibility Testing",
user_email = "anthony.underwood@cgps.group")
## [1] "anthony.underwood@cgps.group"
## [1] "anthony.underwood@cgps.group"
## [1] "anthony.underwood@cgps.group"
acquired_amr_data <- ghruR::get_data_for_country(
country_value = "India",
type_value = "AMR Klebsiella pneumoniae",
AMR_type = "acquired",
user_email = "anthony.underwood@cgps.group")
## [1] "anthony.underwood@cgps.group"
Convert the acquired AMR data to long format, and annotate it and for each sample with at least one carbapenamase record NS
(non-susceptible) else S
# convert to long format
annotated_amr_data <- ghruR::annotate_amr_data(acquired_amr_data)
# filter data for those that have acquired the gene and have good pct_id match and ctg_cov
acquired_genes_present <- ghruR::filter_long_data(annotated_amr_data)
# filter by subclass
carbapenamases <- acquired_genes_present %>%
dplyr::filter(subclass == "CARBAPENEM") %>%
dplyr::select(`Sample id`, gene) %>%
dplyr::rename(determinant = gene)
# collapse the data so there is one row per sample using the summarise feature
carbapenamases %<>% dplyr::group_by(`Sample id`) %>%
dplyr::summarise(determinant = paste0(determinant, collapse = ";"))
# Join with original data so we have a row for each sample
samples_with_carbapenamases <- acquired_amr_data %>% select(`Sample id`) %>%
dplyr::left_join(carbapenamases, by = 'Sample id')
# Convert to predictions
samples_with_predicted_carabapenem_resistance <- samples_with_carbapenamases %>%
dplyr::mutate(predicted_resistance = dplyr::case_when(
is.na(determinant) ~ "S",
TRUE ~ "NS"
))
genomic_and_ast_data <- ast_data %>%
select(`Sample id`, IPM, MEM) %>%
dplyr::mutate(IPM_phenotypic_resistance = dplyr::case_when(
stringr::str_starts(IPM, "S") ~ "S",
stringr::str_starts(IPM, "I") ~ "NS",
stringr::str_starts(IPM, "R") ~ "NS",
TRUE ~ ""
)) %>%
dplyr::mutate(MEM_phenotypic_resistance = dplyr::case_when(
stringr::str_starts(MEM, "S") ~ "S",
stringr::str_starts(MEM, "I") ~ "NS",
stringr::str_starts(MEM, "R") ~ "NS",
TRUE ~ ""
)) %>%
dplyr::inner_join(samples_with_predicted_carabapenem_resistance)
Calculate sensitivity specificty
library(epiR)
drugs <- c()
num_tests <- c()
concordances <- c()
TPs <- c()
FPs <- c()
TNs <- c()
FNs <- c()
specificities <- c()
specificity_lower_limits <- c()
specificity_upper_limits <- c()
sensitivities <- c()
sensitivity_lower_limits <- c()
sensitivity_upper_limits <- c()
for (drug in c("IPM", "MEM")){
drug_num_tests <- sum(!is.na(genomic_and_ast_data[[paste0(drug, '_phenotypic_resistance')]]) & !is.na(genomic_and_ast_data[['predicted_resistance']]), na.rm = TRUE)
concordance <- sum(genomic_and_ast_data[[paste0(drug, '_phenotypic_resistance')]] == genomic_and_ast_data[['predicted_resistance']], na.rm = TRUE) / drug_num_tests
# add to lists
num_tests <- c(num_tests, drug_num_tests)
concordances <- c(concordances, concordance)
# calculate TP, FP, FN, TN
TP <- sum(genomic_and_ast_data[[paste0(drug, '_phenotypic_resistance')]] == "NS" & genomic_and_ast_data[['predicted_resistance']] == "NS", na.rm = TRUE)
FP <- sum(genomic_and_ast_data[[paste0(drug, '_phenotypic_resistance')]] == "S" & genomic_and_ast_data[['predicted_resistance']] == "NS", na.rm = TRUE)
FN <- sum(genomic_and_ast_data[[paste0(drug, '_phenotypic_resistance')]] == "NS" & genomic_and_ast_data[['predicted_resistance']] == "S", na.rm = TRUE)
TN <- sum(genomic_and_ast_data[[paste0(drug, '_phenotypic_resistance')]] == "S" & genomic_and_ast_data[['predicted_resistance']] == "S", na.rm = TRUE)
# add to lists
TPs <- c(TPs, TP)
FPs <- c(FPs, FP)
TNs <- c(TNs, TN)
FNs <- c(FNs, FN)
# Make 2x2 matrix
contigency_tab <- as.table(matrix(c(TP,FP, FN, TN), nrow = 2, byrow = TRUE))
# Use epi.stats function from epiR package to calculate specificty and sensitivity and confidence bounds
specificity <- epi.tests(contigency_tab, conf.level = 0.95)$elements$specificity
spec_result <- specificity$est
spec_lower <- specificity$lower
spec_upper <- specificity$upper
sensitivity <- epi.tests(contigency_tab, conf.level = 0.95)$elements$sensitivity
sens_result <- sensitivity$est
sens_lower <- sensitivity$lower
sens_upper <- sensitivity$upper
# add to lists
drugs <- c(drugs, drug)
specificities <- c(specificities, spec_result)
specificity_lower_limits <- c(specificity_lower_limits, spec_lower)
specificity_upper_limits <- c(specificity_upper_limits, spec_upper)
sensitivities = c(sensitivities, sens_result)
sensitivity_lower_limits <- c(sensitivity_lower_limits, sens_lower)
sensitivity_upper_limits <- c(sensitivity_upper_limits, sens_upper)
}
# make table from results
results <- tibble(
"drug" = drugs,
"num_tests" = num_tests,
"concordance" = concordances,
"TP" = TPs,
"FP" = FPs,
"TN" = FNs,
"FN" = FNs,
"specificity" = specificities,
"specificity_lower_limits" = specificity_lower_limits,
"specificity_upper_limits" = specificity_upper_limits,
"sensitivity" = sensitivities,
"sensitivity_lower_limits" = sensitivity_lower_limits,
"sensitivity_upper_limits" = sensitivity_upper_limits
)
results %>% kable() %>% kable_styling() %>% scroll_box(width = "100%")
drug | num_tests | concordance | TP | FP | TN | FN |
---|---|---|---|---|---|---|
IPM | 308 | 0.8311688 | 237 | 12 | 40 | 40 |
MEM | 308 | 0.8636364 | 239 | 10 | 32 | 32 |