Introduction


This vignette demonstrates a concordance analysis looking at the concordance of just presence of a carbapenamase with susceptiblity or non-susceptibility as determined by phenotypic testing.

Getting and formatting the data


First get the relevant data sets

library(ghruR)
library(kableExtra)
ast_data <- ghruR::get_data_for_country(
  country_value = "India",
  type_value = "Antimicrobial Susceptibility Testing",
  user_email = "anthony.underwood@cgps.group")
## [1] "anthony.underwood@cgps.group"
## [1] "anthony.underwood@cgps.group"
## [1] "anthony.underwood@cgps.group"
acquired_amr_data <- ghruR::get_data_for_country(
  country_value = "India",
  type_value = "AMR Klebsiella pneumoniae",
  AMR_type = "acquired",
  user_email = "anthony.underwood@cgps.group")
## [1] "anthony.underwood@cgps.group"

Samples with carbapenamases


Convert the acquired AMR data to long format, and annotate it and for each sample with at least one carbapenamase record NS (non-susceptible) else S

# convert to long format
annotated_amr_data <- ghruR::annotate_amr_data(acquired_amr_data)
# filter data for those that have acquired the gene and have good pct_id match and ctg_cov
acquired_genes_present <- ghruR::filter_long_data(annotated_amr_data)
# filter by subclass
carbapenamases <- acquired_genes_present %>%
  dplyr::filter(subclass == "CARBAPENEM") %>%
  dplyr::select(`Sample id`, gene) %>%
  dplyr::rename(determinant = gene)
# collapse the data so there is one row per sample using the summarise feature
carbapenamases %<>% dplyr::group_by(`Sample id`) %>% 
  dplyr::summarise(determinant = paste0(determinant, collapse = ";"))
# Join with original data so we have a row for each sample
samples_with_carbapenamases <- acquired_amr_data %>% select(`Sample id`) %>% 
  dplyr::left_join(carbapenamases, by = 'Sample id')
# Convert to predictions
samples_with_predicted_carabapenem_resistance <- samples_with_carbapenamases %>% 
  dplyr::mutate(predicted_resistance = dplyr::case_when(
    is.na(determinant) ~ "S",
    TRUE ~ "NS"
  ))

Linking to AST data


genomic_and_ast_data <- ast_data %>%
  select(`Sample id`, IPM, MEM) %>% 
  dplyr::mutate(IPM_phenotypic_resistance = dplyr::case_when(
    stringr::str_starts(IPM, "S") ~ "S",
    stringr::str_starts(IPM, "I") ~ "NS",
    stringr::str_starts(IPM, "R") ~ "NS",
    TRUE ~ ""
  )) %>% 
  dplyr::mutate(MEM_phenotypic_resistance = dplyr::case_when(
    stringr::str_starts(MEM, "S") ~ "S",
    stringr::str_starts(MEM, "I") ~ "NS",
    stringr::str_starts(MEM, "R") ~ "NS",
    TRUE ~ ""
  )) %>% 
  dplyr::inner_join(samples_with_predicted_carabapenem_resistance)

Calculate sensitivity specificty

library(epiR)
drugs <- c()
num_tests <- c()
concordances <-  c()
TPs <- c()
FPs <- c()
TNs <- c()
FNs <- c()
specificities <-  c()
specificity_lower_limits <- c()
specificity_upper_limits <- c()
sensitivities <-  c()
sensitivity_lower_limits <- c()
sensitivity_upper_limits <- c()
for (drug in c("IPM", "MEM")){
  drug_num_tests <- sum(!is.na(genomic_and_ast_data[[paste0(drug, '_phenotypic_resistance')]]) & !is.na(genomic_and_ast_data[['predicted_resistance']]), na.rm = TRUE)
  concordance <- sum(genomic_and_ast_data[[paste0(drug, '_phenotypic_resistance')]] == genomic_and_ast_data[['predicted_resistance']], na.rm = TRUE) / drug_num_tests
  # add to lists
  num_tests <- c(num_tests, drug_num_tests)
  concordances <- c(concordances, concordance)
  # calculate TP, FP, FN, TN
  TP <- sum(genomic_and_ast_data[[paste0(drug, '_phenotypic_resistance')]] == "NS" & genomic_and_ast_data[['predicted_resistance']] == "NS", na.rm = TRUE)
  FP <- sum(genomic_and_ast_data[[paste0(drug, '_phenotypic_resistance')]] == "S" & genomic_and_ast_data[['predicted_resistance']] == "NS", na.rm = TRUE)
  FN <- sum(genomic_and_ast_data[[paste0(drug, '_phenotypic_resistance')]] == "NS" & genomic_and_ast_data[['predicted_resistance']] == "S", na.rm = TRUE)
  TN <- sum(genomic_and_ast_data[[paste0(drug, '_phenotypic_resistance')]] == "S" & genomic_and_ast_data[['predicted_resistance']] == "S", na.rm = TRUE)
  
  # add to lists
  TPs <- c(TPs, TP)
  FPs <- c(FPs, FP)
  TNs <- c(TNs, TN)
  FNs <- c(FNs, FN)
  
  # Make 2x2 matrix
  contigency_tab <- as.table(matrix(c(TP,FP, FN, TN), nrow = 2, byrow = TRUE))
  
  # Use epi.stats function from epiR package to calculate specificty and sensitivity and confidence bounds
  specificity <- epi.tests(contigency_tab, conf.level = 0.95)$elements$specificity
  spec_result <- specificity$est
  spec_lower <- specificity$lower
  spec_upper <- specificity$upper
  
  sensitivity <- epi.tests(contigency_tab, conf.level = 0.95)$elements$sensitivity
  sens_result <- sensitivity$est
  sens_lower <- sensitivity$lower
  sens_upper <- sensitivity$upper
  
  # add to lists
  drugs <- c(drugs, drug)
  specificities <- c(specificities, spec_result)
  specificity_lower_limits <- c(specificity_lower_limits, spec_lower)
  specificity_upper_limits <- c(specificity_upper_limits, spec_upper)
  sensitivities = c(sensitivities, sens_result)
  sensitivity_lower_limits <- c(sensitivity_lower_limits, sens_lower)
  sensitivity_upper_limits <- c(sensitivity_upper_limits, sens_upper)
}
# make table from results
results <- tibble(
                  "drug" = drugs,
                  "num_tests" = num_tests,
                  "concordance" = concordances,
                  "TP" = TPs,
                  "FP" = FPs,
                  "TN" = FNs,
                  "FN" = FNs,
                  "specificity" = specificities,
                  "specificity_lower_limits" = specificity_lower_limits,
                  "specificity_upper_limits" = specificity_upper_limits,
                  "sensitivity" = sensitivities,
                  "sensitivity_lower_limits" = sensitivity_lower_limits,
                  "sensitivity_upper_limits" = sensitivity_upper_limits
                )
results %>% kable() %>% kable_styling() %>% scroll_box(width = "100%")
drug num_tests concordance TP FP TN FN
IPM 308 0.8311688 237 12 40 40
MEM 308 0.8636364 239 10 32 32