Example of a simple concordance analysis

Introduction

This vignette demonstrates a concordance analysis looking at the concordance of just presence of a carbapenamase with susceptiblity or non-susceptibility as determined by phenotypic testing.

Getting and formatting the data

First get the relevant data sets

library(ghruR)
library(kableExtra)
ast_data <- ghruR::get_data_for_country(
  country_value = "India",
  type_value = "Antimicrobial Susceptibility Testing",
  user_email = "anthony.underwood@cgps.group")

## [1] "anthony.underwood@cgps.group"
## [1] "anthony.underwood@cgps.group"
## [1] "anthony.underwood@cgps.group"

acquired_amr_data <- ghruR::get_data_for_country(
  country_value = "India",
  type_value = "AMR Klebsiella pneumoniae",
  AMR_type = "acquired",
  user_email = "anthony.underwood@cgps.group")

## [1] "anthony.underwood@cgps.group"

Samples with carbapenamases

Convert the acquired AMR data to long format, and annotate it and for each sample with at least one carbapenamase record NS (non-susceptible) else S

# convert to long format
annotated_amr_data <- ghruR::annotate_amr_data(acquired_amr_data)
# filter data for those that have acquired the gene and have good pct_id match and ctg_cov
acquired_genes_present <- ghruR::filter_long_data(annotated_amr_data)
# filter by subclass
carbapenamases <- acquired_genes_present %>%
  dplyr::filter(subclass == "CARBAPENEM") %>%
  dplyr::select(`Sample id`, gene) %>%
  dplyr::rename(determinant = gene)
# collapse the data so there is one row per sample using the summarise feature
carbapenamases %<>% dplyr::group_by(`Sample id`) %>% 
  dplyr::summarise(determinant = paste0(determinant, collapse = ";"))
# Join with original data so we have a row for each sample
samples_with_carbapenamases <- acquired_amr_data %>% select(`Sample id`) %>% 
  dplyr::left_join(carbapenamases, by = 'Sample id')
# Convert to predictions
samples_with_predicted_carabapenem_resistance <- samples_with_carbapenamases %>% 
  dplyr::mutate(predicted_resistance = dplyr::case_when(
    is.na(determinant) ~ "S",
    TRUE ~ "NS"
  ))

Linking to AST data

genomic_and_ast_data <- ast_data %>%
  select(`Sample id`, IPM, MEM) %>% 
  dplyr::mutate(IPM_phenotypic_resistance = dplyr::case_when(
    stringr::str_starts(IPM, "S") ~ "S",
    stringr::str_starts(IPM, "I") ~ "NS",
    stringr::str_starts(IPM, "R") ~ "NS",
    TRUE ~ ""
  )) %>% 
  dplyr::mutate(MEM_phenotypic_resistance = dplyr::case_when(
    stringr::str_starts(MEM, "S") ~ "S",
    stringr::str_starts(MEM, "I") ~ "NS",
    stringr::str_starts(MEM, "R") ~ "NS",
    TRUE ~ ""
  )) %>% 
  dplyr::inner_join(samples_with_predicted_carabapenem_resistance)

Calculate sensitivity specificty

library(epiR)
drugs <- c()
num_tests <- c()
concordances <-  c()
TPs <- c()
FPs <- c()
TNs <- c()
FNs <- c()
specificities <-  c()
specificity_lower_limits <- c()
specificity_upper_limits <- c()
sensitivities <-  c()
sensitivity_lower_limits <- c()
sensitivity_upper_limits <- c()
for (drug in c("IPM", "MEM")){
  drug_num_tests <- sum(!is.na(genomic_and_ast_data[[paste0(drug, '_phenotypic_resistance')]]) & !is.na(genomic_and_ast_data[['predicted_resistance']]), na.rm = TRUE)
  concordance <- sum(genomic_and_ast_data[[paste0(drug, '_phenotypic_resistance')]] == genomic_and_ast_data[['predicted_resistance']], na.rm = TRUE) / drug_num_tests
  # add to lists
  num_tests <- c(num_tests, drug_num_tests)
  concordances <- c(concordances, concordance)
  # calculate TP, FP, FN, TN
  TP <- sum(genomic_and_ast_data[[paste0(drug, '_phenotypic_resistance')]] == "NS" & genomic_and_ast_data[['predicted_resistance']] == "NS", na.rm = TRUE)
  FP <- sum(genomic_and_ast_data[[paste0(drug, '_phenotypic_resistance')]] == "S" & genomic_and_ast_data[['predicted_resistance']] == "NS", na.rm = TRUE)
  FN <- sum(genomic_and_ast_data[[paste0(drug, '_phenotypic_resistance')]] == "NS" & genomic_and_ast_data[['predicted_resistance']] == "S", na.rm = TRUE)
  TN <- sum(genomic_and_ast_data[[paste0(drug, '_phenotypic_resistance')]] == "S" & genomic_and_ast_data[['predicted_resistance']] == "S", na.rm = TRUE)
  
  # add to lists
  TPs <- c(TPs, TP)
  FPs <- c(FPs, FP)
  TNs <- c(TNs, TN)
  FNs <- c(FNs, FN)
  
  # Make 2x2 matrix
  contigency_tab <- as.table(matrix(c(TP,FP, FN, TN), nrow = 2, byrow = TRUE))
  
  # Use epi.stats function from epiR package to calculate specificty and sensitivity and confidence bounds
  specificity <- epi.tests(contigency_tab, conf.level = 0.95)$elements$specificity
  spec_result <- specificity$est
  spec_lower <- specificity$lower
  spec_upper <- specificity$upper
  
  sensitivity <- epi.tests(contigency_tab, conf.level = 0.95)$elements$sensitivity
  sens_result <- sensitivity$est
  sens_lower <- sensitivity$lower
  sens_upper <- sensitivity$upper
  
  # add to lists
  drugs <- c(drugs, drug)
  specificities <- c(specificities, spec_result)
  specificity_lower_limits <- c(specificity_lower_limits, spec_lower)
  specificity_upper_limits <- c(specificity_upper_limits, spec_upper)
  sensitivities = c(sensitivities, sens_result)
  sensitivity_lower_limits <- c(sensitivity_lower_limits, sens_lower)
  sensitivity_upper_limits <- c(sensitivity_upper_limits, sens_upper)
}
# make table from results
results <- tibble(
                  "drug" = drugs,
                  "num_tests" = num_tests,
                  "concordance" = concordances,
                  "TP" = TPs,
                  "FP" = FPs,
                  "TN" = FNs,
                  "FN" = FNs,
                  "specificity" = specificities,
                  "specificity_lower_limits" = specificity_lower_limits,
                  "specificity_upper_limits" = specificity_upper_limits,
                  "sensitivity" = sensitivities,
                  "sensitivity_lower_limits" = sensitivity_lower_limits,
                  "sensitivity_upper_limits" = sensitivity_upper_limits
                )
results %>% kable() %>% kable_styling() %>% scroll_box(width = "100%")

drug	num_tests	concordance	TP	FP	TN	FN
IPM	308	0.8311688	237	12	40	40
MEM	308	0.8636364	239	10	32	32

Anthony Underwood

Introduction

Getting and formatting the data

Samples with carbapenamases

Linking to AST data