ploting_GHRU_data.Rmd

This vignette demonstrates how to use some of the built-in GHRU R functions to plot data.
For these plots we will need epidemiological (epi) data, MLST data and AMR data.
library(ghruR)
library(kableExtra)
# Settings shared by every download below. They are defined once so that the
# country or the account only has to be changed in a single place.
ghru_country <- "India"
ghru_user_email <- "anthony.underwood@cgps.group"

# Epidemiological metadata, then passed through ghruR::clean_data()
epi_data <- ghruR::get_data_for_country(
  country_value = ghru_country,
  type_value = "Epidemiological Metadata",
  user_email = ghru_user_email
)
## [1] "anthony.underwood@cgps.group"
## [1] "anthony.underwood@cgps.group"
## [1] "anthony.underwood@cgps.group"
epi_data <- ghruR::clean_data(epi_data)

# MLST results for the same country
mlst_data <- ghruR::get_data_for_country(
  country_value = ghru_country,
  type_value = "MLST",
  user_email = ghru_user_email
)
## [1] "anthony.underwood@cgps.group"

# AMR results for Klebsiella pneumoniae, restricted to AMR_type "acquired"
amr_data <- ghruR::get_data_for_country(
  country_value = ghru_country,
  type_value = "AMR Klebsiella pneumoniae",
  AMR_type = "acquired",
  user_email = ghru_user_email
)
## [1] "anthony.underwood@cgps.group"
# Select just the samples for which we have AMR data.
# The dplyr verbs are namespace-qualified (matching dplyr::filter below) so
# this step does not rely on dplyr being attached; only ghruR and kableExtra
# are loaded with library().
kpn_ids <- amr_data %>% dplyr::pull(`Sample id`)
combined_data <- epi_data %>%
  dplyr::filter(`Sample id` %in% kpn_ids) %>%
  dplyr::left_join(mlst_data, by = "Sample id")
# Show some basic stats: number of samples per sentinel site
samples_per_sentinel_site <- count_samples_by_sentinel_site(combined_data)
samples_per_sentinel_site %>% kable() %>% kable_styling() %>% scroll_box(width = "100%")

| Sentinel Site Code | Sample Count | 
|---|---|
| AIIMSJ | 19 | 
| APH | 5 | 
| BCH | 258 | 
| BCR | 9 | 
| CMC | 8 | 
| IGIMS | 21 | 
| IPH | 1 | 
| JAY | 70 | 
| JIP | 55 | 
| KGMU | 9 | 
| KMC | 28 | 
| KMN | 1 | 
| LPL | 3 | 
| MEDQ | 2 | 
| MIMS | 5 | 
| NIM | 5 | 
| PRIM | 7 | 
| RBH | 7 | 
| RRM | 13 | 
| SDU | 5 | 
| SMF | 5 | 
| SMS | 7 | 
| TSRM | 6 | 
| UTK | 29 | 
| VPC | 64 | 
# Tally the STs in the combined data and plot all of them, ordered by count
st_counts <- combined_data %>% ghruR::count_sts()
st_plot <- plot_sts(st_counts, order_by_count = TRUE)
print(st_plot)

# Same plot again, restricted to the output of count_most_frequent_sts()
most_frequent_sts <- st_counts %>% count_most_frequent_sts()
st_plot <- plot_sts(most_frequent_sts, order_by_count = TRUE)
print(st_plot)

# Break the ST counts down by sentinel site, reduce them with
# per_sentinel_site = 2, and plot the result
st_counts_by_sentinel_site <- combined_data %>%
  count_sts_by_sentinel_site()
most_frequent_st_counts_by_sentinel_site <- st_counts_by_sentinel_site %>%
  count_most_frequent_sts_per_sentinel_site(per_sentinel_site = 2)
plot_most_frequent_sentinel_site_sts(
  most_frequent_st_counts_by_sentinel_site
)
First, convert the AMR data to long format and annotate it with NCBI metadata, then plot the selected drug classes.
# Convert the AMR data to long format and annotate it
annotated_amr_data <- ghruR::annotate_amr_data(amr_data)
# Filter the long-format data
annotated_amr_data <- ghruR::filter_long_data(annotated_amr_data)
# Add the Sentinel Site Code of each sample from the epi data.
# Plain assignment with namespace-qualified dplyr verbs replaces magrittr's
# `%<>%`, so this step does not need magrittr or dplyr to be attached.
annotated_amr_data <- annotated_amr_data %>%
  dplyr::left_join(
    epi_data %>% dplyr::select(`Sample id`, `Sentinel Site Code`),
    by = "Sample id"
  )
# Select the drug subclasses to summarise
selected_drug_subclasses <- c("BETA-LACTAM", "CEPHALOSPORIN", "CARBAPENEM")
subclass_counts_by_sentinel_site <-
  ghruR::count_AMR_subclasses_by_sentinel_site(
    annotated_amr_data,
    samples_per_sentinel_site,
    selected_drug_subclasses
  )
amr_subclasses_plot <- ghruR::plot_AMR_subclasses_by_sentinel_site(subclass_counts_by_sentinel_site)

Make a plot showing the distribution of gene families, then combine it with the drug-subclass plot.
# Gene-family counts per sentinel site for the selected drug subclasses
gene_family_counts_by_sentinel_site <- annotated_amr_data %>%
  ghruR::count_gene_families_by_sentinel_site(
    subclass_counts_by_sentinel_site,
    selected_drug_subclasses
  )
gene_family_dot_plot <- ghruR::dot_plot_gene_family_counts_by_sentinel_site(
  gene_family_counts_by_sentinel_site
)
# Stack the two plots in a single column, vertically aligned, with the
# dot plot given three times the height of the subclass plot
cowplot::plot_grid(
  amr_subclasses_plot,
  gene_family_dot_plot,
  ncol = 1,
  align = "v",
  rel_heights = c(1, 3)
)
It is important to look at the gene alleles responsible for resistance. Here we look at just the cephalosporin and carbapenem subclasses.
# Drug subclasses whose alleles are examined
drug_subclasses <- c("CEPHALOSPORIN", "CARBAPENEM")

# Allele counts per sentinel site for those subclasses
allele_counts_by_sentinel_site <- annotated_amr_data %>%
  ghruR::count_alleles_by_sentinel_site(
    gene_family_counts_by_sentinel_site,
    drug_subclasses
  )

# plot_allele_counts_by_sentinel_site() returns a list-like object;
# its first two elements are printed in order
allele_counts_plots <- ghruR::plot_allele_counts_by_sentinel_site(
  allele_counts_by_sentinel_site,
  drug_subclasses
)
for (plot_index in seq_len(2L)) {
  print(allele_counts_plots[[plot_index]])
}