ploting_GHRU_data.Rmd
This vignette demonstrates how to use some of the built-in GHRU R functions (from the `ghruR` package) to plot data.
For these plots we will need epidemiological (epi) data, MLST data and AMR data.
library(ghruR)
library(kableExtra)
# Request the "Epidemiological Metadata" records for India through ghruR.
epi_data <- ghruR::get_data_for_country(
  country_value = "India",
  type_value = "Epidemiological Metadata",
  user_email = "anthony.underwood@cgps.group"
)
## [1] "anthony.underwood@cgps.group"
## [1] "anthony.underwood@cgps.group"
## [1] "anthony.underwood@cgps.group"
# Pass the fetched epi data through ghruR's cleaning step; the cleaned result
# replaces the raw table (what is cleaned is defined inside ghruR::clean_data).
epi_data <- ghruR::clean_data(epi_data)
# Request the "MLST" records for India through ghruR.
mlst_data <- ghruR::get_data_for_country(
  country_value = "India",
  type_value = "MLST",
  user_email = "anthony.underwood@cgps.group"
)
## [1] "anthony.underwood@cgps.group"
# Request the "AMR Klebsiella pneumoniae" records for India, asking for the
# "acquired" AMR type.
amr_data <- ghruR::get_data_for_country(
  country_value = "India",
  type_value = "AMR Klebsiella pneumoniae",
  AMR_type = "acquired",
  user_email = "anthony.underwood@cgps.group"
)
## [1] "anthony.underwood@cgps.group"
# Select just the data for which we have AMR data, then left-join the MLST
# data onto it by `Sample id`.
# All dplyr verbs are namespace-qualified (as `filter` already was) so this
# chunk does not depend on dplyr having been attached by another package.
kpn_ids <- amr_data %>% dplyr::pull(`Sample id`)
combined_data <- epi_data %>%
  dplyr::filter(`Sample id` %in% kpn_ids) %>%
  dplyr::left_join(mlst_data, by = "Sample id")
# Show some basic stats: sample counts per sentinel site, rendered as a
# styled table inside a full-width scroll box.
samples_per_sentinel_site <- count_samples_by_sentinel_site(combined_data)
samples_per_sentinel_site %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(width = "100%")
Sentinel Site Code | Sample Count |
---|---|
AIIMSJ | 19 |
APH | 5 |
BCH | 258 |
BCR | 9 |
CMC | 8 |
IGIMS | 21 |
IPH | 1 |
JAY | 70 |
JIP | 55 |
KGMU | 9 |
KMC | 28 |
KMN | 1 |
LPL | 3 |
MEDQ | 2 |
MIMS | 5 |
NIM | 5 |
PRIM | 7 |
RBH | 7 |
RRM | 13 |
SDU | 5 |
SMF | 5 |
SMS | 7 |
TSRM | 6 |
UTK | 29 |
VPC | 64 |
# Count STs over the combined data and plot them with order_by_count = TRUE.
st_counts <- ghruR::count_sts(combined_data)
st_plot <- plot_sts(st_counts, order_by_count = TRUE)
print(st_plot)

# Repeat the plot for the most frequent STs only (st_plot is overwritten).
most_frequent_sts <- count_most_frequent_sts(st_counts)
st_plot <- plot_sts(most_frequent_sts, order_by_count = TRUE)
print(st_plot)

# Break the ST counts down by sentinel site and plot the most frequent STs
# per site.
# NOTE(review): per_sentinel_site = 2 presumably keeps the top two STs for
# each site - confirm against the ghruR documentation.
st_counts_by_sentinel_site <- count_sts_by_sentinel_site(combined_data)
most_frequent_st_counts_by_sentinel_site <-
  count_most_frequent_sts_per_sentinel_site(
    st_counts_by_sentinel_site,
    per_sentinel_site = 2
  )
plot_most_frequent_sentinel_site_sts(most_frequent_st_counts_by_sentinel_site)
First, convert the AMR data to long format and annotate it with NCBI metadata, then plot the selected drug classes.
# convert to long format
annotated_amr_data <- ghruR::annotate_amr_data(amr_data)

# filter data
annotated_amr_data <- ghruR::filter_long_data(annotated_amr_data)

# add Sentinel Site Code
# Explicit assignment replaces the magrittr compound pipe (`%<>%`), which is
# only available when magrittr itself is attached; the dplyr verbs are
# namespace-qualified for the same reason.
annotated_amr_data <- annotated_amr_data %>%
  dplyr::left_join(
    epi_data %>% dplyr::select(`Sample id`, `Sentinel Site Code`),
    by = "Sample id"
  )

# select drug classes
selected_drug_subclasses <- c("BETA-LACTAM", "CEPHALOSPORIN", "CARBAPENEM")

# Count the selected subclasses per sentinel site (using the per-site sample
# counts computed earlier) and build the plot; it is stored rather than
# printed here.
subclass_counts_by_sentinel_site <- ghruR::count_AMR_subclasses_by_sentinel_site(
  annotated_amr_data,
  samples_per_sentinel_site,
  selected_drug_subclasses
)
amr_subclasses_plot <- ghruR::plot_AMR_subclasses_by_sentinel_site(
  subclass_counts_by_sentinel_site
)
Make a plot showing the distribution of gene families, then combine it with the drug subclass plot in a single figure.
# Count gene families per sentinel site for the selected drug subclasses.
gene_family_counts_by_sentinel_site <-
  ghruR::count_gene_families_by_sentinel_site(
    annotated_amr_data,
    subclass_counts_by_sentinel_site,
    selected_drug_subclasses
  )

# Build the gene family dot plot.
gene_family_dot_plot <- ghruR::dot_plot_gene_family_counts_by_sentinel_site(
  gene_family_counts_by_sentinel_site
)

# Stack the subclass plot above the dot plot in one column, vertically
# aligned, with the dot plot given three times the height.
cowplot::plot_grid(
  amr_subclasses_plot,
  gene_family_dot_plot,
  ncol = 1,
  align = "v",
  rel_heights = c(1, 3)
)
It is important to look at the gene alleles responsible for resistance. Here we look at just the cephalosporin and carbapenem subclasses.
# Drug subclasses to include in the allele-level view.
drug_subclasses <- c('CEPHALOSPORIN', 'CARBAPENEM')

# Count alleles per sentinel site for those subclasses.
allele_counts_by_sentinel_site <- ghruR::count_alleles_by_sentinel_site(
  annotated_amr_data,
  gene_family_counts_by_sentinel_site,
  drug_subclasses
)

# Build the allele count plots; the result is indexed as a list below.
allele_counts_plots <- ghruR::plot_allele_counts_by_sentinel_site(
  allele_counts_by_sentinel_site,
  drug_subclasses
)

# Print the first two plots.
# NOTE(review): presumably one plot per requested subclass - confirm.
print(allele_counts_plots[[1]])
print(allele_counts_plots[[2]])