From 62f20958c4157945e8762c333faa46d91452acdd Mon Sep 17 00:00:00 2001 From: Zoe Anne Dyson Date: Wed, 19 Feb 2025 13:51:05 +0000 Subject: [PATCH] Update hAMRonization_function_v3.R Updated to parse data in AMRgen genotype data frame format. Notes: - may need to handle contig information if included in raw data (it is currently appended to the sample ID, creating a many-to-one relationship for some test data), but this could be something for the user to sort out before using the package. - the drug columns need updating with the as.ab() function when it is available. --- R/hAMRonization_function_v3.R | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/R/hAMRonization_function_v3.R b/R/hAMRonization_function_v3.R index 7e20766e..23e23844 100644 --- a/R/hAMRonization_function_v3.R +++ b/R/hAMRonization_function_v3.R @@ -76,6 +76,20 @@ harmonize_data <- function(user_software_name, # convert output to data frame py_run_string("hamronized_output_df = pandas.DataFrame(hamronized_output)") + # convert output to AMRgen genotype data frame format + hamronized_data <- py$hamronized_output_df %>% + mutate(Sample_ID = input_sequence_id) %>% + mutate(marker = as.gene(gene_symbol)) %>% + mutate(drug_agent = antimicrobial_agent) %>% + select(Sample_ID, marker, drug_class, drug_agent) + + # Separate drug classes for rgi data + if (user_software_name=="rgi"){ + hamronized_data <- hamronized_data %>% + separate_longer_delim(., drug_class, delim=";") %>% + mutate(drug_class = str_trim(drug_class, side = "both")) + } + # return harmonized data frame - return(py$hamronized_output_df) + return(hamronized_data) }