Skip to content

Commit a90ca5b

Browse files
authored
fix: handle empty extracted VCF fields (#43)
1 parent b9fee30 commit a90ca5b

1 file changed

Lines changed: 43 additions & 34 deletions

File tree

workflow/scripts/format_vcf_fields_longer.R

Lines changed: 43 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@ filter.exclude <- lapply(snakemake@params$filter_exclude, empty.to.na)
27 27

28 28
# Process input table
29 29
log_info("Applying filters and writing results")
30-
read_tsv(
30+
df <- read_tsv(
31 31
snakemake@input$tsv,
32 32
col_types = cols(
33 33
POS = col_integer(),
@@ -43,42 +43,51 @@ read_tsv(
43 43
) %>%
44 44

45 45
# Rename "...[*]..." columns using the provided lookup via Snakemake config
46-
rename(all_of(unlist(snakemake@params$colnames_mapping))) %>%
46+
rename(all_of(unlist(snakemake@params$colnames_mapping)))
47 47

48-
# Ensure missing values are properly encoded
49-
mutate(across(where(is.character), ~ na_if(.x, "NA"))) %>%
48+
if (nrow(df) == 0) {
49+
log_info("Writing empty file")
50+
write_tsv(df, snakemake@output$tsv)
51+
} else {
52+
log_info("Processing variants")
53+
df %>%
54+
# Ensure missing values are properly encoded
55+
mutate(across(where(is.character), ~ na_if(.x, "NA"))) %>%
50 56

51-
# Separate &-delimited error column (more than one error/warning/info message per row is possible)
52-
mutate(split_errors = strsplit(ERRORS, "&")) %>%
53-
# Keep rows with none of the excluded ERRORS terms, if any
54-
filter(map_lgl(split_errors, ~ !any(. %in% filter.exclude[["ERRORS"]]))) %>%
55-
select(-split_errors) %>%
57+
# Separate &-delimited error column (more than one error/warning/info message per row is possible)
58+
mutate(split_errors = strsplit(ERRORS, "&")) %>%
59+
# Keep rows with none of the excluded ERRORS terms, if any
60+
filter(map_lgl(split_errors, ~ !any(. %in% filter.exclude[["ERRORS"]]))) %>%
61+
select(-split_errors) %>%
56 62

57-
# Apply filters
58-
filter(
59-
# Keep variants that include required values in each field
60-
!!!map2(
61-
names(filter.include),
62-
filter.include,
63-
~ expr(.data[[!!.x]] %in% !!.y)
64-
),
65-
# Keep variants that exclude required values in each field
66-
!!!map2(
67-
names(filter.exclude),
68-
filter.exclude,
69-
~ expr(!(.data[[!!.x]] %in% !!.y))
70-
)
71-
) %>%
63+
# Apply filters
64+
filter(
65+
# Keep variants that include required values in each field
66+
!!!map2(
67+
names(filter.include),
68+
filter.include,
69+
~ expr(.data[[!!.x]] %in% !!.y)
70+
),
71+
# Keep variants that exclude required values in each field
72+
!!!map2(
73+
names(filter.exclude),
74+
filter.exclude,
75+
~ expr(!(.data[[!!.x]] %in% !!.y))
76+
)
77+
) %>%
72 78

73-
# Keep unique rows
74-
distinct() %>%
79+
# Keep unique rows
80+
distinct() %>%
75 81

76-
mutate(
77-
# Assign variant name using the pattern defined via Snakemake config
78-
VARIANT_NAME = str_glue(snakemake@params$variant_name_pattern),
79-
# Assign sample name
80-
SAMPLE = snakemake@params$sample
81-
) %>%
82+
mutate(
83+
# Assign variant name using the pattern defined via Snakemake config
84+
VARIANT_NAME = str_glue(snakemake@params$variant_name_pattern),
85+
# Assign sample name
86+
SAMPLE = snakemake@params$sample
87+
) %>%
82 88

83-
# Write output file
84-
write_tsv(snakemake@output$tsv)
89+
# Write output file
90+
write_tsv(snakemake@output$tsv)
91+
92+
log_info("Done")
93+
}

0 commit comments

Comments
 (0)