-
Notifications
You must be signed in to change notification settings - Fork 16
Expand file tree
/
Copy pathvalidation_error.rs
More file actions
118 lines (111 loc) · 3.8 KB
/
validation_error.rs
File metadata and controls
118 lines (111 loc) · 3.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
use itertools::Itertools;
use num_format::{Locale, ToFormattedString};
use polars::prelude::*;
use pyo3::{create_exception, exceptions::PyException, prelude::*};
use std::collections::HashMap;
use super::RuleFailure;
// Declares the Python exception type `PyRuleValidationError`, deriving from Python's built-in
// `Exception` (`PyException`) and using `exc` as its module name.
create_exception!(exc, PyRuleValidationError, PyException);
/// Rule failures split into schema-level and column-level groups, ready to be rendered as a
/// human-readable report via `to_string`.
///
/// The lifetime `'a` is the lifetime of the data borrowed by the contained `RuleFailure`s and by
/// the column names.
pub struct RuleValidationError<'a> {
/// Total number of failures handed to `new` (schema-level and column-level combined).
num_rule_failures: usize,
/// Failures whose rule name contains no `|` separator.
schema_errors: Vec<RuleFailure<'a>>,
/// Failures whose rule name contains a `|`, grouped as `(column name, failures)` where the
/// column name is the text before the first `|`.
column_errors: Vec<(&'a str, Vec<RuleFailure<'a>>)>,
}
impl<'a> RuleValidationError<'a> {
    /// Builds the report structure from a flat list of rule failures.
    ///
    /// A failure whose rule name contains `|` is treated as a column-level failure, with the text
    /// before the first `|` taken as the column name; every other failure is schema-level.
    ///
    /// Column-level failures are grouped with `chunk_by`, which only merges *consecutive* items
    /// sharing the same column name. Failures of one column that are not adjacent in
    /// `failure_counts` therefore end up in separate groups.
    pub fn new(failure_counts: Vec<RuleFailure<'a>>) -> Self {
        let num_rule_failures = failure_counts.len();

        let (column_level, schema_level): (Vec<_>, Vec<_>) = failure_counts
            .into_iter()
            .partition(|failure| failure.rule.contains("|"));

        // The unwrap cannot fail: the partition above only kept rules that contain a `|`.
        let runs_by_column = column_level
            .into_iter()
            .chunk_by(|failure| failure.rule.split_once("|").unwrap().0);

        let mut column_errors = Vec::new();
        for (column, run) in &runs_by_column {
            // NOTE(review): `split_off_column_name` is defined elsewhere; presumably it strips
            // the leading `column|` prefix from the rule name. Confirm against `RuleFailure`.
            let failures = run
                .map(|failure| failure.split_off_column_name())
                .collect::<Vec<_>>();
            column_errors.push((column, failures));
        }

        Self {
            num_rule_failures,
            schema_errors: schema_level,
            column_errors,
        }
    }

    /// Renders the failures as a multi-line report.
    ///
    /// The first line states the total number of failed rules and, when `schema` is given, the
    /// schema name. It is followed by one line per schema-level failure, then, for each column
    /// group, a header line and one line per failure in that group. Row counts are formatted with
    /// the `en` locale.
    ///
    /// `examples`, when present, is looked up by full rule name: the plain rule name for
    /// schema-level failures and `"{column}|{rule}"` for column-level failures. Any examples found
    /// are appended to the corresponding line.
    pub fn to_string(
        &self,
        schema: Option<&str>,
        examples: Option<&HashMap<String, Vec<String>>>,
    ) -> String {
        let mut report = match schema {
            Some(schema) => format!(
                "{} rules failed validation for schema '{schema}':",
                self.num_rule_failures
            ),
            None => format!("{} rules failed validation:", self.num_rule_failures),
        };

        for failure in &self.schema_errors {
            let examples_str = format_examples(failure.rule, examples);
            let line = format!(
                "\n - '{}' failed for {} rows{}",
                failure.rule,
                failure.count.to_formatted_string(&Locale::en),
                examples_str,
            );
            report.push_str(&line);
        }

        for (column, errors) in &self.column_errors {
            let header = format!(
                "\n * Column '{column}' failed validation for {} rules:",
                errors.len()
            );
            report.push_str(&header);

            for failure in errors {
                // Examples are keyed by the full `column|rule` name, so rebuild it here.
                let full_rule = format!("{}|{}", column, failure.rule);
                let examples_str = format_examples(&full_rule, examples);
                let line = format!(
                    "\n - '{}' failed for {} rows{}",
                    failure.rule,
                    failure.count.to_formatted_string(&Locale::en),
                    examples_str,
                );
                report.push_str(&line);
            }
        }

        report
    }
}
/// Builds the " with N distinct example(s): [...]" suffix for a rule's report line.
///
/// Looks up `rule` in `examples` and joins the values found with `", "`. Returns an empty string
/// when `examples` is `None`, when the rule has no entry, or when its entry is empty.
fn format_examples(rule: &str, examples: Option<&HashMap<String, Vec<String>>>) -> String {
    examples
        .and_then(|by_rule| by_rule.get(rule))
        .filter(|found| !found.is_empty())
        .map(|found| {
            // Singular noun only for exactly one example.
            let noun = if found.len() == 1 {
                "example"
            } else {
                "examples"
            };
            format!(
                " with {} distinct {}: [{}]",
                found.len(),
                noun,
                found.join(", ")
            )
        })
        .unwrap_or_default()
}
/// Python-exposed helper that formats rule failures as a human-readable report.
///
/// `failures` is a list of `(rule name, failure count)` pairs. `examples` optionally maps full
/// rule names to example values that are appended to the matching report lines. The report is
/// rendered without a schema name.
#[pyfunction]
#[pyo3(signature = (failures, examples=None))]
pub fn format_rule_failures(
    failures: Vec<(String, IdxSize)>,
    examples: Option<HashMap<String, Vec<String>>>,
) -> String {
    // Borrow the rule names from `failures`, which outlives the report built below.
    let rule_failures = failures
        .iter()
        .map(|(rule, count)| RuleFailure {
            rule,
            count: *count,
        })
        .collect();
    let report = RuleValidationError::new(rule_failures);
    report.to_string(None, examples.as_ref())
}