From ff934d5960bfbb185d8547e915ba79afe948b565 Mon Sep 17 00:00:00 2001 From: matistjati Date: Sun, 17 Aug 2025 19:27:29 +0200 Subject: [PATCH 1/5] Slow fuzzing of output vals --- problemtools/verifyproblem.py | 36 ++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/problemtools/verifyproblem.py b/problemtools/verifyproblem.py index a15d913b..447f5330 100644 --- a/problemtools/verifyproblem.py +++ b/problemtools/verifyproblem.py @@ -953,6 +953,7 @@ def __str__(self) -> str: return 'attachments' +# Junk data. The validator should reject these cases _JUNK_CASES = [ ('an empty file', b''), ('a binary file with random bytes', bytearray(random.Random(0).randbytes(1024))), @@ -963,6 +964,15 @@ def __str__(self) -> str: ), ] +# These cases might be valid and should not always be rejected. +# However, they might crash an output validator, thus causing a judge error. +_JUNK_CASES_CRASH = [ + ('a file with the number 1', b'1'), + ('a file with the contents "1\na"', b'1\na'), + ('a file with the number -1', b'-1'), + ('a file with the number 2147483648', b'2147483648'), + ('a file with the number 9223372036854775808', b'9223372036854775808'), +] def _build_junk_modifier( desc: str, pattern: str, repl: str | Callable[[Match[str]], str] @@ -1270,20 +1280,32 @@ def check(self, context: Context) -> bool: if self._check_res: flags = self.problem.metadata.legacy_validator_flags - fd, file_name = tempfile.mkstemp() - os.close(fd) - for desc, case in _JUNK_CASES: - f = open(file_name, 'wb') - f.write(case) + # Sanity check cases that should be rejected by the output validator + def run_junk_case(junk_file, junk_content): + f = open(junk_file, 'wb') + f.write(junk_content) f.close() rejected = False + results = [] for testcase in self.problem.testdata.get_all_testcases(): result = self.validate(testcase, file_name) - if result.verdict != 'AC': - rejected = True + results.append(result) if result.verdict == 'JE': self.error(f'{desc} as output, and output validator flags "{" ".join(flags)}" gave {result}') break + return results + + fd, file_name = tempfile.mkstemp() + os.close(fd) + for desc, case in _JUNK_CASES: + results = run_junk_case(file_name, case) + rejected = any(result.verdict != 'AC' for result in results) + if not rejected: + self.warning(f'{desc} gets AC') + for desc, case in _JUNK_CASES_CRASH: + continue + run_junk_case(file_name, case) + rejected = any(result.verdict != 'AC' for result in results) if not rejected: self.warning(f'{desc} gets AC') os.unlink(file_name) From bc4a1bf1af5618049da740af50f8bab7942e9607 Mon Sep 17 00:00:00 2001 From: Matistjati Date: Tue, 19 Aug 2025 20:41:34 +0200 Subject: [PATCH 2/5] Only fuzz on samples --- problemtools/verifyproblem.py | 60 ++++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 26 deletions(-) diff --git a/problemtools/verifyproblem.py b/problemtools/verifyproblem.py index 447f5330..0d9fdf19 100644 --- a/problemtools/verifyproblem.py +++ b/problemtools/verifyproblem.py @@ -964,16 +964,27 @@ def __str__(self) -> str: ), ] -# These cases might be valid and should not always be rejected. -# However, they might crash an output validator, thus causing a judge error. +# Try to crash the output validator, causing a judge error _JUNK_CASES_CRASH = [ - ('a file with the number 1', b'1'), - ('a file with the contents "1\na"', b'1\na'), ('a file with the number -1', b'-1'), + ('a file with the number 2147483647', b'2147483647'), ('a file with the number 2147483648', b'2147483648'), ('a file with the number 9223372036854775808', b'9223372036854775808'), + ('a file with the number 0', b'0'), + ('a file with the number 1', b'1'), + ('a file with the number 1.0', b'1.0'), + ('a file with the string "a"', b'a'), + ('a file with the contents "2\\n-1 1"', b'2\n-1 1'), + ('a file with the contents "2\\n1"', b'2\n1'), + ('a file with the contents "1\\n-1 1"', b'1\n-1 1'), + ('a file with the contents "1\\na"', b'1\na'), + ('a file with the contents "(()"', b'(()'), + ('a file with the contents "1-"', b'1-'), + ('a file with the contents "1/0"', b'1/0'), + ('a file with the contents "2\\n<"', b'2\n<'), ] + def _build_junk_modifier( desc: str, pattern: str, repl: str | Callable[[Match[str]], str] ) -> tuple[str, Callable, Callable[[str], str]]: @@ -1281,34 +1292,31 @@ def check(self, context: Context) -> bool: flags = self.problem.metadata.legacy_validator_flags # Sanity check cases that should be rejected by the output validator - def run_junk_case(junk_file, junk_content): - f = open(junk_file, 'wb') - f.write(junk_content) - f.close() - rejected = False + def run_junk_case(case_desc: str, junk_content: str, testcases: list[TestCase]) -> list[SubmissionResult]: results = [] - for testcase in self.problem.testdata.get_all_testcases(): - result = self.validate(testcase, file_name) - results.append(result) - if result.verdict == 'JE': - self.error(f'{desc} as output, and output validator flags "{" ".join(flags)}" gave {result}') - break + with tempfile.NamedTemporaryFile(mode='wb') as f: + f.write(junk_content) + f.flush() + for testcase in testcases: + result = self.validate(testcase, f.name) + results.append(result) + if result.verdict == 'JE': + self.error(f'{case_desc} as output, and output validator flags "{" ".join(flags)}" gave {result}') + break return results - fd, file_name = tempfile.mkstemp() - os.close(fd) - for desc, case in _JUNK_CASES: - results = run_junk_case(file_name, case) - rejected = any(result.verdict != 'AC' for result in results) - if not rejected: - self.warning(f'{desc} gets AC') - for desc, case in _JUNK_CASES_CRASH: - continue - run_junk_case(file_name, case) + # Junk cases that the output validator should reject + for desc, junk_case_content in _JUNK_CASES: + results = run_junk_case(desc, junk_case_content, self.problem.testdata.get_all_testcases()) rejected = any(result.verdict != 'AC' for result in results) if not rejected: self.warning(f'{desc} gets AC') - os.unlink(file_name) + + # Malformed cases that a poorly-written output validator might crash on + # Note that these might be valid output, so we only check if it crashes + sample_cases = [tc for tc in self.problem.testdata.get_all_testcases() if tc.is_in_sample_group()] + for desc, junk_case_content in _JUNK_CASES_CRASH: + run_junk_case(desc, junk_case_content, sample_cases) return self._check_res From f951d0789f948f6cb589c7a20ac931e4a488b584 Mon Sep 17 00:00:00 2001 From: Matistjati Date: Tue, 19 Aug 2025 21:02:30 +0200 Subject: [PATCH 3/5] Fix type hint --- problemtools/verifyproblem.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/problemtools/verifyproblem.py b/problemtools/verifyproblem.py index 0d9fdf19..8b7b72df 100644 --- a/problemtools/verifyproblem.py +++ b/problemtools/verifyproblem.py @@ -1292,7 +1292,7 @@ def check(self, context: Context) -> bool: flags = self.problem.metadata.legacy_validator_flags # Sanity check cases that should be rejected by the output validator - def run_junk_case(case_desc: str, junk_content: str, testcases: list[TestCase]) -> list[SubmissionResult]: + def run_junk_case(case_desc: str, junk_content: bytes, testcases: list[TestCase]) -> list[SubmissionResult]: results = [] with tempfile.NamedTemporaryFile(mode='wb') as f: f.write(junk_content) From a48a387c277d16bf2a31d7c22788a1ff4fd4486d Mon Sep 17 00:00:00 2001 From: Matistjati Date: Wed, 20 Aug 2025 13:35:52 +0200 Subject: [PATCH 4/5] More crash cases and limit if using python --- problemtools/verifyproblem.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/problemtools/verifyproblem.py b/problemtools/verifyproblem.py index 8b7b72df..91334eb9 100644 --- a/problemtools/verifyproblem.py +++ b/problemtools/verifyproblem.py @@ -982,6 +982,10 @@ def __str__(self) -> str: ('a file with the contents "1-"', b'1-'), ('a file with the contents "1/0"', b'1/0'), ('a file with the contents "2\\n<"', b'2\n<'), + ('a file with the contents "NaN"', b'NaN'), + ('a file with the contents "inf"', b'inf'), + ('a file with the contents "\\x00"', b'\x00'), + ('a file with the contents "\\x80"', b'\x80'), ] @@ -1312,11 +1316,14 @@ def run_junk_case(case_desc: str, junk_content: bytes, testcases: list[TestCase] if not rejected: self.warning(f'{desc} gets AC') + # For performance reasons, strongly limit the amount of testcases we run on + fast_languages = {'c', 'cpp'} + num_testcases = 3 if all(v.language.lang_id in fast_languages for v in self._validators) else 1 + test_cases = self.problem.testdata.get_all_testcases()[:num_testcases] # Malformed cases that a poorly-written output validator might crash on # Note that these might be valid output, so we only check if it crashes - sample_cases = [tc for tc in self.problem.testdata.get_all_testcases() if tc.is_in_sample_group()] for desc, junk_case_content in _JUNK_CASES_CRASH: - run_junk_case(desc, junk_case_content, sample_cases) + run_junk_case(desc, junk_case_content, test_cases) return self._check_res From 3f2c5f221e6ee417e43d87fb8c917745278f4254 Mon Sep 17 00:00:00 2001 From: Matistjati Date: Wed, 20 Aug 2025 13:43:53 +0200 Subject: [PATCH 5/5] Fix type error --- problemtools/verifyproblem.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/problemtools/verifyproblem.py b/problemtools/verifyproblem.py index 91334eb9..9fe4b65e 100644 --- a/problemtools/verifyproblem.py +++ b/problemtools/verifyproblem.py @@ -1318,7 +1318,11 @@ def run_junk_case(case_desc: str, junk_content: bytes, testcases: list[TestCase] # For performance reasons, strongly limit the amount of testcases we run on fast_languages = {'c', 'cpp'} - num_testcases = 3 if all(v.language.lang_id in fast_languages for v in self._validators) else 1 + all_validators_are_fast = True + for val in self._validators: + if isinstance(val, run.SourceCode): + all_validators_are_fast &= val.language.lang_id in fast_languages + num_testcases = 3 if all_validators_are_fast else 1 test_cases = self.problem.testdata.get_all_testcases()[:num_testcases] # Malformed cases that a poorly-written output validator might crash on # Note that these might be valid output, so we only check if it crashes