From 1e2fe827573294b3e7d010d8e1c2ba0af15d8a50 Mon Sep 17 00:00:00 2001 From: Shichao Song <60967965+Ki-Seki@users.noreply.github.com> Date: Mon, 13 Apr 2026 22:14:03 +0800 Subject: [PATCH 1/2] Refactor humaneval_infilling.py to load multiple subsets from the dataset and remove TODO comment --- src/gimbench/code/humaneval_infilling.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/gimbench/code/humaneval_infilling.py b/src/gimbench/code/humaneval_infilling.py index 46d5993..3ea8f9d 100644 --- a/src/gimbench/code/humaneval_infilling.py +++ b/src/gimbench/code/humaneval_infilling.py @@ -1,7 +1,6 @@ # https://huggingface.co/datasets/Sculpt-AI/humaneval_infilling -# TODO: the HF dataset repo needs repairs. -from datasets import load_dataset +from datasets import load_dataset, concatenate_datasets from gimbench.arguments import get_args from gimbench.code.evaluators import conduct_eval @@ -15,9 +14,18 @@ args = get_args() args.dataset = { "path": "Sculpt-AI/humaneval_infilling", + "subsets": [ + "MultiLine", + "RandomSpan", + "RandomSpanLight", + "SingleLine" + ] } - ds = load_dataset(args.dataset["path"], split="train") + ds = concatenate_datasets([ + load_dataset(args.dataset["path"], split="test", name=subset) + for subset in args.dataset["subsets"] + ]).shuffle(seed=args.seed) logger.info(f"Loaded {len(ds)} samples from dataset {args.dataset}") logger.info(f"Columns: {ds.column_names}") logger.info(f"First sample: {ds[0]}") From 8b3dd46cd82fa34f8e130dc7e9ee9648bfff7b47 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 13 Apr 2026 14:14:26 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/gimbench/code/humaneval_infilling.py | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/src/gimbench/code/humaneval_infilling.py b/src/gimbench/code/humaneval_infilling.py index 3ea8f9d..8ff3013 100644 --- a/src/gimbench/code/humaneval_infilling.py +++ b/src/gimbench/code/humaneval_infilling.py @@ -1,6 +1,6 @@ # https://huggingface.co/datasets/Sculpt-AI/humaneval_infilling -from datasets import load_dataset, concatenate_datasets +from datasets import concatenate_datasets, load_dataset from gimbench.arguments import get_args from gimbench.code.evaluators import conduct_eval @@ -14,18 +14,12 @@ args = get_args() args.dataset = { "path": "Sculpt-AI/humaneval_infilling", - "subsets": [ - "MultiLine", - "RandomSpan", - "RandomSpanLight", - "SingleLine" - ] + "subsets": ["MultiLine", "RandomSpan", "RandomSpanLight", "SingleLine"], } - ds = concatenate_datasets([ - load_dataset(args.dataset["path"], split="test", name=subset) - for subset in args.dataset["subsets"] - ]).shuffle(seed=args.seed) + ds = concatenate_datasets( + [load_dataset(args.dataset["path"], split="test", name=subset) for subset in args.dataset["subsets"]] + ).shuffle(seed=args.seed) logger.info(f"Loaded {len(ds)} samples from dataset {args.dataset}") logger.info(f"Columns: {ds.column_names}") logger.info(f"First sample: {ds[0]}")