Skip to content

Commit 5a39ccd

Browse files
JesperDramsch, claude, and pre-commit-ci[bot]
authored
fix: merging in CI (#209)
* fix(ci): handle non-interactive mode for missing field prompts

  - Add auto-detection of 'sub' field based on conference name keywords
  - Define default 'sub' value (PY) for conferences without keyword match
  - Skip interactive prompts in CI environments (when stdin is not a tty)
  - Fixes EOF error when CI tries to prompt for missing conference fields

  The fill_missing_required function now:

  1. Tries to auto-detect 'sub' using existing subs.yml keywords
  2. Falls back to DEFAULT_SUB (PY) in non-interactive mode
  3. Skips prompts for other required fields in CI (leaves as NaN for validation)

* fix(subs): use module-relative path for subs.yml loading

  - Change load_subs() to use Path(__file__).parent for robustness
  - Add fallback in _load_subs_keywords() if loading fails
  - Prevents failures when script is run from non-root directories

* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci

* fix(docs): fix docstring format for docsig compliance

* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci

* fix(docs): use numpy-style docstrings with type on separate line

---------

Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent f7443b1 commit 5a39ccd

3 files changed

Lines changed: 80 additions & 3 deletions

File tree

utils/tidy_conf/deduplicate.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
def merge_near_duplicates(group):
55
# Fill missing values with the next value then take the first row
6-
with pd.option_context('future.no_silent_downcasting', True):
6+
with pd.option_context("future.no_silent_downcasting", True):
77
group = group.bfill().ffill().infer_objects(copy=False)
88
return group.iloc[0]
99

utils/tidy_conf/subs.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,11 @@ def auto_add_sub(data):
2020

2121

2222
def load_subs():
23-
with Path("utils", "tidy_conf", "data", "subs.yml").open(encoding="utf-8") as file:
23+
"""Load sub keywords from subs.yml.
24+
25+
Uses module-relative path for robustness regardless of working directory.
26+
"""
27+
subs_path = Path(__file__).parent / "data" / "subs.yml"
28+
with subs_path.open(encoding="utf-8") as file:
2429
data = yaml.safe_load(file)
2530
return data

utils/tidy_conf/utils.py

Lines changed: 73 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,60 @@ def query_yes_no(question, default="no"):
9898
sys.stdout.write("Please respond with 'yes' or 'no' (or 'y' or 'n').\n")
9999

100100

101-
def fill_missing_required(df):
101+
def _load_subs_keywords():
102+
"""Load sub keywords from subs.yml for auto-detection.
103+
104+
Returns empty dict if loading fails, allowing fallback to DEFAULT_SUB.
105+
"""
106+
try:
107+
from .subs import load_subs
108+
109+
return load_subs()
110+
except (FileNotFoundError, ImportError):
111+
return {}
112+
113+
114+
def _auto_detect_sub(conference_name: str) -> str | None:
115+
"""Auto-detect sub category based on conference name.
116+
117+
Parameters
118+
----------
119+
conference_name : str
120+
Name of the conference
121+
122+
Returns
123+
-------
124+
str | None
125+
Sub category string if matched, None otherwise.
126+
"""
127+
keywords = _load_subs_keywords()
128+
name_lower = conference_name.lower()
129+
for sub_key, sub_keywords in keywords.items():
130+
if any(word in name_lower for word in sub_keywords):
131+
return sub_key
132+
return None
133+
134+
135+
# Default sub value for conferences that don't match any keyword
136+
DEFAULT_SUB = "PY"
137+
138+
139+
def fill_missing_required(df: pd.DataFrame) -> pd.DataFrame:
140+
"""Fill missing required fields in the DataFrame.
141+
142+
In non-interactive environments (CI), uses auto-detection and defaults
143+
instead of prompting for user input.
144+
145+
Parameters
146+
----------
147+
df : pd.DataFrame
148+
DataFrame with conference data
149+
150+
Returns
151+
-------
152+
pd.DataFrame
153+
DataFrame with missing required fields filled.
154+
"""
102155
required = [
103156
"conference",
104157
"year",
@@ -110,9 +163,28 @@ def fill_missing_required(df):
110163
"sub",
111164
]
112165

166+
is_interactive = sys.stdin.isatty()
167+
113168
for i, row in df.copy().iterrows():
114169
for keyword in required:
115170
if pd.isna(row[keyword]):
171+
# Handle sub field specially - try auto-detection first
172+
if keyword == "sub":
173+
detected_sub = _auto_detect_sub(row["conference"])
174+
if detected_sub:
175+
df.loc[i, keyword] = detected_sub
176+
continue
177+
# Use default if no match and non-interactive
178+
if not is_interactive:
179+
df.loc[i, keyword] = DEFAULT_SUB
180+
continue
181+
182+
# In non-interactive mode, skip prompting for other fields
183+
if not is_interactive:
184+
# Leave as NaN - will be caught by validation later
185+
continue
186+
187+
# Interactive mode - prompt user
116188
user_input = input(
117189
f"What's the value of '{keyword}' for conference '{row['conference']}' check {row['link']} ?: ",
118190
)

0 commit comments

Comments
 (0)