JesperDramsch · JesperDramsch · Jan 19, 2026 · Jan 19, 2026 · Jan 19, 2026 · Jan 19, 2026
diff --git a/utils/tidy_conf/deduplicate.py b/utils/tidy_conf/deduplicate.py
@@ -3,7 +3,7 @@
 
 def merge_near_duplicates(group):
-    # Fill missing values with the next value then take the first row
-    with pd.option_context('future.no_silent_downcasting', True):
+    # Back-fill, then forward-fill, missing values within the group; keep the first row.
+    with pd.option_context("future.no_silent_downcasting", True):
         group = group.bfill().ffill().infer_objects(copy=False)
     return group.iloc[0]
 

diff --git a/utils/tidy_conf/subs.py b/utils/tidy_conf/subs.py
@@ -20,6 +20,11 @@ def auto_add_sub(data):
 
 
 def load_subs():
-    with Path("utils", "tidy_conf", "data", "subs.yml").open(encoding="utf-8") as file:
+    """Load and return the parsed contents of subs.yml (sub keywords).
+
+    The file is read from the data/ directory next to this module, not relative to the working directory.
+    """
+    subs_path = Path(__file__).parent / "data" / "subs.yml"
+    with subs_path.open(encoding="utf-8") as file:
         data = yaml.safe_load(file)
     return data
diff --git a/utils/tidy_conf/utils.py b/utils/tidy_conf/utils.py
@@ -98,7 +98,60 @@ def query_yes_no(question, default="no"):
         sys.stdout.write("Please respond with 'yes' or 'no' (or 'y' or 'n').\n")
 
 
-def fill_missing_required(df):
+def _load_subs_keywords() -> dict:
+    """Load sub keywords from subs.yml for auto-detection.
+
+    Returns an empty dict if subs.yml is missing or empty, or if the import fails, so callers can fall back.
+    """
+    try:
+        from .subs import load_subs
+
+        return load_subs() or {}
+    except (FileNotFoundError, ImportError):
+        return {}
+
+
+def _auto_detect_sub(conference_name: str) -> str | None:
+    """Auto-detect sub category based on conference name.
+
+    Parameters
+    ----------
+    conference_name : str
+        Name of the conference; a non-string value (e.g. NaN) never matches.
+
+    Returns
+    -------
+    str | None
+        Sub category string if matched, None otherwise.
+    """
+    if not isinstance(conference_name, str):  # e.g. NaN from a missing "conference" cell
+        return None
+    name_lower = conference_name.lower()
+    keywords = _load_subs_keywords() or {}
+    matches = (sub for sub, words in keywords.items() if any(word in name_lower for word in words or ()))
+    return next(matches, None)
+
+
+# Fallback "sub" used in non-interactive runs when no keyword matches the conference name
+DEFAULT_SUB = "PY"
+
+
+def fill_missing_required(df: pd.DataFrame) -> pd.DataFrame:
+    """Fill missing required fields in the DataFrame.
+
+    In non-interactive environments (CI), uses auto-detection and defaults
+    instead of prompting for user input.
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        DataFrame with conference data
+
+    Returns
+    -------
+    pd.DataFrame
+        DataFrame with missing required fields filled.
+    """
     required = [
         "conference",
         "year",
@@ -110,9 +163,28 @@ def fill_missing_required(df):
         "sub",
     ]
 
+    is_interactive = sys.stdin.isatty()
+
     for i, row in df.copy().iterrows():
         for keyword in required:
             if pd.isna(row[keyword]):
+                # Handle sub field specially - try auto-detection first
+                if keyword == "sub":
+                    detected_sub = _auto_detect_sub(row["conference"])
+                    if detected_sub:
+                        df.loc[i, keyword] = detected_sub
+                        continue
+                    # Use default if no match and non-interactive
+                    if not is_interactive:
+                        df.loc[i, keyword] = DEFAULT_SUB
+                        continue
+
+                # In non-interactive mode, skip prompting for other fields
+                if not is_interactive:
+                    # Leave as NaN - will be caught by validation later
+                    continue
+
+                # Interactive mode - prompt user
                 user_input = input(
                     f"What's the value of '{keyword}' for conference '{row['conference']}' check {row['link']} ?: ",
                 )