Skip to content

Commit 78eaa41

Browse files
JesperDramsch, claude, and pre-commit-ci[bot]
authored
fix: unbound variable due to link missing (#210)
* fix: handle validation errors gracefully and filter empty links

  - Fix UnboundLocalError in sort_yaml.py by validating conferences individually instead of via list comprehension
  - Skip invalid conferences instead of failing the entire process
  - Filter out ICS calendar entries without valid links at import time

* feat: fill missing links from historical conference data

  When an ICS calendar entry has no link, look up the conference name in existing YAML data and use the historical link with the year replaced. This allows conferences without descriptions to still have valid links if we've seen them before.

* style: fix ruff SIM102 and PERF203 warnings

  - Combine nested if statements in fill_links_from_history (SIM102)
  - Extract validate_conference helper to avoid try-except in loop (PERF203)

* [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci

---------

Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 5a39ccd commit 78eaa41

2 files changed

Lines changed: 87 additions & 8 deletions

File tree

utils/import_python_official.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,66 @@
3737
logger = get_tqdm_logger(__name__)
3838

3939

40+
def fill_links_from_history(df_ics: pd.DataFrame, df_yml: pd.DataFrame) -> pd.DataFrame:
    """Fill missing links in ICS data from historical conference data.

    For conferences without links, look up the conference name in historical
    data and use that link, replacing any year references with the current
    year.  Note that ``df_ics`` is modified in place and also returned.

    Parameters
    ----------
    df_ics : pd.DataFrame
        DataFrame with ICS conference data (links may be empty strings or NaN)
    df_yml : pd.DataFrame
        DataFrame with existing conference data from YAML files

    Returns
    -------
    pd.DataFrame
        DataFrame with missing links filled where historical data exists
    """
    if df_yml.empty:
        return df_ics

    # Build a lookup of conference name -> (link, year), keeping only the
    # most recent link seen for each conference.
    historical_links: dict[str, tuple[str, int]] = {}
    for _, row in df_yml.iterrows():
        conf_name = row.get("conference", "")
        link = row.get("link", "")
        year = row.get("year", 0)

        # Keep the most recent link for each conference
        if conf_name and link and (conf_name not in historical_links or year > historical_links[conf_name][1]):
            historical_links[conf_name] = (link, year)

    filled_count = 0
    for idx, row in df_ics.iterrows():
        link = row.get("link", "")
        # Treat NaN/None as missing too, not only empty/whitespace strings:
        # pandas stores absent values as NaN, and str(nan) == "nan" would
        # otherwise slip through a pure string-emptiness check.
        if pd.isna(link) or not str(link).strip():
            conf_name = row.get("conference", "")
            target_year = row.get("year", datetime.now(tz=timezone.utc).year)

            if conf_name in historical_links:
                hist_link, hist_year = historical_links[conf_name]
                # Replace the historical year with the target year in the link
                new_link = re.sub(
                    rf"\b{hist_year}\b",
                    str(target_year),
                    str(hist_link),
                )
                df_ics.at[idx, "link"] = new_link
                filled_count += 1
                logger.debug(
                    f"Filled link for '{conf_name}' from historical data: {new_link}",
                )

    if filled_count > 0:
        logger.info(f"Filled {filled_count} missing links from historical conference data")

    return df_ics
98+
99+
40100
def ics_to_dataframe() -> pd.DataFrame:
41101
"""Parse an .ics file and return a DataFrame with the event data.
42102
@@ -241,6 +301,21 @@ def main(year=None, base="") -> bool:
241301
logger.warning("No conference data retrieved from calendar")
242302
return False
243303

304+
# Try to fill missing links from historical conference data
305+
logger.info("Filling missing links from historical data")
306+
df_ics = fill_links_from_history(df_ics, df_yml)
307+
308+
# Filter out entries with empty or missing links
309+
initial_count = len(df_ics)
310+
df_ics = df_ics[df_ics["link"].str.len() > 0]
311+
filtered_count = initial_count - len(df_ics)
312+
if filtered_count > 0:
313+
logger.info(f"Filtered out {filtered_count} entries without valid links")
314+
315+
if df_ics.empty:
316+
logger.warning("No conferences with valid links after filtering")
317+
return False
318+
244319
except Exception as e:
245320
logger.error(f"Failed to initialize import process: {e}")
246321
return False

utils/sort_yaml.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -301,15 +301,19 @@ def sort_data(base="", prefix="", skip_links=False):
301301
for i, q in enumerate(data.copy()):
302302
data[i] = order_keywords(q)
303303

304+
def validate_conference(q: dict) -> Conference | None:
    """Validate a single conference entry against the Pydantic schema.

    Parameters
    ----------
    q : dict
        Raw conference data loaded from YAML.

    Returns
    -------
    Conference | None
        The validated ``Conference`` model, or ``None`` when validation
        fails.  Errors are logged rather than raised so that one invalid
        entry does not abort processing of the remaining conferences.
    """
    try:
        return Conference(**q)
    except pydantic.ValidationError as e:
        logger.error(f"❌ Validation error in conference: {e}")
        # Dump the full offending payload only at debug level to keep
        # normal log output concise.
        logger.debug(f"Invalid data: \n{yaml.dump(q, default_flow_style=False)}")
        return None
304313
logger.info("✅ Validating conference data with Pydantic schema")
305-
validation_errors = 0
306-
307-
try:
308-
new_data = [Conference(**q) for q in data]
309-
except pydantic.ValidationError as e:
310-
validation_errors += 1
311-
logger.error(f"❌ Validation error in conference: {e}")
312-
logger.debug(f"Invalid data: \n{yaml.dump(q, default_flow_style=False)}")
314+
validated = [validate_conference(q) for q in data]
315+
new_data = [c for c in validated if c is not None]
316+
validation_errors = len(validated) - len(new_data)
313317

314318
if validation_errors > 0:
315319
logger.warning(f"⚠️ {validation_errors} conferences failed validation and were skipped")

0 commit comments

Comments
 (0)