From 42793b5ae5beb2bafbb10b0414c503615c780a5e Mon Sep 17 00:00:00 2001 From: Jon Froehlich Date: Fri, 26 Jun 2026 14:10:53 -0700 Subject: [PATCH] Flag publications missing an expected companion artifact (#1405) Add two Data Health checks (Artifacts group) for publications that should have a related artifact but don't: - Conference papers without a talk: Conference venue type, not an extended abstract, talk FK empty. Most full conference papers are presented with a talk, so these are usually data-entry gaps. - Poster papers without a linked poster: Poster venue type, poster FK empty. Both share CompanionArtifactCheck, which excludes pre-Makeability-Lab work (DATE_MAKEABILITYLAB_FORMED) and not-yet-presented (future-dated) papers to avoid permanent false positives, and links each row to the publication's edit page where the FK is set. Kept as two separate checks (distinct dashboard counts) because the corrective action differs. Co-Authored-By: Claude Opus 4.8 (1M context) --- website/admin/data_health/checks/__init__.py | 2 + .../data_health/checks/_companion_base.py | 79 +++++++++++++++++++ .../checks/conference_papers_without_talk.py | 28 +++++++ .../checks/poster_papers_without_poster.py | 27 +++++++ website/tests/test_data_health.py | 70 +++++++++++++++- 5 files changed, 205 insertions(+), 1 deletion(-) create mode 100644 website/admin/data_health/checks/_companion_base.py create mode 100644 website/admin/data_health/checks/conference_papers_without_talk.py create mode 100644 website/admin/data_health/checks/poster_papers_without_poster.py diff --git a/website/admin/data_health/checks/__init__.py b/website/admin/data_health/checks/__init__.py index a248e74d..c8a09eee 100644 --- a/website/admin/data_health/checks/__init__.py +++ b/website/admin/data_health/checks/__init__.py @@ -9,6 +9,8 @@ media_integrity, publication_quality, unlinked_artifacts, + conference_papers_without_talk, + poster_papers_without_poster, project_health, project_leadership, 
position_integrity,
diff --git a/website/admin/data_health/checks/_companion_base.py b/website/admin/data_health/checks/_companion_base.py
new file mode 100644
index 00000000..bd45472a
--- /dev/null
+++ b/website/admin/data_health/checks/_companion_base.py
@@ -0,0 +1,79 @@
+"""
+Shared base for "publication is missing an expected companion artifact" checks.
+
+Some publication types almost always ship with a companion artifact:
+
+- a full **conference paper** is presented with a **talk**, and
+- a **poster** publication points at its **poster** artifact.
+
+Each gap is surfaced as its own dashboard check (the corrective action differs)
+but the scoping and row shape are identical, so they share this base. A
+subclass only sets the venue type to match, the companion FK that should be
+populated, and (optionally) whether to restrict to full papers.
+
+Scoping mirrors the unlinked-artifacts check to avoid permanent false
+positives:
+
+- **Pre-Makeability-Lab work is excluded** (date before
+  ``settings.DATE_MAKEABILITYLAB_FORMED``) — grad-school-era papers predate the
+  lab's talk/poster records.
+- **Not-yet-presented papers are excluded** (``to_appear()`` — a future date);
+  the companion usually isn't recorded until the work is presented.
+
+Read-only: never calls ``.save()`` or mutates the DB.
+"""
+
+from django.conf import settings
+
+from website.admin.data_health.registry import HealthCheck
+from website.models import Publication
+
+
+class CompanionArtifactCheck(HealthCheck):
+    """Base check: publications of one venue type missing a companion FK.
+
+    Subclasses set :attr:`venue_type`, :attr:`companion_field`, and optionally
+    :attr:`require_full_paper`; everything else (columns, scoping, the link to
+    the publication's edit page) is shared.
+    """
+
+    group = 'Artifacts'
+    link_model = 'publication'  # each row's fix happens on the publication form
+    columns = ['id', 'title', 'date', 'forum_name', 'first_author']
+
+    #: ``PubType`` value this check applies to (e.g. ``PubType.CONFERENCE``).
+    venue_type = None
+    #: Publication FK that should be populated (e.g. ``'talk'`` / ``'poster'``).
+    companion_field = None
+    #: When True, skip extended abstracts (short-form papers rarely have one).
+    require_full_paper = False
+
+    def get_rows(self):
+        """Return one row per in-scope publication missing its companion."""
+        # Only unlinked publications are fetched; linked ones are healthy.
+        qs = (Publication.objects
+              .filter(pub_venue_type=self.venue_type,
+                      **{f'{self.companion_field}__isnull': True})
+              .prefetch_related('authors'))
+        rows = []
+        for pub in qs:
+            if self.require_full_paper and pub.is_extended_abstract():
+                continue  # short-form paper; a talk isn't expected
+            if pub.to_appear():
+                continue  # not presented yet — companion expected later
+            if pub.date and pub.date < settings.DATE_MAKEABILITYLAB_FORMED:
+                continue  # pre-Makeability-Lab; not expected to have one
+
+            person = pub.get_person()
+            rows.append({
+                'id': pub.pk,
+                'title': pub.title or '',
+                'date': pub.date.isoformat() if pub.date else '',
+                'forum_name': pub.forum_name or '',
+                'first_author': person.get_full_name() if person else '',
+            })
+
+        # Newest first, title A→Z within a date (stable two-pass sort).
+        rows.sort(key=lambda r: r['title'])
+        rows.sort(key=lambda r: r['date'], reverse=True)
+        return rows
diff --git a/website/admin/data_health/checks/conference_papers_without_talk.py b/website/admin/data_health/checks/conference_papers_without_talk.py
new file mode 100644
index 00000000..8ed0a94b
--- /dev/null
+++ b/website/admin/data_health/checks/conference_papers_without_talk.py
@@ -0,0 +1,28 @@
+"""
+Data-health check: full conference papers with no linked talk.
+
+A Conference-type publication is normally presented with a talk, so a full
+conference paper whose ``talk`` FK is empty is usually a data-entry gap.
+The fix is to add (or link) the talk from the publication's edit page. Extended
+abstracts are excluded — short-form conference items often have no talk. Shared
+scoping and row shape live in :mod:`._companion_base`. Read-only.
+"""
+
+from website.admin.data_health.checks._companion_base import CompanionArtifactCheck
+from website.admin.data_health.registry import register_check
+from website.models.publication import PubType
+
+
+@register_check
+class ConferencePapersWithoutTalkCheck(CompanionArtifactCheck):
+    slug = 'conference-papers-without-talk'
+    title = 'Conference papers without a talk'
+    description = (
+        'Full conference papers (Conference venue type, not an extended '
+        'abstract) with no linked talk. Most should have one — add or link the '
+        "talk from the publication's edit page. Pre-Makeability-Lab and "
+        'not-yet-presented (future-dated) papers are excluded.'
+    )
+    venue_type = PubType.CONFERENCE  # only Conference-venue publications
+    companion_field = 'talk'  # flag publications whose ``talk`` FK is empty
+    require_full_paper = True  # extended abstracts are skipped
diff --git a/website/admin/data_health/checks/poster_papers_without_poster.py b/website/admin/data_health/checks/poster_papers_without_poster.py
new file mode 100644
index 00000000..76587382
--- /dev/null
+++ b/website/admin/data_health/checks/poster_papers_without_poster.py
@@ -0,0 +1,27 @@
+"""
+Data-health check: poster publications with no linked poster artifact.
+
+A Poster-type publication should point at its ``Poster`` (the actual poster
+PDF/image); without that link the poster isn't shown on the site. The fix is to
+add (or link) the poster from the publication's edit page. Shared scoping and
+row shape live in :mod:`._companion_base`. Read-only.
+"""
+
+from website.admin.data_health.checks._companion_base import CompanionArtifactCheck
+from website.admin.data_health.registry import register_check
+from website.models.publication import PubType
+
+
+@register_check
+class PosterPapersWithoutPosterCheck(CompanionArtifactCheck):
+    slug = 'poster-papers-without-poster'
+    title = 'Poster papers without a linked poster'
+    description = (
+        'Publications of type Poster with no linked Poster artifact — the '
+        "poster won't appear on the site. Add or link it from the "
+        "publication's edit page. Pre-Makeability-Lab and not-yet-presented "
+        '(future-dated) papers are excluded.'
+    )
+    venue_type = PubType.POSTER  # only Poster-venue publications
+    companion_field = 'poster'  # flag publications whose ``poster`` FK is empty
+    require_full_paper = False  # extended abstracts are not exempted here
diff --git a/website/tests/test_data_health.py b/website/tests/test_data_health.py
index 052c8c80..fedf1c31 100644
--- a/website/tests/test_data_health.py
+++ b/website/tests/test_data_health.py
@@ -349,6 +349,73 @@ def test_every_check_provides_an_action_link(self):
             )
+
+
+class CompanionArtifactCheckTests(DatabaseTestCase):
+    """Conference-paper-needs-talk and poster-needs-poster checks (issue #1405)."""
+
+    def test_conference_paper_without_talk_is_flagged_and_linked(self):
+        # make_publication defaults to a post-lab Conference paper with no talk.
+        pub = self.make_publication(title="Talkless Conference Paper")
+        check = get_check("conference-papers-without-talk")
+        rows = {r["id"]: r for r in check.get_rows()}
+        self.assertIn(pub.pk, rows)
+        label, url = check.row_link(rows[pub.pk])
+        self.assertEqual(label, "Open →")
+        self.assertEqual(
+            url, reverse("admin:website_publication_change", args=[pub.pk])
+        )
+
+    def test_conference_paper_with_talk_not_flagged(self):
+        talk = self.make_talk(title="The Talk")
+        pub = self.make_publication(title="Conference Paper With Talk", talk=talk)
+        ids = [r["id"] for r in get_check("conference-papers-without-talk").get_rows()]
+        self.assertNotIn(pub.pk, ids)
+
+    def test_extended_abstract_conference_paper_not_flagged(self):
+        pub = self.make_publication(
+            title="Short-form Conference Paper", extended_abstract=True
+        )
+        ids = [r["id"] for r in get_check("conference-papers-without-talk").get_rows()]
+        self.assertNotIn(pub.pk, ids)
+
+    def test_to_appear_conference_paper_not_flagged(self):
+        from datetime import date, timedelta
+
+        future = date.today() + timedelta(days=365)
+        pub = self.make_publication(title="Not Yet Presented", date=future)
+        ids = [r["id"] for r in get_check("conference-papers-without-talk").get_rows()]
+        self.assertNotIn(pub.pk, ids)
+
+    def test_prelab_conference_paper_not_flagged(self):
+        pub = self.make_publication(title="Grad School Paper", year=2010)
+        ids = [r["id"] for r in get_check("conference-papers-without-talk").get_rows()]
+        self.assertNotIn(pub.pk, ids)
+
+    def test_poster_publication_without_poster_is_flagged(self):
+        from website.models.publication import PubType
+
+        pub = self.make_publication(
+            title="Poster Pub, No Poster", pub_venue_type=PubType.POSTER
+        )
+        rows = {r["id"]: r for r in get_check("poster-papers-without-poster").get_rows()}
+        self.assertIn(pub.pk, rows)
+        # A poster-type pub must NOT be flagged by the talk check (reverse not asserted).
+        talk_ids = [r["id"] for r in get_check("conference-papers-without-talk").get_rows()]
+        self.assertNotIn(pub.pk, talk_ids)
+
+    def test_poster_publication_with_poster_not_flagged(self):
+        from website.models.publication import PubType
+        from website.tests.factories import PosterFactory
+
+        poster = PosterFactory(title="The Poster")
+        pub = self.make_publication(
+            title="Poster Pub With Poster",
+            pub_venue_type=PubType.POSTER,
+            poster=poster,
+        )
+        ids = [r["id"] for r in get_check("poster-papers-without-poster").get_rows()]
+        self.assertNotIn(pub.pk, ids)
+
+
 class DataHealthReadOnlyTests(DatabaseTestCase):
     def test_get_rows_does_not_mutate_db(self):
         from website.models import Person, Publication
@@ -358,7 +425,8 @@ def test_get_rows_does_not_mutate_db(self):
         before = (Person.objects.count(), Publication.objects.count())
         for slug in (
             "duplicate-people", "url-name-collisions", "position-integrity",
-            "project-leadership",
+            "project-leadership", "conference-papers-without-talk",
+            "poster-papers-without-poster",
         ):
             get_check(slug).get_rows()
         after = (Person.objects.count(), Publication.objects.count())