diff --git a/website/admin/data_health/checks/__init__.py b/website/admin/data_health/checks/__init__.py
index a248e74d..c8a09eee 100644
--- a/website/admin/data_health/checks/__init__.py
+++ b/website/admin/data_health/checks/__init__.py
@@ -9,6 +9,8 @@
     media_integrity,
     publication_quality,
     unlinked_artifacts,
+    conference_papers_without_talk,
+    poster_papers_without_poster,
     project_health,
     project_leadership,
     position_integrity,
diff --git a/website/admin/data_health/checks/_companion_base.py b/website/admin/data_health/checks/_companion_base.py
new file mode 100644
index 00000000..bd45472a
--- /dev/null
+++ b/website/admin/data_health/checks/_companion_base.py
@@ -0,0 +1,91 @@
+"""
+Shared base for "publication is missing an expected companion artifact" checks.
+
+Some publication types almost always ship with a companion artifact:
+
+- a full **conference paper** is presented with a **talk**, and
+- a **poster** publication points at its **poster** artifact.
+
+Each gap is surfaced as its own dashboard check (the corrective action differs)
+but the scoping and row shape are identical, so they share this base. A
+subclass only sets the venue type to match, the companion FK that should be
+populated, and (optionally) whether to restrict to full papers.
+
+Scoping mirrors the unlinked-artifacts check to avoid permanent false
+positives:
+
+- **Pre-Makeability-Lab work is excluded** (date before
+  ``settings.DATE_MAKEABILITYLAB_FORMED``) — grad-school-era papers predate the
+  lab's talk/poster records.
+- **Not-yet-presented papers are excluded** (``to_appear()`` — a future date);
+  the companion usually isn't recorded until the work is presented.
+
+Read-only: never calls ``.save()`` or mutates the DB.
+"""
+
+from django.conf import settings
+
+from website.admin.data_health.registry import HealthCheck
+from website.models import Publication
+
+
+class CompanionArtifactCheck(HealthCheck):
+    """Base check: publications of one venue type missing a companion FK.
+
+    Subclasses set :attr:`venue_type`, :attr:`companion_field`, and optionally
+    :attr:`require_full_paper`; everything else (columns, scoping, the link to
+    the publication's edit page) is shared.
+    """
+
+    group = 'Artifacts'
+    link_model = 'publication'  # each row's fix happens on the publication form
+    columns = ['id', 'title', 'date', 'forum_name', 'first_author']
+
+    #: ``PubType`` value this check applies to (e.g. ``PubType.CONFERENCE``).
+    venue_type = None
+    #: Publication FK that should be populated (e.g. ``'talk'`` / ``'poster'``).
+    companion_field = None
+    #: When True, skip extended abstracts (short-form papers rarely have one).
+    require_full_paper = False
+
+    def get_rows(self):
+        """Return one row per in-scope publication missing its companion.
+
+        A publication is reported when it has :attr:`venue_type`, its
+        :attr:`companion_field` FK is empty, it is not ``to_appear()``, and it
+        is not dated before ``settings.DATE_MAKEABILITYLAB_FORMED``. When
+        :attr:`require_full_paper` is set, extended abstracts are skipped too.
+
+        Returns:
+            A list of dicts keyed by :attr:`columns`, newest ``date`` first
+            (undated publications last), ties ordered by title.
+        """
+        # Filter on the empty FK in SQL so already-linked (healthy)
+        # publications are never loaded, instead of discarding them per row.
+        missing_companion = {f'{self.companion_field}__isnull': True}
+        qs = (Publication.objects
+              .filter(pub_venue_type=self.venue_type, **missing_companion)
+              .prefetch_related('authors'))
+
+        rows = []
+        for pub in qs:
+            if self.require_full_paper and pub.is_extended_abstract():
+                continue  # short-form paper; a companion isn't expected
+            if pub.to_appear():
+                continue  # not presented yet — companion expected later
+            if pub.date and pub.date < settings.DATE_MAKEABILITYLAB_FORMED:
+                continue  # pre-Makeability-Lab; not expected to have one
+
+            person = pub.get_person()
+            rows.append({
+                'id': pub.pk,
+                'title': pub.title or '',
+                'date': pub.date.isoformat() if pub.date else '',
+                'forum_name': pub.forum_name or '',
+                'first_author': person.get_full_name() if person else '',
+            })
+
+        # Newest first (stable two-pass sort: by title, then by date desc).
+        rows.sort(key=lambda r: r['title'])
+        rows.sort(key=lambda r: r['date'], reverse=True)
+        return rows
diff --git a/website/admin/data_health/checks/conference_papers_without_talk.py b/website/admin/data_health/checks/conference_papers_without_talk.py
new file mode 100644
index 00000000..8ed0a94b
--- /dev/null
+++ b/website/admin/data_health/checks/conference_papers_without_talk.py
@@ -0,0 +1,28 @@
+"""
+Data-health check: full conference papers with no linked talk.
+
+A Conference-type publication is normally presented with a talk, so a full
+conference paper whose ``talk`` FK is empty is usually a data-entry gap. The fix
+is to add (or link) the talk from the publication's edit page. Extended
+abstracts are excluded — short-form conference items often have no talk. Shared
+scoping and row shape live in :mod:`._companion_base`. Read-only.
+"""
+
+from website.admin.data_health.checks._companion_base import CompanionArtifactCheck
+from website.admin.data_health.registry import register_check
+from website.models.publication import PubType
+
+
+@register_check
+class ConferencePapersWithoutTalkCheck(CompanionArtifactCheck):
+    slug = 'conference-papers-without-talk'
+    title = 'Conference papers without a talk'
+    description = (
+        'Full conference papers (Conference venue type, not an extended '
+        'abstract) with no linked talk. Most should have one — add or link the '
+        "talk from the publication's edit page. Pre-Makeability-Lab and "
+        'not-yet-presented (future-dated) papers are excluded.'
+    )
+    venue_type = PubType.CONFERENCE
+    companion_field = 'talk'
+    require_full_paper = True
diff --git a/website/admin/data_health/checks/poster_papers_without_poster.py b/website/admin/data_health/checks/poster_papers_without_poster.py
new file mode 100644
index 00000000..76587382
--- /dev/null
+++ b/website/admin/data_health/checks/poster_papers_without_poster.py
@@ -0,0 +1,27 @@
+"""
+Data-health check: poster publications with no linked poster artifact.
+
+A Poster-type publication should point at its ``Poster`` (the actual poster
+PDF/image); without that link the poster isn't shown on the site. The fix is to
+add (or link) the poster from the publication's edit page. Shared scoping and
+row shape live in :mod:`._companion_base`. Read-only.
+"""
+
+from website.admin.data_health.checks._companion_base import CompanionArtifactCheck
+from website.admin.data_health.registry import register_check
+from website.models.publication import PubType
+
+
+@register_check
+class PosterPapersWithoutPosterCheck(CompanionArtifactCheck):
+    slug = 'poster-papers-without-poster'
+    title = 'Poster papers without a linked poster'
+    description = (
+        'Publications of type Poster with no linked Poster artifact — the '
+        "poster won't appear on the site. Add or link it from the "
+        "publication's edit page. Pre-Makeability-Lab and not-yet-presented "
+        '(future-dated) papers are excluded.'
+    )
+    venue_type = PubType.POSTER
+    companion_field = 'poster'
+    require_full_paper = False
diff --git a/website/tests/test_data_health.py b/website/tests/test_data_health.py
index 052c8c80..fedf1c31 100644
--- a/website/tests/test_data_health.py
+++ b/website/tests/test_data_health.py
@@ -349,6 +349,80 @@ def test_every_check_provides_an_action_link(self):
             )
 
 
+class CompanionArtifactCheckTests(DatabaseTestCase):
+    """Conference-paper-needs-talk and poster-needs-poster checks (issue #1405)."""
+
+    def test_conference_paper_without_talk_is_flagged_and_linked(self):
+        # make_publication defaults to a post-lab Conference paper with no talk.
+        pub = self.make_publication(title="Talkless Conference Paper")
+        check = get_check("conference-papers-without-talk")
+        rows = {r["id"]: r for r in check.get_rows()}
+        self.assertIn(pub.pk, rows)
+        label, url = check.row_link(rows[pub.pk])
+        self.assertEqual(label, "Open →")
+        self.assertEqual(
+            url, reverse("admin:website_publication_change", args=[pub.pk])
+        )
+
+    def test_conference_paper_with_talk_not_flagged(self):
+        talk = self.make_talk(title="The Talk")
+        pub = self.make_publication(title="Conference Paper With Talk", talk=talk)
+        ids = [r["id"] for r in get_check("conference-papers-without-talk").get_rows()]
+        self.assertNotIn(pub.pk, ids)
+
+    def test_extended_abstract_conference_paper_not_flagged(self):
+        pub = self.make_publication(
+            title="Short-form Conference Paper", extended_abstract=True
+        )
+        ids = [r["id"] for r in get_check("conference-papers-without-talk").get_rows()]
+        self.assertNotIn(pub.pk, ids)
+
+    def test_to_appear_conference_paper_not_flagged(self):
+        from datetime import date, timedelta
+
+        future = date.today() + timedelta(days=365)
+        pub = self.make_publication(title="Not Yet Presented", date=future)
+        ids = [r["id"] for r in get_check("conference-papers-without-talk").get_rows()]
+        self.assertNotIn(pub.pk, ids)
+
+    def test_prelab_conference_paper_not_flagged(self):
+        pub = self.make_publication(title="Grad School Paper", year=2010)
+        ids = [r["id"] for r in get_check("conference-papers-without-talk").get_rows()]
+        self.assertNotIn(pub.pk, ids)
+
+    def test_poster_publication_without_poster_is_flagged(self):
+        from website.models.publication import PubType
+
+        pub = self.make_publication(
+            title="Poster Pub, No Poster", pub_venue_type=PubType.POSTER
+        )
+        rows = {r["id"]: r for r in get_check("poster-papers-without-poster").get_rows()}
+        self.assertIn(pub.pk, rows)
+        # A poster-type pub must NOT be flagged by the talk check.
+        talk_ids = [r["id"] for r in get_check("conference-papers-without-talk").get_rows()]
+        self.assertNotIn(pub.pk, talk_ids)
+
+    def test_conference_paper_not_flagged_by_poster_check(self):
+        # Converse cross-check: a default (Conference-type) publication is
+        # outside the poster check's venue type, so it must not be flagged.
+        pub = self.make_publication(title="Conference Pub, No Poster")
+        ids = [r["id"] for r in get_check("poster-papers-without-poster").get_rows()]
+        self.assertNotIn(pub.pk, ids)
+
+    def test_poster_publication_with_poster_not_flagged(self):
+        from website.models.publication import PubType
+        from website.tests.factories import PosterFactory
+
+        poster = PosterFactory(title="The Poster")
+        pub = self.make_publication(
+            title="Poster Pub With Poster",
+            pub_venue_type=PubType.POSTER,
+            poster=poster,
+        )
+        ids = [r["id"] for r in get_check("poster-papers-without-poster").get_rows()]
+        self.assertNotIn(pub.pk, ids)
+
+
 class DataHealthReadOnlyTests(DatabaseTestCase):
     def test_get_rows_does_not_mutate_db(self):
         from website.models import Person, Publication
@@ -358,7 +432,8 @@ def test_get_rows_does_not_mutate_db(self):
         before = (Person.objects.count(), Publication.objects.count())
         for slug in (
             "duplicate-people", "url-name-collisions", "position-integrity",
-            "project-leadership",
+            "project-leadership", "conference-papers-without-talk",
+            "poster-papers-without-poster",
         ):
             get_check(slug).get_rows()
         after = (Person.objects.count(), Publication.objects.count())