diff --git a/.config/hooks/authors.py b/.config/hooks/authors.py index caea62d..ac96b15 100644 --- a/.config/hooks/authors.py +++ b/.config/hooks/authors.py @@ -12,6 +12,7 @@ from __future__ import annotations +import html import re from pathlib import Path @@ -26,6 +27,60 @@ ) BULLET_LINE_RE = re.compile(r"^\s*[-*]\s+\S") HEADING_RE = re.compile(r"^#+\s", re.MULTILINE) +FEED_SUMMARY_CHARS = 240 + +# Material for MkDocs hardcodes the autodiscovery tag +# to this filename. See .config/mkdocs.yml for context. +FEED_URL = "feed_rss_created.xml" +FEED_FILES = ("feed_rss_created.xml", "feed_rss_updated.xml") + +# Validation regexes for authors.yml. Names and affiliations are interpolated +# into Markdown that allows inline HTML through, so we forbid characters that +# could open HTML tags. ORCIDs are interpolated into URLs and link bodies. +SLUG_RE = re.compile(r"^[a-z0-9-]+$") +ORCID_RE = re.compile(r"^\d{4}-\d{4}-\d{4}-\d{3}[\dX]$") +FORBIDDEN_IN_TEXT_RE = re.compile(r"[<>{}\x00-\x1f\x7f]") + + +def _validate_authors(data: dict) -> None: + """Reject malformed or unsafe entries in authors.yml at build time. + + Names and affiliations later end up inside Markdown that allows raw HTML, + and ORCIDs go into href attributes — so we strictly validate here rather + than try to escape every downstream interpolation site. + """ + for slug, record in data.items(): + loc = f"authors.yml: {slug!r}" + if not isinstance(slug, str) or not SLUG_RE.match(slug): + raise PluginError( + f"{loc}: slug must match {SLUG_RE.pattern} " + f"(lowercase letters, digits, hyphens)." 
+ ) + if not isinstance(record, dict): + raise PluginError(f"{loc}: entry must be a mapping, got {type(record).__name__}.") + name = record.get("name") + if not isinstance(name, str) or not name.strip(): + raise PluginError(f"{loc}: 'name' is required and must be a non-empty string.") + if FORBIDDEN_IN_TEXT_RE.search(name) or len(name) > 200: + raise PluginError( + f"{loc}: 'name' contains a forbidden character (<, >, {{, }} " + f"or control char) or exceeds 200 chars." + ) + aff = record.get("affiliation") + if aff is not None: + if not isinstance(aff, str): + raise PluginError(f"{loc}: 'affiliation' must be a string.") + if FORBIDDEN_IN_TEXT_RE.search(aff) or len(aff) > 300: + raise PluginError( + f"{loc}: 'affiliation' contains a forbidden character or exceeds 300 chars." + ) + orcid = record.get("orcid") + if orcid is not None: + if not isinstance(orcid, str) or not ORCID_RE.match(orcid): + raise PluginError( + f"{loc}: 'orcid' must match {ORCID_RE.pattern} " + f"(e.g. 0000-0001-2345-6789). Got {orcid!r}." + ) def _load_authors(docs_dir: str) -> dict: @@ -37,6 +92,7 @@ def _load_authors(docs_dir: str) -> dict: raise PluginError( f"authors hook: {AUTHORS_FILE} must be a YAML mapping keyed by slug" ) + _validate_authors(data) return data @@ -94,6 +150,10 @@ def _render_authors_index( "Everyone who has contributed to a CURIOSS pattern. " "Click a name to jump to their entry below.", "", + f'

New patterns are announced in the ' + f'RSS feed — each item credits the authors ' + f'with a link to their ORCID profile.

', + "", "| Author | Affiliation | ORCID |", "| --- | --- | --- |", ] @@ -124,6 +184,72 @@ def _render_authors_index( return "\n".join(lines) +def _first_paragraph(markdown: str) -> str: + """Return the first prose paragraph after the H1, stripped of markdown.""" + _, body = _parse_frontmatter(markdown) + paragraphs: list[str] = [] + buf: list[str] = [] + for line in body.splitlines(): + stripped = line.strip() + if not stripped: + if buf: + paragraphs.append(" ".join(buf)) + buf = [] + continue + if stripped.startswith("#") or BULLET_LINE_RE.match(line): + if buf: + paragraphs.append(" ".join(buf)) + buf = [] + continue + buf.append(stripped) + if buf: + paragraphs.append(" ".join(buf)) + for p in paragraphs: + plain = re.sub(r"[*_`]", "", p) + plain = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", plain) + if plain: + return plain + return "" + + +def _author_html(slug: str, record: dict) -> str: + name = record.get("name", slug) + orcid = record.get("orcid") + name_safe = html.escape(name, quote=True) + if orcid: + # ORCID is validated against ORCID_RE at load time, so no characters + # here need URL-encoding, but we still escape for attribute context. + return ( + f'{name_safe}' + ) + return name_safe + + +def _feed_description(slugs: list[str], authors_map: dict, markdown: str) -> str: + """Build the HTML body for a feed item's . + + Emitted as raw HTML (anchor tags, escaped text). The on_post_build hook + later wraps each item-level in CDATA so RSS readers render + these as live links rather than as escaped text. + """ + parts = [_author_html(slug, authors_map.get(slug, {"name": slug})) for slug in slugs] + if not parts: + attribution = "" + elif len(parts) == 1: + attribution = f"By {parts[0]}." + else: + attribution = "By " + ", ".join(parts[:-1]) + f", and {parts[-1]}." 
+ + summary = _first_paragraph(markdown) + if summary and len(summary) > FEED_SUMMARY_CHARS: + summary = summary[: FEED_SUMMARY_CHARS - 1].rstrip() + "…" + summary_html = html.escape(summary, quote=False) if summary else "" + + if attribution and summary_html: + return f"{attribution} {summary_html}" + return attribution or summary_html + + def _render_pattern_bullets(slugs: list[str], authors_map: dict) -> str: bullets = [] for slug in slugs: @@ -188,6 +314,16 @@ def on_page_markdown(markdown, page, config, files): if not isinstance(fm_authors, list) or not fm_authors: return markdown + resolved_names = [ + authors_map.get(slug, {}).get("name", slug) for slug in fm_authors + ] + page.meta["authors"] = resolved_names + rss_meta = page.meta.setdefault("rss", {}) + if not rss_meta.get("feed_description"): + rss_meta["feed_description"] = _feed_description( + fm_authors, authors_map, markdown + ) + rendered = _render_pattern_bullets(fm_authors, authors_map) m = CONTRIB_HEADING_RE.search(markdown) @@ -227,3 +363,57 @@ def on_page_markdown(markdown, page, config, files): rebuilt += trailing rebuilt += markdown[section_end:] return rebuilt + + +# Pre-compiled patterns for the feed post-processor. +_ITEM_RE = re.compile(r".*?", re.DOTALL) +_ITEM_DESC_RE = re.compile(r"(.*?)", re.DOTALL) +_AUTHOR_RE = re.compile(r"(.*?)", re.DOTALL) +_SOURCE_RE = re.compile(r"[ \t]*]*>.*?\s*\n?", re.DOTALL) +_CATEGORY_RE = re.compile(r"(.*?)", re.DOTALL) + + +def _rewrite_item(item_xml: str) -> str: + """Apply per-item transforms inside a single ... block.""" + + def _desc_to_cdata(m: re.Match[str]) -> str: + # The plugin HTML-escapes our description body; reverse that and + # CDATA-wrap so feed readers render Name as a real + # link instead of as literal angle-bracket text. + raw = html.unescape(m.group(1)) + # Defuse any literal "]]>" sequence that would close CDATA early. 
+ raw = raw.replace("]]>", "]]]]>") + return f"" + + item_xml = _ITEM_DESC_RE.sub(_desc_to_cdata, item_xml) + # RSS 2.0 must be an email address; use Dublin Core + # for plain names (the dc namespace is already declared by the plugin). + item_xml = _AUTHOR_RE.sub(r"\1", item_xml) + # The plugin emits a self-referencing on every item. The RSS 2.0 + # spec reserves for items republished from another feed, so we + # strip it to keep the feed standards-compliant. + item_xml = _SOURCE_RE.sub("", item_xml) + # The plugin doesn't XML-escape category text, so tag names like + # "Education & Skills" produce invalid XML. Re-escape defensively. + item_xml = _CATEGORY_RE.sub( + lambda m: f"{html.escape(html.unescape(m.group(1)), quote=False)}", + item_xml, + ) + return item_xml + + +def on_post_build(config): + """Rewrite the generated feeds for spec compliance and link rendering. + + The mkdocs-rss-plugin has no template-override mechanism, so we patch + its output after build: switch to , CDATA-wrap + item descriptions so HTML renders, and drop self-referencing . + """ + site_dir = Path(config["site_dir"]) + for name in FEED_FILES: + path = site_dir / name + if not path.exists(): + continue + text = path.read_text(encoding="utf-8") + text = _ITEM_RE.sub(lambda m: _rewrite_item(m.group(0)), text) + path.write_text(text, encoding="utf-8") diff --git a/.config/mkdocs.yml b/.config/mkdocs.yml index e87e7a7..cf2e026 100644 --- a/.config/mkdocs.yml +++ b/.config/mkdocs.yml @@ -76,6 +76,34 @@ markdown_extensions: plugins: - search - tags + - rss: + enabled: true + # match_path picks which pages become feed items. We include every + # top-level .md EXCEPT the four meta/generated paths below: + # - authors/ (generated author pages, not patterns) + # - README.md (the home page itself) + # - PATTERN-TEMPLATE.md / CONTRIBUTING.md (docs about patterns, not patterns) + # If you add another top-level non-pattern document later + # (e.g. 
CODE-OF-CONDUCT.md), append it here or it will show up + # in the feed as if it were a new pattern. + match_path: '(?!authors/|README\.md$|PATTERN-TEMPLATE\.md$|CONTRIBUTING\.md$).*\.md' + length: 50 + abstract_chars_count: 280 + date_from_meta: + as_creation: git + as_update: git + categories: + - tags + # Filenames are LEFT AT THE PLUGIN DEFAULTS because Material for + # MkDocs hardcodes the autodiscovery link in its + # base.html to feed_rss_created.xml / feed_rss_updated.xml. Renaming + # the files here would silently break browser/reader autodiscovery. + pretty_print: true + json_feed_enabled: false + feed_title: "CURIOSS Patterns — new patterns" + feed_description: "New patterns published in the CURIOSS Patterns catalogue." + image: "https://curioss.org/patterns/assets/curioss_logo_white.svg" + use_git: true hooks: - hooks/authors.py extra: diff --git a/.config/requirements.txt b/.config/requirements.txt index eccaa4b..d785f76 100644 --- a/.config/requirements.txt +++ b/.config/requirements.txt @@ -1,2 +1,3 @@ mkdocs>=1.6,<2 mkdocs-material>=9.7,<10 +mkdocs-rss-plugin>=1.19,<2 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ca56c27..67393b8 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -38,6 +38,29 @@ You do **not** need to edit the "Contributors & Acknowledgement" section at the bottom of the pattern — the site generates that automatically from the frontmatter. +## The RSS feed + +The site publishes an RSS feed of new patterns at +[/feed_rss_created.xml](https://curioss.org/patterns/feed_rss_created.xml) so +readers can subscribe to updates. A few things to know as a contributor: + +- **Your ORCID, if you provide one, appears publicly in each feed item** for + every pattern you've co-authored — as a clickable link to your ORCID + profile. This is the same ORCID that already shows up on the pattern page + itself; the feed just makes it travel further. 
+- **The first paragraph of your pattern becomes the feed blurb.** Try to + make that opening sentence stand on its own — it's what readers will see + in their RSS reader before they click through. +- **My pattern isn't in the feed — why?** Three usual culprits: + 1. The file is not at the repo root (everything in `authors/` and the + three meta files — `README.md`, `CONTRIBUTING.md`, and `PATTERN-TEMPLATE.md`, + plus any other top-level non-pattern doc listed in the `match_path` of + `.config/mkdocs.yml` — is excluded by design). + 2. The pattern has no git history yet (it must have been committed at + least once; the feed uses the first commit as the publication date). + 3. The build is older than your latest commit — the feed is regenerated + on every deploy. + ## Local Development If you want to test the MkDocs site locally: diff --git a/README.md b/README.md index b3f18f0..e0932a5 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,10 @@ For more information about patterns in general, scroll down to the [About Patter +## Stay in the loop + +

Want to know when a new pattern is published? Subscribe to our RSS feed. Each item credits the pattern's authors with a link to their ORCID profile, so you can follow the people behind the work as well as the patterns themselves.

+ ## About Patterns ### What Are Patterns?