diff --git a/CHANGELOG.md b/CHANGELOG.md index 1b7bb33..f99f0ec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,14 @@ # Changelog +## [0.9.2] - 2026-05-10 + +### Fixed +- `Vcon.build_new()` now emits `"vcon": "0.4.0"` per `draft-ietf-vcon-vcon-core-02` §4.1.1. The field is deprecated in the draft but retained for parser compatibility; emitting it by default avoids surprises with strict parsers. +- `Vcon.build_new()` no longer initializes empty `"group": []` and `"redacted": {}`. The speckit reserves `group`, and downstream consumers no longer need to strip these defaults before serializing. Both fields are still populated lazily by their setters. + +### Added +- `add_wtf_transcription_analysis()` — sibling helper to `add_wtf_transcription_attachment` that places the WTF transcription into `analysis[]` as a spec-shaped analysis entry (`type: "transcription"`, `vendor`, `product`, `schema`, `encoding: "json"`, JSON-stringified body). Use this when your pipeline treats transcripts as derived analysis output; use the existing `_attachment` helper for the canonical attachments[] placement shown in the speckit example. + ## [0.9.0] - 2025-01-26 ### 🎉 Major Release: Extension Framework and Privacy Compliance diff --git a/pyproject.toml b/pyproject.toml index abf57c1..594d47b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "vcon" -version = "0.9.1" +version = "0.9.2" description = "The vCon library - Complete vCon 0.4.0 specification implementation" authors = ["Thomas McCarthy-Howe "] license = "MIT" diff --git a/setup.py b/setup.py index b3dc46c..8cc22e0 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name="vcon", - version="0.9.0", + version="0.9.2", author="Thomas McCarthy-Howe", author_email="ghostofbasho@gmail.com", description="A package for working with vCon containers", diff --git a/src/vcon/vcon.py b/src/vcon/vcon.py index 89c47a4..5211298 100644 --- a/src/vcon/vcon.py +++ b/src/vcon/vcon.py @@ -453,9 +453,8 @@ def build_new( logger.debug(f"Generated UUID8: {uuid}") vcon_dict = { + "vcon": "0.4.0", "uuid": uuid, - "redacted": {}, - "group": [], "parties": [], "dialog": [], "attachments": [], @@ -774,7 +773,72 @@ def add_wtf_transcription_attachment( self.add_extension("wtf_transcription") logger.info("Added WTF transcription attachment") - + + except ImportError: + raise RuntimeError("WTF extension not available") + + WTF_SCHEMA_URL = "https://datatracker.ietf.org/doc/html/draft-howe-vcon-wtf-extension" + + def add_wtf_transcription_analysis( + self, + transcript: Dict[str, Any], + segments: List[Dict[str, Any]], + metadata: Dict[str, Any], + dialog_index: Optional[int] = None, + **kwargs + ) -> None: + """ + Add a WTF transcription as an analysis entry (vs. an attachment). + + Use ``add_wtf_transcription_attachment`` for canonical placement + (matches the speckit example, attachments[]). Use this method if your + pipeline treats transcripts as derived analysis output and prefers + analysis[] placement. + + Emits a spec-shaped analysis entry: ``type="transcription"``, + ``vendor`` and ``product`` taken from ``metadata["provider"]`` / + ``metadata["model"]``, ``schema`` set to the WTF draft URL, + ``encoding="json"``, and ``body`` as a JSON-serialized WTF document. + + Args: + transcript: Transcript information dictionary + segments: List of segment dictionaries + metadata: Metadata dictionary; ``provider`` and ``model`` keys + map to ``vendor`` and ``product`` on the analysis entry + dialog_index: Index of the dialog this transcription applies to + **kwargs: Additional WTF parameters (words, speakers, quality, etc.) + """ + if not EXTENSIONS_AVAILABLE: + raise RuntimeError("Extensions not available") + + try: + from .extensions.wtf import WTFExtension + extension = WTFExtension() + wtf_attachment = extension.create_wtf_attachment( + transcript=transcript, + segments=segments, + metadata=metadata, + **kwargs + ) + + wtf_body = wtf_attachment["body"] + dialog_ref = [dialog_index] if dialog_index is not None else [] + + self.add_analysis( + type="transcription", + dialog=dialog_ref, + vendor=metadata.get("provider", "unknown"), + product=metadata.get("model"), + body=json.dumps(wtf_body), + encoding="json", + schema=self.WTF_SCHEMA_URL, + ) + + if "wtf_transcription" not in self.get_extensions(): + self.add_extension("wtf_transcription") + + logger.info("Added WTF transcription analysis entry") + except ImportError: raise RuntimeError("WTF extension not available") diff --git a/tests/test_vcon.py b/tests/test_vcon.py index fa9b1aa..61ff6e7 100644 --- a/tests/test_vcon.py +++ b/tests/test_vcon.py @@ -158,8 +158,10 @@ def test_build_from_json() -> None: def test_build_new() -> None: vcon = Vcon.build_new() assert vcon.uuid is not None - assert vcon.vcon is None # vcon field is now optional and not set by default + assert vcon.vcon == "0.4.0" assert vcon.created_at is not None + assert "group" not in vcon.vcon_dict + assert "redacted" not in vcon.vcon_dict def test_tags() -> None: @@ -1368,11 +1370,14 @@ def test_version_field_optional_load_from_file(tmp_path) -> None: assert "vcon" not in vcon.vcon_dict -def test_build_new_no_version_field() -> None: - """Test that build_new creates vCons without version field.""" +def test_build_new_sets_version_field() -> None: + """build_new() sets vcon: '0.4.0' per draft-ietf-vcon-vcon-core-02 §4.1.1. + + The field is deprecated in the draft but retained for parser compat; + the library emits it by default to avoid surprises with strict parsers. + """ vcon = Vcon.build_new() - # Version field should not be automatically added - assert "vcon" not in vcon.vcon_dict + assert vcon.vcon_dict["vcon"] == "0.4.0" def test_no_version_field_remains_absent() -> None: @@ -1508,3 +1513,35 @@ def test_extensions_property_handling(): assert vcon.get_extensions() == ["video"] assert vcon.get_critical() == ["encryption"] assert vcon.vcon_dict.get("meta", {}).get("custom_field") == "value" + + +def test_build_new_emits_vcon_syntax_param() -> None: + """build_new() should emit vcon: '0.4.0' and round-trip through JSON.""" + vcon = Vcon.build_new() + serialized = vcon.to_json() + reloaded = Vcon.build_from_json(serialized) + assert reloaded.vcon_dict["vcon"] == "0.4.0" + + +def test_build_new_omits_empty_group_and_redacted() -> None: + """build_new() should not seed empty group/redacted defaults.""" + vcon = Vcon.build_new() + assert "group" not in vcon.vcon_dict + assert "redacted" not in vcon.vcon_dict + + +def test_is_valid_with_new_defaults() -> None: + """A minimally-populated build_new() vCon should pass is_valid().""" + from vcon.party import Party + from vcon.dialog import Dialog + + vcon = Vcon.build_new() + vcon.add_party(Party(name="Alice")) + vcon.add_dialog(Dialog( + type="recording", + start="2026-05-10T12:00:00Z", + parties=[0], + mediatype="audio/wav", + )) + valid, errors = vcon.is_valid() + assert valid, f"Expected valid, got errors: {errors}" diff --git a/tests/test_wtf_extension.py b/tests/test_wtf_extension.py index 6d6e09a..592d06e 100644 --- a/tests/test_wtf_extension.py +++ b/tests/test_wtf_extension.py @@ -587,7 +587,71 @@ def test_export_transcription(self): srt_content = extension.export_transcription(attachment, "srt") assert "1" in srt_content assert "Hello world" in srt_content - + vtt_content = extension.export_transcription(attachment, "vtt") assert "WEBVTT" in vtt_content assert "Hello world" in vtt_content + + +class TestWTFTranscriptionAnalysis: + """Tests for the add_wtf_transcription_analysis() Vcon helper.""" + + def _payload(self): + return { + "transcript": { + "text": "Hello world", + "language": "en", + "duration": 2.0, + "confidence": 0.95, + }, + "segments": [{ + "id": 0, + "start": 0.0, + "end": 2.0, + "text": "Hello world", + "confidence": 0.95, + }], + "metadata": { + "created_at": "2026-05-10T00:00:00Z", + "processed_at": "2026-05-10T00:00:01Z", + "provider": "whisper", + "model": "whisper-1", + }, + } + + def test_add_wtf_transcription_analysis_basic(self): + from vcon import Vcon + + vcon = Vcon.build_new() + vcon.add_wtf_transcription_analysis(**self._payload(), dialog_index=0) + + assert len(vcon.vcon_dict["analysis"]) == 1 + assert vcon.vcon_dict["attachments"] == [] + + entry = vcon.vcon_dict["analysis"][0] + assert entry["type"] == "transcription" + assert entry["vendor"] == "whisper" + assert entry["product"] == "whisper-1" + assert entry["encoding"] == "json" + assert entry["schema"] == Vcon.WTF_SCHEMA_URL + assert entry["dialog"] == [0] + + body = json.loads(entry["body"]) + assert body["transcript"]["text"] == "Hello world" + + def test_add_wtf_transcription_analysis_registers_extension(self): + from vcon import Vcon + + vcon = Vcon.build_new() + vcon.add_wtf_transcription_analysis(**self._payload()) + assert "wtf_transcription" in vcon.get_extensions() + + def test_attachment_helper_unchanged_regression(self): + """Sanity: the existing _attachment helper still writes to attachments[].""" + from vcon import Vcon + + vcon = Vcon.build_new() + vcon.add_wtf_transcription_attachment(**self._payload(), dialog_index=0) + assert len(vcon.vcon_dict["attachments"]) == 1 + assert vcon.vcon_dict["analysis"] == [] + assert vcon.vcon_dict["attachments"][0]["purpose"] == "wtf_transcription"