diff --git a/SETUP.md b/SETUP.md index ee2bdd4..2fc7ad7 100644 --- a/SETUP.md +++ b/SETUP.md @@ -125,7 +125,7 @@ Edit `config/config.yaml` and replace the placeholder channel IDs: ```yaml youtube: - channel_id: "UC7c3Kb6jYCRj4JOHHZTxKsA" # GitHub's YouTube channel – change if needed + channel_id: "UC7c3Kb6jYCRj4JOHHZTxKsQ" # GitHub's YouTube channel – change if needed discord_channel_id: 123456789012345678 # ← your real channel ID here digest_day: "thursday" # day of week to post the digest keywords: # topics to match (OR logic) @@ -151,7 +151,7 @@ blog: search_pool: 20 ``` -> **Note:** YouTube channel IDs look like `UC7c3Kb6jYCRj4JOHHZTxKsA`. +> **Note:** YouTube channel IDs look like `UC7c3Kb6jYCRj4JOHHZTxKsQ`. > You can find a channel's ID at `https://www.youtube.com/@<handle>/about` (click the share icon → Copy channel ID), or via the [YouTube channel-ID finder](https://commentpicker.com/youtube-channel-id.php). --- diff --git a/config/config.yaml b/config/config.yaml index 55ed8a8..aacc2dd 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -10,12 +10,12 @@ # must be on: Settings > Advanced > Developer Mode) and choose # "Copy Channel ID". # 2. youtube.channel_id – The ID of the YouTube channel to watch. 
-# GitHub's channel ID is: UC7c3Kb6jYCRj4JOHHZTxKsA +# GitHub's channel ID is: UC7c3Kb6jYCRj4JOHHZTxKsQ # ───────────────────────────────────────────────────────────────────────────── youtube: # YouTube channel to watch for new uploads - channel_id: "UC7c3Kb6jYCRj4JOHHZTxKsA" # GitHub's YouTube channel + channel_id: "UC7c3Kb6jYCRj4JOHHZTxKsQ" # GitHub's YouTube channel # Discord channel where the weekly digest is posted # Replace with your actual channel ID (integer) diff --git a/tests/test_cog_flows.py b/tests/test_cog_flows.py index 825b98c..0f7c30a 100644 --- a/tests/test_cog_flows.py +++ b/tests/test_cog_flows.py @@ -72,7 +72,7 @@ async def test_youtube_weekly_digest_fetches_channel_and_posts(self) -> None: bot = _FakeBot( config={ "youtube": { - "channel_id": "UC7c3Kb6jYCRj4JOHHZTxKsA", + "channel_id": "UC7c3Kb6jYCRj4JOHHZTxKsQ", "discord_channel_id": 123456789012345678, "digest_count": 1, "search_pool": 10, diff --git a/tests/test_youtube_api.py b/tests/test_youtube_api.py index 99c0c0a..1076047 100644 --- a/tests/test_youtube_api.py +++ b/tests/test_youtube_api.py @@ -1,7 +1,6 @@ import unittest from datetime import datetime, timezone -from unittest.mock import patch -from unittest.mock import MagicMock +from unittest.mock import patch, MagicMock from googleapiclient.errors import HttpError @@ -16,7 +15,17 @@ def execute(self): return self._payload -class _FakeSearchResource: +class _FakeChannelsResource: + def __init__(self, payload, capture): + self._payload = payload + self._capture = capture + + def list(self, **kwargs): + self._capture.update(kwargs) + return _FakeRequest(self._payload) + + +class _FakePlaylistItemsResource: def __init__(self, payload, capture): self._payload = payload self._capture = capture @@ -37,19 +46,42 @@ def list(self, **kwargs): class _FakeService: - def __init__(self, search_payload=None, videos_payload=None): - self.search_capture = {} + def __init__(self, playlist_payload=None, videos_payload=None, 
uploads_playlist_id="UUfakeplaylist"): + self.playlist_capture = {} self.videos_capture = {} - self._search_payload = search_payload or {"items": []} + self.channels_capture = {} + self._playlist_payload = playlist_payload or {"items": []} self._videos_payload = videos_payload or {"items": []} + self._channels_payload = { + "items": [{"contentDetails": {"relatedPlaylists": {"uploads": uploads_playlist_id}}}] + } - def search(self): - return _FakeSearchResource(self._search_payload, self.search_capture) + def channels(self): + return _FakeChannelsResource(self._channels_payload, self.channels_capture) + + def playlistItems(self): + return _FakePlaylistItemsResource(self._playlist_payload, self.playlist_capture) def videos(self): return _FakeVideosResource(self._videos_payload, self.videos_capture) +def _playlist_item(video_id, title, description, published_at, thumbnail_url=""): + """Build a fake playlistItems.list response item.""" + return { + "snippet": { + "title": title, + "description": description, + "publishedAt": published_at, + "thumbnails": {"high": {"url": thumbnail_url}} if thumbnail_url else {}, + "resourceId": {"videoId": video_id}, + }, + "contentDetails": { + "videoPublishedAt": published_at, + }, + } + + class YouTubeApiTests(unittest.TestCase): def test_search_recent_propagates_http_error(self) -> None: """HttpError from the API must propagate so callers can distinguish @@ -59,63 +91,108 @@ def test_search_recent_propagates_http_error(self) -> None: fake_resp.reason = "quotaExceeded" error = HttpError(resp=fake_resp, content=b'{"error":{"message":"quotaExceeded"}}') - class _ErrorSearchResource: + class _ErrorPlaylistResource: def list(self, **kwargs): raise error class _ErrorService: - def search(self): - return _ErrorSearchResource() + def channels(self): + return _FakeChannelsResource( + {"items": [{"contentDetails": {"relatedPlaylists": {"uploads": "UUtest"}}}]}, + {}, + ) + + def playlistItems(self): + return _ErrorPlaylistResource() client = 
YouTubeClient.__new__(YouTubeClient) client._service = _ErrorService() + client._uploads_playlist_cache = {} with self.assertRaises(HttpError): client.search_recent( - channel_id="channel-id", + channel_id="UCxxxxxxxxxxxxxxxxxxxxxxxx", published_after=datetime(2026, 5, 1, tzinfo=timezone.utc), ) def test_search_recent_requires_timezone_aware_datetime(self) -> None: client = YouTubeClient.__new__(YouTubeClient) client._service = _FakeService() + client._uploads_playlist_cache = {} with self.assertRaises(ValueError): client.search_recent( - channel_id="channel-id", + channel_id="UCxxxxxxxxxxxxxxxxxxxxxxxx", published_after=datetime.now(), ) - def test_search_recent_maps_response_without_query_param(self) -> None: + def test_search_recent_uses_api_to_resolve_uploads_playlist(self) -> None: + """search_recent must fetch the uploads playlist ID via channels.list, + not by string-manipulating the channel ID.""" service = _FakeService( - search_payload={ + uploads_playlist_id="PLxxxxxxRealPlaylist", + playlist_payload={ "items": [ - { - "id": {"videoId": "abc123"}, - "snippet": { - "title": "Copilot update", - "description": "Great release notes", - "publishedAt": "2026-05-01T00:00:00Z", - "thumbnails": {"high": {"url": "https://img.example/1.jpg"}}, - }, - } + _playlist_item("abc123", "Copilot update", "Great release notes", + "2026-05-10T00:00:00Z", "https://img.example/1.jpg") ] - } + }, ) client = YouTubeClient.__new__(YouTubeClient) client._service = service + client._uploads_playlist_cache = {} videos = client.search_recent( - channel_id="channel-id", + channel_id="UC7c3Kb6jYCRj4JOHHZTxKsQ", published_after=datetime(2026, 5, 1, tzinfo=timezone.utc), max_results=15, ) - self.assertNotIn("q", service.search_capture) - self.assertEqual(service.search_capture["channelId"], "channel-id") - self.assertEqual(service.search_capture["maxResults"], 15) + # Must use the playlist ID returned by the API, not a derived one. 
+ self.assertEqual(service.playlist_capture["playlistId"], "PLxxxxxxRealPlaylist") + self.assertEqual(service.playlist_capture["maxResults"], 15) + self.assertNotIn("q", service.playlist_capture) + self.assertEqual(len(videos), 1) self.assertEqual(videos[0]["id"], "abc123") self.assertEqual(videos[0]["url"], "https://www.youtube.com/watch?v=abc123") self.assertEqual(videos[0]["view_count"], 0) + self.assertEqual(videos[0]["thumbnail"], "https://img.example/1.jpg") + + def test_search_recent_raises_when_channel_not_found(self) -> None: + service = _FakeService() + service._channels_payload = {"items": []} + client = YouTubeClient.__new__(YouTubeClient) + client._service = service + client._uploads_playlist_cache = {} + + with self.assertRaises(ValueError): + client.search_recent( + channel_id="UCxxxxxxxxxxxxxxxxxxxxxxxx", + published_after=datetime(2026, 5, 1, tzinfo=timezone.utc), + ) + + def test_search_recent_excludes_videos_at_or_before_window(self) -> None: + """Videos published at or before published_after must be excluded.""" + service = _FakeService( + uploads_playlist_id="PLtest", + playlist_payload={ + "items": [ + _playlist_item("new", "New", "", "2026-05-10T00:00:00Z"), + # Exactly at the boundary — should be excluded. 
+ _playlist_item("boundary", "Boundary", "", "2026-05-07T00:00:00Z"), + _playlist_item("old", "Old", "", "2026-05-01T00:00:00Z"), + ] + } + ) + client = YouTubeClient.__new__(YouTubeClient) + client._service = service + client._uploads_playlist_cache = {} + + videos = client.search_recent( + channel_id="UCxxxxxxxxxxxxxxxxxxxxxxxx", + published_after=datetime(2026, 5, 7, tzinfo=timezone.utc), + ) + + self.assertEqual([v["id"] for v in videos], ["new"]) def test_get_video_statistics_parses_view_counts(self) -> None: service = _FakeService( @@ -136,6 +213,7 @@ def test_get_video_statistics_parses_view_counts(self) -> None: def test_get_top_recent_videos_sorts_descending_by_view_count(self) -> None: client = YouTubeClient.__new__(YouTubeClient) + client._uploads_playlist_cache = {} with patch.object( client, "search_recent", @@ -148,7 +226,7 @@ def test_get_top_recent_videos_sorts_descending_by_view_count(self) -> None: client, "get_video_statistics", return_value={"a": 10, "b": 300, "c": 50} ): top = client.get_top_recent_videos( - channel_id="channel-id", + channel_id="UCxxxxxxxxxxxxxxxxxxxxxxxx", published_after=datetime(2026, 5, 1, tzinfo=timezone.utc), top_n=2, search_pool=20, diff --git a/utils/youtube_api.py b/utils/youtube_api.py index 363e466..18c8782 100644 --- a/utils/youtube_api.py +++ b/utils/youtube_api.py @@ -21,7 +21,7 @@ # Weekly digest: top 3 videos from the past 7 days (no keyword filter) since = datetime.now(tz=timezone.utc) - timedelta(days=7) videos = client.get_top_recent_videos( - channel_id="UC7c3Kb6jYCRj4JOHHZTxKsA", + channel_id="UC7c3Kb6jYCRj4JOHHZTxKsQ", published_after=since, top_n=3, ) @@ -55,6 +55,32 @@ def __init__(self, api_key: str) -> None: developerKey=api_key, cache_discovery=False, ) + # Cache uploads playlist IDs to avoid a redundant channels.list call + # on every digest run. 
+ self._uploads_playlist_cache: dict = {} + + def _get_uploads_playlist_id(self, channel_id: str) -> str: + """Return the uploads playlist ID for *channel_id*. + + Fetches ``contentDetails.relatedPlaylists.uploads`` from the YouTube + Data API on first call and caches the result. Raises + :class:`ValueError` if the channel is not found. + """ + if channel_id not in self._uploads_playlist_cache: + response = ( + self._service.channels() + .list(part="contentDetails", id=channel_id) + .execute() + ) + items = response.get("items", []) + if not items: + raise ValueError( + f"YouTube channel {channel_id!r} not found. " + "Check the channel_id in config.yaml." + ) + playlist_id = items[0]["contentDetails"]["relatedPlaylists"]["uploads"] + self._uploads_playlist_cache[channel_id] = playlist_id + return self._uploads_playlist_cache[channel_id] # ------------------------------------------------------------------ # Public methods @@ -66,7 +92,18 @@ def search_recent( published_after: datetime, max_results: int = 20, ) -> List[dict]: - """Search *channel_id* for all videos published after *published_after*. + """Return videos uploaded to *channel_id* after *published_after*. + + Uses the channel's **uploads playlist** (``playlistItems.list``) rather + than ``search.list``. This is more reliable because: + + - ``search.list`` has unpredictable indexing delays and can silently + omit recently-uploaded videos and YouTube Shorts. + - ``playlistItems.list`` reflects the actual upload history immediately. + - ``playlistItems.list`` costs **1 quota unit** vs 100 for ``search.list``. + + The uploads playlist ID is retrieved via ``channels.list`` and cached + for the lifetime of the client instance. Each returned dict contains: @@ -80,21 +117,20 @@ def search_recent( Raises :class:`googleapiclient.errors.HttpError` if the API call fails so callers can distinguish a genuine empty result from an API error. + Raises :class:`ValueError` if the channel cannot be found. 
""" if published_after.tzinfo is None: raise ValueError( "published_after must be a timezone-aware datetime (e.g. use timezone.utc)" ) - published_after_str = published_after.strftime("%Y-%m-%dT%H:%M:%SZ") + uploads_playlist_id = self._get_uploads_playlist_id(channel_id) + response = ( - self._service.search() + self._service.playlistItems() .list( - part="snippet", - channelId=channel_id, - order="date", - type="video", - publishedAfter=published_after_str, + part="snippet,contentDetails", + playlistId=uploads_playlist_id, maxResults=max_results, ) .execute() @@ -103,14 +139,37 @@ def search_recent( videos: List[dict] = [] for item in response.get("items", []): snippet = item["snippet"] - video_id = item["id"]["videoId"] + content_details = item.get("contentDetails", {}) + + video_id = snippet.get("resourceId", {}).get("videoId", "") + if not video_id: + continue + + # contentDetails.videoPublishedAt is the authoritative publish date. + # snippet.publishedAt is when the item was added to the playlist + # (usually identical for the uploads playlist, but videoPublishedAt + # is preferred). + published_at_str = content_details.get( + "videoPublishedAt", snippet.get("publishedAt", "") + ) + + if published_at_str: + published_at = datetime.fromisoformat( + published_at_str.replace("Z", "+00:00") + ) + # The uploads playlist is ordered newest-first. Once we reach + # a video at or before the window boundary, all remaining items + # will also be out of the window. + if published_at <= published_after: + break + videos.append( { "id": video_id, "title": snippet.get("title", "Untitled"), "description": snippet.get("description", "")[:500], "url": f"https://www.youtube.com/watch?v={video_id}", - "published": snippet.get("publishedAt", ""), + "published": published_at_str, "thumbnail": ( snippet.get("thumbnails", {}) .get("high", {})