Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions SETUP.md
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ Edit `config/config.yaml` and replace the placeholder channel IDs:

```yaml
youtube:
channel_id: "UC7c3Kb6jYCRj4JOHHZTxKsA" # GitHub's YouTube channel – change if needed
channel_id: "UC7c3Kb6jYCRj4JOHHZTxKsQ" # GitHub's YouTube channel – change if needed
Comment thread
MetzinAround marked this conversation as resolved.
discord_channel_id: 123456789012345678 # ← your real channel ID here
digest_day: "thursday" # day of week to post the digest
keywords: # topics to match (OR logic)
Expand All @@ -151,7 +151,7 @@ blog:
search_pool: 20
```

> **Note:** YouTube channel IDs look like `UC7c3Kb6jYCRj4JOHHZTxKsA`.
> **Note:** YouTube channel IDs look like `UC7c3Kb6jYCRj4JOHHZTxKsQ`.
> You can find a channel's ID at `https://www.youtube.com/@<handle>/about` (click the share icon → Copy channel ID), or via the [YouTube channel-ID finder](https://commentpicker.com/youtube-channel-id.php).

---
Expand Down
4 changes: 2 additions & 2 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@
# must be on: Settings > Advanced > Developer Mode) and choose
# "Copy Channel ID".
# 2. youtube.channel_id – The ID of the YouTube channel to watch.
# GitHub's channel ID is: UC7c3Kb6jYCRj4JOHHZTxKsA
# GitHub's channel ID is: UC7c3Kb6jYCRj4JOHHZTxKsQ
# ─────────────────────────────────────────────────────────────────────────────

youtube:
# YouTube channel to watch for new uploads
channel_id: "UC7c3Kb6jYCRj4JOHHZTxKsA" # GitHub's YouTube channel
channel_id: "UC7c3Kb6jYCRj4JOHHZTxKsQ" # GitHub's YouTube channel
Comment thread
MetzinAround marked this conversation as resolved.

# Discord channel where the weekly digest is posted
# Replace with your actual channel ID (integer)
Expand Down
2 changes: 1 addition & 1 deletion tests/test_cog_flows.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ async def test_youtube_weekly_digest_fetches_channel_and_posts(self) -> None:
bot = _FakeBot(
config={
"youtube": {
"channel_id": "UC7c3Kb6jYCRj4JOHHZTxKsA",
"channel_id": "UC7c3Kb6jYCRj4JOHHZTxKsQ",
"discord_channel_id": 123456789012345678,
"digest_count": 1,
"search_pool": 10,
Expand Down
138 changes: 108 additions & 30 deletions tests/test_youtube_api.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import unittest
from datetime import datetime, timezone
from unittest.mock import patch
from unittest.mock import MagicMock
from unittest.mock import patch, MagicMock

from googleapiclient.errors import HttpError

Expand All @@ -16,7 +15,17 @@ def execute(self):
return self._payload


class _FakeSearchResource:
class _FakeChannelsResource:
    """Test double standing in for the ``channels()`` API resource.

    Keyword arguments passed to :meth:`list` are merged into the *capture*
    dict supplied by the caller so a test can inspect them afterwards; the
    canned *payload* is handed to ``_FakeRequest`` and that request is
    returned.
    """

    def __init__(self, payload, capture):
        self._capture = capture
        self._payload = payload

    def list(self, **kwargs):
        """Record *kwargs* in the capture dict and return a fake request."""
        for name, value in kwargs.items():
            self._capture[name] = value
        request = _FakeRequest(self._payload)
        return request


class _FakePlaylistItemsResource:
def __init__(self, payload, capture):
self._payload = payload
self._capture = capture
Expand All @@ -37,19 +46,42 @@ def list(self, **kwargs):


class _FakeService:
def __init__(self, search_payload=None, videos_payload=None):
self.search_capture = {}
def __init__(self, playlist_payload=None, videos_payload=None, uploads_playlist_id="UUfakeplaylist"):
self.playlist_capture = {}
self.videos_capture = {}
self._search_payload = search_payload or {"items": []}
self.channels_capture = {}
self._playlist_payload = playlist_payload or {"items": []}
self._videos_payload = videos_payload or {"items": []}
self._channels_payload = {
"items": [{"contentDetails": {"relatedPlaylists": {"uploads": uploads_playlist_id}}}]
}

def search(self):
return _FakeSearchResource(self._search_payload, self.search_capture)
def channels(self):
return _FakeChannelsResource(self._channels_payload, self.channels_capture)

def playlistItems(self):
return _FakePlaylistItemsResource(self._playlist_payload, self.playlist_capture)

def videos(self):
return _FakeVideosResource(self._videos_payload, self.videos_capture)


def _playlist_item(
    video_id,
    title,
    description,
    published_at,
    thumbnail_url="",
    video_published_at=None,
):
    """Build a fake ``playlistItems.list`` response item.

    Args:
        video_id: Stored under ``snippet.resourceId.videoId``.
        title: Stored under ``snippet.title``.
        description: Stored under ``snippet.description``.
        published_at: Stored under ``snippet.publishedAt`` (the tests in
            this module pass ISO-8601 strings such as
            ``"2026-05-10T00:00:00Z"``).
        thumbnail_url: Stored under ``snippet.thumbnails.high.url``. When
            empty, ``snippet.thumbnails`` is an empty dict instead.
        video_published_at: Optional value for
            ``contentDetails.videoPublishedAt``. Defaults to *published_at*,
            which keeps existing callers unchanged while letting a test give
            the two timestamps different values.

    Returns:
        A dict with ``snippet`` and ``contentDetails`` keys.
    """
    if video_published_at is None:
        video_published_at = published_at

    thumbnails = {"high": {"url": thumbnail_url}} if thumbnail_url else {}

    return {
        "snippet": {
            "title": title,
            "description": description,
            "publishedAt": published_at,
            "thumbnails": thumbnails,
            "resourceId": {"videoId": video_id},
        },
        "contentDetails": {
            "videoPublishedAt": video_published_at,
        },
    }


class YouTubeApiTests(unittest.TestCase):
def test_search_recent_propagates_http_error(self) -> None:
"""HttpError from the API must propagate so callers can distinguish
Expand All @@ -59,63 +91,108 @@ def test_search_recent_propagates_http_error(self) -> None:
fake_resp.reason = "quotaExceeded"
error = HttpError(resp=fake_resp, content=b'{"error":{"message":"quotaExceeded"}}')

class _ErrorSearchResource:
class _ErrorPlaylistResource:
def list(self, **kwargs):
raise error

class _ErrorService:
def search(self):
return _ErrorSearchResource()
def channels(self):
return _FakeChannelsResource(
{"items": [{"contentDetails": {"relatedPlaylists": {"uploads": "UUtest"}}}]},
{},
)

def playlistItems(self):
return _ErrorPlaylistResource()

client = YouTubeClient.__new__(YouTubeClient)
client._service = _ErrorService()
client._uploads_playlist_cache = {}

with self.assertRaises(HttpError):
client.search_recent(
channel_id="channel-id",
channel_id="UCxxxxxxxxxxxxxxxxxxxxxxxx",
published_after=datetime(2026, 5, 1, tzinfo=timezone.utc),
)

def test_search_recent_requires_timezone_aware_datetime(self) -> None:
client = YouTubeClient.__new__(YouTubeClient)
client._service = _FakeService()
client._uploads_playlist_cache = {}
with self.assertRaises(ValueError):
client.search_recent(
channel_id="channel-id",
channel_id="UCxxxxxxxxxxxxxxxxxxxxxxxx",
published_after=datetime.now(),
)

def test_search_recent_maps_response_without_query_param(self) -> None:
def test_search_recent_uses_api_to_resolve_uploads_playlist(self) -> None:
"""search_recent must fetch the uploads playlist ID via channels.list,
not by string-manipulating the channel ID."""
service = _FakeService(
search_payload={
uploads_playlist_id="PLxxxxxxRealPlaylist",
playlist_payload={
"items": [
{
"id": {"videoId": "abc123"},
"snippet": {
"title": "Copilot update",
"description": "Great release notes",
"publishedAt": "2026-05-01T00:00:00Z",
"thumbnails": {"high": {"url": "https://img.example/1.jpg"}},
},
}
_playlist_item("abc123", "Copilot update", "Great release notes",
"2026-05-10T00:00:00Z", "https://img.example/1.jpg")
]
}
},
)
client = YouTubeClient.__new__(YouTubeClient)
client._service = service
client._uploads_playlist_cache = {}

videos = client.search_recent(
channel_id="channel-id",
channel_id="UC7c3Kb6jYCRj4JOHHZTxKsQ",
published_after=datetime(2026, 5, 1, tzinfo=timezone.utc),
max_results=15,
)

self.assertNotIn("q", service.search_capture)
self.assertEqual(service.search_capture["channelId"], "channel-id")
self.assertEqual(service.search_capture["maxResults"], 15)
# Must use the playlist ID returned by the API, not a derived one.
self.assertEqual(service.playlist_capture["playlistId"], "PLxxxxxxRealPlaylist")
self.assertEqual(service.playlist_capture["maxResults"], 15)
self.assertNotIn("q", service.playlist_capture)
self.assertEqual(len(videos), 1)
self.assertEqual(videos[0]["id"], "abc123")
self.assertEqual(videos[0]["url"], "https://www.youtube.com/watch?v=abc123")
self.assertEqual(videos[0]["view_count"], 0)
self.assertEqual(videos[0]["thumbnail"], "https://img.example/1.jpg")

def test_search_recent_raises_when_channel_not_found(self) -> None:
    """An empty ``channels.list`` payload must surface as ``ValueError``."""
    fake_service = _FakeService()
    # Override the default payload so the channel lookup returns no items.
    fake_service._channels_payload = {"items": []}

    youtube = YouTubeClient.__new__(YouTubeClient)
    youtube._service = fake_service
    youtube._uploads_playlist_cache = {}

    window_start = datetime(2026, 5, 1, tzinfo=timezone.utc)
    self.assertRaises(
        ValueError,
        youtube.search_recent,
        channel_id="UCxxxxxxxxxxxxxxxxxxxxxxxx",
        published_after=window_start,
    )

def test_search_recent_excludes_videos_at_or_before_window(self) -> None:
    """Only uploads strictly newer than ``published_after`` are returned."""
    cutoff = datetime(2026, 5, 7, tzinfo=timezone.utc)
    uploads = [
        _playlist_item("new", "New", "", "2026-05-10T00:00:00Z"),
        # Published exactly at the cutoff, so it must be filtered out.
        _playlist_item("boundary", "Boundary", "", "2026-05-07T00:00:00Z"),
        _playlist_item("old", "Old", "", "2026-05-01T00:00:00Z"),
    ]
    fake_service = _FakeService(
        uploads_playlist_id="PLtest",
        playlist_payload={"items": uploads},
    )

    youtube = YouTubeClient.__new__(YouTubeClient)
    youtube._service = fake_service
    youtube._uploads_playlist_cache = {}

    found = youtube.search_recent(
        channel_id="UCxxxxxxxxxxxxxxxxxxxxxxxx",
        published_after=cutoff,
    )

    returned_ids = [video["id"] for video in found]
    self.assertEqual(returned_ids, ["new"])

def test_get_video_statistics_parses_view_counts(self) -> None:
service = _FakeService(
Expand All @@ -136,6 +213,7 @@ def test_get_video_statistics_parses_view_counts(self) -> None:

def test_get_top_recent_videos_sorts_descending_by_view_count(self) -> None:
client = YouTubeClient.__new__(YouTubeClient)
client._uploads_playlist_cache = {}
with patch.object(
client,
"search_recent",
Expand All @@ -148,7 +226,7 @@ def test_get_top_recent_videos_sorts_descending_by_view_count(self) -> None:
client, "get_video_statistics", return_value={"a": 10, "b": 300, "c": 50}
):
top = client.get_top_recent_videos(
channel_id="channel-id",
channel_id="UCxxxxxxxxxxxxxxxxxxxxxxxx",
published_after=datetime(2026, 5, 1, tzinfo=timezone.utc),
top_n=2,
search_pool=20,
Expand Down
81 changes: 70 additions & 11 deletions utils/youtube_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
# Weekly digest: top 3 videos from the past 7 days (no keyword filter)
since = datetime.now(tz=timezone.utc) - timedelta(days=7)
videos = client.get_top_recent_videos(
channel_id="UC7c3Kb6jYCRj4JOHHZTxKsA",
channel_id="UC7c3Kb6jYCRj4JOHHZTxKsQ",
Comment thread
MetzinAround marked this conversation as resolved.
published_after=since,
top_n=3,
)
Expand Down Expand Up @@ -55,6 +55,32 @@ def __init__(self, api_key: str) -> None:
developerKey=api_key,
cache_discovery=False,
)
# Cache uploads playlist IDs to avoid a redundant channels.list call
# on every digest run.
self._uploads_playlist_cache: dict = {}

def _get_uploads_playlist_id(self, channel_id: str) -> str:
    """Return the uploads playlist ID for *channel_id*.

    On the first call for a given channel this issues ``channels.list``
    with ``part="contentDetails"`` and reads
    ``contentDetails.relatedPlaylists.uploads`` from the first returned
    item. The result is stored in ``self._uploads_playlist_cache`` so later
    calls for the same channel are answered without another API request.

    Raises :class:`ValueError` if the response contains no items (channel
    not found) or if the first item carries no uploads playlist ID. Nothing
    is cached when an error is raised. Exceptions raised by the API request
    itself are not caught here.
    """
    cache = self._uploads_playlist_cache
    if channel_id in cache:
        return cache[channel_id]

    response = (
        self._service.channels()
        .list(part="contentDetails", id=channel_id)
        .execute()
    )
    items = response.get("items", [])
    if not items:
        raise ValueError(
            f"YouTube channel {channel_id!r} not found. "
            "Check the channel_id in config.yaml."
        )

    # Walk the nested structure defensively: a partial response should
    # surface as the documented ValueError rather than a bare KeyError.
    playlist_id = (
        items[0]
        .get("contentDetails", {})
        .get("relatedPlaylists", {})
        .get("uploads")
    )
    if not playlist_id:
        raise ValueError(
            f"YouTube channel {channel_id!r} has no uploads playlist. "
            "Check the channel_id in config.yaml."
        )

    cache[channel_id] = playlist_id
    return playlist_id

# ------------------------------------------------------------------
# Public methods
Expand All @@ -66,7 +92,18 @@ def search_recent(
published_after: datetime,
max_results: int = 20,
) -> List[dict]:
"""Search *channel_id* for all videos published after *published_after*.
"""Return videos uploaded to *channel_id* after *published_after*.

Uses the channel's **uploads playlist** (``playlistItems.list``) rather
than ``search.list``. This is more reliable because:

- ``search.list`` has unpredictable indexing delays and can silently
omit recently-uploaded videos and YouTube Shorts.
- ``playlistItems.list`` reflects the actual upload history immediately.
- ``playlistItems.list`` costs **1 quota unit** vs 100 for ``search.list``.

The uploads playlist ID is retrieved via ``channels.list`` and cached
for the lifetime of the client instance.

Each returned dict contains:

Expand All @@ -80,21 +117,20 @@ def search_recent(

Raises :class:`googleapiclient.errors.HttpError` if the API call fails
so callers can distinguish a genuine empty result from an API error.
Raises :class:`ValueError` if the channel cannot be found.
"""
if published_after.tzinfo is None:
raise ValueError(
"published_after must be a timezone-aware datetime (e.g. use timezone.utc)"
)

published_after_str = published_after.strftime("%Y-%m-%dT%H:%M:%SZ")
uploads_playlist_id = self._get_uploads_playlist_id(channel_id)

response = (
self._service.search()
self._service.playlistItems()
.list(
part="snippet",
channelId=channel_id,
order="date",
type="video",
publishedAfter=published_after_str,
part="snippet,contentDetails",
playlistId=uploads_playlist_id,
maxResults=max_results,
)
.execute()
Expand All @@ -103,14 +139,37 @@ def search_recent(
videos: List[dict] = []
for item in response.get("items", []):
snippet = item["snippet"]
video_id = item["id"]["videoId"]
content_details = item.get("contentDetails", {})

video_id = snippet.get("resourceId", {}).get("videoId", "")
if not video_id:
continue

# contentDetails.videoPublishedAt is the authoritative publish date.
# snippet.publishedAt is when the item was added to the playlist
# (usually identical for the uploads playlist, but videoPublishedAt
# is preferred).
published_at_str = content_details.get(
"videoPublishedAt", snippet.get("publishedAt", "")
)

if published_at_str:
published_at = datetime.fromisoformat(
published_at_str.replace("Z", "+00:00")
)
# The uploads playlist is ordered newest-first. Once we reach
# a video at or before the window boundary, all remaining items
# will also be out of the window.
if published_at <= published_after:
break

videos.append(
{
"id": video_id,
"title": snippet.get("title", "Untitled"),
"description": snippet.get("description", "")[:500],
"url": f"https://www.youtube.com/watch?v={video_id}",
"published": snippet.get("publishedAt", ""),
"published": published_at_str,
"thumbnail": (
snippet.get("thumbnails", {})
.get("high", {})
Expand Down
Loading