Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 10 additions & 28 deletions bot/cogs/youtube_watcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,8 @@
2. On each run the cog checks whether today (UTC) is the configured
``digest_day`` (default: Thursday).
3. If today is the configured digest day and a digest hasn't already been sent today, the cog:
a. Searches the GitHub YouTube channel for videos matching any of the
configured ``keywords`` published in the past 7 days
(up to ``search_pool`` candidates).
a. Fetches videos from the GitHub YouTube channel published in the
past 7 days, newest first (up to ``search_pool`` candidates).
b. Fetches view counts for all candidates in a single API call.
c. Sorts by view count and picks the top ``digest_count`` videos.
d. Posts a single rich embed digest to the configured Discord channel.
Expand All @@ -25,7 +24,6 @@
channel_id – YouTube channel ID to watch
discord_channel_id – Discord channel ID to post the digest in
digest_day – Day of week for the digest (default: "thursday")
keywords – List of topic keywords to filter videos (OR logic)
digest_count – Number of videos in the digest (default: 3)
search_pool – Candidate pool size before view-count ranking (default: 20)
"""
Expand All @@ -34,7 +32,6 @@
import logging
import os
from datetime import datetime, timedelta, timezone
from typing import List

import discord
from discord import app_commands
Expand All @@ -51,14 +48,6 @@
"friday": 4, "saturday": 5, "sunday": 6,
}

_DEFAULT_KEYWORDS = [
"GitHub Copilot",
"GitHub Copilot CLI",
"Security",
"Developer Skills",
"Company News",
]


class YouTubeWatcher(commands.Cog):
"""Background task that posts a weekly YouTube digest on Thursdays."""
Expand All @@ -69,7 +58,6 @@ def __init__(self, bot: commands.Bot) -> None:

self.yt_channel_id: str = cfg["channel_id"]
self.discord_channel_id: int = int(cfg["discord_channel_id"])
self.keywords: List[str] = cfg.get("keywords", _DEFAULT_KEYWORDS)
self.digest_count: int = int(cfg.get("digest_count", 3))
self.search_pool: int = int(cfg.get("search_pool", 20))
digest_day_str: str = str(cfg.get("digest_day", "thursday")).strip().lower()
Expand Down Expand Up @@ -111,17 +99,15 @@ async def weekly_digest(self) -> None:
return

logger.info(
"Running weekly YouTube digest (keywords=%s, date=%s)",
self.keywords,
"Running weekly YouTube digest (date=%s)",
today_str,
)

today_midnight = now.replace(hour=0, minute=0, second=0, microsecond=0)
since = today_midnight - timedelta(days=7)
videos = await asyncio.to_thread(
self.yt_client.get_top_videos_by_keywords,
self.yt_client.get_top_recent_videos,
channel_id=self.yt_channel_id,
keywords=self.keywords,
published_after=since,
top_n=self.digest_count,
search_pool=self.search_pool,
Expand Down Expand Up @@ -151,15 +137,14 @@ async def weekly_digest(self) -> None:

if not videos:
logger.warning(
"YouTube digest query returned no videos for keywords %s. "
"YouTube digest returned no videos for the past 7 days. "
"Because an empty result may also indicate a YouTube API error, "
"skipping the 'no videos' post and not marking the digest as sent "
"so it can be retried later.",
self.keywords,
)
return

embed = _build_digest_embed(videos, self.keywords, since, now)
embed = _build_digest_embed(videos, since, now)
await channel.send(embed=embed)
logger.info(
"Posted YouTube weekly digest: %d video(s).", len(videos)
Expand Down Expand Up @@ -194,9 +179,8 @@ async def youtubedigest(self, interaction: discord.Interaction) -> None:
since = today_midnight - timedelta(days=7)

videos = await asyncio.to_thread(
self.yt_client.get_top_videos_by_keywords,
self.yt_client.get_top_recent_videos,
channel_id=self.yt_channel_id,
keywords=self.keywords,
published_after=since,
top_n=self.digest_count,
search_pool=self.search_pool,
Comment thread
MetzinAround marked this conversation as resolved.
Expand Down Expand Up @@ -225,12 +209,12 @@ async def youtubedigest(self, interaction: discord.Interaction) -> None:

if not videos:
await interaction.followup.send(
"⚠️ No matching YouTube videos found for the past 7 days.",
"⚠️ No recent YouTube videos found for the past 7 days.",
ephemeral=True,
)
return

embed = _build_digest_embed(videos, self.keywords, since, now)
embed = _build_digest_embed(videos, since, now)
await channel.send(embed=embed)
logger.info("Manual YouTube digest posted by %s: %d video(s).", interaction.user, len(videos))
await interaction.followup.send(
Expand Down Expand Up @@ -272,18 +256,16 @@ def _truncate(text: str, max_chars: int) -> str:

def _build_digest_embed(
videos: list,
keywords: List[str],
since: datetime,
now: datetime,
Comment thread
MetzinAround marked this conversation as resolved.
) -> discord.Embed:
"""Construct a Discord :class:`discord.Embed` for the weekly video digest."""
date_range = f"{since.strftime('%b %d')} – {now.strftime('%b %d, %Y')}"
topics_str = ", ".join(keywords)
embed = discord.Embed(
title="📺 GitHub — Weekly Video Digest",
description=(
f"Top GitHub YouTube videos from the past week ({date_range}), "
f"ranked by views.\n**Topics:** {topics_str}"
f"ranked by views."
),
color=discord.Color.red(),
)
Expand Down
11 changes: 1 addition & 10 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,19 +24,10 @@ youtube:
# Day of the week to post the digest (case-insensitive)
digest_day: "thursday"

# Topics to filter videos by (OR logic – a video matches if any keyword appears
# in its title or description)
keywords:
- "GitHub Copilot"
- "GitHub Copilot CLI"
- "Security"
- "Developer Skills"
- "Company News"

# Number of top videos to include in the digest (ranked by view count)
digest_count: 3

# Candidate pool size: how many recent keyword-matching videos to fetch before
# Candidate pool size: how many recent videos to fetch before
# ranking by view count and picking the top digest_count
search_pool: 20

Expand Down
3 changes: 1 addition & 2 deletions tests/test_cog_flows.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class _FakeYouTubeClient:
def __init__(self, api_key: str):
self.api_key = api_key

def get_top_videos_by_keywords(self, **kwargs):
def get_top_recent_videos(self, **kwargs):
return [
{
"id": "vid-1",
Expand Down Expand Up @@ -74,7 +74,6 @@ async def test_youtube_weekly_digest_fetches_channel_and_posts(self) -> None:
"youtube": {
"channel_id": "UC7c3Kb6jYCRj4JOHHZTxKsA",
"discord_channel_id": 123456789012345678,
"keywords": ["GitHub Copilot"],
"digest_count": 1,
"search_pool": 10,
"digest_day": _today_name_utc(),
Expand Down
1 change: 0 additions & 1 deletion tests/test_digest_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ def test_build_youtube_embed_contains_expected_fields(self) -> None:
"view_count": 1200,
}
],
keywords=["GitHub Copilot", "Security"],
since=now,
now=now,
)
Expand Down
19 changes: 8 additions & 11 deletions tests/test_youtube_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,17 +48,16 @@ def videos(self):


class YouTubeApiTests(unittest.TestCase):
def test_search_by_keywords_requires_timezone_aware_datetime(self) -> None:
def test_search_recent_requires_timezone_aware_datetime(self) -> None:
client = YouTubeClient.__new__(YouTubeClient)
client._service = _FakeService()
with self.assertRaises(ValueError):
client.search_by_keywords(
client.search_recent(
channel_id="channel-id",
keywords=["Copilot"],
published_after=datetime.now(),
)

def test_search_by_keywords_joins_list_keywords_and_maps_response(self) -> None:
def test_search_recent_maps_response_without_query_param(self) -> None:
service = _FakeService(
search_payload={
"items": [
Expand All @@ -77,14 +76,13 @@ def test_search_by_keywords_joins_list_keywords_and_maps_response(self) -> None:
client = YouTubeClient.__new__(YouTubeClient)
client._service = service

videos = client.search_by_keywords(
videos = client.search_recent(
channel_id="channel-id",
keywords=["Copilot", "Security"],
published_after=datetime(2026, 5, 1, tzinfo=timezone.utc),
max_results=15,
)

self.assertEqual(service.search_capture["q"], "Copilot|Security")
self.assertNotIn("q", service.search_capture)
self.assertEqual(service.search_capture["channelId"], "channel-id")
self.assertEqual(service.search_capture["maxResults"], 15)
self.assertEqual(videos[0]["id"], "abc123")
Expand All @@ -108,11 +106,11 @@ def test_get_video_statistics_parses_view_counts(self) -> None:
self.assertEqual(stats, {"vid1": 12, "vid2": 3000})
self.assertEqual(service.videos_capture["id"], "vid1,vid2")

def test_get_top_videos_by_keywords_sorts_descending_by_view_count(self) -> None:
def test_get_top_recent_videos_sorts_descending_by_view_count(self) -> None:
client = YouTubeClient.__new__(YouTubeClient)
with patch.object(
client,
"search_by_keywords",
"search_recent",
return_value=[
{"id": "a", "title": "A", "view_count": 0},
{"id": "b", "title": "B", "view_count": 0},
Expand All @@ -121,9 +119,8 @@ def test_get_top_videos_by_keywords_sorts_descending_by_view_count(self) -> None
), patch.object(
client, "get_video_statistics", return_value={"a": 10, "b": 300, "c": 50}
):
top = client.get_top_videos_by_keywords(
top = client.get_top_recent_videos(
channel_id="channel-id",
keywords=["Copilot"],
published_after=datetime(2026, 5, 1, tzinfo=timezone.utc),
top_n=2,
search_pool=20,
Expand Down
46 changes: 14 additions & 32 deletions utils/youtube_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,10 @@

client = YouTubeClient(api_key="YOUR_KEY")

# Weekly digest: top 3 videos from the past 7 days matching any keyword
since = datetime.now(tz=timezone.utc) - timedelta(days=7)
keywords = ["GitHub Copilot", "GitHub Copilot CLI", "Security",
"Developer Skills", "Company News"]
videos = client.get_top_videos_by_keywords(
# Weekly digest: top 3 videos from the past 7 days (no keyword filter)
since = datetime.now(tz=timezone.utc) - timedelta(days=7)
videos = client.get_top_recent_videos(
channel_id="UC7c3Kb6jYCRj4JOHHZTxKsA",
keywords=keywords,
published_after=since,
top_n=3,
)
Expand All @@ -34,7 +31,7 @@

import logging
from datetime import datetime
from typing import Dict, List, Union
from typing import Dict, List

from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
Expand Down Expand Up @@ -63,18 +60,13 @@ def __init__(self, api_key: str) -> None:
# Public methods
# ------------------------------------------------------------------

def search_by_keywords(
def search_recent(
self,
channel_id: str,
keywords: Union[str, List[str]],
published_after: datetime,
max_results: int = 20,
) -> List[dict]:
"""Search *channel_id* for videos matching any of *keywords* published after *published_after*.

*keywords* may be a single string or a list of strings. When a list is
supplied the YouTube ``q`` parameter is constructed as
``"term1|term2|term3"`` so the API returns results matching **any** term.
"""Search *channel_id* for all videos published after *published_after*.

Each returned dict contains:

Expand All @@ -93,21 +85,13 @@ def search_by_keywords(
"published_after must be a timezone-aware datetime (e.g. use timezone.utc)"
)

# Build the query string. The YouTube Data API supports OR via "|".
if isinstance(keywords, list):
query = "|".join(keywords)
else:
query = keywords

# RFC 3339 format required by the YouTube Data API.
published_after_str = published_after.strftime("%Y-%m-%dT%H:%M:%SZ")
try:
response = (
self._service.search()
.list(
part="snippet",
channelId=channel_id,
q=query,
order="date",
type="video",
publishedAfter=published_after_str,
Expand All @@ -117,9 +101,8 @@ def search_by_keywords(
)
except HttpError as exc:
logger.error(
"YouTube API search error (channel=%s, keywords=%s): %s",
"YouTube API search error (channel=%s): %s",
channel_id,
keywords,
exc,
)
return []
Expand Down Expand Up @@ -175,26 +158,25 @@ def get_video_statistics(self, video_ids: List[str]) -> Dict[str, int]:
stats[vid_id] = int(raw)
return stats

def get_top_videos_by_keywords(
def get_top_recent_videos(
self,
channel_id: str,
keywords: Union[str, List[str]],
published_after: datetime,
top_n: int = 3,
search_pool: int = 20,
) -> List[dict]:
"""Return the top *top_n* videos matching *keywords* ranked by view count.
"""Return the top *top_n* recent videos from *channel_id* ranked by view count.

1. Searches *channel_id* for videos matching any term in *keywords*
published in the past week (up to *search_pool* candidates).
1. Fetches up to *search_pool* videos from *channel_id* published after
*published_after* (no keyword filter).
2. Fetches view counts for all candidates in a single batch call.
3. Sorts by view count descending and returns the top *top_n*.

Each returned dict contains the same fields as :meth:`search_by_keywords`
Each returned dict contains the same fields as :meth:`search_recent`
plus a populated ``view_count`` integer.
"""
videos = self.search_by_keywords(
channel_id, keywords, published_after, max_results=search_pool
videos = self.search_recent(
channel_id, published_after, max_results=search_pool
)
if not videos:
return []
Expand Down
Loading