# Source: PythonID-bot/src/bot/main.py at 047e05b2d6aef62fe87d7a43e73e342852ae0994 (rezhajulio/PythonID-bot, GitHub)
"""
Main entry point for the PythonID bot.

This module initializes the bot application, registers all message handlers,
and starts the polling loop. Handler registration order matters; in outline:
1. Topic guard (group -1): Runs first to delete unauthorized messages
2. Group 0: admin commands, callback buttons, captcha handlers and the DM
   handler, which processes private messages for the unrestriction flow
3. Groups 1-4: spam handlers, then the message handler that monitors group
   messages for profile compliance
"""

import logging

import logfire
from telegram.error import NetworkError, TimedOut
from telegram.ext import Application, CallbackQueryHandler, CommandHandler, ContextTypes, MessageHandler, filters

from bot.config import get_settings
from bot.database.service import init_database
from bot.group_config import get_group_registry, init_group_registry
from bot.handlers import captcha
from bot.handlers.anti_spam import handle_inline_keyboard_spam, handle_new_user_spam
from bot.handlers.duplicate_spam import handle_duplicate_spam
from bot.handlers.dm import handle_dm
from bot.handlers.message import handle_message
from bot.handlers.topic_guard import guard_warning_topic
from bot.handlers.verify import (
    handle_unverify_callback,
    handle_unverify_command,
    handle_verify_callback,
    handle_verify_command,
)
from bot.handlers.check import (
    handle_check_command,
    handle_check_forwarded_message,
    handle_warn_callback,
)
from bot.services.scheduler import auto_restrict_expired_warnings
from bot.services.telegram_utils import fetch_group_admin_ids


def configure_logging() -> None:
    """
    Configure logging with Logfire integration.

    Uses minimal instrumentation to conserve Logfire quota:
    - Configurable log level via LOG_LEVEL environment variable
    - Disables auto-instrumentation for less critical operations
    - Suppresses verbose HTTP request logs from httpx/httpcore libraries
    - In local dev: console output only (send_to_logfire=False)
    - In production: sends to Logfire only if LOGFIRE_TOKEN is set

    The configured level name is resolved once and used for both the
    standard-library root logger and the Logfire console. Names that are not
    recognised fall back to INFO with a warning, so the two sinks can never
    disagree and the level reported at the end is the one actually in effect.
    """
    log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"

    # Configure basic logging FIRST to capture Settings initialization logs
    logging.basicConfig(
        format=log_format,
        level=logging.INFO,
        force=True,  # Override any existing config
    )
    logger = logging.getLogger(__name__)

    # Now load settings (this will trigger model_post_init logging)
    settings = get_settings()

    # Standard-library level name -> Logfire console level name.
    # Logfire uses its own level vocabulary (it has "fatal" and "trace" but no
    # "critical" or "notset"), so the stdlib name cannot simply be lower-cased.
    logfire_level_names = {
        "NOTSET": "trace",
        "DEBUG": "debug",
        "INFO": "info",
        "WARN": "warn",
        "WARNING": "warning",
        "ERROR": "error",
        "FATAL": "fatal",
        "CRITICAL": "fatal",
    }

    log_level_str = settings.log_level.upper()
    if log_level_str not in logfire_level_names:
        # Previously an unknown name silently became INFO for stdlib logging
        # while the raw string was still handed to Logfire.
        logger.warning(f"Unknown log level {settings.log_level!r}; falling back to INFO")
        log_level_str = "INFO"

    log_level = getattr(logging, log_level_str)
    logfire_min_level = logfire_level_names[log_level_str]

    # Determine if we should send to Logfire
    # Only send if enabled AND token is provided
    send_to_logfire = settings.logfire_enabled and settings.logfire_token is not None

    # Configure Logfire with minimal instrumentation
    logfire.configure(
        token=settings.logfire_token,
        service_name=settings.logfire_service_name,
        environment=settings.logfire_environment,
        send_to_logfire=send_to_logfire,
        console=logfire.ConsoleOptions(
            colors="auto",
            include_timestamps=True,
            min_log_level=logfire_min_level,
        ),
        # Disable auto-instrumentation to save quota
        inspect_arguments=False,
    )

    # Reconfigure logging with Logfire handler and configured level
    logging.basicConfig(
        format=log_format,
        level=log_level,
        handlers=[logfire.LogfireLoggingHandler()],
        force=True,  # Override previous config
    )

    # Suppress verbose HTTP logs from httpx/httpcore used by python-telegram-bot
    # These libraries log every HTTP request at INFO level, flooding logs with Telegram API polling requests
    logging.getLogger("httpx").setLevel(logging.WARNING)
    logging.getLogger("httpcore").setLevel(logging.WARNING)

    logger.info(f"Logging level set to {log_level_str}")
    if send_to_logfire:
        logger.info(f"Logfire enabled - sending logs to {settings.logfire_environment}")
    else:
        logger.info("Logfire disabled - console output only")


# Module-level logger used by error_handler, post_init and main below.
logger = logging.getLogger(__name__)


async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> None:
    """
    Log an error raised while processing an update, then carry on.

    Timeouts and other network errors are reported at warning level because
    they are transient; anything else is logged as an error together with
    its traceback.

    Args:
        update: The update being processed when the error occurred (unused).
        context: Callback context whose ``error`` attribute holds the exception.
    """
    exc = context.error

    # TimedOut is tested before NetworkError so that timeouts get their own
    # message (python-telegram-bot defines TimedOut as a NetworkError subclass).
    if isinstance(exc, TimedOut):
        logger.warning(f"Request timed out: {exc}")
    elif isinstance(exc, NetworkError):
        logger.warning(f"Network error: {exc}")
    else:
        logger.error("Unhandled exception:", exc_info=exc)


async def post_init(application: Application) -> None:  # type: ignore[type-arg]
    """
    Cache group admin IDs in ``bot_data`` and recover pending captchas.

    Registered as the application's post_init callback. For every group in
    the registry the admin list is fetched; a failed fetch is logged and
    leaves that group with an empty admin list instead of aborting startup.
    Two ``bot_data`` entries are written:

    - ``"group_admin_ids"``: mapping of group ID to that group's admin IDs
    - ``"admin_ids"``: list of the unique admin IDs across all groups

    If at least one group has captcha enabled, pending captcha verifications
    are then recovered from the database.

    Args:
        application: The Application instance.
    """
    logger.info("Starting post_init: fetching admin IDs and recovering captcha state")
    registry = get_group_registry()

    admins_by_group: dict[int, list[int]] = {}
    unique_admins: set[int] = set()

    for group in registry.all_groups():
        chat_id = group.group_id
        logger.info(f"Fetching admin IDs for group {chat_id}")
        try:
            fetched = await fetch_group_admin_ids(application.bot, chat_id)  # type: ignore[arg-type]
            admins_by_group[chat_id] = fetched
            unique_admins.update(fetched)
            logger.info(f"Fetched {len(fetched)} admin(s) from group {chat_id}")
        except Exception as e:
            # Best effort: one unreachable group must not block startup.
            logger.error(f"Failed to fetch admin IDs for group {chat_id}: {e}")
            admins_by_group[chat_id] = []

    application.bot_data["group_admin_ids"] = admins_by_group  # type: ignore[index]
    application.bot_data["admin_ids"] = list(unique_admins)  # type: ignore[index]
    logger.info(f"Total unique admins across all groups: {len(unique_admins)}")

    # Nothing to recover unless some group actually uses captcha.
    if not any(group.captcha_enabled for group in registry.all_groups()):
        return

    logger.info("Recovering pending captcha verifications from database")
    # Imported here rather than at module top, as in the original.
    from bot.services.captcha_recovery import recover_pending_captchas
    await recover_pending_captchas(application)


def main() -> None:
    """
    Initialize and run the bot.

    This function:
    1. Configures logging with Logfire integration
    2. Loads configuration from environment
    3. Initializes the group registry (from groups.json or .env fallback)
    4. Initializes the SQLite database
    5. Builds the application with post_init and the error handler attached
    6. Registers message handlers in priority order (groups -1, 0, 1-4)
    7. Schedules the periodic auto-restriction job on the JobQueue
    8. Starts the bot polling loop (blocks until the bot stops)

    Handler groups are processed in ascending order. In python-telegram-bot,
    at most one handler per group handles a given update (the first one
    registered whose filter matches), so registration order inside group 0
    is significant.
    """
    # Configure logging first
    configure_logging()

    settings = get_settings()

    # Initialize group registry
    registry = init_group_registry(settings)
    group_count = len(registry.all_groups())
    logger.info(f"Starting PythonID bot (environment: {settings.logfire_environment}, groups: {group_count})")
    # Log the per-group configuration once at startup for easier diagnosis
    for gc in registry.all_groups():
        logger.info(
            f"  Group {gc.group_id}: warning_topic={gc.warning_topic_id}, "
            f"restrict={gc.restrict_failed_users}, captcha={gc.captcha_enabled}"
        )

    # Initialize database (creates tables if they don't exist)
    init_database(settings.database_path)
    logger.info(f"Database initialized at {settings.database_path}")

    # Build the bot application with the token and post_init callback
    application = Application.builder().token(settings.telegram_bot_token).post_init(post_init).build()
    application.add_error_handler(error_handler)
    logger.info("Application built successfully")

    # Handler 1: Topic guard - runs first (group -1) to delete unauthorized
    # messages in the warning topic before other handlers process them
    application.add_handler(
        MessageHandler(
            filters.ALL,
            guard_warning_topic,
        ),
        group=-1,
    )
    logger.info("Registered handler: topic_guard (group=-1)")

    # Handler 2: /verify command - allows admins to whitelist users in DM
    application.add_handler(
        CommandHandler("verify", handle_verify_command)
    )
    logger.info("Registered handler: verify_command (group=0)")

    # Handler 3: /unverify command - allows admins to remove users from whitelist in DM
    application.add_handler(
        CommandHandler("unverify", handle_unverify_command)
    )
    logger.info("Registered handler: unverify_command (group=0)")

    # Handler 4: /check command - allows admins to check user profiles in DM
    application.add_handler(
        CommandHandler("check", handle_check_command)
    )
    logger.info("Registered handler: check_command (group=0)")

    # Handler 5: Forwarded message handler - allows admins to check profiles via forward.
    # Registered before the DM handler so forwarded private messages reach it first.
    application.add_handler(
        MessageHandler(
            filters.FORWARDED & filters.ChatType.PRIVATE,
            handle_check_forwarded_message
        )
    )
    logger.info("Registered handler: check_forwarded_message (group=0)")

    # Handler 6: Callback handlers for verify/unverify/warn buttons.
    # Callback data is matched by regex: "verify:<digits>", "unverify:<digits>"
    # and anything starting with "warn:<digits>:".
    application.add_handler(
        CallbackQueryHandler(handle_verify_callback, pattern=r"^verify:\d+$")
    )
    logger.info("Registered handler: verify_callback (group=0)")
    application.add_handler(
        CallbackQueryHandler(handle_unverify_callback, pattern=r"^unverify:\d+$")
    )
    logger.info("Registered handler: unverify_callback (group=0)")
    application.add_handler(
        CallbackQueryHandler(handle_warn_callback, pattern=r"^warn:\d+:")
    )
    logger.info("Registered handler: warn_callback (group=0)")

    # Handler 7: Captcha handlers - new member verification
    for handler in captcha.get_handlers():
        application.add_handler(handler)
    logger.info("Registered handler: captcha_handlers (group=0)")

    # Handler 8: DM handler - processes private messages (including /start)
    # for the unrestriction flow. Must be registered before group handler
    # to prevent group handler from catching private messages first.
    application.add_handler(
        MessageHandler(
            filters.ChatType.PRIVATE & filters.TEXT,
            handle_dm,
        )
    )
    logger.info("Registered handler: dm_handler (group=0)")

    # Handler 9: Inline keyboard spam handler - catches messages with
    # non-whitelisted URL buttons in inline keyboards (spam from bots/forwards).
    # Each spam handler runs in its own group so they all independently process
    # every group message. They raise ApplicationHandlerStop to prevent later
    # groups from running when spam IS detected.
    application.add_handler(
        MessageHandler(
            filters.ChatType.GROUPS,
            handle_inline_keyboard_spam,
        ),
        group=1,
    )
    logger.info("Registered handler: inline_keyboard_spam_handler (group=1)")

    # Handler 10: New-user anti-spam handler - checks for forwards/links from users on probation
    application.add_handler(
        MessageHandler(
            filters.ChatType.GROUPS,
            handle_new_user_spam,
        ),
        group=2,
    )
    logger.info("Registered handler: anti_spam_handler (group=2)")

    # Handler 11: Duplicate message spam handler - detects repeated identical messages
    # (commands are excluded by the filter)
    application.add_handler(
        MessageHandler(
            filters.ChatType.GROUPS & ~filters.COMMAND,
            handle_duplicate_spam,
        ),
        group=3,
    )
    logger.info("Registered handler: duplicate_spam_handler (group=3)")

    # Handler 12: Group message handler - monitors messages in monitored
    # groups and warns/restricts users with incomplete profiles.
    # Runs in the last group, after all spam handlers.
    application.add_handler(
        MessageHandler(
            filters.ChatType.GROUPS & ~filters.COMMAND,
            handle_message,
        ),
        group=4,
    )
    logger.info("Registered handler: message_handler (group=4)")

    # Register auto-restriction job to run every 5 minutes (300 seconds);
    # the first run is delayed by the same interval. The job is skipped
    # when the application has no JobQueue.
    if application.job_queue:
        application.job_queue.run_repeating(
            auto_restrict_expired_warnings,
            interval=300,
            first=300,
            name="auto_restrict_job"
        )
        logger.info("JobQueue registered: auto_restrict_job (every 5 minutes, first run in 5 minutes)")

    logger.info(f"Starting bot polling for {group_count} group(s)")
    logger.info("All handlers registered successfully")

    # Poll only for the listed update types.
    # NOTE(review): "chat_member" is presumably required by the captcha
    # handlers - confirm against bot.handlers.captcha.
    application.run_polling(allowed_updates=["message", "callback_query", "chat_member"])


# Start the bot only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()