From 0172d1e17f8ce285910f0056881a24b68cd94e48 Mon Sep 17 00:00:00 2001 From: chenyifan <3167712292@qq.com> Date: Sun, 22 Mar 2026 19:52:28 +0800 Subject: [PATCH] feat: add video call plugin with face recognition and vision context - Add VideoCallPlugin with InsightFace face recognition (512-dim embeddings) - Add VisionContextProvider for real-time frame caching and merging - Inject vision context automatically into coordinator transient_system_messages - Register face tools (bind/list/forget) via non-invasive context_builder wrapping - Add decision engine rules for face binding intent routing to agent path - Add proactive unknown face detection prompting in _build_vision_message - Persist face features to .echobot/face_features.json via FaceFeatureInterceptor - Fix ReMeLight backward compatibility for tool_result_threshold and retention_days - Add VIDEO_CALL_README.md with architecture and usage documentation --- .env | 61 ++ .gitignore | 11 +- VIDEO_CALL_README.md | 153 +++++ echobot/app/create_app.py | 71 ++- echobot/app/schemas.py | 1 + echobot/app/services/chat.py | 4 + echobot/app/web/app.js | 7 + echobot/app/web/camera.html | 201 +++++++ echobot/app/web/index.html | 214 +++++++ echobot/app/web/modules/chat.js | 29 +- .../app/web/modules/video-call-integration.js | 300 ++++++++++ echobot/memory/support.py | 52 +- echobot/orchestration/coordinator.py | 71 ++- echobot/orchestration/decision.py | 4 + echobot/orchestration/roleplay.py | 8 +- echobot/plugins/__init__.py | 6 + echobot/plugins/base.py | 46 ++ echobot/plugins/registry.py | 41 ++ echobot/plugins/video_call/__init__.py | 109 ++++ echobot/plugins/video_call/config.py | 31 ++ .../video_call/interceptors/__init__.py | 105 ++++ echobot/plugins/video_call/models.py | 43 ++ .../plugins/video_call/routers/__init__.py | 172 ++++++ .../plugins/video_call/services/__init__.py | 131 +++++ .../video_call/services/face_recognition.py | 173 ++++++ .../video_call/services/image_description.py | 120 ++++ 
echobot/plugins/video_call/tools/__init__.py | 45 ++ .../plugins/video_call/tools/face_tools.py | 135 +++++ echobot/plugins/video_call/tools/handlers.py | 104 ++++ echobot/plugins/video_call/vision_provider.py | 86 +++ echobot/plugins/video_call/web/video-call.css | 101 ++++ echobot/plugins/video_call/web/video-call.js | 240 ++++++++ echobot_client/.gitignore | 45 ++ echobot_client/.metadata | 30 + echobot_client/README.md | 17 + echobot_client/analysis_options.yaml | 28 + echobot_client/android/.gitignore | 101 ++++ echobot_client/android/app/.gitignore | 2 + echobot_client/android/app/build.gradle | 54 ++ .../android/app/capacitor.build.gradle | 19 + echobot_client/android/app/proguard-rules.pro | 21 + .../myapp/ExampleInstrumentedTest.java | 26 + .../android/app/src/main/AndroidManifest.xml | 48 ++ .../java/com/echobot/app/MainActivity.java | 98 ++++ .../main/res/drawable-land-hdpi/splash.png | Bin 0 -> 7705 bytes .../main/res/drawable-land-mdpi/splash.png | Bin 0 -> 4040 bytes .../main/res/drawable-land-xhdpi/splash.png | Bin 0 -> 9251 bytes .../main/res/drawable-land-xxhdpi/splash.png | Bin 0 -> 13984 bytes .../main/res/drawable-land-xxxhdpi/splash.png | Bin 0 -> 17683 bytes .../main/res/drawable-port-hdpi/splash.png | Bin 0 -> 7934 bytes .../main/res/drawable-port-mdpi/splash.png | Bin 0 -> 4096 bytes .../main/res/drawable-port-xhdpi/splash.png | Bin 0 -> 9875 bytes .../main/res/drawable-port-xxhdpi/splash.png | Bin 0 -> 13346 bytes .../main/res/drawable-port-xxxhdpi/splash.png | Bin 0 -> 17489 bytes .../drawable-v24/ic_launcher_foreground.xml | 34 ++ .../res/drawable/ic_launcher_background.xml | 170 ++++++ .../app/src/main/res/drawable/splash.png | Bin 0 -> 4040 bytes .../app/src/main/res/layout/activity_main.xml | 12 + .../res/mipmap-anydpi-v26/ic_launcher.xml | 5 + .../mipmap-anydpi-v26/ic_launcher_round.xml | 5 + .../src/main/res/mipmap-hdpi/ic_launcher.png | Bin 0 -> 2786 bytes .../mipmap-hdpi/ic_launcher_foreground.png | Bin 0 -> 3450 bytes 
.../res/mipmap-hdpi/ic_launcher_round.png | Bin 0 -> 4341 bytes .../src/main/res/mipmap-mdpi/ic_launcher.png | Bin 0 -> 1869 bytes .../mipmap-mdpi/ic_launcher_foreground.png | Bin 0 -> 2110 bytes .../res/mipmap-mdpi/ic_launcher_round.png | Bin 0 -> 2725 bytes .../src/main/res/mipmap-xhdpi/ic_launcher.png | Bin 0 -> 3981 bytes .../mipmap-xhdpi/ic_launcher_foreground.png | Bin 0 -> 5036 bytes .../res/mipmap-xhdpi/ic_launcher_round.png | Bin 0 -> 6593 bytes .../main/res/mipmap-xxhdpi/ic_launcher.png | Bin 0 -> 6644 bytes .../mipmap-xxhdpi/ic_launcher_foreground.png | Bin 0 -> 9793 bytes .../res/mipmap-xxhdpi/ic_launcher_round.png | Bin 0 -> 10455 bytes .../main/res/mipmap-xxxhdpi/ic_launcher.png | Bin 0 -> 9441 bytes .../mipmap-xxxhdpi/ic_launcher_foreground.png | Bin 0 -> 15529 bytes .../res/mipmap-xxxhdpi/ic_launcher_round.png | Bin 0 -> 15916 bytes .../res/values/ic_launcher_background.xml | 4 + .../app/src/main/res/values/strings.xml | 7 + .../app/src/main/res/values/styles.xml | 22 + .../app/src/main/res/xml/file_paths.xml | 5 + .../getcapacitor/myapp/ExampleUnitTest.java | 18 + echobot_client/android/build.gradle | 29 + .../android/capacitor.settings.gradle | 3 + echobot_client/android/gradle.properties | 22 + .../android/gradle/wrapper/gradle-wrapper.jar | Bin 0 -> 43764 bytes .../gradle/wrapper/gradle-wrapper.properties | 7 + echobot_client/android/gradlew | 251 +++++++++ echobot_client/android/gradlew.bat | 94 ++++ echobot_client/android/settings.gradle | 5 + echobot_client/android/variables.gradle | 16 + echobot_client/capacitor.config.json | 13 + echobot_client/lib/main.dart | 122 ++++ echobot_client/package.json | 22 + echobot_client/pubspec.lock | 522 ++++++++++++++++++ echobot_client/pubspec.yaml | 37 ++ echobot_client/test/widget_test.dart | 30 + 95 files changed, 4783 insertions(+), 36 deletions(-) create mode 100644 .env create mode 100644 VIDEO_CALL_README.md create mode 100644 echobot/app/web/camera.html create mode 100644 
echobot/app/web/modules/video-call-integration.js create mode 100644 echobot/plugins/__init__.py create mode 100644 echobot/plugins/base.py create mode 100644 echobot/plugins/registry.py create mode 100644 echobot/plugins/video_call/__init__.py create mode 100644 echobot/plugins/video_call/config.py create mode 100644 echobot/plugins/video_call/interceptors/__init__.py create mode 100644 echobot/plugins/video_call/models.py create mode 100644 echobot/plugins/video_call/routers/__init__.py create mode 100644 echobot/plugins/video_call/services/__init__.py create mode 100644 echobot/plugins/video_call/services/face_recognition.py create mode 100644 echobot/plugins/video_call/services/image_description.py create mode 100644 echobot/plugins/video_call/tools/__init__.py create mode 100644 echobot/plugins/video_call/tools/face_tools.py create mode 100644 echobot/plugins/video_call/tools/handlers.py create mode 100644 echobot/plugins/video_call/vision_provider.py create mode 100644 echobot/plugins/video_call/web/video-call.css create mode 100644 echobot/plugins/video_call/web/video-call.js create mode 100644 echobot_client/.gitignore create mode 100644 echobot_client/.metadata create mode 100644 echobot_client/README.md create mode 100644 echobot_client/analysis_options.yaml create mode 100644 echobot_client/android/.gitignore create mode 100644 echobot_client/android/app/.gitignore create mode 100644 echobot_client/android/app/build.gradle create mode 100644 echobot_client/android/app/capacitor.build.gradle create mode 100644 echobot_client/android/app/proguard-rules.pro create mode 100644 echobot_client/android/app/src/androidTest/java/com/getcapacitor/myapp/ExampleInstrumentedTest.java create mode 100644 echobot_client/android/app/src/main/AndroidManifest.xml create mode 100644 echobot_client/android/app/src/main/java/com/echobot/app/MainActivity.java create mode 100644 echobot_client/android/app/src/main/res/drawable-land-hdpi/splash.png create mode 100644 
echobot_client/android/app/src/main/res/drawable-land-mdpi/splash.png create mode 100644 echobot_client/android/app/src/main/res/drawable-land-xhdpi/splash.png create mode 100644 echobot_client/android/app/src/main/res/drawable-land-xxhdpi/splash.png create mode 100644 echobot_client/android/app/src/main/res/drawable-land-xxxhdpi/splash.png create mode 100644 echobot_client/android/app/src/main/res/drawable-port-hdpi/splash.png create mode 100644 echobot_client/android/app/src/main/res/drawable-port-mdpi/splash.png create mode 100644 echobot_client/android/app/src/main/res/drawable-port-xhdpi/splash.png create mode 100644 echobot_client/android/app/src/main/res/drawable-port-xxhdpi/splash.png create mode 100644 echobot_client/android/app/src/main/res/drawable-port-xxxhdpi/splash.png create mode 100644 echobot_client/android/app/src/main/res/drawable-v24/ic_launcher_foreground.xml create mode 100644 echobot_client/android/app/src/main/res/drawable/ic_launcher_background.xml create mode 100644 echobot_client/android/app/src/main/res/drawable/splash.png create mode 100644 echobot_client/android/app/src/main/res/layout/activity_main.xml create mode 100644 echobot_client/android/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml create mode 100644 echobot_client/android/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml create mode 100644 echobot_client/android/app/src/main/res/mipmap-hdpi/ic_launcher.png create mode 100644 echobot_client/android/app/src/main/res/mipmap-hdpi/ic_launcher_foreground.png create mode 100644 echobot_client/android/app/src/main/res/mipmap-hdpi/ic_launcher_round.png create mode 100644 echobot_client/android/app/src/main/res/mipmap-mdpi/ic_launcher.png create mode 100644 echobot_client/android/app/src/main/res/mipmap-mdpi/ic_launcher_foreground.png create mode 100644 echobot_client/android/app/src/main/res/mipmap-mdpi/ic_launcher_round.png create mode 100644 echobot_client/android/app/src/main/res/mipmap-xhdpi/ic_launcher.png create mode 
100644 echobot_client/android/app/src/main/res/mipmap-xhdpi/ic_launcher_foreground.png create mode 100644 echobot_client/android/app/src/main/res/mipmap-xhdpi/ic_launcher_round.png create mode 100644 echobot_client/android/app/src/main/res/mipmap-xxhdpi/ic_launcher.png create mode 100644 echobot_client/android/app/src/main/res/mipmap-xxhdpi/ic_launcher_foreground.png create mode 100644 echobot_client/android/app/src/main/res/mipmap-xxhdpi/ic_launcher_round.png create mode 100644 echobot_client/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher.png create mode 100644 echobot_client/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher_foreground.png create mode 100644 echobot_client/android/app/src/main/res/mipmap-xxxhdpi/ic_launcher_round.png create mode 100644 echobot_client/android/app/src/main/res/values/ic_launcher_background.xml create mode 100644 echobot_client/android/app/src/main/res/values/strings.xml create mode 100644 echobot_client/android/app/src/main/res/values/styles.xml create mode 100644 echobot_client/android/app/src/main/res/xml/file_paths.xml create mode 100644 echobot_client/android/app/src/test/java/com/getcapacitor/myapp/ExampleUnitTest.java create mode 100644 echobot_client/android/build.gradle create mode 100644 echobot_client/android/capacitor.settings.gradle create mode 100644 echobot_client/android/gradle.properties create mode 100644 echobot_client/android/gradle/wrapper/gradle-wrapper.jar create mode 100644 echobot_client/android/gradle/wrapper/gradle-wrapper.properties create mode 100755 echobot_client/android/gradlew create mode 100644 echobot_client/android/gradlew.bat create mode 100644 echobot_client/android/settings.gradle create mode 100644 echobot_client/android/variables.gradle create mode 100644 echobot_client/capacitor.config.json create mode 100644 echobot_client/lib/main.dart create mode 100644 echobot_client/package.json create mode 100644 echobot_client/pubspec.lock create mode 100644 echobot_client/pubspec.yaml create 
mode 100644 echobot_client/test/widget_test.dart diff --git a/.env b/.env new file mode 100644 index 0000000..1378873 --- /dev/null +++ b/.env @@ -0,0 +1,61 @@ +# OpenAI-compatible LLM settings (文本对话 - DeepSeek) +LLM_API_KEY=your_llm_api_key_here +LLM_MODEL=deepseek-chat +LLM_BASE_URL=https://api.deepseek.com +LLM_TIMEOUT=60 +# Optional: extra fields merged into every request body (JSON object) +# Example: LLM_EXTRA_BODY={"chat_template_kwargs": {"enable_thinking": false}} +LLM_EXTRA_BODY= + +# Vision model (Qwen-VL via DashScope) - 图像描述专用 +VISION_API_KEY=your_vision_api_key_here +VISION_MODEL=qwen-vl-plus +VISION_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1 + +# Lightweight orchestration max_tokens budget +# Used by the decision layer and short roleplay replies +ECHOBOT_LIGHTWEIGHT_MAX_TOKENS=4096 + +# Single-turn agent tool/skill loop step limit +ECHOBOT_AGENT_MAX_STEPS=50 + +# Whether to send a short "I'm checking" acknowledgement before delegating +# to the full background agent +ECHOBOT_DELEGATED_ACK_ENABLED=true + +# Runtime logging +REME_LOG_LEVEL=WARNING +AGENTSCOPE_LOG_LEVEL=WARNING + +# Web Live2D +ECHOBOT_WEB_LIVE2D_MODEL= + +# Web TTS +ECHOBOT_TTS_KOKORO_AUTO_DOWNLOAD=true +ECHOBOT_TTS_KOKORO_MODEL_DIR= +ECHOBOT_TTS_KOKORO_PROVIDER=cpu +ECHOBOT_TTS_KOKORO_NUM_THREADS=2 +ECHOBOT_TTS_KOKORO_DEFAULT_VOICE=zf_001 +ECHOBOT_TTS_KOKORO_DOWNLOAD_TIMEOUT_SECONDS=600 +ECHOBOT_TTS_KOKORO_URL= +ECHOBOT_TTS_KOKORO_LENGTH_SCALE=1.0 +ECHOBOT_TTS_KOKORO_LANG= + +# Web ASR / VAD +ECHOBOT_ASR_AUTO_DOWNLOAD=true +ECHOBOT_ASR_MODEL_DIR= +ECHOBOT_ASR_PROVIDER=cpu +ECHOBOT_ASR_NUM_THREADS=2 +ECHOBOT_ASR_LANGUAGE=auto +ECHOBOT_ASR_USE_ITN=false +ECHOBOT_ASR_SAMPLE_RATE=16000 +ECHOBOT_ASR_DOWNLOAD_TIMEOUT_SECONDS=600 +ECHOBOT_ASR_SENSEVOICE_URL= +ECHOBOT_ASR_VAD_URL= + +# Video Call Plugin +ECHOBOT_ENABLE_VIDEO_CALL=true +ECHOBOT_VIDEO_FRAME_RATE=0.25 +ECHOBOT_VIDEO_MAX_FRAME_SIZE=1280x720 +ECHOBOT_VIDEO_FACE_CONFIDENCE_THRESHOLD=0.8 +ECHOBOT_VIDEO_FEATURE_MATCH_THRESHOLD=0.6 diff
--git a/.gitignore b/.gitignore index aef659d..b42af4d 100644 --- a/.gitignore +++ b/.gitignore @@ -26,7 +26,7 @@ htmlcov/ .hypothesis/ # Local environment files -.env +# .env # Committed with placeholder values - copy and fill in your own keys .env.* !.env.example @@ -43,3 +43,12 @@ example_projects/ .echobot logs/ + +# Node +node_modules/ +package-lock.json + +# Generated docs +VIDEO_CALL_FINAL_GUIDE.md +VIDEO_CALL_INTEGRATION_GUIDE.md +VIDEO_CALL_PLUGIN_SUMMARY.md diff --git a/VIDEO_CALL_README.md b/VIDEO_CALL_README.md new file mode 100644 index 0000000..76f39f8 --- /dev/null +++ b/VIDEO_CALL_README.md @@ -0,0 +1,153 @@ +# EchoBot 视频通话插件 - 架构与使用说明 + +## 功能概述 + +视频通话插件(`video_call`)为 EchoBot 提供实时视觉感知能力: + +- **实时摄像头画面处理**:通过 WebSocket 接收前端推送的视频帧 +- **图像语义描述**:调用视觉模型对每帧画面生成自然语言描述 +- **人脸识别与绑定**:用 InsightFace 提取 512 维特征向量,与已知人脸库做余弦相似度匹配 +- **视觉上下文自动注入**:每次对话时,coordinator 自动将当前视觉信息追加为临时系统消息,模型无需工具调用即可感知画面 +- **主动记忆陌生人**:检测到未识别人脸时,模型会主动询问姓名并通过工具绑定,重启后仍可识别 + +--- + +## 架构概览 + +``` +前端摄像头 + │ WebSocket /api/web/video/stream + ▼ +VisionProcessingService + ├── ImageDescriptionService # 视觉模型:图像 → 自然语言描述 + └── FaceRecognitionService # InsightFace:检测 + 512 维特征提取 + 余弦匹配 + │ + ▼ +VisionContextProvider(内存缓存,最近 80 帧) + │ + │ on_startup 注入 + ▼ +ConversationCoordinator._vision_context_provider + │ 每次对话自动调用 _build_vision_message() + ▼ +transient_system_messages → RoleplayEngine / AgentRunner + │ + ▼ +大模型(感知视觉 + 主动询问陌生人) + │ 用户回答名字 → decision engine 路由到 agent + ▼ +BindFaceToNameTool(BaseTool) + ├── FaceRecognitionService.add_known_face_from_frame() # 内存 + └── FaceFeatureInterceptor.add_or_update_feature() # 持久化到 .echobot/face_features.json +``` + +### 工具注入方式(非侵入) + +插件工具通过 `create_app.py` 里包装 `tool_registry_factory` 注入,不修改框架代码: + +```python +# create_app.py +def _plugin_context_builder(opts): + ctx = build_runtime_context(opts, load_session_state=False) + original_factory = ctx.tool_registry_factory + + def wrapped_factory(session_name, scheduled_context): + registry = 
original_factory(session_name, scheduled_context) + for tool in video_plugin.get_tool_instances(): + registry.register(tool) + return registry + + ctx.tool_registry_factory = wrapped_factory + return ctx +``` + +### 视觉上下文注入方式 + +插件在 `on_startup` 时调用: + +```python +coordinator.set_vision_context_provider(self.vision_provider) +``` + +Coordinator 在每次 `handle_user_turn_stream` 时自动调用 `_build_vision_message()`,将视觉信息作为 `transient_system_messages` 注入,**不写入会话历史**。 + +--- + +## 目录结构 + +``` +echobot/plugins/video_call/ +├── __init__.py # VideoCallPlugin:插件入口,on_startup/on_shutdown +├── models.py # 数据模型:Face, VisionContext +├── vision_provider.py # VisionContextProvider:帧缓存与合并 +├── interceptors/ +│ └── __init__.py # FaceFeatureInterceptor:特征持久化(JSON) +├── routers/ +│ └── __init__.py # API 路由:WebSocket 视频流、人脸绑定接口 +├── services/ +│ ├── __init__.py # VisionProcessingService:双链路并行处理 +│ ├── face_recognition.py # FaceRecognitionService:InsightFace 封装 +│ └── image_description.py # ImageDescriptionService:视觉描述 +└── tools/ + ├── __init__.py # VISION_TOOLS:旧格式工具定义(兼容用) + ├── face_tools.py # BaseTool 实现:bind_face_to_name / list_known_faces / forget_face + └── handlers.py # VisionToolHandler:工具调用处理器 +``` + +--- + +## 可用工具(Agent 路径) + +| 工具名 | 触发场景 | 功能 | +|--------|----------|------| +| `bind_face_to_name` | "我叫XXX" / "记住我" / "这是XXX" | 从当前摄像头帧提取人脸特征并绑定姓名 | +| `list_known_faces` | "你认识哪些人" | 列出所有已绑定的人脸姓名 | +| `forget_face` | "忘掉XXX" | 删除某人的人脸绑定记录 | + +--- + +## API 接口 + +| 方法 | 路径 | 说明 | +|------|------|------| +| WS | `/api/web/video/stream` | 视频帧推送(JPEG bytes) | +| GET | `/api/web/video/context` | 获取当前视觉上下文列表 | +| GET | `/api/web/video/snapshot` | 获取最新一帧快照 | +| POST | `/api/web/video/face-bind` | 通过特征向量绑定人脸 | +| POST | `/api/web/video/face-bind-frame` | 通过图像帧绑定人脸 | +| GET | `/api/web/video/face-list` | 查询已绑定人脸列表 | + +--- + +## 启用方式 + +在 `.env` 中设置: + +```env +ECHOBOT_ENABLE_VIDEO_CALL=true +``` + +前端摄像头页面:`http://localhost:8000/web/camera` + +--- + +## 人脸数据持久化 + +绑定的人脸特征向量(512 
维,InsightFace buffalo_sc)保存在: + +``` +.echobot/face_features.json +``` + +重启后自动加载,无需重新绑定。 + +--- + +## 依赖 + +``` +insightface +onnxruntime +Pillow +numpy +``` diff --git a/echobot/app/create_app.py b/echobot/app/create_app.py index 86bea57..ea7be30 100644 --- a/echobot/app/create_app.py +++ b/echobot/app/create_app.py @@ -1,12 +1,23 @@ from __future__ import annotations +import os from contextlib import asynccontextmanager from pathlib import Path from fastapi import FastAPI from fastapi.responses import FileResponse from fastapi.staticfiles import StaticFiles +from loguru import logger +# 加载 .env 文件 +try: + from dotenv import load_dotenv + load_dotenv() +except ImportError: + pass + +from ..plugins import PluginRegistry +from ..plugins.video_call import VideoCallPlugin from ..runtime.bootstrap import RuntimeOptions from .routers import chat, channels, cron, health, heartbeat, roles, sessions, web from .runtime import ASRServiceBuilder, AppRuntime, RuntimeContextBuilder, TTSServiceBuilder @@ -24,10 +35,47 @@ def create_app( asr_service_builder: ASRServiceBuilder | None = None, ) -> FastAPI: options = runtime_options or RuntimeOptions() + + # 初始化插件系统(先于 runtime 构建,以便注入工具) + plugin_registry = PluginRegistry() + video_plugin = VideoCallPlugin() + plugin_registry.register(video_plugin) + video_enabled = os.getenv("ECHOBOT_ENABLE_VIDEO_CALL", "false").lower() == "true" + if video_enabled: + plugin_registry.enable("video_call") + logger.info("VideoCall plugin enabled") + + # 构建包装过的 context_builder,注入插件工具 + def _plugin_context_builder(opts: RuntimeOptions): + from ..runtime.bootstrap import build_runtime_context + # 调用原始 builder(默认或用户传入) + if context_builder is not None: + ctx = context_builder(opts) + else: + ctx = build_runtime_context(opts, load_session_state=False) + + if video_enabled: + # 包装 tool_registry_factory,追加插件工具 + original_factory = ctx.tool_registry_factory + + def wrapped_factory(session_name: str, scheduled_context: bool): + registry = 
original_factory(session_name, scheduled_context) + if registry is None: + return registry + for tool in video_plugin.get_tool_instances(): + try: + registry.register(tool) + except ValueError: + pass # 已注册则跳过 + return registry + + ctx.tool_registry_factory = wrapped_factory + return ctx + runtime = AppRuntime( runtime_options=options, channel_config_path=channel_config_path, - context_builder=context_builder, + context_builder=_plugin_context_builder, tts_service_builder=tts_service_builder, asr_service_builder=asr_service_builder, ) @@ -36,9 +84,19 @@ def create_app( async def lifespan(app: FastAPI): await runtime.start() app.state.runtime = runtime + app.state.plugin_registry = plugin_registry + + # 启动所有启用的插件 + for plugin in plugin_registry.get_enabled_plugins(): + await plugin.on_startup(app, runtime) + try: yield finally: + # 关闭所有插件 + for plugin in plugin_registry.get_enabled_plugins(): + await plugin.on_shutdown() + await runtime.stop() app = FastAPI( @@ -58,6 +116,10 @@ async def root() -> dict[str, str]: async def web_console() -> FileResponse: return FileResponse(WEB_ASSETS_DIR / "index.html") + @app.get("/web/camera", include_in_schema=False) + async def web_camera() -> FileResponse: + return FileResponse(WEB_ASSETS_DIR / "camera.html") + @app.get("/favicon.ico", include_in_schema=False) async def favicon() -> FileResponse: return FileResponse( @@ -79,4 +141,11 @@ async def favicon() -> FileResponse: app.include_router(roles.router, prefix="/api") app.include_router(channels.router, prefix="/api") app.include_router(web.router, prefix="/api") + + # 加载启用的插件路由 + for plugin in plugin_registry.get_enabled_plugins(): + for router in plugin.get_routers(): + app.include_router(router, prefix="/api") + logger.info(f"Loaded router from plugin: {plugin.name}") + return app diff --git a/echobot/app/schemas.py b/echobot/app/schemas.py index 2142d6f..1579f43 100644 --- a/echobot/app/schemas.py +++ b/echobot/app/schemas.py @@ -71,6 +71,7 @@ class 
ChatRequest(BaseModel): temperature: float | None = None max_tokens: int | None = None images: list["ChatImageInput"] = Field(default_factory=list) + vision_context: list[dict] = Field(default_factory=list) class ChatImageInput(BaseModel): diff --git a/echobot/app/services/chat.py b/echobot/app/services/chat.py index bc7bee4..f7e0706 100644 --- a/echobot/app/services/chat.py +++ b/echobot/app/services/chat.py @@ -36,6 +36,7 @@ async def run_prompt( image_urls: list[str] | None = None, role_name: str | None = None, route_mode: RouteMode | None = None, + transient_system_messages: list[str] | None = None, ) -> OrchestratedTurnResult: result = await self._coordinator.handle_user_turn( session_name, @@ -43,6 +44,7 @@ async def run_prompt( image_urls=image_urls, role_name=role_name, route_mode=route_mode, + transient_system_messages=transient_system_messages, ) await self._session_service.set_current_session(result.session.name) return result @@ -56,6 +58,7 @@ async def run_prompt_stream( role_name: str | None = None, route_mode: RouteMode | None = None, on_chunk: StreamCallback | None = None, + transient_system_messages: list[str] | None = None, ) -> OrchestratedTurnResult: result = await self._coordinator.handle_user_turn_stream( session_name, @@ -64,6 +67,7 @@ async def run_prompt_stream( role_name=role_name, route_mode=route_mode, on_chunk=on_chunk, + transient_system_messages=transient_system_messages, ) await self._session_service.set_current_session(result.session.name) return result diff --git a/echobot/app/web/app.js b/echobot/app/web/app.js index 5367c77..c341e68 100644 --- a/echobot/app/web/app.js +++ b/echobot/app/web/app.js @@ -32,6 +32,7 @@ import { roundTo, smoothValue, } from "./modules/utils.js"; +import { createVideoCallIntegration } from "./modules/video-call-integration.js"; const layout = createLayoutModule({ addMessage: addMessage, @@ -129,6 +130,11 @@ const chat = createChatModule({ updateMessage: updateMessage, }); +const videoCall = 
createVideoCallIntegration({ + addSystemMessage: addSystemMessage, + requestJson: requestJson, +}); + document.addEventListener("DOMContentLoaded", initializePage); async function initializePage() { @@ -160,6 +166,7 @@ async function initializePage() { asr.applyAsrStatus(config.asr); asr.startAsrStatusPolling(); traces.resetTracePanel(); + videoCall.initialize(); setConnectionState("ready", "已连接"); setRunStatus("准备就绪"); diff --git a/echobot/app/web/camera.html b/echobot/app/web/camera.html new file mode 100644 index 0000000..935b0a7 --- /dev/null +++ b/echobot/app/web/camera.html @@ -0,0 +1,201 @@ + + +
+ + +Chrome:在地址栏输入chrome://flags/#unsafely-treat-insecure-origin-as-secure
+ Edge:在地址栏输入edge://flags/#unsafely-treat-insecure-origin-as-secure
+ 在输入框填入本服务的访问地址(例如 http://192.168.110.143:8000,请替换为实际的服务器 IP 和端口)
+ 点击 Enabled → Relaunch / Restart 重启浏览器