diff --git a/api/app/lib/model_catalog.py b/api/app/lib/model_catalog.py index 10ae67f42..7e105c293 100644 --- a/api/app/lib/model_catalog.py +++ b/api/app/lib/model_catalog.py @@ -145,26 +145,26 @@ def set_model_default(conn, catalog_id: int) -> bool: """ Set a model as the default for its provider+category. - Clears existing default for that provider+category first. + Clears existing default for that provider+category first. Uses a + subquery rather than fetchone() + tuple-unpack so the function works + regardless of the connection's cursor_factory — RealDictCursor returns + dict-like rows that silently yield column *names* on tuple unpacking. """ with conn.cursor() as cur: - # Get the provider and category for this model - cur.execute( - "SELECT provider, category FROM kg_api.provider_model_catalog WHERE id = %s", - (catalog_id,), - ) - row = cur.fetchone() - if not row: - return False - - provider, category = row - - # Clear existing default + # Clear any existing default that shares the new model's + # (provider, category), excluding the new model itself in case it + # is already the default (so this call is idempotent). cur.execute( """UPDATE kg_api.provider_model_catalog SET is_default = FALSE, updated_at = NOW() - WHERE provider = %s AND category = %s AND is_default = TRUE""", - (provider, category), + WHERE is_default = TRUE + AND id <> %s + AND (provider, category) = ( + SELECT provider, category + FROM kg_api.provider_model_catalog + WHERE id = %s + )""", + (catalog_id, catalog_id), ) # Set new default (also ensures enabled) @@ -174,8 +174,9 @@ def set_model_default(conn, catalog_id: int) -> bool: WHERE id = %s""", (catalog_id,), ) + updated = cur.rowcount > 0 conn.commit() - return True + return updated def update_model_pricing( diff --git a/operator/configure.py b/operator/configure.py index 318618bfd..7dbbf440c 100755 --- a/operator/configure.py +++ b/operator/configure.py @@ -187,6 +187,7 @@ def cmd_embedding(self, args): """Configure embedding provider by activating a pre-configured profile""" profile_id = getattr(args, 'profile_id', None) provider_name = getattr(args, 'provider', None) + device = getattr(args, 'device', None) # If no profile_id or provider specified, list available profiles if profile_id is None and provider_name is None: @@ -229,9 +230,21 @@ def cmd_embedding(self, args): # Activate selected profile (use profile['id'] from query, not profile_id arg) cur.execute("UPDATE kg_api.embedding_profile SET active = true WHERE id = %s", (profile['id'],)) + # Optionally update the compute device on the activated profile. + # The wizard maps its GPU_MODE choice (mac/nvidia/amd/cpu) to a + # PyTorch device string here so the API container loads the + # model on the right accelerator at startup. + effective_device = profile['device'] + if device: + cur.execute( + "UPDATE kg_api.embedding_profile SET device = %s WHERE id = %s", + (device, profile['id']), + ) + effective_device = device + conn.commit() - device_info = f" ({profile['device']})" if profile['device'] else "" + device_info = f" ({effective_device})" if effective_device else "" print(f"✅ Activated: [{profile['id']}] {profile['provider']} / {profile['model_name']} ({profile['embedding_dimensions']} dims, {profile['precision']}){device_info}") return True @@ -323,6 +336,71 @@ def _validate_provider_key(self, provider, key): return False return None + def _fetch_catalog_via_sdk(self, provider): + """Fetch a provider's model catalog without instantiating the full + AIProvider class. + + AnthropicProvider and OllamaProvider eagerly construct an OpenAI + embedding provider in __init__ when none is supplied — but the + operator container has no loaded EmbeddingModelManager (only the API + container initializes one at startup), so get_embedding_provider() + returns None and the eager fallback runs and fails. fetch_model_catalog + itself only needs self.client (or self.api_key for OpenRouter), so we + construct the SDK client directly and bypass __init__ via __new__, + reusing the existing fetch_model_catalog implementation rather than + duplicating per-provider pricing/feature dicts. + """ + from api.app.lib.ai_providers import ( + _load_api_key, + OpenAIProvider, + AnthropicProvider, + OpenRouterProvider, + ) + + if provider == "openai": + from openai import OpenAI + key = _load_api_key("openai", None, "OPENAI_API_KEY") + if not key: + raise RuntimeError( + "OpenAI API key not configured. Store it first via " + "`configure.py api-key openai`." + ) + prov = OpenAIProvider.__new__(OpenAIProvider) + prov.client = OpenAI(api_key=key) + return prov.fetch_model_catalog() + + if provider == "anthropic": + from anthropic import Anthropic + key = _load_api_key("anthropic", None, "ANTHROPIC_API_KEY") + if not key: + raise RuntimeError( + "Anthropic API key not configured. Store it first via " + "`configure.py api-key anthropic`." + ) + prov = AnthropicProvider.__new__(AnthropicProvider) + prov.client = Anthropic(api_key=key) + return prov.fetch_model_catalog() + + if provider == "openrouter": + from openai import OpenAI + key = _load_api_key("openrouter", None, "OPENROUTER_API_KEY") + if not key: + raise RuntimeError( + "OpenRouter API key not configured. Store it first via " + "`configure.py api-key openrouter`." + ) + prov = OpenRouterProvider.__new__(OpenRouterProvider) + # OpenRouter's fetch_model_catalog uses self.api_key for the + # Authorization header and OPENROUTER_BASE_URL from the class. + prov.api_key = key + return prov.fetch_model_catalog() + + # Other providers (ollama, llamacpp) — fall back to the original + # construction. They don't currently appear in the guided wizard, and + # their catalog refresh has different requirements (base_url, etc.). + from api.app.lib.ai_providers import get_provider + return get_provider(provider).fetch_model_catalog() + def cmd_api_key(self, args): """Store encrypted API key""" provider = args.provider @@ -469,11 +547,9 @@ def cmd_models(self, args): print(f"🔄 Fetching model catalog from {provider}...") try: - from api.app.lib.ai_providers import get_provider from api.app.lib.model_catalog import upsert_catalog_entries - prov = get_provider(provider) - entries = prov.fetch_model_catalog() + entries = self._fetch_catalog_via_sdk(provider.lower()) if not entries: print(f"⚠️ No models returned from {provider}") @@ -695,6 +771,7 @@ def main(): embed_parser = subparsers.add_parser('embedding', help='List or activate embedding profile') embed_parser.add_argument('profile_id', nargs='?', type=int, help='Profile ID to activate (omit to list profiles)') embed_parser.add_argument('--provider', help='Select profile by provider name (local, openai)') + embed_parser.add_argument('--device', help='Set compute device on the activated profile (cpu, cuda, mps)') # api-key key_parser = subparsers.add_parser('api-key', help='Store encrypted API key') diff --git a/operator/lib/guided-init.sh b/operator/lib/guided-init.sh index 52aa57863..74435f73e 100755 --- a/operator/lib/guided-init.sh +++ b/operator/lib/guided-init.sh @@ -343,9 +343,30 @@ fi docker exec kg-operator python /workspace/operator/configure.py admin --password "$ADMIN_PASSWORD" echo "" -# Step 4: Configure AI provider (interactive selection) +# Step 4: Configure local embedding profile (GPU_MODE-aware) +# This runs BEFORE AI provider selection because the embedding model is +# system-level infrastructure that the API container loads at startup. It +# is also activated against the device chosen at the very start of the +# wizard, so the user's GPU/CPU intent is honored end-to-end. echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" -echo -e "${BOLD}Step 4/9: Choosing AI extraction provider${NC}" +echo -e "${BOLD}Step 4/9: Configuring local embedding profile${NC}" +echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" +echo "" + +case "$GPU_MODE" in + mac) EMBEDDING_DEVICE="mps" ;; + nvidia) EMBEDDING_DEVICE="cuda" ;; + amd|amd-host) EMBEDDING_DEVICE="cuda" ;; # PyTorch ROCm presents as cuda + cpu|*) EMBEDDING_DEVICE="cpu" ;; +esac + +echo "Activating local embeddings (nomic-ai/nomic-embed-text-v1.5) on device: ${EMBEDDING_DEVICE}" +docker exec kg-operator python /workspace/operator/configure.py embedding --provider local --device "$EMBEDDING_DEVICE" +echo "" + +# Step 5: Configure AI provider (interactive selection) +echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" +echo -e "${BOLD}Step 5/9: Choosing AI extraction provider${NC}" echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" echo "" echo "Choose your AI extraction provider:" @@ -392,9 +413,9 @@ case "$REPLY" in esac echo "" -# Step 5: Store API key (skip for Ollama) +# Step 6: Store API key (skip for Ollama) echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" -echo -e "${BOLD}Step 5/9: Validating API key${NC}" +echo -e "${BOLD}Step 6/9: Validating API key${NC}" echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" echo "" @@ -427,9 +448,9 @@ while [ "$API_KEY_STORED" = false ]; do fi done -# Step 6: Refresh model catalog and select model +# Step 7: Refresh model catalog and select model echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" -echo -e "${BOLD}Step 6/9: Selecting extraction model${NC}" +echo -e "${BOLD}Step 7/9: Selecting extraction model${NC}" echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" echo "" @@ -625,15 +646,6 @@ else fi echo "" -# Step 7: Configure embeddings -echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" -echo -e "${BOLD}Step 7/9: Configuring embedding provider${NC}" -echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" -echo "" -echo "Activating local embeddings (nomic-ai/nomic-embed-text-v1.5)..." -docker exec kg-operator python /workspace/operator/configure.py embedding --provider local -echo "" - # Step 8: Configure Garage credentials echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" echo -e "${BOLD}Step 8/9: Configuring Garage object storage${NC}"