From a5a5efbc40c3a4221ecfb10a16bd32a2669e936b Mon Sep 17 00:00:00 2001 From: Aaron Bockelie Date: Thu, 21 May 2026 06:52:49 -0700 Subject: [PATCH 1/2] fix(operator): configure embeddings first; refresh catalog via SDK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two coupled fixes for the guided-init failure at the model-catalog refresh step (Step 6 before this patch, Step 7 after the renumbering below; error: "Anthropic requires an embedding provider") on first-run setup. 1. Reorder guided-init: configure the local embedding profile right after admin user creation (new Step 4), before AI provider selection. The wizard already collects the GPU/CPU choice at the top; map that to a PyTorch device string (mac→mps, nvidia/amd/amd-host→cuda, cpu→cpu) and pass it to `configure.py embedding --device`. The API container picks up the active profile + device at startup. Renumber subsequent steps (AI provider → 5, validate key → 6, model select → 7); Garage and start-app stay at 8 and 9. 2. SDK-direct catalog refresh. `models refresh` previously went through get_provider(), which instantiates AnthropicProvider — whose __init__ eagerly constructs an OpenAIProvider as the embedding delegate. The operator container has no loaded EmbeddingModelManager (only the API container initializes one at startup), so get_embedding_provider() returns None and the eager fallback fails for lack of an OpenAI key. New _fetch_catalog_via_sdk() bypasses __init__ via __new__, sets only the SDK client (or api_key for OpenRouter), and reuses the existing fetch_model_catalog method. Mirrors the SDK-direct pattern already used by _validate_provider_key. Adds a --device flag to `configure.py embedding` so the wizard can write the chosen device onto the activated profile in one call. 
--- operator/configure.py | 85 +++++++++++++++++++++++++++++++++++-- operator/lib/guided-init.sh | 42 +++++++++++------- 2 files changed, 108 insertions(+), 19 deletions(-) diff --git a/operator/configure.py b/operator/configure.py index 318618bfd..7dbbf440c 100755 --- a/operator/configure.py +++ b/operator/configure.py @@ -187,6 +187,7 @@ def cmd_embedding(self, args): """Configure embedding provider by activating a pre-configured profile""" profile_id = getattr(args, 'profile_id', None) provider_name = getattr(args, 'provider', None) + device = getattr(args, 'device', None) # If no profile_id or provider specified, list available profiles if profile_id is None and provider_name is None: @@ -229,9 +230,21 @@ def cmd_embedding(self, args): # Activate selected profile (use profile['id'] from query, not profile_id arg) cur.execute("UPDATE kg_api.embedding_profile SET active = true WHERE id = %s", (profile['id'],)) + # Optionally update the compute device on the activated profile. + # The wizard maps its GPU_MODE choice (mac/nvidia/amd/cpu) to a + # PyTorch device string here so the API container loads the + # model on the right accelerator at startup. + effective_device = profile['device'] + if device: + cur.execute( + "UPDATE kg_api.embedding_profile SET device = %s WHERE id = %s", + (device, profile['id']), + ) + effective_device = device + conn.commit() - device_info = f" ({profile['device']})" if profile['device'] else "" + device_info = f" ({effective_device})" if effective_device else "" print(f"✅ Activated: [{profile['id']}] {profile['provider']} / {profile['model_name']} ({profile['embedding_dimensions']} dims, {profile['precision']}){device_info}") return True @@ -323,6 +336,71 @@ def _validate_provider_key(self, provider, key): return False return None + def _fetch_catalog_via_sdk(self, provider): + """Fetch a provider's model catalog without instantiating the full + AIProvider class. 
+ + AnthropicProvider and OllamaProvider eagerly construct an OpenAI + embedding provider in __init__ when none is supplied — but the + operator container has no loaded EmbeddingModelManager (only the API + container initializes one at startup), so get_embedding_provider() + returns None and the eager fallback runs and fails. fetch_model_catalog + itself only needs self.client (or self.api_key for OpenRouter), so we + construct the SDK client directly and bypass __init__ via __new__, + reusing the existing fetch_model_catalog implementation rather than + duplicating per-provider pricing/feature dicts. + """ + from api.app.lib.ai_providers import ( + _load_api_key, + OpenAIProvider, + AnthropicProvider, + OpenRouterProvider, + ) + + if provider == "openai": + from openai import OpenAI + key = _load_api_key("openai", None, "OPENAI_API_KEY") + if not key: + raise RuntimeError( + "OpenAI API key not configured. Store it first via " + "`configure.py api-key openai`." + ) + prov = OpenAIProvider.__new__(OpenAIProvider) + prov.client = OpenAI(api_key=key) + return prov.fetch_model_catalog() + + if provider == "anthropic": + from anthropic import Anthropic + key = _load_api_key("anthropic", None, "ANTHROPIC_API_KEY") + if not key: + raise RuntimeError( + "Anthropic API key not configured. Store it first via " + "`configure.py api-key anthropic`." + ) + prov = AnthropicProvider.__new__(AnthropicProvider) + prov.client = Anthropic(api_key=key) + return prov.fetch_model_catalog() + + if provider == "openrouter": + from openai import OpenAI + key = _load_api_key("openrouter", None, "OPENROUTER_API_KEY") + if not key: + raise RuntimeError( + "OpenRouter API key not configured. Store it first via " + "`configure.py api-key openrouter`." + ) + prov = OpenRouterProvider.__new__(OpenRouterProvider) + # OpenRouter's fetch_model_catalog uses self.api_key for the + # Authorization header and OPENROUTER_BASE_URL from the class. 
+ prov.api_key = key + return prov.fetch_model_catalog() + + # Other providers (ollama, llamacpp) — fall back to the original + # construction. They don't currently appear in the guided wizard, and + # their catalog refresh has different requirements (base_url, etc.). + from api.app.lib.ai_providers import get_provider + return get_provider(provider).fetch_model_catalog() + def cmd_api_key(self, args): """Store encrypted API key""" provider = args.provider @@ -469,11 +547,9 @@ def cmd_models(self, args): print(f"🔄 Fetching model catalog from {provider}...") try: - from api.app.lib.ai_providers import get_provider from api.app.lib.model_catalog import upsert_catalog_entries - prov = get_provider(provider) - entries = prov.fetch_model_catalog() + entries = self._fetch_catalog_via_sdk(provider.lower()) if not entries: print(f"⚠️ No models returned from {provider}") @@ -695,6 +771,7 @@ def main(): embed_parser = subparsers.add_parser('embedding', help='List or activate embedding profile') embed_parser.add_argument('profile_id', nargs='?', type=int, help='Profile ID to activate (omit to list profiles)') embed_parser.add_argument('--provider', help='Select profile by provider name (local, openai)') + embed_parser.add_argument('--device', help='Set compute device on the activated profile (cpu, cuda, mps)') # api-key key_parser = subparsers.add_parser('api-key', help='Store encrypted API key') diff --git a/operator/lib/guided-init.sh b/operator/lib/guided-init.sh index 52aa57863..74435f73e 100755 --- a/operator/lib/guided-init.sh +++ b/operator/lib/guided-init.sh @@ -343,9 +343,30 @@ fi docker exec kg-operator python /workspace/operator/configure.py admin --password "$ADMIN_PASSWORD" echo "" -# Step 4: Configure AI provider (interactive selection) +# Step 4: Configure local embedding profile (GPU_MODE-aware) +# This runs BEFORE AI provider selection because the embedding model is +# system-level infrastructure that the API container loads at startup. 
It +# is also activated against the device chosen at the very start of the +# wizard, so the user's GPU/CPU intent is honored end-to-end. echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" -echo -e "${BOLD}Step 4/9: Choosing AI extraction provider${NC}" +echo -e "${BOLD}Step 4/9: Configuring local embedding profile${NC}" +echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" +echo "" + +case "$GPU_MODE" in + mac) EMBEDDING_DEVICE="mps" ;; + nvidia) EMBEDDING_DEVICE="cuda" ;; + amd|amd-host) EMBEDDING_DEVICE="cuda" ;; # PyTorch ROCm presents as cuda + cpu|*) EMBEDDING_DEVICE="cpu" ;; +esac + +echo "Activating local embeddings (nomic-ai/nomic-embed-text-v1.5) on device: ${EMBEDDING_DEVICE}" +docker exec kg-operator python /workspace/operator/configure.py embedding --provider local --device "$EMBEDDING_DEVICE" +echo "" + +# Step 5: Configure AI provider (interactive selection) +echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" +echo -e "${BOLD}Step 5/9: Choosing AI extraction provider${NC}" echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" echo "" echo "Choose your AI extraction provider:" @@ -392,9 +413,9 @@ case "$REPLY" in esac echo "" -# Step 5: Store API key (skip for Ollama) +# Step 6: Store API key (skip for Ollama) echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" -echo -e "${BOLD}Step 5/9: Validating API key${NC}" +echo -e "${BOLD}Step 6/9: Validating API key${NC}" echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" echo "" @@ -427,9 +448,9 @@ while [ "$API_KEY_STORED" = false ]; do fi done -# Step 6: Refresh model catalog and select model +# Step 7: Refresh model catalog and select model echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" -echo -e "${BOLD}Step 6/9: Selecting extraction model${NC}" 
+echo -e "${BOLD}Step 7/9: Selecting extraction model${NC}" echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" echo "" @@ -625,15 +646,6 @@ else fi echo "" -# Step 7: Configure embeddings -echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" -echo -e "${BOLD}Step 7/9: Configuring embedding provider${NC}" -echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" -echo "" -echo "Activating local embeddings (nomic-ai/nomic-embed-text-v1.5)..." -docker exec kg-operator python /workspace/operator/configure.py embedding --provider local -echo "" - # Step 8: Configure Garage credentials echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" echo -e "${BOLD}Step 8/9: Configuring Garage object storage${NC}" From 97c0872f430a50c9390be42fbb23018501f0c1ad Mon Sep 17 00:00:00 2001 From: Aaron Bockelie Date: Thu, 21 May 2026 07:47:38 -0700 Subject: [PATCH 2/2] fix(operator): set_model_default works with RealDictCursor connections MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit set_model_default fetched the provider/category for the target row and then unpacked the result with `provider, category = row`. When the caller's connection is configured with RealDictCursor (as the operator container's configure.py is — see operator/configure.py line 39), the row is a dict subclass and tuple unpacking silently yields the column *names* — "provider" and "category" — rather than the values. The clear-existing-default UPDATE then matched zero rows, and the set-new-default UPDATE collided with the still-set old default, violating idx_catalog_default on (provider, category). The API container's path didn't hit this because AGEClient.pool doesn't set a cursor_factory; only this operator-driven path tripped on it. Replace the SELECT + tuple-unpack with a subquery so the function is cursor-factory-agnostic. 
As a bonus the path is now idempotent: setting a model that's already the default no longer races with itself. Manifests as Step 7 of guided init: "Models command failed: duplicate key value violates unique constraint 'idx_catalog_default'". --- api/app/lib/model_catalog.py | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/api/app/lib/model_catalog.py b/api/app/lib/model_catalog.py index 10ae67f42..7e105c293 100644 --- a/api/app/lib/model_catalog.py +++ b/api/app/lib/model_catalog.py @@ -145,26 +145,26 @@ def set_model_default(conn, catalog_id: int) -> bool: """ Set a model as the default for its provider+category. - Clears existing default for that provider+category first. + Clears existing default for that provider+category first. Uses a + subquery rather than fetchone() + tuple-unpack so the function works + regardless of the connection's cursor_factory — RealDictCursor returns + dict-like rows that silently yield column *names* on tuple unpacking. """ with conn.cursor() as cur: - # Get the provider and category for this model - cur.execute( - "SELECT provider, category FROM kg_api.provider_model_catalog WHERE id = %s", - (catalog_id,), - ) - row = cur.fetchone() - if not row: - return False - - provider, category = row - - # Clear existing default + # Clear any existing default that shares the new model's + # (provider, category), excluding the new model itself in case it + # is already the default (so this call is idempotent). 
cur.execute( """UPDATE kg_api.provider_model_catalog SET is_default = FALSE, updated_at = NOW() - WHERE provider = %s AND category = %s AND is_default = TRUE""", - (provider, category), + WHERE is_default = TRUE + AND id <> %s + AND (provider, category) = ( + SELECT provider, category + FROM kg_api.provider_model_catalog + WHERE id = %s + )""", + (catalog_id, catalog_id), ) # Set new default (also ensures enabled) @@ -174,8 +174,9 @@ def set_model_default(conn, catalog_id: int) -> bool: WHERE id = %s""", (catalog_id,), ) + updated = cur.rowcount > 0 conn.commit() - return True + return updated def update_model_pricing(