aaronsb · May 23, 2026 · May 21, 2026 · May 21, 2026
diff --git a/api/app/lib/model_catalog.py b/api/app/lib/model_catalog.py
@@ -145,26 +145,26 @@ def set_model_default(conn, catalog_id: int) -> bool:
     """
     Set a model as the default for its provider+category.
 
-    Clears existing default for that provider+category first.
+    Clears existing default for that provider+category first. Uses a
+    subquery rather than fetchone() + tuple-unpack so the function works
+    regardless of the connection's cursor_factory — RealDictCursor returns
+    dict-like rows that silently yield column *names* on tuple unpacking.
     """
     with conn.cursor() as cur:
-        # Get the provider and category for this model
-        cur.execute(
-            "SELECT provider, category FROM kg_api.provider_model_catalog WHERE id = %s",
-            (catalog_id,),
-        )
-        row = cur.fetchone()
-        if not row:
-            return False
-
-        provider, category = row
-
-        # Clear existing default
+        # Clear any existing default that shares the new model's
+        # (provider, category), excluding the new model itself in case it
+        # is already the default (so this call is idempotent).
         cur.execute(
             """UPDATE kg_api.provider_model_catalog
                SET is_default = FALSE, updated_at = NOW()
-               WHERE provider = %s AND category = %s AND is_default = TRUE""",
-            (provider, category),
+               WHERE is_default = TRUE
+                 AND id <> %s
+                 AND (provider, category) = (
+                     SELECT provider, category
+                     FROM kg_api.provider_model_catalog
+                     WHERE id = %s
+                 )""",
+            (catalog_id, catalog_id),
         )
 
         # Set new default (also ensures enabled)
@@ -174,8 +174,9 @@ def set_model_default(conn, catalog_id: int) -> bool:
                WHERE id = %s""",
             (catalog_id,),
         )
+        updated = cur.rowcount > 0
         conn.commit()
-        return True
+        return updated
 
 
 def update_model_pricing(

diff --git a/operator/configure.py b/operator/configure.py
@@ -187,6 +187,7 @@ def cmd_embedding(self, args):
         """Configure embedding provider by activating a pre-configured profile"""
         profile_id = getattr(args, 'profile_id', None)
         provider_name = getattr(args, 'provider', None)
+        device = getattr(args, 'device', None)
 
         # If no profile_id or provider specified, list available profiles
         if profile_id is None and provider_name is None:
@@ -229,9 +230,21 @@ def cmd_embedding(self, args):
                 # Activate selected profile (use profile['id'] from query, not profile_id arg)
                 cur.execute("UPDATE kg_api.embedding_profile SET active = true WHERE id = %s", (profile['id'],))
 
+                # Optionally update the compute device on the activated profile.
+                # The wizard maps its GPU_MODE choice (mac/nvidia/amd/cpu) to a
+                # PyTorch device string here so the API container loads the
+                # model on the right accelerator at startup.
+                effective_device = profile['device']
+                if device:
+                    cur.execute(
+                        "UPDATE kg_api.embedding_profile SET device = %s WHERE id = %s",
+                        (device, profile['id']),
+                    )
+                    effective_device = device
+
                 conn.commit()
 
-                device_info = f" ({profile['device']})" if profile['device'] else ""
+                device_info = f" ({effective_device})" if effective_device else ""
                 print(f"✅ Activated: [{profile['id']}] {profile['provider']} / {profile['model_name']} ({profile['embedding_dimensions']} dims, {profile['precision']}){device_info}")
                 return True
 
@@ -323,6 +336,71 @@ def _validate_provider_key(self, provider, key):
                 return False
         return None
 
+    def _fetch_catalog_via_sdk(self, provider):
+        """Fetch a provider's model catalog without instantiating the full
+        AIProvider class.
+
+        AnthropicProvider and OllamaProvider eagerly construct an OpenAI
+        embedding provider in __init__ when none is supplied — but the
+        operator container has no loaded EmbeddingModelManager (only the API
+        container initializes one at startup), so get_embedding_provider()
+        returns None and the eager fallback runs and fails. fetch_model_catalog
+        itself only needs self.client (or self.api_key for OpenRouter), so we
+        construct the SDK client directly and bypass __init__ via __new__,
+        reusing the existing fetch_model_catalog implementation rather than
+        duplicating per-provider pricing/feature dicts.
+
+        Args:
+            provider: "openai", "anthropic" or "openrouter" (matched exactly);
+                any other name is handed to get_provider().
+        Returns:
+            Whatever the provider's fetch_model_catalog() returns.
+        Raises:
+            RuntimeError: No API key found for a directly handled provider.
+        """
+        from api.app.lib.ai_providers import (
+            _load_api_key,
+            OpenAIProvider,
+            AnthropicProvider,
+            OpenRouterProvider,
+        )
+
+        def require_key(name, label, env_var):
+            key = _load_api_key(name, None, env_var)
+            if not key:
+                raise RuntimeError(
+                    f"{label} API key not configured. Store it first via "
+                    f"`configure.py api-key {name}`."
+                )
+            return key
+
+        if provider == "openai":
+            from openai import OpenAI
+            key = require_key("openai", "OpenAI", "OPENAI_API_KEY")
+            prov = OpenAIProvider.__new__(OpenAIProvider)
+            prov.client = OpenAI(api_key=key)
+            return prov.fetch_model_catalog()
+
+        if provider == "anthropic":
+            from anthropic import Anthropic
+            key = require_key("anthropic", "Anthropic", "ANTHROPIC_API_KEY")
+            prov = AnthropicProvider.__new__(AnthropicProvider)
+            prov.client = Anthropic(api_key=key)
+            return prov.fetch_model_catalog()
+
+        if provider == "openrouter":
+            # No SDK client needed: OpenRouter's fetch_model_catalog uses
+            # self.api_key (Authorization header) and the class's OPENROUTER_BASE_URL.
+            prov = OpenRouterProvider.__new__(OpenRouterProvider)
+            prov.api_key = require_key("openrouter", "OpenRouter", "OPENROUTER_API_KEY")
+            return prov.fetch_model_catalog()
+
+        # Other providers (ollama, llamacpp) — fall back to the original
+        # construction. They don't currently appear in the guided wizard, and
+        # their catalog refresh has different requirements (base_url, etc.).
+        from api.app.lib.ai_providers import get_provider
+        return get_provider(provider).fetch_model_catalog()
+
     def cmd_api_key(self, args):
         """Store encrypted API key"""
         provider = args.provider
@@ -469,11 +547,9 @@ def cmd_models(self, args):
 
                 print(f"🔄 Fetching model catalog from {provider}...")
                 try:
-                    from api.app.lib.ai_providers import get_provider
                     from api.app.lib.model_catalog import upsert_catalog_entries
 
-                    prov = get_provider(provider)
-                    entries = prov.fetch_model_catalog()
+                    entries = self._fetch_catalog_via_sdk(provider.lower())
 
                     if not entries:
                         print(f"⚠️  No models returned from {provider}")
@@ -695,6 +771,7 @@ def main():
     embed_parser = subparsers.add_parser('embedding', help='List or activate embedding profile')
     embed_parser.add_argument('profile_id', nargs='?', type=int, help='Profile ID to activate (omit to list profiles)')
     embed_parser.add_argument('--provider', help='Select profile by provider name (local, openai)')
+    embed_parser.add_argument('--device', help='Set compute device on the activated profile (cpu, cuda, mps)')
 
     # api-key
     key_parser = subparsers.add_parser('api-key', help='Store encrypted API key')

diff --git a/operator/lib/guided-init.sh b/operator/lib/guided-init.sh
@@ -343,9 +343,30 @@ fi
 docker exec kg-operator python /workspace/operator/configure.py admin --password "$ADMIN_PASSWORD"
 echo ""
 
-# Step 4: Configure AI provider (interactive selection)
+# Step 4: Configure local embedding profile (GPU_MODE-aware)
+# This runs BEFORE AI provider selection because the embedding model is
+# system-level infrastructure that the API container loads at startup. It
+# is also activated against the device chosen at the very start of the
+# wizard, so the user's GPU/CPU intent is honored end-to-end.
 echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
-echo -e "${BOLD}Step 4/9: Choosing AI extraction provider${NC}"
+echo -e "${BOLD}Step 4/9: Configuring local embedding profile${NC}"
+echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
+echo ""
+
+case "$GPU_MODE" in
+    mac)                 EMBEDDING_DEVICE="mps" ;;
+    nvidia)              EMBEDDING_DEVICE="cuda" ;;
+    amd|amd-host)        EMBEDDING_DEVICE="cuda" ;;  # PyTorch ROCm presents as cuda
+    cpu|*)               EMBEDDING_DEVICE="cpu" ;;
+esac
+
+echo "Activating local embeddings (nomic-ai/nomic-embed-text-v1.5) on device: ${EMBEDDING_DEVICE}"
+docker exec kg-operator python /workspace/operator/configure.py embedding --provider local --device "$EMBEDDING_DEVICE"
+echo ""
+
+# Step 5: Configure AI provider (interactive selection)
+echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
+echo -e "${BOLD}Step 5/9: Choosing AI extraction provider${NC}"
 echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
 echo ""
 echo "Choose your AI extraction provider:"
@@ -392,9 +413,9 @@ case "$REPLY" in
 esac
 echo ""
 
-# Step 5: Store API key (skip for Ollama)
+# Step 6: Store API key (skip for Ollama)
 echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
-echo -e "${BOLD}Step 5/9: Validating API key${NC}"
+echo -e "${BOLD}Step 6/9: Validating API key${NC}"
 echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
 echo ""
 
@@ -427,9 +448,9 @@ while [ "$API_KEY_STORED" = false ]; do
     fi
 done
 
-# Step 6: Refresh model catalog and select model
+# Step 7: Refresh model catalog and select model
 echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
-echo -e "${BOLD}Step 6/9: Selecting extraction model${NC}"
+echo -e "${BOLD}Step 7/9: Selecting extraction model${NC}"
 echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
 echo ""
 
@@ -625,15 +646,6 @@ else
 fi
 echo ""
 
-# Step 7: Configure embeddings
-echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
-echo -e "${BOLD}Step 7/9: Configuring embedding provider${NC}"
-echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
-echo ""
-echo "Activating local embeddings (nomic-ai/nomic-embed-text-v1.5)..."
-docker exec kg-operator python /workspace/operator/configure.py embedding --provider local
-echo ""
-
 # Step 8: Configure Garage credentials
 echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
 echo -e "${BOLD}Step 8/9: Configuring Garage object storage${NC}"