Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 17 additions & 16 deletions api/app/lib/model_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,26 +145,26 @@ def set_model_default(conn, catalog_id: int) -> bool:
"""
Set a model as the default for its provider+category.

Clears existing default for that provider+category first.
Clears existing default for that provider+category first. Uses a
subquery rather than fetchone() + tuple-unpack so the function works
regardless of the connection's cursor_factory — RealDictCursor returns
dict-like rows that silently yield column *names* on tuple unpacking.
"""
with conn.cursor() as cur:
# Get the provider and category for this model
cur.execute(
"SELECT provider, category FROM kg_api.provider_model_catalog WHERE id = %s",
(catalog_id,),
)
row = cur.fetchone()
if not row:
return False

provider, category = row

# Clear existing default
# Clear any existing default that shares the new model's
# (provider, category), excluding the new model itself in case it
# is already the default (so this call is idempotent).
cur.execute(
"""UPDATE kg_api.provider_model_catalog
SET is_default = FALSE, updated_at = NOW()
WHERE provider = %s AND category = %s AND is_default = TRUE""",
(provider, category),
WHERE is_default = TRUE
AND id <> %s
AND (provider, category) = (
SELECT provider, category
FROM kg_api.provider_model_catalog
WHERE id = %s
)""",
(catalog_id, catalog_id),
)

# Set new default (also ensures enabled)
Expand All @@ -174,8 +174,9 @@ def set_model_default(conn, catalog_id: int) -> bool:
WHERE id = %s""",
(catalog_id,),
)
updated = cur.rowcount > 0
conn.commit()
return True
return updated


def update_model_pricing(
Expand Down
85 changes: 81 additions & 4 deletions operator/configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@ def cmd_embedding(self, args):
"""Configure embedding provider by activating a pre-configured profile"""
profile_id = getattr(args, 'profile_id', None)
provider_name = getattr(args, 'provider', None)
device = getattr(args, 'device', None)

# If no profile_id or provider specified, list available profiles
if profile_id is None and provider_name is None:
Expand Down Expand Up @@ -229,9 +230,21 @@ def cmd_embedding(self, args):
# Activate selected profile (use profile['id'] from query, not profile_id arg)
cur.execute("UPDATE kg_api.embedding_profile SET active = true WHERE id = %s", (profile['id'],))

# Optionally update the compute device on the activated profile.
# The wizard maps its GPU_MODE choice (mac/nvidia/amd/cpu) to a
# PyTorch device string here so the API container loads the
# model on the right accelerator at startup.
effective_device = profile['device']
if device:
cur.execute(
"UPDATE kg_api.embedding_profile SET device = %s WHERE id = %s",
(device, profile['id']),
)
effective_device = device

conn.commit()

device_info = f" ({profile['device']})" if profile['device'] else ""
device_info = f" ({effective_device})" if effective_device else ""
print(f"✅ Activated: [{profile['id']}] {profile['provider']} / {profile['model_name']} ({profile['embedding_dimensions']} dims, {profile['precision']}){device_info}")
return True

Expand Down Expand Up @@ -323,6 +336,71 @@ def _validate_provider_key(self, provider, key):
return False
return None

def _fetch_catalog_via_sdk(self, provider):
    """Fetch a provider's model catalog without running the provider's __init__.

    AnthropicProvider and OllamaProvider eagerly construct an OpenAI
    embedding provider in __init__ when none is supplied — but the
    operator container has no loaded EmbeddingModelManager (only the API
    container initializes one at startup), so get_embedding_provider()
    returns None and the eager fallback runs and fails. fetch_model_catalog
    itself only needs self.client (or self.api_key for OpenRouter), so the
    instance is created via __new__ and only that one attribute is set,
    reusing the existing fetch_model_catalog implementation rather than
    duplicating per-provider pricing/feature dicts.

    Args:
        provider: Lower-case provider name ("openai", "anthropic",
            "openrouter", or any other name understood by get_provider).

    Returns:
        Whatever the provider's fetch_model_catalog() returns.

    Raises:
        RuntimeError: If the provider is one of the directly-handled ones
            and no API key could be loaded for it.
    """
    from api.app.lib.ai_providers import (
        _load_api_key,
        OpenAIProvider,
        AnthropicProvider,
        OpenRouterProvider,
    )

    # provider name -> (display label for errors, env var fallback, class)
    direct_providers = {
        "openai": ("OpenAI", "OPENAI_API_KEY", OpenAIProvider),
        "anthropic": ("Anthropic", "ANTHROPIC_API_KEY", AnthropicProvider),
        "openrouter": ("OpenRouter", "OPENROUTER_API_KEY", OpenRouterProvider),
    }

    if provider not in direct_providers:
        # Other providers (ollama, llamacpp) — fall back to the original
        # construction. They don't currently appear in the guided wizard,
        # and their catalog refresh has different requirements
        # (base_url, etc.).
        from api.app.lib.ai_providers import get_provider
        return get_provider(provider).fetch_model_catalog()

    label, env_var, provider_cls = direct_providers[provider]

    key = _load_api_key(provider, None, env_var)
    if not key:
        raise RuntimeError(
            f"{label} API key not configured. Store it first via "
            f"`configure.py api-key {provider}`."
        )

    # Bypass __init__ so no embedding provider is constructed.
    prov = provider_cls.__new__(provider_cls)

    if provider == "openai":
        from openai import OpenAI
        prov.client = OpenAI(api_key=key)
    elif provider == "anthropic":
        from anthropic import Anthropic
        prov.client = Anthropic(api_key=key)
    else:
        # OpenRouter's fetch_model_catalog uses self.api_key for the
        # Authorization header and OPENROUTER_BASE_URL from the class, so
        # no SDK client (and no SDK import) is needed here.
        prov.api_key = key

    return prov.fetch_model_catalog()

def cmd_api_key(self, args):
"""Store encrypted API key"""
provider = args.provider
Expand Down Expand Up @@ -469,11 +547,9 @@ def cmd_models(self, args):

print(f"🔄 Fetching model catalog from {provider}...")
try:
from api.app.lib.ai_providers import get_provider
from api.app.lib.model_catalog import upsert_catalog_entries

prov = get_provider(provider)
entries = prov.fetch_model_catalog()
entries = self._fetch_catalog_via_sdk(provider.lower())

if not entries:
print(f"⚠️ No models returned from {provider}")
Expand Down Expand Up @@ -695,6 +771,7 @@ def main():
embed_parser = subparsers.add_parser('embedding', help='List or activate embedding profile')
embed_parser.add_argument('profile_id', nargs='?', type=int, help='Profile ID to activate (omit to list profiles)')
embed_parser.add_argument('--provider', help='Select profile by provider name (local, openai)')
embed_parser.add_argument('--device', help='Set compute device on the activated profile (cpu, cuda, mps)')

# api-key
key_parser = subparsers.add_parser('api-key', help='Store encrypted API key')
Expand Down
42 changes: 27 additions & 15 deletions operator/lib/guided-init.sh
Original file line number Diff line number Diff line change
Expand Up @@ -343,9 +343,30 @@ fi
docker exec kg-operator python /workspace/operator/configure.py admin --password "$ADMIN_PASSWORD"
echo ""

# Step 4: Configure AI provider (interactive selection)
# Step 4: Configure local embedding profile (GPU_MODE-aware)
# This runs BEFORE AI provider selection because the embedding model is
# system-level infrastructure that the API container loads at startup. It
# is also activated against the device chosen at the very start of the
# wizard, so the user's GPU/CPU intent is honored end-to-end.
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo -e "${BOLD}Step 4/9: Choosing AI extraction provider${NC}"
echo -e "${BOLD}Step 4/9: Configuring local embedding profile${NC}"
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo ""

case "$GPU_MODE" in
mac) EMBEDDING_DEVICE="mps" ;;
nvidia) EMBEDDING_DEVICE="cuda" ;;
amd|amd-host) EMBEDDING_DEVICE="cuda" ;; # PyTorch ROCm presents as cuda
cpu|*) EMBEDDING_DEVICE="cpu" ;;
esac

echo "Activating local embeddings (nomic-ai/nomic-embed-text-v1.5) on device: ${EMBEDDING_DEVICE}"
docker exec kg-operator python /workspace/operator/configure.py embedding --provider local --device "$EMBEDDING_DEVICE"
echo ""

# Step 5: Configure AI provider (interactive selection)
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo -e "${BOLD}Step 5/9: Choosing AI extraction provider${NC}"
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo ""
echo "Choose your AI extraction provider:"
Expand Down Expand Up @@ -392,9 +413,9 @@ case "$REPLY" in
esac
echo ""

# Step 5: Store API key (skip for Ollama)
# Step 6: Store API key (skip for Ollama)
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo -e "${BOLD}Step 5/9: Validating API key${NC}"
echo -e "${BOLD}Step 6/9: Validating API key${NC}"
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo ""

Expand Down Expand Up @@ -427,9 +448,9 @@ while [ "$API_KEY_STORED" = false ]; do
fi
done

# Step 6: Refresh model catalog and select model
# Step 7: Refresh model catalog and select model
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo -e "${BOLD}Step 6/9: Selecting extraction model${NC}"
echo -e "${BOLD}Step 7/9: Selecting extraction model${NC}"
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo ""

Expand Down Expand Up @@ -625,15 +646,6 @@ else
fi
echo ""

# Step 7: Configure embeddings
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo -e "${BOLD}Step 7/9: Configuring embedding provider${NC}"
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo ""
echo "Activating local embeddings (nomic-ai/nomic-embed-text-v1.5)..."
docker exec kg-operator python /workspace/operator/configure.py embedding --provider local
echo ""

# Step 8: Configure Garage credentials
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo -e "${BOLD}Step 8/9: Configuring Garage object storage${NC}"
Expand Down
Loading