diff --git a/apps/docs/app/[lang]/layout.tsx b/apps/docs/app/[lang]/layout.tsx index 5c9b2011917..869919d2a41 100644 --- a/apps/docs/app/[lang]/layout.tsx +++ b/apps/docs/app/[lang]/layout.tsx @@ -3,6 +3,7 @@ import { defineI18nUI } from 'fumadocs-ui/i18n' import { DocsLayout } from 'fumadocs-ui/layouts/docs' import { RootProvider } from 'fumadocs-ui/provider/next' import { Geist_Mono, Inter } from 'next/font/google' +import { AskAI } from '@/components/ai/ask-ai' import { SidebarFolder, SidebarItem, @@ -120,6 +121,7 @@ export default async function Layout({ children, params }: LayoutProps) { > {children} + diff --git a/apps/docs/app/api/chat/route.ts b/apps/docs/app/api/chat/route.ts new file mode 100644 index 00000000000..c59a4596b5f --- /dev/null +++ b/apps/docs/app/api/chat/route.ts @@ -0,0 +1,302 @@ +import { openai } from '@ai-sdk/openai' +import { convertToModelMessages, stepCountIs, streamText, tool, type UIMessage } from 'ai' +import { sql } from 'drizzle-orm' +import { z } from 'zod' +import { db, docsEmbeddings } from '@/lib/db' +import { generateSearchEmbedding } from '@/lib/embeddings' + +export const runtime = 'nodejs' +export const maxDuration = 30 + +/** Model used for the Ask AI chat. Override with OPENAI_CHAT_MODEL in the environment. */ +const CHAT_MODEL = process.env.OPENAI_CHAT_MODEL || 'gpt-5.4-mini' + +/** Max documentation chunks returned per search to ground an answer. */ +const SEARCH_LIMIT = 6 + +/** Rows fetched per query before trimming to SEARCH_LIMIT. The locale filter runs in SQL, so every candidate already belongs to the requested locale. */ +const SEARCH_CANDIDATES = SEARCH_LIMIT * 4 + +/** Minimum cosine similarity for an English vector match (mirrors the site search route). */ +const SIMILARITY_THRESHOLD = 0.6 + +/** Locales the docs are published in (mirrors the site search route). */ +const KNOWN_LOCALES = ['en', 'es', 'fr', 'de', 'ja', 'zh'] +const DEFAULT_LOCALE = 'en' + +/** Postgres full-text config per locale (mirrors the site search route). 
/** Postgres full-text search configuration per locale (mirrors the site search route). */
const TS_CONFIG: Record<string, string> = {
  en: 'english',
  es: 'spanish',
  fr: 'french',
  de: 'german',
  ja: 'simple',
  zh: 'simple',
}

/**
 * Abuse guards. This endpoint proxies a paid LLM, so an unauthenticated public
 * route is a target for scripted "free inference". These bounds cap the cost of
 * any single request; an in-memory per-IP rate limit (below) caps volume on the
 * hot path. A shared-store rate limit, a provider spend cap, and edge bot
 * protection remain the durable controls (see the PR checklist).
 *
 * The user-input cap counts only user-authored text (summed over every user
 * turn) — NOT assistant turns or retrieved doc chunks added via the searchDocs
 * tool, which legitimately grow large over a multi-turn chat.
 */
const MAX_MESSAGES = 200
const MAX_USER_INPUT_CHARS = 400_000
const MAX_OUTPUT_TOKENS = 4000
const MAX_STEPS = 6
/** Backstop on the whole serialized payload — blocks stuffing assistant/tool parts past the user-text cap. */
const MAX_TOTAL_CHARS = 1_000_000

/**
 * Per-IP rate limit. Fixed window, in-memory: this bounds volume from a single
 * source on a warm instance without external infra. It is best-effort on
 * serverless (state is per-instance, not shared across regions/cold starts);
 * a shared store (e.g. Vercel KV) and an edge WAF remain the durable controls,
 * but this closes the "no volume limit at all" gap on the hot path.
 */
const RATE_LIMIT_MAX = 20
const RATE_LIMIT_WINDOW_MS = 60_000
/** Per-key window state: hits so far and the epoch-ms instant at which the window ends. */
const rateLimitHits = new Map<string, { count: number; resetAt: number }>()

/**
 * Resolve the rate-limit key for a request from its forwarding headers.
 *
 * Uses the first non-empty hop of `x-forwarded-for`, then `x-real-ip`, and
 * finally the shared `'unknown'` bucket. Blank values are skipped so that a
 * malformed header (e.g. `", 10.0.0.1"`) cannot collapse callers into an
 * empty-string bucket.
 *
 * @param req - Incoming request.
 * @returns The client address as reported by the headers, or `'unknown'`.
 */
function getClientIp(req: Request): string {
  const firstHop = req.headers
    .get('x-forwarded-for')
    ?.split(',')
    .map((hop) => hop.trim())
    .find((hop) => hop.length > 0)
  if (firstHop) return firstHop

  const realIp = req.headers.get('x-real-ip')?.trim()
  return realIp ? realIp : 'unknown'
}

/**
 * Fixed-window rate-limit check. Records the hit when it is allowed.
 *
 * @param ip - Bucket key (see `getClientIp`).
 * @param now - Current time in epoch milliseconds.
 * @returns Seconds until the window resets when the caller is over the limit, else `null`.
 */
function rateLimit(ip: string, now: number): number | null {
  const bucket = rateLimitHits.get(ip)

  // No bucket yet, or the previous window has elapsed: start a fresh window with this hit.
  if (bucket === undefined || now >= bucket.resetAt) {
    rateLimitHits.set(ip, { count: 1, resetAt: now + RATE_LIMIT_WINDOW_MS })
    return null
  }

  if (bucket.count >= RATE_LIMIT_MAX) {
    return Math.ceil((bucket.resetAt - now) / 1000)
  }

  bucket.count += 1
  return null
}

/**
 * Drop expired buckets so the Map doesn't grow unbounded on a long-lived instance.
 * The sweep is skipped while the Map holds fewer than 10,000 keys.
 *
 * @param now - Current time in epoch milliseconds.
 */
function sweepRateLimit(now: number): void {
  if (rateLimitHits.size < 10_000) return
  for (const [ip, bucket] of rateLimitHits) {
    if (now >= bucket.resetAt) rateLimitHits.delete(ip)
  }
}

/**
 * Structural check for a client-supplied UI message: an object with a string
 * `role` and a `parts` array whose every element is a non-null object carrying
 * a string `type`.
 *
 * The per-part check matters: callers read `part.type` on every element, so a
 * payload such as `{ role: 'user', parts: [null] }` must be rejected here
 * rather than crash later with a TypeError.
 *
 * @param message - Untrusted value taken from the request body.
 * @returns `true` when the value is safe to treat as a `UIMessage`.
 */
function isValidMessage(message: unknown): message is UIMessage {
  if (typeof message !== 'object' || message === null) return false

  const { role, parts } = message as { role?: unknown; parts?: unknown }
  if (typeof role !== 'string' || !Array.isArray(parts)) return false

  return parts.every(
    (part) =>
      typeof part === 'object' &&
      part !== null &&
      typeof (part as { type?: unknown }).type === 'string'
  )
}

/**
 * Total length of user-authored text across the conversation.
 *
 * @param messages - Messages that already passed `isValidMessage`.
 * @returns Sum of the `text` lengths of all text parts in `user` messages.
 */
function userInputChars(messages: UIMessage[]): number {
  let total = 0
  for (const message of messages) {
    if (message.role !== 'user') continue
    for (const part of message.parts) {
      if (part.type === 'text' && typeof part.text === 'string') total += part.text.length
    }
  }
  return total
}

/**
 * Strip everything the model shouldn't trust from client-supplied history:
 * drop `system` messages (client-injected instructions) and every non-text part
 * (e.g. crafted tool results faking searchDocs output). Only user/assistant text
 * survives, so grounding comes from the server-run searchDocs tool — not the
 * client's payload.
 */
/**
 * Reduce client-supplied history to the parts the model may see.
 *
 * Keeps only `user` and `assistant` messages, keeps only their string text
 * parts, and drops any message left with no parts. `system` messages and
 * non-text parts (for example tool results) are therefore never forwarded.
 *
 * @param messages - Messages that already passed structural validation.
 * @returns A new array; the input messages are not mutated.
 */
function sanitizeMessages(messages: UIMessage[]): UIMessage[] {
  const sanitized: UIMessage[] = []
  for (const message of messages) {
    if (message.role !== 'user' && message.role !== 'assistant') continue
    const textParts = message.parts.filter(
      (part) => part.type === 'text' && typeof part.text === 'string'
    )
    if (textParts.length > 0) sanitized.push({ ...message, parts: textParts })
  }
  return sanitized
}

/**
 * Lower-cased `host` (hostname plus any non-default port) of a URL or origin string.
 *
 * @param value - Absolute URL or origin, e.g. `https://docs.example.com`.
 * @returns The host, or `null` when the value does not parse as a URL.
 */
function parseUrlHost(value: string): string | null {
  try {
    return new URL(value).host.toLowerCase()
  } catch {
    return null
  }
}

/**
 * Reject obvious cross-origin calls. Same-origin browser requests send an
 * `Origin` header matching the host; we allow those, plus any entry in
 * DOCS_ALLOWED_ORIGINS (comma-separated). Entries may be bare hosts
 * (`docs.example.com`) or full origins (`https://docs.example.com`); full
 * origins are reduced to their host before comparison. Requests with no Origin
 * (e.g. curl) are allowed through to the cost caps rather than blocked, since
 * Origin is trivially spoofable and is a filter, not a security boundary.
 *
 * @param req - Incoming request.
 * @returns `false` only when an `Origin` header is present and matches neither
 *   the request host nor the allowlist (or cannot be parsed).
 */
function isAllowedOrigin(req: Request): boolean {
  const origin = req.headers.get('origin')
  if (!origin) return true

  const originHost = parseUrlHost(origin)
  if (originHost === null) return false

  // Behind a proxy the public host is in x-forwarded-host; its first entry is used.
  const forwardedHost = req.headers.get('x-forwarded-host') ?? req.headers.get('host')
  const requestHost = forwardedHost?.split(',')[0]?.trim().toLowerCase()
  if (requestHost && originHost === requestHost) return true

  const allowlist = (process.env.DOCS_ALLOWED_ORIGINS ?? '')
    .split(',')
    .map((value) => value.trim().toLowerCase())
    .filter(Boolean)
    // Only values with a scheme are parsed: `new URL('localhost:3000')` would
    // treat `localhost:` as the scheme and yield an empty host.
    .map((value) => (value.includes('://') ? (parseUrlHost(value) ?? value) : value))
  return allowlist.includes(originHost)
}

/** System prompt for the docs assistant: scope, grounding rules, and product naming. */
const SYSTEM_PROMPT = `You are the documentation assistant for Sim — the open-source AI workspace where teams build, deploy, and manage AI agents.

Answer questions about Sim using the documentation. Always call the searchDocs tool before answering anything specific about Sim's features, configuration, or usage — do not answer from memory. Base your answer only on the returned documentation; if the docs do not cover the question, say so plainly rather than guessing.

Guidelines:
- Be direct and concrete. Lead with the answer, then the detail.
- Reference the relevant pages by their titles so the user knows where to read more.
- When you show configuration or code, keep it minimal and correct.
- The agent is called "Sim" and the chat surface is "Chat" — never say "Mothership" or "copilot".
- If a question is unrelated to Sim, briefly say it's outside the docs' scope.`

/** Columns selected for every documentation search row, keyed by the name used in results. */
const SEARCH_COLUMNS = {
  title: docsEmbeddings.headerText,
  url: docsEmbeddings.sourceLink,
  content: docsEmbeddings.chunkText,
  sourceDocument: docsEmbeddings.sourceDocument,
}

/**
 * Retrieve candidate chunks for grounding. English docs are embedded, so they
 * use vector similarity; other locales rely on Postgres full-text keyword search
 * (vector search over English-trained embeddings does not serve them) — the same
 * split the site search route makes.
 */
/**
 * SQL predicate selecting only the locale's documents, so the row limit applies
 * to matching rows (mirrors `matchesLocale`): non-English docs are prefixed with
 * their locale segment; English is everything not prefixed with another locale.
 */
/**
 * SQL predicate restricting rows to one locale's documents, based on the first
 * path segment of `sourceDocument`.
 *
 * Non-default locales match rows whose first segment equals the locale. The
 * default locale matches every row whose first segment is not one of the other
 * known locales.
 *
 * @param locale - A locale code; callers pass a value from KNOWN_LOCALES.
 * @returns A drizzle SQL fragment for use inside a `where` clause.
 */
function localeFilter(locale: string) {
  const firstSegment = sql`split_part(${docsEmbeddings.sourceDocument}, '/', 1)`
  if (locale !== DEFAULT_LOCALE) {
    return sql`${firstSegment} = ${locale}`
  }

  const otherLocales = KNOWN_LOCALES.filter((known) => known !== DEFAULT_LOCALE).map(
    (known) => sql`${known}`
  )
  return sql`${firstSegment} not in (${sql.join(otherLocales, sql`, `)})`
}

/**
 * Retrieve documentation chunks to ground an answer.
 *
 * The default locale uses vector similarity over the stored embeddings, keeping
 * rows at or above SIMILARITY_THRESHOLD. Every other locale uses Postgres
 * full-text search with the locale's text-search configuration (`'simple'` when
 * none is configured). Both paths filter by locale in SQL, fetch up to
 * SEARCH_CANDIDATES rows in rank order, and return the first SEARCH_LIMIT.
 *
 * @param query - Natural-language search query; a blank query returns no results.
 * @param locale - Locale whose documents should be searched.
 * @returns Up to SEARCH_LIMIT `{ title, url, content }` results, best match first.
 */
async function searchDocs(query: string, locale: string) {
  // Nothing to embed or tokenize: skip the embedding call and the database round trip.
  if (query.trim().length === 0) return []

  let rows: Array<{ title: string; url: string; content: string; sourceDocument: string }>

  if (locale === DEFAULT_LOCALE) {
    const embedding = await generateSearchEmbedding(query)
    // Serialized as a JSON array and cast to `vector` inside the query.
    const vectorLiteral = JSON.stringify(embedding)
    rows = await db
      .select(SEARCH_COLUMNS)
      .from(docsEmbeddings)
      .where(
        sql`1 - (${docsEmbeddings.embedding} <=> ${vectorLiteral}::vector) >= ${SIMILARITY_THRESHOLD} and ${localeFilter(locale)}`
      )
      .orderBy(sql`${docsEmbeddings.embedding} <=> ${vectorLiteral}::vector`)
      .limit(SEARCH_CANDIDATES)
  } else {
    const tsConfig = TS_CONFIG[locale] ?? 'simple'
    rows = await db
      .select(SEARCH_COLUMNS)
      .from(docsEmbeddings)
      .where(
        sql`${docsEmbeddings.chunkTextTsv} @@ plainto_tsquery(${tsConfig}, ${query}) and ${localeFilter(locale)}`
      )
      .orderBy(
        sql`ts_rank(${docsEmbeddings.chunkTextTsv}, plainto_tsquery(${tsConfig}, ${query})) DESC`
      )
      .limit(SEARCH_CANDIDATES)
  }

  return rows.slice(0, SEARCH_LIMIT).map((row) => ({
    title: row.title,
    url: row.url,
    content: row.content,
  }))
}

/**
 * Ask AI chat endpoint: validates the request, then streams a model answer that
 * can call the server-side `searchDocs` tool.
 *
 * Responses before streaming starts:
 * - 403 when the `Origin` header is not allowed.
 * - 429 (with `Retry-After`) when the caller is over the per-IP rate limit.
 * - 400 when the body is not JSON, is not a JSON object, or `messages` is
 *   missing, empty, too long, malformed, or has no user/assistant text.
 * - 413 when the user text or the serialized messages exceed the size caps.
 *
 * @param req - Request whose JSON body is `{ messages: UIMessage[]; locale?: string }`.
 * @returns A UI message stream response, or one of the error responses above.
 */
export async function POST(req: Request): Promise<Response> {
  if (!isAllowedOrigin(req)) {
    return new Response('Forbidden', { status: 403 })
  }

  const now = Date.now()
  sweepRateLimit(now)
  const retryAfter = rateLimit(getClientIp(req), now)
  if (retryAfter !== null) {
    return new Response('Too many requests', {
      status: 429,
      headers: { 'Retry-After': String(retryAfter) },
    })
  }

  let body: unknown
  try {
    body = await req.json()
  } catch {
    return new Response('Invalid JSON', { status: 400 })
  }
  // Valid JSON can still be `null` or a primitive; reading properties off `null` would throw.
  if (typeof body !== 'object' || body === null) {
    return new Response('Invalid request', { status: 400 })
  }

  const { messages, locale: requestedLocale } = body as { messages?: unknown; locale?: unknown }
  const locale =
    typeof requestedLocale === 'string' && KNOWN_LOCALES.includes(requestedLocale)
      ? requestedLocale
      : DEFAULT_LOCALE

  if (!Array.isArray(messages) || messages.length === 0 || messages.length > MAX_MESSAGES) {
    return new Response('Invalid request', { status: 400 })
  }
  if (!messages.every(isValidMessage)) {
    return new Response('Invalid request', { status: 400 })
  }
  if (userInputChars(messages) > MAX_USER_INPUT_CHARS) {
    return new Response('Request too large', { status: 413 })
  }
  if (JSON.stringify(messages).length > MAX_TOTAL_CHARS) {
    return new Response('Request too large', { status: 413 })
  }

  const modelMessages = sanitizeMessages(messages)
  if (modelMessages.length === 0) {
    return new Response('Invalid request', { status: 400 })
  }

  const result = streamText({
    model: openai(CHAT_MODEL),
    system: SYSTEM_PROMPT,
    messages: convertToModelMessages(modelMessages),
    stopWhen: stepCountIs(MAX_STEPS),
    maxOutputTokens: MAX_OUTPUT_TOKENS,
    tools: {
      searchDocs: tool({
        description:
          'Search the Sim documentation for relevant content. Use this before answering any question about Sim.',
        inputSchema: z.object({
          query: z.string().describe('A focused natural-language search query.'),
        }),
        // The locale comes from the validated request, never from the model.
        execute: async ({ query }) => searchDocs(query, locale),
      }),
    },
  })

  return result.toUIMessageStreamResponse()
}
diff --git a/apps/docs/components/ai/ask-ai.tsx b/apps/docs/components/ai/ask-ai.tsx
new file mode 100644
index 00000000000..5dc5a9b4785
--- /dev/null
+++ b/apps/docs/components/ai/ask-ai.tsx
@@ -0,0 +1,214 @@
'use client'

import { type FormEvent, useEffect, useMemo, useRef, useState } from 'react'
import { useChat } from '@ai-sdk/react'
import { DefaultChatTransport } from 'ai'
import { ArrowUp, MessageCircle, Square, X } from 'lucide-react'
import { Streamdown } from 'streamdown'
import { cn } from '@/lib/utils'
import 'streamdown/styles.css'

/** A documentation page cited under an assistant answer. */
interface DocSource {
  title: string
  url: string
}

/**
 * Pull the deduped doc sources surfaced by the searchDocs tool out of a message's parts.
 *
 * Only `tool-searchDocs` parts whose `output` is an array are read. Entries
 * without a non-empty string `url` are skipped, and the first occurrence of each
 * URL wins.
 *
 * @param parts - Parts of a single chat message.
 * @returns Sources in first-seen order.
 */
function getSources(parts: ReadonlyArray<{ type: string; [key: string]: unknown }>): DocSource[] {
  const seen = new Set<string>()
  const sources: DocSource[] = []

  for (const part of parts) {
    if (part.type !== 'tool-searchDocs') continue
    const output = part.output
    if (!Array.isArray(output)) continue
    for (const item of output as Array<Partial<DocSource> | null | undefined>) {
      const url = item?.url
      if (typeof url !== 'string' || url.length === 0 || seen.has(url)) continue
      seen.add(url)
      sources.push({ title: typeof item?.title === 'string' ? item.title : '', url })
    }
  }

  return sources
}

/**
 * Concatenate the streamed text parts of a message.
 *
 * @param parts - Parts of a single chat message.
 * @returns The text of every `text` part, joined in order; non-string text is ignored.
 */
function getText(parts: ReadonlyArray<{ type: string; [key: string]: unknown }>): string {
  let text = ''
  for (const part of parts) {
    if (part.type === 'text' && typeof part.text === 'string') text += part.text
  }
  return text
}

interface AskAIProps {
  /** Active docs locale, forwarded so retrieval is scoped to the reader's language.
*/ + locale: string +} + +export function AskAI({ locale }: AskAIProps) { + const [open, setOpen] = useState(false) + const [input, setInput] = useState('') + const scrollRef = useRef(null) + + // Stable transport; the locale is sent per-message (below) so it stays current + // after a language switch instead of being frozen into the transport. + const transport = useMemo(() => new DefaultChatTransport({ api: '/api/chat' }), []) + + const { messages, sendMessage, status, stop, error } = useChat({ transport }) + + const isBusy = status === 'submitted' || status === 'streaming' + + // Jump to the bottom instantly when the panel opens (a mount transition). + useEffect(() => { + if (!open) return + scrollRef.current?.scrollTo({ top: scrollRef.current.scrollHeight }) + }, [open]) + + // Smooth-scroll as new messages stream in (an explicit re-orientation cue). + useEffect(() => { + scrollRef.current?.scrollTo({ top: scrollRef.current.scrollHeight, behavior: 'smooth' }) + }, [messages]) + + const handleSubmit = (event: FormEvent) => { + event.preventDefault() + const text = input.trim() + if (!text || isBusy) return + sendMessage({ text }, { body: { locale } }) + setInput('') + } + + return ( + <> + {!open && ( + + )} + + {open && ( +
+
+ + + Ask AI + + +
+ +
+ {messages.length === 0 && ( +

+ Ask anything about building, deploying, and managing AI agents in Sim. +

+ )} + + {messages.map((message) => { + const text = getText(message.parts) + const sources = message.role === 'assistant' ? getSources(message.parts) : [] + return ( +
+ {message.role === 'user' ? ( +
+ {text} +
+ ) : ( +
+ {text ? ( + + {text} + + ) : isBusy ? ( + '…' + ) : null} +
+ )} + {sources.length > 0 && ( +
+ {sources.map((source) => ( + + {source.title || source.url} + + ))} +
+ )} +
+ ) + })} + + {error && ( +

+ Something went wrong. Please try again. +

+ )} +
+ +
+