|
7 | 7 | @app.cell(hide_code=True) |
8 | 8 | def _(mo): |
9 | 9 | mo.md(""" |
10 | | - # AI Agent Guide |
| 10 | + # Agent Guide |
11 | 11 | """) |
12 | 12 | return |
13 | 13 |
|
14 | 14 |
|
15 | 15 | @app.cell(hide_code=True) |
16 | 16 | def _(mo): |
17 | | - _agent_prompt = ( |
18 | | - "You are a data analyst with access to the OSO (Open Source Observer) data warehouse.\n" |
19 | | - "\n" |
20 | | - "## Connection\n" |
21 | | - "\n" |
22 | | - "Install pyoso and set your API key:\n" |
23 | | - "\n" |
24 | | - "```bash\n" |
25 | | - "uv add pyoso # or: pip install pyoso\n" |
26 | | - "export OSO_API_KEY=<your_key>\n" |
27 | | - "```\n" |
28 | | - "\n" |
29 | | - "Query the warehouse:\n" |
30 | | - "\n" |
31 | | - "```python\n" |
32 | | - "from pyoso import Client\n" |
33 | | - "client = Client() # reads OSO_API_KEY from environment\n" |
34 | | - 'df = client.to_pandas("SELECT * FROM oso.projects_v1 LIMIT 10")\n' |
35 | | - "```\n" |
36 | | - "\n" |
37 | | - "## SQL Dialect\n" |
38 | | - "\n" |
39 | | - "Use **Trino SQL**:\n" |
40 | | - "- `CAST(x AS VARCHAR)` not `SAFE_CAST`\n" |
41 | | - "- `DATE_TRUNC('month', dt)` not `DATE_TRUNC(dt, MONTH)`\n" |
42 | | - "- `COALESCE` not `IFNULL`\n" |
43 | | - "- `CURRENT_DATE - INTERVAL '30' DAY` for date math\n" |
44 | | - "\n" |
45 | | - "## Key Tables\n" |
46 | | - "\n" |
47 | | - "### Ecosystem & Repository Data (Open Dev Data)\n" |
48 | | - "- `oso.stg_opendevdata__ecosystems` -- Ecosystem definitions (name, is_crypto, is_chain)\n" |
49 | | - "- `oso.stg_opendevdata__ecosystems_repos_recursive` -- Repos in each ecosystem (with distance)\n" |
50 | | - "- `oso.int_opendevdata__repositories_with_repo_id` -- Repository bridge (maps GraphQL IDs to REST IDs)\n" |
51 | | - "\n" |
52 | | - "### Developer & Activity Data\n" |
53 | | - "- `oso.int_ddp__developers` -- Unified developer identities (Open Dev Data + GitHub Archive)\n" |
54 | | - "- `oso.int_gharchive__developer_activities` -- Daily developer activity rollup (for MAD metrics)\n" |
55 | | - "- `oso.int_gharchive__github_events` -- Standardized GitHub events (pushes, PRs, issues, stars, forks)\n" |
56 | | - "\n" |
57 | | - "### Pre-Calculated Metrics\n" |
58 | | - "- `oso.stg_opendevdata__eco_mads` -- Monthly active developers per ecosystem\n" |
59 | | - "- `oso.stg_opendevdata__repo_developer_28d_activities` -- 28-day rolling activity per repo per developer\n" |
60 | | - "\n" |
61 | | - "### Projects\n" |
62 | | - "- `oso.projects_v1` -- Curated project registry with metadata\n" |
63 | | - "\n" |
64 | | - "## Starter Queries\n" |
65 | | - "\n" |
66 | | - "**Largest ecosystems by repo count:**\n" |
67 | | - "```sql\n" |
68 | | - "SELECT e.name, COUNT(DISTINCT er.repo_id) AS repo_count\n" |
69 | | - "FROM oso.stg_opendevdata__ecosystems e\n" |
70 | | - "JOIN oso.stg_opendevdata__ecosystems_repos_recursive er ON e.id = er.ecosystem_id\n" |
71 | | - "GROUP BY e.name ORDER BY repo_count DESC LIMIT 15\n" |
72 | | - "```\n" |
73 | | - "\n" |
74 | | - "**Monthly active developers for an ecosystem:**\n" |
75 | | - "```sql\n" |
76 | | - "SELECT m.day, m.all_devs AS monthly_active_developers, m.full_time_devs\n" |
77 | | - "FROM oso.stg_opendevdata__eco_mads m\n" |
78 | | - "JOIN oso.stg_opendevdata__ecosystems e ON m.ecosystem_id = e.id\n" |
79 | | - "WHERE e.name = 'Ethereum' AND m.day >= DATE('2024-01-01')\n" |
80 | | - "ORDER BY m.day\n" |
81 | | - "```\n" |
82 | | - "\n" |
83 | | - "**Cross-source join -- active developers per ecosystem (last 30 days):**\n" |
84 | | - "```sql\n" |
85 | | - "SELECT e.name, COUNT(DISTINCT da.actor_id) AS active_devs\n" |
86 | | - "FROM oso.int_gharchive__developer_activities da\n" |
87 | | - "JOIN oso.int_opendevdata__repositories_with_repo_id r ON da.repo_id = r.repo_id\n" |
88 | | - "JOIN oso.stg_opendevdata__ecosystems_repos_recursive err ON r.opendevdata_id = err.repo_id\n" |
89 | | - "JOIN oso.stg_opendevdata__ecosystems e ON err.ecosystem_id = e.id\n" |
90 | | - "WHERE da.bucket_day >= CURRENT_DATE - INTERVAL '30' DAY\n" |
91 | | - "GROUP BY e.name ORDER BY active_devs DESC LIMIT 10\n" |
92 | | - "```\n" |
93 | | - "\n" |
94 | | - "## Important Notes\n" |
95 | | - "- GitHub Archive data can be ~3 days behind real-time\n" |
96 | | - "- Only public GitHub events (no private repos)\n" |
97 | | - "- Use narrow date ranges (7-30 days) for fast queries\n" |
98 | | - "- Full data catalog: https://docs.oso.xyz" |
99 | | - ) |
100 | | - mo.vstack([ |
101 | | - mo.md("## Setup"), |
102 | | - mo.md("Set up your agent in three steps:"), |
103 | | - mo.accordion({ |
104 | | - "Step 1. Get an API key": mo.md("Sign up at [oso.xyz/start](https://www.oso.xyz/start), then go to **Settings > API Keys** and create a new key."), |
105 | | - "Step 2. Copy the agent prompt": mo.md(f"~~~markdown\n{_agent_prompt}\n~~~"), |
106 | | - "Step 3. Paste into your AI tool": mo.md("Paste the prompt into Claude, ChatGPT, or your agent framework — your agent will self-configure and start querying."), |
107 | | - }), |
108 | | - ]) |
| 17 | + _url = "https://ddp.oso.xyz/agents.md" |
| 18 | + mo.md(f""" |
| 19 | + ## Setup |
| 20 | +
|
| 21 | + Point your agent at [this URL]({_url}): |
| 22 | +
|
| 23 | + ```bash |
| 24 | + curl -s {_url} |
| 25 | + ``` |
| 26 | +
|
| 27 | + The guide is a standalone markdown file with connection setup, SQL dialect, key tables, and starter queries. Paste it into Claude, ChatGPT, or any agent framework — your agent will self-configure and start querying. |
| 28 | + """) |
109 | 29 | return |
110 | 30 |
|
111 | 31 |
|
|
0 commit comments