Skip to content

Commit 2964aed

Browse files
committed
feat: add fastCRW tool block
1 parent 79d98b3 commit 2964aed

18 files changed

Lines changed: 1326 additions & 0 deletions

File tree

apps/sim/app/workspace/[workspaceId]/settings/components/byok/byok.tsx

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import {
66
AnthropicIcon,
77
BasetenIcon,
88
BrandfetchIcon,
9+
CrwIcon,
910
ExaAIIcon,
1011
FalIcon,
1112
FindymailIcon,
@@ -111,6 +112,13 @@ const PROVIDERS: (BYOKManagerProvider & { id: BYOKProviderId })[] = [
111112
description: 'Web scraping, crawling, search, and extraction',
112113
placeholder: 'Enter your Firecrawl API key',
113114
},
115+
{
116+
id: 'crw',
117+
name: 'fastCRW',
118+
icon: CrwIcon,
119+
description: 'Web scraping, crawling, search, and mapping',
120+
placeholder: 'Enter your fastCRW API key',
121+
},
114122
{
115123
id: 'exa',
116124
name: 'Exa',
@@ -249,6 +257,7 @@ const PROVIDER_SECTIONS: BYOKProviderSection[] = [
249257
label: 'Search & web',
250258
ids: [
251259
'firecrawl',
260+
'crw',
252261
'exa',
253262
'serper',
254263
'linkup',

apps/sim/blocks/blocks/crw.ts

Lines changed: 290 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,290 @@
1+
import { CrwIcon } from '@/components/icons'
2+
import type { BlockConfig, BlockMeta } from '@/blocks/types'
3+
import { AuthMode, IntegrationType } from '@/blocks/types'
4+
import type { CrwResponse } from '@/tools/crw/types'
5+
6+
/**
 * Converts a numeric sub-block value into an integer suitable for a tool request.
 *
 * Falsy input (unset, empty string, `0`, `NaN`) yields `undefined`, matching the
 * truthiness gate the parameter mapper has always applied before forwarding a value.
 * Strings are parsed as base-10; anything that does not produce a finite integer also
 * yields `undefined` so that `NaN` is never forwarded to a tool.
 *
 * @param raw - Value taken from the block inputs (typically a string from a short-input).
 * @returns The parsed integer, or `undefined` when the value should be omitted.
 */
function parseOptionalInt(raw: unknown): number | undefined {
  if (!raw) return undefined
  const parsed = typeof raw === 'number' ? Math.trunc(raw) : Number.parseInt(String(raw), 10)
  return Number.isFinite(parsed) ? parsed : undefined
}

/**
 * Normalizes the "Output Formats" input into an array.
 *
 * - Falsy input or a value that is neither an array nor a string: `undefined` (parameter omitted).
 * - An array: returned as-is.
 * - A string: parsed as JSON; if it is not valid JSON or does not decode to an array,
 *   the result falls back to `['markdown']`.
 *
 * @param raw - Value taken from the block inputs.
 * @returns The formats array to forward, or `undefined` when nothing should be sent.
 */
function normalizeFormats(raw: unknown): unknown[] | undefined {
  if (!raw) return undefined
  if (Array.isArray(raw)) return raw
  if (typeof raw !== 'string') return undefined
  try {
    const parsed: unknown = JSON.parse(raw)
    return Array.isArray(parsed) ? parsed : ['markdown']
  } catch {
    // Free-form text that is not JSON falls back to the markdown-only default.
    return ['markdown']
  }
}

/**
 * Block configuration for the fastCRW integration.
 *
 * Exposes four operations (scrape, search, crawl, map) through a single block. The
 * `operation` dropdown selects which `crw_*` tool is invoked, and `tools.config.params`
 * maps the visible sub-block values onto that tool's request parameters.
 */
export const CrwBlock: BlockConfig<CrwResponse> = {
  type: 'crw',
  name: 'fastCRW',
  description: 'Scrape, search, crawl, and map web data',
  authMode: AuthMode.ApiKey,
  longDescription:
    'Integrate fastCRW into the workflow. Scrape pages, search the web, crawl entire sites, and map URL structures. fastCRW is a Firecrawl-compatible web scraper in a single binary — self-host or cloud.',
  docsLink: 'https://docs.sim.ai/integrations/crw',
  category: 'tools',
  integrationType: IntegrationType.Search,
  bgColor: '#181C1E',
  icon: CrwIcon,
  subBlocks: [
    {
      id: 'operation',
      title: 'Operation',
      type: 'dropdown',
      options: [
        { label: 'Scrape', id: 'scrape' },
        { label: 'Search', id: 'search' },
        { label: 'Crawl', id: 'crawl' },
        { label: 'Map', id: 'map' },
      ],
      value: () => 'scrape',
    },
    {
      id: 'url',
      title: 'Website URL',
      type: 'short-input',
      placeholder: 'Enter the website URL',
      condition: {
        field: 'operation',
        value: ['scrape', 'crawl', 'map'],
      },
      required: true,
    },
    {
      id: 'query',
      title: 'Search Query',
      type: 'short-input',
      placeholder: 'Enter the search query',
      condition: {
        field: 'operation',
        value: 'search',
      },
      required: true,
    },
    {
      id: 'onlyMainContent',
      title: 'Only Main Content',
      type: 'switch',
      condition: {
        field: 'operation',
        value: ['scrape', 'crawl'],
      },
    },
    {
      id: 'formats',
      title: 'Output Formats',
      type: 'long-input',
      placeholder: '["markdown", "html"]',
      condition: {
        field: 'operation',
        value: ['scrape', 'crawl'],
      },
    },
    {
      id: 'waitFor',
      title: 'Wait For (ms)',
      type: 'short-input',
      placeholder: '0',
      condition: {
        field: 'operation',
        value: 'scrape',
      },
    },
    {
      id: 'limit',
      title: 'Limit',
      type: 'short-input',
      placeholder: '100',
      condition: {
        field: 'operation',
        value: ['map', 'search'],
      },
    },
    {
      id: 'maxPages',
      title: 'Max Pages',
      type: 'short-input',
      placeholder: '100',
      condition: {
        field: 'operation',
        value: 'crawl',
      },
    },
    {
      id: 'baseUrl',
      title: 'Base URL',
      type: 'short-input',
      placeholder: 'https://fastcrw.com/api',
      mode: 'advanced',
    },
    {
      id: 'apiKey',
      title: 'API Key',
      type: 'short-input',
      placeholder: 'Enter your fastCRW API key',
      password: true,
      required: true,
      hideWhenHosted: true,
    },
  ],
  tools: {
    access: ['crw_scrape', 'crw_search', 'crw_crawl', 'crw_map'],
    config: {
      // Selects the tool id for the chosen operation; anything unrecognized uses scrape.
      tool: (params) => {
        switch (params.operation) {
          case 'scrape':
            return 'crw_scrape'
          case 'search':
            return 'crw_search'
          case 'crawl':
            return 'crw_crawl'
          case 'map':
            return 'crw_map'
          default:
            return 'crw_scrape'
        }
      },
      // Builds the tool request: always carries the API key, optionally the base URL,
      // then only the fields that belong to the selected operation.
      params: (params) => {
        const {
          operation,
          limit,
          maxPages,
          formats,
          waitFor,
          url,
          query,
          onlyMainContent,
          baseUrl,
          apiKey,
        } = params

        const result: Record<string, unknown> = { apiKey }

        if (baseUrl) result.baseUrl = baseUrl

        switch (operation) {
          case 'scrape': {
            if (url) result.url = url
            const scrapeFormats = normalizeFormats(formats)
            if (scrapeFormats) result.formats = scrapeFormats
            const waitMs = parseOptionalInt(waitFor)
            if (waitMs !== undefined) result.waitFor = waitMs
            // `!= null` so an explicit `false` from the switch is still forwarded.
            if (onlyMainContent != null) result.onlyMainContent = onlyMainContent
            break
          }

          case 'search': {
            if (query) result.query = query
            const searchLimit = parseOptionalInt(limit)
            if (searchLimit !== undefined) result.limit = searchLimit
            break
          }

          case 'crawl': {
            if (url) result.url = url
            const pageCap = parseOptionalInt(maxPages)
            if (pageCap !== undefined) result.maxPages = pageCap
            const crawlFormats = normalizeFormats(formats)
            if (crawlFormats) result.formats = crawlFormats
            if (onlyMainContent != null) result.onlyMainContent = onlyMainContent
            break
          }

          case 'map': {
            if (url) result.url = url
            const linkLimit = parseOptionalInt(limit)
            if (linkLimit !== undefined) result.limit = linkLimit
            break
          }
        }

        return result
      },
    },
  },
  inputs: {
    apiKey: { type: 'string', description: 'fastCRW API key' },
    baseUrl: { type: 'string', description: 'Base URL for self-hosted fastCRW' },
    operation: { type: 'string', description: 'Operation to perform' },
    url: { type: 'string', description: 'Target website URL' },
    query: { type: 'string', description: 'Search query terms' },
    limit: { type: 'string', description: 'Result/link limit' },
    maxPages: { type: 'string', description: 'Maximum pages to crawl' },
    formats: { type: 'json', description: 'Output formats array' },
    waitFor: { type: 'number', description: 'Wait time before scraping in ms' },
    onlyMainContent: { type: 'boolean', description: 'Extract only main content' },
    // NOTE(review): declared as an input, but the params mapper above never forwards
    // it to any tool — confirm whether it should be wired through or removed.
    scrapeOptions: { type: 'json', description: 'Advanced scraping options' },
  },
  outputs: {
    // Scrape output
    markdown: { type: 'string', description: 'Page content markdown' },
    html: { type: 'string', description: 'Raw HTML content' },
    metadata: { type: 'json', description: 'Page metadata' },
    // Search output
    data: { type: 'json', description: 'Search results data' },
    // Crawl output
    pages: { type: 'json', description: 'Crawled pages data' },
    total: { type: 'number', description: 'Total pages found' },
    // Map output
    success: { type: 'boolean', description: 'Operation success status' },
    links: { type: 'json', description: 'Discovered URLs array' },
  },
}
234+
235+
/** Starter workflow templates surfaced for the fastCRW block. */
const CRW_TEMPLATES = [
  {
    icon: CrwIcon,
    title: 'fastCRW competitor site monitor',
    prompt:
      'Build a scheduled workflow that uses fastCRW to scrape competitor pricing, product, and changelog pages weekly, diffs against the prior snapshot, and posts changes to Slack.',
    modules: ['scheduled', 'agent', 'workflows'],
    category: 'marketing',
    tags: ['marketing', 'monitoring'],
    alsoIntegrations: ['slack'],
  },
  {
    icon: CrwIcon,
    title: 'fastCRW knowledge-base builder',
    prompt:
      'Build a workflow that crawls a documentation site with fastCRW, chunks and embeds the pages, and upserts them into a knowledge base for an answering agent.',
    modules: ['knowledge-base', 'agent', 'workflows'],
    category: 'engineering',
    tags: ['research', 'sync'],
  },
  {
    icon: CrwIcon,
    title: 'fastCRW research stack',
    prompt:
      'Create an agent that uses fastCRW Search to find authoritative URLs on a topic, scrapes each with fastCRW, and produces a structured research brief with citations.',
    modules: ['agent', 'files', 'workflows'],
    category: 'productivity',
    tags: ['research'],
  },
] as const

/** Agent skills (name, summary, and markdown instructions) shipped with the fastCRW block. */
const CRW_SKILLS = [
  {
    name: 'scrape-page-to-markdown',
    description:
      'Scrape a single URL with fastCRW and return clean main-content markdown for an agent to read.',
    content:
      '# Scrape Page to Markdown\n\nUse fastCRW to fetch a web page as clean, LLM-ready markdown.\n\n## Steps\n1. Use the Scrape operation on the target URL.\n2. Enable Only Main Content to strip navigation, ads, and footers; set a Wait For delay if the page renders content with JavaScript.\n3. Return the markdown output and capture page metadata (title, description).\n\n## Output\nReturn the page markdown plus key metadata. If the page failed to load or returned empty content, report that instead of fabricating text.',
  },
  {
    name: 'crawl-site',
    description:
      'Crawl an entire site or section with fastCRW and return the page content for indexing or analysis.',
    content:
      '# Crawl Site\n\nUse fastCRW to traverse a site and collect its pages.\n\n## Steps\n1. Use the Crawl operation on the root URL, setting a sensible Max Pages limit to control cost.\n2. Enable Only Main Content so each page comes back as clean markdown.\n3. Collect the crawled pages and their URLs from the response.\n\n## Output\nReturn the list of crawled pages with their URL and markdown content, plus the total page count. This output is ready to chunk and embed into a knowledge base.',
  },
  {
    name: 'research-with-search',
    description:
      'Run a web search with fastCRW, then scrape the top results into a cited research brief.',
    content:
      '# Research With Search\n\nUse fastCRW to gather and synthesize web sources on a topic.\n\n## Steps\n1. Use the Search operation with the research query and a result Limit.\n2. For the most relevant results, use Scrape to pull the full page markdown.\n3. Synthesize the findings into a brief, attributing each claim to its source URL.\n\n## Output\nReturn a structured research brief with key findings and a Sources list of the URLs used. Keep claims grounded in the scraped content.',
  },
] as const

/**
 * Catalog metadata for the fastCRW block: discovery tags plus the template and
 * skill tables defined above. Checked against `BlockMeta` without widening the
 * literal types.
 */
export const CrwBlockMeta = {
  tags: ['web-scraping', 'automation'],
  templates: CRW_TEMPLATES,
  skills: CRW_SKILLS,
} as const satisfies BlockMeta

apps/sim/blocks/registry.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ import { ConfluenceBlock, ConfluenceBlockMeta, ConfluenceV2Block } from '@/block
4141
import { ConvexBlock, ConvexBlockMeta } from '@/blocks/blocks/convex'
4242
import { CredentialBlock } from '@/blocks/blocks/credential'
4343
import { CrowdStrikeBlock, CrowdStrikeBlockMeta } from '@/blocks/blocks/crowdstrike'
44+
import { CrwBlock, CrwBlockMeta } from '@/blocks/blocks/crw'
4445
import { CursorBlock, CursorBlockMeta, CursorV2Block } from '@/blocks/blocks/cursor'
4546
import { DagsterBlock, DagsterBlockMeta } from '@/blocks/blocks/dagster'
4647
import { DatabricksBlock, DatabricksBlockMeta } from '@/blocks/blocks/databricks'
@@ -406,6 +407,7 @@ const BLOCK_REGISTRY: Record<string, BlockConfig> = {
406407
zerobounce: ZeroBounceBlock,
407408
neverbounce: NeverBounceBlock,
408409
millionverifier: MillionVerifierBlock,
410+
crw: CrwBlock,
409411
firecrawl: FirecrawlBlock,
410412
fireflies: FirefliesBlock,
411413
fireflies_v2: FirefliesV2Block,
@@ -669,6 +671,7 @@ const BLOCK_META_REGISTRY: Record<string, BlockMeta> = {
669671
confluence: ConfluenceBlockMeta,
670672
convex: ConvexBlockMeta,
671673
crowdstrike: CrowdStrikeBlockMeta,
674+
crw: CrwBlockMeta,
672675
cursor: CursorBlockMeta,
673676
dagster: DagsterBlockMeta,
674677
databricks: DatabricksBlockMeta,

apps/sim/components/icons.tsx

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -604,6 +604,25 @@ export function FirecrawlIcon(props: SVGProps<SVGSVGElement>) {
604604
)
605605
}
606606

607+
/**
 * fastCRW icon: a 24x24 SVG made of a filled `#F97316` background path with a
 * white stroked glyph drawn on top.
 *
 * @param props - SVG attributes spread onto the root `<svg>`; because they are
 *   spread after `viewBox` and `xmlns`, caller-supplied values override those defaults.
 */
export function CrwIcon(props: SVGProps<SVGSVGElement>) {
  // Path data is named here so the markup below reads as structure only.
  const backdropPath = 'M12 2 3 6v6c0 5.25 3.75 9.27 9 10 5.25-.73 9-4.75 9-10V6l-9-4z'
  const glyphPath =
    'M9.5 8.5a3.5 3.5 0 1 0 0 7 3.5 3.5 0 0 0 3.2-2.1M12.5 8.7l2 1.15v2.3l-2 1.15-2-1.15v-2.3l2-1.15zM16.5 8.5a3.5 3.5 0 1 1 0 7'

  return (
    <svg viewBox='0 0 24 24' xmlns='http://www.w3.org/2000/svg' {...props}>
      <path d={backdropPath} fill='#F97316' />
      <path
        d={glyphPath}
        fill='none'
        stroke='#FFFFFF'
        strokeWidth='1.4'
        strokeLinecap='round'
        strokeLinejoin='round'
      />
    </svg>
  )
}
625+
607626
export function JinaAIIcon(props: SVGProps<SVGSVGElement>) {
608627
return (
609628
<svg

apps/sim/lib/api/contracts/byok-keys.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ export const byokProviderIdSchema = z.enum([
1212
'ollama-cloud',
1313
'falai',
1414
'firecrawl',
15+
'crw',
1516
'exa',
1617
'serper',
1718
'linkup',

apps/sim/lib/core/security/csp.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ const STATIC_CONNECT_SRC = [
7171
'wss://api.elevenlabs.io',
7272
'https://api.exa.ai',
7373
'https://api.firecrawl.dev',
74+
'https://fastcrw.com',
7475
'https://*.googleapis.com',
7576
'https://*.amazonaws.com',
7677
'https://*.s3.amazonaws.com',

0 commit comments

Comments
 (0)