diff --git a/README.md b/README.md index bdfc74a..a627f58 100644 --- a/README.md +++ b/README.md @@ -6,17 +6,68 @@ This is an n8n community node. It lets you use **[Firecrawl](https://firecrawl.d [n8n](https://n8n.io/) is a [fair-code licensed](https://docs.n8n.io/reference/license/) workflow automation platform. -[Installation](#installation) -[Operations](#operations) -[Credentials](#credentials) -[Compatibility](#compatibility) -[Resources](#resources) +## ✨ NEW: AI Agent Tool Support + +The Firecrawl node now supports **n8n's AI Agent system**! Use it as a tool in AI Agent workflows for intelligent web scraping and data extraction. + +[Installation](#installation) +[AI Agent Tool Usage](#ai-agent-tool-usage) +[Operations](#operations) +[Credentials](#credentials) +[Compatibility](#compatibility) +[Resources](#resources) [Version history](#version-history) ## Installation Follow the [installation guide](https://docs.n8n.io/integrations/community-nodes/installation/) in the n8n community nodes documentation. +**For AI Agent Tool usage**, you must also set this environment variable: +```bash +export N8N_COMMUNITY_PACKAGES_ALLOW_TOOL_USAGE=true +``` + +## AI Agent Tool Usage + +The Firecrawl node can now be used as a **Tool Node** in n8n's AI Agent workflows! This enables AI agents to intelligently scrape websites, search the web, and extract structured data. + +### Quick Start + +1. **Install** the Firecrawl node (see installation above) +2. **Set environment variable**: `N8N_COMMUNITY_PACKAGES_ALLOW_TOOL_USAGE=true` +3. **Restart n8n** +4. **Create a workflow** with an AI Agent node +5. **Connect** the Firecrawl node to the AI Agent +6. 
**Configure** your Firecrawl API credentials + +### Example AI Agent Prompts + +Once connected to an AI Agent, you can use natural language prompts like: + +- *"Scrape the content from https://example.com and summarize it"* +- *"Search for recent AI research papers and extract key insights"* +- *"Map all URLs on https://company.com and then scrape their About page"* +- *"Extract product information (name, price, description) from this e-commerce page"* + +### Available Tool Operations + +When used as an AI Agent tool, Firecrawl provides these capabilities: + +- **🔍 Web Search**: Find information across multiple websites +- **📄 Page Scraping**: Extract content from specific URLs +- **🗺️ Site Mapping**: Discover all URLs on a website +- **🕷️ Website Crawling**: Extract content from multiple related pages +- **🎯 Data Extraction**: Get structured data using AI prompts +- **📊 Batch Operations**: Handle multiple URLs efficiently + +### Benefits of AI Agent Integration + +- **One-click setup**: No complex workflow design needed +- **Natural language control**: Use simple prompts instead of manual configuration +- **Intelligent decision making**: AI chooses the right Firecrawl operation automatically +- **Seamless integration**: Works with any AI model supported by n8n +- **Reduced complexity**: Eliminates the need for manual parameter configuration + ## Operations The **Firecrawl** node supports the following operations: @@ -103,6 +154,16 @@ To use the Firecrawl node, you need to: ## Version history +### 1.0.7 (AI Agent Tool Support) +- ✨ **NEW**: Added support for n8n's AI Agent system +- 🤖 Node can now be used as an AI Agent Tool for intelligent web scraping +- 🔧 Added `usableAsTool: true` property for seamless AI integration +- 📚 Enhanced node description for better AI agent understanding +- 🛠️ Created tools.ts with AI-friendly operation descriptions +- 📖 Updated documentation with AI Agent usage examples +- 🎯 Enables natural 
language control of web scraping operations +- πŸš€ One-click integration with AI workflows + ### 1.0.6 - Add support for additional Firecrawl endpoints: - Batch Scrape (start/status/errors) diff --git a/nodes/Firecrawl/Firecrawl.node.ts b/nodes/Firecrawl/Firecrawl.node.ts index bd70ecb..cb648f1 100644 --- a/nodes/Firecrawl/Firecrawl.node.ts +++ b/nodes/Firecrawl/Firecrawl.node.ts @@ -13,12 +13,13 @@ export class Firecrawl implements INodeType { group: ['transform'], version: 1, subtitle: '={{$parameter["operation"] + ": " + $parameter["resource"]}}', - description: 'Get data from Firecrawl API', + description: 'Powerful web scraping, crawling, and data extraction from websites using Firecrawl API. Perfect for AI agents to gather web content, search the internet, map websites, and extract structured data.', defaults: { name: 'Firecrawl', }, inputs: `={{["main"]}}`, outputs: `={{["main"]}}`, + usableAsTool: true, credentials: [ { name: 'firecrawlApi', diff --git a/nodes/Firecrawl/tools.ts b/nodes/Firecrawl/tools.ts new file mode 100644 index 0000000..9572a51 --- /dev/null +++ b/nodes/Firecrawl/tools.ts @@ -0,0 +1,106 @@ +/** + * AI Tool Configuration for Firecrawl Node + * + * This file provides configuration and helper functions to make the Firecrawl node + * compatible with n8n's AI Agent system. The main enablement is through the + * usableAsTool: true property in the main node description. + */ + +export const firecrawlToolDescription = { + // Tool descriptions for AI Agents to understand capabilities + scrape: ` + Scrape content from a single URL with advanced options. + Best for single page content extraction when you know the exact URL. + Returns markdown, HTML, or other specified formats. + `, + + search: ` + Search the web and optionally extract content from search results. + Best for finding information across multiple websites when you don't know which site has it. + Returns search results with optional scraped content. 
+ `, + + map: ` + Map a website to discover all indexed URLs on the site. + Best for discovering URLs before deciding what to scrape. + Returns array of URLs found on the site. + `, + + crawl: ` + Start a crawl job on a website to extract content from multiple pages. + Best for comprehensive coverage of related pages. + Returns operation ID for status checking. + `, + + extract: ` + Extract structured information from web pages using LLM capabilities. + Best for extracting specific structured data like prices, names, details. + Returns extracted structured data as defined by schema. + `, + + checkCrawlStatus: ` + Check the status of a crawl job. + Returns status and progress of the crawl job, including results if available. + ` +}; + +// Example usage patterns for AI Agents +export const firecrawlToolExamples = { + scrape: { + description: "Scrape content from a specific webpage", + example: { + operation: "scrape", + resource: "content", + url: "https://example.com", + formats: ["markdown"], + onlyMainContent: true + } + }, + + search: { + description: "Search the web for information", + example: { + operation: "search", + resource: "web", + query: "latest AI research papers 2024", + limit: 10, + sources: ["web"] + } + }, + + map: { + description: "Discover URLs on a website", + example: { + operation: "map", + resource: "website", + url: "https://example.com", + limit: 100 + } + } +}; + +/** + * Helper function to validate tool parameters for AI Agents + */ +export function validateToolParameters(operation: string, parameters: any): { valid: boolean; error?: string } { + switch (operation) { + case 'scrape': + if (!parameters.url) { + return { valid: false, error: 'URL is required for scrape operation' }; + } + break; + case 'search': + if (!parameters.query) { + return { valid: false, error: 'Query is required for search operation' }; + } + break; + case 'map': + if (!parameters.url) { + return { valid: false, error: 'URL is required for map operation' }; + } + 
break; + default: + return { valid: false, error: `Unknown operation: ${operation}` }; + } + return { valid: true }; +} \ No newline at end of file diff --git a/package.json b/package.json index 27f00bf..6d94573 100644 --- a/package.json +++ b/package.json @@ -1,9 +1,15 @@ { "name": "@mendable/n8n-nodes-firecrawl", - "version": "1.0.6", + "version": "1.0.7", "description": "Firecrawl node for n8n", "keywords": [ - "n8n-community-node-package" + "n8n-community-node-package", + "ai-agent", + "ai-tool", + "web-scraping", + "firecrawl", + "llm-ready", + "data-extraction" ], "license": "MIT", "homepage": "https://www.firecrawl.dev",