# Directory Structure ``` ├── .dockerignore ├── .gitignore ├── .npmignore ├── .smithery │ └── index.cjs ├── bin │ └── cli.js ├── Dockerfile ├── LICENSE ├── package-lock.json ├── package.json ├── README.md ├── smithery.yaml ├── src │ ├── index.js │ ├── index.ts │ ├── tools │ │ ├── feloTool.js │ │ ├── fetchUrlTool.js │ │ ├── metadataTool.js │ │ └── searchTool.js │ └── utils │ ├── search_felo.js │ └── search.js └── tsconfig.json ``` # Files -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- ``` 1 | node_modules 2 | npm-debug.log 3 | .dockerignore 4 | .git 5 | .gitignore 6 | .smithery 7 | dist 8 | .build 9 | .idea 10 | .vscode 11 | **/*.md 12 | **/*.log 13 | ``` -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- ``` 1 | .qodo 2 | 3 | # Dependency directories 4 | node_modules/ 5 | npm-debug.log 6 | yarn-debug.log 7 | yarn-error.log 8 | 9 | # Environment variables 10 | .env 11 | .env.local 12 | .env.development.local 13 | .env.test.local 14 | .env.production.local 15 | 16 | # Build directories 17 | dist/ 18 | build/ 19 | 20 | # IDE and editor files 21 | .idea/ 22 | .vscode/ 23 | *.swp 24 | *.swo 25 | .DS_Store 26 | ``` -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- ``` 1 | # Development files 2 | .git/ 3 | .github/ 4 | .vscode/ 5 | .idea/ 6 | .DS_Store 7 | 8 | # Test files 9 | test/ 10 | tests/ 11 | __tests__/ 12 | coverage/ 13 | 14 | # Configuration files 15 | .eslintrc* 16 | .prettierrc* 17 | .editorconfig 18 | tsconfig.json 19 | jest.config.js 20 | 21 | # Logs 22 | logs/ 23 | *.log 24 | npm-debug.log* 25 | yarn-debug.log* 26 | yarn-error.log* 27 | 28 | # Misc 29 | .qodo 30 | .env 31 | .env.* 32 | 
node_modules/ 33 | ``` -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- ```markdown 1 | <div align="center"> 2 | <img src="https://img.shields.io/npm/v/@oevortex/ddg_search.svg" alt="npm version" /> 3 | <img src="https://img.shields.io/badge/License-Apache%202.0-blue.svg" alt="License: Apache 2.0" /> 4 | <img src="https://img.shields.io/badge/YouTube-%40OEvortex-red.svg" alt="YouTube Channel" /> 5 | <h1>DuckDuckGo & Felo AI Search MCP 🔍🧠</h1> 6 | <p>A blazing-fast, privacy-friendly Model Context Protocol (MCP) server for web search and AI-powered responses using DuckDuckGo and Felo AI.</p> 7 | <a href="https://glama.ai/mcp/servers/@OEvortex/ddg_search"> 8 | <img width="380" height="200" src="https://glama.ai/mcp/servers/@OEvortex/ddg_search/badge" alt="DuckDuckGo Search MCP server" /> 9 | </a> 10 | <a href="https://youtube.com/@OEvortex"><strong>Subscribe for updates & tutorials</strong></a> 11 | </div> 12 | 13 | --- 14 | 15 | > [!IMPORTANT] 16 | > DuckDuckGo Search MCP supports the Model Context Protocol (MCP) standard, making it compatible with various AI assistants and tools. 
17 | 18 | --- 19 | 20 | ## ✨ Features 21 | 22 | <div style="display: flex; flex-wrap: wrap; gap: 1.5em; margin-bottom: 1.5em;"> <div><b>🌐 Web search</b> using DuckDuckGo HTML</div> 23 | <div><b>🧠 AI search</b> using Felo AI</div> 24 | <div><b>📄 URL content extraction</b> with smart filtering</div> 25 | <div><b>📊 URL metadata extraction</b> (title, description, images)</div> 26 | <div><b>⚡ Performance optimized</b> with caching</div> 27 | <div><b>🛡️ Security features</b> including rate limiting and rotating user agents</div> 28 | <div><b>🔌 MCP-compliant</b> server implementation</div> 29 | <div><b>🆓 No API keys required</b> - works out of the box</div> 30 | </div> 31 | 32 | > [!IMPORTANT] 33 | > Unlike many search tools, this package performs actual web scraping rather than using limited APIs, giving you more comprehensive results. 34 | 35 | --- 36 | 37 | ## 🚀 Quick Start 38 | 39 | <div style="background: #222; color: #fff; padding: 1.5em; border-radius: 8px; margin: 1.5em 0;"> 40 | <b>Run instantly with npx:</b> 41 | 42 | ```bash 43 | npx -y @oevortex/ddg_search@latest 44 | ``` 45 | </div> 46 | 47 | > [!TIP] 48 | > This will download and run the latest version of the MCP server directly without installation – perfect for quick use with AI assistants. 
49 | 50 | --- 51 | 52 | ## 🛠️ Installation Options 53 | 54 | <details> 55 | <summary><b>Global Installation</b></summary> 56 | 57 | ```bash 58 | npm install -g @oevortex/ddg_search 59 | ``` 60 | 61 | Run globally: 62 | 63 | ```bash 64 | ddg-search-mcp 65 | ``` 66 | 67 | </details> 68 | 69 | <details> 70 | <summary><b>Local Installation (Development)</b></summary> 71 | 72 | ```bash 73 | git clone https://github.com/OEvortex/ddg_search.git 74 | cd ddg_search 75 | npm install 76 | npm start 77 | ``` 78 | 79 | </details> 80 | 81 | --- 82 | 83 | ## 🧑💻 Command Line Options 84 | 85 | ```bash 86 | npx -y @oevortex/ddg_search@latest --help 87 | ``` 88 | 89 | > [!TIP] 90 | > Use the <code>--version</code> flag to check which version you're running. 91 | 92 | --- 93 | 94 | ## 🤖 Using with MCP Clients 95 | 96 | > [!IMPORTANT] 97 | > The most common way to use this tool is by integrating it with MCP-compatible AI assistants. 98 | 99 | Add the server to your MCP client configuration: 100 | 101 | ```json 102 | { 103 | "mcpServers": { 104 | "ddg-search": { 105 | "command": "npx", 106 | "args": ["-y", "@oevortex/ddg_search@latest"] 107 | } 108 | } 109 | } 110 | ``` 111 | 112 | Or if installed globally: 113 | 114 | ```json 115 | { 116 | "mcpServers": { 117 | "ddg-search": { 118 | "command": "ddg-search-mcp" 119 | } 120 | } 121 | } 122 | ``` 123 | 124 | > [!TIP] 125 | > After configuring, restart your MCP client to apply the changes. 
126 | 127 | --- 128 | 129 | ## 🧰 Tools Overview 130 | 131 | <div style="display: flex; flex-wrap: wrap; gap: 2.5em; margin: 1.5em 0;"> 132 | <div style="margin-bottom: 1.5em;"> 133 | <b>🔍 Web Search Tool</b><br/> 134 | <code>web-search</code><br/> 135 | <ul> 136 | <li><b>query</b> (string, required): The search query</li> 137 | <li><b>page</b> (integer, optional, default: 1): Page number</li> 138 | <li><b>numResults</b> (integer, optional, default: 10): Number of results (1-20)</li> 139 | </ul> 140 | <i>Example: Search the web for "climate change solutions"</i> 141 | </div> 142 | <div style="margin-bottom: 1.5em;"> 143 | <b>🧠 Felo AI Search Tool</b><br/> 144 | <code>felo-search</code><br/> 145 | <ul> 146 | <li><b>query</b> (string, required): The search query or prompt</li> 147 | <li><b>stream</b> (boolean, optional, default: false): Whether to stream the response</li> 148 | </ul> 149 | <i>Example: Search Felo AI for "Explain quantum computing in simple terms"</i> 150 | </div> 151 | <div style="margin-bottom: 1.5em;"> 152 | <b>📄 Fetch URL Tool</b><br/> 153 | <code>fetch-url</code><br/> 154 | <ul> 155 | <li><b>url</b> (string, required): The URL to fetch</li> 156 | <li><b>maxLength</b> (integer, optional, default: 10000): Max content length</li> 157 | <li><b>extractMainContent</b> (boolean, optional, default: true): Extract main content</li> 158 | <li><b>includeLinks</b> (boolean, optional, default: true): Include link text</li> 159 | <li><b>includeImages</b> (boolean, optional, default: true): Include image alt text</li> 160 | <li><b>excludeTags</b> (array, optional): Tags to exclude</li> 161 | </ul> 162 | <i>Example: Fetch the content from "https://example.com"</i> 163 | </div> 164 | <div style="margin-bottom: 1.5em;"> 165 | <b>📊 URL Metadata Tool</b><br/> 166 | <code>url-metadata</code><br/> 167 | <ul> 168 | <li><b>url</b> (string, required): The URL to extract metadata from</li> 169 | </ul> 170 | <i>Example: Get metadata for "https://example.com"</i> 171 | 
</div> 172 | </div> 173 | 174 | --- 175 | 176 | ## 📁 Project Structure 177 | 178 | 179 | ```text 180 | bin/ # Command-line interface 181 | src/ 182 | index.js # Main entry point 183 | tools/ # Tool definitions and handlers 184 | searchTool.js 185 | fetchUrlTool.js 186 | metadataTool.js 187 | feloTool.js 188 | utils/ 189 | search.js # Search and URL utilities 190 | search_felo.js # Felo AI search utilities 191 | package.json 192 | README.md 193 | ``` 194 | 195 | --- 196 | 197 | ## 🤝 Contributing 198 | 199 | 200 | Contributions are welcome! Please open issues or submit pull requests. 201 | 202 | > [!NOTE] 203 | > Please follow the existing code style and add tests for new features. 204 | 205 | --- 206 | 207 | ## 📺 YouTube Channel 208 | 209 | 210 | <div align="center"> 211 | <a href="https://youtube.com/@OEvortex"><img src="https://img.shields.io/badge/YouTube-%40OEvortex-red.svg" alt="YouTube Channel" /></a> 212 | <br/> 213 | <a href="https://youtube.com/@OEvortex">youtube.com/@OEvortex</a> 214 | </div> 215 | 216 | --- 217 | 218 | ## 📄 License 219 | 220 | 221 | Apache License 2.0 222 | 223 | > [!NOTE] 224 | > This project is licensed under the Apache License 2.0 – see the <a href="LICENSE">LICENSE</a> file for details. 
225 | 226 | --- 227 | 228 | <div align="center"> 229 | <sub>Made with ❤️ by <a href="https://youtube.com/@OEvortex">@OEvortex</a></sub> 230 | </div> 231 | ``` -------------------------------------------------------------------------------- /smithery.yaml: -------------------------------------------------------------------------------- ```yaml 1 | runtime: typescript 2 | 3 | build: 4 | external: 5 | - canvas 6 | - utf-8-validate 7 | - bufferutil 8 | esbuild: 9 | bundle: true 10 | platform: node 11 | format: cjs 12 | target: node18 13 | ``` -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- ```json 1 | { 2 | "compilerOptions": { 3 | "target": "ES2022", 4 | "module": "ES2022", 5 | "moduleResolution": "node", 6 | "allowSyntheticDefaultImports": true, 7 | "esModuleInterop": true, 8 | "allowJs": true, 9 | "checkJs": false, 10 | "outDir": "./dist", 11 | "rootDir": "./src", 12 | "strict": false, 13 | "skipLibCheck": true, 14 | "forceConsistentCasingInFileNames": true, 15 | "declaration": true, 16 | "declarationMap": true, 17 | "sourceMap": true, 18 | "types": ["node"] 19 | }, 20 | "include": [ 21 | "src/**/*" 22 | ], 23 | "exclude": [ 24 | "node_modules", 25 | "dist", 26 | "bin" 27 | ] 28 | } ``` -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- ```dockerfile 1 | # Build stage: install dependencies 2 | FROM node:22-slim AS build 3 | WORKDIR /app 4 | 5 | # Copy package manifests and lockfile first for better caching 6 | COPY package.json package-lock.json ./ 7 | 8 | # Install production dependencies (use npm ci when lockfile exists) 9 | RUN if [ -f package-lock.json ]; then npm ci --production; else npm install --production; fi 10 | 11 | # Copy application source 12 | COPY . . 
13 | 14 | # Final minimal runtime image 15 | FROM node:22-slim AS runtime 16 | WORKDIR /app 17 | 18 | # Copy node_modules and built app from build stage 19 | COPY --from=build /app/node_modules ./node_modules 20 | COPY --from=build /app . 21 | 22 | # Expose port in case the MCP server needs it 23 | EXPOSE 3000 24 | 25 | # Default command: use the CLI entry which starts the MCP server 26 | CMD ["node", "bin/cli.js"] 27 | ``` -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- ```json 1 | {"name":"@oevortex/ddg_search","version":"1.1.2","description":"A Model Context Protocol server for web search using DuckDuckGo and Felo AI","main":"src/index.js","module":"src/index.ts","exports":{".":{"import":"./src/index.js","default":"./src/index.js"}},"bin":{"ddg-search-mcp":"bin/cli.js","oevortex-ddg-search":"bin/cli.js"},"scripts":{"test":"echo \"Error: no test specified\" && exit 1","start":"node bin/cli.js","prepublishOnly":"npm run lint","lint":"echo \"No linting configured\"","build":"npx @smithery/cli build","dev":"npx @smithery/cli dev"},"publishConfig":{"access":"public"},"keywords":["mcp","model-context-protocol","duckduckgo","felo","search","web-search","ai-search","claude","ai","llm"],"author":"OEvortex","license":"Apache-2.0","type":"module","dependencies":{"@modelcontextprotocol/sdk":"^1.17.4","axios":"^1.8.4","cheerio":"^1.0.0","jsdom":"^26.1.0","smithery":"^0.5.2","uuid":"^9.0.1"},"devDependencies":{"@types/node":"^24.3.0","tsx":"^4.20.4","typescript":"^5.9.2"}} ``` -------------------------------------------------------------------------------- /src/tools/metadataTool.js: -------------------------------------------------------------------------------- ```javascript 1 | import { extractUrlMetadata } from '../utils/search.js'; 2 | 3 | /** 4 | * URL metadata tool definition 5 | */ 6 | export const metadataToolDefinition = { 7 | name: 
/**
 * URL metadata tool handler.
 *
 * Looks up metadata for `params.url` through `extractUrlMetadata` and renders
 * the title, description, Open Graph image and favicon as a short Markdown
 * summary. Any field that is missing or empty is shown as "None".
 *
 * Errors raised by `extractUrlMetadata` are not caught here; they propagate
 * to the caller.
 *
 * @param {Object} params - The tool parameters
 * @param {string} params.url - The URL to extract metadata from
 * @returns {Promise<Object>} - Tool result holding a single text content item
 */
export async function metadataToolHandler(params) {
  const { url } = params;

  // Diagnostics go to stderr: when the server is connected through the stdio
  // transport, stdout is the protocol channel and must stay free of log text.
  console.error(`Extracting metadata from URL: ${url}`);

  const metadata = await extractUrlMetadata(url);

  // Every field gets the same 'None' fallback so an absent title/description
  // never shows up as the literal string "undefined".
  const formattedMetadata = `
## URL Metadata for ${url}

**Title:** ${metadata.title || 'None'}

**Description:** ${metadata.description || 'None'}

**Image:** ${metadata.ogImage || 'None'}

**Favicon:** ${metadata.favicon || 'None'}
`.trim();

  return {
    content: [
      {
        type: 'text',
        text: formattedMetadata
      }
    ]
  };
}
/**
 * Web search tool handler.
 *
 * Runs a DuckDuckGo search through `searchDuckDuckGo` and renders the hits as
 * a numbered Markdown list (title linked to its URL, snippet underneath).
 *
 * Errors raised by `searchDuckDuckGo` are not caught here; they propagate to
 * the caller.
 *
 * @param {Object} params - The tool parameters
 * @param {string} params.query - The search query
 * @param {number} [params.page=1] - Page number passed through to the search
 * @param {number} [params.numResults=10] - Result count passed through to the search
 * @returns {Promise<Object>} - Tool result holding a single text content item;
 *   the text is 'No results found.' when the formatted list is empty
 */
export async function searchToolHandler(params) {
  const { query, page = 1, numResults = 10 } = params;

  // Diagnostics go to stderr: when the server is connected through the stdio
  // transport, stdout is the protocol channel and must stay free of log text.
  console.error(`Searching for: ${query} (page ${page}, ${numResults} results)`);

  const results = await searchDuckDuckGo(query, page, numResults);
  console.error(`Found ${results.length} results`);

  // One numbered entry per hit, entries separated by a blank line.
  const entries = results.map(
    (result, index) => `${index + 1}. [${result.title}](${result.url})\n ${result.snippet}`
  );
  const formattedResults = entries.join('\n\n');

  return {
    content: [
      {
        type: 'text',
        text: formattedResults || 'No results found.'
      }
    ]
  };
}
/**
 * Felo AI search tool handler.
 *
 * Calls `searchFelo` in either streaming or non-streaming mode and returns
 * the answer as one text content item. In streaming mode the awaited result
 * of `searchFelo(query, true)` is consumed with `for await` and the chunks
 * are concatenated, so the caller still receives a single complete string.
 *
 * Failures are not thrown: they are logged and reported as a tool result
 * with `isError: true`.
 *
 * @param {Object} params - The tool parameters
 * @param {string} params.query - The search query or prompt
 * @param {boolean} [params.stream=false] - Whether to request a streamed response
 * @returns {Promise<Object>} - Tool result; the text is 'No results found.'
 *   when the response is empty
 */
export async function feloToolHandler(params) {
  const { query, stream = false } = params;

  // Diagnostics go to stderr: when the server is connected through the stdio
  // transport, stdout is the protocol channel and must stay free of log text.
  console.error(`Searching Felo AI for: "${query}" (stream: ${stream})`);

  try {
    let responseText;

    if (stream) {
      // Only the concatenated text is needed, so the individual chunks are
      // not retained.
      responseText = '';
      for await (const chunk of await searchFelo(query, true)) {
        responseText += chunk;
      }
    } else {
      responseText = await searchFelo(query, false);
    }

    return {
      content: [
        {
          type: 'text',
          text: responseText || 'No results found.'
        }
      ]
    };
  } catch (error) {
    console.error(`Error in Felo search: ${error.message}`);
    return {
      isError: true,
      content: [
        {
          type: 'text',
          text: `Error searching Felo: ${error.message}`
        }
      ]
    };
  }
}
41 | }; 42 | }); 43 | 44 | // Handle tool execution 45 | server.setRequestHandler(CallToolRequestSchema, async (request) => { 46 | try { 47 | const { name, arguments: args } = request.params; 48 | console.log(`Tool call received: ${name} with args:`, args); 49 | 50 | // Route to the appropriate tool handler 51 | switch (name) { 52 | case 'web-search': 53 | return await searchToolHandler(args); 54 | 55 | case 'fetch-url': 56 | return await fetchUrlToolHandler(args); 57 | 58 | case 'url-metadata': 59 | return await metadataToolHandler(args); 60 | 61 | case 'felo-search': 62 | return await feloToolHandler(args); 63 | 64 | default: 65 | throw new Error(`Tool not found: ${name}`); 66 | } 67 | } catch (error: any) { 68 | console.error(`Error handling ${request.params.name} tool call:`, error); 69 | 70 | // Return proper tool execution error format 71 | return { 72 | isError: true, 73 | content: [ 74 | { 75 | type: 'text', 76 | text: `Error executing tool '${request.params.name}': ${error.message}` 77 | } 78 | ] 79 | }; 80 | } 81 | }); 82 | 83 | console.log('MCP server created successfully'); 84 | 85 | // Return the server instance (required for Smithery) 86 | return server; 87 | } 88 | 89 | // Optional: No configuration schema needed for this server 90 | // export const configSchema = z.object({}); ``` -------------------------------------------------------------------------------- /src/tools/fetchUrlTool.js: -------------------------------------------------------------------------------- ```javascript 1 | import { fetchUrlContent } from '../utils/search.js'; 2 | 3 | /** 4 | * Fetch URL tool definition 5 | */ 6 | export const fetchUrlToolDefinition = { 7 | name: 'fetch-url', 8 | title: 'Fetch URL Content', 9 | description: 'Fetch and extract the main content from any URL, with customizable extraction options for text, links, and images', 10 | inputSchema: { 11 | type: 'object', 12 | properties: { 13 | url: { 14 | type: 'string', 15 | description: 'The URL to fetch 
/**
 * Fetch URL tool handler.
 *
 * Retrieves the text content of `params.url` through `fetchUrlContent`, cuts
 * it to `maxLength` characters when it is longer, and appends a short block
 * describing the extraction settings that were used.
 *
 * Failures are not thrown: they are logged and reported as a tool result
 * with `isError: true`.
 *
 * @param {Object} params - The tool parameters
 * @param {string} params.url - The URL to fetch
 * @param {number} [params.maxLength=10000] - Maximum number of content characters to return
 * @param {boolean} [params.extractMainContent=true] - Passed through to `fetchUrlContent`
 * @param {boolean} [params.includeLinks=true] - Passed through to `fetchUrlContent`
 * @param {boolean} [params.includeImages=true] - Passed through to `fetchUrlContent`
 * @param {string[]} [params.excludeTags] - Tags to exclude; passed through to `fetchUrlContent`
 * @returns {Promise<Object>} - Tool result holding a single text content item
 */
export async function fetchUrlToolHandler(params) {
  const {
    url,
    maxLength = 10000,
    extractMainContent = true,
    includeLinks = true,
    includeImages = true,
    excludeTags = ['script', 'style', 'noscript', 'iframe', 'svg', 'nav', 'footer', 'header', 'aside']
  } = params;

  // Diagnostics go to stderr: when the server is connected through the stdio
  // transport, stdout is the protocol channel and must stay free of log text.
  console.error(`Fetching content from URL: ${url} (maxLength: ${maxLength})`);

  try {
    const content = await fetchUrlContent(url, {
      extractMainContent,
      includeLinks,
      includeImages,
      excludeTags
    });

    // Computed once; drives both the truncation and the note in the summary.
    const wasTruncated = content.length > maxLength;
    const truncatedContent = wasTruncated
      ? content.substring(0, maxLength) + '... [Content truncated due to length]'
      : content;

    // The reported length is that of the full content, before truncation.
    const metadata = `
---
Extraction settings:
- URL: ${url}
- Main content extraction: ${extractMainContent ? 'Enabled' : 'Disabled'}
- Links included: ${includeLinks ? 'Yes' : 'No'}
- Images included: ${includeImages ? 'Yes (as alt text)' : 'No'}
- Content length: ${content.length} characters${wasTruncated ? ` (truncated to ${maxLength})` : ''}
---
`;

    return {
      content: [
        {
          type: 'text',
          text: truncatedContent + metadata
        }
      ]
    };
  } catch (error) {
    console.error(`Error fetching URL ${url}:`, error);
    return {
      isError: true,
      content: [
        {
          type: 'text',
          text: `Error fetching URL: ${error.message}`
        }
      ]
    };
  }
}
18 | metadataToolDefinition, 19 | feloToolDefinition 20 | ]; 21 | 22 | console.log('Available tools:', availableTools.map(t => t.name)); 23 | 24 | // Create the MCP server using the Server class 25 | const server = new Server({ 26 | name: 'ddg-search-mcp', 27 | version: '1.1.2' 28 | }, { 29 | capabilities: { 30 | tools: { 31 | listChanged: true 32 | } 33 | } 34 | }); 35 | 36 | // Define available tools 37 | server.setRequestHandler(ListToolsRequestSchema, async () => { 38 | console.log('Tools list requested, returning:', availableTools.length, 'tools'); 39 | return { 40 | tools: availableTools 41 | }; 42 | }); 43 | 44 | // Handle tool execution 45 | server.setRequestHandler(CallToolRequestSchema, async (request) => { 46 | try { 47 | const { name, arguments: args } = request.params; 48 | console.log(`Tool call received: ${name} with args:`, args); 49 | 50 | // Route to the appropriate tool handler 51 | switch (name) { 52 | case 'web-search': 53 | return await searchToolHandler(args); 54 | 55 | case 'fetch-url': 56 | return await fetchUrlToolHandler(args); 57 | 58 | case 'url-metadata': 59 | return await metadataToolHandler(args); 60 | 61 | case 'felo-search': 62 | return await feloToolHandler(args); 63 | 64 | default: 65 | throw new Error(`Tool not found: ${name}`); 66 | } 67 | } catch (error) { 68 | console.error(`Error handling ${request.params.name} tool call:`, error); 69 | 70 | // Return proper tool execution error format 71 | return { 72 | isError: true, 73 | content: [ 74 | { 75 | type: 'text', 76 | text: `Error executing tool '${request.params.name}': ${error.message}` 77 | } 78 | ] 79 | }; 80 | } 81 | }); 82 | 83 | console.log('MCP server created successfully'); 84 | 85 | // Return the server instance (required for Smithery) 86 | return server; 87 | } 88 | 89 | // Legacy standalone server support (for CLI usage) 90 | if (import.meta.url === `file://${process.argv[1]}`) { 91 | async function main() { 92 | try { 93 | const { StdioServerTransport } = await 
/**
 * Build the MCP server, register its tools and serve it over stdio.
 *
 * Steps:
 *   1. dynamically import the four tool modules from `modulePath`;
 *   2. create the `Server` and register the list-tools and call-tool handlers;
 *   3. print the banner to stderr;
 *   4. connect the server to a `StdioServerTransport`.
 *
 * Everything this function prints goes through `console.error`, i.e. stderr.
 *
 * Tool-call failures (including an unknown tool name) are not thrown to the
 * client: they are logged and returned as a result with `isError: true`.
 * A failure during start-up itself is logged and ends the process with
 * exit code 1.
 *
 * @returns {Promise<void>}
 */
async function startServer() {
  try {
    // The tool modules do not depend on one another, so load them together
    // instead of awaiting each import in turn.
    const [searchTool, fetchUrlTool, metadataTool, feloTool] = await Promise.all([
      import(`${modulePath}/tools/searchTool.js`),
      import(`${modulePath}/tools/fetchUrlTool.js`),
      import(`${modulePath}/tools/metadataTool.js`),
      import(`${modulePath}/tools/feloTool.js`)
    ]);

    // Create the MCP server
    const server = new Server({
      id: 'ddg-search-mcp',
      name: 'DuckDuckGo & Felo AI Search MCP',
      description: 'A Model Context Protocol server for web search using DuckDuckGo and Felo AI',
      version: '1.1.2'
    }, {
      capabilities: {
        tools: {
          listChanged: true
        }
      }
    });

    // Definitions returned for a list-tools request
    const availableTools = [
      searchTool.searchToolDefinition,
      fetchUrlTool.fetchUrlToolDefinition,
      metadataTool.metadataToolDefinition,
      feloTool.feloToolDefinition
    ];

    // Single routing table: tool name -> handler. A Map (rather than a plain
    // object) so that only the names listed here can ever resolve.
    const toolHandlers = new Map([
      ['web-search', searchTool.searchToolHandler],
      ['fetch-url', fetchUrlTool.fetchUrlToolHandler],
      ['url-metadata', metadataTool.metadataToolHandler],
      ['felo-search', feloTool.feloToolHandler]
    ]);

    // Define available tools
    server.setRequestHandler(ListToolsRequestSchema, async () => ({
      tools: availableTools
    }));

    // Handle tool execution
    server.setRequestHandler(CallToolRequestSchema, async (request) => {
      try {
        const { name, arguments: args } = request.params;

        const handler = toolHandlers.get(name);
        if (!handler) {
          throw new Error(`Unknown tool: ${name}`);
        }

        return await handler(args);
      } catch (error) {
        console.error(`Error handling ${request.params.name} tool call:`, error);

        // Return proper tool execution error format
        return {
          isError: true,
          content: [
            {
              type: 'text',
              text: `Error executing tool '${request.params.name}': ${error.message}`
            }
          ]
        };
      }
    });

    // Display promotional message
    console.error('\n\x1b[36m╔════════════════════════════════════════════════════════════╗');
    console.error('║ ║');
    console.error('║ \x1b[1m\x1b[31mDuckDuckGo & Felo AI Search MCP\x1b[0m\x1b[36m by \x1b[1m\x1b[33m@OEvortex\x1b[0m\x1b[36m ║');
    console.error('║ ║');
    console.error('║ \x1b[0m👉 Subscribe to \x1b[1m\x1b[37myoutube.com/@OEvortex\x1b[0m\x1b[36m for more tools! ║');
    console.error('║ ║');
    console.error('╚════════════════════════════════════════════════════════════╝\x1b[0m\n');

    // Start the server with stdio transport
    const transport = new StdioServerTransport();
    await server.connect(transport);
    console.error('DuckDuckGo & Felo AI Search MCP server started and listening on stdio');
  } catch (error) {
    console.error('Failed to start server:', error);
    process.exit(1);
  }
}
138 | 139 | For more information, visit: https://github.com/OEvortex/ddg_search 140 | `); 141 | process.exit(0); 142 | } 143 | 144 | if (versionFlag) { 145 | // Read version from package.json using fs 146 | import('fs/promises') 147 | .then(async ({ readFile }) => { 148 | try { 149 | const packageJson = JSON.parse( 150 | await readFile(new URL('../package.json', import.meta.url), 'utf8') 151 | ); 152 | console.log(`DuckDuckGo & Felo AI Search MCP v${packageJson.version}\nCreated by @OEvortex - Subscribe to youtube.com/@OEvortex!`); 153 | process.exit(0); 154 | } catch (err) { 155 | console.error('Error reading version information:', err); 156 | process.exit(1); 157 | } 158 | }) 159 | .catch(err => { 160 | console.error('Error importing fs module:', err); 161 | process.exit(1); 162 | }); 163 | } else { 164 | // Start the server 165 | startServer(); 166 | } 167 | ``` -------------------------------------------------------------------------------- /src/utils/search_felo.js: -------------------------------------------------------------------------------- ```javascript 1 | import axios from 'axios'; 2 | import { v4 as uuidv4 } from 'uuid'; 3 | import https from 'https'; 4 | 5 | // Rotating User Agents 6 | const USER_AGENTS = [ 7 | 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', 8 | 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Edge/120.0.0.0', 9 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2.1 Safari/605.1.15', 10 | 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:122.0) Gecko/20100101 Firefox/122.0', 11 | 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36' 12 | ]; 13 | 14 | // Cache results to avoid repeated requests 15 | const resultsCache = new Map(); 16 | const CACHE_DURATION = 5 * 60 * 1000; // 5 minutes 17 | 18 | // HTTPS agent configuration to handle certificate chain issues 19 | const 
// Pool of desktop browser user-agent strings; one is picked at random per request.
const USER_AGENTS = [
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Edge/120.0.0.0',
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2.1 Safari/605.1.15',
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:122.0) Gecko/20100101 Firefox/122.0',
  'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
];

// Cache results to avoid repeated requests: cache key -> { results, timestamp }
const resultsCache = new Map();
const CACHE_DURATION = 5 * 60 * 1000; // 5 minutes

// Shared HTTPS agent. Certificate verification stays on. `minVersion` sets a TLS 1.2
// floor while still allowing TLS 1.3; the previous `secureProtocol: 'TLSv1_2_method'`
// pinned connections to exactly TLS 1.2.
const httpsAgent = new https.Agent({
  rejectUnauthorized: true,
  keepAlive: true,
  timeout: 30000,
  minVersion: 'TLSv1.2'
});

// Create a persistent axios instance to maintain session state
const feloSession = axios.create({
  timeout: 30000,
  httpsAgent: httpsAgent,
  headers: {
    'accept': '*/*',
    // Only advertise encodings the HTTP client is expected to decode; `zstd` was
    // dropped because a zstd-encoded body would reach the stream parser undecoded.
    // NOTE(review): confirm against the installed axios version.
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'en-US,en;q=0.9,en-IN;q=0.8',
    'content-type': 'application/json',
    'dnt': '1',
    'origin': 'https://felo.ai',
    'referer': 'https://felo.ai/',
    'sec-ch-ua': '"Not)A;Brand";v="99", "Microsoft Edge";v="127", "Chromium";v="127"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-site',
    'user-agent': getRandomUserAgent()
  }
});

/**
 * Thin wrapper around the text of a Felo answer (or answer fragment).
 */
class Response {
  /**
   * @param {string} text - The text content of the response
   */
  constructor(text) {
    this.text = text;
  }

  /**
   * @returns {string} The wrapped text, unchanged
   */
  toString() {
    return this.text;
  }
}

/**
 * Picks one entry of USER_AGENTS uniformly at random.
 * @returns {string} A user agent string
 */
function getRandomUserAgent() {
  return USER_AGENTS[Math.floor(Math.random() * USER_AGENTS.length)];
}

/**
 * Builds the results-cache key for a Felo query.
 * @param {string} query - The search query
 * @returns {string} The query prefixed with "felo-"
 */
function getCacheKey(query) {
  return `felo-${query}`;
}
/**
 * Removes every cache entry older than CACHE_DURATION.
 */
function clearOldCache() {
  const now = Date.now();
  for (const [key, value] of resultsCache.entries()) {
    if (now - value.timestamp > CACHE_DURATION) {
      resultsCache.delete(key);
    }
  }
}

/**
 * Extracts the cumulative answer text from one line of the event stream.
 *
 * @param {string} line - A single line of the response body
 * @returns {string|null} The answer text when the line is a `data:` event whose JSON
 *   payload has `type === 'answer'` and a string `data.text`; otherwise null
 */
function parseAnswerText(line) {
  if (!line.startsWith('data:')) {
    return null;
  }
  const payload = line.slice(5).trim();
  if (!payload) {
    return null;
  }
  try {
    const event = JSON.parse(payload);
    const text = event?.type === 'answer' ? event.data?.text : undefined;
    return typeof text === 'string' ? text : null;
  } catch (error) {
    // Malformed events are skipped on purpose; the stream keeps going.
    console.debug('JSON parse error:', error.message);
    return null;
  }
}

/**
 * Turns the raw response body into the newly added answer text of each event.
 *
 * Each answer event carries the full answer so far, so only the part beyond what has
 * already been yielded is emitted. Bytes are decoded with a streaming decoder so that
 * a multi-byte UTF-8 character split across two chunks is not corrupted, and a final
 * line without a trailing newline is still processed.
 *
 * @param {AsyncIterable<Uint8Array|string>} byteStream - The response body chunks
 * @returns {AsyncGenerator<string>} The successive answer deltas
 */
async function* extractAnswerDeltas(byteStream) {
  const decoder = new TextDecoder();
  let answerSoFar = '';
  let pending = '';

  function* deltasFrom(lines) {
    for (const line of lines) {
      const text = parseAnswerText(line);
      if (text !== null && text.length > answerSoFar.length) {
        const delta = text.slice(answerSoFar.length);
        answerSoFar = text;
        yield delta;
      }
    }
  }

  for await (const chunk of byteStream) {
    pending += typeof chunk === 'string' ? chunk : decoder.decode(chunk, { stream: true });
    const lines = pending.split('\n');
    pending = lines.pop() ?? ''; // keep the last (possibly incomplete) line for the next chunk
    yield* deltasFrom(lines);
  }

  // Flush bytes still held by the decoder and the unterminated last line.
  pending += decoder.decode();
  if (pending) {
    yield* deltasFrom([pending]);
  }
}

/**
 * Search using the Felo AI API
 *
 * Non-streaming calls are answered from the in-memory cache when a fresh entry exists.
 * A completed answer (streaming or not) is stored in the cache under the prompt.
 *
 * @param {string} prompt - The search query or prompt
 * @param {boolean} stream - If true, returns an async generator of answer fragments
 * @param {boolean} raw - If true, streamed fragments are `{ text }` objects instead of
 *   strings (the non-streaming result is a string either way)
 * @returns {Promise<string|AsyncGenerator<string|{text: string}>>} The search results
 * @throws {Error} When the request fails or the API answers with an error status
 */
async function searchFelo(prompt, stream = false, raw = false) {
  // Clear old cache entries
  clearOldCache();

  // Check cache first if not streaming
  if (!stream) {
    const cachedResults = resultsCache.get(getCacheKey(prompt));
    if (cachedResults && Date.now() - cachedResults.timestamp < CACHE_DURATION) {
      return cachedResults.results;
    }
  }

  // Create payload for Felo API with proper structure from reference
  const payload = {
    query: prompt,
    search_uuid: uuidv4().replace(/-/g, ''), // Remove dashes like in reference
    lang: "",
    agent_lang: "en",
    search_options: {
      langcode: "en-US",
      search_image: true,
      search_video: true
    },
    search_video: true,
    model: "",
    contexts_from: "google",
    auto_routing: true
  };

  // Rotate the user agent per request without mutating the shared session defaults.
  const requestConfig = {
    responseType: 'stream',
    headers: { 'user-agent': getRandomUserAgent() }
  };

  async function* streamFunction() {
    try {
      const response = await feloSession.post('https://api.felo.ai/search/threads', payload, requestConfig);

      // Check for HTTP errors (the body is a stream here, so it is not put in the message)
      if (response.status !== 200) {
        throw new Error(`Failed to generate response - (${response.status}, ${response.statusText})`);
      }

      let fullText = '';
      for await (const delta of extractAnswerDeltas(response.data)) {
        fullText += delta;
        yield raw ? { text: delta } : new Response(delta).toString();
      }

      // Cache the complete response
      if (fullText) {
        resultsCache.set(getCacheKey(prompt), {
          results: fullText,
          timestamp: Date.now()
        });
      }
    } catch (error) {
      console.error('Error searching Felo:', error.message);

      // Handle specific API errors
      if (error.response) {
        const { status, statusText, data } = error.response;
        // With responseType 'stream' the body is a stream object; only append real text.
        const detail = typeof data === 'string' && data ? ` - ${data}` : '';
        throw new Error(`Felo API error: ${status} ${statusText}${detail}`, { cause: error });
      }

      throw new Error(`Failed to search Felo: ${error.message}`, { cause: error });
    }
  }

  // If streaming is requested, return the generator
  if (stream) {
    return streamFunction();
  }

  // For non-streaming, collect all chunks and return as a single string
  let fullResponse = '';
  try {
    for await (const chunk of streamFunction()) {
      fullResponse += raw ? chunk.text : chunk;
    }
    return fullResponse;
  } catch (error) {
    console.error('Error in non-streaming Felo search:', error.message);
    throw error;
  }
}
// Constants
const RESULTS_PER_PAGE = 10;
const MAX_CACHE_PAGES = 5;

// Pool of desktop browser user-agent strings; one is picked at random per request.
const USER_AGENTS = [
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Edge/120.0.0.0',
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2.1 Safari/605.1.15',
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:122.0) Gecko/20100101 Firefox/122.0',
  'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
];

// Cache results to avoid repeated requests: cache key -> { results, timestamp }
const resultsCache = new Map();
const CACHE_DURATION = 5 * 60 * 1000; // 5 minutes

// Shared HTTPS agent. Certificate verification stays on. `minVersion` sets a TLS 1.2
// floor while still allowing TLS 1.3; the previous `secureProtocol: 'TLSv1_2_method'`
// pinned connections to exactly TLS 1.2, which fails against TLS-1.3-only hosts.
const httpsAgent = new https.Agent({
  rejectUnauthorized: true,
  keepAlive: true,
  timeout: 10000,
  minVersion: 'TLSv1.2'
});

/**
 * Picks one entry of USER_AGENTS uniformly at random.
 * @returns {string} A user agent string
 */
function getRandomUserAgent() {
  return USER_AGENTS[Math.floor(Math.random() * USER_AGENTS.length)];
}

/**
 * Generate a cache key for a search query and page
 * @param {string} query - The search query
 * @param {number} page - The page number
 * @returns {string} The cache key, "<query>-<page>"
 */
function getCacheKey(query, page) {
  return `${query}-${page}`;
}

/**
 * Removes every cache entry older than CACHE_DURATION.
 */
function clearOldCache() {
  const now = Date.now();
  for (const [key, value] of resultsCache.entries()) {
    if (now - value.timestamp > CACHE_DURATION) {
      resultsCache.delete(key);
    }
  }
}

/**
 * Extract the direct URL from a DuckDuckGo redirect URL
 *
 * Handles protocol-relative (`//…`) and site-relative (`/…`) links, the `/l/?uddg=`
 * result redirect and the `/y.js?u3=` ad redirect (whose target may itself carry the
 * final URL in an `ld` parameter). Anything else is returned as is.
 *
 * `URLSearchParams.get` already percent-decodes its value, so the extracted target is
 * NOT decoded a second time: doing so corrupts targets that contain `%xx` sequences,
 * throws on a literal `%`, and truncates nested URLs at their first `&`.
 *
 * @param {string} duckduckgoUrl - The DuckDuckGo URL to extract from
 * @returns {string} The direct URL ('' when the input is not a string)
 */
function extractDirectUrl(duckduckgoUrl) {
  if (typeof duckduckgoUrl !== 'string') {
    return '';
  }

  // Make relative DuckDuckGo links absolute
  let candidate = duckduckgoUrl;
  if (candidate.startsWith('//')) {
    candidate = `https:${candidate}`;
  } else if (candidate.startsWith('/')) {
    candidate = `https://duckduckgo.com${candidate}`;
  }

  let parsed;
  try {
    parsed = new URL(candidate);
  } catch {
    // Not parseable as a URL: fall back to the first http(s) URL found in the text
    const embedded = candidate.match(/https?:\/\/[^\s<>"]+/);
    return embedded ? embedded[0] : candidate;
  }

  if (parsed.hostname !== 'duckduckgo.com') {
    return candidate;
  }

  // Regular result redirect
  if (parsed.pathname === '/l/') {
    return parsed.searchParams.get('uddg') || candidate;
  }

  // Ad redirect
  if (parsed.pathname === '/y.js') {
    const adTarget = parsed.searchParams.get('u3');
    if (adTarget) {
      try {
        return new URL(adTarget).searchParams.get('ld') || adTarget;
      } catch {
        return candidate;
      }
    }
  }

  return candidate;
}

/**
 * Get a favicon URL for a given website URL
 * @param {string} url - The website URL
 * @returns {string} A Google favicon-service URL for the host, or '' if `url` is invalid
 */
function getFaviconUrl(url) {
  try {
    const { hostname } = new URL(url);
    return `https://www.google.com/s2/favicons?domain=${hostname}&sz=32`;
  } catch {
    return ''; // Return empty string if URL is invalid
  }
}
/**
 * Parses a DuckDuckGo HTML results page into result objects.
 * Entries without a title or link are skipped.
 *
 * @param {string} html - The results page markup
 * @returns {Array<{title: string, url: string, snippet: string, favicon: string, displayUrl: string}>}
 */
function parseSearchResults(html) {
  const $ = cheerio.load(html);
  const results = [];

  $('.result').each((_, element) => {
    const $result = $(element);
    const titleLink = $result.find('.result__title a');

    const title = titleLink.text().trim();
    const directLink = extractDirectUrl(titleLink.attr('href') || '');
    if (!title || !directLink) {
      return; // continue with the next result
    }

    results.push({
      title,
      url: directLink,
      snippet: $result.find('.result__snippet').text().trim(),
      favicon: getFaviconUrl(directLink),
      displayUrl: $result.find('.result__url').text().trim()
    });
  });

  return results;
}

/**
 * Scrapes search results from DuckDuckGo HTML
 *
 * Results are cached per (query, page) for CACHE_DURATION. The cache holds the full
 * parsed page and `numResults` is applied on the way out, so a later call asking for
 * more results than an earlier one is not short-changed by the cached entry.
 *
 * @param {string} query - The search query
 * @param {number} page - The page number, 1-based (default: 1; invalid values fall back to 1)
 * @param {number} numResults - Number of results to return (default: 10)
 * @returns {Promise<Array>} - Array of search results
 * @throws {Error} When the request fails; the error is logged and rethrown
 */
async function searchDuckDuckGo(query, page = 1, numResults = 10) {
  try {
    // Clear old cache entries
    clearOldCache();

    // A non-positive or fractional page would produce a negative/odd start offset
    const pageNumber = Number.isInteger(page) && page > 0 ? page : 1;
    const startIndex = (pageNumber - 1) * RESULTS_PER_PAGE;

    // Check cache first
    const cacheKey = getCacheKey(query, pageNumber);
    const cached = resultsCache.get(cacheKey);
    if (cached && Date.now() - cached.timestamp < CACHE_DURATION) {
      return cached.results.slice(0, numResults);
    }

    // Fetch results
    const response = await axios.get(
      `https://duckduckgo.com/html/?q=${encodeURIComponent(query)}&s=${startIndex}`,
      {
        headers: {
          'User-Agent': getRandomUserAgent()
        },
        timeout: 10000, // same limit fetchUrlContent uses; avoids hanging forever
        httpsAgent: httpsAgent
      }
    );

    if (response.status !== 200) {
      throw new Error('Failed to fetch search results');
    }

    const results = parseSearchResults(response.data);

    // Do not cache an empty page: it would be served again for the whole cache
    // lifetime even if a retry would succeed.
    if (results.length > 0) {
      resultsCache.set(cacheKey, {
        results,
        timestamp: Date.now()
      });

      // If cache is too big, remove oldest entries (Map iterates in insertion order)
      while (resultsCache.size > MAX_CACHE_PAGES) {
        resultsCache.delete(resultsCache.keys().next().value);
      }
    }

    return results.slice(0, numResults);
  } catch (error) {
    console.error('Error searching DuckDuckGo:', error.message);
    throw error;
  }
}
// Substring selectors for common page furniture that is not content.
const UNWANTED_SELECTORS = [
  '[id*="banner"]', '[class*="banner"]',
  '[id*="popup"]', '[class*="popup"]', '[class*="cookie"]',
  '[id*="cookie"]', '[class*="newsletter"]', '[id*="newsletter"]',
  '[class*="social"]', '[id*="social"]', '[class*="share"]', '[id*="share"]'
];

// "ad"/"ads" must be a whole word inside the id/class value (delimited by space, "-" or
// "_"). A plain substring match on "ad" also hits header, thread, lazyloaded, download…
const AD_MARKER = /(?:^|[\s_-])ads?(?:$|[\s_-])|advert|adsbygoogle/i;

/**
 * Tells whether an id/class attribute value marks an advertising element.
 * @param {string} value - The attribute value(s) to inspect
 * @returns {boolean} True when the value contains an ad marker
 */
function isAdMarker(value) {
  return AD_MARKER.test(value);
}

/**
 * Fetches the content of a URL and returns it as text
 *
 * HTML responses are stripped of unwanted elements and reduced to cleaned text,
 * preferring the first matching main-content container when requested. Other
 * responses are returned as text; data the HTTP client already parsed (e.g. JSON)
 * is re-serialised instead of becoming "[object Object]".
 *
 * @param {string} url - The URL to fetch
 * @param {Object} options - Options for content extraction
 * @param {boolean} options.extractMainContent - Whether to attempt to extract main content (default: true)
 * @param {boolean} options.includeLinks - Whether to include link text (default: true)
 * @param {boolean} options.includeImages - Whether to include image alt text (default: true)
 * @param {string[]} options.excludeTags - Tags to exclude from extraction
 * @returns {Promise<string>} - The content of the URL
 * @throws {Error} When the request fails; the error is logged and rethrown
 */
async function fetchUrlContent(url, options = {}) {
  try {
    // Default options
    const {
      extractMainContent = true,
      includeLinks = true,
      includeImages = true,
      excludeTags = ['script', 'style', 'noscript', 'iframe', 'svg', 'nav', 'footer', 'header', 'aside']
    } = options;

    const response = await axios.get(url, {
      headers: {
        'User-Agent': getRandomUserAgent()
      },
      timeout: 10000, // 10 second timeout
      httpsAgent: httpsAgent
    });

    if (response.status !== 200) {
      throw new Error(`Failed to fetch URL: ${url}`);
    }

    const { data } = response;
    const contentType = response.headers['content-type'] || '';

    // For non-HTML content, return it as text
    if (!contentType.includes('text/html')) {
      if (typeof data === 'string') {
        return data;
      }
      if (data == null) {
        return '';
      }
      return Buffer.isBuffer(data) ? data.toString() : JSON.stringify(data, null, 2);
    }

    const $ = cheerio.load(data);

    // Remove unwanted elements
    for (const tag of excludeTags) {
      $(tag).remove();
    }

    // Remove ads and other common unwanted elements
    $('[id], [class]')
      .filter((_, element) => isAdMarker(`${$(element).attr('id') ?? ''} ${$(element).attr('class') ?? ''}`))
      .remove();
    for (const selector of UNWANTED_SELECTORS) {
      $(selector).remove();
    }

    // Handle links and images
    if (!includeLinks) {
      $('a').each((_, link) => {
        $(link).replaceWith($(link).text());
      });
    }

    if (!includeImages) {
      $('img').remove();
    } else {
      // Replace images with their alt text; images without alt text are dropped
      $('img').each((_, img) => {
        const alt = $(img).attr('alt');
        if (alt) {
          $(img).replaceWith(`[Image: ${alt}]`);
        } else {
          $(img).remove();
        }
      });
    }

    // Try to extract main content if requested
    if (extractMainContent) {
      // Common content selectors in order of priority
      const contentSelectors = [
        'article', 'main', '[role="main"]', '.post-content', '.article-content',
        '.content', '#content', '.post', '.article', '.entry-content',
        '.page-content', '.post-body', '.post-text', '.story-body'
      ];

      for (const selector of contentSelectors) {
        const mainContent = $(selector).first();
        if (mainContent.length > 0) {
          return cleanText(mainContent.text());
        }
      }
    }

    // If no main content found or not requested, use the body
    return cleanText($('body').text());
  } catch (error) {
    console.error('Error fetching URL content:', error.message);
    throw error;
  }
}

/**
 * Cleans up text by removing excessive whitespace and normalizing line breaks
 *
 * Runs of spaces/tabs collapse to one space, spaces around line breaks are dropped,
 * and three or more consecutive line breaks collapse to a single blank line. Line
 * breaks themselves are kept: collapsing them first (as the previous implementation
 * did) made the line-break normalisation unreachable.
 *
 * @param {string} text - The text to clean
 * @returns {string} - The cleaned text ('' for null/undefined)
 */
function cleanText(text) {
  return String(text ?? '')
    .replace(/\r\n?/g, '\n')      // unify line endings
    .replace(/[^\S\n]+/g, ' ')    // collapse horizontal whitespace
    .replace(/ ?\n ?/g, '\n')     // no spaces hugging a line break
    .replace(/\n{3,}/g, '\n\n')   // at most one blank line in a row
    .trim();
}
/**
 * Resolves a possibly relative reference against a base URL.
 * @param {string} reference - The href/content value found in the page
 * @param {string} base - The URL the page was fetched from
 * @returns {string} The absolute URL, or '' when `reference` is empty or cannot be resolved
 */
function resolveUrl(reference, base) {
  if (!reference) {
    return '';
  }
  try {
    return new URL(reference, base).href;
  } catch {
    return '';
  }
}

/**
 * Extracts metadata from a URL (title, description, etc.)
 *
 * The description falls back from `<meta name="description">` to `og:description`.
 * The favicon falls back to the Google favicon service when the page declares none
 * (or declares one that cannot be resolved), so one malformed href no longer aborts
 * the whole extraction.
 *
 * @param {string} url - The URL to extract metadata from
 * @returns {Promise<Object>} - The metadata: { title, description, ogImage, favicon, url }
 * @throws {Error} When the request fails; the error is logged and rethrown
 */
async function extractUrlMetadata(url) {
  try {
    const response = await axios.get(url, {
      headers: {
        'User-Agent': getRandomUserAgent()
      },
      timeout: 10000, // same limit fetchUrlContent uses; avoids hanging forever
      httpsAgent: httpsAgent
    });

    if (response.status !== 200) {
      throw new Error(`Failed to fetch URL: ${url}`);
    }

    // The HTTP client may hand back parsed JSON for non-HTML responses; only markup
    // text is meaningful to the parser.
    const $ = cheerio.load(typeof response.data === 'string' ? response.data : '');

    // Extract metadata
    const title = $('title').text().trim();
    const description = $('meta[name="description"]').attr('content') ||
      $('meta[property="og:description"]').attr('content') || '';
    const ogImage = $('meta[property="og:image"]').attr('content') || '';
    const favicon = $('link[rel="icon"]').attr('href') ||
      $('link[rel="shortcut icon"]').attr('href') || '';

    return {
      title,
      description,
      ogImage: resolveUrl(ogImage, url),
      favicon: resolveUrl(favicon, url) || getFaviconUrl(url),
      url
    };
  } catch (error) {
    console.error('Error extracting URL metadata:', error.message);
    throw error;
  }
}