# Directory Structure
```
├── .env.example
├── .gitignore
├── claude_desktop_config_sample.json
├── package.json
├── README.md
├── src
│ ├── client-example.ts
│ ├── index.ts
│ ├── services
│ │ ├── firecrawl.ts
│ │ ├── openai.ts
│ │ └── research.ts
│ └── types
│ └── index.ts
└── tsconfig.json
```
# Files
--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
```
# OpenAI API Key
OPENAI_API_KEY=sk-your-openai-api-key
# Firecrawl API Key
FIRECRAWL_API_KEY=your-firecrawl-api-key
# Server Configuration
PORT=3000
MCP_SERVER_NAME=DeepResearch
MCP_SERVER_VERSION=1.0.0
```
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
```
# dependencies
node_modules/
npm-debug.log*
yarn-debug.log*
yarn-error.log*
package-lock.json
# build output
dist/
build/
*.tsbuildinfo
# environment variables
.env
.env.local
.env.development.local
.env.test.local
.env.production.local
# logs
logs/
*.log
# OS generated files
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db
# IDE files
.idea/
.vscode/
*.swp
*.swo
*.sublime-project
*.sublime-workspace
# Claude Desktop specific
claude_desktop_config.json
```
--------------------------------------------------------------------------------
/claude_desktop_config_sample.json:
--------------------------------------------------------------------------------
```json
{
"mcpServers": {
"deep-research": {
"command": "node",
"args": [
"path to deep-research-mcp/dist/index.js"
],
"env": {
"FIRECRAWL_API_KEY": "your-firecrawl-api-key",
"OPENAI_API_KEY": "your-openai-api-key"
}
}
}
}
```
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
```json
{
"name": "deep-research-mcp",
"version": "1.0.0",
"type": "module",
"main": "dist/index.js",
"scripts": {
"build": "tsc",
"start": "node dist/index.js",
"dev": "tsc -w & node --watch dist/index.js",
"client": "node dist/client-example.js"
},
"keywords": [],
"author": "",
"license": "ISC",
"description": "A Deep Research MCP Server",
"dependencies": {
"@modelcontextprotocol/sdk": "^1.6.1",
"axios": "^1.8.1",
"dotenv": "^16.4.7",
"openai": "^4.86.1",
"typescript": "^5.8.2",
"zod": "^3.24.2"
}
}
```
--------------------------------------------------------------------------------
/src/types/index.ts:
--------------------------------------------------------------------------------
```typescript
/**
 * Mutable state of a single research session, updated after every research step.
 */
export interface ResearchState {
/** Original research question the session was started with. */
query: string;
/** Maximum number of research steps; no further step runs once currentDepth reaches it. */
depth: number;
/** Number of steps executed so far (starts at 0, incremented once per step, also on failure). */
currentDepth: number;
/** Search topics used so far, in execution order. */
topics: string[];
/** One formatted block of search results per executed step. */
findings: string[];
/** Topic for the next step; starts as the query, and the query is used again when this is null. */
nextSearchTopic: string | null;
/** Whether the latest analysis recommended continuing the research. */
shouldContinue: boolean;
}
/**
 * Results of one web search.
 */
export interface SearchResult {
/** Query string that was searched for. */
query: string;
/** Individual hits; may be empty when the search found nothing. */
results: Array<{
/** Page title (the Firecrawl adapter substitutes a placeholder when it is missing). */
title: string;
/** Page URL (the Firecrawl adapter substitutes a placeholder when it is missing). */
url: string;
/** Page text used as research material. */
content: string;
/** Score attached to the hit; presumably a relevance score - confirm against the search provider. */
score: number;
}>;
}
/**
 * Analysis result from the LLM; both fields are copied into the research state after each step.
 */
export interface AnalysisResult {
/** Suggested topic for the next search, or null when none was proposed. */
nextSearchTopic: string | null;
/** Whether the analysis recommends continuing the research. */
shouldContinue: boolean;
}
/**
 * Final research report together with the session data it was produced from.
 */
export interface ResearchReport {
/** Original research question. */
query: string;
/** All findings collected during the session. */
findings: string[];
/** All topics searched during the session. */
topics: string[];
/** Generated report text. */
report: string;
}
```
--------------------------------------------------------------------------------
/src/services/firecrawl.ts:
--------------------------------------------------------------------------------
```typescript
import axios from 'axios';
import { SearchResult } from '../types/index.js';
import dotenv from 'dotenv';
// Load variables from a local .env file before the key is read.
dotenv.config();
const FIRECRAWL_API_KEY = process.env.FIRECRAWL_API_KEY;
// Fail fast at import time: every search request sends this key in its Authorization header.
if (FIRECRAWL_API_KEY === undefined || FIRECRAWL_API_KEY === '') {
  throw new Error('FIRECRAWL_API_KEY is not set in environment variables');
}
/** Raw item of a Firecrawl search response; only the fields read by this module are listed. */
interface FirecrawlSearchItem {
  title?: unknown;
  url?: unknown;
  markdown?: unknown;
  description?: unknown;
  score?: unknown;
}

/** Returns `value` when it is a non-empty string, otherwise `fallback`. */
function stringOr(value: unknown, fallback: string): string {
  return typeof value === 'string' && value.length > 0 ? value : fallback;
}

/** Picks the text for a result: non-blank markdown first, then the description, then a placeholder. */
function pickContent(item: FirecrawlSearchItem): string {
  if (typeof item.markdown === 'string' && item.markdown.trim().length > 0) {
    return item.markdown;
  }
  return stringOr(item.description, 'No content available');
}

/**
 * Search the web using the Firecrawl v1 search API.
 *
 * @param query Search query
 * @param maxResults Maximum number of results to request and return
 * @param searchDepth Search depth ('basic' or 'advanced'); currently only logged,
 *   it is not part of the request sent to Firecrawl
 * @returns The query together with the normalized results (empty when nothing was found)
 * @throws Error prefixed with "Failed to search with Firecrawl:" on HTTP failures,
 *   timeouts, or an unexpected response structure
 */
export async function searchWeb(
  query: string,
  maxResults: number = 5,
  searchDepth: 'basic' | 'advanced' = 'advanced'
): Promise<SearchResult> {
  try {
    console.error(`[Firecrawl] Searching for: "${query}" with depth: ${searchDepth}`);
    // Using only supported parameters according to Firecrawl v1 API
    const response = await axios.post(
      'https://api.firecrawl.dev/v1/search',
      {
        query: query,
        limit: maxResults,
        scrapeOptions: {
          formats: ["markdown"], // Get markdown content
          timeout: 25000, // Scrape timeout, kept below the overall request timeout
          blockAds: true // Block ads for cleaner content
        },
        country: "us", // Set country for consistent results
        lang: "en" // Set language to English
      },
      {
        headers: {
          'Content-Type': 'application/json',
          'Authorization': `Bearer ${FIRECRAWL_API_KEY}`
        },
        timeout: 30000 // Timeout for the overall HTTP request
      }
    );

    // Validate the envelope; optional chaining keeps a missing body from raising a TypeError.
    const payload = response.data;
    if (!payload?.success || !Array.isArray(payload.data)) {
      // String(...) guards against JSON.stringify returning undefined for an undefined body.
      console.error('[Firecrawl] Invalid response structure:', String(JSON.stringify(payload)).substring(0, 500) + '...');
      throw new Error('Invalid response structure from Firecrawl');
    }

    const items: Array<FirecrawlSearchItem | null | undefined> = payload.data;
    if (items.length === 0) {
      console.warn(`[Firecrawl] No results found for query: "${query}"`);
      return {
        query,
        results: []
      };
    }

    return {
      query,
      // Trim first so that only the items actually returned are normalized.
      results: items.slice(0, maxResults).map((raw, index) => {
        const item: FirecrawlSearchItem = raw ?? {};
        return {
          title: stringOr(item.title, `Result ${index + 1} for "${query}"`),
          url: stringOr(item.url, 'No URL available'),
          content: pickContent(item),
          // Keep a legitimate score of 0; default only when no usable number was supplied.
          score: typeof item.score === 'number' && Number.isFinite(item.score) ? item.score : 1.0
        };
      })
    };
  } catch (error) {
    console.error('[Firecrawl] Search error:', error);
    // Include HTTP status and response body when the server answered with an error.
    if (axios.isAxiosError(error) && error.response) {
      throw new Error(`Failed to search with Firecrawl: ${error.response.status} ${error.response.statusText} - ${JSON.stringify(error.response.data)}`);
    }
    throw new Error(`Failed to search with Firecrawl: ${error instanceof Error ? error.message : String(error)}`);
  }
}
```
--------------------------------------------------------------------------------
/src/services/research.ts:
--------------------------------------------------------------------------------
```typescript
import { ResearchState, SearchResult, ResearchReport } from '../types/index.js';
// import { searchTavily } from './tavily.js';
import { searchWeb } from './firecrawl.js';
import { analyzeResearch, generateReport } from './openai.js';
// In-memory registry of all research sessions, keyed by session ID.
// Nothing in this module removes entries, so sessions live as long as the process.
const researchSessions = new Map<string, ResearchState>();
/**
 * Create a research session and register it under the given ID.
 *
 * An existing session with the same ID is replaced.
 *
 * @param sessionId Unique session identifier
 * @param query Research query; also used as the first search topic
 * @param depth Maximum number of research steps
 * @returns The freshly created research state
 */
export function initializeResearch(
  sessionId: string,
  query: string,
  depth: number = 3
): ResearchState {
  const session: ResearchState = {
    query,
    depth,
    currentDepth: 0,
    topics: [],
    findings: [],
    // The first step searches for the query itself.
    nextSearchTopic: query,
    shouldContinue: true
  };

  researchSessions.set(sessionId, session);
  return session;
}
/**
 * Look up the research state of a session.
 *
 * @param sessionId Session identifier
 * @returns The stored research state, or null when no such session exists
 */
export function getResearchState(sessionId: string): ResearchState | null {
  const session = researchSessions.get(sessionId);
  return session ? session : null;
}
/**
 * Search for a topic, converting a failed search into a placeholder result
 * so that the research step can still proceed.
 */
async function searchWithFallback(topic: string): Promise<SearchResult> {
  try {
    return await searchWeb(topic);
  } catch (searchError) {
    const reason = searchError instanceof Error ? searchError.message : String(searchError);
    console.error(`[Research] Search error: ${reason}`);
    // Single synthetic entry that records the failure in the findings.
    return {
      query: topic,
      results: [{
        title: 'Search Error',
        url: 'https://error.example.com',
        content: `An error occurred while searching: ${reason}. The search will continue with the next topic.`,
        score: 0
      }]
    };
  }
}

/**
 * Run one research step: search the pending topic, record the finding,
 * and ask the analysis service what to search next.
 *
 * Returns the state untouched when the maximum depth has already been reached.
 * The depth counter advances even when the step fails.
 *
 * @param sessionId Session identifier
 * @returns Updated research state
 * @throws Error when the session does not exist or the step fails
 */
export async function executeResearchStep(sessionId: string): Promise<ResearchState> {
  const session = researchSessions.get(sessionId);
  if (!session) {
    throw new Error(`No research session found with ID: ${sessionId}`);
  }

  // Max depth reached - research is complete
  if (session.currentDepth >= session.depth) {
    return session;
  }

  try {
    // Fall back to the original query when no follow-up topic is pending.
    const topic = session.nextSearchTopic || session.query;
    session.topics.push(topic);
    console.error(`[Research] Searching for: "${topic}"`);

    const searchResult = await searchWithFallback(topic);
    session.findings.push(formatSearchResults(searchResult));

    // The analysis always runs, whatever the current shouldContinue value is.
    const verdict = await analyzeResearch(
      session.query,
      session.findings,
      session.topics
    );
    session.nextSearchTopic = verdict.nextSearchTopic;
    session.shouldContinue = verdict.shouldContinue;

    session.currentDepth++;
    researchSessions.set(sessionId, session);
    return session;
  } catch (error) {
    console.error('Error executing research step:', error);

    // Count the failed step so that callers looping on depth still terminate.
    session.currentDepth++;
    // Without a pending topic there is nothing left to search for.
    if (!session.nextSearchTopic) {
      session.shouldContinue = false;
    }
    researchSessions.set(sessionId, session);

    throw new Error(`Failed to execute research step: ${error instanceof Error ? error.message : String(error)}`);
  }
}
/**
 * Produce the final report for a session from its query and collected findings.
 *
 * @param sessionId Session identifier
 * @returns The report text together with the session's query, findings and topics
 * @throws Error when the session does not exist
 */
export async function generateResearchReport(sessionId: string): Promise<ResearchReport> {
  const session = researchSessions.get(sessionId);
  if (!session) {
    throw new Error(`No research session found with ID: ${sessionId}`);
  }

  const report = await generateReport(session.query, session.findings);
  const { query, findings, topics } = session;
  return { query, findings, topics, report };
}
/**
 * Render search results as a markdown finding.
 *
 * The output has a heading with the query, one numbered section per source
 * (title, URL, citation line, content), and a closing list of numbered source URLs.
 *
 * @param searchResult Search results
 * @returns Formatted finding text
 */
function formatSearchResults(searchResult: SearchResult): string {
  // One detailed section per source, numbered from 1.
  const sections = searchResult.results.map((entry, position) => {
    const n = position + 1;
    return [
      `## Source [${n}]: ${entry.title}\n`,
      `URL: ${entry.url}\n`,
      `Citation: [${n}] ${entry.url}\n\n`,
      `### Content from Source [${n}]:\n${entry.content}\n\n`
    ].join('');
  });

  // Compact, standardized list of sources for easy citation.
  const citations = searchResult.results.map(
    (entry, position) => `[${position + 1}] ${entry.url} - ${entry.title}\n`
  );

  return [
    `# Search Results for: ${searchResult.query}\n\n`,
    ...sections,
    `# Source URLs for Citation\n\n`,
    ...citations
  ].join('');
}
```
--------------------------------------------------------------------------------
/src/client-example.ts:
--------------------------------------------------------------------------------
```typescript
import { Client } from "@modelcontextprotocol/sdk/client/index.js";
import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
import dotenv from 'dotenv';
dotenv.config();
// Define types for our tool responses
/**
 * Shape that the results of client.callTool are cast to in this example.
 */
interface ToolResponse {
/** Content items returned by the tool; this client only reads the first item's text. */
content: Array<{
type: string;
text: string;
}>;
/** Set by the server when the tool call failed. */
isError?: boolean;
}
/**
 * JSON payload carried in the text of an initialize-research response.
 */
interface InitResponse {
/** Identifier to pass to the follow-up tool calls. */
sessionId: string;
message: string;
/** Initial research state as serialized by the server; not inspected by this client. */
state: any;
}
/**
 * JSON payload carried in the text of an execute-research-step response.
 */
interface StepResponse {
message: string;
/** Number of steps the server has executed so far; drives the client loop. */
currentDepth: number;
maxDepth: number;
/** Topic searched in the step that just ran. */
lastTopic: string;
/** Topic planned for the next step, or null when there is none. */
nextTopic: string | null;
shouldContinue: boolean;
/** Full research state as serialized by the server; not inspected by this client. */
state: any;
}
/**
 * Returns the text of the first content item of a tool response.
 *
 * @throws Error when the response does not carry a text item in first position
 */
function firstText(result: ToolResponse, toolName: string): string {
  const text = result.content?.[0]?.text;
  if (typeof text !== 'string') {
    throw new Error(`Invalid response format from ${toolName}`);
  }
  return text;
}

/** Prints the report banner followed by the report text (to stderr, like all client output). */
function printFinalReport(reportText: string): void {
  console.error("\n================================");
  console.error("FINAL RESEARCH REPORT");
  console.error("================================\n");
  console.error(reportText);
}

/**
 * Example MCP client that connects to the DeepResearch server
 * and performs a complete research workflow.
 *
 * Command line: `<query> [depth] [complete]`. With "complete" as third argument
 * the whole workflow runs in a single complete-research call; otherwise the
 * client initializes a session, executes the steps one by one and then asks
 * for the report. The connection is always closed before returning.
 */
async function main() {
  // Get the research query from command line arguments or use a default
  const query = process.argv[2] || "The impact of quantum computing on cryptography";
  const depth = Number(process.argv[3]) || 2;
  const useCompleteResearch = process.argv[4] === "complete";
  console.error(`Starting research on: "${query}" with depth ${depth}`);
  if (useCompleteResearch) {
    console.error("Using complete-research flow (all steps in one call)");
  }

  // Connect to the server (spawned as a child process over stdio)
  const transport = new StdioClientTransport({
    command: "node",
    args: ["dist/index.js"]
  });
  const client = new Client({
    name: "deep-research-client",
    version: "1.0.0"
  }, {
    capabilities: {
      resources: {},
      tools: {},
      prompts: {}
    }
  });
  console.error("Connecting to DeepResearch MCP server...");
  await client.connect(transport);
  console.error("Connected successfully.");

  try {
    if (useCompleteResearch) {
      // Use the complete-research tool for a one-step process
      console.error("Executing complete research process...");
      const completeResult = await client.callTool(
        {
          name: "complete-research",
          arguments: {
            query,
            depth,
            timeout: 180000 // Server-side budget: 3 minutes
          }
        },
        undefined, // default result schema
        // The request timeout belongs in the request options; inside the params
        // object it is ignored and the SDK's default request timeout applies.
        { timeout: 240000 } // Give the client a bit extra time (4 minutes)
      ) as ToolResponse;
      const completeText = firstText(completeResult, 'complete-research');
      if (completeResult.isError) {
        console.error(`Error in complete research flow: ${completeText}`);
      } else {
        printFinalReport(completeText);
      }
    } else {
      // Step-by-step process
      console.error("Initializing research session...");
      const initResult = await client.callTool({
        name: "initialize-research",
        arguments: {
          query,
          depth
        }
      }) as ToolResponse;
      // Parse the response to get sessionId
      const initData = JSON.parse(firstText(initResult, 'initialize-research')) as InitResponse;
      const { sessionId } = initData;
      console.error(`Research session initialized with ID: ${sessionId}`);

      // Execute steps until complete
      let currentDepth = 0;
      while (currentDepth < depth) {
        console.error(`\nExecuting research step ${currentDepth + 1}/${depth}...`);
        const stepResult = await client.callTool({
          name: "execute-research-step",
          arguments: { sessionId }
        }) as ToolResponse;
        const stepText = firstText(stepResult, 'execute-research-step');
        try {
          // Check if this is an error response
          if (stepResult.isError) {
            console.error(`Error: ${stepText}`);
            // Increment depth to continue despite errors
            currentDepth++;
            continue;
          }
          const stepData = JSON.parse(stepText) as StepResponse;
          currentDepth = stepData.currentDepth;
          console.error(`Completed step ${currentDepth}/${depth}`);
          console.error(`Last topic searched: ${stepData.lastTopic}`);
          if (stepData.nextTopic) {
            console.error(`Next topic to search: ${stepData.nextTopic}`);
          } else {
            console.error("No further topics to search.");
          }
        } catch (parseError) {
          console.error(`Error parsing response: ${parseError}`);
          console.error(`Raw response: ${stepText}`);
          // Increment depth to continue despite errors
          currentDepth++;
        }
      }

      // Generate final report
      console.error("\nGenerating final research report...");
      const reportResult = await client.callTool(
        {
          name: "generate-report",
          arguments: { sessionId }
        },
        undefined, // default result schema
        { timeout: 180000 } // 3 minutes request timeout for report generation
      ) as ToolResponse;
      const reportText = firstText(reportResult, 'generate-report');
      if (reportResult.isError) {
        console.error(`Error generating report: ${reportText}`);
      } else {
        printFinalReport(reportText);
      }
    }
  } catch (error) {
    console.error("Error during research process:", error);
  } finally {
    // Clean up: closing the client also closes the transport, so the spawned
    // server does not outlive this process or keep it from exiting.
    console.error("\nDisconnecting from server...");
    try {
      await client.close();
    } catch (closeError) {
      console.error("Error while disconnecting:", closeError);
    }
  }
}
main().catch(console.error);
```
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
```json
{
"compilerOptions": {
/* Visit https://aka.ms/tsconfig to read more about this file */
/* Projects */
// "incremental": true, /* Save .tsbuildinfo files to allow for incremental compilation of projects. */
// "composite": true, /* Enable constraints that allow a TypeScript project to be used with project references. */
// "tsBuildInfoFile": "./.tsbuildinfo", /* Specify the path to .tsbuildinfo incremental compilation file. */
// "disableSourceOfProjectReferenceRedirect": true, /* Disable preferring source files instead of declaration files when referencing composite projects. */
// "disableSolutionSearching": true, /* Opt a project out of multi-project reference checking when editing. */
// "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */
/* Language and Environment */
"target": "ES2020",
// "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */
// "jsx": "preserve", /* Specify what JSX code is generated. */
// "libReplacement": true, /* Enable lib replacement. */
// "experimentalDecorators": true, /* Enable experimental support for legacy experimental decorators. */
// "emitDecoratorMetadata": true, /* Emit design-type metadata for decorated declarations in source files. */
// "jsxFactory": "", /* Specify the JSX factory function used when targeting React JSX emit, e.g. 'React.createElement' or 'h'. */
// "jsxFragmentFactory": "", /* Specify the JSX Fragment reference used for fragments when targeting React JSX emit e.g. 'React.Fragment' or 'Fragment'. */
// "jsxImportSource": "", /* Specify module specifier used to import the JSX factory functions when using 'jsx: react-jsx*'. */
// "reactNamespace": "", /* Specify the object invoked for 'createElement'. This only applies when targeting 'react' JSX emit. */
// "noLib": true, /* Disable including any library files, including the default lib.d.ts. */
// "useDefineForClassFields": true, /* Emit ECMAScript-standard-compliant class fields. */
// "moduleDetection": "auto", /* Control what method is used to detect module-format JS files. */
/* Modules */
"module": "NodeNext",
"moduleResolution": "NodeNext",
// "rootDir": "./", /* Specify the root folder within your source files. */
// "baseUrl": "./", /* Specify the base directory to resolve non-relative module names. */
// "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */
// "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */
// "typeRoots": [], /* Specify multiple folders that act like './node_modules/@types'. */
// "types": [], /* Specify type package names to be included without being referenced in a source file. */
// "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */
// "moduleSuffixes": [], /* List of file name suffixes to search when resolving a module. */
// "allowImportingTsExtensions": true, /* Allow imports to include TypeScript file extensions. Requires '--moduleResolution bundler' and either '--noEmit' or '--emitDeclarationOnly' to be set. */
// "rewriteRelativeImportExtensions": true, /* Rewrite '.ts', '.tsx', '.mts', and '.cts' file extensions in relative import paths to their JavaScript equivalent in output files. */
// "resolvePackageJsonExports": true, /* Use the package.json 'exports' field when resolving package imports. */
// "resolvePackageJsonImports": true, /* Use the package.json 'imports' field when resolving imports. */
// "customConditions": [], /* Conditions to set in addition to the resolver-specific defaults when resolving imports. */
// "noUncheckedSideEffectImports": true, /* Check side effect imports. */
"resolveJsonModule": true,
// "allowArbitraryExtensions": true, /* Enable importing files with any extension, provided a declaration file is present. */
// "noResolve": true, /* Disallow 'import's, 'require's or '<reference>'s from expanding the number of files TypeScript should add to a project. */
/* JavaScript Support */
// "allowJs": true, /* Allow JavaScript files to be a part of your program. Use the 'checkJS' option to get errors from these files. */
// "checkJs": true, /* Enable error reporting in type-checked JavaScript files. */
// "maxNodeModuleJsDepth": 1, /* Specify the maximum folder depth used for checking JavaScript files from 'node_modules'. Only applicable with 'allowJs'. */
/* Emit */
"declaration": true,
// "declarationMap": true, /* Create sourcemaps for d.ts files. */
// "emitDeclarationOnly": true, /* Only output d.ts files and not JavaScript files. */
"sourceMap": true,
// "inlineSourceMap": true, /* Include sourcemap files inside the emitted JavaScript. */
// "noEmit": true, /* Disable emitting files from a compilation. */
// "outFile": "./", /* Specify a file that bundles all outputs into one JavaScript file. If 'declaration' is true, also designates a file that bundles all .d.ts output. */
"outDir": "./dist",
// "removeComments": true, /* Disable emitting comments. */
// "importHelpers": true, /* Allow importing helper functions from tslib once per project, instead of including them per-file. */
// "downlevelIteration": true, /* Emit more compliant, but verbose and less performant JavaScript for iteration. */
// "sourceRoot": "", /* Specify the root path for debuggers to find the reference source code. */
// "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */
// "inlineSources": true, /* Include source code in the sourcemaps inside the emitted JavaScript. */
// "emitBOM": true, /* Emit a UTF-8 Byte Order Mark (BOM) in the beginning of output files. */
// "newLine": "crlf", /* Set the newline character for emitting files. */
// "stripInternal": true, /* Disable emitting declarations that have '@internal' in their JSDoc comments. */
// "noEmitHelpers": true, /* Disable generating custom helper functions like '__extends' in compiled output. */
// "noEmitOnError": true, /* Disable emitting files if any type checking errors are reported. */
// "preserveConstEnums": true, /* Disable erasing 'const enum' declarations in generated code. */
// "declarationDir": "./", /* Specify the output directory for generated declaration files. */
/* Interop Constraints */
// "isolatedModules": true, /* Ensure that each file can be safely transpiled without relying on other imports. */
// "verbatimModuleSyntax": true, /* Do not transform or elide any imports or exports not marked as type-only, ensuring they are written in the output file's format based on the 'module' setting. */
// "isolatedDeclarations": true, /* Require sufficient annotation on exports so other tools can trivially generate declaration files. */
// "erasableSyntaxOnly": true, /* Do not allow runtime constructs that are not part of ECMAScript. */
// "allowSyntheticDefaultImports": true, /* Allow 'import x from y' when a module doesn't have a default export. */
"esModuleInterop": true,
// "preserveSymlinks": true, /* Disable resolving symlinks to their realpath. This correlates to the same flag in node. */
"forceConsistentCasingInFileNames": true,
/* Type Checking */
"strict": true,
// "noImplicitAny": true, /* Enable error reporting for expressions and declarations with an implied 'any' type. */
// "strictNullChecks": true, /* When type checking, take into account 'null' and 'undefined'. */
// "strictFunctionTypes": true, /* When assigning functions, check to ensure parameters and the return values are subtype-compatible. */
// "strictBindCallApply": true, /* Check that the arguments for 'bind', 'call', and 'apply' methods match the original function. */
// "strictPropertyInitialization": true, /* Check for class properties that are declared but not set in the constructor. */
// "strictBuiltinIteratorReturn": true, /* Built-in iterators are instantiated with a 'TReturn' type of 'undefined' instead of 'any'. */
// "noImplicitThis": true, /* Enable error reporting when 'this' is given the type 'any'. */
// "useUnknownInCatchVariables": true, /* Default catch clause variables as 'unknown' instead of 'any'. */
// "alwaysStrict": true, /* Ensure 'use strict' is always emitted. */
// "noUnusedLocals": true, /* Enable error reporting when local variables aren't read. */
// "noUnusedParameters": true, /* Raise an error when a function parameter isn't read. */
// "exactOptionalPropertyTypes": true, /* Interpret optional property types as written, rather than adding 'undefined'. */
// "noImplicitReturns": true, /* Enable error reporting for codepaths that do not explicitly return in a function. */
// "noFallthroughCasesInSwitch": true, /* Enable error reporting for fallthrough cases in switch statements. */
// "noUncheckedIndexedAccess": true, /* Add 'undefined' to a type when accessed using an index. */
// "noImplicitOverride": true, /* Ensure overriding members in derived classes are marked with an override modifier. */
// "noPropertyAccessFromIndexSignature": true, /* Enforces using indexed accessors for keys declared using an indexed type. */
// "allowUnusedLabels": true, /* Disable error reporting for unused labels. */
// "allowUnreachableCode": true, /* Disable error reporting for unreachable code. */
/* Completeness */
// "skipDefaultLibCheck": true, /* Skip type checking .d.ts files that are included with TypeScript. */
"skipLibCheck": true
},
"include": ["src/**/*"],
"exclude": ["node_modules", "dist"],
"rootDir": "./src"
}
```
--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
```typescript
import { McpServer, ResourceTemplate } from '@modelcontextprotocol/sdk/server/mcp.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import { z } from 'zod';
import * as crypto from 'crypto';
import dotenv from 'dotenv';
// Import our services
import { initializeResearch, executeResearchStep, getResearchState, generateResearchReport } from './services/research.js';
import { ResearchReport } from './types/index.js';
// Load environment variables
dotenv.config();
// Create MCP server
// Name and version come from MCP_SERVER_NAME / MCP_SERVER_VERSION and fall back to
// 'DeepResearch' / '1.0.0' when the variable is unset or empty.
const server = new McpServer({
name: process.env.MCP_SERVER_NAME || 'DeepResearch',
version: process.env.MCP_SERVER_VERSION || '1.0.0'
});
// Resource exposing the full state of a research session as pretty-printed JSON
// at research://state/{sessionId}. Reading an unknown session throws.
server.resource(
  'research-state',
  new ResourceTemplate('research://state/{sessionId}', { list: undefined }),
  async (uri, params) => {
    const id = params.sessionId as string;
    const session = getResearchState(id);
    if (session === null) {
      throw new Error(`Research session not found: ${id}`);
    }

    const body = JSON.stringify(session, null, 2);
    return {
      contents: [{ uri: uri.href, text: body }]
    };
  }
);
// Resource exposing the findings of a research session as one text document
// at research://findings/{sessionId}; findings are separated by horizontal rules.
// Reading an unknown session throws.
server.resource(
  'research-findings',
  new ResourceTemplate('research://findings/{sessionId}', { list: undefined }),
  async (uri, params) => {
    const id = params.sessionId as string;
    const session = getResearchState(id);
    if (session === null) {
      throw new Error(`Research session not found: ${id}`);
    }

    const body = session.findings.join('\n\n---\n\n');
    return {
      contents: [{ uri: uri.href, text: body }]
    };
  }
);
// Tool that starts a new research session.
// Responds with JSON holding the generated sessionId, a message and the initial state;
// failures are reported as a plain-text error with isError set.
server.tool(
  'initialize-research',
  {
    query: z.string(),
    depth: z.number().default(3),
  },
  async ({ query, depth }) => {
    try {
      // Each session gets a fresh random UUID.
      const sessionId = crypto.randomUUID();
      const payload = {
        sessionId,
        message: 'Research session initialized',
        state: initializeResearch(sessionId, query, depth)
      };
      return {
        content: [{ type: 'text', text: JSON.stringify(payload, null, 2) }]
      };
    } catch (error) {
      console.error('Error initializing research:', error);
      const reason = error instanceof Error ? error.message : String(error);
      return {
        content: [{ type: 'text', text: `Error initializing research: ${reason}` }],
        isError: true
      };
    }
  }
);
// Tool that runs the next research step of a session.
// On success it responds with JSON summarising progress plus the full state.
// On failure it still responds with the same JSON shape (error: true,
// shouldContinue: false) when the session exists; only when the session cannot
// be found is the response flagged with isError.
server.tool(
  'execute-research-step',
  {
    sessionId: z.string(),
  },
  async ({ sessionId }) => {
    try {
      const state = await executeResearchStep(sessionId);
      const summary = {
        message: 'Research step executed',
        currentDepth: state.currentDepth,
        maxDepth: state.depth,
        lastTopic: state.topics[state.topics.length - 1],
        nextTopic: state.nextSearchTopic,
        shouldContinue: state.shouldContinue,
        state
      };
      return {
        content: [{ type: 'text', text: JSON.stringify(summary, null, 2) }]
      };
    } catch (error) {
      console.error('Error executing research step:', error);
      const reason = error instanceof Error ? error.message : String(error);

      // Look the session up again so that the error can be reported with its state.
      const session = getResearchState(sessionId);
      if (!session) {
        // No state to report: plain error payload.
        const failure = {
          message: `Error executing research step: ${reason}`,
          error: true
        };
        return {
          content: [{ type: 'text', text: JSON.stringify(failure, null, 2) }],
          isError: true
        };
      }

      const searched = session.topics;
      const failedStep = {
        message: `Error: ${reason}`,
        currentDepth: session.currentDepth,
        maxDepth: session.depth,
        lastTopic: searched.length > 0 ? searched[searched.length - 1] : session.query,
        nextTopic: session.nextSearchTopic,
        shouldContinue: false, // Stop research on error
        state: session,
        error: true
      };
      return {
        content: [{ type: 'text', text: JSON.stringify(failedStep, null, 2) }]
      };
    }
  }
);
// Tool that generates the final research report for a session.
// Report generation is raced against `timeout` milliseconds (default 60000).
// On failure or timeout a partial report listing the researched topics is returned
// when the session has findings; otherwise a JSON error payload flagged with isError.
// Note that a timeout only stops waiting: the underlying report generation is not cancelled.
server.tool(
  'generate-report',
  {
    sessionId: z.string(),
    timeout: z.number().optional().default(60000)
  },
  async ({ sessionId, timeout }) => {
    // Promise that rejects after the timeout; the timer handle is kept so that it
    // can be cancelled once the race is decided.
    let timer: ReturnType<typeof setTimeout> | undefined;
    const timeoutPromise = new Promise<never>((_, reject) => {
      timer = setTimeout(() => reject(new Error('Report generation timed out')), timeout);
    });
    try {
      // Race the report generation against the timeout
      const report: ResearchReport = await Promise.race([
        generateResearchReport(sessionId),
        timeoutPromise
      ]);
      return {
        content: [{
          type: 'text',
          text: report.report
        }]
      };
    } catch (error) {
      console.error('Error generating research report:', error);
      const reason = error instanceof Error ? error.message : String(error);
      // Get the current state, even if there was an error
      const currentState = getResearchState(sessionId);
      // If we have findings, return a basic report built from what was collected
      if (currentState && currentState.findings.length > 0) {
        return {
          content: [{
            type: 'text',
            text: `# Research Report (Error Recovery)\n\n` +
              `**Original Query:** ${currentState.query}\n\n` +
              `**Note:** This is a partial report generated after an error occurred: ${reason}\n\n` +
              `## Summary of Findings\n\n` +
              `The research process collected ${currentState.findings.length} sets of findings ` +
              `across ${currentState.topics.length} topics but encountered an error during the final report generation.\n\n` +
              `### Topics Researched\n\n` +
              currentState.topics.map((topic, index) => `${index + 1}. ${topic}`).join('\n')
          }]
        };
      }
      return {
        content: [{
          type: 'text',
          text: JSON.stringify({
            message: `Error generating research report: ${reason}`,
            error: true
          }, null, 2)
        }],
        isError: true
      };
    } finally {
      // Without this the timer stays pending for the full timeout after every
      // call, keeping the event loop busy for no reason.
      clearTimeout(timer);
    }
  }
);
// NEW TOOL: Complete Research in one step
//
// Initializes a fresh session, runs up to `depth` research steps sequentially and
// then generates the final report. `timeout` (milliseconds) bounds only the report
// generation, not the research steps. On failure, a partial "error recovery" report
// is returned when the session holds findings; otherwise a JSON error payload
// flagged with `isError`.
server.tool(
  'complete-research',
  {
    query: z.string(),
    depth: z.number().default(3),
    timeout: z.number().optional().default(180000) // 3 minute timeout (same as client setting)
  },
  async ({ query, depth, timeout }) => {
    // Define and initialize sessionId in the outer scope so the error handler can look up the session
    const sessionId = crypto.randomUUID();
    // Handle of the timeout timer, kept so it can be cancelled once the race settles
    let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
    try {
      // Step 1: Initialize research
      console.error(`Initializing research session for: "${query}" with depth ${depth}`);
      initializeResearch(sessionId, query, depth);
      console.error(`Research session initialized with ID: ${sessionId}`);
      // Step 2: Execute all research steps sequentially
      let currentDepth = 0;
      while (currentDepth < depth) {
        const attemptedStep = currentDepth + 1;
        console.error(`Executing research step ${attemptedStep}/${depth}...`);
        // Depth reported by the session after this attempt (undefined if unavailable)
        let reportedDepth: number | undefined;
        try {
          const stepData = await executeResearchStep(sessionId);
          reportedDepth = stepData.currentDepth;
          console.error(`Completed step ${reportedDepth}/${depth}`);
          console.error(`Last topic searched: ${stepData.topics[stepData.topics.length - 1]}`);
          if (stepData.nextSearchTopic) {
            console.error(`Next topic to search: ${stepData.nextSearchTopic}`);
          } else {
            console.error(`No further topics to search.`);
          }
        } catch (stepError) {
          // Log the error but continue with next steps
          console.error(`Error in research step ${attemptedStep}: ${stepError}`);
          // Get the current state to determine the new depth
          reportedDepth = getResearchState(sessionId)?.currentDepth;
        }
        // Always advance by at least one step: if the session depth did not move
        // (failed step, or a step that returned without progressing) the loop would
        // otherwise repeat the same step forever.
        currentDepth = Math.max(reportedDepth ?? 0, attemptedStep);
      }
      // Step 3: Generate the final report with timeout handling
      console.error(`Generating final research report...`);
      // Create a promise that rejects after the timeout
      const timeoutPromise = new Promise((_, reject) => {
        timeoutHandle = setTimeout(() => reject(new Error('Report generation timed out')), timeout);
      });
      // Race the report generation against the timeout
      const report = await Promise.race([
        generateResearchReport(sessionId),
        timeoutPromise
      ]) as ResearchReport;
      return {
        content: [{
          type: 'text',
          text: `# Complete Research Results for "${query}"\n\n` +
            `Research completed with depth: ${depth}\n\n` +
            `## Final Report\n\n${report.report}`
        }]
      };
    } catch (error) {
      console.error('Error in complete research process:', error);
      // Get the current state to generate a fallback report
      const currentState = getResearchState(sessionId);
      if (currentState && currentState.findings.length > 0) {
        return {
          content: [{
            type: 'text',
            text: `# Research Report (Error Recovery)\n\n` +
              `**Original Query:** ${currentState.query}\n\n` +
              `**Note:** This is a partial report generated after an error occurred: ${error instanceof Error ? error.message : String(error)}\n\n` +
              `## Summary of Findings\n\n` +
              `The research process collected ${currentState.findings.length} sets of findings ` +
              `across ${currentState.topics.length} topics but encountered an error during the final report generation.\n\n` +
              `### Topics Researched\n\n` +
              currentState.topics.map((topic, index) => `${index + 1}. ${topic}`).join('\n')
          }]
        };
      }
      // No usable state: report the failure itself
      return {
        content: [{
          type: 'text',
          text: JSON.stringify({
            message: `Error performing research: ${error instanceof Error ? error.message : String(error)}`,
            error: true
          }, null, 2)
        }],
        isError: true
      };
    } finally {
      // Cancel the timer so it does not stay pending after the request has finished
      clearTimeout(timeoutHandle);
    }
  }
);
// Prompt for complete research flow
// Registers the 'deep-research' prompt: a single user message that asks for an
// initialize -> iterate -> report research session.
server.prompt(
  'deep-research',
  'A deep research tool that explores topics thoroughly through iterative search',
  () => {
    // Instruction text sent as the user message
    const instructions = `Please conduct a deep research session on a topic of interest.
I'm looking for a comprehensive analysis with multiple angles and perspectives. The research should explore the topic thoroughly, considering different viewpoints and citing reliable sources.
Please follow these steps:
1. Initialize a research session with my topic
2. Perform multiple rounds of iterative research, exploring different aspects
3. Generate a comprehensive report with your findings`;
    return {
      messages: [{
        role: 'user',
        content: {
          type: 'text',
          text: instructions
        }
      }]
    };
  }
);
// Start the server
// Connects the MCP server to a stdio transport. A failed connection is logged
// and terminates the process with exit code 1.
async function startServer() {
  const stdioTransport = new StdioServerTransport();
  const version = '1.0.0';
  // Falls back to a fixed label when the constructor name is unavailable
  const name = server?.constructor?.name || 'DeepResearch';
  console.error(`Starting DeepResearch MCP Server (${name} v${version})`);
  try {
    await server.connect(stdioTransport);
    console.error('Server connected. Waiting for requests...');
  } catch (connectError) {
    console.error('Failed to start server:', connectError);
    process.exit(1);
  }
}
// Kick off startup; anything that escapes startServer is logged to stderr
startServer().catch(console.error);
```
--------------------------------------------------------------------------------
/src/services/openai.ts:
--------------------------------------------------------------------------------
```typescript
import OpenAI from 'openai';
import { AnalysisResult } from '../types/index.js';
import dotenv from 'dotenv';
// Load environment variables (e.g. from a local .env file) before the key is read below.
dotenv.config();
// API key for the OpenAI client, taken from the environment.
const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
// Fail fast at module load: without a key the client below cannot be configured.
if (!OPENAI_API_KEY) {
throw new Error('OPENAI_API_KEY is not set in environment variables');
}
// Shared OpenAI client instance used by the functions in this module.
const openai = new OpenAI({
apiKey: OPENAI_API_KEY
});
/**
 * Condense an older finding to its search query plus the title/URL of each
 * source, dropping the full page content to save tokens.
 *
 * @param finding Raw finding text
 * @param index Position of the finding, used as a label when no query header is present
 * @returns Abbreviated finding text
 */
function summarizeOlderFinding(finding: string, index: number): string {
  const searchQueryMatch = finding.match(/# Search Results for: (.*?)\n/);
  const searchQuery = searchQueryMatch ? searchQueryMatch[1] : `Research step ${index + 1}`;
  // Source titles and URLs only (no full content)
  const sourceSummaries = [...finding.matchAll(/## Source \[(\d+)\]: (.*?)\nURL: (https?:\/\/[^\s]+)/g)]
    .map(match => `Source [${match[1]}]: ${match[2]}\nURL: ${match[3]}`)
    .join('\n\n');
  return `# Summary of Search for: ${searchQuery}\n\n${sourceSummaries}\n\n[Full content omitted to save tokens]`;
}

/**
 * Parse the model reply, which is requested in the form
 * "CONTINUE: YES/NO" followed by "NEXT SEARCH QUERY: ...".
 *
 * Matching ignores letter case and extra whitespace so small formatting
 * deviations in the reply do not silently end the research. A missing or blank
 * query is reported as `null`, never as an empty string.
 *
 * @param content Raw text of the model reply
 * @returns Whether to continue and the next search topic (null when none was given)
 */
function parseAnalysisDecision(
  content: string
): { nextSearchTopic: string | null; shouldContinue: boolean } {
  const shouldContinue = /CONTINUE:\s*YES\b/i.test(content);
  if (!shouldContinue) {
    return { shouldContinue, nextSearchTopic: null };
  }
  // Capture the rest of the line after the label ('.' does not cross newlines)
  const nextTopicMatch = content.match(/NEXT SEARCH QUERY:[ \t]*(.*)/i);
  const topic = nextTopicMatch?.[1]?.trim() ?? '';
  return { shouldContinue, nextSearchTopic: topic.length > 0 ? topic : null };
}

/**
 * Analyze research findings and determine the next steps
 *
 * The two most recent findings are sent in full; older ones are reduced to
 * their query and source list to limit prompt size.
 *
 * @param query Original research query
 * @param findings Research findings so far
 * @param searchedTopics Topics already searched
 * @returns Analysis result with follow-up steps
 * @throws Rethrows any API error, and throws when the reply has no content
 */
export async function analyzeResearch(
  query: string,
  findings: string[],
  searchedTopics: string[]
): Promise<{ nextSearchTopic: string | null; shouldContinue: boolean }> {
  console.error(`[OpenAI Service] Analyzing research with ${findings.length} findings`);
  try {
    // Implement token management strategy
    let processedFindings: string;
    // If we have more than 2 findings, we need to be careful about token limits
    if (findings.length > 2) {
      console.error(`[OpenAI Service] Managing tokens for ${findings.length} findings`);
      // Keep the two most recent findings intact
      const recentFindings = findings.slice(-2);
      // For older findings, keep only essential information to save tokens
      const olderFindings = findings.slice(0, -2).map(summarizeOlderFinding);
      // Combine summaries of older findings with full recent findings
      processedFindings = [
        ...olderFindings,
        ...recentFindings
      ].join('\n\n---\n\n');
      console.error(`[OpenAI Service] Processed ${findings.length} findings: ${olderFindings.length} summarized, ${recentFindings.length} full`);
    } else {
      // If we have 2 or fewer findings, we can include all of them in full
      processedFindings = findings.join('\n\n---\n\n');
      console.error(`[OpenAI Service] Using all ${findings.length} findings in full`);
    }
    // Create a list of already searched topics to avoid duplication
    const searchedTopicsText = searchedTopics.length
      ? `ALREADY SEARCHED TOPICS (DO NOT REPEAT THESE):\n${searchedTopics.map(t => `- ${t}`).join('\n')}`
      : 'No topics have been searched yet.';
    // Make API call with optimized content
    const response = await openai.chat.completions.create({
      model: 'gpt-4-turbo',
      messages: [
        {
          role: 'system',
          content: `You are a research assistant helping to explore a topic thoroughly through web searches.
MAIN RESEARCH TOPIC: "${query}"
${searchedTopicsText}
Your task is to determine if further research is needed and what specific topic to search next.`
        },
        {
          role: 'user',
          content: `I've been researching "${query}" and have the following findings so far:
${processedFindings}
Based on these findings, I need you to:
1. Determine if further research is necessary to fully explore the original query.
2. If further research is needed, provide ONE specific search query that would best supplement the existing findings. Be precise and focused.
3. If no further research is needed, explicitly state that the research is complete.
Format your response EXACTLY as follows:
CONTINUE: YES/NO
NEXT SEARCH QUERY: [your suggested search query only if continuing]`
        }
      ],
      temperature: 0.3,
      max_tokens: 1000
    });
    const content = response.choices[0]?.message?.content;
    if (!content) {
      throw new Error('No response content from OpenAI API');
    }
    return parseAnalysisDecision(content);
  } catch (error) {
    console.error('[OpenAI Service] Error analyzing research:', error);
    throw error;
  }
}
/** One deduplicated source extracted from the raw findings. */
interface ReportSource {
  url: string;
  title: string;
  content: string;
  /** 1-based citation number, assigned in order of first appearance */
  sourceNum: number;
  /** Search query of the finding the source was first seen in */
  searchQuery: string;
}

/**
 * Parse the findings into a flat, URL-deduplicated list of sources.
 *
 * Each finding is read line by line, recognising these markers:
 *   "# Search Results for: <query>"
 *   "## Source [n]: <title>" followed by "URL: <url>"
 *   "### Content from Source [n]:" followed by the content lines
 *   "# Source URLs" (ends the last content section)
 *
 * @param findings Raw finding texts
 * @returns Sources in order of first appearance
 */
function extractReportSources(findings: string[]): ReportSource[] {
  const sourceHeaderPattern = /## Source \[(\d+)\]: (.*?)$/;
  const contentHeaderPattern = /### Content from Source \[\d+\]:/;
  const sources: ReportSource[] = [];
  const seenUrls = new Set<string>();

  findings.forEach((finding, findingIndex) => {
    const searchQueryMatch = finding.match(/# Search Results for: (.*?)(\n|$)/);
    const searchQuery = searchQueryMatch?.[1] ?? `Finding ${findingIndex + 1}`;

    // State of the source currently being read
    let title = '';
    let url = '';
    let contentLines: string[] = [];
    let inContent = false;

    // Store the source read so far (if it has a URL and content and the URL is new),
    // then reset the per-source state so nothing leaks into the next source.
    const flush = (): void => {
      if (contentLines.length > 0 && url && !seenUrls.has(url)) {
        seenUrls.add(url);
        sources.push({
          url,
          title,
          content: contentLines.join('\n'),
          sourceNum: sources.length + 1,
          searchQuery
        });
      }
      url = '';
      contentLines = [];
      inContent = false;
    };

    for (const line of finding.split('\n')) {
      const headerMatch = line.match(sourceHeaderPattern);
      if (headerMatch) {
        // Store the previous source BEFORE taking over the new title, so each
        // source keeps its own title.
        flush();
        title = headerMatch[2] ?? '';
      } else if (!inContent && line.startsWith('URL: ')) {
        // Only a URL line outside the content section names the source; page
        // content that happens to start with "URL: " stays content.
        url = line.substring(5).trim();
      } else if (contentHeaderPattern.test(line)) {
        inContent = true;
        contentLines = [];
      } else if (inContent && (line.startsWith('## Source') || line.startsWith('# Source URLs'))) {
        // End of source content
        flush();
      } else if (inContent) {
        contentLines.push(line);
      }
    }
    // A finding may end right after the last content section
    flush();
  });

  return sources;
}

/**
 * Assemble the source material for the report prompt.
 *
 * When the combined content fits into `maxContentLength` characters, every
 * source is included in full, grouped by search query. Otherwise a bibliography
 * (titles and URLs) is emitted first and the remaining budget is split evenly
 * across the sources as truncated excerpts.
 *
 * @param sources Sources to include
 * @param maxContentLength Character budget for the source content
 * @returns Text to embed in the prompt
 */
function buildReportContent(sources: ReportSource[], maxContentLength: number): string {
  const totalContentLength = sources.reduce((sum, source) => sum + source.content.length, 0);

  // Group sources by search query, keeping first-seen order
  const sourcesByQuery = new Map<string, ReportSource[]>();
  for (const source of sources) {
    const group = sourcesByQuery.get(source.searchQuery);
    if (group) {
      group.push(source);
    } else {
      sourcesByQuery.set(source.searchQuery, [source]);
    }
  }

  let content = '';
  if (totalContentLength <= maxContentLength) {
    // Content is within limits: include everything
    sourcesByQuery.forEach((group, searchQuery) => {
      content += `## Search Query: ${searchQuery}\n\n`;
      for (const source of group) {
        content += `### [${source.sourceNum}] ${source.title}\n`;
        content += `URL: ${source.url}\n\n`;
        content += `${source.content.trim()}\n\n`;
      }
    });
    return content;
  }

  console.error(`Content exceeds token limit (${totalContentLength} characters), optimizing...`);
  // First pass: metadata only, so every source stays citable
  content = '# BIBLIOGRAPHY\n\n';
  sourcesByQuery.forEach((group, searchQuery) => {
    content += `## Search Query: ${searchQuery}\n\n`;
    for (const source of group) {
      content += `[${source.sourceNum}] "${source.title}"\n`;
      content += `URL: ${source.url}\n\n`;
    }
  });
  // Second pass: split what is left of the budget evenly across the sources.
  // Clamped at 0 in case the bibliography alone already uses the whole budget.
  const remainingLength = maxContentLength - content.length;
  const maxCharsPerSource = Math.max(0, Math.floor(remainingLength / sources.length));
  content += '# CONTENT EXCERPTS\n\n';
  for (const source of sources) {
    const excerpt = source.content.length > maxCharsPerSource
      ? source.content.substring(0, maxCharsPerSource) + '...'
      : source.content;
    content += `## [${source.sourceNum}] ${source.title}\n\n`;
    content += `${excerpt}\n\n`;
  }
  return content;
}

/**
 * Generate a comprehensive research report
 *
 * Extracts the sources from the findings, trims them to a character budget and
 * asks the model for a structured report citing sources as [X].
 *
 * @param query Original research query
 * @param findings All research findings
 * @returns Final research report
 * @throws Rethrows any API error, and throws when the reply has no content
 */
export async function generateReport(
  query: string,
  findings: string[]
): Promise<string> {
  try {
    // Character budget for the source material sent to the model
    const MAX_CONTENT_LENGTH = 40000; // Reduced from 60000 to 40000 characters
    const allSources = extractReportSources(findings);
    console.error(`Extracted ${allSources.length} sources from ${findings.length} findings`);
    const optimizedContent = buildReportContent(allSources, MAX_CONTENT_LENGTH);
    // Now generate the report with the optimized content
    console.error(`Generating report with optimized content (${optimizedContent.length} characters)`);
    // More optimized prompt with fewer instructions
    const response = await openai.chat.completions.create({
      model: 'gpt-4-turbo',
      messages: [
        {
          role: 'system',
          content: `Generate a concise research report on "${query}" using the provided sources.
Format:
- Executive Summary (2-3 paragraphs)
- Introduction
- Main Findings (organized by themes)
- Conclusion
- Bibliography
Cite sources using [X] format. Focus on key insights rather than exhaustive detail.`
        },
        {
          role: 'user',
          content: `Research report on "${query}" based on the following:
${optimizedContent}`
        }
      ],
      temperature: 0.5, // Lower temperature for more focused output
      max_tokens: 4000
    });
    const reportText = response.choices[0]?.message?.content;
    if (!reportText) {
      throw new Error("No response content from OpenAI API");
    }
    return reportText;
  } catch (error) {
    console.error("[OpenAI Service] Error generating report:", error);
    throw error;
  }
}
```