# Directory Structure
```
├── .env.example
├── .gitignore
├── claude_desktop_config_sample.json
├── package.json
├── README.md
├── src
│ ├── client-example.ts
│ ├── index.ts
│ ├── services
│ │ ├── firecrawl.ts
│ │ ├── openai.ts
│ │ └── research.ts
│ └── types
│ └── index.ts
└── tsconfig.json
```
# Files
--------------------------------------------------------------------------------
/.env.example:
--------------------------------------------------------------------------------
```
1 | # OpenAI API Key
2 | OPENAI_API_KEY=sk-your-openai-api-key
3 |
4 | # Firecrawl API Key
5 | FIRECRAWL_API_KEY=your-firecrawl-api-key
6 |
7 | # Server Configuration
8 | PORT=3000
9 | MCP_SERVER_NAME=DeepResearch
10 | MCP_SERVER_VERSION=1.0.0
```
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
```
1 | # dependencies
2 | node_modules/
3 | npm-debug.log*
4 | yarn-debug.log*
5 | yarn-error.log*
6 | package-lock.json
7 |
8 | # build output
9 | dist/
10 | build/
11 | *.tsbuildinfo
12 |
13 | # environment variables
14 | .env
15 | .env.local
16 | .env.development.local
17 | .env.test.local
18 | .env.production.local
19 |
20 | # logs
21 | logs/
22 | *.log
23 |
24 | # OS generated files
25 | .DS_Store
26 | .DS_Store?
27 | ._*
28 | .Spotlight-V100
29 | .Trashes
30 | ehthumbs.db
31 | Thumbs.db
32 |
33 | # IDE files
34 | .idea/
35 | .vscode/
36 | *.swp
37 | *.swo
38 | *.sublime-project
39 | *.sublime-workspace
40 |
41 | # Claude Desktop specific
42 | claude_desktop_config.json
```
--------------------------------------------------------------------------------
/claude_desktop_config_sample.json:
--------------------------------------------------------------------------------
```json
1 | {
2 | "mcpServers": {
3 | "deep-research": {
4 | "command": "node",
5 | "args": [
6 | "path to deep-research-mcp/dist/index.js"
7 | ],
8 | "env": {
9 | "FIRECRAWL_API_KEY": "your-firecrawl-api-key",
10 | "OPENAI_API_KEY": "your-openai-api-key"
11 | }
12 | }
13 | }
14 | }
15 |
```
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
```json
1 | {
2 | "name": "deep-research-mcp",
3 | "version": "1.0.0",
4 | "type": "module",
5 | "main": "dist/index.js",
6 | "scripts": {
7 | "build": "tsc",
8 | "start": "node dist/index.js",
9 | "dev": "tsc -w & node --watch dist/index.js",
10 | "client": "node dist/client-example.js"
11 | },
12 | "keywords": [],
13 | "author": "",
14 | "license": "ISC",
15 | "description": "A Deep Research MCP Server",
16 | "dependencies": {
17 | "@modelcontextprotocol/sdk": "^1.6.1",
18 | "axios": "^1.8.1",
19 | "dotenv": "^16.4.7",
20 | "openai": "^4.86.1",
21 | "typescript": "^5.8.2",
22 | "zod": "^3.24.2"
23 | }
24 | }
25 |
```
--------------------------------------------------------------------------------
/src/types/index.ts:
--------------------------------------------------------------------------------
```typescript
/**
 * State tracked for one research session.
 */
export interface ResearchState {
  /** The original research query. */
  query: string;
  /** Maximum number of research steps for the session. */
  depth: number;
  /** Number of steps executed so far. */
  currentDepth: number;
  /** Topics that have been searched, in order. */
  topics: string[];
  /** Formatted findings collected so far. */
  findings: string[];
  /** Topic to search on the next step, or null when none has been chosen. */
  nextSearchTopic: string | null;
  /** Continue flag, updated from the analysis after each step. */
  shouldContinue: boolean;
}

/**
 * Results of a single web search.
 */
export interface SearchResult {
  /** The query that was searched. */
  query: string;
  /** Individual hits returned for the query. */
  results: {
    title: string;
    url: string;
    content: string;
    score: number;
  }[];
}

/**
 * Outcome of the LLM analysis of the findings gathered so far.
 */
export interface AnalysisResult {
  /** Suggested next topic, or null when there is none. */
  nextSearchTopic: string | null;
  /** Whether the analysis recommends continuing the research. */
  shouldContinue: boolean;
}

/**
 * Final research report together with the material it was built from.
 */
export interface ResearchReport {
  query: string;
  findings: string[];
  topics: string[];
  /** Generated report text. */
  report: string;
}
```
--------------------------------------------------------------------------------
/src/services/firecrawl.ts:
--------------------------------------------------------------------------------
```typescript
1 | import axios from 'axios';
2 | import { SearchResult } from '../types/index.js';
3 | import dotenv from 'dotenv';
4 |
// Load variables from a local .env file before the API key is read.
dotenv.config();

// Firecrawl credential, read once when this module is loaded.
const FIRECRAWL_API_KEY = process.env.FIRECRAWL_API_KEY;

// Fail fast at import time: every search request sends this key.
if (FIRECRAWL_API_KEY === undefined || FIRECRAWL_API_KEY === '') {
  throw new Error('FIRECRAWL_API_KEY is not set in environment variables');
}
12 |
/** Fields of a Firecrawl search hit that this module reads; any of them may be missing. */
interface FirecrawlSearchHit {
  title?: string;
  url?: string;
  markdown?: unknown;
  description?: unknown;
  score?: unknown;
}

/**
 * Pick the best available text for a hit: non-blank markdown first,
 * then a non-empty description, then a fixed placeholder.
 */
function extractHitContent(hit: FirecrawlSearchHit): string {
  if (typeof hit.markdown === 'string' && hit.markdown.trim().length > 0) {
    return hit.markdown;
  }
  if (typeof hit.description === 'string' && hit.description.length > 0) {
    return hit.description;
  }
  return 'No content available';
}

/**
 * Search the web using the Firecrawl v1 search endpoint.
 *
 * @param query Search query
 * @param maxResults Maximum number of results to request and return
 * @param searchDepth Search depth label ('basic' or 'advanced'); it is only
 *   written to the log and is not sent to Firecrawl
 * @returns The query together with the normalized results (empty when
 *   Firecrawl returns no hits)
 * @throws Error prefixed with "Failed to search with Firecrawl:" on HTTP
 *   failures, malformed responses, or any other error during the search
 */
export async function searchWeb(
  query: string,
  maxResults: number = 5,
  searchDepth: 'basic' | 'advanced' = 'advanced'
): Promise<SearchResult> {
  try {
    console.error(`[Firecrawl] Searching for: "${query}" with depth: ${searchDepth}`);

    // Using only supported parameters according to Firecrawl v1 API
    const response = await axios.post(
      'https://api.firecrawl.dev/v1/search',
      {
        query: query,
        limit: maxResults,
        scrapeOptions: {
          formats: ["markdown"], // Get markdown content
          timeout: 25000, // Per-page scrape timeout, kept below the request timeout
          blockAds: true // Block ads for cleaner content
        },
        country: "us", // Set country for consistent results
        lang: "en" // Set language to English
      },
      {
        headers: {
          'Content-Type': 'application/json',
          'Authorization': `Bearer ${FIRECRAWL_API_KEY}`
        },
        timeout: 30000 // Timeout for the overall HTTP request
      }
    );

    const payload = response.data;

    // Reject anything that is not a successful response carrying a result array
    if (!payload.success || !payload.data || !Array.isArray(payload.data)) {
      console.error('[Firecrawl] Invalid response structure:', JSON.stringify(payload).substring(0, 500) + '...');
      throw new Error('Invalid response structure from Firecrawl');
    }

    const hits: FirecrawlSearchHit[] = payload.data;

    if (hits.length === 0) {
      console.warn(`[Firecrawl] No results found for query: "${query}"`);
      return {
        query,
        results: []
      };
    }

    return {
      query,
      // Trim first so hits that would be discarded are never transformed
      results: hits.slice(0, maxResults).map((hit, index) => ({
        title: hit.title || `Result ${index + 1} for "${query}"`,
        url: hit.url || 'No URL available',
        content: extractHitContent(hit),
        // Keep a legitimate score of 0; default only when no numeric score is present
        score: typeof hit.score === 'number' ? hit.score : 1.0
      }))
    };
  } catch (error) {
    console.error('[Firecrawl] Search error:', error);
    // Include the HTTP status and response body when the server answered with an error
    if (axios.isAxiosError(error) && error.response) {
      throw new Error(`Failed to search with Firecrawl: ${error.response.status} ${error.response.statusText} - ${JSON.stringify(error.response.data)}`);
    }
    throw new Error(`Failed to search with Firecrawl: ${error instanceof Error ? error.message : String(error)}`);
  }
}
```
--------------------------------------------------------------------------------
/src/services/research.ts:
--------------------------------------------------------------------------------
```typescript
1 | import { ResearchState, SearchResult, ResearchReport } from '../types/index.js';
2 | // import { searchTavily } from './tavily.js';
3 | import { searchWeb } from './firecrawl.js';
4 | import { analyzeResearch, generateReport } from './openai.js';
5 |
/** In-memory registry of research sessions, keyed by session ID. */
const researchSessions: Map<string, ResearchState> = new Map();
8 |
/**
 * Create a fresh research session and register it under the given ID.
 *
 * A session already stored under the same ID is replaced.
 *
 * @param sessionId Unique session identifier
 * @param query Research query; also used as the first search topic
 * @param depth Maximum research depth (defaults to 3)
 * @returns The newly created research state
 */
export function initializeResearch(
  sessionId: string,
  query: string,
  depth: number = 3
): ResearchState {
  // Key order is kept stable because this object is serialized to JSON elsewhere.
  const freshState: ResearchState = {
    query,
    depth,
    currentDepth: 0,
    topics: [],
    findings: [],
    nextSearchTopic: query, // the first step searches the query itself
    shouldContinue: true
  };

  researchSessions.set(sessionId, freshState);

  return freshState;
}
35 |
/**
 * Look up the stored state of a research session.
 *
 * @param sessionId Session identifier
 * @returns The session's research state, or null when no such session exists
 */
export function getResearchState(sessionId: string): ResearchState | null {
  const stored = researchSessions.get(sessionId);
  return stored === undefined ? null : stored;
}
45 |
/**
 * Run one research step for a session: search the pending topic, record the
 * formatted finding, and ask the analysis service for the next topic.
 *
 * When the session has already reached its maximum depth the state is
 * returned unchanged. The depth counter advances on every executed step,
 * including steps that end in an error.
 *
 * @param sessionId Session identifier
 * @returns Updated research state
 * @throws Error if the session does not exist or the step fails
 */
export async function executeResearchStep(sessionId: string): Promise<ResearchState> {
  const session = researchSessions.get(sessionId);
  if (session === undefined) {
    throw new Error(`No research session found with ID: ${sessionId}`);
  }

  // Depth budget exhausted - research is complete
  if (session.currentDepth >= session.depth) {
    return session;
  }

  try {
    // Fall back to the original query when no follow-up topic is pending
    const topic = session.nextSearchTopic || session.query;
    session.topics.push(topic);

    console.error(`[Research] Searching for: "${topic}"`);

    let searchResult: SearchResult;
    try {
      searchResult = await searchWeb(topic);
    } catch (searchError) {
      const reason = searchError instanceof Error ? searchError.message : String(searchError);
      console.error(`[Research] Search error: ${reason}`);
      // A failed search becomes a placeholder result so the step can still finish
      searchResult = {
        query: topic,
        results: [{
          title: 'Search Error',
          url: 'https://error.example.com',
          content: `An error occurred while searching: ${reason}. The search will continue with the next topic.`,
          score: 0
        }]
      };
    }

    // Record the formatted results as this step's finding
    session.findings.push(formatSearchResults(searchResult));

    // The analysis runs on every step, whatever the current shouldContinue value
    const analysis = await analyzeResearch(
      session.query,
      session.findings,
      session.topics
    );

    session.nextSearchTopic = analysis.nextSearchTopic;
    session.shouldContinue = analysis.shouldContinue;
    session.currentDepth++;

    researchSessions.set(sessionId, session);

    return session;
  } catch (error) {
    console.error('Error executing research step:', error);

    // Count the failed step so callers looping on the depth still terminate
    session.currentDepth++;

    // With no pending topic there is nothing left to search
    if (!session.nextSearchTopic) {
      session.shouldContinue = false;
    }

    researchSessions.set(sessionId, session);

    throw new Error(`Failed to execute research step: ${error instanceof Error ? error.message : String(error)}`);
  }
}
132 |
/**
 * Build the final report for a session from its query and collected findings.
 *
 * @param sessionId Session identifier
 * @returns The generated report together with the session's query, findings and topics
 * @throws Error if no session exists for the given ID
 */
export async function generateResearchReport(sessionId: string): Promise<ResearchReport> {
  const session = researchSessions.get(sessionId);
  if (session === undefined) {
    throw new Error(`No research session found with ID: ${sessionId}`);
  }

  const reportText = await generateReport(session.query, session.findings);

  const { query, findings, topics } = session;
  return { query, findings, topics, report: reportText };
}
154 |
/**
 * Render search results as a markdown-style finding.
 *
 * The output has a heading naming the query, one numbered section per
 * source (title, URL, citation line, content), and a closing list of
 * numbered source URLs for citation.
 *
 * @param searchResult Search results
 * @returns Formatted finding text
 */
function formatSearchResults(searchResult: SearchResult): string {
  const { query, results } = searchResult;

  // One detailed section per source, numbered from 1
  const sourceSections = results.map((entry, position) => {
    const n = position + 1;
    return (
      `## Source [${n}]: ${entry.title}\n` +
      `URL: ${entry.url}\n` +
      `Citation: [${n}] ${entry.url}\n\n` +
      `### Content from Source [${n}]:\n${entry.content}\n\n`
    );
  });

  // Compact, standardized source list for easy citation
  const citationLines = results.map(
    (entry, position) => `[${position + 1}] ${entry.url} - ${entry.title}\n`
  );

  return [
    `# Search Results for: ${query}\n\n`,
    ...sourceSections,
    `# Source URLs for Citation\n\n`,
    ...citationLines
  ].join('');
}
```
--------------------------------------------------------------------------------
/src/client-example.ts:
--------------------------------------------------------------------------------
```typescript
1 | import { Client } from "@modelcontextprotocol/sdk/client/index.js";
2 | import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
3 | import dotenv from 'dotenv';
4 |
5 | dotenv.config();
6 |
// Shapes of the tool responses this example client reads

/** Tool call result as consumed by this client. */
interface ToolResponse {
  content: Array<{
    type: string;
    text: string;
  }>;
  /** Set when the tool reported a failure; the text then carries the error message. */
  isError?: boolean;
}

/** JSON payload carried in the text of an `initialize-research` response. */
interface InitResponse {
  sessionId: string;
  message: string;
  // Never read by this client, so it is left opaque instead of `any`.
  state: unknown;
}

/** JSON payload carried in the text of an `execute-research-step` response. */
interface StepResponse {
  message: string;
  currentDepth: number;
  maxDepth: number;
  lastTopic: string;
  nextTopic: string | null;
  shouldContinue: boolean;
  // Never read by this client, so it is left opaque instead of `any`.
  state: unknown;
}
31 |
/**
 * Return the text of the first content item of a tool response.
 *
 * @param response Tool response to inspect
 * @param toolName Tool name used in the error message
 * @returns Text of the first content item
 * @throws Error if there is no first content item with a string `text`
 */
function firstContentText(response: ToolResponse, toolName: string): string {
  const first = response.content ? response.content[0] : undefined;
  if (!first || typeof first.text !== 'string') {
    throw new Error(`Invalid response format from ${toolName}`);
  }
  return first.text;
}

/** Print the final report, preceded by a banner. */
function printFinalReport(reportText: string): void {
  console.error("\n================================");
  console.error("FINAL RESEARCH REPORT");
  console.error("================================\n");
  console.error(reportText);
}

/**
 * Example MCP client that connects to the DeepResearch server
 * and performs a complete research workflow.
 *
 * Command line arguments:
 *   argv[2] - research query (a default query is used when omitted)
 *   argv[3] - research depth (defaults to 2 when omitted or not a non-zero number)
 *   argv[4] - pass "complete" to run everything through the single
 *             `complete-research` tool instead of the step-by-step tools
 */
async function main() {
  const query = process.argv[2] || "The impact of quantum computing on cryptography";
  const depth = Number(process.argv[3]) || 2;
  const useCompleteResearch = process.argv[4] === "complete";

  console.error(`Starting research on: "${query}" with depth ${depth}`);
  if (useCompleteResearch) {
    console.error("Using complete-research flow (all steps in one call)");
  }

  // Connect to the server
  const transport = new StdioClientTransport({
    command: "node",
    args: ["dist/index.js"]
  });

  const client = new Client({
    name: "deep-research-client",
    version: "1.0.0"
  }, {
    capabilities: {
      resources: {},
      tools: {},
      prompts: {}
    }
  });

  console.error("Connecting to DeepResearch MCP server...");
  await client.connect(transport);
  console.error("Connected successfully.");

  try {
    if (useCompleteResearch) {
      // Use the complete-research tool for a one-step process
      console.error("Executing complete research process...");
      const completeResult = await client.callTool(
        {
          name: "complete-research",
          arguments: {
            query,
            depth,
            timeout: 180000 // 3 minutes, passed to the tool as an argument
          }
        },
        undefined,
        // The client-side timeout is a request option (third argument); a
        // `timeout` key inside the params object is not a request option.
        { timeout: 240000 } // Give the client a bit extra time (4 minutes)
      ) as ToolResponse;

      const completeText = firstContentText(completeResult, 'complete-research');

      if (completeResult.isError) {
        console.error(`Error in complete research flow: ${completeText}`);
      } else {
        printFinalReport(completeText);
      }
    } else {
      // Step-by-step process: initialize, execute steps, generate report
      console.error("Initializing research session...");
      const initResult = await client.callTool({
        name: "initialize-research",
        arguments: {
          query,
          depth
        }
      }) as ToolResponse;

      // Parse the response to get sessionId
      const initData = JSON.parse(firstContentText(initResult, 'initialize-research')) as InitResponse;
      const { sessionId } = initData;
      console.error(`Research session initialized with ID: ${sessionId}`);

      // Execute steps until the requested depth is reached
      let currentDepth = 0;

      while (currentDepth < depth) {
        console.error(`\nExecuting research step ${currentDepth + 1}/${depth}...`);
        const stepResult = await client.callTool({
          name: "execute-research-step",
          arguments: { sessionId }
        }) as ToolResponse;

        const stepText = firstContentText(stepResult, 'execute-research-step');

        try {
          if (stepResult.isError) {
            console.error(`Error: ${stepText}`);
            // Increment depth to continue despite errors
            currentDepth++;
            continue;
          }

          const stepData = JSON.parse(stepText) as StepResponse;
          // The server's counter is authoritative for loop progress
          currentDepth = stepData.currentDepth;

          console.error(`Completed step ${currentDepth}/${depth}`);
          console.error(`Last topic searched: ${stepData.lastTopic}`);

          if (stepData.nextTopic) {
            console.error(`Next topic to search: ${stepData.nextTopic}`);
          } else {
            console.error("No further topics to search.");
          }
        } catch (parseError) {
          console.error(`Error parsing response: ${parseError}`);
          console.error(`Raw response: ${stepText}`);
          // Increment depth to continue despite errors
          currentDepth++;
        }
      }

      // Generate final report
      console.error("\nGenerating final research report...");
      const reportResult = await client.callTool(
        {
          name: "generate-report",
          arguments: { sessionId }
        },
        undefined,
        { timeout: 180000 } // 3 minutes request timeout for report generation
      ) as ToolResponse;

      const reportText = firstContentText(reportResult, 'generate-report');

      if (reportResult.isError) {
        console.error(`Error generating report: ${reportText}`);
      } else {
        printFinalReport(reportText);
      }
    }
  } catch (error) {
    console.error("Error during research process:", error);
  } finally {
    console.error("\nDisconnecting from server...");
    try {
      // Closing the client closes its stdio transport, so the spawned
      // server process is not left running after the example finishes.
      await client.close();
    } catch (closeError) {
      console.error("Error while disconnecting:", closeError);
    }
  }
}

main().catch(console.error);
```
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
```json
1 | {
2 | "compilerOptions": {
3 | /* Visit https://aka.ms/tsconfig to read more about this file */
4 |
5 | /* Projects */
6 | // "incremental": true, /* Save .tsbuildinfo files to allow for incremental compilation of projects. */
7 | // "composite": true, /* Enable constraints that allow a TypeScript project to be used with project references. */
8 | // "tsBuildInfoFile": "./.tsbuildinfo", /* Specify the path to .tsbuildinfo incremental compilation file. */
9 | // "disableSourceOfProjectReferenceRedirect": true, /* Disable preferring source files instead of declaration files when referencing composite projects. */
10 | // "disableSolutionSearching": true, /* Opt a project out of multi-project reference checking when editing. */
11 | // "disableReferencedProjectLoad": true, /* Reduce the number of projects loaded automatically by TypeScript. */
12 |
13 | /* Language and Environment */
14 | "target": "ES2020",
15 | // "lib": [], /* Specify a set of bundled library declaration files that describe the target runtime environment. */
16 | // "jsx": "preserve", /* Specify what JSX code is generated. */
17 | // "libReplacement": true, /* Enable lib replacement. */
18 | // "experimentalDecorators": true, /* Enable experimental support for legacy experimental decorators. */
19 | // "emitDecoratorMetadata": true, /* Emit design-type metadata for decorated declarations in source files. */
20 | // "jsxFactory": "", /* Specify the JSX factory function used when targeting React JSX emit, e.g. 'React.createElement' or 'h'. */
21 | // "jsxFragmentFactory": "", /* Specify the JSX Fragment reference used for fragments when targeting React JSX emit e.g. 'React.Fragment' or 'Fragment'. */
22 | // "jsxImportSource": "", /* Specify module specifier used to import the JSX factory functions when using 'jsx: react-jsx*'. */
23 | // "reactNamespace": "", /* Specify the object invoked for 'createElement'. This only applies when targeting 'react' JSX emit. */
24 | // "noLib": true, /* Disable including any library files, including the default lib.d.ts. */
25 | // "useDefineForClassFields": true, /* Emit ECMAScript-standard-compliant class fields. */
26 | // "moduleDetection": "auto", /* Control what method is used to detect module-format JS files. */
27 |
28 | /* Modules */
29 | "module": "NodeNext",
30 | "moduleResolution": "NodeNext",
31 | // "rootDir": "./", /* Specify the root folder within your source files. */
32 | // "baseUrl": "./", /* Specify the base directory to resolve non-relative module names. */
33 | // "paths": {}, /* Specify a set of entries that re-map imports to additional lookup locations. */
34 | // "rootDirs": [], /* Allow multiple folders to be treated as one when resolving modules. */
35 | // "typeRoots": [], /* Specify multiple folders that act like './node_modules/@types'. */
36 | // "types": [], /* Specify type package names to be included without being referenced in a source file. */
37 | // "allowUmdGlobalAccess": true, /* Allow accessing UMD globals from modules. */
38 | // "moduleSuffixes": [], /* List of file name suffixes to search when resolving a module. */
39 | // "allowImportingTsExtensions": true, /* Allow imports to include TypeScript file extensions. Requires '--moduleResolution bundler' and either '--noEmit' or '--emitDeclarationOnly' to be set. */
40 | // "rewriteRelativeImportExtensions": true, /* Rewrite '.ts', '.tsx', '.mts', and '.cts' file extensions in relative import paths to their JavaScript equivalent in output files. */
41 | // "resolvePackageJsonExports": true, /* Use the package.json 'exports' field when resolving package imports. */
42 | // "resolvePackageJsonImports": true, /* Use the package.json 'imports' field when resolving imports. */
43 | // "customConditions": [], /* Conditions to set in addition to the resolver-specific defaults when resolving imports. */
44 | // "noUncheckedSideEffectImports": true, /* Check side effect imports. */
45 | "resolveJsonModule": true,
46 | // "allowArbitraryExtensions": true, /* Enable importing files with any extension, provided a declaration file is present. */
47 | // "noResolve": true, /* Disallow 'import's, 'require's or '<reference>'s from expanding the number of files TypeScript should add to a project. */
48 |
49 | /* JavaScript Support */
50 | // "allowJs": true, /* Allow JavaScript files to be a part of your program. Use the 'checkJS' option to get errors from these files. */
51 | // "checkJs": true, /* Enable error reporting in type-checked JavaScript files. */
52 | // "maxNodeModuleJsDepth": 1, /* Specify the maximum folder depth used for checking JavaScript files from 'node_modules'. Only applicable with 'allowJs'. */
53 |
54 | /* Emit */
55 | "declaration": true,
56 | // "declarationMap": true, /* Create sourcemaps for d.ts files. */
57 | // "emitDeclarationOnly": true, /* Only output d.ts files and not JavaScript files. */
58 | "sourceMap": true,
59 | // "inlineSourceMap": true, /* Include sourcemap files inside the emitted JavaScript. */
60 | // "noEmit": true, /* Disable emitting files from a compilation. */
61 | // "outFile": "./", /* Specify a file that bundles all outputs into one JavaScript file. If 'declaration' is true, also designates a file that bundles all .d.ts output. */
62 | "outDir": "./dist",
63 | // "removeComments": true, /* Disable emitting comments. */
64 | // "importHelpers": true, /* Allow importing helper functions from tslib once per project, instead of including them per-file. */
65 | // "downlevelIteration": true, /* Emit more compliant, but verbose and less performant JavaScript for iteration. */
66 | // "sourceRoot": "", /* Specify the root path for debuggers to find the reference source code. */
67 | // "mapRoot": "", /* Specify the location where debugger should locate map files instead of generated locations. */
68 | // "inlineSources": true, /* Include source code in the sourcemaps inside the emitted JavaScript. */
69 | // "emitBOM": true, /* Emit a UTF-8 Byte Order Mark (BOM) in the beginning of output files. */
70 | // "newLine": "crlf", /* Set the newline character for emitting files. */
71 | // "stripInternal": true, /* Disable emitting declarations that have '@internal' in their JSDoc comments. */
72 | // "noEmitHelpers": true, /* Disable generating custom helper functions like '__extends' in compiled output. */
73 | // "noEmitOnError": true, /* Disable emitting files if any type checking errors are reported. */
74 | // "preserveConstEnums": true, /* Disable erasing 'const enum' declarations in generated code. */
75 | // "declarationDir": "./", /* Specify the output directory for generated declaration files. */
76 |
77 | /* Interop Constraints */
78 | // "isolatedModules": true, /* Ensure that each file can be safely transpiled without relying on other imports. */
79 | // "verbatimModuleSyntax": true, /* Do not transform or elide any imports or exports not marked as type-only, ensuring they are written in the output file's format based on the 'module' setting. */
80 | // "isolatedDeclarations": true, /* Require sufficient annotation on exports so other tools can trivially generate declaration files. */
81 | // "erasableSyntaxOnly": true, /* Do not allow runtime constructs that are not part of ECMAScript. */
82 | // "allowSyntheticDefaultImports": true, /* Allow 'import x from y' when a module doesn't have a default export. */
83 | "esModuleInterop": true,
84 | // "preserveSymlinks": true, /* Disable resolving symlinks to their realpath. This correlates to the same flag in node. */
85 | "forceConsistentCasingInFileNames": true,
86 |
87 | /* Type Checking */
88 | "strict": true,
89 | // "noImplicitAny": true, /* Enable error reporting for expressions and declarations with an implied 'any' type. */
90 | // "strictNullChecks": true, /* When type checking, take into account 'null' and 'undefined'. */
91 | // "strictFunctionTypes": true, /* When assigning functions, check to ensure parameters and the return values are subtype-compatible. */
92 | // "strictBindCallApply": true, /* Check that the arguments for 'bind', 'call', and 'apply' methods match the original function. */
93 | // "strictPropertyInitialization": true, /* Check for class properties that are declared but not set in the constructor. */
94 | // "strictBuiltinIteratorReturn": true, /* Built-in iterators are instantiated with a 'TReturn' type of 'undefined' instead of 'any'. */
95 | // "noImplicitThis": true, /* Enable error reporting when 'this' is given the type 'any'. */
96 | // "useUnknownInCatchVariables": true, /* Default catch clause variables as 'unknown' instead of 'any'. */
97 | // "alwaysStrict": true, /* Ensure 'use strict' is always emitted. */
98 | // "noUnusedLocals": true, /* Enable error reporting when local variables aren't read. */
99 | // "noUnusedParameters": true, /* Raise an error when a function parameter isn't read. */
100 | // "exactOptionalPropertyTypes": true, /* Interpret optional property types as written, rather than adding 'undefined'. */
101 | // "noImplicitReturns": true, /* Enable error reporting for codepaths that do not explicitly return in a function. */
102 | // "noFallthroughCasesInSwitch": true, /* Enable error reporting for fallthrough cases in switch statements. */
103 | // "noUncheckedIndexedAccess": true, /* Add 'undefined' to a type when accessed using an index. */
104 | // "noImplicitOverride": true, /* Ensure overriding members in derived classes are marked with an override modifier. */
105 | // "noPropertyAccessFromIndexSignature": true, /* Enforces using indexed accessors for keys declared using an indexed type. */
106 | // "allowUnusedLabels": true, /* Disable error reporting for unused labels. */
107 | // "allowUnreachableCode": true, /* Disable error reporting for unreachable code. */
108 |
109 | /* Completeness */
110 | // "skipDefaultLibCheck": true, /* Skip type checking .d.ts files that are included with TypeScript. */
111 | "skipLibCheck": true
112 | },
113 | "include": ["src/**/*"],
114 | "exclude": ["node_modules", "dist"],
115 | "rootDir": "./src"
116 | }
117 |
```
--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
```typescript
1 | import { McpServer, ResourceTemplate } from '@modelcontextprotocol/sdk/server/mcp.js';
2 | import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
3 | import { z } from 'zod';
4 | import * as crypto from 'crypto';
5 | import dotenv from 'dotenv';
6 |
7 | // Import our services
8 | import { initializeResearch, executeResearchStep, getResearchState, generateResearchReport } from './services/research.js';
9 | import { ResearchReport } from './types/index.js';
10 |
// Populate process.env from a local .env file, if one exists
dotenv.config();

// MCP server instance; name and version fall back to built-in defaults
// when the corresponding environment variables are unset or empty
const server = new McpServer({
  name: process.env['MCP_SERVER_NAME'] || 'DeepResearch',
  version: process.env['MCP_SERVER_VERSION'] || '1.0.0'
});
19 |
20 | // Read-only resource: the full state of one research session, serialized as pretty-printed JSON.
21 | server.resource(
22 |   'research-state',
23 |   new ResourceTemplate('research://state/{sessionId}', { list: undefined }),
24 |   async (uri, params) => {
25 |     const sessionId = params.sessionId as string;
26 |     const session = getResearchState(sessionId);
27 |
28 |     // Unknown session IDs are reported as an error rather than as an empty resource.
29 |     if (!session) {
30 |       throw new Error(`Research session not found: ${sessionId}`);
31 |     }
32 |
33 |     const serialized = JSON.stringify(session, null, 2);
34 |     return {
35 |       contents: [{ uri: uri.href, text: serialized }]
36 |     };
37 |   }
38 | );
39 |
40 | // Read-only resource: only the collected findings of a session, joined into one text document.
41 | server.resource(
42 |   'research-findings',
43 |   new ResourceTemplate('research://findings/{sessionId}', { list: undefined }),
44 |   async (uri, params) => {
45 |     const sessionId = params.sessionId as string;
46 |     const session = getResearchState(sessionId);
47 |     // Unknown session IDs are reported as an error rather than as an empty resource.
48 |     if (!session) {
49 |       throw new Error(`Research session not found: ${sessionId}`);
50 |     }
51 |
52 |     // Individual findings are separated by a "---" divider line.
53 |     const joinedFindings = session.findings.join('\n\n---\n\n');
54 |     return {
55 |       contents: [{ uri: uri.href, text: joinedFindings }]
56 |     };
57 |   }
58 | );
59 |
60 | // Tool: create a new research session and hand its ID and initial state back to the caller.
61 | server.tool(
62 |   'initialize-research',
63 |   {
64 |     query: z.string(),
65 |     depth: z.number().default(3),
66 |   },
67 |   async ({ query, depth }) => {
68 |     try {
69 |       // Every session gets a fresh random UUID as its identifier.
70 |       const sessionId = crypto.randomUUID();
71 |       const state = initializeResearch(sessionId, query, depth);
72 |       const payload = {
73 |         sessionId,
74 |         message: 'Research session initialized',
75 |         state
76 |       };
77 |
78 |       return {
79 |         content: [{ type: 'text', text: JSON.stringify(payload, null, 2) }]
80 |       };
81 |     } catch (error) {
82 |       console.error('Error initializing research:', error);
83 |       // Report the failure through the tool result instead of throwing.
84 |       const reason = error instanceof Error ? error.message : String(error);
85 |       return {
86 |         content: [{
87 |           type: 'text',
88 |           text: `Error initializing research: ${reason}`
89 |         }],
90 |         isError: true
91 |       };
92 |     }
93 |   }
94 | );
95 |
96 | // Tool: advance an existing research session by one step and report its progress as JSON.
97 | server.tool(
98 |   'execute-research-step',
99 |   {
100 |     sessionId: z.string(),
101 |   },
102 |   async ({ sessionId }) => {
103 |     try {
104 |       const updatedState = await executeResearchStep(sessionId);
105 |       const lastTopic = updatedState.topics[updatedState.topics.length - 1];
106 |       return {
107 |         content: [{
108 |           type: 'text',
109 |           text: JSON.stringify({
110 |             message: 'Research step executed',
111 |             currentDepth: updatedState.currentDepth,
112 |             maxDepth: updatedState.depth,
113 |             lastTopic,
114 |             nextTopic: updatedState.nextSearchTopic,
115 |             shouldContinue: updatedState.shouldContinue,
116 |             state: updatedState
117 |           }, null, 2)
118 |         }]
119 |       };
120 |     } catch (error) {
121 |       console.error('Error executing research step:', error);
122 |       const reason = error instanceof Error ? error.message : String(error);
123 |
124 |       // The session may still be retrievable even though this step failed.
125 |       const currentState = getResearchState(sessionId);
126 |
127 |       // Without a session there is nothing to report besides the error itself.
128 |       if (!currentState) {
129 |         return {
130 |           content: [{
131 |             type: 'text',
132 |             text: JSON.stringify({
133 |               message: `Error executing research step: ${reason}`,
134 |               error: true
135 |             }, null, 2)
136 |           }],
137 |           isError: true
138 |         };
139 |       }
140 |       // Otherwise answer in the same JSON shape as a successful step, flagged as an error.
141 |       return {
142 |         content: [{
143 |           type: 'text',
144 |           text: JSON.stringify({
145 |             message: `Error: ${reason}`,
146 |             currentDepth: currentState.currentDepth,
147 |             maxDepth: currentState.depth,
148 |             lastTopic: currentState.topics.length > 0 ? currentState.topics[currentState.topics.length - 1] : currentState.query,
149 |             nextTopic: currentState.nextSearchTopic,
150 |             shouldContinue: false, // Stop research on error
151 |             state: currentState,
152 |             error: true
153 |           }, null, 2)
154 |         }]
155 |       };
156 |     }
157 |   }
158 | );
159 |
160 | // Tool: generate the final report for a session, bounded by `timeout` ms (default 60000).
161 | server.tool(
162 |   'generate-report',
163 |   {
164 |     sessionId: z.string(),
165 |     timeout: z.number().optional().default(60000)
166 |   },
167 |   async ({ sessionId, timeout }) => {
168 |     // Keep the timer handle so it can be cancelled once the race is decided; otherwise
169 |     // every call would leave a timer pending for the full timeout duration.
170 |     let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
171 |     const timeoutPromise = new Promise((_, reject) => {
172 |       timeoutHandle = setTimeout(() => reject(new Error('Report generation timed out')), timeout);
173 |     });
174 |     try {
175 |       // Whichever settles first wins: the finished report or the timeout rejection.
176 |       const report = await Promise.race([
177 |         generateResearchReport(sessionId),
178 |         timeoutPromise
179 |       ]) as ResearchReport;
180 |
181 |       return {
182 |         content: [{ type: 'text', text: report.report }]
183 |       };
184 |     } catch (error) {
185 |       console.error('Error generating research report:', error);
186 |       const reason = error instanceof Error ? error.message : String(error);
187 |       // The session may still hold findings even though report generation failed.
188 |       const currentState = getResearchState(sessionId);
189 |
190 |       // With findings available, fall back to a basic report listing what was researched.
191 |       if (currentState && currentState.findings.length > 0) {
192 |         return {
193 |           content: [{
194 |             type: 'text',
195 |             text: `# Research Report (Error Recovery)\n\n` +
196 |               `**Original Query:** ${currentState.query}\n\n` +
197 |               `**Note:** This is a partial report generated after an error occurred: ${reason}\n\n` +
198 |               `## Summary of Findings\n\n` +
199 |               `The research process collected ${currentState.findings.length} sets of findings ` +
200 |               `across ${currentState.topics.length} topics but encountered an error during the final report generation.\n\n` +
201 |               `### Topics Researched\n\n` +
202 |               currentState.topics.map((topic, index) => `${index + 1}. ${topic}`).join('\n')
203 |           }]
204 |         };
205 |       }
206 |
207 |       return {
208 |         content: [{
209 |           type: 'text',
210 |           text: JSON.stringify({
211 |             message: `Error generating research report: ${reason}`,
212 |             error: true
213 |           }, null, 2)
214 |         }],
215 |         isError: true
216 |       };
217 |     } finally {
218 |       clearTimeout(timeoutHandle);
219 |     }
220 |   }
221 | );
222 |
223 | // Tool: run a whole research session in one call: initialize, execute up to `depth` steps, report.
224 | server.tool(
225 |   'complete-research',
226 |   {
227 |     query: z.string(),
228 |     depth: z.number().default(3),
229 |     timeout: z.number().optional().default(180000) // 3 minute timeout (same as client setting)
230 |   },
231 |   async ({ query, depth, timeout }) => {
232 |     // Created outside the try block so the error handler can look the session up too.
233 |     const sessionId = crypto.randomUUID();
234 |     // Report-timeout timer handle; cleared in `finally` so it never outlives this call.
235 |     let timeoutHandle: ReturnType<typeof setTimeout> | undefined;
236 |
237 |     try {
238 |       // Step 1: Initialize research
239 |       console.error(`Initializing research session for: "${query}" with depth ${depth}`);
240 |       initializeResearch(sessionId, query, depth);
241 |       console.error(`Research session initialized with ID: ${sessionId}`);
242 |
243 |       // Step 2: Execute all research steps sequentially
244 |       let currentDepth = 0;
245 |
246 |       while (currentDepth < depth) {
247 |         const depthBeforeStep = currentDepth;
248 |         console.error(`Executing research step ${currentDepth + 1}/${depth}...`);
249 |         try {
250 |           const stepData = await executeResearchStep(sessionId);
251 |           currentDepth = stepData.currentDepth;
252 |           console.error(`Completed step ${currentDepth}/${depth}`);
253 |           console.error(`Last topic searched: ${stepData.topics[stepData.topics.length - 1]}`);
254 |           console.error(stepData.nextSearchTopic ? `Next topic to search: ${stepData.nextSearchTopic}` : `No further topics to search.`);
255 |         } catch (stepError) {
256 |           // Log the error but continue with next steps
257 |           console.error(`Error in research step ${currentDepth + 1}: ${stepError}`);
258 |           // Prefer the depth recorded in the session state, if the session can still be read.
259 |           const currentState = getResearchState(sessionId);
260 |           if (currentState) {
261 |             currentDepth = currentState.currentDepth;
262 |           }
263 |         }
264 |
265 |         // Guarantee progress: a step that failed (or returned) without advancing the depth
266 |         // would otherwise be retried forever, so it is counted as consumed.
267 |         if (currentDepth <= depthBeforeStep) {
268 |           currentDepth = depthBeforeStep + 1;
269 |         }
270 |       }
271 |
272 |       // Step 3: Generate the final report with timeout handling
273 |       console.error(`Generating final research report...`);
274 |       const timeoutPromise = new Promise((_, reject) => {
275 |         timeoutHandle = setTimeout(() => reject(new Error('Report generation timed out')), timeout);
276 |       });
277 |
278 |       // Race the report generation against the timeout
279 |       const report = await Promise.race([
280 |         generateResearchReport(sessionId),
281 |         timeoutPromise
282 |       ]) as ResearchReport;
283 |
284 |       return {
285 |         content: [{
286 |           type: 'text',
287 |           text: `# Complete Research Results for "${query}"\n\n` +
288 |             `Research completed with depth: ${depth}\n\n` +
289 |             `## Final Report\n\n${report.report}`
290 |         }]
291 |       };
292 |     } catch (error) {
293 |       console.error('Error in complete research process:', error);
294 |       const reason = error instanceof Error ? error.message : String(error);
295 |
296 |       // Get the current state to generate a fallback report
297 |       const currentState = getResearchState(sessionId);
298 |
299 |       if (currentState && currentState.findings.length > 0) {
300 |         return {
301 |           content: [{
302 |             type: 'text',
303 |             text: `# Research Report (Error Recovery)\n\n` +
304 |               `**Original Query:** ${currentState.query}\n\n` +
305 |               `**Note:** This is a partial report generated after an error occurred: ${reason}\n\n` +
306 |               `## Summary of Findings\n\n` +
307 |               `The research process collected ${currentState.findings.length} sets of findings ` +
308 |               `across ${currentState.topics.length} topics but encountered an error during the final report generation.\n\n` +
309 |               `### Topics Researched\n\n` +
310 |               currentState.topics.map((topic, index) => `${index + 1}. ${topic}`).join('\n')
311 |           }]
312 |         };
313 |       }
314 |
315 |       return {
316 |         content: [{
317 |           type: 'text',
318 |           text: JSON.stringify({
319 |             message: `Error performing research: ${reason}`,
320 |             error: true
321 |           }, null, 2)
322 |         }],
323 |         isError: true
324 |       };
325 |     } finally {
326 |       clearTimeout(timeoutHandle);
327 |     }
328 |   }
329 | );
330 |
331 | // Prompt template that walks a client through the initialize -> iterate -> report workflow.
332 | server.prompt(
333 |   'deep-research',
334 |   'A deep research tool that explores topics thoroughly through iterative search',
335 |   () => {
336 |     // A single user message carrying the instructions; the prompt takes no arguments.
337 |     return {
338 |       messages: [{
339 |         role: 'user',
340 |         content: { type: 'text', text: `Please conduct a deep research session on a topic of interest.
341 |
342 | I'm looking for a comprehensive analysis with multiple angles and perspectives. The research should explore the topic thoroughly, considering different viewpoints and citing reliable sources.
343 |
344 | Please follow these steps:
345 | 1. Initialize a research session with my topic
346 | 2. Perform multiple rounds of iterative research, exploring different aspects
347 | 3. Generate a comprehensive report with your findings` }
348 |       }]
349 |     };
350 |   }
351 | );
352 |
353 | // Connect the MCP server to the stdio transport, logging progress through console.error.
354 | async function startServer(): Promise<void> {
355 |   const transport = new StdioServerTransport();
356 |   // Log the name/version the server was configured with, not the class name ("McpServer").
357 |   const serverName = process.env.MCP_SERVER_NAME || 'DeepResearch';
358 |   const serverVersion = process.env.MCP_SERVER_VERSION || '1.0.0';
359 |   console.error(`Starting DeepResearch MCP Server (${serverName} v${serverVersion})`);
360 |
361 |   try {
362 |     await server.connect(transport);
363 |     console.error('Server connected. Waiting for requests...');
364 |   } catch (error) {
365 |     console.error('Failed to start server:', error);
366 |     process.exit(1); // a server that cannot connect is of no use; exit with a failure code
367 |   }
368 | }
369 |
370 | startServer().catch(console.error);
```
--------------------------------------------------------------------------------
/src/services/openai.ts:
--------------------------------------------------------------------------------
```typescript
1 | import OpenAI from 'openai';
2 | import { AnalysisResult } from '../types/index.js';
3 | import dotenv from 'dotenv';
4 |
5 | dotenv.config();
6 |
7 | // Fail fast at module load time when no API key is configured.
8 | const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
9 | if (!OPENAI_API_KEY) {
10 |   throw new Error('OPENAI_API_KEY is not set in environment variables');
11 | }
12 |
13 | // Client instance used by the functions below.
14 | const clientOptions = { apiKey: OPENAI_API_KEY };
15 | const openai = new OpenAI(clientOptions);
16 |
17 | /**
18 |  * Analyze research findings and determine the next steps.
19 |  *
20 |  * Only the two most recent findings are sent in full; older ones are reduced to their
21 |  * search query and source titles/URLs. The model's reply is parsed leniently (case and
22 |  * spacing are ignored, one pair of wrapping brackets/quotes is removed from the query).
23 |  *
24 |  * @param query Original research query
25 |  * @param findings Research findings so far
26 |  * @param searchedTopics Topics already searched
27 |  * @returns Whether to continue, and the next search topic (null when stopping or none given)
28 |  * @throws Rethrows API errors; throws when the response carries no content
29 |  */
30 | export async function analyzeResearch(
31 |   query: string,
32 |   findings: string[],
33 |   searchedTopics: string[]
34 | ): Promise<{ nextSearchTopic: string | null; shouldContinue: boolean }> {
35 |   console.error(`[OpenAI Service] Analyzing research with ${findings.length} findings`);
36 |   try {
37 |     let processedFindings: string;
38 |     // If we have more than 2 findings, we need to be careful about token limits
39 |     if (findings.length > 2) {
40 |       console.error(`[OpenAI Service] Managing tokens for ${findings.length} findings`);
41 |       // Keep the two most recent findings intact
42 |       const recentFindings = findings.slice(-2);
43 |       // For older findings, extract only essential information to save tokens
44 |       const olderFindings = findings.slice(0, -2).map((finding, index) => {
45 |         // Extract just the search query and source titles/URLs
46 |         const searchQueryMatch = finding.match(/# Search Results for: (.*?)\n/);
47 |         const searchQuery = searchQueryMatch ? searchQueryMatch[1] : `Research step ${index + 1}`;
48 |         // Extract source titles and URLs only (no full content)
49 |         const sourceMatches = [...finding.matchAll(/## Source \[(\d+)\]: (.*?)\nURL: (https?:\/\/[^\s]+)/g)];
50 |         const sourceSummaries = sourceMatches.map(match =>
51 |           `Source [${match[1]}]: ${match[2]}\nURL: ${match[3]}`
52 |         ).join('\n\n');
53 |         return `# Summary of Search for: ${searchQuery}\n\n${sourceSummaries}\n\n[Full content omitted to save tokens]`;
54 |       });
55 |       // Combine summaries of older findings with full recent findings
56 |       processedFindings = [...olderFindings, ...recentFindings].join('\n\n---\n\n');
57 |       console.error(`[OpenAI Service] Processed ${findings.length} findings: ${olderFindings.length} summarized, ${recentFindings.length} full`);
58 |     } else {
59 |       // If we have 2 or fewer findings, we can include all of them in full
60 |       processedFindings = findings.join('\n\n---\n\n');
61 |       console.error(`[OpenAI Service] Using all ${findings.length} findings in full`);
62 |     }
63 |
64 |     // Create a list of already searched topics to avoid duplication
65 |     const searchedTopicsText = searchedTopics.length
66 |       ? `ALREADY SEARCHED TOPICS (DO NOT REPEAT THESE):\n${searchedTopics.map(t => `- ${t}`).join('\n')}`
67 |       : 'No topics have been searched yet.';
68 |
69 |     // Make API call with optimized content
70 |     const response = await openai.chat.completions.create({
71 |       model: 'gpt-4-turbo',
72 |       messages: [
73 |         {
74 |           role: 'system',
75 |           content: `You are a research assistant helping to explore a topic thoroughly through web searches.
76 |
77 | MAIN RESEARCH TOPIC: "${query}"
78 |
79 | ${searchedTopicsText}
80 |
81 | Your task is to determine if further research is needed and what specific topic to search next.`
82 |         },
83 |         {
84 |           role: 'user',
85 |           content: `I've been researching "${query}" and have the following findings so far:
86 |
87 | ${processedFindings}
88 |
89 | Based on these findings, I need you to:
90 |
91 | 1. Determine if further research is necessary to fully explore the original query.
92 | 2. If further research is needed, provide ONE specific search query that would best supplement the existing findings. Be precise and focused.
93 | 3. If no further research is needed, explicitly state that the research is complete.
94 |
95 | Format your response EXACTLY as follows:
96 | CONTINUE: YES/NO
97 | NEXT SEARCH QUERY: [your suggested search query only if continuing]`
98 |         }
99 |       ],
100 |       temperature: 0.3,
101 |       max_tokens: 1000
102 |     });
103 |
104 |     if (!response.choices[0]?.message?.content) {
105 |       throw new Error('No response content from OpenAI API');
106 |     }
107 |
108 |     const content = response.choices[0].message.content;
109 |     // Models do not always echo the requested format byte-for-byte, so ignore case and spacing.
110 |     const shouldContinue = /CONTINUE:\s*YES/i.test(content);
111 |
112 |     // Extract next search query if continuing
113 |     let nextSearchTopic: string | null = null;
114 |     if (shouldContinue) {
115 |       const nextTopicMatch = content.match(/NEXT SEARCH QUERY:[ \t]*(.*)/i);
116 |       const rawTopic = nextTopicMatch ? nextTopicMatch[1].trim() : '';
117 |       // The format line shows the query in [brackets]; drop one pair of wrapping brackets/quotes.
118 |       const unwrapped = rawTopic.replace(
119 |         /^(?:\[([^\[\]]*)\]|"([^"]*)"|'([^']*)')$/,
120 |         (_whole, bracketed?: string, doubleQuoted?: string, singleQuoted?: string) =>
121 |           bracketed ?? doubleQuoted ?? singleQuoted ?? ''
122 |       ).trim();
123 |       // An empty query is reported as "no topic" rather than as an empty string.
124 |       nextSearchTopic = unwrapped || null;
125 |     }
126 |
127 |     return { shouldContinue, nextSearchTopic };
128 |   } catch (error) {
129 |     console.error('[OpenAI Service] Error analyzing research:', error);
130 |     throw error;
131 |   }
132 | }
133 |
134 | /** One scraped source parsed out of a finding, numbered in order of first appearance. */
135 | interface SourceContent {
136 |   url: string;
137 |   title: string;
138 |   content: string;
139 |   sourceNum: number;
140 |   searchQuery: string;
141 | }
142 |
143 | /**
144 |  * Parse findings into a flat list of sources, keeping only the first occurrence of each URL.
145 |  *
146 |  * The parser is line-based and reacts to these markers: `# Search Results for: <query>`,
147 |  * `## Source [n]: <title>`, `URL: <url>`, `### Content from Source [n]:` and `# Source URLs`.
148 |  *
149 |  * @param findings Raw findings text, one entry per research step
150 |  * @returns Sources that have both a URL and content, in order of first appearance
151 |  */
152 | function extractSources(findings: string[]): SourceContent[] {
153 |   const allSources: SourceContent[] = [];
154 |   const seenUrls = new Set<string>();
155 |
156 |   findings.forEach((finding, findingIndex) => {
157 |     // Extract search query
158 |     const searchQueryMatch = finding.match(/# Search Results for: (.*?)(\n|$)/);
159 |     const searchQuery = searchQueryMatch ? searchQueryMatch[1] : `Finding ${findingIndex + 1}`;
160 |
161 |     let isInContent = false;
162 |     let contentBuffer: string[] = [];
163 |     let currentUrl = '';
164 |     let currentTitle = '';
165 |
166 |     // Store the source collected so far (only if it has a URL and content and the URL is new),
167 |     // then reset the per-source state so nothing leaks into the next source.
168 |     const flushSource = (): void => {
169 |       if (contentBuffer.length > 0 && currentUrl && !seenUrls.has(currentUrl)) {
170 |         seenUrls.add(currentUrl);
171 |         allSources.push({
172 |           url: currentUrl,
173 |           title: currentTitle,
174 |           content: contentBuffer.join('\n'),
175 |           sourceNum: allSources.length + 1,
176 |           searchQuery
177 |         });
178 |       }
179 |       contentBuffer = [];
180 |       currentUrl = '';
181 |     };
182 |
183 |     for (const line of finding.split('\n')) {
184 |       // Source header pattern: ## Source [1]: Title
185 |       const sourceMatch = line.match(/## Source \[(\d+)\]: (.*?)$/);
186 |       if (sourceMatch) {
187 |         // Finalize the previous source first, so it is stored under its own title
188 |         // rather than under the title of the source that is just starting.
189 |         flushSource();
190 |         currentTitle = sourceMatch[2];
191 |         isInContent = false;
192 |       } else if (!isInContent && line.startsWith('URL: ')) {
193 |         // Only metadata lines set the URL; a "URL: ..." line inside scraped content is content.
194 |         currentUrl = line.substring(5).trim();
195 |       } else if (line.match(/### Content from Source \[\d+\]:/)) {
196 |         // Content header pattern: ### Content from Source [1]:
197 |         isInContent = true;
198 |         contentBuffer = [];
199 |       } else if (isInContent && (line.startsWith('## Source') || line.startsWith('# Source URLs'))) {
200 |         // End of the content section
201 |         isInContent = false;
202 |         flushSource();
203 |       } else if (isInContent) {
204 |         contentBuffer.push(line);
205 |       }
206 |     }
207 |
208 |     // A finding may end without a trailing "# Source URLs" line; keep its last source too.
209 |     flushSource();
210 |   });
211 |
212 |   return allSources;
213 | }
214 |
215 | /**
216 |  * Render the sources as prompt text, grouped by search query.
217 |  *
218 |  * When the combined source content exceeds the character budget, a metadata-only
219 |  * bibliography is emitted, followed by an equally sized excerpt of every source.
220 |  *
221 |  * @param allSources Sources as returned by extractSources
222 |  * @returns Text to place in the report prompt
223 |  */
224 | function buildReportContent(allSources: SourceContent[]): string {
225 |   const MAX_CONTENT_LENGTH = 40000; // character budget for the source material
226 |   const totalContentLength = allSources.reduce((sum, source) => sum + source.content.length, 0);
227 |
228 |   // Group sources by search query
229 |   const sourcesByQuery = new Map<string, SourceContent[]>();
230 |   for (const source of allSources) {
231 |     const group = sourcesByQuery.get(source.searchQuery);
232 |     if (group) {
233 |       group.push(source);
234 |     } else {
235 |       sourcesByQuery.set(source.searchQuery, [source]);
236 |     }
237 |   }
238 |
239 |   let optimizedContent = '';
240 |
241 |   if (totalContentLength <= MAX_CONTENT_LENGTH) {
242 |     // Everything fits: emit each source in full.
243 |     sourcesByQuery.forEach((sources, searchQuery) => {
244 |       optimizedContent += `## Search Query: ${searchQuery}\n\n`;
245 |       for (const source of sources) {
246 |         optimizedContent += `### [${source.sourceNum}] ${source.title}\n`;
247 |         optimizedContent += `URL: ${source.url}\n\n`;
248 |         optimizedContent += `${source.content.trim()}\n\n`;
249 |       }
250 |     });
251 |     return optimizedContent;
252 |   }
253 |
254 |   console.error(`Content exceeds token limit (${totalContentLength} characters), optimizing...`);
255 |
256 |   // First pass: metadata only (title and URL) for every source.
257 |   optimizedContent = '# BIBLIOGRAPHY\n\n';
258 |   sourcesByQuery.forEach((sources, searchQuery) => {
259 |     optimizedContent += `## Search Query: ${searchQuery}\n\n`;
260 |     for (const source of sources) {
261 |       optimizedContent += `[${source.sourceNum}] "${source.title}"\n`;
262 |       optimizedContent += `URL: ${source.url}\n\n`;
263 |     }
264 |   });
265 |
266 |   // Second pass: split what is left of the budget evenly across the sources. The budget is
267 |   // clamped at zero in case the bibliography alone already exceeds it. allSources cannot be
268 |   // empty here because the total content length is above the (positive) limit.
269 |   const remainingLength = Math.max(0, MAX_CONTENT_LENGTH - optimizedContent.length);
270 |   const maxCharsPerSource = Math.floor(remainingLength / allSources.length);
271 |
272 |   optimizedContent += '# CONTENT EXCERPTS\n\n';
273 |   for (const source of allSources) {
274 |     const excerpt = source.content.length > maxCharsPerSource
275 |       ? source.content.substring(0, maxCharsPerSource) + '...'
276 |       : source.content;
277 |
278 |     optimizedContent += `## [${source.sourceNum}] ${source.title}\n\n`;
279 |     optimizedContent += `${excerpt}\n\n`;
280 |   }
281 |
282 |   return optimizedContent;
283 | }
284 |
285 | /**
286 |  * Generate a comprehensive research report
287 |  *
288 |  * @param query Original research query
289 |  * @param findings All research findings
290 |  * @returns Final research report
291 |  * @throws Rethrows API errors; throws when the response carries no content
292 |  */
293 | export async function generateReport(
294 |   query: string,
295 |   findings: string[]
296 | ): Promise<string> {
297 |   try {
298 |     // Extract all sources and their content into a structured format
299 |     const allSources = extractSources(findings);
300 |     console.error(`Extracted ${allSources.length} sources from ${findings.length} findings`);
301 |
302 |     // Trim the source material to the character budget if necessary
303 |     const optimizedContent = buildReportContent(allSources);
304 |     console.error(`Generating report with optimized content (${optimizedContent.length} characters)`);
305 |
306 |     const response = await openai.chat.completions.create({
307 |       model: 'gpt-4-turbo',
308 |       messages: [
309 |         {
310 |           role: 'system',
311 |           content: `Generate a concise research report on "${query}" using the provided sources.
312 | Format:
313 | - Executive Summary (2-3 paragraphs)
314 | - Introduction
315 | - Main Findings (organized by themes)
316 | - Conclusion
317 | - Bibliography
318 |
319 | Cite sources using [X] format. Focus on key insights rather than exhaustive detail.`
320 |         },
321 |         {
322 |           role: 'user',
323 |           content: `Research report on "${query}" based on the following:
324 |
325 | ${optimizedContent}`
326 |         }
327 |       ],
328 |       temperature: 0.5, // Lower temperature for more focused output
329 |       max_tokens: 4000
330 |     });
331 |
332 |     if (!response.choices[0]?.message?.content) {
333 |       throw new Error("No response content from OpenAI API");
334 |     }
335 |
336 |     return response.choices[0].message.content;
337 |   } catch (error) {
338 |     console.error("[OpenAI Service] Error generating report:", error);
339 |     throw error;
340 |   }
341 | }
```