This is page 4 of 5. Use http://codebase.md/cyanheads/pubmed-mcp-server?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .clinerules
│   └── clinerules.md
├── .dockerignore
├── .github
│   ├── FUNDING.yml
│   └── workflows
│       └── publish.yml
├── .gitignore
├── .ncurc.json
├── CHANGELOG.md
├── Dockerfile
├── docs
│   ├── project-spec.md
│   ├── publishing-mcp-server-registry.md
│   └── tree.md
├── eslint.config.js
├── examples
│   ├── generate_pubmed_chart
│   │   ├── bar_chart.png
│   │   ├── doughnut_chart.png
│   │   ├── line_chart.png
│   │   ├── pie_chart.png
│   │   ├── polar_chart.png
│   │   ├── radar_chart.png
│   │   └── scatter_plot.png
│   ├── pubmed_article_connections_1.md
│   ├── pubmed_article_connections_2.md
│   ├── pubmed_fetch_contents_example.md
│   ├── pubmed_research_agent_example.md
│   └── pubmed_search_articles_example.md
├── LICENSE
├── mcp.json
├── package-lock.json
├── package.json
├── README.md
├── repomix.config.json
├── scripts
│   ├── clean.ts
│   ├── fetch-openapi-spec.ts
│   ├── make-executable.ts
│   ├── tree.ts
│   └── validate-mcp-publish-schema.ts
├── server.json
├── smithery.yaml
├── src
│   ├── config
│   │   └── index.ts
│   ├── index.ts
│   ├── mcp-server
│   │   ├── server.ts
│   │   ├── tools
│   │   │   ├── pubmedArticleConnections
│   │   │   │   ├── index.ts
│   │   │   │   ├── logic
│   │   │   │   │   ├── citationFormatter.ts
│   │   │   │   │   ├── elinkHandler.ts
│   │   │   │   │   ├── index.ts
│   │   │   │   │   └── types.ts
│   │   │   │   └── registration.ts
│   │   │   ├── pubmedFetchContents
│   │   │   │   ├── index.ts
│   │   │   │   ├── logic.ts
│   │   │   │   └── registration.ts
│   │   │   ├── pubmedGenerateChart
│   │   │   │   ├── index.ts
│   │   │   │   ├── logic.ts
│   │   │   │   └── registration.ts
│   │   │   ├── pubmedResearchAgent
│   │   │   │   ├── index.ts
│   │   │   │   ├── logic
│   │   │   │   │   ├── index.ts
│   │   │   │   │   ├── inputSchema.ts
│   │   │   │   │   ├── outputTypes.ts
│   │   │   │   │   └── planOrchestrator.ts
│   │   │   │   ├── logic.ts
│   │   │   │   └── registration.ts
│   │   │   └── pubmedSearchArticles
│   │   │       ├── index.ts
│   │   │       ├── logic.ts
│   │   │       └── registration.ts
│   │   └── transports
│   │       ├── auth
│   │       │   ├── authFactory.ts
│   │       │   ├── authMiddleware.ts
│   │       │   ├── index.ts
│   │       │   ├── lib
│   │       │   │   ├── authContext.ts
│   │       │   │   ├── authTypes.ts
│   │       │   │   └── authUtils.ts
│   │       │   └── strategies
│   │       │       ├── authStrategy.ts
│   │       │       ├── jwtStrategy.ts
│   │       │       └── oauthStrategy.ts
│   │       ├── core
│   │       │   ├── baseTransportManager.ts
│   │       │   ├── headerUtils.ts
│   │       │   ├── honoNodeBridge.ts
│   │       │   ├── statefulTransportManager.ts
│   │       │   ├── statelessTransportManager.ts
│   │       │   └── transportTypes.ts
│   │       ├── http
│   │       │   ├── httpErrorHandler.ts
│   │       │   ├── httpTransport.ts
│   │       │   ├── httpTypes.ts
│   │       │   ├── index.ts
│   │       │   └── mcpTransportMiddleware.ts
│   │       └── stdio
│   │           ├── index.ts
│   │           └── stdioTransport.ts
│   ├── services
│   │   └── NCBI
│   │       ├── core
│   │       │   ├── ncbiConstants.ts
│   │       │   ├── ncbiCoreApiClient.ts
│   │       │   ├── ncbiRequestQueueManager.ts
│   │       │   ├── ncbiResponseHandler.ts
│   │       │   └── ncbiService.ts
│   │       └── parsing
│   │           ├── eSummaryResultParser.ts
│   │           ├── index.ts
│   │           ├── pubmedArticleStructureParser.ts
│   │           └── xmlGenericHelpers.ts
│   ├── types-global
│   │   ├── declarations.d.ts
│   │   ├── errors.ts
│   │   └── pubmedXml.ts
│   └── utils
│       ├── index.ts
│       ├── internal
│       │   ├── errorHandler.ts
│       │   ├── index.ts
│       │   ├── logger.ts
│       │   ├── performance.ts
│       │   └── requestContext.ts
│       ├── metrics
│       │   ├── index.ts
│       │   └── tokenCounter.ts
│       ├── network
│       │   ├── fetchWithTimeout.ts
│       │   └── index.ts
│       ├── parsing
│       │   ├── dateParser.ts
│       │   ├── index.ts
│       │   └── jsonParser.ts
│       ├── scheduling
│       │   ├── index.ts
│       │   └── scheduler.ts
│       ├── security
│       │   ├── idGenerator.ts
│       │   ├── index.ts
│       │   ├── rateLimiter.ts
│       │   └── sanitization.ts
│       └── telemetry
│           ├── instrumentation.ts
│           └── semconv.ts
├── tsconfig.json
├── tsconfig.typedoc.json
├── tsdoc.json
└── typedoc.json
```
# Files
--------------------------------------------------------------------------------
/src/mcp-server/tools/pubmedResearchAgent/logic/planOrchestrator.ts:
--------------------------------------------------------------------------------
```typescript
1 | /**
2 | * @fileoverview Orchestrates the generation of the research plan outline
3 | * by directly mapping detailed client inputs to a structured output format.
4 | * Omits sections/steps if no relevant input is provided.
5 | * @module pubmedResearchAgent/logic/planOrchestrator
6 | */
7 |
8 | import {
9 | logger,
10 | RequestContext,
11 | requestContextService,
12 | sanitizeInputForLogging,
13 | } from "../../../../utils/index.js";
14 | import type { PubMedResearchAgentInput } from "./inputSchema.js";
15 | import type {
16 | CrossCuttingContent,
17 | Phase1Step1_1_Content,
18 | Phase1Step1_2_Content,
19 | Phase1Step1_3_Content,
20 | Phase2Step2_1_Content,
21 | Phase2Step2_2_Content,
22 | Phase3Step3_1_Content,
23 | Phase3Step3_2_Content,
24 | Phase4Step4_1_Content,
25 | Phase4Step4_2_Content,
26 | Phase4Step4_3_Content,
27 | PubMedResearchPlanGeneratedOutput,
28 | } from "./outputTypes.js";
29 |
30 | // Helper: joins array input, optionally wraps it in a directive prompt, and returns undefined when the input is empty
31 | function C(
32 | userInput: string | undefined | string[],
33 | refinedPromptBase?: string,
34 | includePrompts?: boolean,
35 | ): string | undefined {
36 | if (!userInput || (Array.isArray(userInput) && userInput.length === 0)) {
37 | return undefined;
38 | }
39 | const joinedInput = Array.isArray(userInput)
40 | ? userInput.join("; ")
41 | : userInput;
42 | if (includePrompts && refinedPromptBase) {
43 | // Embed the user's input within a more directive prompt
44 | return `${refinedPromptBase} Based on the provided detail: "${joinedInput}". Ensure critical evaluation and consider alternative interpretations.`;
45 | }
46 | return joinedInput;
47 | }
48 |
49 | // Helper to generate guidance notes
50 | function G(notes: string[], includePrompts?: boolean): string[] | undefined {
51 | return includePrompts && notes.length > 0 ? notes : undefined;
52 | }
53 |
54 | // Helper to check if all properties of an object are undefined
55 | function allPropertiesUndefined<T extends object>(obj: T): boolean {
56 | return Object.values(obj as Record<string, unknown>).every(
57 | (value) => value === undefined,
58 | );
59 | }
60 |
61 | // Helper to recursively remove keys whose values are null/undefined, empty objects, or empty arrays
62 | function removeEmptyObjectsRecursively(obj: unknown): unknown {
63 | // Base cases for recursion
64 | if (typeof obj !== "object" || obj === null) {
65 | return obj; // Not an object or array, return as is
66 | }
67 |
68 | if (Array.isArray(obj)) {
69 | // If it's an array, recurse on each element and filter out empty objects/arrays
70 | const newArr = obj
71 | .map(removeEmptyObjectsRecursively)
72 | .filter((item: unknown) => {
73 | if (item === null || item === undefined) return false;
74 | if (Array.isArray(item) && item.length === 0) return false; // Filter out empty arrays
75 | if (
76 | typeof item === "object" &&
77 | !Array.isArray(item) &&
78 | Object.keys(item).length === 0
79 | ) {
80 | return false; // Filter out empty objects
81 | }
82 | return true;
83 | });
84 | return newArr;
85 | }
86 |
87 | // If it's an object, create a new object with non-empty properties
88 | const newObj: Record<string, unknown> = {};
89 | const objAsRecord = obj as Record<string, unknown>;
90 | for (const key in objAsRecord) {
91 | if (Object.prototype.hasOwnProperty.call(objAsRecord, key)) {
92 | const value = removeEmptyObjectsRecursively(objAsRecord[key]);
93 |
94 | // Skip null or undefined values
95 | if (value === null || value === undefined) {
96 | continue;
97 | }
98 |
99 | // Skip empty arrays
100 | if (Array.isArray(value) && value.length === 0) {
101 | continue;
102 | }
103 |
104 | // Skip empty objects
105 | if (
106 | typeof value === "object" &&
107 | !Array.isArray(value) &&
108 | Object.keys(value).length === 0
109 | ) {
110 | continue;
111 | }
112 |
113 | // If value is not empty, add it
114 | newObj[key] = value;
115 | }
116 | }
117 | return newObj;
118 | }
119 |
120 | export function generateFullResearchPlanOutline(
121 | input: PubMedResearchAgentInput,
122 | parentRequestContext: RequestContext,
123 | ): PubMedResearchPlanGeneratedOutput {
124 | const logContext = requestContextService.createRequestContext({
125 | parentRequestId: parentRequestContext.requestId,
126 | operation: "generateFullResearchPlanOutlineLog",
127 | input: sanitizeInputForLogging({ input }), // Add sanitized input here
128 | });
129 | logger.debug("Generating full research plan outline.", logContext);
130 |
131 | const inferredGoal =
132 | input.primary_research_goal ||
133 | `to investigate the roles and mechanisms of ${input.research_keywords.join(" and ")}${input.organism_focus ? ` in ${input.organism_focus}` : ""}.`;
134 |
135 | const planTitle =
136 | input.project_title_suggestion || `Research Plan Outline: ${inferredGoal}`;
137 | const addPrompts = input.include_detailed_prompts_for_agent;
138 |
139 | const overallInstructions = `This document is a structured research plan outline, primarily assembled from detailed inputs for project: "${planTitle}". As the research agent, critically evaluate this plan. Your primary function is to fulfill the user's research request by leveraging this structured information alongside your comprehensive knowledge and all available tools (e.g., PubMed search, web search, data analysis, file operations).
140 | Key responsibilities:
141 | - Synthesize information to generate scientifically sound content, analyses, or answers.
142 | - Adapt the level of detail in your final response to precisely match the user's specific query. If a concise answer is requested, use this plan as foundational background.
143 | - Proactively identify and address potential ambiguities, biases, or limitations in the provided plan or data.
144 | - Ensure methodological rigor: consider controls, statistical power, reproducibility, and alternative explanations.
145 | - If critical information is missing or ambiguities hinder robust execution, formulate specific, targeted follow-up questions for the user before proceeding with extensive investigation.
146 | - Always prioritize accuracy, objectivity, and adherence to scientific best practices.`;
147 |
148 | const p1s1Content: Phase1Step1_1_Content = {
149 | primary_research_question: C(
150 | input.p1_specific_research_question,
151 | "Critically evaluate and refine the primary research question for clarity, focus, and feasibility",
152 | addPrompts,
153 | ),
154 | knowledge_gap_statement: C(
155 | input.p1_knowledge_gap,
156 | "Validate and expand on the identified knowledge gap, ensuring it's well-supported by current literature",
157 | addPrompts,
158 | ),
159 | primary_hypothesis: C(
160 | input.p1_primary_hypothesis,
161 | "Assess the primary hypothesis for testability, specificity, and falsifiability. Consider alternative hypotheses",
162 | addPrompts,
163 | ),
164 | pubmed_search_strategy: C(
165 | input.p1_pubmed_search_strategy_description,
166 | "Develop a comprehensive PubMed search strategy. Consider MeSH terms, keywords, Boolean operators, and inclusion/exclusion criteria",
167 | addPrompts,
168 | ),
169 | guidance_notes: G(
170 | [
171 | "Ensure the research question is SMART (Specific, Measurable, Achievable, Relevant, Time-bound).",
172 | "Verify the knowledge gap is current and significant.",
173 | "The hypothesis should directly address the research question.",
174 | "Consider publication type filters and date ranges for the literature search.",
175 | ],
176 | addPrompts,
177 | ),
178 | };
179 |
180 | const p1s2Content: Phase1Step1_2_Content = {
181 | literature_review_scope: C(
182 | input.p1_literature_review_scope,
183 | "Define and justify the scope of the literature review, including timeframes, study types, and key themes to investigate",
184 | addPrompts,
185 | ),
186 | key_databases_and_search_approach: C(
187 | input.p1_lit_review_databases_and_approach,
188 | "Detail the systematic search approach across specified databases (e.g., PubMed, EMBASE, Scopus). Include strategy for citation searching or snowballing",
189 | addPrompts,
190 | ),
191 | guidance_notes: G(
192 | [
193 | "Document search queries and results for reproducibility.",
194 | "Consider using reference management software.",
195 | "Plan for screening and selection of articles based on predefined criteria.",
196 | ],
197 | addPrompts,
198 | ),
199 | };
200 |
201 | const p1s3Content: Phase1Step1_3_Content = {
202 | experimental_paradigm: C(
203 | input.p1_experimental_paradigm,
204 | "Elaborate on the chosen experimental paradigm, justifying its appropriateness for testing the hypothesis",
205 | addPrompts,
206 | ),
207 | data_acquisition_plan_existing_data: C(
208 | input.p1_data_acquisition_plan_existing_data,
209 | "Strategize the identification, retrieval, and validation of relevant existing datasets. Specify databases, data types, and access protocols",
210 | addPrompts,
211 | ),
212 | data_acquisition_plan_new_data: C(
213 | input.p1_data_acquisition_plan_new_data,
214 | "Outline the plan for generating novel data, including experimental models, key procedures, sample size considerations, and data deposition strategy",
215 | addPrompts,
216 | ),
217 | blast_utilization_plan: C(
218 | input.p1_blast_utilization_plan,
219 | "Specify how sequence alignment tools (e.g., NCBI BLAST) will be employed, including purpose, programs, databases, and interpretation of results",
220 | addPrompts,
221 | ),
222 | controls_and_rigor_measures: C(
223 | input.p1_controls_and_rigor,
224 | "Detail crucial experimental controls (positive, negative, internal) and measures to ensure scientific rigor, reproducibility, and minimization of bias",
225 | addPrompts,
226 | ),
227 | methodological_challenges_and_mitigation: C(
228 | input.p1_methodological_challenges_and_mitigation,
229 | "Anticipate potential methodological challenges, their impact, and robust mitigation strategies",
230 | addPrompts,
231 | ),
232 | guidance_notes: G(
233 | [
234 | "Ensure sample sizes are adequately powered.",
235 | "Consider blinding and randomization where appropriate.",
236 | "Define clear endpoints and outcome measures.",
237 | "Address potential confounders in the experimental design.",
238 | ],
239 | addPrompts,
240 | ),
241 | };
242 |
243 | const p2s1Content: Phase2Step2_1_Content = {
244 | data_collection_methods_wet_lab: C(
245 | input.p2_data_collection_methods_wet_lab,
246 | "Provide detailed wet-lab protocols, including sample preparation, experimental treatments, instrument settings, and data recording procedures",
247 | addPrompts,
248 | ),
249 | data_collection_methods_dry_lab: C(
250 | input.p2_data_collection_methods_dry_lab,
251 | "Specify execution details for computational data retrieval, including precise queries, API usage, versioning of tools, and data provenance tracking",
252 | addPrompts,
253 | ),
254 | guidance_notes: G(
255 | [
256 | "Standardize protocols to ensure consistency.",
257 | "Implement robust data labeling and organization from the outset.",
258 | "Document any deviations from planned protocols.",
259 | ],
260 | addPrompts,
261 | ),
262 | };
263 |
264 | const p2s2Content: Phase2Step2_2_Content = {
265 | data_preprocessing_and_qc_plan: C(
266 | input.p2_data_preprocessing_and_qc_plan,
267 | "Describe the comprehensive pipeline for data cleaning, normalization, transformation, and quality control. Specify metrics, thresholds, and tools for each step",
268 | addPrompts,
269 | ),
270 | guidance_notes: G(
271 | [
272 | "Define criteria for outlier detection and handling.",
273 | "Assess data quality before and after preprocessing.",
274 | "Ensure preprocessing steps are appropriate for downstream analyses.",
275 | ],
276 | addPrompts,
277 | ),
278 | };
279 |
280 | const p3s1Content: Phase3Step3_1_Content = {
281 | data_analysis_strategy: C(
282 | input.p3_data_analysis_strategy,
283 | "Outline the core statistical and computational methods for data analysis. Specify tests, software, parameters, and how they address the hypotheses",
284 | addPrompts,
285 | ),
286 | bioinformatics_pipeline_summary: C(
287 | input.p3_bioinformatics_pipeline_summary,
288 | "Summarize the bioinformatics pipeline for high-throughput data, detailing tools, algorithms, parameter settings, and workflow for downstream analyses",
289 | addPrompts,
290 | ),
291 | guidance_notes: G(
292 | [
293 | "Justify the choice of statistical tests based on data distribution and assumptions.",
294 | "Address multiple testing corrections if applicable.",
295 | "Consider sensitivity analyses to assess robustness of findings.",
296 | ],
297 | addPrompts,
298 | ),
299 | };
300 |
301 | const p3s2Content: Phase3Step3_2_Content = {
302 | results_interpretation_framework: C(
303 | input.p3_results_interpretation_framework,
304 | "Establish a clear framework for interpreting analytical findings in the context of the hypotheses, considering statistical significance, effect sizes, and biological relevance",
305 | addPrompts,
306 | ),
307 | comparison_with_literature_plan: C(
308 | input.p3_comparison_with_literature_plan,
309 | "Develop a strategy for systematically contextualizing results with existing literature, addressing consistencies, discrepancies, and novel contributions",
310 | addPrompts,
311 | ),
312 | guidance_notes: G(
313 | [
314 | "Distinguish correlation from causation.",
315 | "Acknowledge limitations of the study and their impact on interpretation.",
316 | "Discuss clinical or translational implications if relevant.",
317 | ],
318 | addPrompts,
319 | ),
320 | };
321 |
322 | const p4s1Content: Phase4Step4_1_Content = {
323 | dissemination_manuscript_plan: C(
324 | input.p4_dissemination_manuscript_plan,
325 | "Formulate a plan for manuscript preparation, including core message, target journal profile, key figures/tables, and authorship contributions",
326 | addPrompts,
327 | ),
328 | dissemination_data_deposition_plan: C(
329 | input.p4_dissemination_data_deposition_plan,
330 | "Outline a strategy for depositing research data in public repositories, specifying data types, repository choices, metadata standards, and adherence to FAIR principles",
331 | addPrompts,
332 | ),
333 | guidance_notes: G(
334 | [
335 | "Follow journal-specific author guidelines.",
336 | "Ensure data is de-identified if it contains sensitive information.",
337 | "Obtain DOIs or accession numbers for deposited data.",
338 | ],
339 | addPrompts,
340 | ),
341 | };
342 |
343 | const p4s2Content: Phase4Step4_2_Content = {
344 | peer_review_and_publication_approach: C(
345 | input.p4_peer_review_and_publication_approach,
346 | "Describe the approach to journal submission, navigating peer review, and addressing reviewer comments constructively for publication",
347 | addPrompts,
348 | ),
349 | guidance_notes: G(
350 | [
351 | "Prepare a compelling cover letter.",
352 | "Respond to reviewer comments point-by-point and respectfully.",
353 | "Consider pre-print servers for early dissemination.",
354 | ],
355 | addPrompts,
356 | ),
357 | };
358 |
359 | const p4s3Content: Phase4Step4_3_Content = {
360 | future_research_directions: C(
361 | input.p4_future_research_directions,
362 | "Identify and articulate potential next steps, new research questions, or translational applications arising from the current study's findings and limitations",
363 | addPrompts,
364 | ),
365 | guidance_notes: G(
366 | [
367 | "Base future directions on the study's actual outcomes.",
368 | "Consider how new technologies or approaches could address remaining questions.",
369 | ],
370 | addPrompts,
371 | ),
372 | };
373 |
374 | const ccContent: CrossCuttingContent = {
375 | record_keeping_and_data_management: C(
376 | input.cc_record_keeping_and_data_management,
377 | "Detail the comprehensive plan for meticulous record-keeping, version control (code, data, manuscripts), secure data storage, backup strategy, and Data Management Plan (DMP) adherence",
378 | addPrompts,
379 | ),
380 | collaboration_strategy: C(
381 | input.cc_collaboration_strategy,
382 | "If applicable, describe the strategy for effective collaboration, including communication channels, role delineation, data sharing protocols, and authorship agreements",
383 | addPrompts,
384 | ),
385 | ethical_considerations: C(
386 | input.cc_ethical_considerations,
387 | "Thoroughly outline all ethical considerations, including plans for IRB/IACUC approval, informed consent, data privacy/anonymization, responsible conduct of research (RCR) training, and conflict of interest management",
388 | addPrompts,
389 | ),
390 | guidance_notes: G(
391 | [
392 | "Ensure compliance with institutional and funding agency requirements.",
393 | "Regularly review and update the DMP.",
394 | "Promote open science practices where appropriate.",
395 | ],
396 | addPrompts,
397 | ),
398 | };
399 |
400 | const plan = {
401 | plan_title: planTitle,
402 | overall_instructions_for_research_agent: addPrompts
403 | ? overallInstructions
404 | : undefined,
405 | input_summary: {
406 | keywords_received: input.research_keywords,
407 | primary_goal_stated_or_inferred: inferredGoal,
408 | organism_focus: input.organism_focus || "Not Specified",
409 | // Echo the request's flag so the summary records whether detailed prompts were included
410 | included_detailed_prompts_for_agent:
411 | input.include_detailed_prompts_for_agent,
412 | },
413 | phase_1_conception_and_planning: {
414 | title: "Phase 1: Conception and Planning",
415 | step_1_1_research_question_and_hypothesis: allPropertiesUndefined(
416 | p1s1Content,
417 | )
418 | ? {}
419 | : p1s1Content,
420 | step_1_2_literature_review_strategy: allPropertiesUndefined(p1s2Content)
421 | ? {}
422 | : p1s2Content,
423 | step_1_3_experimental_design_and_data_acquisition: allPropertiesUndefined(
424 | p1s3Content,
425 | )
426 | ? {}
427 | : p1s3Content,
428 | },
429 | phase_2_data_collection_and_processing: {
430 | title: "Phase 2: Data Collection and Processing",
431 | step_2_1_data_collection_retrieval: allPropertiesUndefined(p2s1Content)
432 | ? {}
433 | : p2s1Content,
434 | step_2_2_data_preprocessing_and_qc: allPropertiesUndefined(p2s2Content)
435 | ? {}
436 | : p2s2Content,
437 | },
438 | phase_3_analysis_and_interpretation: {
439 | title: "Phase 3: Analysis and Interpretation",
440 | step_3_1_data_analysis_plan: allPropertiesUndefined(p3s1Content)
441 | ? {}
442 | : p3s1Content,
443 | step_3_2_results_interpretation: allPropertiesUndefined(p3s2Content)
444 | ? {}
445 | : p3s2Content,
446 | },
447 | phase_4_dissemination_and_iteration: {
448 | title: "Phase 4: Dissemination and Iteration",
449 | step_4_1_dissemination_strategy: allPropertiesUndefined(p4s1Content)
450 | ? {}
451 | : p4s1Content,
452 | step_4_2_peer_review_and_publication: allPropertiesUndefined(p4s2Content)
453 | ? {}
454 | : p4s2Content,
455 | step_4_3_further_research_and_iteration: allPropertiesUndefined(
456 | p4s3Content,
457 | )
458 | ? {}
459 | : p4s3Content,
460 | },
461 | cross_cutting_considerations: {
462 | title: "Cross-Cutting Considerations",
463 | content: allPropertiesUndefined(ccContent) ? {} : ccContent,
464 | },
465 | };
466 |
467 | return removeEmptyObjectsRecursively(
468 | plan,
469 | ) as PubMedResearchPlanGeneratedOutput;
470 | }
471 |
```
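
A minimal usage sketch of `generateFullResearchPlanOutline`, assuming the field names read above; the full `PubMedResearchAgentInput` shape is defined by the Zod schema in `inputSchema.ts` (not shown on this page), so the cast below papers over any required fields omitted here, and the operation name is hypothetical.

```typescript
import { requestContextService } from "../../../../utils/index.js";
import type { PubMedResearchAgentInput } from "./inputSchema.js";
import { generateFullResearchPlanOutline } from "./planOrchestrator.js";

const context = requestContextService.createRequestContext({
  operation: "planOrchestratorExample", // hypothetical operation name
});

// Only a few inputs are supplied; steps with no corresponding input are
// pruned from the output by removeEmptyObjectsRecursively.
const input = {
  research_keywords: ["TP53", "autophagy"],
  organism_focus: "Homo sapiens",
  include_detailed_prompts_for_agent: true,
  p1_primary_hypothesis:
    "Loss of TP53 increases basal autophagic flux in tumor cells.",
} as PubMedResearchAgentInput;

const plan = generateFullResearchPlanOutline(input, context);
console.log(JSON.stringify(plan, null, 2));
```

With `include_detailed_prompts_for_agent: true`, the hypothesis is returned embedded in its directive prompt (via the `C` helper) and `overall_instructions_for_research_agent` is populated; with `false`, inputs pass through verbatim and the guidance notes are omitted.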
--------------------------------------------------------------------------------
/src/utils/internal/logger.ts:
--------------------------------------------------------------------------------
```typescript
1 | /**
2 | * @fileoverview Provides a singleton Logger class that wraps Winston for file logging
3 | * and supports sending MCP (Model Context Protocol) `notifications/message`.
4 | * It handles different log levels compliant with RFC 5424 and MCP specifications.
5 | * @module src/utils/internal/logger
6 | */
7 | import path from "path";
8 | import winston from "winston";
9 | import TransportStream from "winston-transport";
10 | import { config } from "../../config/index.js";
11 | import { RequestContext } from "./requestContext.js";
12 |
13 | /**
14 | * Defines the supported logging levels based on RFC 5424 Syslog severity levels,
15 | * as used by the Model Context Protocol (MCP).
16 | * Levels are: 'debug'(7), 'info'(6), 'notice'(5), 'warning'(4), 'error'(3), 'crit'(2), 'alert'(1), 'emerg'(0).
17 | * Lower numeric values indicate higher severity.
18 | */
19 | export type McpLogLevel =
20 | | "debug"
21 | | "info"
22 | | "notice"
23 | | "warning"
24 | | "error"
25 | | "crit"
26 | | "alert"
27 | | "emerg";
28 |
29 | /**
30 | * Numeric severity mapping for MCP log levels (lower is more severe).
31 | * @private
32 | */
33 | const mcpLevelSeverity: Record<McpLogLevel, number> = {
34 | emerg: 0,
35 | alert: 1,
36 | crit: 2,
37 | error: 3,
38 | warning: 4,
39 | notice: 5,
40 | info: 6,
41 | debug: 7,
42 | };
43 |
44 | /**
45 | * Maps MCP log levels to Winston's core levels for file logging.
46 | * @private
47 | */
48 | const mcpToWinstonLevel: Record<
49 | McpLogLevel,
50 | "debug" | "info" | "warn" | "error"
51 | > = {
52 | debug: "debug",
53 | info: "info",
54 | notice: "info",
55 | warning: "warn",
56 | error: "error",
57 | crit: "error",
58 | alert: "error",
59 | emerg: "error",
60 | };
61 |
62 | /**
63 | * Interface for a more structured error object, primarily for formatting console logs.
64 | * @private
65 | */
66 | interface ErrorWithMessageAndStack {
67 | message?: string;
68 | stack?: string;
69 | [key: string]: unknown;
70 | }
71 |
72 | /**
73 | * Interface for the payload of an MCP log notification.
74 | * This structure is used when sending log data via MCP `notifications/message`.
75 | */
76 | export interface McpLogPayload {
77 | message: string;
78 | context?: RequestContext;
79 | error?: {
80 | message: string;
81 | stack?: string;
82 | };
83 | [key: string]: unknown;
84 | }
85 |
86 | /**
87 | * Type for the `data` parameter of the `McpNotificationSender` function.
88 | */
89 | export type McpNotificationData = McpLogPayload | Record<string, unknown>;
90 |
91 | /**
92 | * Defines the signature for a function that can send MCP log notifications.
93 | * This function is typically provided by the MCP server instance.
94 | * @param level - The severity level of the log message.
95 | * @param data - The payload of the log notification.
96 | * @param loggerName - An optional name or identifier for the logger/server.
97 | */
98 | export type McpNotificationSender = (
99 | level: McpLogLevel,
100 | data: McpNotificationData,
101 | loggerName?: string,
102 | ) => void;
103 |
104 | // The logsPath from config is resolved and validated by src/config/index.ts.
105 | // It can be null if the directory is invalid or inaccessible, in which case file logging will be disabled.
106 |
107 | /**
108 | * Creates the Winston console log format.
109 | * @returns The Winston log format for console output.
110 | * @private
111 | */
112 | function createWinstonConsoleFormat(): winston.Logform.Format {
113 | return winston.format.combine(
114 | winston.format.colorize(),
115 | winston.format.timestamp({ format: "YYYY-MM-DD HH:mm:ss" }),
116 | winston.format.printf(({ timestamp, level, message, ...meta }) => {
117 | let metaString = "";
118 | const metaCopy = { ...meta };
119 | if (metaCopy.error && typeof metaCopy.error === "object") {
120 | const errorObj = metaCopy.error as ErrorWithMessageAndStack;
121 | if (errorObj.message) metaString += `\n Error: ${errorObj.message}`;
122 | if (errorObj.stack)
123 | metaString += `\n Stack: ${String(errorObj.stack)
124 | .split("\n")
125 | .map((l: string) => ` ${l}`)
126 | .join("\n")}`;
127 | delete metaCopy.error;
128 | }
129 | if (Object.keys(metaCopy).length > 0) {
130 | try {
131 | const replacer = (_key: string, value: unknown) =>
132 | typeof value === "bigint" ? value.toString() : value;
133 | const remainingMetaJson = JSON.stringify(metaCopy, replacer, 2);
134 | if (remainingMetaJson !== "{}")
135 | metaString += `\n Meta: ${remainingMetaJson}`;
136 | } catch (stringifyError: unknown) {
137 | const errorMessage =
138 | stringifyError instanceof Error
139 | ? stringifyError.message
140 | : String(stringifyError);
141 | metaString += `\n Meta: [Error stringifying metadata: ${errorMessage}]`;
142 | }
143 | }
144 | return `${timestamp} ${level}: ${message}${metaString}`;
145 | }),
146 | );
147 | }
148 |
149 | /**
150 | * Singleton Logger class that wraps Winston for robust logging.
151 | * Supports file logging, conditional console logging, and MCP notifications.
152 | */
153 | export class Logger {
154 | private static instance: Logger;
155 | private winstonLogger?: winston.Logger;
156 | private interactionLogger?: winston.Logger;
157 | private initialized = false;
158 | private mcpNotificationSender?: McpNotificationSender;
159 | private currentMcpLevel: McpLogLevel = "info";
160 | private currentWinstonLevel: "debug" | "info" | "warn" | "error" = "info";
161 |
162 | private readonly MCP_NOTIFICATION_STACK_TRACE_MAX_LENGTH = 1024;
163 | private readonly LOG_FILE_MAX_SIZE = 5 * 1024 * 1024; // 5MB
164 | private readonly LOG_MAX_FILES = 5;
165 |
166 | /** @private */
167 | private constructor() {}
168 |
169 | /**
170 | * Initializes the Winston logger instance.
171 | * Should be called once at application startup.
172 | * @param level - The initial minimum MCP log level.
173 | */
174 | public async initialize(level: McpLogLevel = "info"): Promise<void> {
175 | if (this.initialized) {
176 | this.warning("Logger already initialized.", {
177 | loggerSetup: true,
178 | requestId: "logger-init",
179 | timestamp: new Date().toISOString(),
180 | });
181 | return;
182 | }
183 |
184 | // Set initialized to true at the beginning of the initialization process.
185 | this.initialized = true;
186 |
187 | this.currentMcpLevel = level;
188 | this.currentWinstonLevel = mcpToWinstonLevel[level];
189 |
190 | const resolvedLogsDir = config.logsPath;
191 |
192 | const fileFormat = winston.format.combine(
193 | winston.format.timestamp(),
194 | winston.format.errors({ stack: true }),
195 | winston.format.json(),
196 | );
197 |
198 | const transports: TransportStream[] = [];
199 | const fileTransportOptions = {
200 | format: fileFormat,
201 | maxsize: this.LOG_FILE_MAX_SIZE,
202 | maxFiles: this.LOG_MAX_FILES,
203 | tailable: true,
204 | };
205 |
206 | if (resolvedLogsDir) {
207 | transports.push(
208 | new winston.transports.File({
209 | filename: path.join(resolvedLogsDir, "error.log"),
210 | level: "error",
211 | ...fileTransportOptions,
212 | }),
213 | new winston.transports.File({
214 | filename: path.join(resolvedLogsDir, "warn.log"),
215 | level: "warn",
216 | ...fileTransportOptions,
217 | }),
218 | new winston.transports.File({
219 | filename: path.join(resolvedLogsDir, "info.log"),
220 | level: "info",
221 | ...fileTransportOptions,
222 | }),
223 | new winston.transports.File({
224 | filename: path.join(resolvedLogsDir, "debug.log"),
225 | level: "debug",
226 | ...fileTransportOptions,
227 | }),
228 | new winston.transports.File({
229 | filename: path.join(resolvedLogsDir, "combined.log"),
230 | ...fileTransportOptions,
231 | }),
232 | );
233 | } else {
234 | if (process.stdout.isTTY) {
235 | console.warn(
236 | "File logging disabled as logsPath is not configured or invalid.",
237 | );
238 | }
239 | }
240 |
241 | this.winstonLogger = winston.createLogger({
242 | level: this.currentWinstonLevel,
243 | transports,
244 | exitOnError: false,
245 | });
246 |
247 | // Initialize a separate logger for structured interactions
248 | if (resolvedLogsDir) {
249 | this.interactionLogger = winston.createLogger({
250 | format: winston.format.combine(
251 | winston.format.timestamp(),
252 | winston.format.json({ space: 2 }),
253 | ),
254 | transports: [
255 | new winston.transports.File({
256 | filename: path.join(resolvedLogsDir, "interactions.log"),
257 | ...fileTransportOptions,
258 | }),
259 | ],
260 | });
261 | }
262 |
263 | // Configure console transport after Winston logger is created
264 | const consoleStatus = this._configureConsoleTransport();
265 |
266 | const initialContext: RequestContext = {
267 | loggerSetup: true,
268 | requestId: "logger-init-deferred",
269 | timestamp: new Date().toISOString(),
270 | };
271 | // Console transport status is reported via consoleStatus.message below.
272 | if (consoleStatus.message) {
273 | this.info(consoleStatus.message, initialContext);
274 | }
275 |
276 | this.initialized = true; // Already set above; reaffirmed after successful setup
277 | this.info(
278 | `Logger initialized. File logging level: ${this.currentWinstonLevel}. MCP logging level: ${this.currentMcpLevel}. Console logging: ${consoleStatus.enabled ? "enabled" : "disabled"}`,
279 | {
280 | loggerSetup: true,
281 | requestId: "logger-post-init",
282 | timestamp: new Date().toISOString(),
283 | logsPathUsed: resolvedLogsDir ?? "none",
284 | },
285 | );
286 | }
287 |
288 | /**
289 | * Sets the function used to send MCP 'notifications/message'.
290 | * @param sender - The function to call for sending notifications, or undefined to disable.
291 | */
292 | public setMcpNotificationSender(
293 | sender: McpNotificationSender | undefined,
294 | ): void {
295 | this.mcpNotificationSender = sender;
296 | const status = sender ? "enabled" : "disabled";
297 | this.info(`MCP notification sending ${status}.`, {
298 | loggerSetup: true,
299 | requestId: "logger-set-sender",
300 | timestamp: new Date().toISOString(),
301 | });
302 | }
303 |
304 | /**
305 | * Dynamically sets the minimum logging level.
306 | * @param newLevel - The new minimum MCP log level to set.
307 | */
308 | public setLevel(newLevel: McpLogLevel): void {
309 | const setLevelContext: RequestContext = {
310 | loggerSetup: true,
311 | requestId: "logger-set-level",
312 | timestamp: new Date().toISOString(),
313 | };
314 | if (!this.ensureInitialized()) {
315 | if (process.stdout.isTTY) {
316 | console.error("Cannot set level: Logger not initialized.");
317 | }
318 | return;
319 | }
320 | if (!(newLevel in mcpLevelSeverity)) {
321 | this.warning(
322 | `Invalid MCP log level provided: ${newLevel}. Level not changed.`,
323 | setLevelContext,
324 | );
325 | return;
326 | }
327 |
328 | const oldLevel = this.currentMcpLevel;
329 | this.currentMcpLevel = newLevel;
330 | this.currentWinstonLevel = mcpToWinstonLevel[newLevel];
331 | if (this.winstonLogger) {
332 | // Ensure winstonLogger is defined
333 | this.winstonLogger.level = this.currentWinstonLevel;
334 | }
335 |
336 | const consoleStatus = this._configureConsoleTransport();
337 |
338 | if (oldLevel !== newLevel) {
339 | this.info(
340 | `Log level changed. File logging level: ${this.currentWinstonLevel}. MCP logging level: ${this.currentMcpLevel}. Console logging: ${consoleStatus.enabled ? "enabled" : "disabled"}`,
341 | setLevelContext,
342 | );
343 | if (
344 | consoleStatus.message &&
345 | consoleStatus.message !== "Console logging status unchanged."
346 | ) {
347 | this.info(consoleStatus.message, setLevelContext);
348 | }
349 | }
350 | }
351 |
352 | /**
353 | * Configures the console transport based on the current log level and TTY status.
354 | * Adds or removes the console transport as needed.
355 | * @returns {{ enabled: boolean, message: string | null }} Status of console logging.
356 | * @private
357 | */
358 | private _configureConsoleTransport(): {
359 | enabled: boolean;
360 | message: string | null;
361 | } {
362 | if (!this.winstonLogger) {
363 | return {
364 | enabled: false,
365 | message: "Cannot configure console: Winston logger not initialized.",
366 | };
367 | }
368 |
369 | const consoleTransport = this.winstonLogger.transports.find(
370 | (t) => t instanceof winston.transports.Console,
371 | );
372 | const shouldHaveConsole =
373 | this.currentMcpLevel === "debug" && process.stdout.isTTY;
374 | let message: string | null = null;
375 |
376 | if (shouldHaveConsole && !consoleTransport) {
377 | const consoleFormat = createWinstonConsoleFormat();
378 | this.winstonLogger.add(
379 | new winston.transports.Console({
380 | level: "debug", // Console always logs debug if enabled
381 | format: consoleFormat,
382 | }),
383 | );
384 | message = "Console logging enabled (level: debug, stdout is TTY).";
385 | } else if (!shouldHaveConsole && consoleTransport) {
386 | this.winstonLogger.remove(consoleTransport);
387 | message = "Console logging disabled (level not debug or stdout not TTY).";
388 | } else {
389 | message = "Console logging status unchanged.";
390 | }
391 | return { enabled: shouldHaveConsole, message };
392 | }
393 |
394 | /**
395 | * Gets the singleton instance of the Logger.
396 | * @returns The singleton Logger instance.
397 | */
398 | public static getInstance(): Logger {
399 | if (!Logger.instance) {
400 | Logger.instance = new Logger();
401 | }
402 | return Logger.instance;
403 | }
404 |
405 | /**
406 | * Resets the singleton instance.
407 | * This is intended for use in testing environments only.
408 | */
409 | public static resetForTesting(): void {
410 | // Guard against accidental use outside of a test environment.
411 | if (process.env.NODE_ENV !== "test") {
412 | console.warn(
413 | "Warning: `resetForTesting` should only be called in a test environment.",
414 | );
415 | return;
416 | }
417 | // De-reference the instance to allow garbage collection
418 | // and force re-creation on next getInstance() call.
419 | (Logger.instance as unknown) = undefined;
420 | }
421 |
422 | /**
423 | * Ensures the logger has been initialized.
424 | * @returns True if initialized, false otherwise.
425 | * @private
426 | */
427 | private ensureInitialized(): boolean {
428 | if (!this.initialized || !this.winstonLogger) {
429 | if (process.stdout.isTTY) {
430 | console.warn("Logger not initialized; message dropped.");
431 | }
432 | return false;
433 | }
434 | return true;
435 | }
436 |
437 | /**
438 | * Centralized log processing method.
439 | * @param level - The MCP severity level of the message.
440 | * @param msg - The main log message.
441 | * @param context - Optional request context for the log.
442 | * @param error - Optional error object associated with the log.
443 | * @private
444 | */
445 | private log(
446 | level: McpLogLevel,
447 | msg: string,
448 | context?: RequestContext,
449 | error?: Error,
450 | ): void {
451 | if (!this.ensureInitialized()) return;
452 | if (mcpLevelSeverity[level] > mcpLevelSeverity[this.currentMcpLevel]) {
453 | return; // Do not log if message level is less severe than currentMcpLevel
454 | }
455 |
456 | // The `@opentelemetry/instrumentation-winston` package automatically injects
457 | // the active trace_id and span_id into logs, so manual injection is no longer needed.
458 | const logData: Record<string, unknown> = { ...context };
459 | const winstonLevel = mcpToWinstonLevel[level];
460 |
461 | if (error) {
462 | this.winstonLogger!.log(winstonLevel, msg, { ...logData, error });
463 | } else {
464 | this.winstonLogger!.log(winstonLevel, msg, logData);
465 | }
466 |
467 | if (this.mcpNotificationSender) {
468 | const mcpDataPayload: McpLogPayload = { message: msg };
469 | if (context && Object.keys(context).length > 0)
470 | mcpDataPayload.context = context;
471 | if (error) {
472 | mcpDataPayload.error = { message: error.message };
473 | // Include stack trace in debug mode for MCP notifications, truncated for brevity
474 | if (this.currentMcpLevel === "debug" && error.stack) {
475 | mcpDataPayload.error.stack = error.stack.substring(
476 | 0,
477 | this.MCP_NOTIFICATION_STACK_TRACE_MAX_LENGTH,
478 | );
479 | }
480 | }
481 | try {
482 | const serverName =
483 | config?.mcpServerName ?? "MCP_SERVER_NAME_NOT_CONFIGURED";
484 | this.mcpNotificationSender(level, mcpDataPayload, serverName);
485 | } catch (sendError: unknown) {
486 | const errorMessage =
487 | sendError instanceof Error ? sendError.message : String(sendError);
488 | const internalErrorContext: RequestContext = {
489 | requestId: context?.requestId || "logger-internal-error",
490 | timestamp: new Date().toISOString(),
491 | originalLevel: level,
492 | originalMessage: msg,
493 | sendError: errorMessage,
494 | mcpPayload: JSON.stringify(mcpDataPayload).substring(0, 500), // Log a preview
495 | };
496 | this.winstonLogger!.error(
497 | "Failed to send MCP log notification",
498 | internalErrorContext,
499 | );
500 | }
501 | }
502 | }
503 |
504 | /** Logs a message at the 'debug' level. */
505 | public debug(msg: string, context?: RequestContext): void {
506 | this.log("debug", msg, context);
507 | }
508 |
509 | /** Logs a message at the 'info' level. */
510 | public info(msg: string, context?: RequestContext): void {
511 | this.log("info", msg, context);
512 | }
513 |
514 | /** Logs a message at the 'notice' level. */
515 | public notice(msg: string, context?: RequestContext): void {
516 | this.log("notice", msg, context);
517 | }
518 |
519 | /** Logs a message at the 'warning' level. */
520 | public warning(msg: string, context?: RequestContext): void {
521 | this.log("warning", msg, context);
522 | }
523 |
524 | /**
525 | * Logs a message at the 'error' level.
526 | * @param msg - The main log message.
527 | * @param err - Optional. Error object or RequestContext.
528 | * @param context - Optional. RequestContext if `err` is an Error.
529 | */
530 | public error(
531 | msg: string,
532 | err?: Error | RequestContext,
533 | context?: RequestContext,
534 | ): void {
535 | const errorObj = err instanceof Error ? err : undefined;
536 | const actualContext = err instanceof Error ? context : err;
537 | this.log("error", msg, actualContext, errorObj);
538 | }
539 |
540 | /**
541 | * Logs a message at the 'crit' (critical) level.
542 | * @param msg - The main log message.
543 | * @param err - Optional. Error object or RequestContext.
544 | * @param context - Optional. RequestContext if `err` is an Error.
545 | */
546 | public crit(
547 | msg: string,
548 | err?: Error | RequestContext,
549 | context?: RequestContext,
550 | ): void {
551 | const errorObj = err instanceof Error ? err : undefined;
552 | const actualContext = err instanceof Error ? context : err;
553 | this.log("crit", msg, actualContext, errorObj);
554 | }
555 |
556 | /**
557 | * Logs a message at the 'alert' level.
558 | * @param msg - The main log message.
559 | * @param err - Optional. Error object or RequestContext.
560 | * @param context - Optional. RequestContext if `err` is an Error.
561 | */
562 | public alert(
563 | msg: string,
564 | err?: Error | RequestContext,
565 | context?: RequestContext,
566 | ): void {
567 | const errorObj = err instanceof Error ? err : undefined;
568 | const actualContext = err instanceof Error ? context : err;
569 | this.log("alert", msg, actualContext, errorObj);
570 | }
571 |
572 | /**
573 | * Logs a message at the 'emerg' (emergency) level.
574 | * @param msg - The main log message.
575 | * @param err - Optional. Error object or RequestContext.
576 | * @param context - Optional. RequestContext if `err` is an Error.
577 | */
578 | public emerg(
579 | msg: string,
580 | err?: Error | RequestContext,
581 | context?: RequestContext,
582 | ): void {
583 | const errorObj = err instanceof Error ? err : undefined;
584 | const actualContext = err instanceof Error ? context : err;
585 | this.log("emerg", msg, actualContext, errorObj);
586 | }
587 |
588 | /**
589 | * Logs a message at the 'emerg' (emergency) level, typically for fatal errors.
590 | * @param msg - The main log message.
591 | * @param err - Optional. Error object or RequestContext.
592 | * @param context - Optional. RequestContext if `err` is an Error.
593 | */
594 | public fatal(
595 | msg: string,
596 | err?: Error | RequestContext,
597 | context?: RequestContext,
598 | ): void {
599 | const errorObj = err instanceof Error ? err : undefined;
600 | const actualContext = err instanceof Error ? context : err;
601 | this.log("emerg", msg, actualContext, errorObj);
602 | }
603 |
604 | /**
605 | * Logs a structured interaction object to a dedicated file.
606 | * @param interactionName - A name for the interaction type (e.g., 'OpenRouterIO').
607 | * @param data - The structured data to log.
608 | */
609 | public logInteraction(
610 | interactionName: string,
611 | data: Record<string, unknown>,
612 | ): void {
613 | if (!this.interactionLogger) {
614 | this.warning(
615 | "Interaction logger not available. File logging may be disabled.",
616 | data.context as RequestContext,
617 | );
618 | return;
619 | }
620 | this.interactionLogger.info({ interactionName, ...data });
621 | }
622 | }
623 |
624 | /**
625 | * The singleton instance of the Logger.
626 | * Use this instance for all logging operations.
627 | */
628 | export const logger = Logger.getInstance();
629 |
```
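
A short usage sketch of the singleton exported above; the `requestId`/`timestamp` context fields mirror those the logger uses internally, and the values here are illustrative only.

```typescript
import { logger } from "./logger.js";

// Initialize once at startup. At "debug" with a TTY stdout, a console
// transport is attached in addition to any file transports.
await logger.initialize("debug");

const ctx = {
  requestId: "example-request",
  timestamp: new Date().toISOString(),
};

logger.info("Fetched PubMed records.", ctx);

try {
  throw new Error("E-utilities request timed out");
} catch (err) {
  // error(msg, err, context): the Error's message (plus a truncated stack
  // when the MCP level is "debug") is forwarded to any registered MCP sender.
  logger.error("NCBI fetch failed.", err as Error, ctx);
}

// Raising the threshold drops info/debug messages and detaches the console
// transport, since _configureConsoleTransport keeps it only at "debug".
logger.setLevel("warning");
```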
--------------------------------------------------------------------------------
/src/utils/security/sanitization.ts:
--------------------------------------------------------------------------------
```typescript
1 | /**
2 | * @fileoverview Provides a comprehensive `Sanitization` class for various input cleaning and validation tasks.
3 | * This module includes utilities for sanitizing HTML, strings, URLs, file paths, JSON, numbers,
4 | * and for redacting sensitive information from data intended for logging.
5 | * @module src/utils/security/sanitization
6 | */
7 | import path from "path";
8 | import sanitizeHtml from "sanitize-html";
9 | import validator from "validator";
10 | import { BaseErrorCode, McpError } from "../../types-global/errors.js";
11 | import { logger, requestContextService } from "../index.js";
12 |
13 | /**
14 | * Defines options for path sanitization to control how file paths are processed and validated.
15 | */
16 | export interface PathSanitizeOptions {
17 | /** If provided, restricts sanitized paths to be relative to this directory. */
18 | rootDir?: string;
19 | /** If true, normalizes Windows backslashes to POSIX forward slashes. */
20 | toPosix?: boolean;
21 | /** If true, absolute paths are permitted (subject to `rootDir`). Default: false. */
22 | allowAbsolute?: boolean;
23 | }
24 |
25 | /**
26 | * Contains information about a path sanitization operation.
27 | */
28 | export interface SanitizedPathInfo {
29 | /** The final sanitized and normalized path string. */
30 | sanitizedPath: string;
31 | /** The original path string before any processing. */
32 | originalInput: string;
33 | /** True if the input path was absolute after initial normalization. */
34 | wasAbsolute: boolean;
35 | /** True if an absolute path was converted to relative due to `allowAbsolute: false`. */
36 | convertedToRelative: boolean;
37 | /** The effective options used for sanitization, including defaults. */
38 | optionsUsed: PathSanitizeOptions;
39 | }
40 |
41 | /**
42 | * Defines options for context-specific string sanitization.
43 | */
44 | export interface SanitizeStringOptions {
45 | /** The context in which the string will be used. 'javascript' is disallowed. */
46 | context?: "text" | "html" | "attribute" | "url" | "javascript";
47 | /** Custom allowed HTML tags if `context` is 'html'. */
48 | allowedTags?: string[];
49 | /** Custom allowed HTML attributes if `context` is 'html'. */
50 | allowedAttributes?: Record<string, string[]>;
51 | }
52 |
53 | /**
54 | * Configuration options for HTML sanitization, mirroring `sanitize-html` library options.
55 | */
56 | export interface HtmlSanitizeConfig {
57 | /** An array of allowed HTML tag names. */
58 | allowedTags?: string[];
59 | /** Specifies allowed attributes, either globally or per tag. */
60 | allowedAttributes?: sanitizeHtml.IOptions["allowedAttributes"];
61 | /** If true, HTML comments are preserved. */
62 | preserveComments?: boolean;
63 | /** Custom functions to transform tags during sanitization. */
64 | transformTags?: sanitizeHtml.IOptions["transformTags"];
65 | }
66 |
67 | /**
68 | * A singleton class providing various methods for input sanitization.
69 | * Aims to protect against common vulnerabilities like XSS and path traversal.
70 | */
71 | export class Sanitization {
72 | /** @private */
73 | private static instance: Sanitization;
74 |
75 | /**
76 | * Default list of field names considered sensitive for log redaction.
77 | * Case-insensitive matching is applied.
78 | * @private
79 | */
80 | private sensitiveFields: string[] = [
81 | "password",
82 | "token",
83 | "secret",
84 | "key",
85 | "apiKey",
86 | "auth",
87 | "credential",
88 | "jwt",
89 | "ssn",
90 | "credit",
91 | "card",
92 | "cvv",
93 | "authorization",
94 | ];
95 |
96 | /**
97 | * Default configuration for HTML sanitization.
98 | * @private
99 | */
100 | private defaultHtmlSanitizeConfig: HtmlSanitizeConfig = {
101 | allowedTags: [
102 | "h1",
103 | "h2",
104 | "h3",
105 | "h4",
106 | "h5",
107 | "h6",
108 | "p",
109 | "a",
110 | "ul",
111 | "ol",
112 | "li",
113 | "b",
114 | "i",
115 | "strong",
116 | "em",
117 | "strike",
118 | "code",
119 | "hr",
120 | "br",
121 | "div",
122 | "table",
123 | "thead",
124 | "tbody",
125 | "tr",
126 | "th",
127 | "td",
128 | "pre",
129 | ],
130 | allowedAttributes: {
131 | a: ["href", "name", "target"],
132 | img: ["src", "alt", "title", "width", "height"],
133 | "*": ["class", "id", "style"],
134 | },
135 | preserveComments: false,
136 | };
137 |
138 | /** @private */
139 | private constructor() {}
140 |
141 | /**
142 | * Retrieves the singleton instance of the `Sanitization` class.
143 | * @returns The singleton `Sanitization` instance.
144 | */
145 | public static getInstance(): Sanitization {
146 | if (!Sanitization.instance) {
147 | Sanitization.instance = new Sanitization();
148 | }
149 | return Sanitization.instance;
150 | }
151 |
152 | /**
153 | * Extends the list of sensitive field names used for log sanitization.
154 | * @param fields - An array of field names to add to the sensitive list.
155 | */
156 | public setSensitiveFields(fields: string[]): void {
157 | this.sensitiveFields = [
158 | ...new Set([
159 | ...this.sensitiveFields,
160 | ...fields.map((f) => f.toLowerCase()),
161 | ]),
162 | ];
163 | const logContext = requestContextService.createRequestContext({
164 | operation: "Sanitization.setSensitiveFields",
165 | newSensitiveFieldCount: this.sensitiveFields.length,
166 | });
167 | logger.debug(
168 | "Updated sensitive fields list for log sanitization",
169 | logContext,
170 | );
171 | }
172 |
173 | /**
174 | * Gets a copy of the current list of sensitive field names.
175 | * @returns An array of sensitive field names.
176 | */
177 | public getSensitiveFields(): string[] {
178 | return [...this.sensitiveFields];
179 | }
180 |
181 | /**
182 | * Sanitizes an HTML string by removing potentially malicious tags and attributes.
183 | * @param input - The HTML string to sanitize.
184 | * @param config - Optional custom configuration for `sanitize-html`.
185 | * @returns The sanitized HTML string. Returns an empty string if input is falsy.
186 | */
187 | public sanitizeHtml(input: string, config?: HtmlSanitizeConfig): string {
188 | if (!input) return "";
189 | const effectiveConfig = {
190 | allowedTags:
191 | config?.allowedTags ?? this.defaultHtmlSanitizeConfig.allowedTags,
192 | allowedAttributes:
193 | config?.allowedAttributes ??
194 | this.defaultHtmlSanitizeConfig.allowedAttributes,
195 | transformTags: config?.transformTags, // Can be undefined
196 | preserveComments:
197 | config?.preserveComments ??
198 | this.defaultHtmlSanitizeConfig.preserveComments,
199 | };
200 |
201 | const options: sanitizeHtml.IOptions = {
202 | allowedTags: effectiveConfig.allowedTags,
203 | allowedAttributes: effectiveConfig.allowedAttributes,
204 | transformTags: effectiveConfig.transformTags,
205 | };
206 |
207 | if (effectiveConfig.preserveComments) {
208 | // Ensure allowedTags is an array before spreading
209 | const baseTags = Array.isArray(options.allowedTags)
210 | ? options.allowedTags
211 | : [];
212 | options.allowedTags = [...baseTags, "!--"];
213 | }
214 | return sanitizeHtml(input, options);
215 | }
216 |
217 | /**
218 | * Sanitizes a string based on its intended context (e.g., HTML, URL, text).
219 | * **Important:** `context: 'javascript'` is disallowed due to security risks.
220 | *
221 | * @param input - The string to sanitize.
222 | * @param options - Options specifying the sanitization context.
223 | * @returns The sanitized string. Returns an empty string if input is falsy.
224 | * @throws {McpError} If `options.context` is 'javascript', or URL validation fails.
225 | */
226 | public sanitizeString(
227 | input: string,
228 | options: SanitizeStringOptions = {},
229 | ): string {
230 | if (!input) return "";
231 |
232 | const context = options.context ?? "text";
233 |
234 | switch (context) {
235 | case "html": {
236 | const config: HtmlSanitizeConfig = {};
237 | if (options.allowedTags) {
238 | config.allowedTags = options.allowedTags;
239 | }
240 | if (options.allowedAttributes) {
241 | config.allowedAttributes = this.convertAttributesFormat(
242 | options.allowedAttributes,
243 | );
244 | }
245 | return this.sanitizeHtml(input, config);
246 | }
247 | case "attribute":
248 | return sanitizeHtml(input, { allowedTags: [], allowedAttributes: {} });
249 | case "url":
250 | if (
251 | !validator.isURL(input, {
252 | protocols: ["http", "https"],
253 | require_protocol: true,
254 | require_host: true,
255 | })
256 | ) {
257 | logger.warning(
258 | "Potentially invalid URL detected during string sanitization (context: url)",
259 | requestContextService.createRequestContext({
260 | operation: "Sanitization.sanitizeString.urlWarning",
261 | invalidUrlAttempt: input,
262 | }),
263 | );
264 | return "";
265 | }
266 | return validator.trim(input);
267 | case "javascript":
268 | logger.error(
269 | "Attempted JavaScript sanitization via sanitizeString, which is disallowed.",
270 | requestContextService.createRequestContext({
271 | operation: "Sanitization.sanitizeString.jsAttempt",
272 | inputSnippet: input.substring(0, 50),
273 | }),
274 | );
275 | throw new McpError(
276 | BaseErrorCode.VALIDATION_ERROR,
277 | "JavaScript sanitization is not supported through sanitizeString due to security risks.",
278 | );
279 | case "text":
280 | default:
281 | return sanitizeHtml(input, { allowedTags: [], allowedAttributes: {} });
282 | }
283 | }
284 |
285 | /**
286 | * Adapts the attribute map for `sanitize-html`; the formats are structurally compatible, so this is a typed pass-through.
287 | * @param attrs - Attributes in `{ tagName: ['attr1'] }` format.
288 | * @returns Attributes in `sanitize-html` expected format.
289 | * @private
290 | */
291 | private convertAttributesFormat(
292 | attrs: Record<string, string[]>,
293 | ): sanitizeHtml.IOptions["allowedAttributes"] {
294 | return attrs;
295 | }
296 |
297 | /**
298 | * Sanitizes a URL string by validating its format and protocol.
299 | * @param input - The URL string to sanitize.
300 | * @param allowedProtocols - Array of allowed URL protocols. Default: `['http', 'https']`.
301 | * @returns The sanitized and trimmed URL string.
302 | * @throws {McpError} If the URL is invalid or uses a disallowed protocol.
303 | */
304 | public sanitizeUrl(
305 | input: string,
306 | allowedProtocols: string[] = ["http", "https"],
307 | ): string {
308 | try {
309 | const trimmedInput = input.trim();
310 | if (
311 | !validator.isURL(trimmedInput, {
312 | protocols: allowedProtocols,
313 | require_protocol: true,
314 | require_host: true,
315 | })
316 | ) {
317 | throw new Error("Invalid URL format or protocol not in allowed list.");
318 | }
319 | const lowercasedInput = trimmedInput.toLowerCase();
320 | if (
321 | lowercasedInput.startsWith("javascript:") ||
322 | lowercasedInput.startsWith("data:") ||
323 | lowercasedInput.startsWith("vbscript:")
324 | ) {
325 | throw new Error(
326 | "Disallowed pseudo-protocol (javascript:, data:, or vbscript:) in URL.",
327 | );
328 | }
329 | return trimmedInput;
330 | } catch (error) {
331 | throw new McpError(
332 | BaseErrorCode.VALIDATION_ERROR,
333 | error instanceof Error
334 | ? error.message
335 | : "Invalid or unsafe URL provided.",
336 | { input },
337 | );
338 | }
339 | }
340 |
341 | /**
342 | * Sanitizes a file path to prevent path traversal and normalize format.
343 | * @param input - The file path string to sanitize.
344 | * @param options - Options to control sanitization behavior.
345 | * @returns An object with the sanitized path and sanitization metadata.
346 | * @throws {McpError} If the path is invalid or unsafe.
347 | */
348 | public sanitizePath(
349 | input: string,
350 | options: PathSanitizeOptions = {},
351 | ): SanitizedPathInfo {
352 | const originalInput = input;
353 | const effectiveOptions: PathSanitizeOptions = {
354 | toPosix: options.toPosix ?? false,
355 | allowAbsolute: options.allowAbsolute ?? false,
356 | rootDir: options.rootDir ? path.resolve(options.rootDir) : undefined,
357 | };
358 |
359 | let wasAbsoluteInitially = false;
360 |
361 | try {
362 | if (!input || typeof input !== "string")
363 | throw new Error("Invalid path input: must be a non-empty string.");
364 | if (input.includes("\0"))
365 | throw new Error("Path contains null byte, which is disallowed.");
366 |
367 | let normalized = path.normalize(input);
368 | wasAbsoluteInitially = path.isAbsolute(normalized);
369 |
370 | if (effectiveOptions.toPosix) {
371 | normalized = normalized.replace(/\\/g, "/");
372 | }
373 |
374 | let finalSanitizedPath: string;
375 |
376 | if (effectiveOptions.rootDir) {
377 | const fullPath = path.resolve(effectiveOptions.rootDir, normalized);
378 | if (
379 | !fullPath.startsWith(effectiveOptions.rootDir + path.sep) &&
380 | fullPath !== effectiveOptions.rootDir
381 | ) {
382 | throw new Error(
383 | "Path traversal detected: attempts to escape the defined root directory.",
384 | );
385 | }
386 | finalSanitizedPath = path.relative(effectiveOptions.rootDir, fullPath);
387 | finalSanitizedPath =
388 | finalSanitizedPath === "" ? "." : finalSanitizedPath;
389 | if (
390 | path.isAbsolute(finalSanitizedPath) &&
391 | !effectiveOptions.allowAbsolute
392 | ) {
393 | throw new Error(
394 | "Path resolved to absolute outside root when absolute paths are disallowed.",
395 | );
396 | }
397 | } else {
398 | if (path.isAbsolute(normalized)) {
399 | if (!effectiveOptions.allowAbsolute) {
400 | throw new Error(
401 | "Absolute paths are disallowed by current options.",
402 | );
403 | } else {
404 | finalSanitizedPath = normalized;
405 | }
406 | } else {
407 | const resolvedAgainstCwd = path.resolve(normalized);
408 | const currentWorkingDir = path.resolve(".");
409 | if (
410 | !resolvedAgainstCwd.startsWith(currentWorkingDir + path.sep) &&
411 | resolvedAgainstCwd !== currentWorkingDir
412 | ) {
413 | throw new Error(
414 | "Relative path traversal detected (escapes current working directory context).",
415 | );
416 | }
417 | finalSanitizedPath = normalized;
418 | }
419 | }
420 |
421 | return {
422 | sanitizedPath: finalSanitizedPath,
423 | originalInput,
424 | wasAbsolute: wasAbsoluteInitially,
425 | convertedToRelative:
426 | wasAbsoluteInitially &&
427 | !path.isAbsolute(finalSanitizedPath) &&
428 | !effectiveOptions.allowAbsolute,
429 | optionsUsed: effectiveOptions,
430 | };
431 | } catch (error) {
432 | logger.warning(
433 | "Path sanitization error",
434 | requestContextService.createRequestContext({
435 | operation: "Sanitization.sanitizePath.error",
436 | originalPathInput: originalInput,
437 | pathOptionsUsed: effectiveOptions,
438 | errorMessage: error instanceof Error ? error.message : String(error),
439 | }),
440 | );
441 | throw new McpError(
442 | BaseErrorCode.VALIDATION_ERROR,
443 | error instanceof Error
444 | ? error.message
445 | : "Invalid or unsafe path provided.",
446 | { input: originalInput },
447 | );
448 | }
449 | }
450 |
451 | /**
452 | * Sanitizes a JSON string by parsing it to validate its format.
453 | * Optionally checks if the JSON string exceeds a maximum allowed size.
454 | * @template T The expected type of the parsed JSON object. Defaults to `unknown`.
455 | * @param input - The JSON string to sanitize/validate.
456 | * @param maxSize - Optional maximum allowed size of the JSON string in bytes.
457 | * @returns The parsed JavaScript object.
458 | * @throws {McpError} If input is not a string, too large, or invalid JSON.
459 | */
460 | public sanitizeJson<T = unknown>(input: string, maxSize?: number): T {
461 | try {
462 | if (typeof input !== "string")
463 | throw new Error("Invalid input: expected a JSON string.");
464 | if (maxSize !== undefined && Buffer.byteLength(input, "utf8") > maxSize) {
465 | throw new McpError(
466 | BaseErrorCode.VALIDATION_ERROR,
467 | `JSON string exceeds maximum allowed size of ${maxSize} bytes.`,
468 | { actualSize: Buffer.byteLength(input, "utf8"), maxSize },
469 | );
470 | }
471 | return JSON.parse(input) as T;
472 | } catch (error) {
473 | if (error instanceof McpError) throw error;
474 | throw new McpError(
475 | BaseErrorCode.VALIDATION_ERROR,
476 | error instanceof Error ? error.message : "Invalid JSON format.",
477 | {
478 | inputPreview:
479 | input.length > 100 ? `${input.substring(0, 100)}...` : input,
480 | },
481 | );
482 | }
483 | }
484 |
485 | /**
486 | * Validates and sanitizes a numeric input, converting strings to numbers.
487 | * Clamps the number to `min`/`max` if provided.
488 | * @param input - The number or string to validate and sanitize.
489 | * @param min - Minimum allowed value (inclusive).
490 | * @param max - Maximum allowed value (inclusive).
491 | * @returns The sanitized (and potentially clamped) number.
492 | * @throws {McpError} If input is not a valid number, NaN, or Infinity.
493 | */
494 | public sanitizeNumber(
495 | input: number | string,
496 | min?: number,
497 | max?: number,
498 | ): number {
499 | let value: number;
500 | if (typeof input === "string") {
501 | const trimmedInput = input.trim();
502 | if (trimmedInput === "" || !validator.isNumeric(trimmedInput)) {
503 | throw new McpError(
504 | BaseErrorCode.VALIDATION_ERROR,
505 | "Invalid number format: input is empty or not numeric.",
506 | { input },
507 | );
508 | }
509 | value = parseFloat(trimmedInput);
510 | } else if (typeof input === "number") {
511 | value = input;
512 | } else {
513 | throw new McpError(
514 | BaseErrorCode.VALIDATION_ERROR,
515 | "Invalid input type: expected number or string.",
516 | { input: String(input) },
517 | );
518 | }
519 |
520 | if (isNaN(value) || !isFinite(value)) {
521 | throw new McpError(
522 | BaseErrorCode.VALIDATION_ERROR,
523 | "Invalid number value (NaN or Infinity).",
524 | { input },
525 | );
526 | }
527 |
528 | let clamped = false;
529 | const originalValueForLog = value;
530 | if (min !== undefined && value < min) {
531 | value = min;
532 | clamped = true;
533 | }
534 | if (max !== undefined && value > max) {
535 | value = max;
536 | clamped = true;
537 | }
538 | if (clamped) {
539 | logger.debug(
540 | "Number clamped to range.",
541 | requestContextService.createRequestContext({
542 | operation: "Sanitization.sanitizeNumber.clamped",
543 | originalInput: String(input),
544 | parsedValue: originalValueForLog,
545 | minValue: min,
546 | maxValue: max,
547 | clampedValue: value,
548 | }),
549 | );
550 | }
551 | return value;
552 | }
553 |
554 | /**
555 | * Sanitizes input for logging by redacting sensitive fields.
556 | * Creates a deep clone and replaces values of fields matching `this.sensitiveFields`
557 |  * (case-insensitive match on the word segments of camelCase/snake_case/kebab-case keys) with "[REDACTED]".
558 | *
559 | * It uses `structuredClone` if available for a high-fidelity deep clone.
560 | * If `structuredClone` is not available (e.g., in older Node.js environments),
561 | * it falls back to `JSON.parse(JSON.stringify(input))`. This fallback has limitations:
562 | * - `Date` objects are converted to ISO date strings.
563 | * - `undefined` values within objects are removed.
564 | * - `Map`, `Set`, `RegExp` objects are converted to empty objects (`{}`).
565 | * - Functions are removed.
566 | * - `BigInt` values will throw an error during `JSON.stringify` unless a `toJSON` method is provided.
567 | * - Circular references will cause `JSON.stringify` to throw an error.
568 | *
569 | * @param input - The input data to sanitize for logging.
570 | * @returns A sanitized (deep cloned) version of the input, safe for logging.
571 | * Returns original input if not object/array, or "[Log Sanitization Failed]" on error.
572 | */
573 | public sanitizeForLogging(input: unknown): unknown {
574 | try {
575 | if (!input || typeof input !== "object") return input;
576 |
577 | const clonedInput =
578 | typeof globalThis.structuredClone === "function"
579 | ? globalThis.structuredClone(input)
580 | : JSON.parse(JSON.stringify(input));
581 | this.redactSensitiveFields(clonedInput);
582 | return clonedInput;
583 | } catch (error) {
584 | logger.error(
585 | "Error during log sanitization, returning placeholder.",
586 | requestContextService.createRequestContext({
587 | operation: "Sanitization.sanitizeForLogging.error",
588 | errorMessage: error instanceof Error ? error.message : String(error),
589 | }),
590 | );
591 | return "[Log Sanitization Failed]";
592 | }
593 | }
594 |
595 | /**
596 | * Recursively redacts sensitive fields in an object or array in place.
597 | * @param obj - The object or array to redact.
598 | * @private
599 | */
600 | private redactSensitiveFields(obj: unknown): void {
601 | if (!obj || typeof obj !== "object") return;
602 |
603 | if (Array.isArray(obj)) {
604 | obj.forEach((item) => this.redactSensitiveFields(item));
605 | return;
606 | }
607 |
608 | for (const key in obj) {
609 | if (Object.prototype.hasOwnProperty.call(obj, key)) {
610 | const value = (obj as Record<string, unknown>)[key];
611 |
612 | // Split camelCase and snake_case/kebab-case keys into words
613 | const keyWords = key
614 | .replace(/([A-Z])/g, " $1") // Add space before uppercase letters
615 | .toLowerCase()
616 | .split(/[\s_-]+/); // Split by space, underscore, or hyphen
617 |
618 | const isSensitive = keyWords.some((word) =>
619 | this.sensitiveFields.includes(word),
620 | );
621 |
622 | if (isSensitive) {
623 | (obj as Record<string, unknown>)[key] = "[REDACTED]";
624 | } else if (value && typeof value === "object") {
625 | this.redactSensitiveFields(value);
626 | }
627 | }
628 | }
629 | }
630 | }
631 |
632 | /**
633 | * Singleton instance of the `Sanitization` class.
634 | * Use this for all input sanitization tasks.
635 | */
636 | export const sanitization = Sanitization.getInstance();
637 |
638 | /**
639 | * Convenience function calling `sanitization.sanitizeForLogging`.
640 | * @param input - The input data to sanitize.
641 | * @returns A sanitized version of the input, safe for logging.
642 | */
643 | export const sanitizeInputForLogging = (input: unknown): unknown =>
644 | sanitization.sanitizeForLogging(input);
645 |
```
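
For orientation, a brief usage sketch of the exported singleton follows. It is illustrative only: the import path and sample values are assumptions, and whether a given key (e.g., `password`) is redacted depends on the `sensitiveFields` list defined earlier in this class.

```typescript
// Hypothetical usage of the sanitization singleton; import path illustrative.
import { sanitization, sanitizeInputForLogging } from "./sanitization.js";

// URL validation: throws McpError for bad formats or disallowed protocols.
const url = sanitization.sanitizeUrl(
  "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
);

// Path sanitization confined to a root directory.
const pathInfo = sanitization.sanitizePath("reports/2024/summary.txt", {
  rootDir: "/var/data",
  toPosix: true,
});
console.log(pathInfo.sanitizedPath); // "reports/2024/summary.txt"

// Number coercion with clamping to [1, 100].
const pageSize = sanitization.sanitizeNumber("250", 1, 100); // 100 (clamped)

// Deep-cloned, redacted copy for logs (assuming "password" is a sensitive field).
console.log(sanitizeInputForLogging({ password: "hunter2", query: "cancer" }));
// -> { password: "[REDACTED]", query: "cancer" }
```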
--------------------------------------------------------------------------------
/docs/project-spec.md:
--------------------------------------------------------------------------------
```markdown
1 | ---
2 |
3 | ## MCP Server for PubMed Exploration: Project Specification
4 |
5 | **Version:** 1.0.0 (Aligned with `pubmed-mcp-server` v1.0.0)
6 | **MCP Specification Compliance:** 2025-03-26
7 |
8 | **1. Project Vision & Goal**
9 |
10 | To create an MCP server, "pubmed-mcp-server," that acts as an intelligent and robust gateway for Large Language Models (LLMs) and other applications to programmatically search, retrieve, and process information from the PubMed database via NCBI E-utilities. This server will abstract the complexities of E-utilities, enforce NCBI best practices, provide structured data and actions, and offer enhanced functionalities beyond raw E-utility calls. It leverages the `mcp-ts-template` foundation for core MCP functionalities and utilities.
11 |
12 | **2. Core MCP Server Configuration**
13 |
14 | - **Server Name:** `pubmed-mcp-server` (as defined in `package.json` and `src/config/index.ts`)
15 | - **Version:** `1.0.0` (initial, from `package.json`)
16 | - **Transport:**
17 |   - Primarily designed for **HTTP transport** (`MCP_TRANSPORT_TYPE=http`), using the Streamable HTTP transport with Server-Sent Events (SSE) for streaming responses, running an Express server. This is recommended for remote access and robust session management.
18 | - Supports **stdio transport** (`MCP_TRANSPORT_TYPE=stdio`) for local or embedded use cases.
19 | - Configuration via environment variables:
20 | - `MCP_TRANSPORT_TYPE`: `"http"` or `"stdio"`.
21 | - `MCP_HTTP_PORT`, `MCP_HTTP_HOST`: For HTTP transport.
22 | - `MCP_ALLOWED_ORIGINS`: For HTTP CORS configuration.
23 | - **Authentication & Authorization (HTTP Transport):**
24 | - **JWT Authentication:** Mandatory for HTTP transport, configured via `MCP_AUTH_SECRET_KEY`. Implemented in `src/mcp-server/transports/authentication/authMiddleware.ts`.
25 | - **Origin Validation:** `originCheckMiddleware` using `MCP_ALLOWED_ORIGINS`.
26 | - **NCBI E-utilities Configuration (via Environment Variables):**
27 | - `NCBI_API_KEY`: **Essential.** The server's primary NCBI API Key for higher rate limits.
28 | - `NCBI_TOOL_IDENTIFIER`: Tool name sent to NCBI (e.g., "pubmed-mcp-server/1.0.0"). Defaults to `pubmed-mcp-server/<version>`.
29 | - `NCBI_ADMIN_EMAIL`: Administrator's email for NCBI contact.
30 | - `NCBI_REQUEST_DELAY_MS`: Milliseconds to wait between NCBI requests (e.g., 100 for API key, ensuring <10 requests/sec).
31 | - `NCBI_MAX_RETRIES`: Max retries for failed NCBI requests.
32 | - The server automatically includes `api_key`, `tool`, and `email` parameters in all E-utility requests, managed by `ncbiService.ts`.
33 | - **Logging:**
34 | - Configured via `MCP_LOG_LEVEL` and `LOGS_DIR`.
35 | - Uses the structured logger from `src/utils/internal/logger.ts`, compliant with MCP spec.
36 | - **SDK Usage:**
37 | - Tools and resources are defined using the high-level SDK abstractions:
38 | - `server.tool(name, description, zodSchemaShape, handler)`
39 | - `server.resource(regName, template, metadata, handler)`
40 | - This ensures type safety, automatic schema generation, and simplified protocol adherence.
41 |
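For quick reference, the variables above could be combined in a `.env` file along these lines (all values are illustrative examples, not shipped defaults):

```bash
# Transport
MCP_TRANSPORT_TYPE=http
MCP_HTTP_HOST=127.0.0.1
MCP_HTTP_PORT=3010
MCP_ALLOWED_ORIGINS=https://app.example.com
MCP_AUTH_SECRET_KEY=<long-random-secret>

# NCBI E-utilities
NCBI_API_KEY=<your-ncbi-api-key>
NCBI_TOOL_IDENTIFIER=pubmed-mcp-server/1.0.0
NCBI_ADMIN_EMAIL=admin@example.com
NCBI_REQUEST_DELAY_MS=100
NCBI_MAX_RETRIES=3

# Logging
MCP_LOG_LEVEL=info
LOGS_DIR=./logs
```
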
42 | **2.1. Adherence to NCBI Guidelines**
43 | This server is designed to strictly adhere to NCBI E-utility usage policies, including:
44 | - Mandatory use of a registered API Key (`NCBI_API_KEY`).
45 | - Transmission of `tool` (`NCBI_TOOL_IDENTIFIER`) and `email` (`NCBI_ADMIN_EMAIL`) parameters with every request.
46 | - Respecting request rate limits (not exceeding 10 requests per second with an API key, or 3 per second without). This is managed by the internal `ncbiService.ts` through request queuing and delays.
47 | - The server does not facilitate bulk downloading or redistribution of PubMed data in a manner that would violate NCBI policies. Users of the MCP server are also expected to comply with NCBI's terms of service.
48 |
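The queuing approach can be pictured with the following minimal sketch. It is illustrative only; the real queue in `ncbiService.ts` additionally handles retries, backoff, and NCBI error translation.

```typescript
// Minimal serialized request queue with a fixed inter-request delay.
class SimpleRequestQueue {
  private tail: Promise<unknown> = Promise.resolve();

  constructor(private readonly delayMs: number) {}

  enqueue<T>(task: () => Promise<T>): Promise<T> {
    const result = this.tail.then(() => task());
    // The next task may start only after this one settles plus the delay.
    this.tail = result
      .catch(() => undefined)
      .then(() => new Promise((resolve) => setTimeout(resolve, this.delayMs)));
    return result;
  }
}

// With an API key, a 100 ms delay keeps the server under 10 requests/sec.
const ncbiQueue = new SimpleRequestQueue(100);
```
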
49 | **3. MCP Tools**
50 |
51 | Tools encapsulate E-utility calls, adding value through processing, structuring, and providing LLM-friendly inputs/outputs. Handlers will utilize `RequestContext` for correlation and `ErrorHandler.tryCatch` for robust error management. All interactions with NCBI E-utilities are managed by the `ncbiService.ts`.
52 |
53 | **3.1. Tool: `searchPubMedArticles`**
54 |
55 | - **Description:** Searches PubMed for articles matching a query term. Returns PMIDs, metadata, and optional brief summaries using ESummary v2.0.
56 | - **Underlying E-utilities:** `ESearch` (primary, with `usehistory=y` if summaries are fetched), `ESummary` (optional, `version="2.0"`).
57 | - **Registration:** `src/mcp-server/tools/searchPubMedArticles/registration.ts`
58 | - **Logic:** `src/mcp-server/tools/searchPubMedArticles/logic.ts`
59 | - **Input Parameters (Zod Schema Shape - to be used with `server.tool`):**
60 | ```typescript
61 | // Shape for Zod schema, e.g., in searchPubMedArticlesLogic.ts
62 | // import { z } from 'zod';
63 | // export const SearchPubMedArticlesInputSchema = z.object({
64 | {
65 | queryTerm: z.string().min(3, "Query term must be at least 3 characters"),
66 |     maxResults: z.number().int().positive().max(1000, "Max results per query. ESearch's retmax is used.").optional().default(20),
67 | sortBy: z.enum([ // Directly supported ESearch sort options for PubMed
68 | "relevance", // Default, "Best Match"
69 | "pub_date", // Publication Date
70 | "author", // First Author
71 | "journal_name" // Journal Name
72 | ]).optional().default("relevance").describe("Note: Other sorting (e.g., last_author, title) may require client-side implementation or be future server enhancements."),
73 | dateRange: z.object({
74 | minDate: z.string().regex(/^\d{4}(\/\d{2}(\/\d{2})?)?$/, "YYYY, YYYY/MM, or YYYY/MM/DD").optional(),
75 | maxDate: z.string().regex(/^\d{4}(\/\d{2}(\/\d{2})?)?$/, "YYYY, YYYY/MM, or YYYY/MM/DD").optional(),
76 | dateType: z.enum(["pdat", "mdat", "edat"]).optional().default("pdat") // pdat: Publication Date, mdat: Modification Date, edat: Entrez Date
77 | }).optional().describe("Defines a date range for the search."),
78 | filterByPublicationTypes: z.array(z.string()).optional().describe("e.g., ['Review', 'Clinical Trial']. Server maps to Entrez query syntax (e.g., \"Review\"[Publication Type])."),
79 | fetchBriefSummaries: z.number().int().min(0).max(100).optional().default(0).describe("Number of top PMIDs for ESummary v2.0. 0 to disable. Max 100 for this tool.")
80 | }
81 | // });
82 | ```
83 | - **Handler Logic (Conceptual - implemented in `logic.ts`):**
84 | 1. Utilize `requestContext` for logging and error tracking.
85 | 2. Construct `ESearch` `term` parameter by combining `queryTerm`, `dateRange` (using `mindate`, `maxdate`, `datetype`), and `filterByPublicationTypes` (e.g., `queryTerm AND "Review"[Publication Type]`). Apply input sanitization (`src/utils/security/sanitization.ts`) to `queryTerm`.
86 | 3. Call `ncbiService.ts` to execute `ESearch`. If `fetchBriefSummaries > 0`, `usehistory=y` will be set for `ESearch`. Parameters will include `db=pubmed`, `term`, `retmax=maxResults`, `sort=sortBy`.
87 | 4. Parse `ESearch` response (PMIDs, `WebEnv`, `QueryKey`, total count).
88 | 5. If `fetchBriefSummaries > 0` and PMIDs are found, call `ncbiService.ts` for `ESummary` using the `WebEnv`, `QueryKey`, and the first `fetchBriefSummaries` PMIDs (or all if fewer than requested). `ESummary` will use `version="2.0"`.
89 | 6. Parse `ESummary` response (DocSums).
90 | 7. Format output as `CallToolResult`. Errors are thrown as `McpError`.
91 | - **Output Content (MCP `content` array - example):**
92 | ```json
93 | [{
94 | "type": "application/json",
95 | "data": {
96 | "searchParameters": {
97 | "queryTerm": "original queryTerm input",
98 | "maxResults": 20,
99 | "sortBy": "relevance",
100 | "fetchBriefSummaries": 5 // example
101 | },
102 | "effectiveESearchTerm": "precision oncology AND (2023[pdat]) AND (\"Review\"[Publication Type])",
103 | "totalFound": 12345,
104 | "retrievedPmidCount": 20, // from ESearch
105 | "pmids": ["35394430", "35358407", "..."], // up to maxResults
106 | "briefSummaries": [ // up to fetchBriefSummaries
107 | {
108 | "pmid": "35394430",
109 | "title": "Example Title 1",
110 | "authors": "Doe J, Smith A.", // Simplified author string from ESummary
111 | "source": "J Example Sci. 2023 Mar",
112 | "pubDate": "2023-03-15", // Standardized
113 | "epubDate": "2023-02-01"
114 | }
115 | ],
116 | "eSearchUrl": "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=...",
117 | "eSummaryUrl": "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pubmed&version=2.0&id=..." // if called
118 | }
119 | }]
120 | ```
121 |
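As a sketch of step 2 of the handler logic above, the effective `term` might be assembled as follows (illustrative helper, not the actual `logic.ts`; note that `dateRange` is passed via the separate `mindate`/`maxdate`/`datetype` parameters rather than inside the term):

```typescript
// Hypothetical helper: combine queryTerm and publication-type filters into
// an Entrez term such as '(precision oncology) AND ("Review"[Publication Type])'.
function buildESearchTerm(
  queryTerm: string,
  filterByPublicationTypes: string[] = [],
): string {
  const clauses = [queryTerm];
  for (const pubType of filterByPublicationTypes) {
    clauses.push(`"${pubType}"[Publication Type]`);
  }
  return clauses.map((clause) => `(${clause})`).join(" AND ");
}
```
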
122 | **3.2. Tool: `fetchArticleDetails`**
123 |
124 | - **Description:** Retrieves detailed information for a list of PMIDs with flexible content control using EFetch.
125 | - **Underlying E-utility:** `EFetch`.
126 | - **Registration:** `src/mcp-server/tools/fetchArticleDetails/registration.ts`
127 | - **Logic:** `src/mcp-server/tools/fetchArticleDetails/logic.ts`
128 | - **Input Parameters (Zod Schema Shape):**
129 | ```typescript
130 | // import { z } from 'zod';
131 | // export const FetchArticleDetailsInputSchema = z.object({
132 | {
133 | pmids: z.array(z.string().regex(/^\d+$/)).min(1, "At least one PMID is required").max(200, "Max 200 PMIDs per call. Server uses HTTP POST for larger lists if necessary."),
134 | detailLevel: z.enum([
135 | "abstract_plus", // Server-parsed: Title, abstract, authors, journal, pub_date, keywords, DOI from EFetch XML.
136 | "full_xml", // Raw PubMedArticle XML from EFetch (retmode=xml).
137 | "medline_text", // MEDLINE formatted text from EFetch (retmode=text, rettype=medline).
138 | "citation_data" // Server-parsed minimal data for citation from EFetch XML.
139 | ]).optional().default("abstract_plus"),
140 | includeMeshTerms: z.boolean().optional().default(true).describe("Applies to 'abstract_plus' and 'citation_data' if parsed from XML."),
141 | includeGrantInfo: z.boolean().optional().default(false).describe("Applies to 'abstract_plus' if parsed from XML.")
142 | }
143 | // });
144 | ```
145 | - **Handler Logic (Conceptual):**
146 | 1. Determine `EFetch` `rettype` and `retmode` based on `detailLevel`:
147 | * `abstract_plus`, `full_xml`, `citation_data`: `db=pubmed`, `retmode=xml`. (Default `rettype` for PubMed XML is suitable).
148 | * `medline_text`: `db=pubmed`, `retmode=text`, `rettype=medline`.
149 | 2. `ncbiService.ts` handles sending PMIDs. For > ~200 PMIDs, it should use HTTP POST with `EFetch`.
150 | 3. Call `ncbiService.ts` for `EFetch`.
151 | 4. If `detailLevel` is `abstract_plus` or `citation_data`, robustly parse the XML response. This includes standardizing author lists, publication dates, and extracting MeSH/Grant info if requested. This is a core value-add of the tool.
152 | 5. Format output as `CallToolResult`.
153 | - **Output Content (MCP `content` array - example for `abstract_plus`):**
154 | ```json
155 | [{
156 | "type": "application/json", // or "application/xml" for full_xml, "text/plain" for medline_text
157 | "data": { // For abstract_plus
158 | "requestedPmids": ["35394430"],
159 | "articles": [
160 | {
161 | "pmid": "35394430",
162 | "title": "Example Title 1",
163 | "abstractText": "This is the abstract...",
164 | "authors": [
165 | { "lastName": "Doe", "firstName": "John", "initials": "J", "affiliation": "University of Science" }
166 | ],
167 | "journalInfo": {
168 | "title": "Journal of Example Science", "isoAbbreviation": "J Ex Sci", "volume": "10", "issue": "2", "pages": "100-110",
169 | "publicationDate": { "year": 2023, "month": "Mar", "day": 15, "medlineDate": "2023 Mar" } // Standardized
170 | },
171 | "publicationTypes": ["Journal Article", "Review"],
172 | "keywords": ["keyword1", "keyword2"], // From KeywordList or MeSH
173 | "meshTerms": [ // if includeMeshTerms is true
174 | { "descriptorName": "Neoplasms", "qualifierName": "therapy", "isMajorTopic": true, "ui": "D009369" }
175 | ],
176 | "grantList": [ // if includeGrantInfo is true
177 | { "grantId": "R01 CA123456", "agency": "NCI NIH HHS", "country": "United States" }
178 | ],
179 | "doi": "10.xxxx/xxxxxx"
180 | }
181 | ],
182 | "notFoundPmids": [],
183 | "eFetchDetails": {
184 | "urls": ["https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&retmode=xml&id=..."],
185 | "requestMethod": "GET" // or POST
186 | }
187 | }
188 | }]
189 | ```
190 |
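The `detailLevel` mapping and the GET/POST decision from the handler logic above can be sketched as follows (illustrative helpers; the ~200-PMID threshold is taken from the text):

```typescript
type DetailLevel =
  | "abstract_plus"
  | "full_xml"
  | "medline_text"
  | "citation_data";

// Step 1: map detailLevel to EFetch retmode/rettype.
function eFetchParamsFor(
  detailLevel: DetailLevel,
): { retmode: "xml" | "text"; rettype?: string } {
  return detailLevel === "medline_text"
    ? { retmode: "text", rettype: "medline" }
    : { retmode: "xml" }; // abstract_plus, full_xml, citation_data
}

// Step 2: long PMID lists exceed practical URL limits, so switch to POST.
function httpMethodFor(pmids: string[]): "GET" | "POST" {
  return pmids.length > 200 ? "POST" : "GET";
}
```
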
191 | **3.3. Tool: `getArticleRelationships`**
192 |
193 | - **Description:** Finds articles related to a source PMID (e.g., similar articles in PubMed, articles citing it, or articles it references) or retrieves citation formats.
194 | - **Underlying E-utilities:** `ELink` (primary), `EFetch` (for citation formats).
195 | - **Registration:** `src/mcp-server/tools/getArticleRelationships/registration.ts`
196 | - **Logic:** `src/mcp-server/tools/getArticleRelationships/logic.ts`
197 | - **Input Parameters (Zod Schema Shape):**
198 | ```typescript
199 | // import { z } from 'zod';
200 | // export const GetArticleRelationshipsInputSchema = z.object({
201 | {
202 | sourcePmid: z.string().regex(/^\d+$/).describe("Primary PMID for relationship lookup."),
203 | relationshipType: z.enum([
204 | "pubmed_similar_articles", // Uses ELink cmd=neighbor, dbfrom=pubmed, db=pubmed
205 | "pubmed_citedin", // Articles in PubMed that cite this PMID (ELink cmd=neighbor, linkname=pubmed_pubmed_citedin)
206 | "pubmed_references", // Articles in PubMed referenced by this PMID (ELink cmd=neighbor, linkname=pubmed_pubmed_refs)
207 | "citation_formats" // Fetch citation data for server-side formatting
208 | ]).default("pubmed_similar_articles"),
209 |     maxRelatedResults: z.number().int().positive().max(50).optional().default(5).describe("Applies to relationship types returning multiple PMIDs. Server truncates ELink results if necessary."),
210 | citationStyles: z.array(z.enum(["ris", "bibtex", "apa_string", "mla_string"])).optional().default(["ris"]).describe("For 'citation_formats' type. Formatting is server-side.")
211 | }
212 | // });
213 | ```
214 | - **Handler Logic (Conceptual):**
215 | 1. Based on `relationshipType`:
216 | * `pubmed_similar_articles`: Call `ncbiService.ts` for `ELink` with `dbfrom=pubmed`, `db=pubmed`, `cmd=neighbor`, `id=sourcePmid`.
217 | * `pubmed_citedin`: Call `ncbiService.ts` for `ELink` with `dbfrom=pubmed`, `db=pubmed`, `cmd=neighbor`, `id=sourcePmid`, `linkname=pubmed_pubmed_citedin`.
218 | * `pubmed_references`: Call `ncbiService.ts` for `ELink` with `dbfrom=pubmed`, `db=pubmed`, `cmd=neighbor`, `id=sourcePmid`, `linkname=pubmed_pubmed_refs`.
219 | * `citation_formats`: Call `ncbiService.ts` for `EFetch` (`db=pubmed`, `id=sourcePmid`, `retmode=xml`). The server then parses this XML and generates the requested citation strings.
220 | 2. Parse `ELink` XML response for linked PMIDs and scores (if available).
221 | 3. If PMIDs are returned from `ELink`, the server may optionally enrich the top `maxRelatedResults` with brief details by making an internal call to a simplified version of `fetchArticleDetails` logic (e.g., fetching only title and authors).
222 | 4. Format output as `CallToolResult`.
223 | - **Output Content (MCP `content` array - example for `pubmed_similar_articles`):**
224 | ```json
225 | [{
226 | "type": "application/json",
227 | "data": {
228 | "sourcePmid": "35394430",
229 | "relationshipType": "pubmed_similar_articles",
230 | "relatedArticles": [ // Max 'maxRelatedResults'
231 | { "pmid": "9876543", "title": "Related Article Title", "authors": "Smith J, et al.", "score": 0.85, "linkUrl": "https://pubmed.ncbi.nlm.nih.gov/9876543/" }
232 | ],
233 | "citations": {}, // Populated if relationshipType is 'citation_formats'
234 | "retrievedCount": 1, // Number of related articles returned
235 | "eLinkUrl": "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/elink.fcgi?..." // Example ELink URL
236 | }
237 | }]
238 | ```
239 |
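Step 1 of the handler logic above amounts to a small parameter-mapping table for ELink (`citation_formats` instead goes through EFetch). A hypothetical helper:

```typescript
// Illustrative mapping from relationshipType to ELink query parameters.
function eLinkParamsFor(
  relationshipType: string,
  sourcePmid: string,
): Record<string, string> {
  const base = { dbfrom: "pubmed", db: "pubmed", cmd: "neighbor", id: sourcePmid };
  switch (relationshipType) {
    case "pubmed_citedin":
      return { ...base, linkname: "pubmed_pubmed_citedin" };
    case "pubmed_references":
      return { ...base, linkname: "pubmed_pubmed_refs" };
    case "pubmed_similar_articles":
    default:
      return base;
  }
}
```
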
240 | **4. MCP Resources**
241 |
242 | Resources provide descriptive data about the server or PubMed. Handlers will use `RequestContext` and `ErrorHandler`.
243 |
244 | **4.1. Resource: `serverInfo`**
245 |
246 | - **Description:** Provides comprehensive information about the `pubmed-mcp-server`, configuration, NCBI compliance, and status.
247 | - **URI:** `pubmed-connect://info` (Example URI, can be adjusted)
248 | - **Registration:** `src/mcp-server/resources/serverInfo/registration.ts`
249 | - **Logic:** `src/mcp-server/resources/serverInfo/logic.ts`
250 | - **Handler Logic (Conceptual):**
251 | 1. Assemble data from `src/config/index.ts` (server version, admin email, tool ID).
252 | 2. Include dynamic status (e.g., last NCBI connectivity check via `ncbiService.ts`).
253 | 3. Return data structured as JSON, Base64 encoded in the `blob` field of `ResourceContent`.
254 | - **Output Content (MCP `contents` array, `blob` is Base64 of JSON below):**
255 | ```json
256 | {
257 | "serverName": "pubmed-mcp-server",
258 | "serverVersion": "1.0.0",
259 | "description": "MCP Server for intelligent PubMed access via NCBI E-utilities.",
260 | "contactEmail": "[email protected]",
261 | "mcpSpecVersion": "2025-03-26",
262 | "ncbiCompliance": {
263 | "apiUsageStatus": "NCBI API Key in use",
264 | "toolIdentifier": "pubmed-mcp-server/1.0.0",
265 | "ncbiUsagePolicyUrl": "https://www.ncbi.nlm.nih.gov/books/NBK25497/", // E-utilities Help
266 | "currentRateLimitAdherence": "Targeting <10 requests/sec (with API key) via request queuing."
267 | },
268 | "supportedEutilities": ["ESearch", "EFetch", "ESummary", "ELink", "EInfo"],
269 | "operationalStatus": {
270 | "lastNcbiConnectivityCheck": "2025-05-24T01:00:00.000Z",
271 | "ncbiStatus": "Nominal", // Based on last successful NCBI interaction
272 | "internalQueueLength": 0 // Current length of the NCBI request queue
273 | },
274 | "documentationUrl": "./docs/project-spec.md"
275 | }
276 | ```
277 |
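The Base64 encoding in step 3 is a one-liner in Node.js; a minimal sketch (payload abbreviated):

```typescript
// Encode the resource payload as a Base64 blob for ResourceContent.
const serverInfo = { serverName: "pubmed-mcp-server", serverVersion: "1.0.0" };

const contents = [
  {
    uri: "pubmed-connect://info",
    mimeType: "application/json",
    blob: Buffer.from(JSON.stringify(serverInfo), "utf-8").toString("base64"),
  },
];
```
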
278 | **4.2. Resource: `getPubMedStats`**
279 |
280 | - **Description:** Retrieves general statistics about the PubMed database using `EInfo`.
281 | - **URI:** `pubmed-connect://stats/pubmed` (Example URI)
282 | - **Underlying E-utility:** `EInfo`.
283 | - **Registration:** `src/mcp-server/resources/getPubMedStats/registration.ts`
284 | - **Logic:** `src/mcp-server/resources/getPubMedStats/logic.ts`
285 | - **Handler Logic (Conceptual):**
286 | 1. Call `ncbiService.ts` for `EInfo` (`db=pubmed`).
287 | 2. Parse XML response for key statistics (record count, last update, field list).
288 | 3. Return data structured as JSON, Base64 encoded in `blob`.
289 | - **Output Content (MCP `contents` array, `blob` is Base64 of JSON below):**
290 | ```json
291 | {
292 | "databaseName": "PubMed",
293 | "menuName": "PubMed", // From EInfo
294 | "description": "PubMed comprises more than XX million citations...", // From EInfo
295 | "totalRecordCount": 36000000, // From EInfo <Count>
296 | "lastUpdate": "2025-05-23T10:00:00Z", // From EInfo <LastUpdate>
297 | "availableSearchFields": [ // Parsed from EInfo <FieldList>
298 | { "name": "ALL", "fullName": "All Fields", "description": "All terms from all searchable fields", "isDate": false, "isNumerical": false, "termCount": "123456789" },
299 | { "name": "UID", "fullName": "UID", "description": "Unique identifier", "isDate": false, "isNumerical": true, "termCount": "36000000" }
300 | // ... other relevant fields
301 | ],
302 | "availableLinkNames": [ // Parsed from EInfo <LinkList>
303 | { "name": "pubmed_pubmed_citedin", "description": "Cited In", "dbTo": "pubmed" }
304 | // ... other relevant links
305 | ],
306 | "eInfoUrl": "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/einfo.fcgi?db=pubmed"
307 | }
308 | ```
309 |
310 | **5. Key Implementation Considerations**
311 |
312 | - **NCBI Interaction Service (`src/services/ncbiService.ts` - to be created):**
313 | - Centralize all E-utility calls.
314 | - Manage API key, tool, email parameters.
315 | - Implement robust **rate limiting via a request queue** based on `NCBI_REQUEST_DELAY_MS` to ensure compliance across all concurrent MCP requests.
316 | - Handle retries (`NCBI_MAX_RETRIES`) with appropriate backoff.
317 | - Intelligently use HTTP GET or POST based on payload size (e.g., number of PMIDs for `EFetch`).
318 | - Parse NCBI XML/JSON responses, including NCBI error structures (e.g., `<ERROR>` tags in XML), translating them to structured data or specific `McpError` instances.
319 | - Manage `usehistory=y`, `WebEnv`, and `query_key` for multi-step E-utility operations.
320 | - **XML Parsing:** Use a reliable library (e.g., `fast-xml-parser`) wrapped in utility functions within `src/utils/parsing/` or the `ncbiService` for different E-utility response structures (ESearch, ESummary v2.0, EFetch PubMedArticleSet, ELink, EInfo).
321 | - **Error Handling:**
322 | - Utilize `ErrorHandler.tryCatch` from `src/utils/internal/errorHandler.ts`.
323 | - Define specific `McpError` codes in `src/types-global/errors.ts` for NCBI-related issues (e.g., `NCBI_API_ERROR`, `NCBI_PARSING_ERROR`, `NCBI_RATE_LIMIT_WARNING`, `NCBI_QUERY_ERROR`).
324 | - **Logging:** Leverage `logger` from `src/utils/internal/logger.ts` with `RequestContext` for detailed and correlated logging of operations, NCBI requests (including constructed URLs and parameters), responses, and errors.
325 | - **Input Sanitization:** Use `sanitization` utilities from `src/utils/security/sanitization.ts` for all user/client-provided inputs, especially query terms, to prevent injection or malformed Entrez queries.
326 | - **Asynchronous Operations:** All handlers involving NCBI calls must be `async` and manage promises correctly.
327 | - **Configuration Management:** Centralized in `src/config/index.ts`, loading from environment variables with clear validation.
328 | - **Caching (Future Consideration - v1.1+):** Implement caching for frequently requested, non-volatile E-utility responses (e.g., `EFetch` for specific PMIDs, `EInfo`) to improve performance and reduce NCBI load, with appropriate Time-To-Live (TTL) strategies.
329 | - **Testing:**
330 | - Unit tests for individual logic functions, parsers, and utility components.
331 | - Integration tests mocking `ncbiService.ts` calls to verify tool/resource handlers.
332 | - Consider contract testing for the `ncbiService.ts` against known NCBI E-utility response schemas.
333 | - **Documentation:**
334 | - JSDoc for all functions, classes, and types.
335 | - This `project-spec.md` serves as the primary functional specification.
336 | - `README.md` for setup, environment variable configuration, and usage examples.
337 |
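For the XML parsing consideration above, a minimal `fast-xml-parser` setup might look like this (option values are illustrative, not the project's actual configuration):

```typescript
import { XMLParser } from "fast-xml-parser";

const parser = new XMLParser({
  ignoreAttributes: false,
  attributeNamePrefix: "@_",
});

const result = parser.parse(
  "<eSearchResult><Count>2</Count><IdList><Id>35394430</Id><Id>35358407</Id></IdList></eSearchResult>",
);
// result.eSearchResult.IdList.Id -> [35394430, 35358407]
// Note: a single <Id> parses as a scalar rather than an array, which is why
// an ensureArray-style helper is useful when consuming these structures.
```
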
338 | **6. File Structure (Key Locations)**
339 |
340 | - **Main Entry:** `src/index.ts`
341 | - **Server Setup:** `src/mcp-server/server.ts` (creates McpServer instance, registers tools/resources)
342 | - **Configuration:** `src/config/index.ts`
343 | - **Core Utilities:** `src/utils/` (logging, error handling, parsing, security)
344 | - **Global Types:** `src/types-global/` (especially `errors.ts`)
345 | - **NCBI Service:** `src/services/ncbiService.ts` (to be created)
346 | - **Tools Implementation:** `src/mcp-server/tools/<toolName>/`
347 | - `logic.ts` (handler function, Zod schema definition)
348 | - `registration.ts` (calls `server.tool()`)
349 | - `index.ts` (exports registration)
350 | - **Resources Implementation:** `src/mcp-server/resources/<resourceName>/`
351 | - `logic.ts` (handler function)
352 | - `registration.ts` (calls `server.resource()`)
353 | - `index.ts` (exports registration)
354 |
355 | ---
356 |
```
--------------------------------------------------------------------------------
/.clinerules/clinerules.md:
--------------------------------------------------------------------------------
```markdown
1 | # pubmed-mcp-server: Developer Guide & Architectural Standards
2 |
3 | **Effective Date:** 2025-08-01
4 | **Version:** 2.3
5 |
6 | ## Preamble
7 |
8 | This document constitutes the official mandate governing all development practices, architectural patterns, and operational procedures for the `pubmed-mcp-server`. It is the single source of truth for ensuring code quality, consistency, and long-term maintainability. Adherence to these standards is not optional; it is a condition of all development activity.
9 |
10 | ## I. Core Architectural Principles
11 |
12 | The architecture is founded upon a strict separation of concerns to guarantee modularity, testability, and operational clarity. These principles are non-negotiable.
13 |
14 | ### 1. The Logic Throws, The Handler Catches
15 |
16 | This is the immutable cornerstone of the error-handling and control-flow strategy.
17 |
18 | - **Core Logic (`logic.ts`):** This layer's sole responsibility is the execution of business logic. It shall be pure, self-contained, and stateless where possible. If an operational or validation error occurs (e.g., failed validation, API error), it **must** terminate its execution by throwing a structured `McpError`. Logic files **must not** contain `try...catch` blocks for the purpose of formatting a final response.
19 | - **Handlers (`registration.ts`, Transports):** This layer's responsibility is to interface with the transport layer (e.g., MCP, HTTP), invoke core logic, and manage the final response lifecycle. It **must** wrap every call to the logic layer in a `try...catch` block. This is the exclusive location where errors are caught, processed by the `ErrorHandler`, and formatted into a definitive `CallToolResult` or HTTP response.
20 |
21 | ### 2. Structured, Traceable Operations
22 |
23 | Every operation must be fully traceable from initiation to completion via structured logging and context propagation.
24 |
25 | - **`RequestContext`**: Any significant operation shall be initiated by creating a `RequestContext` via `requestContextService.createRequestContext()`. This context, containing a unique `requestId`, must be passed as an argument through the entire call stack of the operation.
26 | - **`Logger`**: All logging shall be performed through the centralized `logger` singleton. Every log entry must include the `RequestContext` to ensure traceability.
27 |
28 | ### 3. Comprehensive Observability (OpenTelemetry)
29 |
30 | The system shall be fully observable out-of-the-box through integrated, comprehensive OpenTelemetry (OTel) instrumentation.
31 |
32 | - **Automatic Instrumentation:** The OTel SDK is initialized at the application's entry point (`src/index.ts`) **before any other module is imported**. This ensures that all supported libraries (e.g., HTTP, DNS) are automatically instrumented for distributed tracing.
33 | - **Trace-Aware Context:** The `RequestContext` is automatically enriched with the active `traceId` and `spanId` from OTel. This links every log entry directly to a specific trace, enabling seamless correlation between logs, traces, and metrics.
34 | - **Error-Trace Correlation:** The central `ErrorHandler` automatically records exceptions on the active OTel span and sets its status to `ERROR`. This ensures that every handled error is visible and searchable within the distributed trace, providing a complete picture of the failure.
35 | - **Performance Spans:** Utilities should be used to create detailed spans for every tool call, capturing critical performance metrics (duration, success status, error codes) as attributes. This provides granular insight into the performance of individual tools.
36 |
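As a point of reference, a tool-call performance span can be sketched with the `@opentelemetry/api` package (a minimal sketch; the wrapper and attribute names are illustrative, not the project's actual telemetry utilities):

```typescript
import { SpanStatusCode, trace } from "@opentelemetry/api";

const tracer = trace.getTracer("pubmed-mcp-server");

async function withToolSpan<T>(toolName: string, fn: () => Promise<T>): Promise<T> {
  return tracer.startActiveSpan(`tool:${toolName}`, async (span) => {
    try {
      const result = await fn();
      span.setAttribute("tool.success", true);
      return result;
    } catch (error) {
      span.recordException(error as Error);
      span.setStatus({ code: SpanStatusCode.ERROR });
      throw error;
    } finally {
      span.end();
    }
  });
}
```
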
37 | ### 4. Application Lifecycle and Execution Flow
38 |
39 | This section outlines the complete operational flow of the application, from initial startup to the execution of a tool's core logic. Understanding this sequence is critical for contextualizing the role of each component.
40 |
41 | **A. Server Startup Sequence (Executed Once)**
42 |
43 | 1. **Observability Initialization (`src/utils/telemetry/instrumentation.ts`):** The very first import in `src/index.ts` is the OpenTelemetry instrumentation module. This initializes the OTel SDK, sets up exporters, and patches supported libraries for automatic tracing.
44 | 2. **Entry Point (`src/index.ts`):** The application is launched. This script performs the first-level setup, initializes the logger, calls `initializeAndStartServer()`, and establishes global process listeners for graceful shutdown.
45 | 3. **Server Orchestration (`src/mcp-server/server.ts`):** This script orchestrates the creation and configuration of the MCP server, importing and calling the `register...` function from every tool and resource.
46 | 4. **Tool Registration (`src/mcp-server/tools/toolName/registration.ts`):** During startup, each `register...` function is executed, calling `server.registerTool()` and providing the tool's metadata and runtime handler function.
47 |
48 | **B. Tool Execution Sequence (Executed for Each Tool Call)**
49 |
50 | 1. **Transport Layer:** The server's transport receives an incoming tool call request, and an OTel span is automatically created.
51 | 2. **Server Core:** The `McpServer` instance parses the request, validates it against the registered input schema, and invokes the corresponding handler function.
52 | 3. **Handler Execution (`registration.ts`):** The handler function creates a new `RequestContext`, begins a `try...catch` block, and calls the core logic function.
53 | 4. **Logic Execution (`logic.ts`):** The logic function runs, performing its business logic. It returns a structured response on success or **throws** a structured `McpError` on failure.
54 | 5. **Response Handling (`registration.ts`):** The `try...catch` block handles the outcome, formatting a success or error `CallToolResult` and ensuring the error is logged and traced via the `ErrorHandler`.
55 | 6. **Final Transmission:** The server core sends the formatted response back to the client.
56 |
57 | ## II. Tool Development Workflow
58 |
59 | This section mandates the workflow for creating and modifying all tools. Deviation is not permitted.
60 |
61 | ### A. File and Directory Structure
62 |
63 | Each tool shall reside in its own directory within `src/mcp-server/tools/` and follow this structure:
64 |
65 | - **`toolName/`**
66 | - **`index.ts`**: A barrel file that exports only the `register...` function from `registration.ts`.
67 | - **`logic.ts`**: Contains the core business logic. It **must** define and export the tool's Zod input schema, all inferred TypeScript types (input and output), and the main logic function.
68 | - **`registration.ts`**: Registers the tool with the MCP server. It imports from `logic.ts` and implements the "Handler" role.
69 | - **`logic/` (Optional Subdirectory)**: For complex tools, logic can be broken down into smaller files within this directory, orchestrated by the main `logic.ts`.
70 |
71 | ### B. The Canonical Pattern
72 |
73 | The following pattern is the authoritative implementation and shall be used as the template for all new tool development. The `PubMedFetchContents` tool is a project-specific implementation of this standard.
74 |
75 | **Step 1: Define Schema and Logic (`logic.ts`)**
76 | The `logic.ts` file defines the tool's contract and its pure function.
77 |
78 | ```typescript
79 | /**
80 | * @fileoverview Logic for the pubmed_fetch_contents MCP tool.
81 | * Handles EFetch queries for specific PMIDs and formats the results.
82 | * This tool can fetch various details from PubMed including abstracts, full XML,
83 | * MEDLINE text, and citation data.
84 | * @module src/mcp-server/tools/pubmedFetchContents/logic
85 | */
86 |
87 | import { z } from "zod";
88 | import { getNcbiService } from "../../../services/NCBI/core/ncbiService.js";
89 | import { BaseErrorCode, McpError } from "../../../types-global/errors.js";
90 | import {
91 | ParsedArticle,
92 | XmlMedlineCitation,
93 | XmlPubmedArticleSet,
94 | } from "../../../types-global/pubmedXml.js";
95 | import {
96 | logger,
97 | RequestContext,
98 | requestContextService,
99 | sanitizeInputForLogging,
100 | } from "../../../utils/index.js";
101 | import {
102 | ensureArray,
103 | extractAbstractText,
104 | extractArticleDates,
105 | extractAuthors,
106 | extractDoi,
107 | extractGrants,
108 | extractJournalInfo,
109 | extractKeywords,
110 | extractMeshTerms,
111 | extractPmid,
112 | extractPublicationTypes,
113 | getText,
114 | } from "../../../services/NCBI/parsing/index.js";
115 |
116 | export const PubMedFetchContentsInputSchema = z
117 | .object({
118 | pmids: z
119 | .array(z.string().regex(/^\d+$/))
120 | .max(200, "Max 200 PMIDs per call if not using history.")
121 | .optional()
122 | .describe(
123 | "An array of PubMed Unique Identifiers (PMIDs) for which to fetch content. Use this OR queryKey/webEnv.",
124 | ),
125 | queryKey: z
126 | .string()
127 | .optional()
128 | .describe(
129 | "Query key from ESearch history server. If used, webEnv must also be provided. Use this OR pmids.",
130 | ),
131 | webEnv: z
132 | .string()
133 | .optional()
134 | .describe(
135 | "Web environment from ESearch history server. If used, queryKey must also be provided. Use this OR pmids.",
136 | ),
137 | retstart: z
138 | .number()
139 | .int()
140 | .min(0)
141 | .optional()
142 | .describe(
143 | "Sequential index of the first record to retrieve (0-based). Used with queryKey/webEnv.",
144 | ),
145 | retmax: z
146 | .number()
147 | .int()
148 | .min(1)
149 | .optional()
150 | .describe(
151 | "Maximum number of records to retrieve. Used with queryKey/webEnv.",
152 | ),
153 | detailLevel: z
154 | .enum(["abstract_plus", "full_xml", "medline_text", "citation_data"])
155 | .optional()
156 | .default("abstract_plus")
157 | .describe(
158 | "Specifies the level of detail for the fetched content. Options: 'abstract_plus' (parsed details including abstract, authors, journal, DOI, etc.), 'full_xml' (raw PubMedArticle XML), 'medline_text' (MEDLINE format), 'citation_data' (minimal parsed data for citations). Defaults to 'abstract_plus'.",
159 | ),
160 | includeMeshTerms: z
161 | .boolean()
162 | .optional()
163 | .default(true)
164 | .describe(
165 | "Applies to 'abstract_plus' and 'citation_data' if parsed from XML.",
166 | ),
167 | includeGrantInfo: z
168 | .boolean()
169 | .optional()
170 | .default(false)
171 | .describe("Applies to 'abstract_plus' if parsed from XML."),
172 | outputFormat: z
173 | .enum(["json", "raw_text"])
174 | .optional()
175 | .default("json")
176 | .describe(
177 | "Specifies the final output format of the tool. \n- 'json' (default): Wraps the data in a standard JSON object. \n- 'raw_text': Returns raw text for 'medline_text' or 'full_xml' detailLevels. For other detailLevels, 'outputFormat' defaults to 'json'.",
178 | ),
179 | })
180 | .superRefine((data, ctx) => {
181 | if (data.queryKey && !data.webEnv) {
182 | ctx.addIssue({
183 | code: z.ZodIssueCode.custom,
184 | message: "webEnv is required if queryKey is provided.",
185 | path: ["webEnv"],
186 | });
187 | }
188 | if (!data.queryKey && data.webEnv) {
189 | ctx.addIssue({
190 | code: z.ZodIssueCode.custom,
191 | message: "queryKey is required if webEnv is provided.",
192 | path: ["queryKey"],
193 | });
194 | }
195 | if (
196 | (!data.pmids || data.pmids.length === 0) &&
197 | !(data.queryKey && data.webEnv)
198 | ) {
199 | ctx.addIssue({
200 | code: z.ZodIssueCode.custom,
201 | message:
202 | "Either pmids (non-empty array) or both queryKey and webEnv must be provided.",
203 | path: ["pmids"],
204 | });
205 | }
206 | if (data.pmids && data.pmids.length > 0 && (data.queryKey || data.webEnv)) {
207 | ctx.addIssue({
208 | code: z.ZodIssueCode.custom,
209 | message:
210 | "Cannot use pmids and queryKey/webEnv simultaneously. Please choose one method.",
211 | path: ["pmids"],
212 | });
213 | }
214 | if (
215 | (data.retstart !== undefined || data.retmax !== undefined) &&
216 | !(data.queryKey && data.webEnv)
217 | ) {
218 | ctx.addIssue({
219 | code: z.ZodIssueCode.custom,
220 | message: "retstart/retmax can only be used with queryKey and webEnv.",
221 | path: ["retstart"],
222 | });
223 | }
224 | });
225 |
226 | export type PubMedFetchContentsInput = z.infer<
227 | typeof PubMedFetchContentsInputSchema
228 | >;
229 |
230 | export type PubMedFetchContentsOutput = {
231 | content: string;
232 | articlesReturned: number;
233 | eFetchUrl: string;
234 | };
235 |
236 | interface EFetchServiceParams {
237 | db: string;
238 | id?: string;
239 | query_key?: string;
240 | WebEnv?: string;
241 | retmode?: "xml" | "text";
242 | rettype?: string;
243 | retstart?: string;
244 | retmax?: string;
245 | [key: string]: string | undefined;
246 | }
247 |
248 | function parsePubMedArticleSet(
249 | xmlData: unknown,
250 | input: PubMedFetchContentsInput,
251 | parentContext: RequestContext,
252 | ): ParsedArticle[] {
253 | const articles: ParsedArticle[] = [];
254 | const operationContext = requestContextService.createRequestContext({
255 | parentRequestId: parentContext.requestId,
256 | operation: "parsePubMedArticleSet",
257 | });
258 |
259 | if (
260 | !xmlData ||
261 | typeof xmlData !== "object" ||
262 | !("PubmedArticleSet" in xmlData)
263 | ) {
264 | throw new McpError(
265 | BaseErrorCode.PARSING_ERROR,
266 | "Invalid or unexpected structure for xmlData in parsePubMedArticleSet.",
267 | {
268 | ...operationContext,
269 | xmlDataType: typeof xmlData,
270 | xmlDataPreview: sanitizeInputForLogging(
271 | JSON.stringify(xmlData).substring(0, 200),
272 | ),
273 | },
274 | );
275 | }
276 |
277 | const typedXmlData = xmlData as { PubmedArticleSet?: XmlPubmedArticleSet };
278 | const articleSet = typedXmlData.PubmedArticleSet;
279 |
280 | if (!articleSet || !articleSet.PubmedArticle) {
281 | logger.warning(
282 | "PubmedArticleSet or PubmedArticle array not found in EFetch XML response.",
283 | operationContext,
284 | );
285 | return articles;
286 | }
287 |
288 | const pubmedArticlesXml = ensureArray(articleSet.PubmedArticle);
289 |
290 | for (const articleXml of pubmedArticlesXml) {
291 | if (!articleXml || typeof articleXml !== "object") continue;
292 |
293 | const medlineCitation: XmlMedlineCitation | undefined =
294 | articleXml.MedlineCitation;
295 | if (!medlineCitation) continue;
296 |
297 | const pmid = extractPmid(medlineCitation);
298 | if (!pmid) continue;
299 |
300 | const articleNode = medlineCitation.Article;
301 | const parsedArticle: ParsedArticle = {
302 | pmid: pmid,
303 | title: articleNode?.ArticleTitle
304 | ? getText(articleNode.ArticleTitle)
305 | : undefined,
306 | abstractText: articleNode?.Abstract
307 | ? extractAbstractText(articleNode.Abstract)
308 | : undefined,
309 | authors: articleNode?.AuthorList
310 | ? extractAuthors(articleNode.AuthorList)
311 | : undefined,
312 | journalInfo: articleNode?.Journal
313 | ? extractJournalInfo(articleNode.Journal, medlineCitation)
314 | : undefined,
315 | publicationTypes: articleNode?.PublicationTypeList
316 | ? extractPublicationTypes(articleNode.PublicationTypeList)
317 | : undefined,
318 | keywords: articleNode?.KeywordList
319 | ? extractKeywords(articleNode.KeywordList)
320 | : undefined,
321 | doi: articleNode ? extractDoi(articleNode) : undefined,
322 | articleDates: articleNode?.ArticleDate
323 | ? extractArticleDates(articleNode)
324 | : undefined,
325 | };
326 |
327 | if (input.includeMeshTerms) {
328 | parsedArticle.meshTerms = medlineCitation.MeshHeadingList
329 | ? extractMeshTerms(medlineCitation.MeshHeadingList)
330 | : undefined;
331 | }
332 |
333 | if (input.includeGrantInfo) {
334 | parsedArticle.grantList = articleNode?.GrantList
335 | ? extractGrants(articleNode.GrantList)
336 | : undefined;
337 | }
338 |
339 | articles.push(parsedArticle);
340 | }
341 | return articles;
342 | }
343 |
344 | export async function pubMedFetchContentsLogic(
345 | input: PubMedFetchContentsInput,
346 | parentRequestContext: RequestContext,
347 | ): Promise<PubMedFetchContentsOutput> {
348 | const toolLogicContext = requestContextService.createRequestContext({
349 | parentRequestId: parentRequestContext.requestId,
350 | operation: "pubMedFetchContentsLogic",
351 | input: sanitizeInputForLogging(input),
352 | });
353 |
354 | const validationResult = PubMedFetchContentsInputSchema.safeParse(input);
355 | if (!validationResult.success) {
356 | throw new McpError(
357 | BaseErrorCode.VALIDATION_ERROR,
358 | validationResult.error.errors[0]?.message || "Invalid input",
359 | { ...toolLogicContext, details: validationResult.error.flatten() },
360 | );
361 | }
362 |
363 | const ncbiService = getNcbiService();
364 | logger.info("Executing pubmed_fetch_contents tool", toolLogicContext);
365 |
366 | const eFetchParams: EFetchServiceParams = { db: "pubmed" };
367 |
368 | if (input.queryKey && input.webEnv) {
369 | eFetchParams.query_key = input.queryKey;
370 | eFetchParams.WebEnv = input.webEnv;
371 | if (input.retstart !== undefined)
372 | eFetchParams.retstart = String(input.retstart);
373 | if (input.retmax !== undefined) eFetchParams.retmax = String(input.retmax);
374 | } else if (input.pmids && input.pmids.length > 0) {
375 | eFetchParams.id = input.pmids.join(",");
376 | }
377 |
378 | let serviceRetmode: "xml" | "text" = "xml";
379 | let rettype: string | undefined;
380 |
381 | switch (input.detailLevel) {
382 | case "full_xml":
383 | serviceRetmode = "xml";
384 | break;
385 | case "medline_text":
386 | serviceRetmode = "text";
387 | rettype = "medline";
388 | break;
389 | case "abstract_plus":
390 | case "citation_data":
391 | serviceRetmode = "xml";
392 | break;
393 | }
394 | eFetchParams.retmode = serviceRetmode;
395 | if (rettype) eFetchParams.rettype = rettype;
396 |
397 | const eFetchBase =
398 | "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi";
399 | const eFetchQueryString = new URLSearchParams(
400 | eFetchParams as Record<string, string>,
401 | ).toString();
402 | const eFetchUrl = `${eFetchBase}?${eFetchQueryString}`;
403 |
404 | const shouldReturnRawXml =
405 | input.detailLevel === "full_xml" && input.outputFormat === "raw_text";
406 |
407 | const eFetchResponseData = await ncbiService.eFetch(
408 | eFetchParams,
409 | toolLogicContext,
410 | { retmode: serviceRetmode, rettype, returnRawXml: shouldReturnRawXml },
411 | );
412 |
413 | let finalOutputText: string;
414 | let articlesCount = 0;
415 |
416 | if (input.detailLevel === "medline_text") {
417 | const medlineText = String(eFetchResponseData);
418 | const foundPmidsInMedline = new Set<string>();
419 | const pmidRegex = /^PMID- (\d+)/gm;
420 | let match;
421 | while ((match = pmidRegex.exec(medlineText)) !== null) {
422 | if (match[1]) {
423 | foundPmidsInMedline.add(match[1]);
424 | }
425 | }
426 | articlesCount = foundPmidsInMedline.size;
427 |
428 | if (input.outputFormat === "raw_text") {
429 | finalOutputText = medlineText;
430 | } else {
431 | const notFoundPmids =
432 | input.pmids?.filter((pmid) => !foundPmidsInMedline.has(pmid)) || [];
433 | finalOutputText = JSON.stringify({
434 | requestedPmids: input.pmids || "N/A (history query)",
435 | articles: [{ medlineText }],
436 | notFoundPmids,
437 | eFetchDetails: { urls: [eFetchUrl] },
438 | });
439 | }
440 | } else if (input.detailLevel === "full_xml") {
441 | const articlesXml = ensureArray(
442 | (eFetchResponseData as any)?.PubmedArticleSet?.PubmedArticle || [],
443 | );
444 | articlesCount = articlesXml.length;
445 | if (input.outputFormat === "raw_text") {
446 | // Note: Raw XML output is requested, but we still parse to get an accurate count.
447 | // This is a trade-off for robustness over performance in this specific case.
448 | finalOutputText = String(eFetchResponseData);
449 | } else {
450 | const foundPmidsInXml = new Set<string>();
451 | const articlesPayload = articlesXml.map((articleXml) => {
452 | const pmid = extractPmid(articleXml.MedlineCitation) || "unknown_pmid";
453 | if (pmid !== "unknown_pmid") foundPmidsInXml.add(pmid);
454 | return { pmid, fullXmlContent: articleXml };
455 | });
456 | const notFoundPmids =
457 | input.pmids?.filter((pmid) => !foundPmidsInXml.has(pmid)) || [];
458 | finalOutputText = JSON.stringify({
459 | requestedPmids: input.pmids || "N/A (history query)",
460 | articles: articlesPayload,
461 | notFoundPmids,
462 | eFetchDetails: { urls: [eFetchUrl] },
463 | });
464 | }
465 | } else {
466 | const parsedArticles = parsePubMedArticleSet(
467 | eFetchResponseData as XmlPubmedArticleSet,
468 | input,
469 | toolLogicContext,
470 | );
471 | articlesCount = parsedArticles.length;
472 | const foundPmids = new Set(parsedArticles.map((p) => p.pmid));
473 | const notFoundPmids =
474 | input.pmids?.filter((pmid) => !foundPmids.has(pmid)) || [];
475 |
476 | let articlesToReturn: any = parsedArticles;
477 | if (input.detailLevel === "citation_data") {
478 | articlesToReturn = parsedArticles.map((article) => ({
479 | pmid: article.pmid,
480 | title: article.title,
481 | authors: article.authors?.map((a) => ({
482 | lastName: a.lastName,
483 | initials: a.initials,
484 | })),
485 | journalInfo: {
486 | title: article.journalInfo?.title,
487 | isoAbbreviation: article.journalInfo?.isoAbbreviation,
488 | volume: article.journalInfo?.volume,
489 | issue: article.journalInfo?.issue,
490 | pages: article.journalInfo?.pages,
491 | year: article.journalInfo?.publicationDate?.year,
492 | },
493 | doi: article.doi,
494 | ...(input.includeMeshTerms && { meshTerms: article.meshTerms }),
495 | }));
496 | }
497 | finalOutputText = JSON.stringify({
498 | requestedPmids: input.pmids || "N/A (history query)",
499 | articles: articlesToReturn,
500 | notFoundPmids,
501 | eFetchDetails: { urls: [eFetchUrl] },
502 | });
503 | }
504 |
505 | logger.notice("Successfully executed pubmed_fetch_contents tool.", {
506 | ...toolLogicContext,
507 | articlesReturned: articlesCount,
508 | });
509 |
510 | return {
511 | content: finalOutputText,
512 | articlesReturned: articlesCount,
513 | eFetchUrl,
514 | };
515 | }
516 | ```
517 |
518 | **Step 2: Register the Tool and Handle All Outcomes (`registration.ts`)**
519 | The `registration.ts` file acts as the handler, connecting the logic to the server and ensuring stability.
520 |
521 | ```typescript
522 | /**
523 | * @fileoverview Registration for the pubmed_fetch_contents MCP tool.
524 | * @module src/mcp-server/tools/pubmedFetchContents/registration
525 | */
526 |
527 | import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
528 | import { CallToolResult } from "@modelcontextprotocol/sdk/types.js";
529 | import { BaseErrorCode, McpError } from "../../../types-global/errors.js";
530 | import {
531 | ErrorHandler,
532 | logger,
533 | RequestContext,
534 | requestContextService,
535 | } from "../../../utils/index.js";
536 | import {
537 | PubMedFetchContentsInput,
538 | PubMedFetchContentsInputSchema,
539 | pubMedFetchContentsLogic,
540 | } from "./logic.js";
541 |
542 | /**
543 | * Registers the pubmed_fetch_contents tool with the MCP server.
544 | * @param server - The McpServer instance.
545 | */
546 | export async function registerPubMedFetchContentsTool(
547 | server: McpServer,
548 | ): Promise<void> {
549 | const operation = "registerPubMedFetchContentsTool";
550 | const toolName = "pubmed_fetch_contents";
551 | const toolDescription =
552 | "Fetches detailed information from PubMed using NCBI EFetch. Can be used with a direct list of PMIDs or with queryKey/webEnv from an ESearch history entry. Supports pagination (retstart, retmax) when using history. Available 'detailLevel' options: 'abstract_plus' (parsed title, abstract, authors, journal, keywords, DOI, optional MeSH/grant info), 'full_xml' (JSON representation of the PubMedArticle XML structure), 'medline_text' (MEDLINE format), or 'citation_data' (minimal data for citations). Returns a JSON object containing results, any PMIDs not found (if applicable), and EFetch details.";
553 |
554 | const context = requestContextService.createRequestContext({ operation });
555 |
556 | await ErrorHandler.tryCatch(
557 | async () => {
558 | server.tool(
559 | toolName,
560 | toolDescription,
561 | PubMedFetchContentsInputSchema._def.schema.shape,
562 | async (
563 | input: PubMedFetchContentsInput,
564 | toolContext: any,
565 | ): Promise<CallToolResult> => {
566 | const richContext: RequestContext =
567 | requestContextService.createRequestContext({
568 | parentRequestId: context.requestId,
569 | operation: "pubMedFetchContentsToolHandler",
570 | mcpToolContext: toolContext,
571 | input,
572 | });
573 |
574 | try {
575 | const result = await pubMedFetchContentsLogic(input, richContext);
576 | return {
577 | content: [{ type: "text", text: result.content }],
578 | isError: false,
579 | };
580 | } catch (error) {
581 | const handledError = ErrorHandler.handleError(error, {
582 | operation: "pubMedFetchContentsToolHandler",
583 | context: richContext,
584 | input,
585 | rethrow: false,
586 | });
587 |
588 | const mcpError =
589 | handledError instanceof McpError
590 | ? handledError
591 | : new McpError(
592 | BaseErrorCode.INTERNAL_ERROR,
593 | "An unexpected error occurred while fetching PubMed content.",
594 | {
595 | originalErrorName: handledError.name,
596 | originalErrorMessage: handledError.message,
597 | },
598 | );
599 |
600 | return {
601 | content: [
602 | {
603 | type: "text",
604 | text: JSON.stringify({
605 | error: {
606 | code: mcpError.code,
607 | message: mcpError.message,
608 | details: mcpError.details,
609 | },
610 | }),
611 | },
612 | ],
613 | isError: true,
614 | };
615 | }
616 | },
617 | );
618 |
619 | logger.notice(`Tool '${toolName}' registered.`, context);
620 | },
621 | {
622 | operation,
623 | context,
624 | errorCode: BaseErrorCode.INITIALIZATION_FAILED,
625 | critical: true,
626 | },
627 | );
628 | }
629 | ```
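
At startup, `server.ts` invokes each tool's registration function. As a non-authoritative sketch (the server name, version string, and orchestration details below are placeholders; the actual `server.ts` may differ):

```typescript
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { registerPubMedFetchContentsTool } from "./tools/pubmedFetchContents/registration.js";

export async function createServer(): Promise<McpServer> {
  // Server identity values are illustrative placeholders.
  const server = new McpServer({ name: "pubmed-mcp-server", version: "0.0.0" });
  await registerPubMedFetchContentsTool(server);
  return server;
}
```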
630 |
631 | ## III. Resource Development Workflow
632 |
633 | The workflow for creating Resources mirrors that of Tools, but the focus shifts from performing actions to retrieving data. The directory structure is identical, located under `src/mcp-server/resources/`. Registration shall use `server.resource()`, and the handler must return an object conforming to the `{ contents: [{ uri, blob, mimeType }] }` structure, as in the sketch below.
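
A minimal sketch of such a registration, assuming a static URI; the resource name, URI scheme, and payload are illustrative placeholders, not modules that exist in this repository:

```typescript
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";

export async function registerExampleResource(
  server: McpServer,
): Promise<void> {
  server.resource("example-data", "example://data", async (uri) => {
    // Retrieve and serialize the resource payload (placeholder logic).
    const payload = JSON.stringify({ source: uri.href, items: [] });
    return {
      contents: [
        {
          uri: uri.href,
          blob: Buffer.from(payload).toString("base64"),
          mimeType: "application/json",
        },
      ],
    };
  });
}
```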
634 |
635 | ## IV. External Service Integration
636 |
637 | Interaction with any external service (e.g., NCBI APIs) shall be encapsulated within a singleton provider class in the `src/services/` directory. The singleton instance shall be imported directly into the `logic.ts` file where it is required. This applies to the `ncbiRequestQueueManager` and any other external clients.
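
For reference, a minimal sketch of the singleton pattern (the class and method names are illustrative, not the actual NCBI client):

```typescript
// Hypothetical provider; a real service would wire in config, queues, and clients.
class ExampleServiceClient {
  private static instance: ExampleServiceClient;

  private constructor() {
    // Initialize API keys, request queues, or HTTP clients here.
  }

  public static getInstance(): ExampleServiceClient {
    if (!ExampleServiceClient.instance) {
      ExampleServiceClient.instance = new ExampleServiceClient();
    }
    return ExampleServiceClient.instance;
  }

  public async fetch(endpoint: string): Promise<unknown> {
    // Delegate to the underlying transport (placeholder).
    return { endpoint };
  }
}

// Export the instance, not the class, so logic.ts files import a ready-to-use client.
export const exampleServiceClient = ExampleServiceClient.getInstance();
```

Exporting the instance keeps construction centralized: every `logic.ts` file imports the same ready-to-use client without knowing how it is wired.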
638 |
639 | ## V. Code Quality and Documentation Mandates
640 |
641 | - **JSDoc**: Every file shall begin with a `@fileoverview` and `@module` block. All exported functions, types, and classes shall have complete JSDoc comments.
642 | - **LLM-Facing Descriptions**: The tool's `title`, `description`, and all parameter descriptions defined in Zod schemas (`.describe()`) are transmitted directly to the LLM. Write them with the LLM as the primary audience: descriptive, concise, and explicit about requirements and constraints (see the sketch after this list).
643 | - **Clarity Over Brevity**: Write self-documenting code with meaningful variable and function names.
644 | - **Immutability**: Prefer functional approaches and immutable data structures to prevent side effects.
645 | - **Formatting**: All code must be formatted using Prettier (`npm run format`) prior to being committed.
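
A minimal sketch of LLM-facing descriptions in a Zod schema; the field names echo this server's tools, but the exact constraints shown are illustrative:

```typescript
import { z } from "zod";

// Every .describe() string below is sent verbatim to the LLM.
export const ExampleInputSchema = z.object({
  pmids: z
    .array(z.string().regex(/^\d+$/, "PMIDs must be numeric strings"))
    .max(200)
    .optional()
    .describe("PubMed Unique Identifiers (PMIDs) to fetch. Maximum 200 per call."),
  detailLevel: z
    .enum(["abstract_plus", "full_xml", "medline_text", "citation_data"])
    .default("abstract_plus")
    .describe("Controls how much of each record is returned."),
});
```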
646 |
647 | ## VI. Security Mandates
648 |
649 | - **Input Sanitization**: All input from any external source must be treated as untrusted and validated with Zod. Use `sanitization` utilities where appropriate.
650 | - **Secrets Management**: All secrets (API keys, auth keys) **must** be loaded exclusively from environment variables via the `config` module. Never hardcode secrets.
651 | - **Authentication & Authorization**: The server's authentication mode is configured via environment variables. Tools requiring specific permissions shall be protected by checking scopes within the tool handler (see the sketch after this list).
652 | - **Rate Limiting**: Respect the rate limits of external services like NCBI. Use the centralized `rateLimiter` and `ncbiRequestQueueManager`.
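
A minimal sketch of such a scope check, assuming an auth context that exposes the caller's granted scopes (the helper, scope name, and error type are illustrative):

```typescript
// Hypothetical helper; in real code, throw an McpError with the
// appropriate BaseErrorCode from src/types-global/errors.ts instead.
function assertScope(grantedScopes: readonly string[], required: string): void {
  if (!grantedScopes.includes(required)) {
    throw new Error(`Missing required scope: ${required}`);
  }
}

// Usage at the top of a tool handler, before invoking the logic function:
// assertScope(authContext.scopes, "pubmed:read");
```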
653 |
654 | ## VII. Testing Mandates
655 |
656 | A `tests/` directory should mirror the `src/` directory structure. All tests shall be written using Vitest, following an **integration-testing-first principle**; a minimal test sketch follows the lists below.
657 |
658 | - **Principle**: Tests shall prioritize validating the complete flow from input to output, including real dependencies, over mocked unit testing. Heavy mocking is explicitly discouraged.
659 | - **Methodology**:
660 | - **Real Dependencies**: Use actual service instances and data flows. For uncontrollable external services, use test doubles that simulate realistic behavior.
661 | - **Error Flow Testing**: Test actual error conditions by triggering real failure states, not by mocking errors.
662 | - **Protocol Compliance**: All MCP transport tests must validate actual MCP protocol compliance.
663 | - **Controlled Mocking**: When mocking is necessary, it must be surgical, justified, and documented. Mock only truly external, uncontrollable dependencies.
664 | - **Test Organization**:
665 | - `tests/mcp-server/tools/toolName/integration.test.ts`
666 | - `tests/mcp-server/tools/toolName/logic.test.ts`
667 | - **Running Tests**:
668 | - `npm test`: Run all tests.
669 | - `npm run test:watch`: Run tests in watch mode.
670 | - `npm run test:coverage`: Run tests and generate a coverage report.
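
A minimal integration-test sketch under these principles (the PMID, relative paths, and the input cast are illustrative; a real test should derive its input by parsing the tool's Zod schema):

```typescript
import { describe, expect, it } from "vitest";
import {
  PubMedFetchContentsInput,
  pubMedFetchContentsLogic,
} from "../../../../src/mcp-server/tools/pubmedFetchContents/logic.js";
import { requestContextService } from "../../../../src/utils/index.js";

describe("pubmed_fetch_contents (integration)", () => {
  it("returns citation data for a known PMID", async () => {
    const context = requestContextService.createRequestContext({
      operation: "integrationTest",
    });
    // Exercises the real logic -> NCBI service flow; nothing is mocked.
    const input = {
      pmids: ["31345061"], // illustrative PMID
      detailLevel: "citation_data",
    } as PubMedFetchContentsInput; // cast for brevity in this sketch
    const result = await pubMedFetchContentsLogic(input, context);
    expect(result.articlesReturned).toBeGreaterThan(0);
  });
});
```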
671 |
672 | This guide is the single source of truth for development standards. All code reviews will be conducted against these principles.
673 |
```
--------------------------------------------------------------------------------
/examples/pubmed_research_agent_example.md:
--------------------------------------------------------------------------------
```markdown
1 | Tool Call Arguments:
2 |
3 | ```json
4 | {
5 | "project_title_suggestion": "Investigating the Role of TREM2 Variants in Microglial Activation and Amyloid-beta Clearance in Alzheimer's Disease Pathogenesis",
6 | "primary_research_goal": "To determine how specific TREM2 genetic variants affect microglial phagocytic capacity for amyloid-beta and subsequent inflammatory responses in the context of Alzheimer's Disease.",
7 | "research_keywords": [
8 | "Alzheimer's Disease",
9 | "TREM2",
10 | "Microglia",
11 | "Neuroinflammation",
12 | "Amyloid-beta",
13 | "Genetic Variants",
14 | "Phagocytosis"
15 | ],
16 | "organism_focus": "Human iPSC-derived microglia (from patients with different TREM2 genotypes) and Mus musculus (transgenic AD models with human TREM2 knock-in)",
17 | "p1_introduction_and_background": "Alzheimer's disease (AD) is a progressive neurodegenerative disorder. Genome-wide association studies (GWAS) have identified variants in the Triggering Receptor Expressed on Myeloid cells 2 (TREM2) gene as significant risk factors for late-onset AD. TREM2 is expressed on microglia, the brain's resident immune cells, and plays a crucial role in microglial activation, survival, proliferation, and phagocytosis. Understanding how AD-associated TREM2 variants impair these functions is critical for developing targeted therapies.",
18 | "p1_specific_research_question": "How do specific AD-associated TREM2 variants (e.g., R47H, R62H, D87N) alter microglial phagocytosis of amyloid-beta (Aβ) oligomers and fibrils, and modulate subsequent pro-inflammatory and anti-inflammatory cytokine release profiles compared to common TREM2 variants?",
19 | "p1_knowledge_gap": "While TREM2's role in AD is established, the precise molecular mechanisms by which different AD-risk variants differentially affect microglial Aβ clearance and inflammatory signaling remain incompletely understood, particularly in human-relevant cellular models.",
20 | "p1_primary_hypothesis": "AD-associated TREM2 risk variants (R47H, R62H, D87N) will exhibit reduced Aβ phagocytic capacity and a skewed pro-inflammatory cytokine profile in human iPSC-derived microglia upon Aβ stimulation, compared to the common TREM2 variant.",
21 | "p1_secondary_questions_or_hypotheses": [
22 | "Do TREM2 variants affect microglial lipid metabolism, and does this correlate with phagocytic defects?",
23 | "How do TREM2 variants influence microglial transcriptional signatures in response to Aβ (e.g., DAM signature)?",
24 | "Can TREM2 agonistic antibodies rescue phagocytic defects observed in risk-variant expressing microglia?"
25 | ],
26 | "p1_pubmed_search_strategy_description": "Systematic search of PubMed using MeSH terms and keywords: ('TREM2' OR 'Triggering Receptor Expressed on Myeloid cells 2') AND ('Alzheimer Disease' OR 'AD') AND ('microglia' OR 'microglial cells') AND ('amyloid beta-peptides' OR 'Abeta' OR 'amyloid plaques') AND ('genetic variation' OR 'polymorphism, single nucleotide' OR 'mutation') AND ('phagocytosis' OR 'inflammation' OR 'cytokines'). Filters: English language, last 10 years.",
27 | "p1_literature_review_scope": "Focus on studies investigating TREM2 function in microglia, impact of AD-associated TREM2 variants, mechanisms of Aβ clearance, and microglial inflammatory responses in AD. Include both in vitro and in vivo studies.",
28 | "p1_lit_review_databases_and_approach": "Primary: PubMed. Secondary: Scopus, Web of Science. Approach: Initial broad search followed by iterative refinement. Snowballing from key review articles and highly cited papers. Critical appraisal of methodologies and findings.",
29 | "p1_experimental_paradigm": "Comparative functional genomics study using human iPSC-derived microglia carrying different TREM2 variants and corresponding transgenic mouse models.",
30 | "p1_data_acquisition_plan_existing_data": "Retrieve publicly available RNA-seq/scRNA-seq datasets of human AD brain tissue and TREM2 variant mouse models from GEO/ArrayExpress to correlate in vitro findings with in vivo expression patterns. Utilize ADNI database for clinical correlations if applicable.",
31 | "p1_data_acquisition_plan_new_data": "Generate human iPSC lines from AD patients with known TREM2 genotypes (R47H, R62H, D87N, common variant) and differentiate into microglia. Perform Aβ phagocytosis assays, cytokine profiling (Luminex/ELISA), RNA-seq, and lipidomics. In vivo: Utilize TREM2-R47H knock-in mice crossed with 5xFAD model for behavioral, immunohistochemical, and biochemical analyses.",
32 | "p1_blast_utilization_plan": "Verify TREM2 variant sequences in generated iPSC lines and mouse models. Analyze TREM2 orthologs across species if comparative studies are needed.",
33 | "p1_controls_and_rigor": "Isogenic iPSC lines as controls where possible. Age- and sex-matched littermate controls for mouse studies. Blinding for behavioral and immunohistochemical analyses. Multiple biological and technical replicates. Statistical power analysis.",
34 | "p1_methodological_challenges_and_mitigation": "Challenge: Variability in iPSC differentiation. Mitigation: Standardized protocols, multiple clones per genotype, rigorous quality control of differentiated microglia. Challenge: Off-target effects of CRISPR editing. Mitigation: Whole-genome sequencing of edited lines.",
35 | "p2_data_collection_methods_wet_lab": "iPSC culture and differentiation, CRISPR/Cas9 gene editing, flow cytometry for phagocytosis, Luminex/ELISA for cytokines, RNA extraction and library preparation for RNA-seq, lipid extraction for mass spectrometry. Mouse behavioral testing (Morris water maze, Y-maze), tissue collection, immunohistochemistry, Western blotting.",
36 | "p2_data_collection_methods_dry_lab": "Data retrieval from GEO/ArrayExpress using their respective APIs/web interfaces. Accessioning of all generated high-throughput data.",
37 | "p2_data_preprocessing_and_qc_plan": "RNA-seq: FastQC, Trimmomatic, STAR alignment, featureCounts. scRNA-seq: Cell Ranger, Seurat/Scanpy for QC and normalization. Lipidomics: XCMS/MZmine for peak picking and alignment. Rigorous QC metrics at each step.",
38 | "p3_data_analysis_strategy": "Differential gene expression analysis (DESeq2/edgeR). Pathway analysis (GSEA, IPA). Statistical tests: ANOVA, t-tests with appropriate corrections for multiple comparisons. Machine learning for integrating multi-omics data if applicable.",
39 | "p3_bioinformatics_pipeline_summary": "Custom scripts (R/Python) integrating standard bioinformatics tools for RNA-seq, scRNA-seq, and lipidomics data. Visualization using ggplot2, Seurat, custom plotting.",
40 | "p3_results_interpretation_framework": "Evaluate findings against primary and secondary hypotheses. Assess statistical significance (p < 0.05) and biological relevance (effect size, pathway enrichment). Correlate in vitro findings with in vivo data and existing literature.",
41 | "p3_comparison_with_literature_plan": "Systematically compare results with published studies on TREM2 variants and AD. Discuss consistencies and discrepancies, potential reasons for differences (model systems, methodologies).",
42 | "p4_dissemination_manuscript_plan": "Core message: TREM2 risk variants impair microglial Aβ clearance and promote detrimental neuroinflammation. Target journals: Nature Neuroscience, Cell, Neuron, Immunity. Key figures: Phagocytosis assays, cytokine profiles, RNA-seq heatmaps/volcano plots, in vivo pathology.",
43 | "p4_dissemination_data_deposition_plan": "Deposit all raw and processed high-throughput sequencing data in GEO. Share iPSC lines and mouse models through appropriate repositories (e.g., WiCell, JAX). Adhere to FAIR data principles.",
44 | "p4_peer_review_and_publication_approach": "Submit to target journal, respond thoroughly to reviewer comments. Consider preprint server (bioRxiv) prior to peer-reviewed publication.",
45 | "p4_future_research_directions": "Investigate downstream signaling pathways affected by TREM2 variants. Explore therapeutic potential of TREM2 agonists in variant-specific contexts. Assess impact of TREM2 variants on other microglial functions (e.g., synaptic pruning).",
46 | "cc_record_keeping_and_data_management": "Electronic lab notebook (ELN) for experimental records. Version control (Git/GitHub) for code and analysis scripts. Secure, backed-up server for raw and processed data. Data Management Plan (DMP) as per funding agency requirements.",
47 | "cc_collaboration_strategy": "Collaborate with iPSC core facility for cell line generation, genomics core for sequencing, and biostatistics core for complex data analysis. Regular meetings and shared data platforms.",
48 | "cc_ethical_considerations": "Human iPSC work approved by Institutional Review Board (IRB). Animal studies approved by Institutional Animal Care and Use Committee (IACUC). Adherence to ARRIVE guidelines for animal research. Data de-identification where applicable. Responsible Conduct of Research (RCR) training for all personnel.",
49 | "include_detailed_prompts_for_agent": true
50 | }
51 | ```
52 |
53 | Tool Response:
54 |
55 | ```json
56 | {
57 | "plan_title": "Investigating the Role of TREM2 Variants in Microglial Activation and Amyloid-beta Clearance in Alzheimer's Disease Pathogenesis",
58 | "overall_instructions_for_research_agent": "This document is a structured research plan outline, primarily assembled from detailed inputs for project: \"Investigating the Role of TREM2 Variants in Microglial Activation and Amyloid-beta Clearance in Alzheimer's Disease Pathogenesis\". As the research agent, critically evaluate this plan. Your primary function is to fulfill the user's research request by leveraging this structured information alongside your comprehensive knowledge and all available tools (e.g., PubMed search, web search, data analysis, file operations). \nKey responsibilities:\n- Synthesize information to generate scientifically sound content, analyses, or answers.\n- Adapt the level of detail in your final response to precisely match the user's specific query. If a concise answer is requested, use this plan as foundational background.\n- Proactively identify and address potential ambiguities, biases, or limitations in the provided plan or data.\n- Ensure methodological rigor: consider controls, statistical power, reproducibility, and alternative explanations.\n- If critical information is missing or ambiguities hinder robust execution, formulate specific, targeted follow-up questions for the user before proceeding with extensive investigation.\n- Always prioritize accuracy, objectivity, and adherence to scientific best practices.",
59 | "input_summary": {
60 | "keywords_received": [
61 | "Alzheimer's Disease",
62 | "TREM2",
63 | "Microglia",
64 | "Neuroinflammation",
65 | "Amyloid-beta",
66 | "Genetic Variants",
67 | "Phagocytosis"
68 | ],
69 | "primary_goal_stated_or_inferred": "To determine how specific TREM2 genetic variants affect microglial phagocytic capacity for amyloid-beta and subsequent inflammatory responses in the context of Alzheimer's Disease.",
70 | "organism_focus": "Human iPSC-derived microglia (from patients with different TREM2 genotypes) and Mus musculus (transgenic AD models with human TREM2 knock-in)",
71 | "included_detailed_prompts_for_agent": true
72 | },
73 | "phase_1_conception_and_planning": {
74 | "title": "Phase 1: Conception and Planning",
75 | "step_1_1_research_question_and_hypothesis": {
76 | "primary_research_question": "Critically evaluate and refine the primary research question for clarity, focus, and feasibility Based on the provided detail: \"How do specific AD-associated TREM2 variants (e.g., R47H, R62H, D87N) alter microglial phagocytosis of amyloid-beta (Aβ) oligomers and fibrils, and modulate subsequent pro-inflammatory and anti-inflammatory cytokine release profiles compared to common TREM2 variants?\". Ensure critical evaluation and consider alternative interpretations.",
77 | "knowledge_gap_statement": "Validate and expand on the identified knowledge gap, ensuring it's well-supported by current literature Based on the provided detail: \"While TREM2's role in AD is established, the precise molecular mechanisms by which different AD-risk variants differentially affect microglial Aβ clearance and inflammatory signaling remain incompletely understood, particularly in human-relevant cellular models.\". Ensure critical evaluation and consider alternative interpretations.",
78 | "primary_hypothesis": "Assess the primary hypothesis for testability, specificity, and falsifiability. Consider alternative hypotheses Based on the provided detail: \"AD-associated TREM2 risk variants (R47H, R62H, D87N) will exhibit reduced Aβ phagocytic capacity and a skewed pro-inflammatory cytokine profile in human iPSC-derived microglia upon Aβ stimulation, compared to the common TREM2 variant.\". Ensure critical evaluation and consider alternative interpretations.",
79 | "pubmed_search_strategy": "Develop a comprehensive PubMed search strategy. Consider MeSH terms, keywords, Boolean operators, and inclusion/exclusion criteria Based on the provided detail: \"Systematic search of PubMed using MeSH terms and keywords: ('TREM2' OR 'Triggering Receptor Expressed on Myeloid cells 2') AND ('Alzheimer Disease' OR 'AD') AND ('microglia' OR 'microglial cells') AND ('amyloid beta-peptides' OR 'Abeta' OR 'amyloid plaques') AND ('genetic variation' OR 'polymorphism, single nucleotide' OR 'mutation') AND ('phagocytosis' OR 'inflammation' OR 'cytokines'). Filters: English language, last 10 years.\". Ensure critical evaluation and consider alternative interpretations.",
80 | "guidance_notes": [
81 | "Ensure the research question is SMART (Specific, Measurable, Achievable, Relevant, Time-bound).",
82 | "Verify the knowledge gap is current and significant.",
83 | "The hypothesis should directly address the research question.",
84 | "Consider publication type filters and date ranges for the literature search."
85 | ]
86 | },
87 | "step_1_2_literature_review_strategy": {
88 | "literature_review_scope": "Define and justify the scope of the literature review, including timeframes, study types, and key themes to investigate Based on the provided detail: \"Focus on studies investigating TREM2 function in microglia, impact of AD-associated TREM2 variants, mechanisms of Aβ clearance, and microglial inflammatory responses in AD. Include both in vitro and in vivo studies.\". Ensure critical evaluation and consider alternative interpretations.",
89 | "key_databases_and_search_approach": "Detail the systematic search approach across specified databases (e.g., PubMed, EMBASE, Scopus). Include strategy for citation searching or snowballing Based on the provided detail: \"Primary: PubMed. Secondary: Scopus, Web of Science. Approach: Initial broad search followed by iterative refinement. Snowballing from key review articles and highly cited papers. Critical appraisal of methodologies and findings.\". Ensure critical evaluation and consider alternative interpretations.",
90 | "guidance_notes": [
91 | "Document search queries and results for reproducibility.",
92 | "Consider using reference management software.",
93 | "Plan for screening and selection of articles based on predefined criteria."
94 | ]
95 | },
96 | "step_1_3_experimental_design_and_data_acquisition": {
97 | "experimental_paradigm": "Elaborate on the chosen experimental paradigm, justifying its appropriateness for testing the hypothesis Based on the provided detail: \"Comparative functional genomics study using human iPSC-derived microglia carrying different TREM2 variants and corresponding transgenic mouse models.\". Ensure critical evaluation and consider alternative interpretations.",
98 | "data_acquisition_plan_existing_data": "Strategize the identification, retrieval, and validation of relevant existing datasets. Specify databases, data types, and access protocols Based on the provided detail: \"Retrieve publicly available RNA-seq/scRNA-seq datasets of human AD brain tissue and TREM2 variant mouse models from GEO/ArrayExpress to correlate in vitro findings with in vivo expression patterns. Utilize ADNI database for clinical correlations if applicable.\". Ensure critical evaluation and consider alternative interpretations.",
99 | "data_acquisition_plan_new_data": "Outline the plan for generating novel data, including experimental models, key procedures, sample size considerations, and data deposition strategy Based on the provided detail: \"Generate human iPSC lines from AD patients with known TREM2 genotypes (R47H, R62H, D87N, common variant) and differentiate into microglia. Perform Aβ phagocytosis assays, cytokine profiling (Luminex/ELISA), RNA-seq, and lipidomics. In vivo: Utilize TREM2-R47H knock-in mice crossed with 5xFAD model for behavioral, immunohistochemical, and biochemical analyses.\". Ensure critical evaluation and consider alternative interpretations.",
100 | "blast_utilization_plan": "Specify how sequence alignment tools (e.g., NCBI BLAST) will be employed, including purpose, programs, databases, and interpretation of results Based on the provided detail: \"Verify TREM2 variant sequences in generated iPSC lines and mouse models. Analyze TREM2 orthologs across species if comparative studies are needed.\". Ensure critical evaluation and consider alternative interpretations.",
101 | "controls_and_rigor_measures": "Detail crucial experimental controls (positive, negative, internal) and measures to ensure scientific rigor, reproducibility, and minimization of bias Based on the provided detail: \"Isogenic iPSC lines as controls where possible. Age- and sex-matched littermate controls for mouse studies. Blinding for behavioral and immunohistochemical analyses. Multiple biological and technical replicates. Statistical power analysis.\". Ensure critical evaluation and consider alternative interpretations.",
102 | "methodological_challenges_and_mitigation": "Anticipate potential methodological challenges, their impact, and robust mitigation strategies Based on the provided detail: \"Challenge: Variability in iPSC differentiation. Mitigation: Standardized protocols, multiple clones per genotype, rigorous quality control of differentiated microglia. Challenge: Off-target effects of CRISPR editing. Mitigation: Whole-genome sequencing of edited lines.\". Ensure critical evaluation and consider alternative interpretations.",
103 | "guidance_notes": [
104 | "Ensure sample sizes are adequately powered.",
105 | "Consider blinding and randomization where appropriate.",
106 | "Define clear endpoints and outcome measures.",
107 | "Address potential confounders in the experimental design."
108 | ]
109 | }
110 | },
111 | "phase_2_data_collection_and_processing": {
112 | "title": "Phase 2: Data Collection and Processing",
113 | "step_2_1_data_collection_retrieval": {
114 | "data_collection_methods_wet_lab": "Provide detailed wet-lab protocols, including sample preparation, experimental treatments, instrument settings, and data recording procedures Based on the provided detail: \"iPSC culture and differentiation, CRISPR/Cas9 gene editing, flow cytometry for phagocytosis, Luminex/ELISA for cytokines, RNA extraction and library preparation for RNA-seq, lipid extraction for mass spectrometry. Mouse behavioral testing (Morris water maze, Y-maze), tissue collection, immunohistochemistry, Western blotting.\". Ensure critical evaluation and consider alternative interpretations.",
115 | "data_collection_methods_dry_lab": "Specify execution details for computational data retrieval, including precise queries, API usage, versioning of tools, and data provenance tracking Based on the provided detail: \"Data retrieval from GEO/ArrayExpress using their respective APIs/web interfaces. Accessioning of all generated high-throughput data.\". Ensure critical evaluation and consider alternative interpretations.",
116 | "guidance_notes": [
117 | "Standardize protocols to ensure consistency.",
118 | "Implement robust data labeling and organization from the outset.",
119 | "Document any deviations from planned protocols."
120 | ]
121 | },
122 | "step_2_2_data_preprocessing_and_qc": {
123 | "data_preprocessing_and_qc_plan": "Describe the comprehensive pipeline for data cleaning, normalization, transformation, and quality control. Specify metrics, thresholds, and tools for each step Based on the provided detail: \"RNA-seq: FastQC, Trimmomatic, STAR alignment, featureCounts. scRNA-seq: Cell Ranger, Seurat/Scanpy for QC and normalization. Lipidomics: XCMS/MZmine for peak picking and alignment. Rigorous QC metrics at each step.\". Ensure critical evaluation and consider alternative interpretations.",
124 | "guidance_notes": [
125 | "Define criteria for outlier detection and handling.",
126 | "Assess data quality before and after preprocessing.",
127 | "Ensure preprocessing steps are appropriate for downstream analyses."
128 | ]
129 | }
130 | },
131 | "phase_3_analysis_and_interpretation": {
132 | "title": "Phase 3: Analysis and Interpretation",
133 | "step_3_1_data_analysis_plan": {
134 | "data_analysis_strategy": "Outline the core statistical and computational methods for data analysis. Specify tests, software, parameters, and how they address the hypotheses Based on the provided detail: \"Differential gene expression analysis (DESeq2/edgeR). Pathway analysis (GSEA, IPA). Statistical tests: ANOVA, t-tests with appropriate corrections for multiple comparisons. Machine learning for integrating multi-omics data if applicable.\". Ensure critical evaluation and consider alternative interpretations.",
135 | "bioinformatics_pipeline_summary": "Summarize the bioinformatics pipeline for high-throughput data, detailing tools, algorithms, parameter settings, and workflow for downstream analyses Based on the provided detail: \"Custom scripts (R/Python) integrating standard bioinformatics tools for RNA-seq, scRNA-seq, and lipidomics data. Visualization using ggplot2, Seurat, custom plotting.\". Ensure critical evaluation and consider alternative interpretations.",
136 | "guidance_notes": [
137 | "Justify the choice of statistical tests based on data distribution and assumptions.",
138 | "Address multiple testing corrections if applicable.",
139 | "Consider sensitivity analyses to assess robustness of findings."
140 | ]
141 | },
142 | "step_3_2_results_interpretation": {
143 | "results_interpretation_framework": "Establish a clear framework for interpreting analytical findings in the context of the hypotheses, considering statistical significance, effect sizes, and biological relevance Based on the provided detail: \"Evaluate findings against primary and secondary hypotheses. Assess statistical significance (p < 0.05) and biological relevance (effect size, pathway enrichment). Correlate in vitro findings with in vivo data and existing literature.\". Ensure critical evaluation and consider alternative interpretations.",
144 | "comparison_with_literature_plan": "Develop a strategy for systematically contextualizing results with existing literature, addressing consistencies, discrepancies, and novel contributions Based on the provided detail: \"Systematically compare results with published studies on TREM2 variants and AD. Discuss consistencies and discrepancies, potential reasons for differences (model systems, methodologies).\". Ensure critical evaluation and consider alternative interpretations.",
145 | "guidance_notes": [
146 | "Distinguish correlation from causation.",
147 | "Acknowledge limitations of the study and their impact on interpretation.",
148 | "Discuss clinical or translational implications if relevant."
149 | ]
150 | }
151 | },
152 | "phase_4_dissemination_and_iteration": {
153 | "title": "Phase 4: Dissemination and Iteration",
154 | "step_4_1_dissemination_strategy": {
155 | "dissemination_manuscript_plan": "Formulate a plan for manuscript preparation, including core message, target journal profile, key figures/tables, and authorship contributions Based on the provided detail: \"Core message: TREM2 risk variants impair microglial Aβ clearance and promote detrimental neuroinflammation. Target journals: Nature Neuroscience, Cell, Neuron, Immunity. Key figures: Phagocytosis assays, cytokine profiles, RNA-seq heatmaps/volcano plots, in vivo pathology.\". Ensure critical evaluation and consider alternative interpretations.",
156 | "dissemination_data_deposition_plan": "Outline a strategy for depositing research data in public repositories, specifying data types, repository choices, metadata standards, and adherence to FAIR principles Based on the provided detail: \"Deposit all raw and processed high-throughput sequencing data in GEO. Share iPSC lines and mouse models through appropriate repositories (e.g., WiCell, JAX). Adhere to FAIR data principles.\". Ensure critical evaluation and consider alternative interpretations.",
157 | "guidance_notes": [
158 | "Follow journal-specific author guidelines.",
159 | "Ensure data is de-identified if it contains sensitive information.",
160 | "Obtain DOIs or accession numbers for deposited data."
161 | ]
162 | },
163 | "step_4_2_peer_review_and_publication": {
164 | "peer_review_and_publication_approach": "Describe the approach to journal submission, navigating peer review, and addressing reviewer comments constructively for publication Based on the provided detail: \"Submit to target journal, respond thoroughly to reviewer comments. Consider preprint server (bioRxiv) prior to peer-reviewed publication.\". Ensure critical evaluation and consider alternative interpretations.",
165 | "guidance_notes": [
166 | "Prepare a compelling cover letter.",
167 | "Respond to reviewer comments point-by-point and respectfully.",
168 | "Consider pre-print servers for early dissemination."
169 | ]
170 | },
171 | "step_4_3_further_research_and_iteration": {
172 | "future_research_directions": "Identify and articulate potential next steps, new research questions, or translational applications arising from the current study's findings and limitations Based on the provided detail: \"Investigate downstream signaling pathways affected by TREM2 variants. Explore therapeutic potential of TREM2 agonists in variant-specific contexts. Assess impact of TREM2 variants on other microglial functions (e.g., synaptic pruning).\". Ensure critical evaluation and consider alternative interpretations.",
173 | "guidance_notes": [
174 | "Base future directions on the study's actual outcomes.",
175 | "Consider how new technologies or approaches could address remaining questions."
176 | ]
177 | }
178 | },
179 | "cross_cutting_considerations": {
180 | "title": "Cross-Cutting Considerations",
181 | "content": {
182 | "record_keeping_and_data_management": "Detail the comprehensive plan for meticulous record-keeping, version control (code, data, manuscripts), secure data storage, backup strategy, and Data Management Plan (DMP) adherence Based on the provided detail: \"Electronic lab notebook (ELN) for experimental records. Version control (Git/GitHub) for code and analysis scripts. Secure, backed-up server for raw and processed data. Data Management Plan (DMP) as per funding agency requirements.\". Ensure critical evaluation and consider alternative interpretations.",
183 | "collaboration_strategy": "If applicable, describe the strategy for effective collaboration, including communication channels, role delineation, data sharing protocols, and authorship agreements Based on the provided detail: \"Collaborate with iPSC core facility for cell line generation, genomics core for sequencing, and biostatistics core for complex data analysis. Regular meetings and shared data platforms.\". Ensure critical evaluation and consider alternative interpretations.",
184 | "ethical_considerations": "Thoroughly outline all ethical considerations, including plans for IRB/IACUC approval, informed consent, data privacy/anonymization, responsible conduct of research (RCR) training, and conflict of interest management Based on the provided detail: \"Human iPSC work approved by Institutional Review Board (IRB). Animal studies approved by Institutional Animal Care and Use Committee (IACUC). Adherence to ARRIVE guidelines for animal research. Data de-identification where applicable. Responsible Conduct of Research (RCR) training for all personnel.\". Ensure critical evaluation and consider alternative interpretations.",
185 | "guidance_notes": [
186 | "Ensure compliance with institutional and funding agency requirements.",
187 | "Regularly review and update the DMP.",
188 | "Promote open science practices where appropriate."
189 | ]
190 | }
191 | }
192 | }
193 | ```
194 |
```