This is page 17 of 29. Use http://codebase.md/tosin2013/documcp?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .dockerignore
├── .eslintignore
├── .eslintrc.json
├── .github
│ ├── agents
│ │ ├── documcp-ast.md
│ │ ├── documcp-deploy.md
│ │ ├── documcp-memory.md
│ │ ├── documcp-test.md
│ │ └── documcp-tool.md
│ ├── copilot-instructions.md
│ ├── dependabot.yml
│ ├── ISSUE_TEMPLATE
│ │ ├── automated-changelog.md
│ │ ├── bug_report.md
│ │ ├── bug_report.yml
│ │ ├── documentation_issue.md
│ │ ├── feature_request.md
│ │ ├── feature_request.yml
│ │ ├── npm-publishing-fix.md
│ │ └── release_improvements.md
│ ├── PULL_REQUEST_TEMPLATE.md
│ ├── release-drafter.yml
│ └── workflows
│ ├── auto-merge.yml
│ ├── ci.yml
│ ├── codeql.yml
│ ├── dependency-review.yml
│ ├── deploy-docs.yml
│ ├── README.md
│ ├── release-drafter.yml
│ └── release.yml
├── .gitignore
├── .husky
│ ├── commit-msg
│ └── pre-commit
├── .linkcheck.config.json
├── .markdown-link-check.json
├── .nvmrc
├── .pre-commit-config.yaml
├── .versionrc.json
├── CHANGELOG.md
├── CODE_OF_CONDUCT.md
├── commitlint.config.js
├── CONTRIBUTING.md
├── docker-compose.docs.yml
├── Dockerfile.docs
├── docs
│ ├── .docusaurus
│ │ ├── docusaurus-plugin-content-docs
│ │ │ └── default
│ │ │ └── __mdx-loader-dependency.json
│ │ └── docusaurus-plugin-content-pages
│ │ └── default
│ │ └── __plugin.json
│ ├── adrs
│ │ ├── 001-mcp-server-architecture.md
│ │ ├── 002-repository-analysis-engine.md
│ │ ├── 003-static-site-generator-recommendation-engine.md
│ │ ├── 004-diataxis-framework-integration.md
│ │ ├── 005-github-pages-deployment-automation.md
│ │ ├── 006-mcp-tools-api-design.md
│ │ ├── 007-mcp-prompts-and-resources-integration.md
│ │ ├── 008-intelligent-content-population-engine.md
│ │ ├── 009-content-accuracy-validation-framework.md
│ │ ├── 010-mcp-resource-pattern-redesign.md
│ │ └── README.md
│ ├── api
│ │ ├── .nojekyll
│ │ ├── assets
│ │ │ ├── hierarchy.js
│ │ │ ├── highlight.css
│ │ │ ├── icons.js
│ │ │ ├── icons.svg
│ │ │ ├── main.js
│ │ │ ├── navigation.js
│ │ │ ├── search.js
│ │ │ └── style.css
│ │ ├── hierarchy.html
│ │ ├── index.html
│ │ ├── modules.html
│ │ └── variables
│ │ └── TOOLS.html
│ ├── assets
│ │ └── logo.svg
│ ├── development
│ │ └── MCP_INSPECTOR_TESTING.md
│ ├── docusaurus.config.js
│ ├── explanation
│ │ ├── architecture.md
│ │ └── index.md
│ ├── guides
│ │ ├── link-validation.md
│ │ ├── playwright-integration.md
│ │ └── playwright-testing-workflow.md
│ ├── how-to
│ │ ├── analytics-setup.md
│ │ ├── custom-domains.md
│ │ ├── documentation-freshness-tracking.md
│ │ ├── github-pages-deployment.md
│ │ ├── index.md
│ │ ├── local-testing.md
│ │ ├── performance-optimization.md
│ │ ├── prompting-guide.md
│ │ ├── repository-analysis.md
│ │ ├── seo-optimization.md
│ │ ├── site-monitoring.md
│ │ ├── troubleshooting.md
│ │ └── usage-examples.md
│ ├── index.md
│ ├── knowledge-graph.md
│ ├── package-lock.json
│ ├── package.json
│ ├── phase-2-intelligence.md
│ ├── reference
│ │ ├── api-overview.md
│ │ ├── cli.md
│ │ ├── configuration.md
│ │ ├── deploy-pages.md
│ │ ├── index.md
│ │ ├── mcp-tools.md
│ │ └── prompt-templates.md
│ ├── research
│ │ ├── cross-domain-integration
│ │ │ └── README.md
│ │ ├── domain-1-mcp-architecture
│ │ │ ├── index.md
│ │ │ └── mcp-performance-research.md
│ │ ├── domain-2-repository-analysis
│ │ │ └── README.md
│ │ ├── domain-3-ssg-recommendation
│ │ │ ├── index.md
│ │ │ └── ssg-performance-analysis.md
│ │ ├── domain-4-diataxis-integration
│ │ │ └── README.md
│ │ ├── domain-5-github-deployment
│ │ │ ├── github-pages-security-analysis.md
│ │ │ └── index.md
│ │ ├── domain-6-api-design
│ │ │ └── README.md
│ │ ├── README.md
│ │ ├── research-integration-summary-2025-01-14.md
│ │ ├── research-progress-template.md
│ │ └── research-questions-2025-01-14.md
│ ├── robots.txt
│ ├── sidebars.js
│ ├── sitemap.xml
│ ├── src
│ │ └── css
│ │ └── custom.css
│ └── tutorials
│ ├── development-setup.md
│ ├── environment-setup.md
│ ├── first-deployment.md
│ ├── getting-started.md
│ ├── index.md
│ ├── memory-workflows.md
│ └── user-onboarding.md
├── jest.config.js
├── LICENSE
├── Makefile
├── MCP_PHASE2_IMPLEMENTATION.md
├── mcp-config-example.json
├── mcp.json
├── package-lock.json
├── package.json
├── README.md
├── release.sh
├── scripts
│ └── check-package-structure.cjs
├── SECURITY.md
├── setup-precommit.sh
├── src
│ ├── benchmarks
│ │ └── performance.ts
│ ├── index.ts
│ ├── memory
│ │ ├── contextual-retrieval.ts
│ │ ├── deployment-analytics.ts
│ │ ├── enhanced-manager.ts
│ │ ├── export-import.ts
│ │ ├── freshness-kg-integration.ts
│ │ ├── index.ts
│ │ ├── integration.ts
│ │ ├── kg-code-integration.ts
│ │ ├── kg-health.ts
│ │ ├── kg-integration.ts
│ │ ├── kg-link-validator.ts
│ │ ├── kg-storage.ts
│ │ ├── knowledge-graph.ts
│ │ ├── learning.ts
│ │ ├── manager.ts
│ │ ├── multi-agent-sharing.ts
│ │ ├── pruning.ts
│ │ ├── schemas.ts
│ │ ├── storage.ts
│ │ ├── temporal-analysis.ts
│ │ ├── user-preferences.ts
│ │ └── visualization.ts
│ ├── prompts
│ │ └── technical-writer-prompts.ts
│ ├── scripts
│ │ └── benchmark.ts
│ ├── templates
│ │ └── playwright
│ │ ├── accessibility.spec.template.ts
│ │ ├── Dockerfile.template
│ │ ├── docs-e2e.workflow.template.yml
│ │ ├── link-validation.spec.template.ts
│ │ └── playwright.config.template.ts
│ ├── tools
│ │ ├── analyze-deployments.ts
│ │ ├── analyze-readme.ts
│ │ ├── analyze-repository.ts
│ │ ├── check-documentation-links.ts
│ │ ├── deploy-pages.ts
│ │ ├── detect-gaps.ts
│ │ ├── evaluate-readme-health.ts
│ │ ├── generate-config.ts
│ │ ├── generate-contextual-content.ts
│ │ ├── generate-llm-context.ts
│ │ ├── generate-readme-template.ts
│ │ ├── generate-technical-writer-prompts.ts
│ │ ├── kg-health-check.ts
│ │ ├── manage-preferences.ts
│ │ ├── manage-sitemap.ts
│ │ ├── optimize-readme.ts
│ │ ├── populate-content.ts
│ │ ├── readme-best-practices.ts
│ │ ├── recommend-ssg.ts
│ │ ├── setup-playwright-tests.ts
│ │ ├── setup-structure.ts
│ │ ├── sync-code-to-docs.ts
│ │ ├── test-local-deployment.ts
│ │ ├── track-documentation-freshness.ts
│ │ ├── update-existing-documentation.ts
│ │ ├── validate-content.ts
│ │ ├── validate-documentation-freshness.ts
│ │ ├── validate-readme-checklist.ts
│ │ └── verify-deployment.ts
│ ├── types
│ │ └── api.ts
│ ├── utils
│ │ ├── ast-analyzer.ts
│ │ ├── code-scanner.ts
│ │ ├── content-extractor.ts
│ │ ├── drift-detector.ts
│ │ ├── freshness-tracker.ts
│ │ ├── language-parsers-simple.ts
│ │ ├── permission-checker.ts
│ │ └── sitemap-generator.ts
│ └── workflows
│ └── documentation-workflow.ts
├── test-docs-local.sh
├── tests
│ ├── api
│ │ └── mcp-responses.test.ts
│ ├── benchmarks
│ │ └── performance.test.ts
│ ├── edge-cases
│ │ └── error-handling.test.ts
│ ├── functional
│ │ └── tools.test.ts
│ ├── integration
│ │ ├── kg-documentation-workflow.test.ts
│ │ ├── knowledge-graph-workflow.test.ts
│ │ ├── mcp-readme-tools.test.ts
│ │ ├── memory-mcp-tools.test.ts
│ │ ├── readme-technical-writer.test.ts
│ │ └── workflow.test.ts
│ ├── memory
│ │ ├── contextual-retrieval.test.ts
│ │ ├── enhanced-manager.test.ts
│ │ ├── export-import.test.ts
│ │ ├── freshness-kg-integration.test.ts
│ │ ├── kg-code-integration.test.ts
│ │ ├── kg-health.test.ts
│ │ ├── kg-link-validator.test.ts
│ │ ├── kg-storage-validation.test.ts
│ │ ├── kg-storage.test.ts
│ │ ├── knowledge-graph-enhanced.test.ts
│ │ ├── knowledge-graph.test.ts
│ │ ├── learning.test.ts
│ │ ├── manager-advanced.test.ts
│ │ ├── manager.test.ts
│ │ ├── mcp-resource-integration.test.ts
│ │ ├── mcp-tool-persistence.test.ts
│ │ ├── schemas.test.ts
│ │ ├── storage.test.ts
│ │ ├── temporal-analysis.test.ts
│ │ └── user-preferences.test.ts
│ ├── performance
│ │ ├── memory-load-testing.test.ts
│ │ └── memory-stress-testing.test.ts
│ ├── prompts
│ │ ├── guided-workflow-prompts.test.ts
│ │ └── technical-writer-prompts.test.ts
│ ├── server.test.ts
│ ├── setup.ts
│ ├── tools
│ │ ├── all-tools.test.ts
│ │ ├── analyze-coverage.test.ts
│ │ ├── analyze-deployments.test.ts
│ │ ├── analyze-readme.test.ts
│ │ ├── analyze-repository.test.ts
│ │ ├── check-documentation-links.test.ts
│ │ ├── deploy-pages-kg-retrieval.test.ts
│ │ ├── deploy-pages-tracking.test.ts
│ │ ├── deploy-pages.test.ts
│ │ ├── detect-gaps.test.ts
│ │ ├── evaluate-readme-health.test.ts
│ │ ├── generate-contextual-content.test.ts
│ │ ├── generate-llm-context.test.ts
│ │ ├── generate-readme-template.test.ts
│ │ ├── generate-technical-writer-prompts.test.ts
│ │ ├── kg-health-check.test.ts
│ │ ├── manage-sitemap.test.ts
│ │ ├── optimize-readme.test.ts
│ │ ├── readme-best-practices.test.ts
│ │ ├── recommend-ssg-historical.test.ts
│ │ ├── recommend-ssg-preferences.test.ts
│ │ ├── recommend-ssg.test.ts
│ │ ├── simple-coverage.test.ts
│ │ ├── sync-code-to-docs.test.ts
│ │ ├── test-local-deployment.test.ts
│ │ ├── tool-error-handling.test.ts
│ │ ├── track-documentation-freshness.test.ts
│ │ ├── validate-content.test.ts
│ │ ├── validate-documentation-freshness.test.ts
│ │ └── validate-readme-checklist.test.ts
│ ├── types
│ │ └── type-safety.test.ts
│ └── utils
│ ├── ast-analyzer.test.ts
│ ├── content-extractor.test.ts
│ ├── drift-detector.test.ts
│ ├── freshness-tracker.test.ts
│ └── sitemap-generator.test.ts
├── tsconfig.json
└── typedoc.json
```
# Files
--------------------------------------------------------------------------------
/docs/research/research-questions-2025-01-14.md:
--------------------------------------------------------------------------------
```markdown
1 | ---
2 | documcp:
3 | last_updated: "2025-11-20T00:46:21.969Z"
4 | last_validated: "2025-11-20T00:46:21.969Z"
5 | auto_updated: false
6 | update_frequency: monthly
7 | ---
8 |
9 | # DocuMCP Implementation Research Questions
10 |
11 | **Generated**: January 14, 2025
12 | **Project**: DocuMCP - Intelligent MCP Server for GitHub Pages Documentation Deployment
13 | **Phase**: Pre-Implementation Research
14 | **Context**: Comprehensive validation of ADR decisions and implementation planning
15 |
16 | ---
17 |
18 | ## Research Overview
19 |
20 | This document contains systematic research questions organized by architectural domain, based on the 6 ADRs established for DocuMCP. Each section includes priority ratings, validation criteria, and expected outcomes to guide effective pre-implementation research.
21 |
22 | ### Research Objectives
23 |
24 | 1. **Validate technical feasibility** of ADR decisions
25 | 2. **Identify implementation risks** and mitigation strategies
26 | 3. **Research best practices** for MCP server development
27 | 4. **Investigate SSG ecosystem** integration patterns
28 | 5. **Explore Diataxis framework** implementation approaches
29 |
30 | ### Research Constraints
31 |
32 | - TypeScript/Node.js ecosystem limitations
33 | - MCP specification compliance requirements
34 | - GitHub Pages deployment constraints
35 | - Performance and scalability requirements
36 |
37 | ---
38 |
39 | ## Domain 1: MCP Server Architecture Research (ADR-001)
40 |
41 | ### Priority: HIGH - Foundation Critical
42 |
43 | #### Core Architecture Questions
44 |
45 | **Q1.1: TypeScript MCP SDK Performance Characteristics**
46 |
47 | - **Question**: What are the performance benchmarks and limitations of the TypeScript MCP SDK under heavy concurrent usage?
48 | - **Priority**: CRITICAL
49 | - **Research Method**: Performance testing, benchmark analysis
50 | - **Success Criteria**: Documented performance profiles for different load scenarios
51 | - **Timeline**: Week 1
52 | - **Dependencies**: None
53 |
54 | **Q1.2: Node.js Memory Management for Repository Analysis**
55 |
56 | - **Question**: How can we optimize Node.js memory usage when analyzing large repositories (>10GB)?
57 | - **Priority**: HIGH
58 | - **Research Method**: Memory profiling, stress testing
59 | - **Success Criteria**: Memory optimization strategies with <2GB footprint for 10GB repos
60 | - **Timeline**: Week 1-2
61 | - **Dependencies**: Q1.1
62 |
63 | **Q1.3: MCP Tool Orchestration Patterns**
64 |
65 | - **Question**: What are the most effective patterns for orchestrating complex multi-tool workflows in MCP?
66 | - **Priority**: HIGH
67 | - **Research Method**: Pattern analysis, prototype development
68 | - **Success Criteria**: Documented orchestration patterns with examples
69 | - **Timeline**: Week 2
70 | - **Dependencies**: Q1.1
71 |
72 | **Q1.4: Stateless Session Context Management**
73 |
 74 | - **Question**: How can we efficiently maintain temporary context across tool calls while preserving a stateless architecture?
75 | - **Priority**: MEDIUM
76 | - **Research Method**: Architecture research, implementation prototyping
77 | - **Success Criteria**: Context management strategy that doesn't violate MCP principles
78 | - **Timeline**: Week 2-3
79 | - **Dependencies**: Q1.3
80 |
81 | **Q1.5: Error Recovery and Fault Tolerance**
82 |
83 | - **Question**: What are the best practices for implementing robust error recovery in MCP servers?
84 | - **Priority**: HIGH
85 | - **Research Method**: Error pattern analysis, resilience testing
86 | - **Success Criteria**: Comprehensive error handling framework
87 | - **Timeline**: Week 3
88 | - **Dependencies**: Q1.1, Q1.3
89 |
90 | #### Integration and Deployment Questions
91 |
92 | **Q1.6: GitHub Copilot Integration Patterns**
93 |
94 | - **Question**: What are the optimal integration patterns for MCP servers with GitHub Copilot?
95 | - **Priority**: MEDIUM
96 | - **Research Method**: Integration testing, user experience research
97 | - **Success Criteria**: Documented integration best practices
98 | - **Timeline**: Week 3-4
99 | - **Dependencies**: Q1.3
100 |
101 | **Q1.7: Development Environment Setup**
102 |
103 | - **Question**: What tooling and development practices optimize TypeScript MCP server development?
104 | - **Priority**: LOW
105 | - **Research Method**: Tool evaluation, workflow analysis
106 | - **Success Criteria**: Development environment recommendations
107 | - **Timeline**: Week 4
108 | - **Dependencies**: None
109 |
110 | ---
111 |
112 | ## Domain 2: Repository Analysis Engine Research (ADR-002)
113 |
114 | ### Priority: HIGH - Intelligence Foundation
115 |
116 | #### Analysis Algorithm Questions
117 |
118 | **Q2.1: Multi-layered Analysis Performance**
119 |
120 | - **Question**: How can we optimize the performance of parallel multi-layered repository analysis?
121 | - **Priority**: CRITICAL
122 | - **Research Method**: Algorithm optimization, parallel processing research
123 | - **Success Criteria**: Analysis completion <30 seconds for typical repositories
124 | - **Timeline**: Week 1-2
125 | - **Dependencies**: Q1.2
126 |
127 | **Q2.2: Language Ecosystem Detection Accuracy**
128 |
129 | - **Question**: What are the most reliable methods for detecting and analyzing language ecosystems in repositories?
130 | - **Priority**: HIGH
131 | - **Research Method**: Accuracy testing across diverse repositories
132 | - **Success Criteria**: >95% accuracy for major language ecosystems
133 | - **Timeline**: Week 2
134 | - **Dependencies**: None
135 |
136 | **Q2.3: Content Analysis Natural Language Processing**
137 |
138 | - **Question**: What NLP techniques are most effective for analyzing documentation quality and gaps?
139 | - **Priority**: MEDIUM
140 | - **Research Method**: NLP library evaluation, accuracy testing
141 | - **Success Criteria**: Reliable content quality assessment methodology
142 | - **Timeline**: Week 3
143 | - **Dependencies**: Q2.1
144 |
145 | **Q2.4: Complexity Scoring Algorithm Validation**
146 |
147 | - **Question**: How can we validate and calibrate the project complexity scoring algorithm?
148 | - **Priority**: MEDIUM
149 | - **Research Method**: Validation against known project types, expert review
150 | - **Success Criteria**: Complexity scores correlate with manual expert assessment
151 | - **Timeline**: Week 3-4
152 | - **Dependencies**: Q2.1, Q2.2
153 |
154 | **Q2.5: Incremental Analysis Capabilities**
155 |
156 | - **Question**: How can we implement incremental analysis for repositories that change over time?
157 | - **Priority**: LOW
158 | - **Research Method**: Differential analysis research, caching strategies
159 | - **Success Criteria**: Incremental analysis reduces re-analysis time by >80%
160 | - **Timeline**: Week 4+
161 | - **Dependencies**: Q2.1
162 |
163 | #### Scalability and Performance Questions
164 |
165 | **Q2.6: Large Repository Handling**
166 |
167 | - **Question**: What strategies ensure reliable analysis of enterprise-scale repositories (>100GB)?
168 | - **Priority**: MEDIUM
169 | - **Research Method**: Scalability testing, streaming analysis research
170 | - **Success Criteria**: Successful analysis of repositories up to 100GB
171 | - **Timeline**: Week 2-3
172 | - **Dependencies**: Q1.2, Q2.1
173 |
174 | **Q2.7: Analysis Caching Strategies**
175 |
176 | - **Question**: What caching strategies provide optimal performance for repository analysis?
177 | - **Priority**: MEDIUM
178 | - **Research Method**: Caching pattern research, performance testing
179 | - **Success Criteria**: Cache hit rates >70% for repeated analysis
180 | - **Timeline**: Week 3
181 | - **Dependencies**: Q2.1
182 |
183 | ---
184 |
185 | ## Domain 3: SSG Recommendation Engine Research (ADR-003)
186 |
187 | ### Priority: HIGH - Core Intelligence
188 |
189 | #### Decision Analysis Questions
190 |
191 | **Q3.1: Multi-Criteria Decision Algorithm Validation**
192 |
193 | - **Question**: How can we validate the accuracy of the multi-criteria decision analysis (MCDA) framework for SSG recommendations?
194 | - **Priority**: CRITICAL
195 | - **Research Method**: Validation against expert recommendations, A/B testing
196 | - **Success Criteria**: Algorithm recommendations match expert choices >85% of the time
197 | - **Timeline**: Week 1-2
198 | - **Dependencies**: Q2.4
199 |
200 | **Q3.2: SSG Capability Profiling Methodology**
201 |
202 | - **Question**: What methodology ensures accurate and up-to-date SSG capability profiles?
203 | - **Priority**: HIGH
204 | - **Research Method**: SSG feature analysis, performance benchmarking
205 | - **Success Criteria**: Comprehensive profiles for 5 major SSGs
206 | - **Timeline**: Week 2-3
207 | - **Dependencies**: None
208 |
209 | **Q3.3: Confidence Score Calibration**
210 |
211 | - **Question**: How can we calibrate confidence scores to accurately reflect recommendation reliability?
212 | - **Priority**: HIGH
213 | - **Research Method**: Statistical analysis, outcome tracking
214 | - **Success Criteria**: Confidence scores correlate with actual recommendation success
215 | - **Timeline**: Week 3
216 | - **Dependencies**: Q3.1
217 |
218 | **Q3.4: Performance Modeling Accuracy**
219 |
220 | - **Question**: How accurate are our build time and performance predictions for different SSGs?
221 | - **Priority**: MEDIUM
222 | - **Research Method**: Prediction validation, real-world testing
223 | - **Success Criteria**: Performance predictions within 20% of actual results
224 | - **Timeline**: Week 3-4
225 | - **Dependencies**: Q3.2
226 |
227 | **Q3.5: Dynamic Weight Adjustment**
228 |
229 | - **Question**: Should recommendation weights be dynamically adjusted based on project characteristics?
230 | - **Priority**: LOW
231 | - **Research Method**: Machine learning research, adaptive algorithm development
232 | - **Success Criteria**: Dynamic weighting improves recommendation accuracy by >10%
233 | - **Timeline**: Week 4+
234 | - **Dependencies**: Q3.1, Q3.3
235 |
236 | #### Knowledge Base Maintenance Questions
237 |
238 | **Q3.6: Automated SSG Capability Monitoring**
239 |
240 | - **Question**: How can we automate the monitoring and updating of SSG capabilities?
241 | - **Priority**: MEDIUM
242 | - **Research Method**: API research, automation tool development
243 | - **Success Criteria**: Automated detection of SSG capability changes
244 | - **Timeline**: Week 4
245 | - **Dependencies**: Q3.2
246 |
247 | **Q3.7: Community Feedback Integration**
248 |
249 | - **Question**: How can we integrate community feedback to improve recommendation accuracy?
250 | - **Priority**: LOW
251 | - **Research Method**: Feedback system design, data analysis methods
252 | - **Success Criteria**: Community feedback improves recommendations measurably
253 | - **Timeline**: Week 4+
254 | - **Dependencies**: Q3.1
255 |
256 | ---
257 |
258 | ## Domain 4: Diataxis Framework Integration Research (ADR-004)
259 |
260 | ### Priority: MEDIUM - Quality Enhancement
261 |
262 | #### Implementation Strategy Questions
263 |
264 | **Q4.1: Automated Content Structure Generation**
265 |
266 | - **Question**: What are the most effective approaches for automating Diataxis-compliant structure generation?
267 | - **Priority**: HIGH
268 | - **Research Method**: Template system research, automation testing
269 | - **Success Criteria**: Automated generation of compliant structures for all supported SSGs
270 | - **Timeline**: Week 2
271 | - **Dependencies**: Q3.2
272 |
273 | **Q4.2: Content Planning Intelligence**
274 |
275 | - **Question**: How can we intelligently suggest content based on project analysis and Diataxis principles?
276 | - **Priority**: MEDIUM
277 | - **Research Method**: Content analysis algorithms, suggestion accuracy testing
278 | - **Success Criteria**: Content suggestions deemed useful by documentation experts >80% of the time
279 | - **Timeline**: Week 3
280 | - **Dependencies**: Q2.3, Q4.1
281 |
282 | **Q4.3: SSG-Specific Diataxis Adaptations**
283 |
284 | - **Question**: How should Diataxis implementation be adapted for each SSG's unique capabilities?
285 | - **Priority**: MEDIUM
286 | - **Research Method**: SSG feature analysis, adaptation strategy development
287 | - **Success Criteria**: Optimal Diataxis implementation for each supported SSG
288 | - **Timeline**: Week 3-4
289 | - **Dependencies**: Q3.2, Q4.1
290 |
291 | **Q4.4: Navigation Generation Algorithms**
292 |
293 | - **Question**: What algorithms generate the most intuitive navigation for Diataxis-organized content?
294 | - **Priority**: MEDIUM
295 | - **Research Method**: UX research, navigation pattern analysis
296 | - **Success Criteria**: Navigation usability scores >90% in user testing
297 | - **Timeline**: Week 4
298 | - **Dependencies**: Q4.1, Q4.3
299 |
300 | #### Quality Assurance Questions
301 |
302 | **Q4.5: Diataxis Compliance Validation**
303 |
304 | - **Question**: How can we automatically validate Diataxis compliance in generated structures?
305 | - **Priority**: MEDIUM
306 | - **Research Method**: Validation algorithm development, compliance testing
307 | - **Success Criteria**: Automated compliance checking with >95% accuracy
308 | - **Timeline**: Week 3
309 | - **Dependencies**: Q4.1
310 |
311 | **Q4.6: Content Quality Metrics**
312 |
313 | - **Question**: What metrics best measure the quality of Diataxis-organized documentation?
314 | - **Priority**: LOW
315 | - **Research Method**: Quality metric research, correlation analysis
316 | - **Success Criteria**: Validated quality metrics that predict user satisfaction
317 | - **Timeline**: Week 4+
318 | - **Dependencies**: Q4.2, Q4.5
319 |
320 | ---
321 |
322 | ## Domain 5: GitHub Pages Deployment Research (ADR-005)
323 |
324 | ### Priority: HIGH - Implementation Critical
325 |
326 | #### Workflow Optimization Questions
327 |
328 | **Q5.1: SSG-Specific Workflow Performance**
329 |
330 | - **Question**: What are the optimal GitHub Actions configurations for each supported SSG?
331 | - **Priority**: CRITICAL
332 | - **Research Method**: Workflow benchmarking, optimization testing
333 | - **Success Criteria**: Optimized workflows reduce build times by >30%
334 | - **Timeline**: Week 1-2
335 | - **Dependencies**: Q3.2
336 |
337 | **Q5.2: Advanced Caching Strategies**
338 |
339 | - **Question**: What caching strategies provide maximum build performance in GitHub Actions?
340 | - **Priority**: HIGH
341 | - **Research Method**: Caching pattern research, performance testing
342 | - **Success Criteria**: Cache strategies reduce build times by >50% for incremental changes
343 | - **Timeline**: Week 2
344 | - **Dependencies**: Q5.1
345 |
346 | **Q5.3: Build Failure Diagnosis and Recovery**
347 |
348 | - **Question**: How can we implement intelligent build failure diagnosis and automatic recovery?
349 | - **Priority**: HIGH
350 | - **Research Method**: Error pattern analysis, recovery strategy development
351 | - **Success Criteria**: Automatic recovery for >70% of common build failures
352 | - **Timeline**: Week 3
353 | - **Dependencies**: Q5.1
354 |
355 | **Q5.4: Multi-Environment Deployment Strategies**
356 |
357 | - **Question**: What strategies support deployment to multiple environments (staging, production)?
358 | - **Priority**: MEDIUM
359 | - **Research Method**: Deployment pattern research, environment management
360 | - **Success Criteria**: Seamless multi-environment deployment capabilities
361 | - **Timeline**: Week 4
362 | - **Dependencies**: Q5.1, Q5.2
363 |
364 | #### Security and Compliance Questions
365 |
366 | **Q5.5: Workflow Security Best Practices**
367 |
368 | - **Question**: What security best practices should be enforced in generated GitHub Actions workflows?
369 | - **Priority**: HIGH
370 | - **Research Method**: Security research, vulnerability analysis
371 | - **Success Criteria**: Security-hardened workflows with minimal attack surface
372 | - **Timeline**: Week 2-3
373 | - **Dependencies**: Q5.1
374 |
375 | **Q5.6: Dependency Vulnerability Management**
376 |
377 | - **Question**: How can we automatically manage and update vulnerable dependencies in workflows?
378 | - **Priority**: MEDIUM
379 | - **Research Method**: Dependency scanning research, automation development
380 | - **Success Criteria**: Automated vulnerability detection and resolution
381 | - **Timeline**: Week 3
382 | - **Dependencies**: Q5.5
383 |
384 | **Q5.7: Secrets and Environment Management**
385 |
386 | - **Question**: What are the best practices for managing secrets and environment variables in automated deployments?
387 | - **Priority**: MEDIUM
388 | - **Research Method**: Security pattern research, credential management
389 | - **Success Criteria**: Secure secrets management without user complexity
390 | - **Timeline**: Week 3
391 | - **Dependencies**: Q5.5
392 |
393 | #### Monitoring and Troubleshooting Questions
394 |
395 | **Q5.8: Deployment Health Monitoring**
396 |
397 | - **Question**: How can we implement comprehensive health monitoring for deployed documentation sites?
398 | - **Priority**: MEDIUM
399 | - **Research Method**: Monitoring tool research, health check development
400 | - **Success Criteria**: Comprehensive health monitoring with actionable alerts
401 | - **Timeline**: Week 4
402 | - **Dependencies**: Q5.1
403 |
404 | **Q5.9: Performance Optimization Recommendations**
405 |
406 | - **Question**: How can we provide automated performance optimization recommendations for deployed sites?
407 | - **Priority**: LOW
408 | - **Research Method**: Performance analysis research, optimization pattern development
409 | - **Success Criteria**: Automated performance recommendations that improve site speed
410 | - **Timeline**: Week 4+
411 | - **Dependencies**: Q5.8
412 |
413 | ---
414 |
415 | ## Domain 6: MCP Tools API Research (ADR-006)
416 |
417 | ### Priority: HIGH - User Interface Critical
418 |
419 | #### API Design and Usability Questions
420 |
421 | **Q6.1: Tool Parameter Schema Optimization**
422 |
423 | - **Question**: What parameter schema designs provide the best balance of flexibility and usability?
424 | - **Priority**: HIGH
425 | - **Research Method**: API design research, usability testing
426 | - **Success Criteria**: Parameter schemas that are intuitive and comprehensive
427 | - **Timeline**: Week 1-2
428 | - **Dependencies**: None
429 |
430 | **Q6.2: Response Format Standardization**
431 |
432 | - **Question**: What response formats provide optimal client integration and user experience?
433 | - **Priority**: HIGH
434 | - **Research Method**: Format analysis, client integration testing
435 | - **Success Criteria**: Standardized formats that simplify client development
436 | - **Timeline**: Week 2
437 | - **Dependencies**: Q6.1
438 |
439 | **Q6.3: Error Handling and User Guidance**
440 |
441 | - **Question**: How can we provide the most helpful error messages and recovery guidance?
442 | - **Priority**: HIGH
443 | - **Research Method**: Error analysis, user experience research
444 | - **Success Criteria**: Error messages that enable users to resolve issues >90% of the time
445 | - **Timeline**: Week 2-3
446 | - **Dependencies**: Q6.1
447 |
448 | **Q6.4: Progressive Complexity Disclosure**
449 |
450 | - **Question**: How can we design APIs that are simple for beginners but powerful for experts?
451 | - **Priority**: MEDIUM
452 | - **Research Method**: API design pattern research, user journey analysis
453 | - **Success Criteria**: APIs that scale from simple to complex use cases seamlessly
454 | - **Timeline**: Week 3
455 | - **Dependencies**: Q6.1, Q6.2
456 |
457 | #### Validation and Security Questions
458 |
459 | **Q6.5: Comprehensive Input Validation**
460 |
461 | - **Question**: What validation strategies ensure robust security and user-friendly error reporting?
462 | - **Priority**: HIGH
463 | - **Research Method**: Validation framework research, security testing
464 | - **Success Criteria**: Validation that prevents all security issues while providing clear feedback
465 | - **Timeline**: Week 2
466 | - **Dependencies**: Q6.1
467 |
468 | **Q6.6: Performance and Caching Optimization**
469 |
470 | - **Question**: How can we optimize API performance through intelligent caching and response optimization?
471 | - **Priority**: MEDIUM
472 | - **Research Method**: Performance testing, caching strategy research
473 | - **Success Criteria**: API response times <1 second for all operations
474 | - **Timeline**: Week 3
475 | - **Dependencies**: Q6.2
476 |
477 | #### Integration and Extension Questions
478 |
479 | **Q6.7: Client Integration Patterns**
480 |
481 | - **Question**: What integration patterns work best for different types of MCP clients?
482 | - **Priority**: MEDIUM
483 | - **Research Method**: Integration testing, client developer feedback
484 | - **Success Criteria**: Integration patterns that simplify client development
485 | - **Timeline**: Week 3-4
486 | - **Dependencies**: Q6.2, Q6.4
487 |
488 | **Q6.8: API Extension and Versioning**
489 |
490 | - **Question**: How can we design APIs that support future extensions without breaking existing clients?
491 | - **Priority**: LOW
492 | - **Research Method**: Versioning strategy research, extension pattern analysis
493 | - **Success Criteria**: Extension mechanisms that maintain backward compatibility
494 | - **Timeline**: Week 4
495 | - **Dependencies**: Q6.1, Q6.2
496 |
497 | ---
498 |
499 | ## Cross-Domain Integration Research
500 |
501 | ### Priority: MEDIUM - System Integration
502 |
503 | #### End-to-End Workflow Questions
504 |
505 | **Q7.1: Complete Workflow Orchestration**
506 |
507 | - **Question**: How can we optimize the complete workflow from repository analysis to deployed documentation?
508 | - **Priority**: HIGH
509 | - **Research Method**: Workflow analysis, performance optimization
510 | - **Success Criteria**: End-to-end workflow completion in <10 minutes for typical projects
511 | - **Timeline**: Week 3-4
512 | - **Dependencies**: All previous domains
513 |
514 | **Q7.2: Error Recovery Across Tools**
515 |
516 | - **Question**: How can we implement robust error recovery that spans multiple tool invocations?
517 | - **Priority**: MEDIUM
518 | - **Research Method**: Error pattern analysis, recovery strategy development
519 | - **Success Criteria**: Graceful recovery from failures at any workflow stage
520 | - **Timeline**: Week 4
521 | - **Dependencies**: Q7.1
522 |
523 | **Q7.3: Performance Monitoring and Optimization**
524 |
525 | - **Question**: How can we monitor and optimize performance across the entire system?
526 | - **Priority**: MEDIUM
527 | - **Research Method**: Performance monitoring research, optimization strategies
528 | - **Success Criteria**: System-wide performance monitoring and optimization recommendations
529 | - **Timeline**: Week 4
530 | - **Dependencies**: Q7.1
531 |
532 | #### Quality Assurance and Validation
533 |
534 | **Q7.4: Integration Testing Strategies**
535 |
536 | - **Question**: What testing strategies ensure reliable operation across all components?
537 | - **Priority**: MEDIUM
538 | - **Research Method**: Testing framework research, integration test development
539 | - **Success Criteria**: Comprehensive integration tests with >95% coverage
540 | - **Timeline**: Week 4
541 | - **Dependencies**: All previous domains
542 |
543 | **Q7.5: User Acceptance Validation**
544 |
545 | - **Question**: How can we validate that the complete system meets user needs and expectations?
546 | - **Priority**: LOW
547 | - **Research Method**: User research, acceptance testing
548 | - **Success Criteria**: User satisfaction scores >85% in testing
549 | - **Timeline**: Week 4+
550 | - **Dependencies**: Q7.1, Q7.4
551 |
552 | ---
553 |
554 | ## Research Execution Framework
555 |
556 | ### Research Methodology
557 |
558 | 1. **Literature Review**: Systematic review of existing solutions and best practices
559 | 2. **Prototype Development**: Small-scale implementations to validate approaches
560 | 3. **Performance Testing**: Quantitative analysis of performance characteristics
561 | 4. **Expert Consultation**: Validation with domain experts and practitioners
562 | 5. **Community Research**: Analysis of community practices and feedback
563 |
564 | ### Success Criteria Framework
565 |
566 | Each research question includes:
567 |
568 | - **Quantitative Metrics**: Measurable success criteria
569 | - **Qualitative Assessments**: Expert validation and user feedback
570 | - **Risk Mitigation**: Identification of potential issues and solutions
571 | - **Implementation Guidance**: Actionable recommendations for development
572 |
573 | ### Documentation Requirements
574 |
575 | All research outcomes must be documented with:
576 |
577 | - **Executive Summary**: Key findings and recommendations
578 | - **Detailed Analysis**: Comprehensive research methodology and results
579 | - **Implementation Recommendations**: Specific guidance for development
580 | - **Risk Assessment**: Identified risks and mitigation strategies
581 | - **Follow-up Actions**: Additional research or validation needed
582 |
583 | ### Timeline and Prioritization
584 |
585 | **Week 1 Focus**: Critical path items (Q1.1, Q2.1, Q3.1, Q5.1)
586 | **Week 2 Focus**: High priority foundational research
587 | **Week 3 Focus**: Integration and optimization research
588 | **Week 4 Focus**: Advanced features and system integration
589 |
590 | ### Quality Assurance
591 |
592 | - **Peer Review**: All research findings reviewed by team members
593 | - **Expert Validation**: Critical decisions validated by external experts
594 | - **Prototype Validation**: Key approaches validated through working prototypes
595 | - **Documentation Standards**: All research properly documented and archived
596 |
597 | ---
598 |
599 | ## Research Output Organization
600 |
601 | ### File Structure
602 |
603 | ```
604 | docs/research/
605 | ├── research-questions-2025-01-14.md (this file)
606 | ├── domain-1-mcp-architecture/
607 | ├── domain-2-repository-analysis/
608 | ├── domain-3-ssg-recommendation/
609 | ├── domain-4-diataxis-integration/
610 | ├── domain-5-github-deployment/
611 | ├── domain-6-api-design/
612 | ├── cross-domain-integration/
613 | └── research-findings-summary.md
614 | ```
615 |
616 | ### Progress Tracking
617 |
618 | Research progress will be tracked using:
619 |
620 | - **Weekly Status Reports**: Progress on each research domain
621 | - **Risk Register**: Ongoing tracking of identified risks and mitigations
622 | - **Decision Log**: Record of key decisions made based on research findings
623 | - **Implementation Readiness Assessment**: Regular evaluation of readiness to begin development
624 |
625 | ---
626 |
627 | **Total Research Questions**: 47 questions across 6 domains
628 | **Critical Path Questions**: 6 questions requiring immediate attention
629 | **High Priority Questions**: 19 questions for weeks 1-2
630 | **Estimated Research Duration**: 4 weeks
631 | **Success Metrics**: Quantitative criteria for each research area
632 |
633 | This comprehensive research framework ensures systematic validation of all ADR decisions and provides the foundation for confident implementation of the DocuMCP project.
634 |
```
--------------------------------------------------------------------------------
/src/utils/drift-detector.ts:
--------------------------------------------------------------------------------
```typescript
1 | /**
2 | * Documentation Drift Detection System (Phase 3)
3 | *
4 | * Detects when code changes invalidate existing documentation
5 | * Provides automatic update suggestions based on code changes
6 | */
7 |
8 | import { promises as fs } from "fs";
9 | import path from "path";
10 | import { ASTAnalyzer, ASTAnalysisResult, CodeDiff } from "./ast-analyzer.js";
11 |
12 | export interface DriftDetectionResult {
13 | filePath: string;
14 | hasDrift: boolean;
15 | severity: "none" | "low" | "medium" | "high" | "critical";
16 | drifts: DocumentationDrift[];
17 | suggestions: DriftSuggestion[];
18 | impactAnalysis: ImpactAnalysis;
19 | }
20 |
21 | export interface DocumentationDrift {
22 | type: "outdated" | "incorrect" | "missing" | "breaking";
23 | affectedDocs: string[];
24 | codeChanges: CodeDiff[];
25 | description: string;
26 | detectedAt: string;
27 | severity: "low" | "medium" | "high" | "critical";
28 | }
29 |
30 | export interface DriftSuggestion {
31 | docFile: string;
32 | section: string;
33 | currentContent: string;
34 | suggestedContent: string;
35 | reasoning: string;
36 | confidence: number;
37 | autoApplicable: boolean;
38 | }
39 |
40 | export interface ImpactAnalysis {
41 | breakingChanges: number;
42 | majorChanges: number;
43 | minorChanges: number;
44 | affectedDocFiles: string[];
45 | estimatedUpdateEffort: "low" | "medium" | "high";
46 | requiresManualReview: boolean;
47 | }
48 |
49 | export interface DriftSnapshot {
50 | projectPath: string;
51 | timestamp: string;
52 | files: Map<string, ASTAnalysisResult>;
53 | documentation: Map<string, DocumentationSnapshot>;
54 | }
55 |
56 | export interface DocumentationSnapshot {
57 | filePath: string;
58 | contentHash: string;
59 | referencedCode: string[];
60 | lastUpdated: string;
61 | sections: DocumentationSection[];
62 | }
63 |
64 | export interface DocumentationSection {
65 | title: string;
66 | content: string;
67 | referencedFunctions: string[];
68 | referencedClasses: string[];
69 | referencedTypes: string[];
70 | codeExamples: CodeExample[];
71 | startLine: number;
72 | endLine: number;
73 | }
74 |
75 | export interface CodeExample {
76 | language: string;
77 | code: string;
78 | description: string;
79 | referencedSymbols: string[];
80 | }
81 |
82 | /**
83 | * Main Drift Detector class
84 | */
85 | export class DriftDetector {
86 | private analyzer: ASTAnalyzer;
87 | private snapshotDir: string;
88 | private currentSnapshot: DriftSnapshot | null = null;
89 | private previousSnapshot: DriftSnapshot | null = null;
90 |
91 | constructor(projectPath: string, snapshotDir?: string) {
92 | this.analyzer = new ASTAnalyzer();
93 | this.snapshotDir =
94 | snapshotDir || path.join(projectPath, ".documcp", "snapshots");
95 | }
96 |
97 | /**
98 | * Initialize the drift detector
99 | */
100 | async initialize(): Promise<void> {
101 | await this.analyzer.initialize();
102 | await fs.mkdir(this.snapshotDir, { recursive: true });
103 | }
104 |
105 | /**
106 | * Create a snapshot of the current codebase and documentation
107 | */
108 | async createSnapshot(
109 | projectPath: string,
110 | docsPath: string,
111 | ): Promise<DriftSnapshot> {
112 | const files = new Map<string, ASTAnalysisResult>();
113 | const documentation = new Map<string, DocumentationSnapshot>();
114 |
115 | // Analyze source files
116 | const sourceFiles = await this.findSourceFiles(projectPath);
117 | for (const filePath of sourceFiles) {
118 | const analysis = await this.analyzer.analyzeFile(filePath);
119 | if (analysis) {
120 | files.set(filePath, analysis);
121 | }
122 | }
123 |
124 | // Analyze documentation files
125 | const docFiles = await this.findDocumentationFiles(docsPath);
126 | for (const docPath of docFiles) {
127 | const docSnapshot = await this.analyzeDocumentation(docPath);
128 | if (docSnapshot) {
129 | documentation.set(docPath, docSnapshot);
130 | }
131 | }
132 |
133 | const snapshot: DriftSnapshot = {
134 | projectPath,
135 | timestamp: new Date().toISOString(),
136 | files,
137 | documentation,
138 | };
139 |
140 | // Save snapshot
141 | await this.saveSnapshot(snapshot);
142 |
143 | return snapshot;
144 | }
145 |
146 | /**
147 | * Detect drift between two snapshots
148 | */
149 | async detectDrift(
150 | oldSnapshot: DriftSnapshot,
151 | newSnapshot: DriftSnapshot,
152 | ): Promise<DriftDetectionResult[]> {
153 | const results: DriftDetectionResult[] = [];
154 |
155 | // Compare each file
156 | for (const [filePath, newAnalysis] of newSnapshot.files) {
157 | const oldAnalysis = oldSnapshot.files.get(filePath);
158 |
159 | if (!oldAnalysis) {
160 | // New file - check if documentation is needed
161 | continue;
162 | }
163 |
164 | // Detect code changes
165 | const codeDiffs = await this.analyzer.detectDrift(
166 | oldAnalysis,
167 | newAnalysis,
168 | );
169 |
170 | if (codeDiffs.length > 0) {
171 | // Find affected documentation
172 | const affectedDocs = this.findAffectedDocumentation(
173 | filePath,
174 | codeDiffs,
175 | newSnapshot.documentation,
176 | );
177 |
178 | // Report drift even if no documentation is affected
179 | // (missing documentation is also a type of drift)
180 | const driftResult = await this.analyzeDrift(
181 | filePath,
182 | codeDiffs,
183 | affectedDocs,
184 | oldSnapshot,
185 | newSnapshot,
186 | );
187 |
188 | results.push(driftResult);
189 | }
190 | }
191 |
192 | return results;
193 | }
194 |
195 | /**
196 | * Analyze drift and generate suggestions
197 | */
198 | private async analyzeDrift(
199 | filePath: string,
200 | codeDiffs: CodeDiff[],
201 | affectedDocs: string[],
202 | oldSnapshot: DriftSnapshot,
203 | newSnapshot: DriftSnapshot,
204 | ): Promise<DriftDetectionResult> {
205 | const drifts: DocumentationDrift[] = [];
206 | const suggestions: DriftSuggestion[] = [];
207 |
208 | // Categorize drifts by severity
209 | const breakingChanges = codeDiffs.filter(
210 | (d) => d.impactLevel === "breaking",
211 | );
212 | const majorChanges = codeDiffs.filter((d) => d.impactLevel === "major");
213 | const minorChanges = codeDiffs.filter((d) => d.impactLevel === "minor");
214 |
215 | // Create drift entries
216 | for (const diff of codeDiffs) {
217 | const drift: DocumentationDrift = {
218 | type: this.determineDriftType(diff),
219 | affectedDocs,
220 | codeChanges: [diff],
221 | description: this.generateDriftDescription(diff),
222 | detectedAt: new Date().toISOString(),
223 | severity: this.mapImpactToSeverity(diff.impactLevel),
224 | };
225 |
226 | drifts.push(drift);
227 |
228 | // Generate suggestions for each affected doc
229 | for (const docPath of affectedDocs) {
230 | const docSnapshot = newSnapshot.documentation.get(docPath);
231 | if (docSnapshot) {
232 | const docSuggestions = await this.generateSuggestions(
233 | diff,
234 | docSnapshot,
235 | newSnapshot,
236 | );
237 | suggestions.push(...docSuggestions);
238 | }
239 | }
240 | }
241 |
242 | const impactAnalysis: ImpactAnalysis = {
243 | breakingChanges: breakingChanges.length,
244 | majorChanges: majorChanges.length,
245 | minorChanges: minorChanges.length,
246 | affectedDocFiles: affectedDocs,
247 | estimatedUpdateEffort: this.estimateUpdateEffort(drifts),
248 | requiresManualReview:
249 | breakingChanges.length > 0 || majorChanges.length > 3,
250 | };
251 |
252 | const severity = this.calculateOverallSeverity(drifts);
253 |
254 | return {
255 | filePath,
256 | hasDrift: drifts.length > 0,
257 | severity,
258 | drifts,
259 | suggestions,
260 | impactAnalysis,
261 | };
262 | }
263 |
264 | /**
265 | * Generate update suggestions for documentation
266 | */
267 | private async generateSuggestions(
268 | diff: CodeDiff,
269 | docSnapshot: DocumentationSnapshot,
270 | snapshot: DriftSnapshot,
271 | ): Promise<DriftSuggestion[]> {
272 | const suggestions: DriftSuggestion[] = [];
273 |
274 | // Find sections that reference the changed code
275 | for (const section of docSnapshot.sections) {
276 | const isAffected = this.isSectionAffected(section, diff);
277 |
278 | if (isAffected) {
279 | const suggestion = await this.createSuggestion(
280 | diff,
281 | docSnapshot,
282 | section,
283 | snapshot,
284 | );
285 |
286 | if (suggestion) {
287 | suggestions.push(suggestion);
288 | }
289 | }
290 | }
291 |
292 | return suggestions;
293 | }
294 |
295 | /**
296 | * Create a specific suggestion for a documentation section
297 | */
298 | private async createSuggestion(
299 | diff: CodeDiff,
300 | docSnapshot: DocumentationSnapshot,
301 | section: DocumentationSection,
302 | snapshot: DriftSnapshot,
303 | ): Promise<DriftSuggestion | null> {
304 | let suggestedContent = section.content;
305 | let reasoning = "";
306 | let confidence = 0.5;
307 | let autoApplicable = false;
308 |
309 | switch (diff.type) {
310 | case "removed":
311 | reasoning = `The ${diff.category} '${diff.name}' has been removed from the codebase. This section should be updated or removed.`;
312 | suggestedContent = this.generateRemovalSuggestion(section, diff);
313 | confidence = 0.8;
314 | autoApplicable = false;
315 | break;
316 |
317 | case "added":
318 | reasoning = `A new ${diff.category} '${diff.name}' has been added. Consider documenting it.`;
319 | suggestedContent = this.generateAdditionSuggestion(
320 | section,
321 | diff,
322 | snapshot,
323 | );
324 | confidence = 0.6;
325 | autoApplicable = false;
326 | break;
327 |
328 | case "modified":
329 | reasoning = `The ${diff.category} '${diff.name}' has been modified: ${diff.details}`;
330 | suggestedContent = this.generateModificationSuggestion(
331 | section,
332 | diff,
333 | snapshot,
334 | );
335 | confidence = 0.7;
336 | autoApplicable = diff.impactLevel === "patch";
337 | break;
338 | }
339 |
340 | return {
341 | docFile: docSnapshot.filePath,
342 | section: section.title,
343 | currentContent: section.content,
344 | suggestedContent,
345 | reasoning,
346 | confidence,
347 | autoApplicable,
348 | };
349 | }
350 |
351 | /**
352 | * Generate suggestion for removed code
353 | */
354 | private generateRemovalSuggestion(
355 | section: DocumentationSection,
356 | diff: CodeDiff,
357 | ): string {
358 | let content = section.content;
359 |
360 | // Remove references to the deleted symbol
361 | const symbolRegex = new RegExp(`\\b${diff.name}\\b`, "g");
362 | content = content.replace(symbolRegex, `~~${diff.name}~~ (removed)`);
363 |
364 | // Add deprecation notice
365 | const notice = `\n\n> **Note**: The \`${diff.name}\` ${diff.category} has been removed in the latest version.\n`;
366 | content = notice + content;
367 |
368 | return content;
369 | }
370 |
371 | /**
372 | * Generate suggestion for added code
373 | */
374 | private generateAdditionSuggestion(
375 | section: DocumentationSection,
376 | diff: CodeDiff,
377 | _snapshot: DriftSnapshot,
378 | ): string {
379 | let content = section.content;
380 |
381 | // Add new section for the added symbol
382 | const additionNotice = `\n\n## ${diff.name}\n\nA new ${diff.category} has been added.\n\n`;
383 |
384 | // Try to extract signature if available
385 | if (diff.newSignature) {
386 | content +=
387 | additionNotice + `\`\`\`typescript\n${diff.newSignature}\n\`\`\`\n`;
388 | } else {
389 | content +=
390 | additionNotice +
391 | `> **Documentation needed**: Please document the \`${diff.name}\` ${diff.category}.\n`;
392 | }
393 |
394 | return content;
395 | }
396 |
397 | /**
398 | * Generate suggestion for modified code
399 | */
400 | private generateModificationSuggestion(
401 | section: DocumentationSection,
402 | diff: CodeDiff,
403 | _snapshot: DriftSnapshot,
404 | ): string {
405 | let content = section.content;
406 |
407 | // Update signature references
408 | if (diff.oldSignature && diff.newSignature) {
409 | content = content.replace(diff.oldSignature, diff.newSignature);
410 | }
411 |
412 | // Add update notice
413 | const updateNotice = `\n\n> **Updated**: ${diff.details}\n`;
414 | content = updateNotice + content;
415 |
416 | return content;
417 | }
418 |
419 | /**
420 | * Check if a section is affected by a code change
421 | */
422 | private isSectionAffected(
423 | section: DocumentationSection,
424 | diff: CodeDiff,
425 | ): boolean {
426 | switch (diff.category) {
427 | case "function":
428 | return section.referencedFunctions.includes(diff.name);
429 | case "class":
430 | return section.referencedClasses.includes(diff.name);
431 | case "interface":
432 | case "type":
433 | return section.referencedTypes.includes(diff.name);
434 | default:
435 | return false;
436 | }
437 | }
438 |
439 | /**
440 | * Find documentation files that reference changed code
441 | */
442 | private findAffectedDocumentation(
443 | filePath: string,
444 | codeDiffs: CodeDiff[],
445 | documentation: Map<string, DocumentationSnapshot>,
446 | ): string[] {
447 | const affected: string[] = [];
448 |
449 | for (const [docPath, docSnapshot] of documentation) {
450 | // Check if doc references the changed file
451 | if (docSnapshot.referencedCode.includes(filePath)) {
452 | affected.push(docPath);
453 | continue;
454 | }
455 |
456 | // Check if doc references changed symbols
457 | for (const diff of codeDiffs) {
458 | for (const section of docSnapshot.sections) {
459 | if (this.isSectionAffected(section, diff)) {
460 | affected.push(docPath);
461 | break;
462 | }
463 | }
464 | }
465 | }
466 |
467 | return [...new Set(affected)];
468 | }
469 |
470 | /**
471 | * Analyze a documentation file
472 | */
473 | private async analyzeDocumentation(
474 | docPath: string,
475 | ): Promise<DocumentationSnapshot | null> {
476 | try {
477 | const content = await fs.readFile(docPath, "utf-8");
478 | const crypto = await import("crypto");
479 | const contentHash = crypto
480 | .createHash("sha256")
481 | .update(content)
482 | .digest("hex");
483 | const stats = await fs.stat(docPath);
484 |
485 | const sections = this.extractDocSections(content);
486 | const referencedCode = this.extractCodeReferences(content);
487 |
488 | return {
489 | filePath: docPath,
490 | contentHash,
491 | referencedCode,
492 | lastUpdated: stats.mtime.toISOString(),
493 | sections,
494 | };
495 | } catch (error) {
496 | console.warn(`Failed to analyze documentation ${docPath}:`, error);
497 | return null;
498 | }
499 | }
500 |
501 | /**
502 | * Extract sections from documentation
503 | */
504 | private extractDocSections(content: string): DocumentationSection[] {
505 | const sections: DocumentationSection[] = [];
506 | const lines = content.split("\n");
507 | let currentSection: Partial<DocumentationSection> | null = null;
508 | let currentContent: string[] = [];
509 |
510 | for (let i = 0; i < lines.length; i++) {
511 | const line = lines[i];
512 |
513 | // Detect headings
514 | const headingMatch = line.match(/^(#{1,6})\s+(.+)/);
515 | if (headingMatch) {
516 | // Save previous section
517 | if (currentSection) {
518 | currentSection.content = currentContent.join("\n");
519 | currentSection.endLine = i - 1;
520 | sections.push(currentSection as DocumentationSection);
521 | }
522 |
523 | const title = headingMatch[2];
524 | const referencedFunctions: string[] = [];
525 | const referencedClasses: string[] = [];
526 |
527 | // Extract function name from heading if it looks like a function signature
528 | // e.g., "## calculate(x: number): number" or "## myFunction()"
529 | const funcMatch = title.match(/^([a-z][A-Za-z0-9_]*)\s*\(/);
530 | if (funcMatch) {
531 | referencedFunctions.push(funcMatch[1]);
532 | }
533 |
534 | // Extract class name from heading if it starts with uppercase
535 | const classMatch = title.match(/^([A-Z][A-Za-z0-9_]*)/);
536 | if (classMatch && !funcMatch) {
537 | referencedClasses.push(classMatch[1]);
538 | }
539 |
540 | // Start new section
541 | currentSection = {
542 | title,
543 | startLine: i,
544 | referencedFunctions,
545 | referencedClasses,
546 | referencedTypes: [],
547 | codeExamples: [],
548 | };
549 | currentContent = [];
550 | } else if (currentSection) {
551 | currentContent.push(line);
552 |
553 | // Extract code examples
554 | if (line.startsWith("```")) {
555 | const langMatch = line.match(/```(\w+)/);
556 | const language = langMatch ? langMatch[1] : "text";
557 | const codeLines: string[] = [];
558 | i++;
559 |
560 | while (i < lines.length && !lines[i].startsWith("```")) {
561 | codeLines.push(lines[i]);
562 | i++;
563 | }
564 |
565 | const codeExample: CodeExample = {
566 | language,
567 | code: codeLines.join("\n"),
568 | description: "",
569 | referencedSymbols: this.extractSymbolsFromCode(
570 | codeLines.join("\n"),
571 | ),
572 | };
573 |
574 | currentSection.codeExamples!.push(codeExample);
575 | }
576 |
577 | // Extract inline code references (with or without parentheses for functions)
578 | const inlineCodeMatches = line.matchAll(
579 | /`([A-Za-z_][A-Za-z0-9_]*)\(\)?`/g,
580 | );
581 | for (const match of inlineCodeMatches) {
582 | const symbol = match[1];
583 | // Heuristic: CamelCase = class/type, camelCase = function
584 | if (/^[A-Z]/.test(symbol)) {
585 | if (!currentSection.referencedClasses!.includes(symbol)) {
586 | currentSection.referencedClasses!.push(symbol);
587 | }
588 | } else {
589 | if (!currentSection.referencedFunctions!.includes(symbol)) {
590 | currentSection.referencedFunctions!.push(symbol);
591 | }
592 | }
593 | }
594 |
595 | // Also extract identifiers without parentheses
596 | const plainIdentifiers = line.matchAll(/`([A-Za-z_][A-Za-z0-9_]*)`/g);
597 | for (const match of plainIdentifiers) {
598 | const symbol = match[1];
599 | if (/^[A-Z]/.test(symbol)) {
600 | if (!currentSection.referencedClasses!.includes(symbol)) {
601 | currentSection.referencedClasses!.push(symbol);
602 | }
603 | } else {
604 | if (!currentSection.referencedFunctions!.includes(symbol)) {
605 | currentSection.referencedFunctions!.push(symbol);
606 | }
607 | }
608 | }
609 | }
610 | }
611 |
612 | // Save last section
613 | if (currentSection) {
614 | currentSection.content = currentContent.join("\n");
615 | currentSection.endLine = lines.length - 1;
616 | sections.push(currentSection as DocumentationSection);
617 | }
618 |
619 | return sections;
620 | }
621 |
622 | /**
623 | * Extract code file references from documentation
624 | */
625 | private extractCodeReferences(content: string): string[] {
626 | const references: string[] = [];
627 |
628 | // Extract from markdown links
629 | const linkMatches = content.matchAll(
630 | /\[.*?\]\((.*?\.(ts|js|py|go|rs|java|rb).*?)\)/g,
631 | );
632 | for (const match of linkMatches) {
633 | references.push(match[1]);
634 | }
635 |
636 | // Extract from inline code
637 | const codeMatches = content.matchAll(
638 | /`([^`]+\.(ts|js|py|go|rs|java|rb))`/g,
639 | );
640 | for (const match of codeMatches) {
641 | references.push(match[1]);
642 | }
643 |
644 | return [...new Set(references)];
645 | }
646 |
647 | /**
648 | * Extract symbols from code examples
649 | */
650 | private extractSymbolsFromCode(code: string): string[] {
651 | const symbols: string[] = [];
652 |
653 | // Extract function calls
654 | const functionMatches = code.matchAll(/\b([a-z][A-Za-z0-9_]*)\s*\(/g);
655 | for (const match of functionMatches) {
656 | symbols.push(match[1]);
657 | }
658 |
659 | // Extract class/type references
660 | const classMatches = code.matchAll(/\b([A-Z][A-Za-z0-9_]*)\b/g);
661 | for (const match of classMatches) {
662 | symbols.push(match[1]);
663 | }
664 |
665 | return [...new Set(symbols)];
666 | }
667 |
668 | /**
669 | * Find all source files in project
670 | */
671 | private async findSourceFiles(projectPath: string): Promise<string[]> {
672 | const files: string[] = [];
673 | const extensions = [
674 | ".ts",
675 | ".tsx",
676 | ".js",
677 | ".jsx",
678 | ".py",
679 | ".go",
680 | ".rs",
681 | ".java",
682 | ".rb",
683 | ];
684 |
685 | const walk = async (dir: string) => {
686 | try {
687 | const entries = await fs.readdir(dir, { withFileTypes: true });
688 |
689 | for (const entry of entries) {
690 | const fullPath = path.join(dir, entry.name);
691 |
692 | if (entry.isDirectory()) {
693 | if (
694 | !["node_modules", "dist", "build", ".git", ".next"].includes(
695 | entry.name,
696 | )
697 | ) {
698 | await walk(fullPath);
699 | }
700 | } else {
701 | const ext = path.extname(entry.name);
702 | if (extensions.includes(ext)) {
703 | files.push(fullPath);
704 | }
705 | }
706 | }
707 | } catch (error) {
708 | console.warn(`Failed to read directory ${dir}:`, error);
709 | }
710 | };
711 |
712 | await walk(projectPath);
713 | return files;
714 | }
715 |
716 | /**
717 | * Find all documentation files
718 | */
719 | private async findDocumentationFiles(docsPath: string): Promise<string[]> {
720 | const files: string[] = [];
721 |
722 | const walk = async (dir: string) => {
723 | try {
724 | const entries = await fs.readdir(dir, { withFileTypes: true });
725 |
726 | for (const entry of entries) {
727 | const fullPath = path.join(dir, entry.name);
728 |
729 | if (entry.isDirectory()) {
730 | await walk(fullPath);
731 | } else if (
732 | entry.name.endsWith(".md") ||
733 | entry.name.endsWith(".mdx")
734 | ) {
735 | files.push(fullPath);
736 | }
737 | }
738 | } catch (error) {
739 | console.warn(`Failed to read documentation directory ${dir}:`, error);
740 | }
741 | };
742 |
743 | try {
744 | await walk(docsPath);
745 | } catch {
746 | // Docs path doesn't exist
747 | }
748 |
749 | return files;
750 | }
751 |
752 | /**
753 | * Save snapshot to disk
754 | */
755 | private async saveSnapshot(snapshot: DriftSnapshot): Promise<void> {
756 | const timestamp = new Date().toISOString().replace(/:/g, "-");
757 | const snapshotPath = path.join(
758 | this.snapshotDir,
759 | `snapshot-${timestamp}.json`,
760 | );
761 |
762 | // Convert Maps to objects for JSON serialization
763 | const serializable = {
764 | projectPath: snapshot.projectPath,
765 | timestamp: snapshot.timestamp,
766 | files: Object.fromEntries(snapshot.files),
767 | documentation: Object.fromEntries(snapshot.documentation),
768 | };
769 |
770 | await fs.writeFile(snapshotPath, JSON.stringify(serializable, null, 2));
771 | }
772 |
773 | /**
774 | * Load the latest snapshot
775 | */
776 | async loadLatestSnapshot(): Promise<DriftSnapshot | null> {
777 | try {
778 | const files = await fs.readdir(this.snapshotDir);
779 | const snapshotFiles = files
780 | .filter((f) => f.startsWith("snapshot-"))
781 | .sort()
782 | .reverse();
783 |
784 | if (snapshotFiles.length === 0) return null;
785 |
786 | const latestPath = path.join(this.snapshotDir, snapshotFiles[0]);
787 | const content = await fs.readFile(latestPath, "utf-8");
788 | const data = JSON.parse(content);
789 |
790 | return {
791 | projectPath: data.projectPath,
792 | timestamp: data.timestamp,
793 | files: new Map(Object.entries(data.files)),
794 | documentation: new Map(Object.entries(data.documentation)),
795 | };
796 | } catch {
797 | return null;
798 | }
799 | }
800 |
801 | // Helper methods
802 |
803 | private determineDriftType(
804 | diff: CodeDiff,
805 | ): "outdated" | "incorrect" | "missing" | "breaking" {
806 | if (diff.impactLevel === "breaking") return "breaking";
807 | if (diff.type === "removed") return "incorrect";
808 | if (diff.type === "modified") return "outdated";
809 | return "missing";
810 | }
811 |
812 | private generateDriftDescription(diff: CodeDiff): string {
813 | const action =
814 | diff.type === "added"
815 | ? "added"
816 | : diff.type === "removed"
817 | ? "removed"
818 | : "modified";
819 | return `${diff.category} '${diff.name}' was ${action}: ${diff.details}`;
820 | }
821 |
822 | private mapImpactToSeverity(
823 | impact: "breaking" | "major" | "minor" | "patch",
824 | ): "low" | "medium" | "high" | "critical" {
825 | switch (impact) {
826 | case "breaking":
827 | return "critical";
828 | case "major":
829 | return "high";
830 | case "minor":
831 | return "medium";
832 | case "patch":
833 | return "low";
834 | }
835 | }
836 |
837 | private estimateUpdateEffort(
838 | drifts: DocumentationDrift[],
839 | ): "low" | "medium" | "high" {
840 | const critical = drifts.filter((d) => d.severity === "critical").length;
841 | const high = drifts.filter((d) => d.severity === "high").length;
842 |
843 | if (critical > 0 || high > 5) return "high";
844 | if (high > 0 || drifts.length > 10) return "medium";
845 | return "low";
846 | }
847 |
848 | private calculateOverallSeverity(
849 | drifts: DocumentationDrift[],
850 | ): "none" | "low" | "medium" | "high" | "critical" {
851 | if (drifts.length === 0) return "none";
852 |
853 | const hasCritical = drifts.some((d) => d.severity === "critical");
854 | if (hasCritical) return "critical";
855 |
856 | const hasHigh = drifts.some((d) => d.severity === "high");
857 | if (hasHigh) return "high";
858 |
859 | const hasMedium = drifts.some((d) => d.severity === "medium");
860 | if (hasMedium) return "medium";
861 |
862 | return "low";
863 | }
864 | }
865 |
```
--------------------------------------------------------------------------------
/src/prompts/technical-writer-prompts.ts:
--------------------------------------------------------------------------------
```typescript
1 | import { promises as fs } from "fs";
2 | import { join } from "path";
3 |
4 | export interface ProjectContext {
5 | projectType: string;
6 | languages: string[];
7 | frameworks: string[];
8 | hasTests: boolean;
9 | hasCI: boolean;
10 | readmeExists: boolean;
11 | packageManager?: string;
12 | documentationGaps: string[];
13 | }
14 |
15 | export interface PromptMessage {
16 | role: "user" | "assistant" | "system";
17 | content: {
18 | type: "text";
19 | text: string;
20 | };
21 | }
22 |
23 | export async function analyzeProjectContext(
24 | projectPath: string,
25 | ): Promise<ProjectContext> {
26 | const context: ProjectContext = {
27 | projectType: "unknown",
28 | languages: [],
29 | frameworks: [],
30 | hasTests: false,
31 | hasCI: false,
32 | readmeExists: false,
33 | documentationGaps: [],
34 | };
35 |
36 | // Check for README
37 | context.readmeExists = await fileExists(join(projectPath, "README.md"));
38 |
39 | // Analyze package.json for Node.js projects
40 | const packageJsonPath = join(projectPath, "package.json");
41 | if (await fileExists(packageJsonPath)) {
42 | try {
43 | const packageJson = JSON.parse(
44 | await fs.readFile(packageJsonPath, "utf-8"),
45 | );
46 | const deps = {
47 | ...packageJson.dependencies,
48 | ...packageJson.devDependencies,
49 | };
50 |
51 | context.projectType = "node_application";
52 | context.languages.push("JavaScript");
53 |
54 | // Detect frameworks
55 | if (deps["react"]) context.frameworks.push("React");
56 | if (deps["vue"]) context.frameworks.push("Vue");
57 | if (deps["angular"]) context.frameworks.push("Angular");
58 | if (deps["express"]) context.frameworks.push("Express");
59 | if (deps["next"]) context.frameworks.push("Next.js");
60 | if (deps["nuxt"]) context.frameworks.push("Nuxt.js");
61 | if (deps["svelte"]) context.frameworks.push("Svelte");
62 | if (deps["typescript"]) context.languages.push("TypeScript");
63 |
64 | // Detect package manager
65 | if (await fileExists(join(projectPath, "yarn.lock"))) {
66 | context.packageManager = "yarn";
67 | } else if (await fileExists(join(projectPath, "pnpm-lock.yaml"))) {
68 | context.packageManager = "pnpm";
69 | } else {
70 | context.packageManager = "npm";
71 | }
72 | } catch (error) {
73 | // If package.json exists but can't be parsed, continue with other detections
74 | console.warn("Failed to parse package.json:", error);
75 | }
76 | }
77 |
78 | // Check for Python projects
79 | if (
80 | (await fileExists(join(projectPath, "requirements.txt"))) ||
81 | (await fileExists(join(projectPath, "pyproject.toml"))) ||
82 | (await fileExists(join(projectPath, "setup.py")))
83 | ) {
84 | context.projectType = "python_application";
85 | context.languages.push("Python");
86 | }
87 |
88 | // Check for Go projects
89 | if (await fileExists(join(projectPath, "go.mod"))) {
90 | context.projectType = "go_application";
91 | context.languages.push("Go");
92 | }
93 |
94 | // Check for Rust projects
95 | if (await fileExists(join(projectPath, "Cargo.toml"))) {
96 | context.projectType = "rust_application";
97 | context.languages.push("Rust");
98 | }
99 |
100 | // Check for tests
101 | context.hasTests = await hasTestFiles(projectPath);
102 |
103 | // Check for CI/CD
104 | context.hasCI = await hasCIConfig(projectPath);
105 |
106 | // Identify documentation gaps
107 | context.documentationGaps = await identifyDocumentationGaps(
108 | projectPath,
109 | context,
110 | );
111 |
112 | return context;
113 | }
114 |
115 | export async function generateTechnicalWriterPrompts(
116 | promptType: string,
117 | projectPath: string,
118 | args: Record<string, any> = {},
119 | ): Promise<PromptMessage[]> {
120 | const context = await analyzeProjectContext(projectPath);
121 |
122 | switch (promptType) {
123 | case "tutorial-writer":
124 | return generateTutorialWriterPrompt(context, args);
125 | case "howto-guide-writer":
126 | return generateHowToGuideWriterPrompt(context, args);
127 | case "reference-writer":
128 | return generateReferenceWriterPrompt(context, args);
129 | case "explanation-writer":
130 | return generateExplanationWriterPrompt(context, args);
131 | case "diataxis-organizer":
132 | return generateDiataxisOrganizerPrompt(context, args);
133 | case "readme-optimizer":
134 | return generateReadmeOptimizerPrompt(context, args);
135 | case "analyze-and-recommend":
136 | return generateAnalyzeAndRecommendPrompt(context, args);
137 | case "setup-documentation":
138 | return generateSetupDocumentationPrompt(context, args);
139 | case "troubleshoot-deployment":
140 | return generateTroubleshootDeploymentPrompt(context, args);
141 | case "maintain-documentation-freshness":
142 | return generateMaintainDocumentationFreshnessPrompt(context, args);
143 | default:
144 | throw new Error(`Unknown prompt type: ${promptType}`);
145 | }
146 | }
147 |
/**
 * Build a Diataxis tutorial-writing prompt (learning-oriented content).
 *
 * @param context - Analyzed project characteristics used to ground the prompt.
 * @param args - Optional overrides: `target_audience`, `learning_goal`.
 */
function generateTutorialWriterPrompt(
  context: ProjectContext,
  args: Record<string, any>,
): PromptMessage[] {
  const targetAudience = args.target_audience || "beginners";
  const learningGoal = args.learning_goal || "get started with the project";

  return [
    {
      role: "user",
      content: {
        type: "text",
        text: `Create a comprehensive tutorial for a ${
          context.projectType
        } project following Diataxis framework principles.

**Project Context:**
- Type: ${context.projectType}
- Languages: ${context.languages.join(", ")}
- Frameworks: ${context.frameworks.join(", ")}
- Package Manager: ${context.packageManager || "N/A"}
- Target Audience: ${targetAudience}
- Learning Goal: ${learningGoal}

**Diataxis Tutorial Requirements:**
1. Learning-oriented: Focus on helping users learn by doing
2. Step-by-step progression from simple to complex
3. Practical exercises with clear outcomes
4. Safe-to-fail environment for experimentation
5. Minimal explanation - focus on action

**Tutorial Structure:**
1. Prerequisites and setup
2. Step-by-step guided exercises
3. What you'll build/learn
4. Hands-on activities with immediate feedback
5. Next steps for continued learning

**Integration Hints:**
- Use analyze_repository for project structure insights
- Reference setup_development_environment for environment setup
- Consider validate_tutorial_steps for step verification

Please create a tutorial that teaches through guided practice:`,
      },
    },
  ];
}
196 |
/**
 * Build a Diataxis how-to-guide prompt (problem/goal-oriented content).
 *
 * @param context - Analyzed project characteristics used to ground the prompt.
 * @param args - Optional overrides: `problem`, `user_experience`.
 */
function generateHowToGuideWriterPrompt(
  context: ProjectContext,
  args: Record<string, any>,
): PromptMessage[] {
  const problemToSolve = args.problem || "common development task";
  const userExperience = args.user_experience || "intermediate";

  return [
    {
      role: "user",
      content: {
        type: "text",
        text: `Create a practical how-to guide for a ${
          context.projectType
        } project following Diataxis framework principles.

**Project Context:**
- Type: ${context.projectType}
- Languages: ${context.languages.join(", ")}
- Frameworks: ${context.frameworks.join(", ")}
- Problem to Solve: ${problemToSolve}
- User Experience Level: ${userExperience}

**Diataxis How-to Guide Requirements:**
1. Problem-oriented: Address specific real-world problems
2. Goal-focused: Clear objective and success criteria
3. Action-oriented: Direct, actionable steps
4. Assume prior knowledge appropriate to user level
5. Practical and immediately applicable

**How-to Guide Structure:**
1. Problem statement and context
2. Prerequisites and assumptions
3. Step-by-step solution
4. Verification and testing
5. Troubleshooting common issues
6. Related tasks and variations

**Integration Hints:**
- Use analyze_codebase for understanding current implementation
- Reference best_practices for recommended approaches
- Consider validate_solution for testing guidance

Please create a how-to guide that solves real problems:`,
      },
    },
  ];
}
245 |
/**
 * Build a Diataxis reference-documentation prompt (information-oriented).
 *
 * @param context - Analyzed project characteristics used to ground the prompt.
 * @param args - Optional overrides: `reference_type`, `completeness`.
 */
function generateReferenceWriterPrompt(
  context: ProjectContext,
  args: Record<string, any>,
): PromptMessage[] {
  const referenceType = args.reference_type || "API";
  const completeness = args.completeness || "comprehensive";

  return [
    {
      role: "user",
      content: {
        type: "text",
        text: `Create comprehensive reference documentation for a ${
          context.projectType
        } project following Diataxis framework principles.

**Project Context:**
- Type: ${context.projectType}
- Languages: ${context.languages.join(", ")}
- Frameworks: ${context.frameworks.join(", ")}
- Reference Type: ${referenceType}
- Completeness Level: ${completeness}

**Diataxis Reference Requirements:**
1. Information-oriented: Provide complete, accurate information
2. Structured and consistent organization
3. Comprehensive coverage of all features/APIs
4. Neutral tone - describe what is, not how to use
5. Easy to scan and search

**Reference Structure:**
1. Overview and organization
2. Complete feature/API listings
3. Parameters, return values, examples
4. Technical specifications
5. Cross-references and relationships
6. Version compatibility information

**Integration Hints:**
- Use analyze_api_endpoints for API documentation
- Reference code_analysis for implementation details
- Consider validate_completeness for coverage verification

Please create reference documentation that serves as the authoritative source:`,
      },
    },
  ];
}
294 |
/**
 * Build a Diataxis explanation prompt (understanding-oriented content).
 *
 * @param context - Analyzed project characteristics used to ground the prompt.
 * @param args - Optional overrides: `concept`, `depth`.
 */
function generateExplanationWriterPrompt(
  context: ProjectContext,
  args: Record<string, any>,
): PromptMessage[] {
  const conceptToExplain = args.concept || "system architecture";
  const depth = args.depth || "detailed";

  return [
    {
      role: "user",
      content: {
        type: "text",
        text: `Create in-depth explanation documentation for a ${
          context.projectType
        } project following Diataxis framework principles.

**Project Context:**
- Type: ${context.projectType}
- Languages: ${context.languages.join(", ")}
- Frameworks: ${context.frameworks.join(", ")}
- Concept to Explain: ${conceptToExplain}
- Depth Level: ${depth}

**Diataxis Explanation Requirements:**
1. Understanding-oriented: Help users understand concepts
2. Theoretical and conceptual focus
3. Provide context and background
4. Explain why things work the way they do
5. Connect ideas and show relationships

**Explanation Structure:**
1. Introduction and context
2. Core concepts and principles
3. How components relate and interact
4. Design decisions and trade-offs
5. Historical context and evolution
6. Implications and consequences

**Integration Hints:**
- Use analyze_architecture for system understanding
- Reference design_patterns for architectural insights
- Consider validate_understanding for comprehension checks

Please create explanatory content that builds deep understanding:`,
      },
    },
  ];
}
343 |
/**
 * Build a prompt for reorganizing existing docs into Diataxis quadrants.
 *
 * @param context - Analyzed project characteristics used to ground the prompt.
 * @param args - Optional overrides: `current_docs`, `priority`.
 */
function generateDiataxisOrganizerPrompt(
  context: ProjectContext,
  args: Record<string, any>,
): PromptMessage[] {
  const currentDocs = args.current_docs || "mixed documentation";
  const priority = args.priority || "user needs";

  return [
    {
      role: "user",
      content: {
        type: "text",
        text: `Organize existing documentation for a ${
          context.projectType
        } project using Diataxis framework principles.

**Project Context:**
- Type: ${context.projectType}
- Languages: ${context.languages.join(", ")}
- Current Documentation: ${currentDocs}
- Organization Priority: ${priority}

**Diataxis Organization Requirements:**
1. Categorize content into four types: Tutorials, How-to guides, Reference, Explanation
2. Ensure each piece serves its intended purpose
3. Create clear navigation between content types
4. Identify gaps and overlaps
5. Establish content relationships and cross-references

**Organization Structure:**
1. Content audit and classification
2. Diataxis quadrant mapping
3. Navigation and information architecture
4. Content gap analysis
5. Cross-reference strategy
6. Migration and improvement plan

**Integration Hints:**
- Use analyze_existing_docs for current state assessment
- Reference content_classification for categorization
- Consider validate_organization for structure verification

Please organize documentation according to Diataxis principles:`,
      },
    },
  ];
}
391 |
/**
 * Build a prompt for optimizing a README with Diataxis-aware structure.
 *
 * @param context - Analyzed project characteristics used to ground the prompt.
 * @param args - Optional override: `optimization_focus`.
 */
function generateReadmeOptimizerPrompt(
  context: ProjectContext,
  args: Record<string, any>,
): PromptMessage[] {
  const optimizationFocus = args.optimization_focus || "general";

  return [
    {
      role: "user",
      content: {
        type: "text",
        text: `Optimize existing README content for a ${
          context.projectType
        } project using Diataxis-aware principles.

**Project Context:**
- Type: ${context.projectType}
- Languages: ${context.languages.join(", ")}
- README Exists: ${context.readmeExists}
- Documentation Gaps: ${
          context.documentationGaps.join(", ") || "None identified"
        }
- Optimization Focus: ${optimizationFocus}

**Diataxis-Aware README Requirements:**
1. Clear content type identification (tutorial, how-to, reference, explanation)
2. Appropriate depth for each content type
3. Logical flow from learning to doing to understanding
4. Clear navigation to detailed documentation
5. Audience-appropriate entry points

**README Structure (Diataxis-organized):**
1. Quick start (tutorial-style for beginners)
2. Common tasks (how-to style for users)
3. API/feature overview (reference-style for developers)
4. Architecture overview (explanation-style for understanding)
5. Links to detailed Diataxis-organized documentation

**Integration Hints:**
- Use analyze_readme for current content analysis
- Reference diataxis_principles for content organization
- Consider validate_readme_structure for optimization verification

Please optimize the README with Diataxis awareness:`,
      },
    },
  ];
}
440 |
441 | // Helper functions
442 | async function fileExists(path: string): Promise<boolean> {
443 | try {
444 | await fs.access(path);
445 | return true;
446 | } catch {
447 | return false;
448 | }
449 | }
450 |
/**
 * Build a prompt guiding documentation-freshness maintenance
 * (validate / track / insights workflows backed by the knowledge graph).
 *
 * @param context - Analyzed project characteristics used to ground the prompt.
 * @param args - Optional overrides: `docs_path`, `freshness_preset`, `action`.
 */
function generateMaintainDocumentationFreshnessPrompt(
  context: ProjectContext,
  args: Record<string, any>,
): PromptMessage[] {
  const docsPath = args.docs_path || "docs";
  const freshnessPreset = args.freshness_preset || "monthly";
  const action = args.action || "track";

  // Human-readable description of each supported action; used in both the
  // header line and the closing instruction of the prompt. Unknown actions
  // fall back to the "track staleness" wording in the template below.
  const actionDescriptions = {
    validate:
      "Initialize freshness metadata for documentation files that don't have it yet",
    track:
      "Scan all documentation for staleness and generate a freshness report",
    insights: "Analyze freshness trends over time and get recommendations",
  };

  return [
    {
      role: "user",
      content: {
        type: "text",
        text: `Maintain documentation freshness for a ${
          context.projectType
        } project with automated staleness tracking.

**Project Context:**
- Type: ${context.projectType}
- Languages: ${context.languages.join(", ")}
- Documentation Path: ${docsPath}
- Freshness Preset: ${freshnessPreset}
- Action: ${action} (${
          actionDescriptions[action as keyof typeof actionDescriptions] ||
          "track staleness"
        })

**Documentation Freshness Tracking:**
Documentation freshness tracking helps maintain high-quality, up-to-date documentation by:
1. Adding temporal metadata to markdown frontmatter (last_updated, last_validated)
2. Scanning documentation for staleness based on configurable thresholds
3. Providing insights and trends over time using the knowledge graph
4. Generating recommendations for which files need attention

**Available Actions:**

1. **Validate** (${action === "validate" ? "SELECTED" : "available"}):
- Initialize freshness metadata for files without it
- Set last_updated and last_validated timestamps
- Link validation to git commits for traceability
- Recommended as first step for new documentation sets

2. **Track** (${action === "track" ? "SELECTED" : "available"}):
- Scan all documentation files for staleness
- Categorize as: fresh, warning, stale, or critical
- Generate comprehensive freshness report
- Store results in knowledge graph for historical tracking

3. **Insights** (${action === "insights" ? "SELECTED" : "available"}):
- Analyze freshness trends over time
- Compare current vs. historical freshness scores
- Identify chronically stale files
- Get actionable recommendations

**Freshness Presets:**
- realtime: Minutes (for API docs, status pages)
- active: Hours (for development docs, release notes)
- recent: Days (for tutorials, getting started)
- weekly: 7 days (for how-to guides, examples)
- monthly: 30 days (for reference, architecture) - DEFAULT
- quarterly: 90 days (for explanations, background)

**Integration Tools:**
- validate_documentation_freshness: Initialize and update metadata
- track_documentation_freshness: Scan and report staleness
- update_existing_documentation: Sync docs with code changes
- sync_code_to_docs: Detect drift between code and docs

**Workflow Example:**
1. First time: Run validate_documentation_freshness to initialize metadata
2. Regular checks: Run track_documentation_freshness to monitor staleness
3. Deep analysis: Query knowledge graph for trends and insights
4. Maintenance: Update stale files and re-validate

Please ${
          actionDescriptions[action as keyof typeof actionDescriptions] ||
          "track documentation freshness"
        } and provide guidance on maintaining documentation quality:`,
      },
    },
  ];
}
541 |
542 | async function hasTestFiles(projectPath: string): Promise<boolean> {
543 | try {
544 | const files = await fs.readdir(projectPath, { recursive: true });
545 | return files.some(
546 | (file) =>
547 | typeof file === "string" &&
548 | (file.includes("test") ||
549 | file.includes("spec") ||
550 | file.endsWith(".test.js") ||
551 | file.endsWith(".test.ts") ||
552 | file.endsWith(".spec.js") ||
553 | file.endsWith(".spec.ts")),
554 | );
555 | } catch {
556 | return false;
557 | }
558 | }
559 |
560 | async function hasCIConfig(projectPath: string): Promise<boolean> {
561 | const ciFiles = [
562 | ".github/workflows",
563 | ".gitlab-ci.yml",
564 | "circle.yml",
565 | ".circleci/config.yml",
566 | "travis.yml",
567 | ".travis.yml",
568 | ];
569 |
570 | for (const file of ciFiles) {
571 | if (await fileExists(join(projectPath, file))) {
572 | return true;
573 | }
574 | }
575 | return false;
576 | }
577 |
578 | async function identifyDocumentationGaps(
579 | projectPath: string,
580 | context: ProjectContext,
581 | ): Promise<string[]> {
582 | const gaps: string[] = [];
583 |
584 | if (!context.readmeExists) {
585 | gaps.push("readme");
586 | }
587 |
588 | // Check for common documentation files
589 | const docFiles = [
590 | "CONTRIBUTING.md",
591 | "CHANGELOG.md",
592 | "LICENSE",
593 | "docs/api.md",
594 | "docs/tutorial.md",
595 | "docs/installation.md",
596 | ];
597 |
598 | for (const docFile of docFiles) {
599 | if (!(await fileExists(join(projectPath, docFile)))) {
600 | gaps.push(docFile.toLowerCase().replace(".md", "").replace("docs/", ""));
601 | }
602 | }
603 |
604 | return gaps;
605 | }
606 |
607 | // Guided workflow prompt generators (ADR-007)
608 |
// Guided workflow prompt generators (ADR-007)

/**
 * Build the end-to-end "analyze repository and recommend an SSG" workflow
 * prompt (ADR-007 guided workflow).
 *
 * @param context - Analyzed project characteristics used to ground the prompt.
 * @param args - Optional overrides: `analysis_depth`, `preferences`.
 */
function generateAnalyzeAndRecommendPrompt(
  context: ProjectContext,
  args: Record<string, any>,
): PromptMessage[] {
  const analysisDepth = args.analysis_depth || "standard";
  const preferences =
    args.preferences || "balanced approach with good community support";

  return [
    {
      role: "user",
      content: {
        type: "text",
        text: `Execute a complete repository analysis and SSG recommendation workflow for this project.

**Project Context:**
- Type: ${context.projectType}
- Languages: ${context.languages.join(", ")}
- Frameworks: ${context.frameworks.join(", ")}
- Package Manager: ${context.packageManager || "N/A"}
- Has Tests: ${context.hasTests}
- Has CI: ${context.hasCI}
- Documentation Gaps: ${context.documentationGaps.join(", ")}

**Workflow Parameters:**
- Analysis Depth: ${analysisDepth}
- Preferences: ${preferences}

**Expected Workflow:**
1. **Repository Analysis**: Analyze project structure, dependencies, and complexity
2. **SSG Recommendation**: Recommend the best static site generator based on project characteristics
3. **Implementation Guidance**: Provide step-by-step setup instructions
4. **Best Practices**: Include security, performance, and maintenance recommendations

**Required Output Format:**
- Executive summary with key findings
- Detailed analysis results with metrics
- SSG recommendation with justification
- Implementation roadmap with priorities
- Resource requirements and timeline estimates

Please execute this workflow systematically and provide actionable recommendations.`,
      },
    },
  ];
}
655 |
/**
 * Build the "set up a complete documentation system" workflow prompt
 * (ADR-007 guided workflow).
 *
 * @param context - Analyzed project characteristics used to ground the prompt.
 * @param args - Optional overrides: `ssg_type`, `include_examples`
 *   (defaults to true; only an explicit false disables examples).
 */
function generateSetupDocumentationPrompt(
  context: ProjectContext,
  args: Record<string, any>,
): PromptMessage[] {
  const ssgType = args.ssg_type || "recommended based on project analysis";
  const includeExamples = args.include_examples !== false;

  return [
    {
      role: "user",
      content: {
        type: "text",
        text: `Create a comprehensive documentation structure with best practices for this project.

**Project Context:**
- Type: ${context.projectType}
- Languages: ${context.languages.join(", ")}
- Frameworks: ${context.frameworks.join(", ")}
- Current Documentation Gaps: ${context.documentationGaps.join(", ")}

**Setup Parameters:**
- SSG Type: ${ssgType}
- Include Examples: ${includeExamples}

**Documentation Structure Requirements:**
1. **Diataxis Framework Implementation**:
- Tutorials: Learning-oriented content
- How-to Guides: Problem-solving content
- Reference: Information-oriented content
- Explanations: Understanding-oriented content

2. **Configuration Setup**:
- SSG configuration files
- GitHub Pages deployment
- Automated workflows
- Security best practices

3. **Content Guidelines**:
- Writing style guide
- Contribution guidelines
- Review processes
- Maintenance procedures

4. **Development Integration**:
- Build pipeline integration
- Automated testing for docs
- Performance monitoring
- Analytics setup

**Required Deliverables:**
- Complete directory structure
- Configuration files with comments
- Sample content ${includeExamples ? "with examples" : "templates"}
- Deployment automation
- Maintenance runbook

Please create a production-ready documentation system that scales with the project.`,
      },
    },
  ];
}
717 |
718 | function generateTroubleshootDeploymentPrompt(
719 | context: ProjectContext,
720 | args: Record<string, any>,
721 | ): PromptMessage[] {
722 | const repository = args.repository;
723 | const deploymentUrl = args.deployment_url || "GitHub Pages URL";
724 | const issueDescription =
725 | args.issue_description || "deployment not working as expected";
726 |
727 | return [
728 | {
729 | role: "user",
730 | content: {
731 | type: "text",
732 | text: `Diagnose and fix GitHub Pages deployment issues for this documentation project.
733 |
734 | **Repository Information:**
735 | - Repository: ${repository}
736 | - Expected URL: ${deploymentUrl}
737 | - Issue Description: ${issueDescription}
738 |
739 | **Project Context:**
740 | - Type: ${context.projectType}
741 | - Languages: ${context.languages.join(", ")}
742 | - Has CI: ${context.hasCI}
743 |
744 | **Troubleshooting Checklist:**
745 |
746 | 1. **Repository Settings**:
747 | - GitHub Pages source configuration
748 | - Branch and folder settings
749 | - Custom domain setup (if applicable)
750 | - Repository visibility and permissions
751 |
752 | 2. **Build Configuration**:
753 | - GitHub Actions workflow validation
754 | - Build dependencies and versions
755 | - Output directory configuration
756 | - Asset and link path issues
757 |
758 | 3. **Content Issues**:
759 | - Markdown syntax validation
760 | - Link and image path verification
761 | - YAML frontmatter validation
762 | - Special character handling
763 |
764 | 4. **Deployment Workflow**:
765 | - Action permissions and secrets
766 | - Deployment job configuration
767 | - Artifact handling
768 | - Cache and dependency issues
769 |
770 | 5. **Performance and Security**:
771 | - Build time optimization
772 | - Security policy compliance
773 | - CDN and caching configuration
774 | - SSL certificate validation
775 |
776 | **Diagnostic Approach:**
777 | 1. **Immediate Assessment**: Check current status and error messages
778 | 2. **Systematic Testing**: Validate each component step-by-step
779 | 3. **Fix Implementation**: Apply targeted solutions with validation
780 | 4. **Prevention Setup**: Implement monitoring and automated checks
781 |
782 | **Required Output:**
783 | - Root cause analysis
784 | - Step-by-step fix instructions
785 | - Validation procedures
786 | - Prevention recommendations
787 | - Monitoring setup guide
788 |
789 | Please provide a comprehensive troubleshooting guide with specific, actionable solutions.`,
790 | },
791 | },
792 | ];
793 | }
794 |
```
--------------------------------------------------------------------------------
/docs/adrs/006-mcp-tools-api-design.md:
--------------------------------------------------------------------------------
```markdown
1 | ---
2 | id: 006-mcp-tools-api-design
3 | title: "ADR-006: MCP Tools API Design and Interface Specification"
4 | sidebar_label: "ADR-006: MCP Tools API Design"
5 | sidebar_position: 6
6 | documcp:
7 | last_updated: "2025-11-20T00:46:21.940Z"
8 | last_validated: "2025-11-20T00:46:21.940Z"
9 | auto_updated: false
10 | update_frequency: monthly
11 | ---
12 |
13 | # ADR-006: MCP Tools API Design and Interface Specification
14 |
15 | ## Status
16 |
17 | Accepted
18 |
19 | ## Context
20 |
21 | DocuMCP must expose its functionality through a carefully designed set of MCP tools that provide comprehensive coverage of the documentation deployment workflow while maintaining clear separation of concerns, appropriate granularity, and excellent developer experience for MCP-enabled clients.
22 |
23 | The MCP Tools API serves as the primary interface between DocuMCP's intelligence and client applications like GitHub Copilot, Claude Desktop, and other MCP-enabled development environments. This API must balance several competing concerns:
24 |
25 | **Functional Requirements:**
26 |
27 | - Comprehensive repository analysis capabilities
28 | - Intelligent SSG recommendation with detailed justifications
29 | - Automated configuration generation for multiple SSGs
30 | - Diataxis-compliant documentation structure creation
31 | - GitHub Pages deployment workflow generation
32 | - Git integration for seamless deployment
33 |
34 | **Usability Requirements:**
35 |
36 | - Intuitive tool names and parameter structures
37 | - Comprehensive input validation with clear error messages
38 | - Consistent response formats across all tools
39 | - Rich metadata for client presentation and user guidance
40 | - Progressive disclosure of complexity (simple to advanced use cases)
41 |
42 | **Technical Requirements:**
43 |
44 | - Full MCP specification compliance
45 | - Robust error handling and recovery
46 | - Efficient parameter validation and sanitization
47 | - Scalable architecture supporting complex multi-step workflows
48 | - Extensible design for future functionality additions
49 |
50 | ## Decision
51 |
52 | We will implement a comprehensive MCP Tools API consisting of six core tools that cover the complete documentation deployment workflow, with additional utility tools for advanced scenarios and troubleshooting.
53 |
54 | ### Core MCP Tools Architecture:
55 |
56 | #### 1. Repository Analysis Tool (`analyzeRepository`)
57 |
58 | **Purpose**: Comprehensive repository analysis and project characterization
59 | **Scope**: Deep analysis of project structure, language ecosystems, existing documentation, and complexity assessment
60 |
61 | #### 2. SSG Recommendation Tool (`recommendSSG`)
62 |
63 | **Purpose**: Intelligent static site generator recommendation with detailed justifications
64 | **Scope**: Multi-criteria decision analysis with confidence scoring and alternative options
65 |
66 | #### 3. Configuration Generation Tool (`generateConfiguration`)
67 |
68 | **Purpose**: Create customized SSG configuration files and directory structures
69 | **Scope**: Template-based generation with project-specific customizations and validation
70 |
71 | #### 4. Diataxis Structure Tool (`createDiataxisStructure`)
72 |
73 | **Purpose**: Generate comprehensive Diataxis-compliant documentation frameworks
74 | **Scope**: Information architecture generation with content planning and navigation design
75 |
76 | #### 5. Deployment Workflow Tool (`generateWorkflow`)
77 |
78 | **Purpose**: Create optimized GitHub Actions workflows for automated deployment
79 | **Scope**: SSG-specific workflow generation with security best practices and performance optimization
80 |
81 | #### 6. Git Integration Tool (`generateGitCommands`)
82 |
83 | **Purpose**: Provide ready-to-execute Git commands for deployment and maintenance
84 | **Scope**: Context-aware command generation with branch management and deployment verification
85 |
86 | ### Supporting Tools:
87 |
88 | - `validateConfiguration`: Validate generated configurations and identify issues
89 | - `troubleshootDeployment`: Analyze deployment failures and provide remediation guidance
90 | - `optimizePerformance`: Analyze and optimize existing documentation site performance
91 | - `migrateDocumentation`: Assist with migration between different SSGs or frameworks
92 |
93 | ## Alternatives Considered
94 |
95 | ### Monolithic Single Tool Approach
96 |
97 | - **Pros**: Simpler API surface, single entry point, easier client integration
98 | - **Cons**: Complex parameter structures, poor separation of concerns, difficult error handling
99 | - **Decision**: Rejected due to poor usability and maintainability
100 |
101 | ### Micro-Tool Architecture (15+ Small Tools)
102 |
103 | - **Pros**: Maximum granularity, precise control, composable workflows
104 | - **Cons**: Complex orchestration, cognitive overhead, fragmented user experience
105 | - **Decision**: Rejected due to complexity and poor user experience
106 |
107 | ### Stateful Session-Based API
108 |
109 | - **Pros**: Could maintain context across tool calls, simplified parameter passing
110 | - **Cons**: Session management complexity, state synchronization issues, harder client integration
111 | - **Decision**: Rejected to maintain MCP stateless principles
112 |
113 | ### External API Integration (REST/GraphQL)
114 |
115 | - **Pros**: Standard web technologies, extensive tooling ecosystem
116 | - **Cons**: Not MCP-compliant, additional infrastructure requirements, authentication complexity
117 | - **Decision**: Rejected due to MCP specification requirements
118 |
119 | ## Consequences
120 |
121 | ### Positive
122 |
123 | - **Clear Separation of Concerns**: Each tool has well-defined responsibility and scope
124 | - **Progressive Complexity**: Users can start simple and add sophistication as needed
125 | - **Excellent Error Handling**: Tool-specific validation and error reporting
126 | - **Client-Friendly**: Rich metadata and consistent response formats enhance client UX
127 | - **Extensible Architecture**: Easy to add new tools without breaking existing functionality
128 |
129 | ### Negative
130 |
131 | - **API Surface Complexity**: Six core tools plus supporting tools require comprehensive documentation
132 | - **Inter-Tool Coordination**: Some workflows require multiple tool calls with parameter passing
133 | - **Validation Overhead**: Each tool requires comprehensive input validation and error handling
134 |
135 | ### Risks and Mitigations
136 |
137 | - **API Complexity**: Provide comprehensive documentation and usage examples
138 | - **Parameter Evolution**: Use versioned schemas with backward compatibility
139 | - **Client Integration**: Offer reference implementations and integration guides
140 |
141 | ## Implementation Details
142 |
143 | ### Tool Parameter Schemas
144 |
145 | ```typescript
146 | // Core tool parameter interfaces
147 | interface AnalyzeRepositoryParams {
148 | repositoryPath: string;
149 | analysisDepth?: "basic" | "comprehensive" | "deep";
150 | focusAreas?: ("structure" | "languages" | "documentation" | "complexity")[];
151 | excludePatterns?: string[];
152 | }
153 |
154 | interface RecommendSSGParams {
155 | projectAnalysis: ProjectAnalysis;
156 | teamCapabilities?: TeamCapabilities;
157 | performanceRequirements?: PerformanceRequirements;
158 | customizationNeeds?: CustomizationNeeds;
159 | existingConstraints?: ProjectConstraints;
160 | }
161 |
162 | interface GenerateConfigurationParams {
163 | selectedSSG: SSGType;
164 | projectAnalysis: ProjectAnalysis;
165 | customizations?: SSGCustomizations;
166 | deploymentTarget?: DeploymentTarget;
167 | advancedOptions?: AdvancedConfigOptions;
168 | }
169 |
170 | interface CreateDiataxisStructureParams {
171 | selectedSSG: SSGType;
172 | projectType: ProjectType;
173 | existingContent?: ExistingContentAnalysis;
174 | contentComplexity?: "minimal" | "standard" | "comprehensive";
175 | navigationPreferences?: NavigationPreferences;
176 | }
177 |
178 | interface GenerateWorkflowParams {
179 | ssgType: SSGType;
180 | deploymentStrategy: "github-actions" | "branch-based" | "hybrid";
181 | securityRequirements?: SecurityRequirements;
182 | performanceOptimizations?: PerformanceOptions;
183 | environmentConfiguration?: EnvironmentConfig;
184 | }
185 |
186 | interface GenerateGitCommandsParams {
187 | deploymentStrategy: DeploymentStrategy;
188 | repositoryState: RepositoryState;
189 | branchConfiguration: BranchConfiguration;
190 | commitPreferences?: CommitPreferences;
191 | }
192 | ```
193 |
194 | ### Response Format Standardization
195 |
196 | ```typescript
197 | // Standardized response structure for all tools
198 | interface MCPToolResponse<T> {
199 | success: boolean;
200 | data?: T;
201 | error?: ErrorDetails;
202 | metadata: ResponseMetadata;
203 | recommendations?: Recommendation[];
204 | nextSteps?: NextStep[];
205 | }
206 |
207 | interface ResponseMetadata {
208 | toolVersion: string;
209 | executionTime: number;
210 | confidenceScore?: number;
211 | analysisDepth: string;
212 | timestamp: string;
213 | correlationId: string;
214 | }
215 |
216 | interface ErrorDetails {
217 | code: string;
218 | message: string;
219 | details: string;
220 | resolution?: string;
221 | documentation?: string;
222 | }
223 |
224 | interface Recommendation {
225 | type: "optimization" | "alternative" | "enhancement";
226 | priority: "low" | "medium" | "high";
227 | description: string;
228 | implementation?: string;
229 | resources?: string[];
230 | }
231 |
232 | interface NextStep {
233 | action: string;
234 | description: string;
235 | toolRequired?: string;
236 | parameters?: Record<string, any>;
237 | estimated_time?: string;
238 | }
239 | ```
240 |
241 | ### analyzeRepository Tool Implementation
242 |
243 | ```typescript
244 | const analyzeRepositoryTool: MCPTool = {
245 | name: "analyzeRepository",
246 | description: "Comprehensive repository analysis for documentation planning",
247 | inputSchema: {
248 | type: "object",
249 | properties: {
250 | repositoryPath: {
251 | type: "string",
252 | description: "Path to the repository to analyze",
253 | },
254 | analysisDepth: {
255 | type: "string",
256 | enum: ["basic", "comprehensive", "deep"],
257 | default: "comprehensive",
258 | description: "Depth of analysis to perform",
259 | },
260 | focusAreas: {
261 | type: "array",
262 | items: {
263 | type: "string",
264 | enum: ["structure", "languages", "documentation", "complexity"],
265 | },
266 | description: "Specific areas to focus analysis on",
267 | },
268 | excludePatterns: {
269 | type: "array",
270 | items: { type: "string" },
271 | description: "File patterns to exclude from analysis",
272 | },
273 | },
274 | required: ["repositoryPath"],
275 | },
276 | };
277 |
278 | async function handleAnalyzeRepository(
279 | params: AnalyzeRepositoryParams,
280 | ): Promise<MCPToolResponse<RepositoryAnalysis>> {
281 | try {
282 | const analysis = await repositoryAnalyzer.analyze(params);
283 |
284 | return {
285 | success: true,
286 | data: analysis,
287 | metadata: {
288 | toolVersion: "1.0.0",
289 | executionTime: analysis.executionTime,
290 | analysisDepth: params.analysisDepth || "comprehensive",
291 | timestamp: new Date().toISOString(),
292 | correlationId: generateCorrelationId(),
293 | },
294 | recommendations: generateAnalysisRecommendations(analysis),
295 | nextSteps: [
296 | {
297 | action: "Get SSG Recommendation",
298 | description:
299 | "Use analysis results to get intelligent SSG recommendations",
300 | toolRequired: "recommendSSG",
301 | parameters: { projectAnalysis: analysis },
302 | estimated_time: "< 1 minute",
303 | },
304 | ],
305 | };
306 | } catch (error) {
307 | return {
308 | success: false,
309 | error: {
310 | code: "ANALYSIS_FAILED",
311 | message: "Repository analysis failed",
312 | details: error.message,
313 | resolution: "Verify repository path and permissions",
314 | documentation: "https://documcp.dev/troubleshooting#analysis-errors",
315 | },
316 | metadata: {
317 | toolVersion: "1.0.0",
318 | executionTime: 0,
319 | analysisDepth: params.analysisDepth || "comprehensive",
320 | timestamp: new Date().toISOString(),
321 | correlationId: generateCorrelationId(),
322 | },
323 | };
324 | }
325 | }
326 | ```
327 |
328 | ### recommendSSG Tool Implementation
329 |
330 | ```typescript
331 | const recommendSSGTool: MCPTool = {
332 | name: "recommendSSG",
333 | description:
334 | "Intelligent static site generator recommendation with detailed justifications",
335 | inputSchema: {
336 | type: "object",
337 | properties: {
338 | projectAnalysis: {
339 | type: "object",
340 | description: "Repository analysis results from analyzeRepository tool",
341 | },
342 | teamCapabilities: {
343 | type: "object",
344 | properties: {
345 | technicalSkills: { type: "array", items: { type: "string" } },
346 | maintenanceCapacity: {
347 | type: "string",
348 | enum: ["minimal", "moderate", "extensive"],
349 | },
350 | learningAppetite: { type: "string", enum: ["low", "medium", "high"] },
351 | },
352 | },
353 | performanceRequirements: {
354 | type: "object",
355 | properties: {
356 | buildTimeImportance: {
357 | type: "string",
358 | enum: ["low", "medium", "high"],
359 | },
360 | siteSpeedPriority: {
361 | type: "string",
362 | enum: ["standard", "fast", "ultra-fast"],
363 | },
364 | scalabilityNeeds: {
365 | type: "string",
366 | enum: ["small", "medium", "large", "enterprise"],
367 | },
368 | },
369 | },
370 | },
371 | required: ["projectAnalysis"],
372 | },
373 | };
374 |
375 | async function handleRecommendSSG(
376 | params: RecommendSSGParams,
377 | ): Promise<MCPToolResponse<SSGRecommendation>> {
378 | try {
379 | const recommendation = await ssgRecommendationEngine.analyze(params);
380 |
381 | return {
382 | success: true,
383 | data: recommendation,
384 | metadata: {
385 | toolVersion: "1.0.0",
386 | executionTime: recommendation.analysisTime,
387 | confidenceScore: recommendation.confidence,
388 | analysisDepth: "comprehensive",
389 | timestamp: new Date().toISOString(),
390 | correlationId: generateCorrelationId(),
391 | },
392 | recommendations: [
393 | {
394 | type: "optimization",
395 | priority: "medium",
396 | description: "Consider performance optimization strategies",
397 | implementation: "Review build caching and incremental build options",
398 | },
399 | ],
400 | nextSteps: [
401 | {
402 | action: "Generate Configuration",
403 | description: "Create customized configuration for recommended SSG",
404 | toolRequired: "generateConfiguration",
405 | parameters: {
406 | selectedSSG: recommendation.primaryRecommendation.ssg,
407 | projectAnalysis: params.projectAnalysis,
408 | },
409 | estimated_time: "2-3 minutes",
410 | },
411 | ],
412 | };
413 | } catch (error) {
414 | console.error("SSG recommendation analysis failed:", error);
415 | return {
416 | success: false,
417 | error: {
418 | code: "SSG_RECOMMENDATION_FAILED",
419 | message: `Failed to analyze SSG recommendations: ${
420 | error instanceof Error ? error.message : "Unknown error"
421 | }`,
422 | resolution:
423 | "Check project analysis data and retry with valid parameters",
424 | },
425 | metadata: {
426 | toolVersion: "1.0.0",
427 | timestamp: new Date().toISOString(),
428 | correlationId: generateCorrelationId(),
429 | },
430 | };
431 | }
432 | }
433 | ```
434 |
435 | ### Input Validation System
436 |
437 | ```typescript
438 | interface ValidationRule {
439 | field: string;
440 | validator: (value: any) => ValidationResult;
441 | required: boolean;
442 | errorMessage: string;
443 | }
444 |
445 | class MCPToolValidator {
446 | validateParameters<T>(params: T, schema: JSONSchema): ValidationResult {
447 | const results = this.runSchemaValidation(params, schema);
448 | const semanticResults = this.runSemanticValidation(params);
449 |
450 | return this.combineValidationResults(results, semanticResults);
451 | }
452 |
453 | private runSemanticValidation(params: any): ValidationResult {
454 | const issues: ValidationIssue[] = [];
455 |
456 | // Repository path validation
457 | if (
458 | params.repositoryPath &&
459 | !this.isValidRepositoryPath(params.repositoryPath)
460 | ) {
461 | issues.push({
462 | field: "repositoryPath",
463 | message: "Repository path does not exist or is not accessible",
464 | severity: "error",
465 | resolution: "Verify the path exists and you have read permissions",
466 | });
467 | }
468 |
469 | // Cross-parameter validation
470 | if (params.analysisDepth === "deep" && params.focusAreas?.length > 2) {
471 | issues.push({
472 | field: "analysisDepth",
473 | message: "Deep analysis with multiple focus areas may be slow",
474 | severity: "warning",
475 | resolution:
476 | "Consider using comprehensive analysis or fewer focus areas",
477 | });
478 | }
479 |
480 | return { valid: issues.length === 0, issues };
481 | }
482 | }
483 | ```
484 |
485 | ## Tool Orchestration Patterns
486 |
487 | ### Sequential Workflow Pattern
488 |
489 | ```typescript
490 | // Common workflow: Analysis → Recommendation → Configuration → Deployment
491 | class DocumentationWorkflow {
492 | async executeCompleteWorkflow(
493 | repositoryPath: string,
494 | ): Promise<WorkflowResult> {
495 | try {
496 | // Step 1: Analyze repository
497 | const analysisResult = await this.callTool("analyzeRepository", {
498 | repositoryPath,
499 | });
500 | if (!analysisResult.success) {
501 | throw new Error(`Analysis failed: ${analysisResult.error?.message}`);
502 | }
503 |
504 | // Step 2: Get SSG recommendation
505 | const recommendationResult = await this.callTool("recommendSSG", {
506 | projectAnalysis: analysisResult.data,
507 | });
508 | if (!recommendationResult.success) {
509 | throw new Error(
510 | `Recommendation failed: ${recommendationResult.error?.message}`,
511 | );
512 | }
513 |
514 | // Step 3: Generate configuration
515 | const configResult = await this.callTool("generateConfiguration", {
516 | selectedSSG: recommendationResult.data.primaryRecommendation.ssg,
517 | projectAnalysis: analysisResult.data,
518 | });
519 | if (!configResult.success) {
520 | throw new Error(
521 | `Configuration generation failed: ${configResult.error?.message}`,
522 | );
523 | }
524 |
525 | // Step 4: Create Diataxis structure
526 | const structureResult = await this.callTool("createDiataxisStructure", {
527 | selectedSSG: recommendationResult.data.primaryRecommendation.ssg,
528 | projectType: analysisResult.data.projectType,
529 | });
530 | if (!structureResult.success) {
531 | console.warn(
532 | `Diataxis structure creation failed: ${structureResult.error?.message}`,
533 | );
534 | }
535 |
536 | // Step 5: Generate deployment workflow
537 | const workflowResult = await this.callTool("generateWorkflow", {
538 | ssgType: recommendationResult.data.primaryRecommendation.ssg,
539 | deploymentStrategy: "github-actions",
540 | });
541 | if (!workflowResult.success) {
542 | console.warn(
543 | `Workflow generation failed: ${workflowResult.error?.message}`,
544 | );
545 | }
546 |
547 | return this.combineResults([
548 | analysisResult,
549 | recommendationResult,
550 | configResult,
551 | structureResult,
552 | workflowResult,
553 | ]);
554 | } catch (error) {
555 | throw new Error(`Complete workflow failed: ${error.message}`);
556 | }
557 | }
558 | }
559 | ```
560 |
561 | ## Error Handling and Recovery
562 |
563 | ### Comprehensive Error Classification
564 |
565 | ```typescript
566 | enum ErrorCategory {
567 | VALIDATION = "validation",
568 | FILESYSTEM = "filesystem",
569 | ANALYSIS = "analysis",
570 | GENERATION = "generation",
571 | CONFIGURATION = "configuration",
572 | DEPLOYMENT = "deployment",
573 | NETWORK = "network",
574 | PERMISSION = "permission",
575 | }
576 |
577 | interface ErrorContext {
578 | tool: string;
579 | operation: string;
580 | parameters: Record<string, any>;
581 | environment: EnvironmentInfo;
582 | }
583 |
584 | class MCPErrorHandler {
585 | handleError(error: Error, context: ErrorContext): MCPToolResponse<null> {
586 | const classification = this.classifyError(error);
587 | const resolution = this.generateResolution(classification, context);
588 |
589 | return {
590 | success: false,
591 | error: {
592 | code: this.generateErrorCode(classification),
593 | message: this.formatUserMessage(error, classification),
594 | details: error.message,
595 | resolution: resolution.guidance,
596 | documentation: resolution.documentationUrl,
597 | },
598 | metadata: this.generateErrorMetadata(context),
599 | nextSteps: resolution.suggestedActions,
600 | };
601 | }
602 |
603 | private generateResolution(
604 | classification: ErrorClassification,
605 | context: ErrorContext,
606 | ): ErrorResolution {
607 | switch (classification.category) {
608 | case ErrorCategory.FILESYSTEM:
609 | return {
610 | guidance: "Verify file paths and permissions",
611 | documentationUrl:
612 | "https://documcp.dev/troubleshooting#filesystem-errors",
613 | suggestedActions: [
614 | {
615 | action: "Check file exists",
616 | description: `Verify ${context.parameters.repositoryPath} exists`,
617 | },
618 | {
619 | action: "Check permissions",
620 | description: "Ensure read access to repository directory",
621 | },
622 | ],
623 | };
624 | // ... other error categories
625 | }
626 | }
627 | }
628 | ```
629 |
630 | ## Performance Optimization
631 |
632 | ### Response Caching Strategy
633 |
634 | ```typescript
635 | interface CacheConfiguration {
636 | analyzeRepository: {
637 | ttl: 300;
638 | keyFields: ["repositoryPath", "analysisDepth"];
639 | };
640 | recommendSSG: { ttl: 3600; keyFields: ["projectAnalysis.signature"] };
641 | generateConfiguration: {
642 | ttl: 1800;
643 | keyFields: ["selectedSSG", "projectAnalysis.signature"];
644 | };
645 | }
646 |
647 | class MCPToolCache {
648 | async getCachedResponse<T>(
649 | toolName: string,
650 | parameters: any,
651 | ): Promise<MCPToolResponse<T> | null> {
652 | const cacheKey = this.generateCacheKey(toolName, parameters);
653 | const cached = await this.cache.get(cacheKey);
654 |
655 | if (cached && !this.isExpired(cached)) {
656 | return {
657 | ...cached,
658 | metadata: {
659 | ...cached.metadata,
660 | fromCache: true,
661 | cacheAge: Date.now() - cached.metadata.timestamp,
662 | },
663 | };
664 | }
665 |
666 | return null;
667 | }
668 | }
669 | ```
670 |
671 | ## Testing Strategy
672 |
673 | ### Tool Testing Framework
674 |
675 | ```typescript
676 | describe("MCP Tools API", () => {
677 | describe("analyzeRepository", () => {
678 | it("should analyze JavaScript project correctly");
679 | it("should handle missing repository gracefully");
680 | it("should respect analysis depth parameters");
681 | it("should exclude specified patterns");
682 | });
683 |
684 | describe("recommendSSG", () => {
685 | it("should recommend Hugo for large documentation sites");
686 | it("should recommend Jekyll for GitHub Pages simple sites");
687 | it("should provide confidence scores for all recommendations");
688 | it("should handle incomplete project analysis");
689 | });
690 |
691 | describe("Tool Integration", () => {
692 | it("should support complete workflow from analysis to deployment");
693 | it("should maintain parameter consistency across tool calls");
694 | it("should provide appropriate next steps guidance");
695 | });
696 | });
697 | ```
698 |
699 | ### Integration Testing
700 |
701 | ```typescript
702 | class MCPToolIntegrationTests {
703 | async testCompleteWorkflow(): Promise<void> {
704 | const testRepo = await this.createTestRepository();
705 |
706 | // Test full workflow
707 | const analysis = await this.callTool("analyzeRepository", {
708 | repositoryPath: testRepo,
709 | });
710 | expect(analysis.success).toBe(true);
711 |
712 | const recommendation = await this.callTool("recommendSSG", {
713 | projectAnalysis: analysis.data,
714 | });
715 | expect(recommendation.success).toBe(true);
716 | expect(recommendation.data.primaryRecommendation).toBeDefined();
717 |
718 | const config = await this.callTool("generateConfiguration", {
719 | selectedSSG: recommendation.data.primaryRecommendation.ssg,
720 | projectAnalysis: analysis.data,
721 | });
722 | expect(config.success).toBe(true);
723 |
724 | // Validate generated configuration
725 | await this.validateGeneratedFiles(config.data.files);
726 | }
727 | }
728 | ```
729 |
730 | ## Documentation and Examples
731 |
732 | ### Tool Usage Examples
733 |
734 | ```typescript
735 | // Example: Complete documentation setup workflow
736 | const examples = {
737 | basicSetup: {
738 | description: "Basic documentation setup for a JavaScript project",
739 | steps: [
740 | {
741 | tool: "analyzeRepository",
742 | parameters: { repositoryPath: "./my-project" },
743 | expectedResult: "Project analysis with language ecosystem detection",
744 | },
745 | {
746 | tool: "recommendSSG",
747 | parameters: { projectAnalysis: "${analysis_result}" },
748 | expectedResult: "SSG recommendation with justification",
749 | },
750 | ],
751 | },
752 | advancedSetup: {
753 | description: "Advanced setup with custom requirements",
754 | steps: [
755 | // ... detailed workflow steps
756 | ],
757 | },
758 | };
759 | ```
760 |
761 | ## Future Enhancements
762 |
763 | ### Planned Tool Additions
764 |
765 | - `analyzeExistingDocs`: Deep analysis of existing documentation quality and structure
766 | - `generateMigrationPlan`: Create migration plans between different documentation systems
767 | - `optimizeContent`: AI-powered content optimization and gap analysis
768 | - `validateAccessibility`: Comprehensive accessibility testing and recommendations
769 |
770 | ### API Evolution Strategy
771 |
772 | - Versioned tool schemas with backward compatibility
773 | - Deprecation notices and migration guidance
774 | - Feature flags for experimental functionality
775 | - Community feedback integration for API improvements
776 |
777 | ## References
778 |
779 | - [Model Context Protocol Specification](https://spec.modelcontextprotocol.io/)
780 | - [JSON Schema Validation](https://json-schema.org/)
781 | - [API Design Best Practices](https://swagger.io/resources/articles/best-practices-in-api-design/)
782 |
```
--------------------------------------------------------------------------------
/src/utils/language-parsers-simple.ts:
--------------------------------------------------------------------------------
```typescript
1 | import { CodeElement, APIEndpoint } from "./code-scanner.js";
2 | import { spawn } from "child_process";
3 |
4 | export interface LanguageParser { // contract implemented by each language-specific parser
5 | extensions: string[]; // file extensions (without the dot) this parser handles
6 | name: string; // human-readable language name, e.g. "Python"
7 | parseFile(content: string, filePath: string): Promise<LanguageParseResult>; // parse one file's raw contents
8 | supportsApiEndpoints?: boolean; // true when the parser can detect HTTP route definitions
9 | supportsFrameworkDetection?: boolean; // true when the parser recognizes framework-specific patterns
10 | }
11 |
12 | export interface LanguageParseResult { // everything extracted from a single source file
13 | functions: CodeElement[]; // free functions and methods
14 | classes: CodeElement[];
15 | interfaces: CodeElement[]; // empty for languages without an interface construct
16 | types: CodeElement[];
17 | enums: CodeElement[];
18 | exports: CodeElement[];
19 | imports: CodeElement[]; // one entry per imported module
20 | apiEndpoints: APIEndpoint[]; // detected HTTP route definitions (Flask/FastAPI/Django, ...)
21 | constants: CodeElement[]; // e.g. UPPER_CASE module-level assignments in Python
22 | variables: CodeElement[];
23 | }
24 |
25 | export class MultiLanguageCodeScanner {
26 | private parsers = new Map<string, LanguageParser>();
27 |
28 | constructor() {
29 | this.initializeParsers();
30 | }
31 |
32 | private initializeParsers() {
33 | // Register parsers based on your tech stack
34 | this.registerParser(new PythonParser());
35 | this.registerParser(new GoParser());
36 | this.registerParser(new YamlParser());
37 | this.registerParser(new BashParser());
38 | }
39 |
40 | private registerParser(parser: LanguageParser) {
41 | for (const extension of parser.extensions) {
42 | this.parsers.set(extension, parser);
43 | }
44 | }
45 |
46 | async parseFile(
47 | content: string,
48 | filePath: string,
49 | ): Promise<LanguageParseResult> {
50 | const extension = this.getFileExtension(filePath);
51 | const parser = this.parsers.get(extension);
52 |
53 | if (parser) {
54 | return await parser.parseFile(content, filePath);
55 | }
56 |
57 | // Return empty result for unsupported files
58 | return this.getEmptyResult();
59 | }
60 |
61 | private getFileExtension(filePath: string): string {
62 | return filePath.split(".").pop()?.toLowerCase() || "";
63 | }
64 |
65 | private getEmptyResult(): LanguageParseResult {
66 | return {
67 | functions: [],
68 | classes: [],
69 | interfaces: [],
70 | types: [],
71 | enums: [],
72 | exports: [],
73 | imports: [],
74 | apiEndpoints: [],
75 | constants: [],
76 | variables: [],
77 | };
78 | }
79 |
80 | getSupportedExtensions(): string[] {
81 | return Array.from(this.parsers.keys());
82 | }
83 |
84 | getParserInfo(): { extension: string; parser: string }[] {
85 | return Array.from(this.parsers.entries()).map(([ext, parser]) => ({
86 | extension: ext,
87 | parser: parser.name,
88 | }));
89 | }
90 | }
91 |
92 | // Python Parser Implementation using subprocess + regex fallback
93 | export class PythonParser implements LanguageParser {
94 | extensions = ["py", "pyi", "pyx", "pxd"];
95 | name = "Python";
96 | supportsApiEndpoints = true;
97 | supportsFrameworkDetection = true;
98 |
99 | async parseFile(
100 | content: string,
101 | filePath: string,
102 | ): Promise<LanguageParseResult> {
103 | const result: LanguageParseResult = {
104 | functions: [],
105 | classes: [],
106 | interfaces: [],
107 | types: [],
108 | enums: [],
109 | exports: [],
110 | imports: [],
111 | apiEndpoints: [],
112 | constants: [],
113 | variables: [],
114 | };
115 |
116 | try {
117 | // Try subprocess-based AST parsing first
118 | const astResult = await this.parseWithPythonAST(content, filePath);
119 | if (astResult) {
120 | this.mergePythonASTResults(astResult, result, filePath);
121 | } else {
122 | // Fall back to regex-based parsing
123 | this.parseWithRegex(content, result, filePath);
124 | }
125 |
126 | // Look for Flask/FastAPI/Django endpoints
127 | this.findPythonApiEndpoints(content, result, filePath);
128 | } catch (error) {
129 | console.warn(`Failed to parse Python file ${filePath}:`, error);
130 | // Fall back to regex-based parsing
131 | this.parseWithRegex(content, result, filePath);
132 | }
133 |
134 | return result;
135 | }
136 |
137 | private async parseWithPythonAST(
138 | content: string,
139 | _filePath: string,
140 | ): Promise<any> {
141 | return new Promise((resolve) => {
142 | // Create a Python script to parse the AST
143 | const pythonScript = `
144 | import ast
145 | import sys
146 | import json
147 | import tempfile
148 | import os
149 |
150 | try:
151 | # Read content from stdin
152 | content = sys.stdin.read()
153 |
154 | tree = ast.parse(content)
155 |
156 | result = {
157 | 'functions': [],
158 | 'classes': [],
159 | 'imports': [],
160 | 'constants': [],
161 | 'variables': []
162 | }
163 |
164 | for node in ast.walk(tree):
165 | if isinstance(node, ast.FunctionDef):
166 | result['functions'].append({
167 | 'name': node.name,
168 | 'line': node.lineno,
169 | 'has_docstring': ast.get_docstring(node) is not None,
170 | 'docstring': ast.get_docstring(node),
171 | 'is_async': False,
172 | 'exported': not node.name.startswith('_')
173 | })
174 | elif isinstance(node, ast.AsyncFunctionDef):
175 | result['functions'].append({
176 | 'name': node.name,
177 | 'line': node.lineno,
178 | 'has_docstring': ast.get_docstring(node) is not None,
179 | 'docstring': ast.get_docstring(node),
180 | 'is_async': True,
181 | 'exported': not node.name.startswith('_')
182 | })
183 | elif isinstance(node, ast.ClassDef):
184 | result['classes'].append({
185 | 'name': node.name,
186 | 'line': node.lineno,
187 | 'has_docstring': ast.get_docstring(node) is not None,
188 | 'docstring': ast.get_docstring(node),
189 | 'exported': not node.name.startswith('_')
190 | })
191 | elif isinstance(node, (ast.Import, ast.ImportFrom)):
192 | if isinstance(node, ast.Import):
193 | for alias in node.names:
194 | result['imports'].append({
195 | 'name': alias.name,
196 | 'line': node.lineno
197 | })
198 | else: # ImportFrom
199 | result['imports'].append({
200 | 'name': node.module or 'relative',
201 | 'line': node.lineno
202 | })
203 | elif isinstance(node, ast.Assign):
204 | for target in node.targets:
205 | if isinstance(target, ast.Name):
206 | is_constant = target.id.isupper()
207 | result['constants' if is_constant else 'variables'].append({
208 | 'name': target.id,
209 | 'line': node.lineno,
210 | 'exported': not target.id.startswith('_')
211 | })
212 |
213 | print(json.dumps(result))
214 | except Exception as e:
215 | print(json.dumps({'error': str(e)}), file=sys.stderr)
216 | `;
217 |
218 | // Try to execute Python AST parsing
219 | const process = spawn("python3", ["-c", pythonScript], {
220 | stdio: ["pipe", "pipe", "pipe"],
221 | });
222 |
223 | // Send content via stdin
224 | process.stdin.write(content);
225 | process.stdin.end();
226 |
227 | let output = "";
228 | let errorOutput = "";
229 |
230 | process.stdout.on("data", (data) => {
231 | output += data.toString();
232 | });
233 |
234 | process.stderr.on("data", (data) => {
235 | errorOutput += data.toString();
236 | });
237 |
238 | process.on("close", (code) => {
239 | if (code === 0 && output.trim()) {
240 | try {
241 | const result = JSON.parse(output.trim());
242 | if (!result.error) {
243 | resolve(result);
244 | return;
245 | }
246 | } catch (e) {
247 | // JSON parsing failed
248 | console.warn("Failed to parse Python AST output:", e);
249 | }
250 | }
251 | if (errorOutput) {
252 | console.warn("Python AST parsing errors:", errorOutput);
253 | }
254 | resolve(null); // Fall back to regex parsing
255 | });
256 |
257 | process.on("error", () => {
258 | resolve(null); // Python not available or failed
259 | });
260 |
261 | // Timeout after 5 seconds
262 | setTimeout(() => {
263 | process.kill();
264 | resolve(null);
265 | }, 5000);
266 | });
267 | }
268 |
269 | private mergePythonASTResults(
270 | astResult: any,
271 | result: LanguageParseResult,
272 | filePath: string,
273 | ): void {
274 | astResult.functions?.forEach((func: any) => {
275 | result.functions.push({
276 | name: func.name,
277 | type: "function",
278 | filePath,
279 | line: func.line,
280 | column: 0,
281 | exported: func.exported,
282 | isAsync: func.is_async,
283 | hasJSDoc: func.has_docstring,
284 | jsDocDescription: func.docstring || undefined,
285 | });
286 | });
287 |
288 | astResult.classes?.forEach((cls: any) => {
289 | result.classes.push({
290 | name: cls.name,
291 | type: "class",
292 | filePath,
293 | line: cls.line,
294 | column: 0,
295 | exported: cls.exported,
296 | hasJSDoc: cls.has_docstring,
297 | jsDocDescription: cls.docstring || undefined,
298 | });
299 | });
300 |
301 | astResult.imports?.forEach((imp: any) => {
302 | result.imports.push({
303 | name: imp.name,
304 | type: "import",
305 | filePath,
306 | line: imp.line,
307 | column: 0,
308 | exported: false,
309 | });
310 | });
311 |
312 | astResult.constants?.forEach((constant: any) => {
313 | result.constants.push({
314 | name: constant.name,
315 | type: "variable",
316 | filePath,
317 | line: constant.line,
318 | column: 0,
319 | exported: constant.exported,
320 | hasJSDoc: false,
321 | });
322 | });
323 |
324 | astResult.variables?.forEach((variable: any) => {
325 | result.variables.push({
326 | name: variable.name,
327 | type: "variable",
328 | filePath,
329 | line: variable.line,
330 | column: 0,
331 | exported: variable.exported,
332 | hasJSDoc: false,
333 | });
334 | });
335 | }
336 |
337 | private parseWithRegex(
338 | content: string,
339 | result: LanguageParseResult,
340 | filePath: string,
341 | ): void {
342 | const lines = content.split("\n");
343 |
344 | lines.forEach((line, index) => {
345 | const lineNum = index + 1;
346 |
347 | // Function definitions
348 | const funcMatch = line.match(
349 | /^\s*(async\s+)?def\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\(/,
350 | );
351 | if (funcMatch) {
352 | const isAsync = !!funcMatch[1];
353 | const funcName = funcMatch[2];
354 | const hasDocstring = this.hasDocstringAfterLine(lines, index);
355 |
356 | result.functions.push({
357 | name: funcName,
358 | type: "function",
359 | filePath,
360 | line: lineNum,
361 | column: 0,
362 | exported: !funcName.startsWith("_"),
363 | isAsync,
364 | hasJSDoc: hasDocstring,
365 | });
366 | }
367 |
368 | // Class definitions
369 | const classMatch = line.match(/^\s*class\s+([a-zA-Z_][a-zA-Z0-9_]*)/);
370 | if (classMatch) {
371 | const className = classMatch[1];
372 | const hasDocstring = this.hasDocstringAfterLine(lines, index);
373 |
374 | result.classes.push({
375 | name: className,
376 | type: "class",
377 | filePath,
378 | line: lineNum,
379 | column: 0,
380 | exported: !className.startsWith("_"),
381 | hasJSDoc: hasDocstring,
382 | });
383 | }
384 |
385 | // Import statements
386 | const importMatch = line.match(
387 | /^\s*(?:from\s+([^\s]+)\s+)?import\s+(.+)/,
388 | );
389 | if (importMatch) {
390 | const module = importMatch[1] || importMatch[2].split(",")[0].trim();
391 | result.imports.push({
392 | name: module,
393 | type: "import",
394 | filePath,
395 | line: lineNum,
396 | column: 0,
397 | exported: false,
398 | });
399 | }
400 |
401 | // Constants and variables
402 | const assignMatch = line.match(/^\s*([A-Z_][A-Z0-9_]*)\s*=/);
403 | if (assignMatch) {
404 | result.constants.push({
405 | name: assignMatch[1],
406 | type: "variable",
407 | filePath,
408 | line: lineNum,
409 | column: 0,
410 | exported: true,
411 | hasJSDoc: false,
412 | });
413 | }
414 | });
415 | }
416 |
417 | private hasDocstringAfterLine(lines: string[], lineIndex: number): boolean {
418 | // Check if next few lines contain a docstring
419 | for (
420 | let i = lineIndex + 1;
421 | i < Math.min(lineIndex + 3, lines.length);
422 | i++
423 | ) {
424 | const line = lines[i].trim();
425 | if (line.startsWith('"""') || line.startsWith("'''")) {
426 | return true;
427 | }
428 | }
429 | return false;
430 | }
431 |
432 | private findPythonApiEndpoints(
433 | content: string,
434 | result: LanguageParseResult,
435 | filePath: string,
436 | ) {
437 | // Flask patterns
438 | const flaskPatterns = [
439 | /@app\.(route|get|post|put|delete|patch)\s*\(\s*['"]([^'"]+)['"]/g,
440 | /@bp\.(route|get|post|put|delete|patch)\s*\(\s*['"]([^'"]+)['"]/g,
441 | ];
442 |
443 | // FastAPI patterns
444 | const fastApiPatterns = [
445 | /@app\.(get|post|put|delete|patch)\s*\(\s*['"]([^'"]+)['"]/g,
446 | /router\.(get|post|put|delete|patch)\s*\(\s*['"]([^'"]+)['"]/g,
447 | ];
448 |
449 | // Django patterns
450 | const djangoPatterns = [
451 | /path\s*\(\s*['"]([^'"]+)['"]/g,
452 | /url\s*\(\s*r?['"]([^'"]+)['"]/g,
453 | ];
454 |
455 | const allPatterns = [
456 | ...flaskPatterns,
457 | ...fastApiPatterns,
458 | ...djangoPatterns,
459 | ];
460 |
461 | allPatterns.forEach((pattern) => {
462 | let match;
463 | while ((match = pattern.exec(content)) !== null) {
464 | const method =
465 | match[1] === "route"
466 | ? "ALL"
467 | : (match[1].toUpperCase() as APIEndpoint["method"]);
468 | const path = match[2] || match[1]; // Handle different capture groups
469 |
470 | // Find line number
471 | const beforeMatch = content.substring(0, match.index!);
472 | const line = beforeMatch.split("\n").length;
473 |
474 | result.apiEndpoints.push({
475 | method,
476 | path,
477 | filePath,
478 | line,
479 | hasDocumentation: this.hasEndpointDocumentation(
480 | content,
481 | match.index!,
482 | ),
483 | });
484 | }
485 | });
486 | }
487 |
488 | private hasEndpointDocumentation(
489 | content: string,
490 | matchIndex: number,
491 | ): boolean {
492 | const beforeMatch = content.substring(0, matchIndex);
493 | const lines = beforeMatch.split("\n");
494 |
495 | // Check last few lines for docstrings or comments
496 | for (let i = Math.max(0, lines.length - 5); i < lines.length; i++) {
497 | const line = lines[i].trim();
498 | if (
499 | line.startsWith('"""') ||
500 | line.startsWith("'''") ||
501 | line.startsWith("#")
502 | ) {
503 | return true;
504 | }
505 | }
506 | return false;
507 | }
508 | }
509 |
510 | // Go Parser Implementation (regex-based)
511 | export class GoParser implements LanguageParser {
512 | extensions = ["go"];
513 | name = "Go";
514 | supportsApiEndpoints = true;
515 |
516 | async parseFile(
517 | content: string,
518 | filePath: string,
519 | ): Promise<LanguageParseResult> {
520 | const result: LanguageParseResult = {
521 | functions: [],
522 | classes: [],
523 | interfaces: [],
524 | types: [],
525 | enums: [],
526 | exports: [],
527 | imports: [],
528 | apiEndpoints: [],
529 | constants: [],
530 | variables: [],
531 | };
532 |
533 | const lines = content.split("\n");
534 |
535 | lines.forEach((line, index) => {
536 | const lineNum = index + 1;
537 |
538 | // Function declarations
539 | const funcMatch = line.match(
540 | /^\s*func\s+(?:\([^)]*\)\s+)?([a-zA-Z_][a-zA-Z0-9_]*)\s*\(/,
541 | );
542 | if (funcMatch) {
543 | const funcName = funcMatch[1];
544 | result.functions.push({
545 | name: funcName,
546 | type: "function",
547 | filePath,
548 | line: lineNum,
549 | column: 0,
550 | exported: this.isGoExported(funcName),
551 | hasJSDoc: this.hasGoDocComment(lines, index),
552 | });
553 | }
554 |
555 | // Type declarations (struct, interface, etc.)
556 | const typeMatch = line.match(
557 | /^\s*type\s+([a-zA-Z_][a-zA-Z0-9_]*)\s+(struct|interface)/,
558 | );
559 | if (typeMatch) {
560 | const typeName = typeMatch[1];
561 | const typeKind = typeMatch[2];
562 |
563 | if (typeKind === "struct") {
564 | result.classes.push({
565 | name: typeName,
566 | type: "class",
567 | filePath,
568 | line: lineNum,
569 | column: 0,
570 | exported: this.isGoExported(typeName),
571 | hasJSDoc: this.hasGoDocComment(lines, index),
572 | });
573 | } else if (typeKind === "interface") {
574 | result.interfaces.push({
575 | name: typeName,
576 | type: "interface",
577 | filePath,
578 | line: lineNum,
579 | column: 0,
580 | exported: this.isGoExported(typeName),
581 | hasJSDoc: this.hasGoDocComment(lines, index),
582 | });
583 | }
584 | }
585 |
586 | // Import declarations
587 | const importMatch = line.match(/^\s*(?:import\s*)?"([^"]+)"/);
588 | if (importMatch) {
589 | result.imports.push({
590 | name: importMatch[1],
591 | type: "import",
592 | filePath,
593 | line: lineNum,
594 | column: 0,
595 | exported: false,
596 | });
597 | }
598 |
599 | // Constants and variables
600 | const constMatch = line.match(
601 | /^\s*(const|var)\s+([a-zA-Z_][a-zA-Z0-9_]*)/,
602 | );
603 | if (constMatch) {
604 | const declType = constMatch[1];
605 | const varName = constMatch[2];
606 |
607 | const element: CodeElement = {
608 | name: varName,
609 | type: "variable",
610 | filePath,
611 | line: lineNum,
612 | column: 0,
613 | exported: this.isGoExported(varName),
614 | hasJSDoc: this.hasGoDocComment(lines, index),
615 | };
616 |
617 | if (declType === "const") {
618 | result.constants.push(element);
619 | } else {
620 | result.variables.push(element);
621 | }
622 | }
623 | });
624 |
625 | // Find Go API endpoints
626 | this.findGoApiEndpoints(content, result, filePath);
627 |
628 | return result;
629 | }
630 |
631 | private isGoExported(name: string): boolean {
632 | // In Go, exported names start with uppercase letter
633 | return name.length > 0 && name[0] === name[0].toUpperCase();
634 | }
635 |
636 | private hasGoDocComment(lines: string[], lineIndex: number): boolean {
637 | // Check if previous line has a doc comment
638 | if (lineIndex > 0) {
639 | const prevLine = lines[lineIndex - 1].trim();
640 | return prevLine.startsWith("//");
641 | }
642 | return false;
643 | }
644 |
645 | private findGoApiEndpoints(
646 | content: string,
647 | result: LanguageParseResult,
648 | filePath: string,
649 | ) {
650 | // Common Go web framework patterns
651 | const patterns = [
652 | // Gin framework
653 | /\.(GET|POST|PUT|DELETE|PATCH)\s*\(\s*"([^"]+)"/g,
654 | // Echo framework
655 | /\.(Get|Post|Put|Delete|Patch)\s*\(\s*"([^"]+)"/g,
656 | // Gorilla mux
657 | /\.HandleFunc\s*\(\s*"([^"]+)"/g,
658 | // Standard library
659 | /http\.HandleFunc\s*\(\s*"([^"]+)"/g,
660 | ];
661 |
662 | patterns.forEach((pattern) => {
663 | let match;
664 | while ((match = pattern.exec(content)) !== null) {
665 | let method: APIEndpoint["method"] = "ALL";
666 | let path: string;
667 |
668 | if (match[1] && match[2]) {
669 | method = match[1].toUpperCase() as APIEndpoint["method"];
670 | path = match[2];
671 | } else {
672 | path = match[1] || match[2];
673 | }
674 |
675 | const beforeMatch = content.substring(0, match.index!);
676 | const line = beforeMatch.split("\n").length;
677 |
678 | result.apiEndpoints.push({
679 | method,
680 | path,
681 | filePath,
682 | line,
683 | hasDocumentation: this.hasEndpointDocumentation(
684 | content,
685 | match.index!,
686 | ),
687 | });
688 | }
689 | });
690 | }
691 |
692 | private hasEndpointDocumentation(
693 | content: string,
694 | matchIndex: number,
695 | ): boolean {
696 | const beforeMatch = content.substring(0, matchIndex);
697 | const lines = beforeMatch.split("\n");
698 |
699 | for (let i = Math.max(0, lines.length - 5); i < lines.length; i++) {
700 | const line = lines[i].trim();
701 | if (line.startsWith("//") || line.startsWith("/*")) {
702 | return true;
703 | }
704 | }
705 | return false;
706 | }
707 | }
708 |
709 | // YAML Parser for Kubernetes, Terraform, etc.
710 | export class YamlParser implements LanguageParser {
711 | extensions = ["yml", "yaml"];
712 | name = "YAML";
713 | supportsFrameworkDetection = true;
714 |
715 | async parseFile(
716 | content: string,
717 | filePath: string,
718 | ): Promise<LanguageParseResult> {
719 | const result: LanguageParseResult = {
720 | functions: [],
721 | classes: [],
722 | interfaces: [],
723 | types: [],
724 | enums: [],
725 | exports: [],
726 | imports: [],
727 | apiEndpoints: [],
728 | constants: [],
729 | variables: [],
730 | };
731 |
732 | // YAML parsing focuses on identifying Kubernetes resources, Terraform configs, etc.
733 | this.identifyKubernetesResources(content, result, filePath);
734 | this.identifyDockerComposeServices(content, result, filePath);
735 | this.identifyGitHubActions(content, result, filePath);
736 |
737 | return result;
738 | }
739 |
740 | private identifyKubernetesResources(
741 | content: string,
742 | result: LanguageParseResult,
743 | filePath: string,
744 | ) {
745 | const lines = content.split("\n");
746 | let apiVersion = "";
747 | let kind = "";
748 |
749 | lines.forEach((line, index) => {
750 | const lineNum = index + 1;
751 |
752 | const apiMatch = line.match(/^\s*apiVersion:\s*(.+)/);
753 | if (apiMatch) {
754 | apiVersion = apiMatch[1].trim();
755 | }
756 |
757 | const kindMatch = line.match(/^\s*kind:\s*(.+)/);
758 | if (kindMatch) {
759 | kind = kindMatch[1].trim();
760 |
761 | result.types.push({
762 | name: `${kind} (${apiVersion})`,
763 | type: "type",
764 | filePath,
765 | line: lineNum,
766 | column: 0,
767 | exported: true,
768 | hasJSDoc: false,
769 | });
770 | }
771 | });
772 | }
773 |
774 | private identifyDockerComposeServices(
775 | content: string,
776 | result: LanguageParseResult,
777 | filePath: string,
778 | ) {
779 | let inServicesSection = false;
780 |
781 | const lines = content.split("\n");
782 |
783 | lines.forEach((line, index) => {
784 | if (line.trim() === "services:") {
785 | inServicesSection = true;
786 | return;
787 | }
788 |
789 | if (inServicesSection && line.match(/^[a-zA-Z]/)) {
790 | inServicesSection = false; // Left services section
791 | }
792 |
793 | if (inServicesSection) {
794 | const serviceMatch = line.match(/^\s+([a-zA-Z0-9_-]+):\s*$/);
795 | if (serviceMatch) {
796 | result.types.push({
797 | name: `service: ${serviceMatch[1]}`,
798 | type: "type",
799 | filePath,
800 | line: index + 1,
801 | column: 0,
802 | exported: true,
803 | hasJSDoc: false,
804 | });
805 | }
806 | }
807 | });
808 | }
809 |
810 | private identifyGitHubActions(
811 | content: string,
812 | result: LanguageParseResult,
813 | filePath: string,
814 | ) {
815 | if (!filePath.includes(".github/workflows/")) return;
816 |
817 | const lines = content.split("\n");
818 | let inJobsSection = false;
819 |
820 | lines.forEach((line, index) => {
821 | if (line.trim() === "jobs:") {
822 | inJobsSection = true;
823 | return;
824 | }
825 |
826 | if (inJobsSection && line.match(/^[a-zA-Z]/)) {
827 | inJobsSection = false;
828 | }
829 |
830 | if (inJobsSection) {
831 | const jobMatch = line.match(/^\s+([a-zA-Z0-9_-]+):\s*$/);
832 | if (jobMatch) {
833 | result.functions.push({
834 | name: `job: ${jobMatch[1]}`,
835 | type: "function",
836 | filePath,
837 | line: index + 1,
838 | column: 0,
839 | exported: true,
840 | hasJSDoc: false,
841 | });
842 | }
843 | }
844 | });
845 | }
846 | }
847 |
848 | // Bash Parser for DevOps scripts
849 | export class BashParser implements LanguageParser {
850 | extensions = ["sh", "bash", "zsh"];
851 | name = "Bash";
852 |
853 | async parseFile(
854 | content: string,
855 | filePath: string,
856 | ): Promise<LanguageParseResult> {
857 | const result: LanguageParseResult = {
858 | functions: [],
859 | classes: [],
860 | interfaces: [],
861 | types: [],
862 | enums: [],
863 | exports: [],
864 | imports: [],
865 | apiEndpoints: [],
866 | constants: [],
867 | variables: [],
868 | };
869 |
870 | const lines = content.split("\n");
871 |
872 | lines.forEach((line, index) => {
873 | const lineNum = index + 1;
874 |
875 | // Function definitions
876 | const funcMatch = line.match(
877 | /^\s*(?:function\s+)?([a-zA-Z_][a-zA-Z0-9_]*)\s*\(\)/,
878 | );
879 | if (funcMatch) {
880 | const functionName = funcMatch[1];
881 |
882 | result.functions.push({
883 | name: functionName,
884 | type: "function",
885 | filePath,
886 | line: lineNum,
887 | column: 0,
888 | exported: true, // Bash functions are generally available in scope
889 | hasJSDoc: this.hasBashDocComment(lines, index),
890 | });
891 | }
892 |
893 | // Variable assignments
894 | const varMatch = line.match(/^\s*([A-Z_][A-Z0-9_]*)\s*=/);
895 | if (varMatch) {
896 | const varName = varMatch[1];
897 | const isConstant = varName === varName.toUpperCase();
898 |
899 | const element: CodeElement = {
900 | name: varName,
901 | type: "variable",
902 | filePath,
903 | line: lineNum,
904 | column: 0,
905 | exported: true,
906 | hasJSDoc: this.hasBashDocComment(lines, index),
907 | };
908 |
909 | if (isConstant) {
910 | result.constants.push(element);
911 | } else {
912 | result.variables.push(element);
913 | }
914 | }
915 | });
916 |
917 | return result;
918 | }
919 |
920 | private hasBashDocComment(lines: string[], lineIndex: number): boolean {
921 | // Check if previous line has a comment
922 | if (lineIndex > 0) {
923 | const prevLine = lines[lineIndex - 1].trim();
924 | return prevLine.startsWith("#");
925 | }
926 | return false;
927 | }
928 | }
929 |
```
--------------------------------------------------------------------------------
/tests/memory/kg-storage-validation.test.ts:
--------------------------------------------------------------------------------
```typescript
1 | /**
2 | * Tests for uncovered branches in KGStorage
3 | * Covers: Error handling (lines 197, 276), backup restoration with timestamp (lines 453-455),
4 | * validation errors (lines 496, 510), and other edge cases
5 | */
6 |
7 | import { describe, it, expect, beforeEach, afterEach } from "@jest/globals";
8 | import { promises as fs } from "fs";
9 | import { join } from "path";
10 | import { KGStorage } from "../../src/memory/kg-storage.js";
11 | import { GraphNode, GraphEdge } from "../../src/memory/knowledge-graph.js";
12 | import { tmpdir } from "os";
13 |
14 | describe("KGStorage - Validation and Error Handling", () => {
15 | let storage: KGStorage;
16 | let testDir: string;
17 |
18 | beforeEach(async () => {
19 | testDir = join(tmpdir(), `kg-storage-validation-test-${Date.now()}`);
20 | await fs.mkdir(testDir, { recursive: true });
21 |
22 | storage = new KGStorage({
23 | storageDir: testDir,
24 | backupOnWrite: true,
25 | validateOnRead: true,
26 | });
27 |
28 | await storage.initialize();
29 | });
30 |
31 | afterEach(async () => {
32 | try {
33 | await fs.rm(testDir, { recursive: true, force: true });
34 | } catch (error) {
35 | // Ignore cleanup errors
36 | }
37 | });
38 |
39 | describe("Load Error Handling", () => {
40 | it("should handle non-JSON lines in loadEntities gracefully (line 188)", async () => {
41 | const entityFile = join(testDir, "knowledge-graph-entities.jsonl");
42 |
43 | // Write marker + valid entity + invalid JSON + another valid entity
44 | await fs.writeFile(
45 | entityFile,
46 | "# DOCUMCP_KNOWLEDGE_GRAPH_ENTITIES v1.0.0\n" +
47 | '{"id":"e1","type":"project","label":"Project 1","properties":{},"weight":1.0,"lastUpdated":"2024-01-01"}\n' +
48 | "invalid json line {this is not valid}\n" +
49 | '{"id":"e2","type":"project","label":"Project 2","properties":{},"weight":1.0,"lastUpdated":"2024-01-01"}\n',
50 | "utf-8",
51 | );
52 |
53 | // Should load valid entities and skip invalid line
54 | const entities = await storage.loadEntities();
55 |
56 | expect(entities.length).toBe(2);
57 | expect(entities[0].id).toBe("e1");
58 | expect(entities[1].id).toBe("e2");
59 | });
60 |
61 | it("should handle non-JSON lines in loadRelationships gracefully (line 267)", async () => {
62 | const relationshipFile = join(
63 | testDir,
64 | "knowledge-graph-relationships.jsonl",
65 | );
66 |
67 | // Write marker + valid relationship + invalid JSON + another valid relationship
68 | await fs.writeFile(
69 | relationshipFile,
70 | "# DOCUMCP_KNOWLEDGE_GRAPH_RELATIONSHIPS v1.0.0\n" +
71 | '{"id":"r1","source":"s1","target":"t1","type":"uses","label":"Uses","properties":{},"weight":1.0,"lastUpdated":"2024-01-01"}\n' +
72 | "corrupted json {missing quotes and brackets\n" +
73 | '{"id":"r2","source":"s2","target":"t2","type":"uses","label":"Uses","properties":{},"weight":1.0,"lastUpdated":"2024-01-01"}\n',
74 | "utf-8",
75 | );
76 |
77 | // Should load valid relationships and skip invalid line
78 | const relationships = await storage.loadRelationships();
79 |
80 | expect(relationships.length).toBe(2);
81 | expect(relationships[0].id).toBe("r1");
82 | expect(relationships[1].id).toBe("r2");
83 | });
84 |
85 | it("should throw error when loadEntities encounters non-ENOENT error (line 197-201)", async () => {
86 | const entityFile = join(testDir, "knowledge-graph-entities.jsonl");
87 |
88 | // Create file with proper marker
89 | await fs.writeFile(
90 | entityFile,
91 | "# DOCUMCP_KNOWLEDGE_GRAPH_ENTITIES v1.0.0\n",
92 | "utf-8",
93 | );
94 |
95 | // Make file unreadable by changing permissions (Unix-like systems)
96 | if (process.platform !== "win32") {
97 | await fs.chmod(entityFile, 0o000);
98 |
99 | await expect(storage.loadEntities()).rejects.toThrow();
100 |
101 | // Restore permissions for cleanup
102 | await fs.chmod(entityFile, 0o644);
103 | }
104 | });
105 |
106 | it("should throw error when loadRelationships encounters non-ENOENT error (line 276-280)", async () => {
107 | const relationshipFile = join(
108 | testDir,
109 | "knowledge-graph-relationships.jsonl",
110 | );
111 |
112 | // Create file with proper marker
113 | await fs.writeFile(
114 | relationshipFile,
115 | "# DOCUMCP_KNOWLEDGE_GRAPH_RELATIONSHIPS v1.0.0\n",
116 | "utf-8",
117 | );
118 |
119 | // Make file unreadable (Unix-like systems)
120 | if (process.platform !== "win32") {
121 | await fs.chmod(relationshipFile, 0o000);
122 |
123 | await expect(storage.loadRelationships()).rejects.toThrow();
124 |
125 | // Restore permissions for cleanup
126 | await fs.chmod(relationshipFile, 0o644);
127 | }
128 | });
129 | });
130 |
131 | describe("Validation Errors", () => {
132 | it("should validate entity structure and throw on invalid entity (line 496)", async () => {
133 | const invalidEntity = {
134 | // Missing required 'type' and 'label' fields
135 | id: "invalid-entity",
136 | properties: {},
137 | weight: 1.0,
138 | lastUpdated: "2024-01-01",
139 | } as unknown as GraphNode;
140 |
141 | // Create a storage with validation enabled
142 | const validatingStorage = new KGStorage({
143 | storageDir: testDir,
144 | validateOnRead: true,
145 | });
146 | await validatingStorage.initialize();
147 |
148 | // Write invalid entity to file
149 | const entityFile = join(testDir, "knowledge-graph-entities.jsonl");
150 | await fs.writeFile(
151 | entityFile,
152 | "# DOCUMCP_KNOWLEDGE_GRAPH_ENTITIES v1.0.0\n" +
153 | '{"id":"invalid-entity","properties":{},"weight":1.0,"lastUpdated":"2024-01-01"}\n',
154 | "utf-8",
155 | );
156 |
157 | // Loading should skip the invalid entity (caught and logged)
158 | const entities = await validatingStorage.loadEntities();
159 | expect(entities.length).toBe(0); // Invalid entity skipped
160 | });
161 |
162 | it("should validate relationship structure and throw on invalid relationship (line 510)", async () => {
163 | // Create storage with validation enabled
164 | const validatingStorage = new KGStorage({
165 | storageDir: testDir,
166 | validateOnRead: true,
167 | });
168 | await validatingStorage.initialize();
169 |
170 | // Write invalid relationship (missing 'type' field)
171 | const relationshipFile = join(
172 | testDir,
173 | "knowledge-graph-relationships.jsonl",
174 | );
175 | await fs.writeFile(
176 | relationshipFile,
177 | "# DOCUMCP_KNOWLEDGE_GRAPH_RELATIONSHIPS v1.0.0\n" +
178 | '{"id":"r1","source":"s1","target":"t1","label":"Invalid","properties":{},"weight":1.0}\n',
179 | "utf-8",
180 | );
181 |
182 | // Loading should skip the invalid relationship
183 | const relationships = await validatingStorage.loadRelationships();
184 | expect(relationships.length).toBe(0); // Invalid relationship skipped
185 | });
186 |
187 | it("should validate entity has required fields: id, type, label (line 495-497)", async () => {
188 | const validatingStorage = new KGStorage({
189 | storageDir: testDir,
190 | validateOnRead: true,
191 | });
192 | await validatingStorage.initialize();
193 |
194 | const entityFile = join(testDir, "knowledge-graph-entities.jsonl");
195 |
196 | // Test missing 'id'
197 | await fs.writeFile(
198 | entityFile,
199 | "# DOCUMCP_KNOWLEDGE_GRAPH_ENTITIES v1.0.0\n" +
200 | '{"type":"project","label":"No ID","properties":{},"weight":1.0,"lastUpdated":"2024-01-01"}\n',
201 | "utf-8",
202 | );
203 |
204 | let entities = await validatingStorage.loadEntities();
205 | expect(entities.length).toBe(0);
206 |
207 | // Test missing 'type'
208 | await fs.writeFile(
209 | entityFile,
210 | "# DOCUMCP_KNOWLEDGE_GRAPH_ENTITIES v1.0.0\n" +
211 | '{"id":"e1","label":"No Type","properties":{},"weight":1.0,"lastUpdated":"2024-01-01"}\n',
212 | "utf-8",
213 | );
214 |
215 | entities = await validatingStorage.loadEntities();
216 | expect(entities.length).toBe(0);
217 |
218 | // Test missing 'label'
219 | await fs.writeFile(
220 | entityFile,
221 | "# DOCUMCP_KNOWLEDGE_GRAPH_ENTITIES v1.0.0\n" +
222 | '{"id":"e1","type":"project","properties":{},"weight":1.0,"lastUpdated":"2024-01-01"}\n',
223 | "utf-8",
224 | );
225 |
226 | entities = await validatingStorage.loadEntities();
227 | expect(entities.length).toBe(0);
228 | });
229 |
230 | it("should validate relationship has required fields: id, source, target, type (line 504-512)", async () => {
231 | const validatingStorage = new KGStorage({
232 | storageDir: testDir,
233 | validateOnRead: true,
234 | });
235 | await validatingStorage.initialize();
236 |
237 | const relationshipFile = join(
238 | testDir,
239 | "knowledge-graph-relationships.jsonl",
240 | );
241 |
242 | // Test missing 'id'
243 | await fs.writeFile(
244 | relationshipFile,
245 | "# DOCUMCP_KNOWLEDGE_GRAPH_RELATIONSHIPS v1.0.0\n" +
246 | '{"source":"s1","target":"t1","type":"uses","label":"Uses","properties":{},"weight":1.0}\n',
247 | "utf-8",
248 | );
249 |
250 | let relationships = await validatingStorage.loadRelationships();
251 | expect(relationships.length).toBe(0);
252 |
253 | // Test missing 'source'
254 | await fs.writeFile(
255 | relationshipFile,
256 | "# DOCUMCP_KNOWLEDGE_GRAPH_RELATIONSHIPS v1.0.0\n" +
257 | '{"id":"r1","target":"t1","type":"uses","label":"Uses","properties":{},"weight":1.0}\n',
258 | "utf-8",
259 | );
260 |
261 | relationships = await validatingStorage.loadRelationships();
262 | expect(relationships.length).toBe(0);
263 |
264 | // Test missing 'target'
265 | await fs.writeFile(
266 | relationshipFile,
267 | "# DOCUMCP_KNOWLEDGE_GRAPH_RELATIONSHIPS v1.0.0\n" +
268 | '{"id":"r1","source":"s1","type":"uses","label":"Uses","properties":{},"weight":1.0}\n',
269 | "utf-8",
270 | );
271 |
272 | relationships = await validatingStorage.loadRelationships();
273 | expect(relationships.length).toBe(0);
274 |
275 | // Test missing 'type'
276 | await fs.writeFile(
277 | relationshipFile,
278 | "# DOCUMCP_KNOWLEDGE_GRAPH_RELATIONSHIPS v1.0.0\n" +
279 | '{"id":"r1","source":"s1","target":"t1","label":"Uses","properties":{},"weight":1.0}\n',
280 | "utf-8",
281 | );
282 |
283 | relationships = await validatingStorage.loadRelationships();
284 | expect(relationships.length).toBe(0);
285 | });
286 |
287 | it("should not validate when validateOnRead is false", async () => {
288 | const nonValidatingStorage = new KGStorage({
289 | storageDir: testDir,
290 | validateOnRead: false,
291 | });
292 | await nonValidatingStorage.initialize();
293 |
294 | const entityFile = join(testDir, "knowledge-graph-entities.jsonl");
295 |
296 | // Write entity missing required fields
297 | await fs.writeFile(
298 | entityFile,
299 | "# DOCUMCP_KNOWLEDGE_GRAPH_ENTITIES v1.0.0\n" +
300 | '{"id":"e1","properties":{}}\n',
301 | "utf-8",
302 | );
303 |
304 | // Should load without validation (parse as-is)
305 | const entities = await nonValidatingStorage.loadEntities();
306 | expect(entities.length).toBe(1);
307 | expect(entities[0].id).toBe("e1");
308 | });
309 | });
310 |
311 | describe("Backup Restoration with Timestamp", () => {
312 | // TODO: Fix timing issue with backup file creation
313 | it.skip("should restore from backup with specific timestamp (lines 451-455)", async () => {
314 | const entities: GraphNode[] = [
315 | {
316 | id: "project:backup1",
317 | type: "project",
318 | label: "Backup Test 1",
319 | properties: {},
320 | weight: 1.0,
321 | lastUpdated: "2024-01-01",
322 | },
323 | ];
324 |
325 | // Save first version
326 | await storage.saveEntities(entities);
327 |
328 | // Get list of backups to find the timestamp
329 | const backupDir = join(testDir, "backups");
330 | const backups = await fs.readdir(backupDir);
331 | const entityBackups = backups.filter((f) => f.startsWith("entities-"));
332 |
333 | expect(entityBackups.length).toBeGreaterThan(0);
334 |
335 | // Extract timestamp from backup filename (format: entities-YYYY-MM-DDTHH-MM-SS-MMMZ.jsonl)
336 | const backupFilename = entityBackups[0];
337 | const timestampMatch = backupFilename.match(/entities-(.*?)\.jsonl/);
338 | expect(timestampMatch).not.toBeNull();
339 |
340 | const timestamp = timestampMatch![1];
341 |
342 | // Modify entities
343 | const modifiedEntities: GraphNode[] = [
344 | {
345 | id: "project:backup2",
346 | type: "project",
347 | label: "Modified",
348 | properties: {},
349 | weight: 1.0,
350 | lastUpdated: "2024-01-02",
351 | },
352 | ];
353 | await storage.saveEntities(modifiedEntities);
354 |
355 | // Verify current state
356 | let current = await storage.loadEntities();
357 | expect(current.length).toBe(1);
358 | expect(current[0].id).toBe("project:backup2");
359 |
360 | // Restore from backup using specific timestamp
361 | await storage.restoreFromBackup("entities", timestamp);
362 |
363 | // Verify restored state
364 | current = await storage.loadEntities();
365 | expect(current.length).toBe(1);
366 | expect(current[0].id).toBe("project:backup1");
367 | });
368 |
369 | it("should throw error when backup with timestamp not found (line 454-456)", async () => {
370 | const entities: GraphNode[] = [
371 | {
372 | id: "project:test",
373 | type: "project",
374 | label: "Test",
375 | properties: {},
376 | weight: 1.0,
377 | lastUpdated: "2024-01-01",
378 | },
379 | ];
380 |
381 | await storage.saveEntities(entities);
382 |
383 | // Try to restore with non-existent timestamp
384 | await expect(
385 | storage.restoreFromBackup("entities", "2099-12-31T23-59-59-999Z"),
386 | ).rejects.toThrow("Backup with timestamp");
387 | });
388 |
389 | it("should restore most recent backup when no timestamp specified (line 458-467)", async () => {
390 | const entities1: GraphNode[] = [
391 | {
392 | id: "project:v1",
393 | type: "project",
394 | label: "Version 1",
395 | properties: {},
396 | weight: 1.0,
397 | lastUpdated: "2024-01-01",
398 | },
399 | ];
400 |
401 | await storage.saveEntities(entities1);
402 |
403 | // Wait a bit to ensure different timestamps
404 | await new Promise((resolve) => setTimeout(resolve, 100));
405 |
406 | const entities2: GraphNode[] = [
407 | {
408 | id: "project:v2",
409 | type: "project",
410 | label: "Version 2",
411 | properties: {},
412 | weight: 1.0,
413 | lastUpdated: "2024-01-02",
414 | },
415 | ];
416 |
417 | await storage.saveEntities(entities2);
418 |
419 | await new Promise((resolve) => setTimeout(resolve, 100));
420 |
421 | const entities3: GraphNode[] = [
422 | {
423 | id: "project:v3",
424 | type: "project",
425 | label: "Version 3 (current)",
426 | properties: {},
427 | weight: 1.0,
428 | lastUpdated: "2024-01-03",
429 | },
430 | ];
431 |
432 | await storage.saveEntities(entities3);
433 |
434 | // Current state should be v3
435 | let current = await storage.loadEntities();
436 | expect(current[0].id).toBe("project:v3");
437 |
438 | // Restore without timestamp (should get most recent backup = v2)
439 | await storage.restoreFromBackup("entities");
440 |
441 | current = await storage.loadEntities();
442 | expect(current[0].id).toBe("project:v2");
443 | });
444 |
445 | // TODO: Fix timing issue with backup file creation
446 | it.skip("should restore relationships with timestamp", async () => {
447 | const relationships1: GraphEdge[] = [
448 | {
449 | id: "rel:v1",
450 | source: "s1",
451 | target: "t1",
452 | type: "uses",
453 | properties: {},
454 | weight: 1.0,
455 | confidence: 1.0,
456 | lastUpdated: "2024-01-01",
457 | },
458 | ];
459 |
460 | await storage.saveRelationships(relationships1);
461 |
462 | // Get backup timestamp
463 | const backupDir = join(testDir, "backups");
464 | const backups = await fs.readdir(backupDir);
465 | const relBackups = backups.filter((f) => f.startsWith("relationships-"));
466 |
467 | expect(relBackups.length).toBeGreaterThan(0);
468 |
469 | const timestampMatch = relBackups[0].match(/relationships-(.*?)\.jsonl/);
470 | const timestamp = timestampMatch![1];
471 |
472 | // Modify relationships
473 | const relationships2: GraphEdge[] = [
474 | {
475 | id: "rel:v2",
476 | source: "s2",
477 | target: "t2",
478 | type: "uses",
479 | properties: {},
480 | weight: 1.0,
481 | confidence: 1.0,
482 | lastUpdated: "2024-01-02",
483 | },
484 | ];
485 |
486 | await storage.saveRelationships(relationships2);
487 |
488 | // Restore from backup using timestamp
489 | await storage.restoreFromBackup("relationships", timestamp);
490 |
491 | const restored = await storage.loadRelationships();
492 | expect(restored[0].id).toBe("rel:v1");
493 | });
494 |
495 | it("should throw error when no backups exist (line 445-447)", async () => {
496 | // Try to restore when no backups exist
497 | await expect(storage.restoreFromBackup("entities")).rejects.toThrow(
498 | "No backups found",
499 | );
500 | });
501 |
502 | it("should log restoration in debug mode (line 478-481)", async () => {
503 | const entities: GraphNode[] = [
504 | {
505 | id: "project:debug",
506 | type: "project",
507 | label: "Debug Test",
508 | properties: {},
509 | weight: 1.0,
510 | lastUpdated: "2024-01-01",
511 | },
512 | ];
513 |
514 | await storage.saveEntities(entities);
515 |
516 | // Set DEBUG env var
517 | const originalDebug = process.env.DEBUG;
518 | process.env.DEBUG = "true";
519 |
520 | // Modify
521 | const modifiedEntities: GraphNode[] = [
522 | {
523 | id: "project:modified",
524 | type: "project",
525 | label: "Modified",
526 | properties: {},
527 | weight: 1.0,
528 | lastUpdated: "2024-01-02",
529 | },
530 | ];
531 | await storage.saveEntities(modifiedEntities);
532 |
533 | // Restore (should log in debug mode)
534 | await storage.restoreFromBackup("entities");
535 |
536 | // Restore original DEBUG setting
537 | if (originalDebug !== undefined) {
538 | process.env.DEBUG = originalDebug;
539 | } else {
540 | delete process.env.DEBUG;
541 | }
542 |
543 | // Verify restoration worked
544 | const restored = await storage.loadEntities();
545 | expect(restored[0].id).toBe("project:debug");
546 | });
547 | });
548 |
549 | describe("Error Handling Edge Cases", () => {
550 | it("should handle backup file access errors gracefully (line 337-340)", async () => {
551 | // This tests the warning path when backup fails due to file access issues
552 | const storage2 = new KGStorage({
553 | storageDir: testDir,
554 | backupOnWrite: true,
555 | });
556 |
557 | await storage2.initialize();
558 |
559 | // Save initial entities to create a file
560 | const initialEntities: GraphNode[] = [
561 | {
562 | id: "project:initial",
563 | type: "project",
564 | label: "Initial",
565 | properties: {},
566 | weight: 1.0,
567 | lastUpdated: "2024-01-01",
568 | },
569 | ];
570 | await storage2.saveEntities(initialEntities);
571 |
572 | // Make entity file unreadable (Unix-like systems only) to trigger backup error
573 | const entityFile = join(testDir, "knowledge-graph-entities.jsonl");
574 | if (process.platform !== "win32") {
575 | try {
576 | await fs.chmod(entityFile, 0o000);
577 | } catch (e) {
578 | // Skip test if chmod not supported
579 | return;
580 | }
581 | }
582 |
583 | // Saving should still attempt even if backup fails with non-ENOENT error
584 | const newEntities: GraphNode[] = [
585 | {
586 | id: "project:no-backup",
587 | type: "project",
588 | label: "No Backup",
589 | properties: {},
590 | weight: 1.0,
591 | lastUpdated: "2024-01-02",
592 | },
593 | ];
594 |
595 | // Will fail during backup read, but should warn and continue
596 | // This tests line 337-339: if (error.code !== "ENOENT")
597 | try {
598 | await storage2.saveEntities(newEntities);
599 | } catch (error) {
600 | // Might throw due to unreadable file
601 | }
602 |
603 | // Restore permissions for cleanup
604 | if (process.platform !== "win32") {
605 | try {
606 | await fs.chmod(entityFile, 0o644);
607 | } catch (e) {
608 | // Ignore
609 | }
610 | }
611 | });
612 |
613 | it("should handle cleanup of backups when file is deleted during iteration (line 369-371)", async () => {
614 | // Create multiple backups
615 | const entities: GraphNode[] = [
616 | {
617 | id: "project:cleanup",
618 | type: "project",
619 | label: "Cleanup Test",
620 | properties: {},
621 | weight: 1.0,
622 | lastUpdated: "2024-01-01",
623 | },
624 | ];
625 |
626 | // Create many backups (more than keepCount of 10)
627 | for (let i = 0; i < 15; i++) {
628 | await storage.saveEntities([
629 | {
630 | ...entities[0],
631 | id: `project:cleanup-${i}`,
632 | label: `Cleanup Test ${i}`,
633 | },
634 | ]);
635 | await new Promise((resolve) => setTimeout(resolve, 10));
636 | }
637 |
638 | // Old backups should be cleaned up automatically
639 | const backupDir = join(testDir, "backups");
640 | const backups = await fs.readdir(backupDir);
641 | const entityBackups = backups.filter((f) => f.startsWith("entities-"));
642 |
643 | // Should keep last 10 backups
644 | expect(entityBackups.length).toBeLessThanOrEqual(10);
645 | });
646 |
647 | it("should handle missing backup directory gracefully (line 388-391)", async () => {
648 | // Create storage without creating backups first
649 | const testDir2 = join(tmpdir(), `kg-no-backup-${Date.now()}`);
650 | await fs.mkdir(testDir2, { recursive: true });
651 |
652 | const storage2 = new KGStorage({
653 | storageDir: testDir2,
654 | backupOnWrite: false, // Disable backups
655 | });
656 |
657 | await storage2.initialize();
658 |
659 | const entities: GraphNode[] = [
660 | {
661 | id: "project:no-backup-dir",
662 | type: "project",
663 | label: "No Backup Dir",
664 | properties: {},
665 | weight: 1.0,
666 | lastUpdated: "2024-01-01",
667 | },
668 | ];
669 |
670 | // Should work fine without backup directory
671 | await storage2.saveEntities(entities);
672 |
673 | const loaded = await storage2.loadEntities();
674 | expect(loaded.length).toBe(1);
675 |
676 | await fs.rm(testDir2, { recursive: true, force: true });
677 | });
678 | });
679 |
680 | describe("Verify Integrity Coverage", () => {
681 | it("should detect orphaned relationships - missing source (line 535-538)", async () => {
682 | const entities: GraphNode[] = [
683 | {
684 | id: "project:exists",
685 | type: "project",
686 | label: "Exists",
687 | properties: {},
688 | weight: 1.0,
689 | lastUpdated: "2024-01-01",
690 | },
691 | ];
692 |
693 | const relationships: GraphEdge[] = [
694 | {
695 | id: "rel:orphan",
696 | source: "project:missing",
697 | target: "project:exists",
698 | type: "uses",
699 | properties: {},
700 | weight: 1.0,
701 | confidence: 1.0,
702 | lastUpdated: "2024-01-01",
703 | },
704 | ];
705 |
706 | await storage.saveEntities(entities);
707 | await storage.saveRelationships(relationships);
708 |
709 | const result = await storage.verifyIntegrity();
710 |
711 | expect(result.warnings.length).toBeGreaterThan(0);
712 | expect(
713 | result.warnings.some((w) => w.includes("missing source entity")),
714 | ).toBe(true);
715 | });
716 |
717 | it("should detect orphaned relationships - missing target (line 540-544)", async () => {
718 | const entities: GraphNode[] = [
719 | {
720 | id: "project:exists",
721 | type: "project",
722 | label: "Exists",
723 | properties: {},
724 | weight: 1.0,
725 | lastUpdated: "2024-01-01",
726 | },
727 | ];
728 |
729 | const relationships: GraphEdge[] = [
730 | {
731 | id: "rel:orphan",
732 | source: "project:exists",
733 | target: "project:missing",
734 | type: "uses",
735 | properties: {},
736 | weight: 1.0,
737 | confidence: 1.0,
738 | lastUpdated: "2024-01-01",
739 | },
740 | ];
741 |
742 | await storage.saveEntities(entities);
743 | await storage.saveRelationships(relationships);
744 |
745 | const result = await storage.verifyIntegrity();
746 |
747 | expect(result.warnings.length).toBeGreaterThan(0);
748 | expect(
749 | result.warnings.some((w) => w.includes("missing target entity")),
750 | ).toBe(true);
751 | });
752 |
753 |     // TODO: Fix — skipped because load-time validation currently rejects corrupted data before the integrity check can run
754 | it.skip("should catch errors during integrity check (lines 564-570)", async () => {
755 | // Save valid data
756 | const entities: GraphNode[] = [
757 | {
758 | id: "project:test",
759 | type: "project",
760 | label: "Test",
761 | properties: {},
762 | weight: 1.0,
763 | lastUpdated: "2024-01-01",
764 | },
765 | ];
766 |
767 | await storage.saveEntities(entities);
768 |
769 | // Corrupt the entity file to cause a parse error
770 | const entityFile = join(testDir, "knowledge-graph-entities.jsonl");
771 | await fs.writeFile(
772 | entityFile,
773 | "# DOCUMCP_KNOWLEDGE_GRAPH_ENTITIES v1.0.0\nthis is not valid json\n",
774 | "utf-8",
775 | );
776 |
777 | // Integrity check should catch the error
778 | const result = await storage.verifyIntegrity();
779 |
780 | expect(result.valid).toBe(false);
781 | expect(result.errors.length).toBeGreaterThan(0);
782 | expect(result.errors[0]).toContain("Integrity check failed");
783 | });
784 | });
785 | });
786 |
```