This is page 24 of 29. Use http://codebase.md/tosin2013/documcp?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .dockerignore
├── .eslintignore
├── .eslintrc.json
├── .github
│ ├── agents
│ │ ├── documcp-ast.md
│ │ ├── documcp-deploy.md
│ │ ├── documcp-memory.md
│ │ ├── documcp-test.md
│ │ └── documcp-tool.md
│ ├── copilot-instructions.md
│ ├── dependabot.yml
│ ├── ISSUE_TEMPLATE
│ │ ├── automated-changelog.md
│ │ ├── bug_report.md
│ │ ├── bug_report.yml
│ │ ├── documentation_issue.md
│ │ ├── feature_request.md
│ │ ├── feature_request.yml
│ │ ├── npm-publishing-fix.md
│ │ └── release_improvements.md
│ ├── PULL_REQUEST_TEMPLATE.md
│ ├── release-drafter.yml
│ └── workflows
│ ├── auto-merge.yml
│ ├── ci.yml
│ ├── codeql.yml
│ ├── dependency-review.yml
│ ├── deploy-docs.yml
│ ├── README.md
│ ├── release-drafter.yml
│ └── release.yml
├── .gitignore
├── .husky
│ ├── commit-msg
│ └── pre-commit
├── .linkcheck.config.json
├── .markdown-link-check.json
├── .nvmrc
├── .pre-commit-config.yaml
├── .versionrc.json
├── CHANGELOG.md
├── CODE_OF_CONDUCT.md
├── commitlint.config.js
├── CONTRIBUTING.md
├── docker-compose.docs.yml
├── Dockerfile.docs
├── docs
│ ├── .docusaurus
│ │ ├── docusaurus-plugin-content-docs
│ │ │ └── default
│ │ │ └── __mdx-loader-dependency.json
│ │ └── docusaurus-plugin-content-pages
│ │ └── default
│ │ └── __plugin.json
│ ├── adrs
│ │ ├── 001-mcp-server-architecture.md
│ │ ├── 002-repository-analysis-engine.md
│ │ ├── 003-static-site-generator-recommendation-engine.md
│ │ ├── 004-diataxis-framework-integration.md
│ │ ├── 005-github-pages-deployment-automation.md
│ │ ├── 006-mcp-tools-api-design.md
│ │ ├── 007-mcp-prompts-and-resources-integration.md
│ │ ├── 008-intelligent-content-population-engine.md
│ │ ├── 009-content-accuracy-validation-framework.md
│ │ ├── 010-mcp-resource-pattern-redesign.md
│ │ └── README.md
│ ├── api
│ │ ├── .nojekyll
│ │ ├── assets
│ │ │ ├── hierarchy.js
│ │ │ ├── highlight.css
│ │ │ ├── icons.js
│ │ │ ├── icons.svg
│ │ │ ├── main.js
│ │ │ ├── navigation.js
│ │ │ ├── search.js
│ │ │ └── style.css
│ │ ├── hierarchy.html
│ │ ├── index.html
│ │ ├── modules.html
│ │ └── variables
│ │ └── TOOLS.html
│ ├── assets
│ │ └── logo.svg
│ ├── development
│ │ └── MCP_INSPECTOR_TESTING.md
│ ├── docusaurus.config.js
│ ├── explanation
│ │ ├── architecture.md
│ │ └── index.md
│ ├── guides
│ │ ├── link-validation.md
│ │ ├── playwright-integration.md
│ │ └── playwright-testing-workflow.md
│ ├── how-to
│ │ ├── analytics-setup.md
│ │ ├── custom-domains.md
│ │ ├── documentation-freshness-tracking.md
│ │ ├── github-pages-deployment.md
│ │ ├── index.md
│ │ ├── local-testing.md
│ │ ├── performance-optimization.md
│ │ ├── prompting-guide.md
│ │ ├── repository-analysis.md
│ │ ├── seo-optimization.md
│ │ ├── site-monitoring.md
│ │ ├── troubleshooting.md
│ │ └── usage-examples.md
│ ├── index.md
│ ├── knowledge-graph.md
│ ├── package-lock.json
│ ├── package.json
│ ├── phase-2-intelligence.md
│ ├── reference
│ │ ├── api-overview.md
│ │ ├── cli.md
│ │ ├── configuration.md
│ │ ├── deploy-pages.md
│ │ ├── index.md
│ │ ├── mcp-tools.md
│ │ └── prompt-templates.md
│ ├── research
│ │ ├── cross-domain-integration
│ │ │ └── README.md
│ │ ├── domain-1-mcp-architecture
│ │ │ ├── index.md
│ │ │ └── mcp-performance-research.md
│ │ ├── domain-2-repository-analysis
│ │ │ └── README.md
│ │ ├── domain-3-ssg-recommendation
│ │ │ ├── index.md
│ │ │ └── ssg-performance-analysis.md
│ │ ├── domain-4-diataxis-integration
│ │ │ └── README.md
│ │ ├── domain-5-github-deployment
│ │ │ ├── github-pages-security-analysis.md
│ │ │ └── index.md
│ │ ├── domain-6-api-design
│ │ │ └── README.md
│ │ ├── README.md
│ │ ├── research-integration-summary-2025-01-14.md
│ │ ├── research-progress-template.md
│ │ └── research-questions-2025-01-14.md
│ ├── robots.txt
│ ├── sidebars.js
│ ├── sitemap.xml
│ ├── src
│ │ └── css
│ │ └── custom.css
│ └── tutorials
│ ├── development-setup.md
│ ├── environment-setup.md
│ ├── first-deployment.md
│ ├── getting-started.md
│ ├── index.md
│ ├── memory-workflows.md
│ └── user-onboarding.md
├── jest.config.js
├── LICENSE
├── Makefile
├── MCP_PHASE2_IMPLEMENTATION.md
├── mcp-config-example.json
├── mcp.json
├── package-lock.json
├── package.json
├── README.md
├── release.sh
├── scripts
│ └── check-package-structure.cjs
├── SECURITY.md
├── setup-precommit.sh
├── src
│ ├── benchmarks
│ │ └── performance.ts
│ ├── index.ts
│ ├── memory
│ │ ├── contextual-retrieval.ts
│ │ ├── deployment-analytics.ts
│ │ ├── enhanced-manager.ts
│ │ ├── export-import.ts
│ │ ├── freshness-kg-integration.ts
│ │ ├── index.ts
│ │ ├── integration.ts
│ │ ├── kg-code-integration.ts
│ │ ├── kg-health.ts
│ │ ├── kg-integration.ts
│ │ ├── kg-link-validator.ts
│ │ ├── kg-storage.ts
│ │ ├── knowledge-graph.ts
│ │ ├── learning.ts
│ │ ├── manager.ts
│ │ ├── multi-agent-sharing.ts
│ │ ├── pruning.ts
│ │ ├── schemas.ts
│ │ ├── storage.ts
│ │ ├── temporal-analysis.ts
│ │ ├── user-preferences.ts
│ │ └── visualization.ts
│ ├── prompts
│ │ └── technical-writer-prompts.ts
│ ├── scripts
│ │ └── benchmark.ts
│ ├── templates
│ │ └── playwright
│ │ ├── accessibility.spec.template.ts
│ │ ├── Dockerfile.template
│ │ ├── docs-e2e.workflow.template.yml
│ │ ├── link-validation.spec.template.ts
│ │ └── playwright.config.template.ts
│ ├── tools
│ │ ├── analyze-deployments.ts
│ │ ├── analyze-readme.ts
│ │ ├── analyze-repository.ts
│ │ ├── check-documentation-links.ts
│ │ ├── deploy-pages.ts
│ │ ├── detect-gaps.ts
│ │ ├── evaluate-readme-health.ts
│ │ ├── generate-config.ts
│ │ ├── generate-contextual-content.ts
│ │ ├── generate-llm-context.ts
│ │ ├── generate-readme-template.ts
│ │ ├── generate-technical-writer-prompts.ts
│ │ ├── kg-health-check.ts
│ │ ├── manage-preferences.ts
│ │ ├── manage-sitemap.ts
│ │ ├── optimize-readme.ts
│ │ ├── populate-content.ts
│ │ ├── readme-best-practices.ts
│ │ ├── recommend-ssg.ts
│ │ ├── setup-playwright-tests.ts
│ │ ├── setup-structure.ts
│ │ ├── sync-code-to-docs.ts
│ │ ├── test-local-deployment.ts
│ │ ├── track-documentation-freshness.ts
│ │ ├── update-existing-documentation.ts
│ │ ├── validate-content.ts
│ │ ├── validate-documentation-freshness.ts
│ │ ├── validate-readme-checklist.ts
│ │ └── verify-deployment.ts
│ ├── types
│ │ └── api.ts
│ ├── utils
│ │ ├── ast-analyzer.ts
│ │ ├── code-scanner.ts
│ │ ├── content-extractor.ts
│ │ ├── drift-detector.ts
│ │ ├── freshness-tracker.ts
│ │ ├── language-parsers-simple.ts
│ │ ├── permission-checker.ts
│ │ └── sitemap-generator.ts
│ └── workflows
│ └── documentation-workflow.ts
├── test-docs-local.sh
├── tests
│ ├── api
│ │ └── mcp-responses.test.ts
│ ├── benchmarks
│ │ └── performance.test.ts
│ ├── edge-cases
│ │ └── error-handling.test.ts
│ ├── functional
│ │ └── tools.test.ts
│ ├── integration
│ │ ├── kg-documentation-workflow.test.ts
│ │ ├── knowledge-graph-workflow.test.ts
│ │ ├── mcp-readme-tools.test.ts
│ │ ├── memory-mcp-tools.test.ts
│ │ ├── readme-technical-writer.test.ts
│ │ └── workflow.test.ts
│ ├── memory
│ │ ├── contextual-retrieval.test.ts
│ │ ├── enhanced-manager.test.ts
│ │ ├── export-import.test.ts
│ │ ├── freshness-kg-integration.test.ts
│ │ ├── kg-code-integration.test.ts
│ │ ├── kg-health.test.ts
│ │ ├── kg-link-validator.test.ts
│ │ ├── kg-storage-validation.test.ts
│ │ ├── kg-storage.test.ts
│ │ ├── knowledge-graph-enhanced.test.ts
│ │ ├── knowledge-graph.test.ts
│ │ ├── learning.test.ts
│ │ ├── manager-advanced.test.ts
│ │ ├── manager.test.ts
│ │ ├── mcp-resource-integration.test.ts
│ │ ├── mcp-tool-persistence.test.ts
│ │ ├── schemas.test.ts
│ │ ├── storage.test.ts
│ │ ├── temporal-analysis.test.ts
│ │ └── user-preferences.test.ts
│ ├── performance
│ │ ├── memory-load-testing.test.ts
│ │ └── memory-stress-testing.test.ts
│ ├── prompts
│ │ ├── guided-workflow-prompts.test.ts
│ │ └── technical-writer-prompts.test.ts
│ ├── server.test.ts
│ ├── setup.ts
│ ├── tools
│ │ ├── all-tools.test.ts
│ │ ├── analyze-coverage.test.ts
│ │ ├── analyze-deployments.test.ts
│ │ ├── analyze-readme.test.ts
│ │ ├── analyze-repository.test.ts
│ │ ├── check-documentation-links.test.ts
│ │ ├── deploy-pages-kg-retrieval.test.ts
│ │ ├── deploy-pages-tracking.test.ts
│ │ ├── deploy-pages.test.ts
│ │ ├── detect-gaps.test.ts
│ │ ├── evaluate-readme-health.test.ts
│ │ ├── generate-contextual-content.test.ts
│ │ ├── generate-llm-context.test.ts
│ │ ├── generate-readme-template.test.ts
│ │ ├── generate-technical-writer-prompts.test.ts
│ │ ├── kg-health-check.test.ts
│ │ ├── manage-sitemap.test.ts
│ │ ├── optimize-readme.test.ts
│ │ ├── readme-best-practices.test.ts
│ │ ├── recommend-ssg-historical.test.ts
│ │ ├── recommend-ssg-preferences.test.ts
│ │ ├── recommend-ssg.test.ts
│ │ ├── simple-coverage.test.ts
│ │ ├── sync-code-to-docs.test.ts
│ │ ├── test-local-deployment.test.ts
│ │ ├── tool-error-handling.test.ts
│ │ ├── track-documentation-freshness.test.ts
│ │ ├── validate-content.test.ts
│ │ ├── validate-documentation-freshness.test.ts
│ │ └── validate-readme-checklist.test.ts
│ ├── types
│ │ └── type-safety.test.ts
│ └── utils
│ ├── ast-analyzer.test.ts
│ ├── content-extractor.test.ts
│ ├── drift-detector.test.ts
│ ├── freshness-tracker.test.ts
│ └── sitemap-generator.test.ts
├── tsconfig.json
└── typedoc.json
```
# Files
--------------------------------------------------------------------------------
/docs/adrs/008-intelligent-content-population-engine.md:
--------------------------------------------------------------------------------
```markdown
1 | ---
2 | id: 008-intelligent-content-population-engine
3 | title: "ADR-008: Intelligent Content Population Engine"
4 | sidebar_label: "ADR-8: Intelligent Content Population Engine"
5 | sidebar_position: 8
6 | documcp:
7 | last_updated: "2025-11-20T00:46:21.942Z"
8 | last_validated: "2025-11-20T00:46:21.942Z"
9 | auto_updated: false
10 | update_frequency: monthly
11 | ---
12 |
13 | # ADR-008: Intelligent Content Population Engine for Diataxis Documentation
14 |
15 | ## Status
16 |
17 | Accepted
18 |
19 | ## Context
20 |
21 | DocuMCP currently creates excellent Diataxis-compliant documentation structures through ADR-004 and ADR-006, but produces only skeleton content with placeholder text. This creates a significant gap between the framework's potential and delivered value, requiring users to manually populate all documentation content despite having comprehensive repository analysis data available.
22 |
23 | The current `setup-structure` tool (from ADR-006) provides:
24 |
25 | - ✅ Professional Diataxis directory structure
26 | - ✅ SSG-specific configuration and frontmatter
27 | - ✅ Basic template content explaining Diataxis categories
28 | - ❌ **Missing**: Project-specific content analysis and intelligent population
29 | - ❌ **Missing**: Repository analysis integration for content suggestions
30 | - ❌ **Missing**: Technology-specific documentation generation
31 |
32 | **Current User Journey:**
33 |
34 | 1. Repository analysis identifies TypeScript project with Express.js, PostgreSQL, Jest tests
35 | 2. Diataxis structure created with generic placeholder content
36 | 3. User must manually research and write all tutorials, how-to guides, reference docs, and explanations
37 | 4. **Result**: 8-20 hours of manual documentation work despite intelligent analysis
38 |
39 | **Target User Journey:**
40 |
41 | 1. Repository analysis identifies project characteristics and technology stack
42 | 2. Intelligent content population generates project-specific documentation
43 | 3. User reviews and refines 60-80% pre-populated, contextually relevant content
44 | 4. **Result**: 1-2 hours of refinement work with professional-quality starting point
45 |
46 | Key gaps identified:
47 |
48 | - Repository analysis data (125 files, TypeScript/JavaScript ecosystem, test infrastructure) not leveraged for content generation
49 | - Extensive technology detection capabilities underutilized for creating relevant examples
50 | - Diataxis framework implementation incomplete without intelligent content planning (ADR-004, lines 153-192)
51 | - Competitive disadvantage: users get empty templates instead of intelligent assistance
52 |
53 | ## Decision
54 |
55 | We will implement an Intelligent Content Population Engine that bridges repository analysis with Diataxis content generation, creating the missing layer between structural generation and user-ready documentation.
56 |
57 | ### Architecture Overview:
58 |
59 | #### 1. Content Intelligence Engine
60 |
61 | **Purpose**: Transform repository analysis into structured content plans
62 | **Core Capabilities**:
63 |
64 | - Project characteristic analysis (technology stack, architecture patterns, API surfaces)
65 | - User journey mapping to appropriate Diataxis categories
66 | - Content gap identification and priority assignment
67 | - Technology-specific example and code snippet generation
68 |
69 | #### 2. Project-Aware Content Generators
70 |
71 | **Purpose**: Create contextually relevant content for each Diataxis category
72 | **Scope**: Four specialized generators aligned with Diataxis framework:
73 |
74 | ##### Tutorial Content Generator
75 |
76 | - **Getting Started**: Framework-specific installation, setup, and first success
77 | - **Feature Tutorials**: Based on detected APIs, key dependencies, and project complexity
78 | - **Integration Tutorials**: For detected services, databases, and external dependencies
79 |
80 | ##### How-To Guide Generator
81 |
82 | - **Common Tasks**: Derived from project type and technology stack
83 | - **Troubleshooting**: Based on detected tools, frameworks, and common pain points
84 | - **Deployment Guides**: Technology-specific deployment patterns and best practices
85 |
86 | ##### Reference Documentation Generator
87 |
88 | - **API Documentation**: Auto-generate from detected API surfaces and endpoints
89 | - **Configuration Reference**: Based on identified config files and environment variables
90 | - **CLI Reference**: For detected command-line tools and scripts
91 |
92 | ##### Explanation Content Generator
93 |
94 | - **Architecture Overview**: Based on detected patterns, dependencies, and project structure
95 | - **Design Decisions**: Technology choices and their implications
96 | - **Concept Explanations**: Framework and domain-specific concepts
97 |
98 | #### 3. Repository Analysis Integration Layer
99 |
100 | **Purpose**: Bridge analysis data with content generation
101 | **Integration Points**:
102 |
103 | - Language ecosystem analysis → Technology-specific content
104 | - Dependency analysis → Framework integration guides
105 | - Project structure analysis → Architecture documentation
106 | - Complexity assessment → Content depth and sophistication level
107 |
108 | ### Implementation Architecture:
109 |
110 | ```typescript
111 | interface ContentPopulationEngine {
112 | // Core engine interface
113 | populateContent(
114 | analysisId: string,
115 | docsPath: string,
116 | options: PopulationOptions,
117 | ): Promise<PopulationResult>;
118 |
119 | // Content planning
120 | generateContentPlan(analysis: RepositoryAnalysis): ContentPlan;
121 | identifyContentGaps(
122 | existing: ExistingContent,
123 | plan: ContentPlan,
124 | ): ContentGap[];
125 |
126 | // Content generation
127 | generateTutorialContent(
128 | plan: TutorialPlan,
129 | context: ProjectContext,
130 | ): TutorialContent;
131 | generateHowToContent(plan: HowToPlan, context: ProjectContext): HowToContent;
132 | generateReferenceContent(
133 | plan: ReferencePlan,
134 | context: ProjectContext,
135 | ): ReferenceContent;
136 | generateExplanationContent(
137 | plan: ExplanationPlan,
138 | context: ProjectContext,
139 | ): ExplanationContent;
140 | }
141 |
142 | interface PopulationOptions {
143 | level: "basic" | "comprehensive" | "intelligent";
144 | includeCodeExamples: boolean;
145 | projectSpecific: boolean;
146 | preserveExisting: boolean;
147 | customizationProfile?: CustomizationProfile;
148 | }
149 |
150 | interface ContentPlan {
151 | tutorials: TutorialSuggestion[];
152 | howToGuides: HowToSuggestion[];
153 | reference: ReferenceSuggestion[];
154 | explanation: ExplanationSuggestion[];
155 | crossReferences: ContentRelationship[];
156 | estimatedEffort: EffortEstimate;
157 | }
158 |
159 | interface ProjectContext {
160 | primaryLanguage: string;
161 | frameworks: Framework[];
162 | architecture: ArchitecturePattern;
163 | apiSurfaces: APIAnalysis[];
164 | deploymentTargets: DeploymentTarget[];
165 | testingFrameworks: TestingFramework[];
166 | dependencies: DependencyAnalysis;
167 | }
168 | ```
169 |
170 | ### Content Generation Algorithms:
171 |
172 | #### Tutorial Generation Algorithm
173 |
174 | ```typescript
175 | function generateTutorials(analysis: RepositoryAnalysis): TutorialSuggestion[] {
176 | const suggestions: TutorialSuggestion[] = [];
177 |
178 | // Always include getting started
179 | suggestions.push({
180 | title: `Getting Started with ${analysis.metadata.projectName}`,
181 | description: `Learn ${analysis.recommendations.primaryLanguage} development with ${analysis.metadata.projectName}`,
182 | priority: "high",
183 | sections: generateGettingStartedSections(analysis),
184 | codeExamples: generateTechnologySpecificExamples(
185 | analysis.dependencies.ecosystem,
186 | ),
187 | });
188 |
189 | // Framework-specific tutorials
190 | if (analysis.dependencies.packages.includes("express")) {
191 | suggestions.push({
192 | title: "Building REST APIs with Express.js",
193 | description: "Complete guide to creating RESTful services",
194 | priority: "high",
195 | sections: generateExpressTutorialSections(analysis),
196 | });
197 | }
198 |
199 | // Database integration tutorials
200 | const dbDeps = detectDatabaseDependencies(analysis.dependencies.packages);
201 | dbDeps.forEach((db) => {
202 | suggestions.push({
203 | title: `Database Integration with ${db.name}`,
204 | description: `Connect and interact with ${db.name} databases`,
205 | priority: "medium",
206 | sections: generateDatabaseTutorialSections(db, analysis),
207 | });
208 | });
209 |
210 | return suggestions;
211 | }
212 | ```
213 |
214 | #### Reference Generation Algorithm
215 |
216 | ```typescript
217 | function generateReference(
218 | analysis: RepositoryAnalysis,
219 | ): ReferenceSuggestion[] {
220 | const suggestions: ReferenceSuggestion[] = [];
221 |
222 | // API documentation from detected endpoints
223 | const apiSurfaces = detectAPIEndpoints(analysis);
224 | if (apiSurfaces.length > 0) {
225 | suggestions.push({
226 | title: "API Reference",
227 | description: "Complete API endpoint documentation",
228 | content: generateAPIDocumentation(apiSurfaces),
229 | format: "openapi-spec",
230 | });
231 | }
232 |
233 | // Configuration reference from detected config files
234 | const configFiles = detectConfigurationFiles(analysis);
235 | configFiles.forEach((config) => {
236 | suggestions.push({
237 | title: `${config.type} Configuration`,
238 | description: `Configuration options for ${config.name}`,
239 | content: generateConfigurationReference(config),
240 | format: "configuration-table",
241 | });
242 | });
243 |
244 | // CLI reference from detected scripts
245 | const cliCommands = detectCLICommands(analysis);
246 | if (cliCommands.length > 0) {
247 | suggestions.push({
248 | title: "Command Line Interface",
249 | description: "Available commands and options",
250 | content: generateCLIReference(cliCommands),
251 | format: "cli-documentation",
252 | });
253 | }
254 |
255 | return suggestions;
256 | }
257 | ```
258 |
259 | ### Technology-Specific Content Templates:
260 |
261 | #### JavaScript/TypeScript Ecosystem
262 |
263 | ```typescript
264 | const JAVASCRIPT_TEMPLATES = {
265 | gettingStarted: {
266 | prerequisites: ["Node.js 20.0.0+", "npm or yarn", "Git"],
267 | installationSteps: [
268 | "Clone the repository",
269 | "Install dependencies with npm install",
270 | "Copy environment variables",
271 | "Run development server",
272 | ],
273 | verificationSteps: [
274 | "Check server starts successfully",
275 | "Access application in browser",
276 | "Run test suite to verify setup",
277 | ],
278 | },
279 |
280 | expressAPI: {
281 | sections: [
282 | "Project Structure Overview",
283 | "Creating Your First Route",
284 | "Middleware Configuration",
285 | "Database Integration",
286 | "Error Handling",
287 | "Testing Your API",
288 | ],
289 | codeExamples: generateExpressCodeExamples,
290 | },
291 |
292 | testingGuides: {
293 | jest: generateJestHowToGuides,
294 | cypress: generateCypressHowToGuides,
295 | playwright: generatePlaywrightHowToGuides,
296 | },
297 | };
298 | ```
299 |
300 | #### Multi-Language Framework Support
301 |
302 | ##### JavaScript/TypeScript Ecosystem
303 |
304 | ```typescript
305 | const JAVASCRIPT_TEMPLATES = {
306 | gettingStarted: {
307 | prerequisites: ["Node.js 20.0.0+", "npm or yarn", "Git"],
308 | installationSteps: [
309 | "Clone the repository",
310 | "Install dependencies with npm install",
311 | "Copy environment variables",
312 | "Run development server",
313 | ],
314 | verificationSteps: [
315 | "Check server starts successfully",
316 | "Access application in browser",
317 | "Run test suite to verify setup",
318 | ],
319 | },
320 |
321 | frameworks: {
322 | express: {
323 | tutorials: [
324 | "REST API Development",
325 | "Middleware Configuration",
326 | "Database Integration",
327 | ],
328 | howToGuides: [
329 | "Performance Optimization",
330 | "Error Handling",
331 | "Authentication Setup",
332 | ],
333 | reference: [
334 | "Route Configuration",
335 | "Middleware Reference",
336 | "Configuration Options",
337 | ],
338 | explanation: [
339 | "Express Architecture",
340 | "Middleware Pattern",
341 | "Async Handling",
342 | ],
343 | },
344 | react: {
345 | tutorials: ["Component Development", "State Management", "React Router"],
346 | howToGuides: [
347 | "Performance Optimization",
348 | "Testing Components",
349 | "Deployment",
350 | ],
351 | reference: ["Component API", "Hooks Reference", "Build Configuration"],
352 | explanation: [
353 | "Component Architecture",
354 | "State Flow",
355 | "Rendering Lifecycle",
356 | ],
357 | },
358 | nestjs: {
359 | tutorials: [
360 | "Dependency Injection",
361 | "Controllers and Services",
362 | "Database Integration",
363 | ],
364 | howToGuides: [
365 | "Custom Decorators",
366 | "Microservices",
367 | "GraphQL Integration",
368 | ],
369 | reference: ["Decorator Reference", "Module System", "Configuration"],
370 | explanation: ["DI Architecture", "Module Design", "Enterprise Patterns"],
371 | },
372 | },
373 | };
374 | ```
375 |
376 | ##### Python Ecosystem Support
377 |
378 | ```typescript
379 | const PYTHON_TEMPLATES = {
380 | gettingStarted: {
381 | prerequisites: ["Python 3.8+", "pip or poetry", "Virtual environment"],
382 | installationSteps: [
383 | "Create virtual environment",
384 | "Activate virtual environment",
385 | "Install dependencies from requirements.txt/pyproject.toml",
386 | "Set up environment variables",
387 | "Run development server",
388 | ],
389 | verificationSteps: [
390 | "Check application starts successfully",
391 | "Run test suite with pytest",
392 | "Verify API endpoints respond correctly",
393 | ],
394 | },
395 |
396 | frameworks: {
397 | django: {
398 | tutorials: [
399 | "Django Project Setup and Configuration",
400 | "Models and Database Integration",
401 | "Views and URL Routing",
402 | "Django REST Framework APIs",
403 | "User Authentication and Permissions",
404 | ],
405 | howToGuides: [
406 | "Deploy Django to Production",
407 | "Optimize Database Queries",
408 | "Implement Caching Strategies",
409 | "Handle File Uploads",
410 | "Configure CORS and Security",
411 | ],
412 | reference: [
413 | "Django Settings Reference",
414 | "Model Field Types",
415 | "URL Configuration Patterns",
416 | "Middleware Reference",
417 | "Management Commands",
418 | ],
419 | explanation: [
420 | "Django MTV Architecture",
421 | "ORM Design Decisions",
422 | "Security Model",
423 | "Scalability Patterns",
424 | ],
425 | },
426 | fastapi: {
427 | tutorials: [
428 | "FastAPI Application Structure",
429 | "Pydantic Models and Validation",
430 | "Dependency Injection System",
431 | "Database Integration with SQLAlchemy",
432 | "Authentication and Security",
433 | ],
434 | howToGuides: [
435 | "Optimize FastAPI Performance",
436 | "Implement Background Tasks",
437 | "Handle File Processing",
438 | "Set up Monitoring and Logging",
439 | "Deploy with Docker and Kubernetes",
440 | ],
441 | reference: [
442 | "FastAPI Decorators Reference",
443 | "Pydantic Model Configuration",
444 | "Dependency System Reference",
445 | "Security Utilities",
446 | "Testing Utilities",
447 | ],
448 | explanation: [
449 | "ASGI vs WSGI Architecture",
450 | "Type Hints and Validation",
451 | "Dependency Injection Benefits",
452 | "Performance Characteristics",
453 | ],
454 | },
455 | flask: {
456 | tutorials: [
457 | "Flask Application Factory Pattern",
458 | "Blueprint Organization",
459 | "Database Integration with SQLAlchemy",
460 | "User Session Management",
461 | "RESTful API Development",
462 | ],
463 | howToGuides: [
464 | "Structure Large Flask Applications",
465 | "Implement Rate Limiting",
466 | "Handle Background Jobs",
467 | "Configure Production Deployment",
468 | "Debug Flask Applications",
469 | ],
470 | reference: [
471 | "Flask Configuration Reference",
472 | "Request and Response Objects",
473 | "Template Engine Reference",
474 | "Extension Integration",
475 | "CLI Commands",
476 | ],
477 | explanation: [
478 | "Flask Philosophy and Design",
479 | "WSGI Application Structure",
480 | "Extension Ecosystem",
481 | "Microframework Benefits",
482 | ],
483 | },
484 | },
485 | };
486 |
487 | class PythonContentGenerator implements FrameworkContentGenerator {
488 | detectFramework(analysis: RepositoryAnalysis): Framework[] {
489 | const frameworks: Framework[] = [];
490 |
491 | // Django detection
492 | if (
493 | this.hasDependency(analysis, "django") ||
494 | this.hasFile(analysis, "manage.py") ||
495 | this.hasFile(analysis, "settings.py")
496 | ) {
497 | frameworks.push({
498 | name: "django",
499 | version: this.extractVersion(analysis, "django"),
500 | configFiles: ["settings.py", "urls.py", "wsgi.py"],
501 | appStructure: this.analyzeDjangoApps(analysis),
502 | });
503 | }
504 |
505 | // FastAPI detection
506 | if (
507 | this.hasDependency(analysis, "fastapi") ||
508 | this.hasImport(analysis, "from fastapi import")
509 | ) {
510 | frameworks.push({
511 | name: "fastapi",
512 | version: this.extractVersion(analysis, "fastapi"),
513 | configFiles: this.getFastAPIConfigFiles(analysis),
514 | routerStructure: this.analyzeFastAPIRouters(analysis),
515 | });
516 | }
517 |
518 | // Flask detection
519 | if (
520 | this.hasDependency(analysis, "flask") ||
521 | this.hasImport(analysis, "from flask import")
522 | ) {
523 | frameworks.push({
524 | name: "flask",
525 | version: this.extractVersion(analysis, "flask"),
526 | configFiles: this.getFlaskConfigFiles(analysis),
527 | blueprintStructure: this.analyzeFlaskBlueprints(analysis),
528 | });
529 | }
530 |
531 | return frameworks;
532 | }
533 |
534 | generateFrameworkContent(
535 | framework: Framework,
536 | context: ProjectContext,
537 | ): FrameworkContent {
538 | const templates = PYTHON_TEMPLATES.frameworks[framework.name];
539 |
540 | return {
541 | tutorials: templates.tutorials.map((title) => ({
542 | title: `${title} for ${context.projectName}`,
543 | content: this.generatePythonTutorialContent(framework, title, context),
544 | codeExamples: this.generatePythonCodeExamples(
545 | framework,
546 | title,
547 | context,
548 | ),
549 | })),
550 | howToGuides: templates.howToGuides.map((title) => ({
551 | title,
552 | content: this.generatePythonHowToContent(framework, title, context),
553 | tasks: this.generatePythonTasks(framework, title, context),
554 | })),
555 | reference: templates.reference.map((title) => ({
556 | title,
557 | content: this.generatePythonReferenceContent(framework, title, context),
558 | })),
559 | explanation: templates.explanation.map((title) => ({
560 | title,
561 | content: this.generatePythonExplanationContent(
562 | framework,
563 | title,
564 | context,
565 | ),
566 | })),
567 | };
568 | }
569 | }
570 | ```
571 |
572 | #### Framework-Specific Content Generation
573 |
574 | ```typescript
575 | interface FrameworkContentGenerator {
 576 |   detectFramework(analysis: RepositoryAnalysis): Framework[];
577 | generateFrameworkContent(
578 | framework: Framework,
579 | context: ProjectContext,
580 | ): FrameworkContent;
581 | }
582 |
583 | const FRAMEWORK_GENERATORS: Record<string, FrameworkContentGenerator> = {
584 | // JavaScript/TypeScript frameworks
585 | express: new ExpressContentGenerator(),
586 | react: new ReactContentGenerator(),
587 | vue: new VueContentGenerator(),
588 | angular: new AngularContentGenerator(),
589 | nestjs: new NestJSContentGenerator(),
590 | fastify: new FastifyContentGenerator(),
591 |
592 | // Python frameworks
593 | django: new DjangoContentGenerator(),
594 | fastapi: new FastAPIContentGenerator(),
595 | flask: new FlaskContentGenerator(),
596 | pyramid: new PyramidContentGenerator(),
597 |
598 | // Future language support
599 | "spring-boot": new SpringBootContentGenerator(), // Java
600 | gin: new GinContentGenerator(), // Go
601 | "actix-web": new ActixContentGenerator(), // Rust
602 | };
603 | ```
604 |
605 | ## Alternatives Considered
606 |
607 | ### Manual Content Creation Only
608 |
609 | - **Pros**: Simple implementation, full user control, no AI dependency
610 | - **Cons**: Massive user effort, inconsistent quality, underutilizes analysis capabilities
611 | - **Decision**: Rejected - provides minimal value over generic templates
612 |
613 | ### AI-Generated Content via External APIs
614 |
615 | - **Pros**: Advanced content generation, natural language processing
616 | - **Cons**: External dependencies, costs, inconsistent quality, latency issues
617 | - **Decision**: Rejected for initial version - adds complexity without guaranteed quality
618 |
619 | ### Community-Contributed Content Templates
620 |
621 | - **Pros**: Diverse perspectives, battle-tested content, community engagement
622 | - **Cons**: Quality control challenges, maintenance overhead, incomplete coverage
623 | - **Decision**: Considered for future enhancement - focus on algorithmic generation first
624 |
625 | ### Generic Template Expansion
626 |
627 | - **Pros**: Easier implementation, consistent structure
628 | - **Cons**: Still requires significant manual work, doesn't leverage analysis intelligence
629 | - **Decision**: Rejected - doesn't address core value proposition gap
630 |
631 | ## Consequences
632 |
633 | ### Positive
634 |
635 | - **Dramatic User Value Increase**: 60-80% content pre-population vs. empty templates
636 | - **Competitive Differentiation**: Only documentation tool with intelligent content generation
637 | - **Analysis ROI**: Comprehensive repository analysis finally delivers proportional value
638 | - **Framework Completion**: Fulfills ADR-004 vision for content planning intelligence
639 | - **User Experience**: Transform from "structure generator" to "documentation assistant"
640 |
641 | ### Negative
642 |
643 | - **Implementation Complexity**: Significant engineering effort for content generation algorithms
644 | - **Content Quality Risk**: Generated content may require refinement for accuracy
645 | - **Technology Coverage**: Initial version limited to well-known frameworks and patterns
646 | - **Maintenance Overhead**: Content templates require updates as technologies evolve
647 |
648 | ### Risks and Mitigations
649 |
650 | - **Quality Control**: Implement content validation and user review workflows
651 | - **Technology Coverage**: Start with most common frameworks, expand based on usage
652 | - **Algorithm Accuracy**: Validate generated content against project reality
653 | - **User Expectations**: Clear communication about generated vs. curated content
654 |
655 | ## Implementation Details
656 |
657 | ### MCP Tool Interface
658 |
659 | ```typescript
660 | // New tool: populate_diataxis_content
661 | interface PopulateDiataxisContentTool {
662 | name: "populate_diataxis_content";
663 | description: "Intelligently populate Diataxis documentation with project-specific content";
664 | inputSchema: {
665 | type: "object";
666 | properties: {
667 | analysisId: {
668 | type: "string";
669 | description: "Repository analysis ID from analyze_repository tool";
670 | };
671 | docsPath: {
672 | type: "string";
673 | description: "Path to documentation directory";
674 | };
675 | populationLevel: {
676 | type: "string";
677 | enum: ["basic", "comprehensive", "intelligent"];
678 | default: "comprehensive";
679 | description: "Level of content generation detail";
680 | };
681 | includeProjectSpecific: {
682 | type: "boolean";
683 | default: true;
684 | description: "Generate project-specific examples and code";
685 | };
686 | preserveExisting: {
687 | type: "boolean";
688 | default: true;
689 | description: "Preserve any existing content";
690 | };
691 | technologyFocus: {
692 | type: "array";
693 | items: { type: "string" };
694 | description: "Specific technologies to emphasize in content";
695 | };
696 | };
697 | required: ["analysisId", "docsPath"];
698 | };
699 | }
700 | ```
701 |
702 | ### Content Generation Pipeline
703 |
704 | ```typescript
705 | class ContentPopulationEngine {
706 | async populateContent(args: PopulationArgs): Promise<PopulationResult> {
707 | try {
708 | // 1. Retrieve and validate repository analysis
709 | const analysis = await this.getRepositoryAnalysis(args.analysisId);
710 | this.validateAnalysis(analysis);
711 |
712 | // 2. Generate content plan based on project characteristics
713 | const contentPlan = await this.generateContentPlan(
714 | analysis,
715 | args.populationLevel,
716 | );
717 |
718 | // 3. Generate content for each Diataxis category
719 | const [tutorials, howTos, reference, explanation] = await Promise.all([
720 | this.generateTutorialContent(contentPlan.tutorials, analysis),
721 | this.generateHowToContent(contentPlan.howToGuides, analysis),
722 | this.generateReferenceContent(contentPlan.reference, analysis),
723 | this.generateExplanationContent(contentPlan.explanation, analysis),
724 | ]);
725 |
726 | // 4. Write content to documentation structure
727 | const filesCreated = await this.writeContentToStructure(
728 | args.docsPath,
729 | { tutorials, howTos, reference, explanation },
730 | args.preserveExisting,
731 | );
732 |
733 | // 5. Generate cross-references and navigation updates
734 | await this.updateNavigationAndCrossReferences(args.docsPath, contentPlan);
735 |
736 | return {
737 | success: true,
738 | filesCreated,
739 | contentPlan,
740 | populationMetrics: this.calculatePopulationMetrics(filesCreated),
741 | nextSteps: this.generateNextSteps(analysis, contentPlan),
742 | };
743 | } catch (error) {
744 | console.error("Content population failed:", error);
745 | return {
746 | success: false,
747 | error: {
748 | code: "CONTENT_POPULATION_FAILED",
749 | message: `Failed to populate content: ${
750 | error instanceof Error ? error.message : "Unknown error"
751 | }`,
752 | resolution:
753 | "Check repository analysis and documentation path accessibility",
754 | },
755 | filesCreated: [],
756 | populationMetrics: { totalFiles: 0, totalWords: 0, totalSections: 0 },
757 | };
758 | }
759 | }
760 | }
761 | ```
762 |
763 | ### Technology Detection and Content Mapping
764 |
765 | ```typescript
766 | interface TechnologyMapper {
767 | detectTechnologies(analysis: RepositoryAnalysis): TechnologyProfile;
768 | mapToContentTemplates(technologies: TechnologyProfile): ContentTemplateSet;
769 | generateTechnologySpecificExamples(
770 | technology: Technology,
771 | context: ProjectContext,
772 | ): CodeExample[];
773 | }
774 |
775 | class JavaScriptTechnologyMapper implements TechnologyMapper {
776 | detectTechnologies(analysis: RepositoryAnalysis): TechnologyProfile {
777 | const profile: TechnologyProfile = {
778 | runtime: this.detectRuntime(analysis), // Node.js, Deno, Bun
779 | framework: this.detectFramework(analysis), // Express, Fastify, Koa
780 | frontend: this.detectFrontend(analysis), // React, Vue, Angular
781 | database: this.detectDatabase(analysis), // PostgreSQL, MongoDB, Redis
782 | testing: this.detectTesting(analysis), // Jest, Mocha, Playwright
783 | deployment: this.detectDeployment(analysis), // Docker, Kubernetes, Vercel
784 | devops: this.detectDevOpsTools(analysis), // Ansible, Tekton, OpenShift, Podman
785 | };
786 |
787 | return profile;
788 | }
789 |
790 | mapToContentTemplates(technologies: TechnologyProfile): ContentTemplateSet {
791 | return {
792 | tutorials: this.generateTutorialTemplates(technologies),
793 | howToGuides: this.generateHowToTemplates(technologies),
794 | reference: this.generateReferenceTemplates(technologies),
795 | explanation: this.generateExplanationTemplates(technologies),
796 | };
797 | }
798 | }
799 | ```
800 |
801 | ### DevOps and Infrastructure Tooling Support
802 |
803 | #### DevOps Tool Detection and Content Generation
804 |
805 | ```typescript
806 | interface DevOpsToolMapper {
807 | detectDevOpsTools(analysis: RepositoryAnalysis): DevOpsToolProfile;
808 | generateDevOpsContent(
809 | tools: DevOpsToolProfile,
810 | context: ProjectContext,
811 | ): DevOpsContent;
812 | createInfrastructureDocumentation(
813 | infrastructure: InfrastructureProfile,
814 | deploymentPattern: DeploymentPattern,
815 | ): InfrastructureDocumentation;
816 | }
817 |
818 | interface DevOpsToolProfile {
819 | containerization: ContainerTechnology[]; // Docker, Podman, Buildah
820 | orchestration: OrchestrationTechnology[]; // Kubernetes, OpenShift, Nomad
821 | cicd: CICDTechnology[]; // Tekton, GitHub Actions, Jenkins, GitLab CI
822 | configuration: ConfigManagementTechnology[]; // Ansible, Terraform, Helm
823 | monitoring: MonitoringTechnology[]; // Prometheus, Grafana, Jaeger
824 | security: SecurityTechnology[]; // Falco, OPA, Vault
825 | }
826 |
827 | class DevOpsContentGenerator implements DevOpsToolMapper {
828 | detectDevOpsTools(analysis: RepositoryAnalysis): DevOpsToolProfile {
829 | return {
830 | containerization: this.detectContainerization(analysis),
831 | orchestration: this.detectOrchestration(analysis),
832 | cicd: this.detectCICD(analysis),
833 | configuration: this.detectConfigManagement(analysis),
834 | monitoring: this.detectMonitoring(analysis),
835 | security: this.detectSecurity(analysis),
836 | };
837 | }
838 |
839 | private detectContainerization(
840 | analysis: RepositoryAnalysis,
841 | ): ContainerTechnology[] {
842 | const detected: ContainerTechnology[] = [];
843 |
844 | // Docker detection
845 | if (
846 | this.hasFile(analysis, "Dockerfile") ||
847 | this.hasFile(analysis, "docker-compose.yml") ||
848 | this.hasFile(analysis, "docker-compose.yaml")
849 | ) {
850 | detected.push({
851 | name: "docker",
852 | version: this.extractDockerVersion(analysis),
853 | configFiles: this.getDockerFiles(analysis),
854 | usage: this.analyzeDockerUsage(analysis),
855 | });
856 | }
857 |
858 | // Podman detection
859 | if (
860 | this.hasFile(analysis, "Containerfile") ||
861 | this.hasReference(analysis, "podman") ||
862 | this.hasFile(analysis, "podman-compose.yml")
863 | ) {
864 | detected.push({
865 | name: "podman",
866 | version: this.extractPodmanVersion(analysis),
867 | configFiles: this.getPodmanFiles(analysis),
868 | usage: this.analyzePodmanUsage(analysis),
869 | });
870 | }
871 |
872 | return detected;
873 | }
874 |
875 | private detectOrchestration(
876 | analysis: RepositoryAnalysis,
877 | ): OrchestrationTechnology[] {
878 | const detected: OrchestrationTechnology[] = [];
879 |
880 | // Kubernetes detection
881 | if (
882 | this.hasDirectory(analysis, "k8s/") ||
883 | this.hasDirectory(analysis, "kubernetes/") ||
884 | this.hasFilePattern(analysis, "*.yaml", "apiVersion: apps/v1") ||
885 | this.hasFilePattern(analysis, "*.yml", "kind: Deployment")
886 | ) {
887 | detected.push({
888 | name: "kubernetes",
889 | manifests: this.getKubernetesManifests(analysis),
890 | resources: this.analyzeKubernetesResources(analysis),
891 | namespaces: this.extractNamespaces(analysis),
892 | });
893 | }
894 |
895 | // OpenShift detection
896 | if (
897 | this.hasDirectory(analysis, ".s2i/") ||
898 | this.hasReference(analysis, "openshift") ||
899 | this.hasFileContent(analysis, "kind: DeploymentConfig") ||
900 | this.hasFileContent(analysis, "kind: Route")
901 | ) {
902 | detected.push({
903 | name: "openshift",
904 | templates: this.getOpenShiftTemplates(analysis),
905 | buildConfigs: this.getBuildConfigs(analysis),
906 | routes: this.getRoutes(analysis),
907 | });
908 | }
909 |
910 | return detected;
911 | }
912 |
913 | private detectCICD(analysis: RepositoryAnalysis): CICDTechnology[] {
914 | const detected: CICDTechnology[] = [];
915 |
916 | // Tekton detection
917 | if (
918 | this.hasDirectory(analysis, ".tekton/") ||
919 | this.hasFileContent(analysis, "apiVersion: tekton.dev") ||
920 | this.hasFilePattern(analysis, "*.yaml", "kind: Pipeline")
921 | ) {
922 | detected.push({
923 | name: "tekton",
924 | pipelines: this.getTektonPipelines(analysis),
925 | tasks: this.getTektonTasks(analysis),
926 | triggers: this.getTektonTriggers(analysis),
927 | });
928 | }
929 |
930 | return detected;
931 | }
932 |
933 | private detectConfigManagement(
934 | analysis: RepositoryAnalysis,
935 | ): ConfigManagementTechnology[] {
936 | const detected: ConfigManagementTechnology[] = [];
937 |
938 | // Ansible detection
939 | if (
940 | this.hasFile(analysis, "ansible.cfg") ||
941 | this.hasDirectory(analysis, "playbooks/") ||
942 | this.hasDirectory(analysis, "roles/") ||
943 | this.hasFile(analysis, "inventory") ||
944 | this.hasFilePattern(analysis, "*.yml", "hosts:") ||
945 | this.hasFilePattern(analysis, "*.yaml", "tasks:")
946 | ) {
947 | detected.push({
948 | name: "ansible",
949 | playbooks: this.getAnsiblePlaybooks(analysis),
950 | roles: this.getAnsibleRoles(analysis),
951 | inventory: this.getAnsibleInventory(analysis),
952 | vaultFiles: this.getAnsibleVault(analysis),
953 | });
954 | }
955 |
956 | return detected;
957 | }
958 | }
959 | ```
960 |
961 | #### DevOps-Specific Content Templates and Generation
962 |
963 | **Key DevOps Documentation Patterns**:
964 |
965 | - **Container Tutorials**: Project-specific Dockerfile optimization, multi-stage builds
966 | - **Orchestration Guides**: Kubernetes/OpenShift deployment strategies
967 | - **Infrastructure as Code**: Ansible playbooks for application deployment
968 | - **CI/CD Pipelines**: Tekton pipeline configuration and best practices
969 |
970 | ```typescript
971 | const DEVOPS_CONTENT_TEMPLATES = {
972 | docker: {
973 | tutorial: "Containerizing {projectName} with Docker",
974 | howto: ["Optimize Docker Images", "Debug Container Issues"],
975 | reference: "Dockerfile Configuration Reference",
976 | explanation: "Container Architecture Decisions",
977 | },
978 | kubernetes: {
979 | tutorial: "Deploying {projectName} to Kubernetes",
980 | howto: ["Scale Applications", "Troubleshoot Deployments"],
981 | reference: "Kubernetes Manifest Specifications",
982 | explanation: "Orchestration Strategy",
983 | },
984 | ansible: {
985 | tutorial: "Infrastructure as Code with Ansible",
986 | howto: ["Automate Deployment", "Manage Multi-Environment"],
987 | reference: "Playbook and Role Reference",
988 | explanation: "Configuration Management Strategy",
989 | },
990 | tekton: {
991 | tutorial: "CI/CD Pipeline with Tekton",
992 | howto: ["Build and Deploy", "Manage Secrets"],
993 | reference: "Pipeline Specifications",
994 | explanation: "Cloud Native CI/CD Architecture",
995 | },
996 | };
997 |
998 | function generateDevOpsContent(
999 | devopsProfile: DevOpsToolProfile,
1000 | projectContext: ProjectContext,
1001 | ): DevOpsContentPlan {
1002 |   // Implementation sketch (illustrative): produce a project-specific
1003 |   // DevOpsContentPlan from the detected tools and project characteristics
1004 | }
1005 | ```
1006 |
1007 | ### Community Contribution Framework for Language and Tool Support
1008 |
1009 | #### Language Extension Architecture
1010 |
1011 | ```typescript
1012 | interface LanguageExtension {
1013 | name: string;
1014 | ecosystem: string;
1015 | packageManagers: string[];
1016 | detectionPatterns: DetectionPattern[];
1017 | frameworks: FrameworkDefinition[];
1018 | contentTemplates: LanguageContentTemplates;
1019 | validationRules: ValidationRule[];
1020 | }
1021 |
1022 | interface DetectionPattern {
1023 | type: "file" | "dependency" | "import" | "content";
1024 | pattern: string | RegExp;
1025 |   weight: number; // 1-10; higher values indicate greater detection confidence
1026 | description: string;
1027 | }
1028 |
1029 | interface FrameworkDefinition {
1030 | name: string;
1031 | detectionPatterns: DetectionPattern[];
1032 | contentTemplates: FrameworkContentTemplates;
1033 | codeExamples: CodeExampleGenerator;
1034 | bestPractices: BestPractice[];
1035 | }
1036 | ```
1037 |
1038 | #### Contribution Guidelines for New Language Support
1039 |
1040 | ##### Step 1: Language Detection Implementation
1041 |
1042 | ```typescript
1043 | // Example: Adding Go language support
1044 | const GO_LANGUAGE_EXTENSION: LanguageExtension = {
1045 | name: "go",
1046 | ecosystem: "go",
1047 | packageManagers: ["go mod", "dep"],
1048 | detectionPatterns: [
1049 | {
1050 | type: "file",
1051 | pattern: "go.mod",
1052 | weight: 10,
1053 | description: "Go module definition file",
1054 | },
1055 | {
1056 | type: "file",
1057 | pattern: "go.sum",
1058 | weight: 8,
1059 | description: "Go module checksums",
1060 | },
1061 | {
1062 | type: "file",
1063 | pattern: /.*\.go$/,
1064 | weight: 6,
1065 | description: "Go source files",
1066 | },
1067 | {
1068 | type: "content",
1069 | pattern: /^package main$/m,
1070 | weight: 7,
1071 | description: "Go main package declaration",
1072 | },
1073 | ],
1074 | frameworks: [
1075 | // Framework definitions...
1076 | ],
1077 | contentTemplates: {
1078 | // Content templates...
1079 | },
1080 | };
1081 | ```
1082 |
1083 | ##### Step 2: Framework-Specific Content Templates
1084 |
1085 | ```typescript
1086 | // Example: Adding Gin framework support for Go
1087 | const GIN_FRAMEWORK: FrameworkDefinition = {
1088 | name: "gin",
1089 | detectionPatterns: [
1090 | {
1091 | type: "dependency",
1092 | pattern: "github.com/gin-gonic/gin",
1093 | weight: 10,
1094 | description: "Gin framework dependency",
1095 | },
1096 | {
1097 | type: "import",
1098 | pattern: 'gin "github.com/gin-gonic/gin"',
1099 | weight: 9,
1100 | description: "Gin framework import",
1101 | },
1102 | ],
1103 | contentTemplates: {
1104 | tutorials: [
1105 | {
1106 | title: "Building REST APIs with Gin",
1107 | diataxisType: "tutorial",
1108 | sections: [
1109 | "Setting up Gin Application",
1110 | "Defining Routes and Handlers",
1111 | "Middleware Configuration",
1112 | "Database Integration",
1113 | "Testing Gin Applications",
1114 | ],
1115 | prerequisites: [
1116 | "Go installed (1.19+)",
1117 | "Basic Go language knowledge",
1118 | "Understanding of HTTP concepts",
1119 | ],
1120 | estimatedTime: "60 minutes",
1121 | difficulty: "beginner",
1122 | },
1123 | ],
1124 | howToGuides: [
1125 | {
1126 | title: "Optimize Gin Performance",
1127 | diataxisType: "how-to",
1128 | tasks: [
1129 | "Configure connection pooling",
1130 | "Implement caching strategies",
1131 | "Set up rate limiting",
1132 | "Profile and benchmark endpoints",
1133 | ],
1134 | },
1135 | ],
1136 | reference: [
1137 | {
1138 | title: "Gin Router Configuration",
1139 | diataxisType: "reference",
1140 | sections: [
1141 | "Route definition patterns",
1142 | "Middleware registration",
1143 | "Context object methods",
1144 | "Error handling patterns",
1145 | ],
1146 | },
1147 | ],
1148 | explanation: [
1149 | {
1150 | title: "Gin Architecture and Design Decisions",
1151 | diataxisType: "explanation",
1152 | topics: [
1153 | "HTTP router performance characteristics",
1154 | "Middleware pipeline design",
1155 | "Context lifecycle management",
1156 | "Comparison with other Go frameworks",
1157 | ],
1158 | },
1159 | ],
1160 | },
1161 | codeExamples: {
1162 | basicServer: `package main
1163 |
1164 | import (
1165 | "net/http"
1166 | "github.com/gin-gonic/gin"
1167 | )
1168 |
1169 | func main() {
1170 | r := gin.Default()
1171 |
1172 | r.GET("/health", func(c *gin.Context) {
1173 | c.JSON(http.StatusOK, gin.H{
1174 | "status": "healthy",
1175 | })
1176 | })
1177 |
1178 | r.Run(":8080")
1179 | }`,
1180 | middleware: `func LoggerMiddleware() gin.HandlerFunc {
1181 | return func(c *gin.Context) {
1182 | start := time.Now()
1183 | c.Next()
1184 | duration := time.Since(start)
1185 | log.Printf("%s %s %v", c.Request.Method, c.Request.URL.Path, duration)
1186 | }
1187 | }`,
1188 | },
1189 | };
1190 | ```
1191 |
1192 | ##### Step 3: Content Generation Logic
1193 |
1194 | ```typescript
1195 | class GoContentGenerator implements FrameworkContentGenerator {
1196 | detectFramework(analysis: RepositoryAnalysis): Framework[] {
1197 | const frameworks: Framework[] = [];
1198 |
1199 | // Check for Gin framework
1200 | if (this.hasGoModule(analysis, "github.com/gin-gonic/gin")) {
1201 | frameworks.push({
1202 | name: "gin",
1203 | version: this.extractGoModuleVersion(
1204 | analysis,
1205 | "github.com/gin-gonic/gin",
1206 | ),
1207 | configFiles: this.getGinConfigFiles(analysis),
1208 | routeStructure: this.analyzeGinRoutes(analysis),
1209 | });
1210 | }
1211 |
1212 | // Check for Echo framework
1213 | if (this.hasGoModule(analysis, "github.com/labstack/echo")) {
1214 | frameworks.push({
1215 | name: "echo",
1216 | version: this.extractGoModuleVersion(
1217 | analysis,
1218 | "github.com/labstack/echo",
1219 | ),
1220 | configFiles: this.getEchoConfigFiles(analysis),
1221 | routeStructure: this.analyzeEchoRoutes(analysis),
1222 | });
1223 | }
1224 |
1225 | return frameworks;
1226 | }
1227 |
1228 | generateFrameworkContent(
1229 | framework: Framework,
1230 | context: ProjectContext,
1231 | ): FrameworkContent {
1232 | const templates = GO_LANGUAGE_EXTENSION.frameworks.find(
1233 | (f) => f.name === framework.name,
1234 | )?.contentTemplates;
1235 |
1236 | if (!templates) return this.generateGenericGoContent(framework, context);
1237 |
1238 | return this.populateTemplatesWithProjectContext(
1239 | templates,
1240 | framework,
1241 | context,
1242 | );
1243 | }
1244 |
1245 | private generateProjectSpecificGoDockerfile(context: ProjectContext): string {
1246 | return `# Multi-stage build for ${context.projectName}
1247 | FROM golang:1.21-alpine AS builder
1248 |
1249 | WORKDIR /app
1250 | COPY go.mod go.sum ./
1251 | RUN go mod download
1252 |
1253 | COPY . .
1254 | RUN CGO_ENABLED=0 GOOS=linux go build -o main .
1255 |
1256 | # Final stage
1257 | FROM alpine:latest
1258 | RUN apk --no-cache add ca-certificates
1259 | WORKDIR /root/
1260 | COPY --from=builder /app/main .
1261 | EXPOSE 8080
1262 | CMD ["./main"]`;
1263 | }
1264 | }
1265 | ```
1266 |
1267 | #### Contribution Process and Standards
1268 |
1269 | ##### Community Contribution Workflow
1270 |
1271 | 1. **Language Proposal**: Submit GitHub issue with language/framework proposal
1272 | 2. **Detection Patterns**: Define comprehensive detection patterns
1273 | 3. **Content Templates**: Create Diataxis-compliant content templates
1274 | 4. **Code Examples**: Provide working, project-specific code examples
1275 | 5. **Testing**: Include validation tests for detection and generation
1276 | 6. **Documentation**: Document contribution for future maintainers
1277 | 7. **Review Process**: Community and maintainer review
1278 | 8. **Integration**: Merge into main extension registry
1279 |
1280 | ##### Quality Standards for Contributions
1281 |
1282 | ```typescript
1283 | interface ContributionStandards {
1284 | detection: {
1285 | minimumPatterns: 3;
1286 | requiredTypes: ["file", "dependency"];
1287 | weightDistribution: "balanced"; // No single pattern > 70% weight
1288 | falsePositiveRate: "<5%";
1289 | };
1290 |
1291 | content: {
1292 | diataxisCompliance: "strict";
1293 | tutorialCount: "minimum 2";
1294 | howToGuideCount: "minimum 3";
1295 | referenceCompleteness: "80%";
1296 | explanationDepth: "architectural decisions covered";
1297 | };
1298 |
1299 | codeExamples: {
1300 | compilationSuccess: "100%";
1301 | projectSpecific: "true";
1302 | bestPractices: "current industry standards";
1303 | securityConsiderations: "included";
1304 | };
1305 |
1306 | testing: {
1307 | detectionAccuracy: ">90%";
1308 | contentGeneration: "functional tests";
1309 | integrationTests: "with existing systems";
1310 | performanceImpact: "<10% generation time increase";
1311 | };
1312 | }
1313 | ```
1314 |
1315 | ##### Template Contribution Format
1316 |
1317 | ```typescript
1318 | // Required structure for new language contributions
1319 | interface LanguageContributionTemplate {
1320 | metadata: {
1321 | contributorName: string;
1322 | contributorEmail: string;
1323 | languageName: string;
1324 | version: string;
1325 | lastUpdated: string;
1326 | maintenanceCommitment: "ongoing" | "initial-only";
1327 | };
1328 |
1329 | detection: DetectionPatternSet;
1330 | frameworks: FrameworkDefinition[];
1331 | contentTemplates: ContentTemplateSet;
1332 | validation: ValidationTestSuite;
1333 | documentation: ContributionDocumentation;
1334 | }
1335 |
1336 | // Example contribution file structure:
1337 | // src/languages/
1338 | // ├── go/
1339 | // │ ├── detection.ts
1340 | // │ ├── frameworks/
1341 | // │ │ ├── gin.ts
1342 | // │ │ ├── echo.ts
1343 | // │ │ └── fiber.ts
1344 | // │ ├── templates/
1345 | // │ │ ├── tutorials.ts
1346 | // │ │ ├── howto.ts
1347 | // │ │ ├── reference.ts
1348 | // │ │ └── explanation.ts
1349 | // │ ├── tests/
1350 | // │ │ ├── detection.test.ts
1351 | // │ │ └── generation.test.ts
1352 | // │ └── README.md
1353 | ```
1354 |
1355 | #### Community Validation and Review Process
1356 |
1357 | ##### Automated Validation Pipeline
1358 |
1359 | ```typescript
1360 | interface ContributionValidation {
1361 | // Automated checks
1362 | syntaxValidation: "TypeScript compilation success";
1363 | patternTesting: "Detection accuracy against test repositories";
1364 | contentValidation: "Diataxis compliance checking";
1365 | performanceImpact: "Generation time benchmarking";
1366 |
1367 | // Community review
1368 | peerReview: "Two community developer approvals";
1369 | maintainerReview: "Core team architectural review";
1370 | expertValidation: "Language expert accuracy verification";
1371 |
1372 | // Integration testing
1373 | endToEndTesting: "Full workflow validation";
1374 | regressionTesting: "No impact on existing languages";
1375 | documentationReview: "Contribution documentation completeness";
1376 | }
1377 | ```
1378 |
1379 | ##### Long-term Maintenance Framework
1380 |
1381 | ```typescript
1382 | interface MaintenanceFramework {
1383 | languageUpdates: {
1384 | frameworkVersions: "automated dependency tracking";
1385 | newFrameworks: "community contribution process";
1386 | deprecatedPatterns: "automated detection and flagging";
1387 | };
1388 |
1389 | communityGovernance: {
1390 | languageMaintainers: "designated community experts";
1391 | updateProcess: "structured enhancement proposals";
1392 | qualityAssurance: "continuous validation and testing";
1393 | };
1394 |
1395 | toolingSupport: {
1396 | contributionCLI: "automated scaffolding for new languages";
1397 | validationTools: "automated testing and verification";
1398 | documentationGeneration: "automated API documentation";
1399 | };
1400 | }
1401 | ```
1402 |
1403 | ## Quality Assurance
1404 |
1405 | ### Content Validation Framework
1406 |
1407 | ```typescript
1408 | interface ContentValidator {
1409 | validateAccuracy(
1410 | content: GeneratedContent,
1411 | analysis: RepositoryAnalysis,
1412 | ): ValidationResult;
1413 | checkDiataxisCompliance(content: GeneratedContent): ComplianceResult;
1414 | verifyCodeExamples(
1415 | examples: CodeExample[],
1416 | projectContext: ProjectContext,
1417 | ): ValidationResult;
1418 | assessContentCompleteness(
1419 | content: GeneratedContent,
1420 | plan: ContentPlan,
1421 | ): CompletenessResult;
1422 | }
1423 |
1424 | interface ValidationResult {
1425 | isValid: boolean;
1426 | issues: ValidationIssue[];
1427 | suggestions: ImprovementSuggestion[];
1428 | confidence: number;
1429 | }
1430 | ```
1431 |
1432 | ### Testing Strategy
1433 |
1434 | ```typescript
1435 | describe("ContentPopulationEngine", () => {
1436 | describe("Tutorial Generation", () => {
1437 | it(
1438 | "should generate appropriate getting started tutorial for Express.js project",
1439 | );
1440 | it("should include technology-specific setup steps");
1441 | it("should provide working code examples");
1442 | it("should maintain Diataxis tutorial principles");
1443 | });
1444 |
1445 | describe("Technology Detection", () => {
1446 | it("should correctly identify primary framework from package.json");
1447 | it("should detect database dependencies and generate appropriate content");
1448 | it("should handle multi-framework projects appropriately");
1449 | });
1450 |
1451 | describe("Content Quality", () => {
1452 | it("should generate accurate code examples that match project structure");
1453 | it("should maintain consistent tone and style across content types");
1454 | it("should create appropriate cross-references between content sections");
1455 | });
1456 | });
1457 | ```
1458 |
1459 | ### Performance Requirements
1460 |
1461 | - **Content Generation Time**: < 30 seconds for comprehensive population
1462 | - **Memory Usage**: < 500MB for large repository analysis and content generation
1463 | - **Content Quality**: 80%+ accuracy for generated technical content
1464 | - **Coverage**: Support for 15+ major JavaScript/TypeScript frameworks initially
1465 |
1466 | ## Integration Points
1467 |
1468 | ### Repository Analysis Integration (ADR-002)
1469 |
1470 | - Leverage multi-layered analysis results for informed content generation
1471 | - Use complexity assessment to determine content depth and sophistication
1472 | - Integrate dependency analysis for framework-specific content selection
1473 |
1474 | ### Diataxis Framework Integration (ADR-004)
1475 |
1476 | - Implement content planning intelligence outlined in ADR-004 lines 153-192
1477 | - Generate content that strictly adheres to Diataxis category principles
1478 | - Create appropriate cross-references and user journey flows
1479 |
1480 | ### MCP Tools API Integration (ADR-006)
1481 |
1482 | - Add populate_diataxis_content as seventh core MCP tool
1483 | - Maintain consistent error handling and response format patterns
1484 | - Integrate with existing setup_structure tool for seamless workflow
1485 |
1486 | ### SSG Configuration Integration (ADR-006)
1487 |
1488 | - Generate content with appropriate frontmatter for target SSG
1489 | - Adapt content format and structure to SSG capabilities
1490 | - Ensure generated content renders correctly across all supported SSGs
1491 |
1492 | ## Future Enhancements
1493 |
1494 | ### Advanced AI Integration
1495 |
1496 | - **Large Language Model Integration**: Use specialized models for content refinement
1497 | - **Code Analysis AI**: Advanced analysis of project patterns for more accurate content
1498 | - **Natural Language Generation**: Improve content quality and readability
1499 |
1500 | ### Extended Technology Support
1501 |
1502 | #### Python Ecosystem (Priority Implementation)
1503 |
1504 | - **Web Frameworks**: Django, Flask, FastAPI, Pyramid, Bottle
1505 | - **Data Science**: Jupyter, Pandas, NumPy, SciPy documentation patterns
1506 | - **ML/AI**: TensorFlow, PyTorch, Scikit-learn integration guides
1507 | - **API Development**: Django REST Framework, FastAPI advanced patterns
1508 | - **Testing**: pytest, unittest, behave testing documentation
1509 | - **Deployment**: Gunicorn, uWSGI, Celery configuration guides
1510 |
1511 | #### Additional Language Ecosystems
1512 |
1513 | - **Go Ecosystem**: Gin, Echo, Fiber, Buffalo framework support
1514 | - **Rust Ecosystem**: Actix-web, Warp, Rocket, Axum content generation
1515 | - **Java Ecosystem**: Spring Boot, Quarkus, Micronaut, Play Framework
1516 | - **C# Ecosystem**: ASP.NET Core, Entity Framework, Blazor
1517 | - **Ruby Ecosystem**: Rails, Sinatra, Hanami framework support
1518 | - **PHP Ecosystem**: Laravel, Symfony, CodeIgniter patterns
1519 |
1520 | ### DevOps and Infrastructure Expansion
1521 |
1522 | - **Extended Container Support**: Buildah, Skopeo, LXC/LXD integration
1523 | - **Advanced Orchestration**: Nomad, Docker Swarm, Cloud Foundry support
1524 | - **CI/CD Platforms**: Jenkins, GitLab CI, Azure DevOps, CircleCI integration
1525 | - **Infrastructure Tools**: Terraform, Pulumi, CloudFormation content generation
1526 | - **Service Mesh**: Istio, Linkerd, Consul Connect documentation patterns
1527 | - **Monitoring Stack**: Prometheus, Grafana, ELK Stack, Jaeger integration guides
1528 |
1529 | ### Community and Learning Features
1530 |
1531 | - **Content Quality Feedback**: User ratings and improvement suggestions
1532 | - **Template Sharing**: Community-contributed content templates
1533 | - **Usage Analytics**: Track which content types provide most value
1534 | - **Personalization**: Adapt content style to team preferences and expertise level
1535 |
1536 | ### Community Ecosystem and Contributions
1537 |
1538 | - **Language Extension Registry**: Centralized repository for community language support
1539 | - **Contribution Tooling**: CLI tools for scaffolding new language extensions
1540 | - **Validation Pipeline**: Automated testing and quality assurance for contributions
1541 | - **Community Governance**: Language maintainer program and review processes
1542 | - **Documentation Portal**: Comprehensive guides for extending DocuMCP capabilities
1543 | - **Template Marketplace**: Sharing and discovery of specialized content templates
1544 |
1545 | ### Enterprise Features
1546 |
1547 | - **Custom Content Standards**: Organization-specific content templates and style guides
1548 | - **Multi-language Support**: Generate content in multiple languages
1549 | - **Integration APIs**: Connect with existing documentation management systems
1550 | - **Approval Workflows**: Review and approval processes for generated content
1551 |
1552 | ## Success Metrics
1553 |
1554 | ### User Value Metrics
1555 |
1556 | - **Time to Usable Documentation**: Target < 30 minutes (vs. 8-20 hours manually)
1557 | - **Content Completeness**: 60-80% populated content out of the box
1558 | - **User Satisfaction**: 85%+ positive feedback on generated content quality
1559 | - **Adoption Rate**: 90%+ of users use content population vs. structure-only
1560 |
1561 | ### Technical Metrics
1562 |
1563 | - **Content Accuracy**: 80%+ technical accuracy for generated code examples
1564 | - **Framework Coverage**: Support 95% of detected JavaScript/TypeScript frameworks
1565 | - **DevOps Tool Coverage**: Support 90% of detected containerization and orchestration tools
1566 | - **Performance**: Content generation completes within 30 seconds
1567 | - **Error Rate**: < 5% content generation failures
1568 |
1569 | ### Business Metrics
1570 |
1571 | - **Competitive Differentiation**: Only tool providing intelligent content population
1572 | - **Market Position**: Establish DocuMCP as "intelligent documentation assistant"
1573 | - **User Retention**: Increase from documentation structure to full workflow adoption
1574 | - **Community Growth**: Attract technical writers and documentation specialists
1575 |
1576 | ## References
1577 |
1578 | - [ADR-002: Multi-Layered Repository Analysis Engine Design](002-repository-analysis-engine.md)
1579 | - [ADR-004: Diataxis Framework Integration](004-diataxis-framework-integration.md)
1580 | - [ADR-006: MCP Tools API Design](006-mcp-tools-api-design.md)
1581 | - [Diataxis Framework Documentation](https://diataxis.fr/)
1582 | - [Technical Writing Best Practices](https://developers.google.com/tech-writing)
1583 | - [Documentation as Code Principles](https://www.writethedocs.org/guide/docs-as-code/)
1584 |
```
--------------------------------------------------------------------------------
/src/memory/export-import.ts:
--------------------------------------------------------------------------------
```typescript
1 | /**
2 | * Memory Export/Import System for DocuMCP
3 | * Comprehensive data portability, backup, and migration capabilities
4 | */
5 |
6 | import { EventEmitter } from "events";
7 | import { promises as fs } from "fs";
8 | import { createWriteStream } from "fs";
9 | import { MemoryEntry, JSONLStorage } from "./storage.js";
10 | import { MemoryManager } from "./manager.js";
11 | import { IncrementalLearningSystem } from "./learning.js";
12 | import { KnowledgeGraph } from "./knowledge-graph.js";
13 | import { MemoryPruningSystem } from "./pruning.js";
14 |
15 | export interface ExportOptions {
16 | format: "json" | "jsonl" | "csv" | "xml" | "yaml" | "sqlite" | "archive";
17 | compression?: "gzip" | "zip" | "none";
18 | includeMetadata: boolean;
19 | includeLearning: boolean;
20 | includeKnowledgeGraph: boolean;
21 | filters?: {
22 | types?: string[];
23 | dateRange?: { start: Date; end: Date };
24 | projects?: string[];
25 | tags?: string[];
26 | outcomes?: string[];
27 | };
28 | anonymize?: {
29 | enabled: boolean;
30 | fields: string[];
31 | method: "hash" | "remove" | "pseudonymize";
32 | };
33 | encryption?: {
34 | enabled: boolean;
35 | algorithm: "aes-256-gcm" | "aes-192-gcm" | "aes-128-gcm";
36 | password?: string;
37 | };
38 | }
39 |
/**
 * Options controlling a memory import.
 *
 * `mode` decides how incoming entries combine with existing data, and
 * `conflictResolution` decides what happens when the same entry exists on
 * both sides. `dryRun` runs the full pipeline without persisting anything.
 */
export interface ImportOptions {
  format: "json" | "jsonl" | "csv" | "xml" | "yaml" | "sqlite" | "archive";
  mode: "merge" | "replace" | "append" | "update";
  // "strict" aborts the import when any entry fails validation.
  validation: "strict" | "loose" | "none";
  conflictResolution: "skip" | "overwrite" | "merge" | "rename";
  // Take a backup of current memory before a non-dry-run import.
  backup: boolean;
  dryRun: boolean;
  mapping?: Record<string, string>; // Field mapping for different schemas
  // Optional per-field transformation rules applied during import.
  transformation?: {
    enabled: boolean;
    rules: Array<{
      field: string;
      operation: "convert" | "transform" | "validate";
      params: any;
    }>;
  };
}
57 |
/**
 * Outcome of an export operation. On failure, `success` is false,
 * `filePath` is absent, and `errors` holds the failure message(s).
 */
export interface ExportResult {
  success: boolean;
  // Path of the file actually written (may differ from the requested path,
  // e.g. when an extension or compression suffix was appended).
  filePath?: string;
  format: string;
  // Size of the written file in bytes (0 on failure).
  size: number;
  // Number of memory entries included in the export.
  entries: number;
  metadata: {
    exportedAt: Date;
    version: string;
    source: string;
    // Which data sections were exported (memories, metadata, learning, ...).
    includes: string[];
    compression?: string;
    encryption?: boolean;
  };
  warnings: string[];
  errors: string[];
}
75 |
/**
 * Outcome of an import operation, including per-entry counters,
 * validation tallies, and a summary of how entries were applied.
 */
export interface ImportResult {
  success: boolean;
  // Total entries examined, regardless of outcome.
  processed: number;
  imported: number;
  skipped: number;
  errors: number;
  errorDetails: string[]; // Detailed error messages
  // Entries that collided with existing data (handled per conflictResolution).
  conflicts: number;
  validation: {
    valid: number;
    invalid: number;
    warnings: string[];
  };
  summary: {
    newEntries: number;
    updatedEntries: number;
    duplicateEntries: number;
    failedEntries: number;
  };
  metadata: {
    importedAt: Date;
    source: string;
    format: string;
    mode: string;
  };
}
102 |
/**
 * Declarative plan for migrating data from a foreign system into DocuMCP:
 * field mappings, per-field transformations, validation rules, and
 * post-processing steps executed after the import completes.
 */
export interface MigrationPlan {
  sourceSystem: string;
  targetSystem: string;
  // source field name -> target field name
  mapping: Record<string, string>;
  transformations: Array<{
    field: string;
    type: "rename" | "convert" | "merge" | "split" | "calculate";
    // "merge" may combine several source fields, hence string | string[].
    source: string | string[];
    target: string;
    operation?: string;
  }>;
  validation: Array<{
    field: string;
    rules: string[];
    required: boolean;
  }>;
  // Names of post-processing steps to run after import (system-defined).
  postProcessing: string[];
}
121 |
/**
 * Metadata document written alongside a multi-file "archive" export.
 * The manifest lists every file in the archive with its size and checksum
 * so the archive can be verified and re-imported.
 */
export interface ArchiveMetadata {
  version: string;
  created: Date;
  source: string;
  description: string;
  manifest: {
    files: Array<{
      name: string;
      type: string;
      size: number;
      checksum: string;
      // Number of memory entries in the file, when applicable.
      entries?: number;
    }>;
    total: {
      files: number;
      size: number;
      entries: number;
    };
  };
  // The export options the archive was produced with.
  options: ExportOptions;
}
143 |
144 | export class MemoryExportImportSystem extends EventEmitter {
  private storage: JSONLStorage;
  private manager: MemoryManager;
  private learningSystem: IncrementalLearningSystem;
  private knowledgeGraph: KnowledgeGraph;
  // Optional: pruning is not required for export/import itself.
  private pruningSystem?: MemoryPruningSystem;
  // Schema version stamped into every export's metadata.
  private readonly version = "1.0.0";

  /**
   * Wires the export/import system to the memory subsystems it serializes.
   *
   * @param storage - JSONL-backed store holding the raw memory entries
   * @param manager - high-level memory manager
   * @param learningSystem - source of learned patterns/statistics for exports
   * @param knowledgeGraph - source of graph nodes/edges for exports
   * @param pruningSystem - optional pruning subsystem
   */
  constructor(
    storage: JSONLStorage,
    manager: MemoryManager,
    learningSystem: IncrementalLearningSystem,
    knowledgeGraph: KnowledgeGraph,
    pruningSystem?: MemoryPruningSystem,
  ) {
    super();
    this.storage = storage;
    this.manager = manager;
    this.learningSystem = learningSystem;
    this.knowledgeGraph = knowledgeGraph;
    this.pruningSystem = pruningSystem;
  }
166 |
  /**
   * Exports memory data to the given path in the requested format.
   *
   * Pipeline: filter entries -> assemble payload (optionally with learning
   * data and knowledge graph) -> anonymize -> serialize -> compress ->
   * encrypt. Never throws: failures are reported via a result with
   * `success: false` and the message in `errors`.
   *
   * @param outputPath - target file path; an extension may be appended
   * @param options - partial options merged over the defaults below
   * @returns an ExportResult describing the written file (or the failure)
   */
  async exportMemories(
    outputPath: string,
    options: Partial<ExportOptions> = {},
  ): Promise<ExportResult> {
    const defaultOptions: ExportOptions = {
      format: "json",
      compression: "none",
      includeMetadata: true,
      includeLearning: true,
      includeKnowledgeGraph: true,
      anonymize: {
        enabled: false,
        fields: ["userId", "email", "personalInfo"],
        method: "hash",
      },
      encryption: {
        enabled: false,
        algorithm: "aes-256-gcm",
      },
    };

    // Shallow merge: a caller-supplied nested object (e.g. anonymize)
    // replaces the default wholesale rather than merging per-field.
    const activeOptions = { ...defaultOptions, ...options };
    const startTime = Date.now();

    this.emit("export_started", { outputPath, options: activeOptions });

    try {
      // Get filtered entries
      const entries = await this.getFilteredEntries(activeOptions.filters);

      // Prepare export data
      const exportData = await this.prepareExportData(entries, activeOptions);

      // Apply anonymization if enabled (mutates exportData in place)
      if (activeOptions.anonymize?.enabled) {
        this.applyAnonymization(exportData, activeOptions.anonymize);
      }

      // Prepare output path - if compression is requested, use temp file first
      let actualOutputPath = outputPath;
      if (activeOptions.compression && activeOptions.compression !== "none") {
        // For compressed exports, export to temp file first
        if (outputPath.endsWith(".gz")) {
          actualOutputPath = outputPath.slice(0, -3); // Remove .gz suffix
        } else if (outputPath.endsWith(".zip")) {
          actualOutputPath = outputPath.slice(0, -4); // Remove .zip suffix
        }
      }

      // Export to specified format
      let filePath: string;
      let size = 0;

      switch (activeOptions.format) {
        case "json":
          filePath = await this.exportToJSON(
            actualOutputPath,
            exportData,
            activeOptions,
          );
          break;
        case "jsonl":
          filePath = await this.exportToJSONL(
            actualOutputPath,
            exportData,
            activeOptions,
          );
          break;
        case "csv":
          filePath = await this.exportToCSV(
            actualOutputPath,
            exportData,
            activeOptions,
          );
          break;
        case "xml":
          filePath = await this.exportToXML(
            actualOutputPath,
            exportData,
            activeOptions,
          );
          break;
        case "yaml":
          filePath = await this.exportToYAML(
            actualOutputPath,
            exportData,
            activeOptions,
          );
          break;
        case "sqlite":
          filePath = await this.exportToSQLite(
            actualOutputPath,
            exportData,
            activeOptions,
          );
          break;
        case "archive":
          filePath = await this.exportToArchive(
            actualOutputPath,
            exportData,
            activeOptions,
          );
          break;
        default:
          throw new Error(`Unsupported export format: ${activeOptions.format}`);
      }

      // Apply compression if specified (moves output to the original path)
      if (activeOptions.compression && activeOptions.compression !== "none") {
        filePath = await this.applyCompression(
          filePath,
          activeOptions.compression,
          outputPath,
        );
      }

      // Apply encryption if enabled
      if (activeOptions.encryption?.enabled) {
        filePath = await this.applyEncryption(
          filePath,
          activeOptions.encryption,
        );
      }

      // Get file size
      const stats = await fs.stat(filePath);
      size = stats.size;

      const result: ExportResult = {
        success: true,
        filePath,
        format: activeOptions.format,
        size,
        entries: entries.length,
        metadata: {
          exportedAt: new Date(),
          version: this.version,
          source: "DocuMCP Memory System",
          includes: this.getIncludedComponents(activeOptions),
          compression:
            activeOptions.compression !== "none"
              ? activeOptions.compression
              : undefined,
          encryption: activeOptions.encryption?.enabled,
        },
        warnings: [],
        errors: [],
      };

      this.emit("export_completed", {
        result,
        duration: Date.now() - startTime,
      });

      return result;
    } catch (error) {
      // Errors are converted into a failed result instead of propagating.
      const errorMessage =
        error instanceof Error ? error.message : String(error);
      this.emit("export_error", { error: errorMessage });

      return {
        success: false,
        format: activeOptions.format,
        size: 0,
        entries: 0,
        metadata: {
          exportedAt: new Date(),
          version: this.version,
          source: "DocuMCP Memory System",
          includes: [],
        },
        warnings: [],
        errors: [errorMessage],
      };
    }
  }
346 |
  /**
   * Imports memory data from a file into the memory system.
   *
   * Pipeline: optional backup -> format sniffing -> load/parse -> validate
   * -> apply. A format mismatch only emits a `format_mismatch` event; the
   * format specified in options is still the one used for parsing. Never
   * throws: failures are reported via a result with `success: false`.
   *
   * @param inputPath - path of the file to import
   * @param options - partial options merged over the defaults below
   * @returns an ImportResult with counters and per-entry outcome summary
   */
  async importMemories(
    inputPath: string,
    options: Partial<ImportOptions> = {},
  ): Promise<ImportResult> {
    const defaultOptions: ImportOptions = {
      format: "json",
      mode: "merge",
      validation: "strict",
      conflictResolution: "skip",
      backup: true,
      dryRun: false,
    };

    const activeOptions = { ...defaultOptions, ...options };
    const startTime = Date.now();

    this.emit("import_started", { inputPath, options: activeOptions });

    try {
      // Create backup if requested (skipped for dry runs, which don't write)
      if (activeOptions.backup && !activeOptions.dryRun) {
        await this.createBackup();
      }

      // Detect and verify format
      const detectedFormat = await this.detectFormat(inputPath);
      if (detectedFormat !== activeOptions.format) {
        this.emit("format_mismatch", {
          detected: detectedFormat,
          specified: activeOptions.format,
        });
      }

      // Load and parse import data
      const importData = await this.loadImportData(inputPath, activeOptions);

      // Validate import data
      const validationResult = await this.validateImportData(
        importData,
        activeOptions,
      );

      // Strict validation aborts on any invalid entry; loose/none proceed.
      if (
        validationResult.invalid > 0 &&
        activeOptions.validation === "strict"
      ) {
        throw new Error(
          `Validation failed: ${validationResult.invalid} invalid entries`,
        );
      }

      // Process import data
      const result = await this.processImportData(importData, activeOptions);

      this.emit("import_completed", {
        result,
        duration: Date.now() - startTime,
      });

      return result;
    } catch (error) {
      const errorMessage =
        error instanceof Error ? error.message : String(error);
      this.emit("import_error", { error: errorMessage });

      return {
        success: false,
        processed: 0,
        imported: 0,
        skipped: 0,
        errors: 1,
        errorDetails: [errorMessage],
        conflicts: 0,
        validation: {
          valid: 0,
          invalid: 0,
          warnings: [],
        },
        summary: {
          newEntries: 0,
          updatedEntries: 0,
          duplicateEntries: 0,
          failedEntries: 0,
        },
        metadata: {
          importedAt: new Date(),
          source: inputPath,
          format: activeOptions.format,
          mode: activeOptions.mode,
        },
      };
    }
  }
443 |
  /**
   * Builds a MigrationPlan for moving data from a foreign schema into
   * DocuMCP: auto-generated field mappings (unless disabled), caller
   * overrides, plus derived transformations, validation rules, and
   * post-processing steps.
   *
   * @param sourceSchema - schema descriptor of the source system; its
   *        `system` property (if any) names the source
   * @param targetSchema - DocuMCP-side schema descriptor
   * @param options - autoMap defaults to true; customMappings are applied
   *        on top of (and override) auto-generated mappings
   * @returns the assembled plan (not executed; see executeMigration)
   */
  async createMigrationPlan(
    sourceSchema: any,
    targetSchema: any,
    options?: {
      autoMap?: boolean;
      preserveStructure?: boolean;
      customMappings?: Record<string, string>;
    },
  ): Promise<MigrationPlan> {
    const plan: MigrationPlan = {
      sourceSystem: sourceSchema.system || "Unknown",
      targetSystem: "DocuMCP",
      mapping: {},
      transformations: [],
      validation: [],
      postProcessing: [],
    };

    // Auto-generate field mappings (opt-out: only skipped when explicitly false)
    if (options?.autoMap !== false) {
      plan.mapping = this.generateFieldMappings(sourceSchema, targetSchema);
    }

    // Apply custom mappings (override auto-generated entries on key collision)
    if (options?.customMappings) {
      Object.assign(plan.mapping, options.customMappings);
    }

    // Generate transformations
    plan.transformations = this.generateTransformations(
      sourceSchema,
      targetSchema,
      plan.mapping,
    );

    // Generate validation rules
    plan.validation = this.generateValidationRules(targetSchema);

    // Generate post-processing steps
    plan.postProcessing = this.generatePostProcessingSteps(targetSchema);

    return plan;
  }
490 |
  /**
   * Executes a MigrationPlan against a source data file: loads the raw
   * data, applies the plan's transformations, converts it into the
   * internal import shape, imports it, then runs post-processing steps.
   *
   * Unlike importMemories, this method RETHROWS on failure (after
   * emitting a `migration_error` event).
   *
   * @param inputPath - path to the source data file
   * @param migrationPlan - plan produced by createMigrationPlan
   * @param options - overrides for the import phase (defaults below)
   * @returns the ImportResult from the underlying import
   */
  async executeMigration(
    inputPath: string,
    migrationPlan: MigrationPlan,
    options?: Partial<ImportOptions>,
  ): Promise<ImportResult> {
    this.emit("migration_started", { inputPath, plan: migrationPlan });

    try {
      // Load source data
      const sourceData = await this.loadRawData(inputPath);

      // Apply transformations
      const transformedData = await this.applyTransformations(
        sourceData,
        migrationPlan,
      );

      // Convert to import format
      const importData = this.convertToImportFormat(
        transformedData,
        migrationPlan,
      );

      // Execute import with migration settings; caller options may override
      // the defaults, but the transformation rules always come from the plan.
      const importOptions: ImportOptions = {
        format: "json",
        mode: "merge",
        validation: "strict",
        conflictResolution: "merge",
        backup: true,
        dryRun: false,
        ...options,
        transformation: {
          enabled: true,
          rules: migrationPlan.transformations.map((t) => ({
            field: t.target,
            operation: t.type as any,
            params: { source: t.source, operation: t.operation },
          })),
        },
      };

      const result = await this.processImportData(importData, importOptions);

      // Execute post-processing
      if (migrationPlan.postProcessing.length > 0) {
        await this.executePostProcessing(migrationPlan.postProcessing);
      }

      this.emit("migration_completed", { result });
      return result;
    } catch (error) {
      this.emit("migration_error", {
        error: error instanceof Error ? error.message : String(error),
      });
      throw error;
    }
  }
552 |
553 | /**
554 | * Get supported formats
555 | */
556 | getSupportedFormats(): {
557 | export: string[];
558 | import: string[];
559 | compression: string[];
560 | encryption: string[];
561 | } {
562 | return {
563 | export: ["json", "jsonl", "csv", "xml", "yaml", "sqlite", "archive"],
564 | import: ["json", "jsonl", "csv", "xml", "yaml", "sqlite", "archive"],
565 | compression: ["gzip", "zip", "none"],
566 | encryption: ["aes-256-gcm", "aes-192-gcm", "aes-128-gcm"],
567 | };
568 | }
569 |
  /**
   * Checks whether a data file can be imported into DocuMCP, using a
   * sample of its contents. Reports blocking issues, advisory
   * recommendations, and whether a schema migration is needed first.
   *
   * Never throws: unreadable/undetectable files come back as
   * `compatible: false` with the error message in `issues`.
   *
   * @param sourcePath - file to inspect
   * @param _targetSystem - reserved for future multi-target support
   */
  async validateCompatibility(
    sourcePath: string,
    _targetSystem: string = "DocuMCP",
  ): Promise<{
    compatible: boolean;
    issues: string[];
    recommendations: string[];
    migrationRequired: boolean;
  }> {
    try {
      const format = await this.detectFormat(sourcePath);
      const sampleData = await this.loadSampleData(sourcePath, format);

      const issues: string[] = [];
      const recommendations: string[] = [];
      let compatible = true;
      let migrationRequired = false;

      // Check format compatibility
      if (!this.getSupportedFormats().import.includes(format)) {
        issues.push(`Unsupported format: ${format}`);
        compatible = false;
      }

      // Check schema compatibility — schema problems are not fatal, they
      // only flag that a migration plan is needed.
      const schemaIssues = this.validateSchema(sampleData);
      if (schemaIssues.length > 0) {
        issues.push(...schemaIssues);
        migrationRequired = true;
      }

      // Check data integrity
      const integrityIssues = this.validateDataIntegrity(sampleData);
      if (integrityIssues.length > 0) {
        issues.push(...integrityIssues);
        recommendations.push("Consider data cleaning before import");
      }

      // Generate recommendations
      if (migrationRequired) {
        recommendations.push("Create migration plan for schema transformation");
      }

      if (format === "csv") {
        recommendations.push(
          "Consider using JSON or JSONL for better data preservation",
        );
      }

      return {
        compatible,
        issues,
        recommendations,
        migrationRequired,
      };
    } catch (error) {
      return {
        compatible: false,
        issues: [error instanceof Error ? error.message : String(error)],
        recommendations: ["Verify file format and accessibility"],
        migrationRequired: false,
      };
    }
  }
637 |
638 | /**
639 | * Private helper methods
640 | */
641 | private async getFilteredEntries(
642 | filters?: ExportOptions["filters"],
643 | ): Promise<MemoryEntry[]> {
644 | let entries = await this.storage.getAll();
645 |
646 | if (!filters) return entries;
647 |
648 | // Apply type filter
649 | if (filters.types && filters.types.length > 0) {
650 | entries = entries.filter((entry) => filters.types!.includes(entry.type));
651 | }
652 |
653 | // Apply date range filter
654 | if (filters.dateRange) {
655 | entries = entries.filter((entry) => {
656 | const entryDate = new Date(entry.timestamp);
657 | return (
658 | entryDate >= filters.dateRange!.start &&
659 | entryDate <= filters.dateRange!.end
660 | );
661 | });
662 | }
663 |
664 | // Apply project filter
665 | if (filters.projects && filters.projects.length > 0) {
666 | entries = entries.filter((entry) =>
667 | filters.projects!.some(
668 | (project) =>
669 | entry.data.projectPath?.includes(project) ||
670 | entry.data.projectId === project,
671 | ),
672 | );
673 | }
674 |
675 | // Apply tags filter
676 | if (filters.tags && filters.tags.length > 0) {
677 | entries = entries.filter(
678 | (entry) => entry.tags?.some((tag) => filters.tags!.includes(tag)),
679 | );
680 | }
681 |
682 | // Apply outcomes filter
683 | if (filters.outcomes && filters.outcomes.length > 0) {
684 | entries = entries.filter(
685 | (entry) =>
686 | filters.outcomes!.includes(entry.data.outcome) ||
687 | (entry.data.success === true &&
688 | filters.outcomes!.includes("success")) ||
689 | (entry.data.success === false &&
690 | filters.outcomes!.includes("failure")),
691 | );
692 | }
693 |
694 | return entries;
695 | }
696 |
  /**
   * Assembles the full export payload: export metadata, the memory
   * entries, and (when requested) learning patterns/statistics and the
   * knowledge graph's nodes, edges, and statistics.
   *
   * @param entries - the already-filtered memory entries to export
   * @param options - controls which optional sections are included
   */
  private async prepareExportData(
    entries: MemoryEntry[],
    options: ExportOptions,
  ): Promise<any> {
    const exportData: any = {
      metadata: {
        version: this.version,
        exportedAt: new Date().toISOString(),
        source: "DocuMCP Memory System",
        entries: entries.length,
        options: {
          includeMetadata: options.includeMetadata,
          includeLearning: options.includeLearning,
          includeKnowledgeGraph: options.includeKnowledgeGraph,
        },
      },
      memories: entries,
    };

    // Include learning data if requested
    if (options.includeLearning) {
      const patterns = await this.learningSystem.getPatterns();
      exportData.learning = {
        patterns,
        statistics: await this.learningSystem.getStatistics(),
      };
    }

    // Include knowledge graph if requested
    if (options.includeKnowledgeGraph) {
      const nodes = await this.knowledgeGraph.getAllNodes();
      const edges = await this.knowledgeGraph.getAllEdges();
      exportData.knowledgeGraph = {
        nodes,
        edges,
        statistics: await this.knowledgeGraph.getStatistics(),
      };
    }

    return exportData;
  }
738 |
739 | private applyAnonymization(
740 | data: any,
741 | anonymizeOptions: { fields: string[]; method: string },
742 | ): void {
743 | const anonymizeValue = (value: any, method: string): any => {
744 | if (typeof value !== "string") return value;
745 |
746 | switch (method) {
747 | case "hash":
748 | return this.hashValue(value);
749 | case "remove":
750 | return null;
751 | case "pseudonymize":
752 | return this.pseudonymizeValue(value);
753 | default:
754 | return value;
755 | }
756 | };
757 |
758 | const anonymizeObject = (obj: any): void => {
759 | for (const [key, value] of Object.entries(obj)) {
760 | if (anonymizeOptions.fields.includes(key)) {
761 | obj[key] = anonymizeValue(value, anonymizeOptions.method);
762 | } else if (typeof value === "object" && value !== null) {
763 | anonymizeObject(value);
764 | }
765 | }
766 | };
767 |
768 | anonymizeObject(data);
769 | }
770 |
771 | private hashValue(value: string): string {
772 | // Simple hash - in production, use a proper cryptographic hash
773 | let hash = 0;
774 | for (let i = 0; i < value.length; i++) {
775 | const char = value.charCodeAt(i);
776 | hash = (hash << 5) - hash + char;
777 | hash = hash & hash;
778 | }
779 | return `hash_${Math.abs(hash).toString(36)}`;
780 | }
781 |
782 | private pseudonymizeValue(_value: string): string {
783 | // Simple pseudonymization - in production, use proper techniques
784 | const prefixes = ["user", "project", "system", "item"];
785 | const suffix = Math.random().toString(36).substr(2, 8);
786 | const prefix = prefixes[Math.floor(Math.random() * prefixes.length)];
787 | return `${prefix}_${suffix}`;
788 | }
789 |
790 | private async exportToJSON(
791 | outputPath: string,
792 | data: any,
793 | _options: ExportOptions,
794 | ): Promise<string> {
795 | const jsonData = JSON.stringify(data, null, 2);
796 | // Handle compression-aware file paths (e.g., file.json.gz)
797 | let filePath = outputPath;
798 | if (!outputPath.includes(".json")) {
799 | filePath = `${outputPath}.json`;
800 | }
801 | await fs.writeFile(filePath, jsonData, "utf8");
802 | return filePath;
803 | }
804 |
  /**
   * Writes the export payload as JSON Lines: the metadata document on the
   * first line, one memory entry per subsequent line, then optional
   * trailing records tagged `{ type: "learning" }` and
   * `{ type: "knowledgeGraph" }`. This layout is what loadJSONLData
   * expects on import.
   *
   * @returns the path written (".jsonl" appended if missing)
   */
  private async exportToJSONL(
    outputPath: string,
    data: any,
    _options: ExportOptions,
  ): Promise<string> {
    const filePath = outputPath.endsWith(".jsonl")
      ? outputPath
      : `${outputPath}.jsonl`;

    // Stream-based write wrapped in a promise (callback-style fs API).
    return new Promise((resolve, reject) => {
      const writeStream = createWriteStream(filePath);

      writeStream.on("error", (error) => {
        reject(error);
      });

      writeStream.on("finish", () => {
        resolve(filePath);
      });

      // Write metadata as first line
      writeStream.write(JSON.stringify(data.metadata) + "\n");

      // Write each memory entry as a separate line
      for (const entry of data.memories) {
        writeStream.write(JSON.stringify(entry) + "\n");
      }

      // Write learning data if included
      if (data.learning) {
        writeStream.write(
          JSON.stringify({ type: "learning", data: data.learning }) + "\n",
        );
      }

      // Write knowledge graph if included
      if (data.knowledgeGraph) {
        writeStream.write(
          JSON.stringify({
            type: "knowledgeGraph",
            data: data.knowledgeGraph,
          }) + "\n",
        );
      }

      writeStream.end();
    });
  }
853 |
854 | private async exportToCSV(
855 | outputPath: string,
856 | data: any,
857 | _options: ExportOptions,
858 | ): Promise<string> {
859 | const filePath = outputPath.endsWith(".csv")
860 | ? outputPath
861 | : `${outputPath}.csv`;
862 |
863 | // Flatten memory entries for CSV format
864 | const flattenedEntries = data.memories.map((entry: MemoryEntry) => ({
865 | id: entry.id,
866 | timestamp: entry.timestamp,
867 | type: entry.type,
868 | projectPath: entry.data.projectPath || "",
869 | projectId: entry.data.projectId || "",
870 | language: entry.data.language || "",
871 | framework: entry.data.framework || "",
872 | outcome: entry.data.outcome || "",
873 | success: entry.data.success || false,
874 | tags: entry.tags?.join(";") || "",
875 | metadata: JSON.stringify(entry.metadata || {}),
876 | }));
877 |
878 | // Generate CSV headers
879 | const headers = Object.keys(flattenedEntries[0] || {});
880 | const csvLines = [headers.join(",")];
881 |
882 | // Generate CSV rows
883 | for (const entry of flattenedEntries) {
884 | const row = headers.map((header) => {
885 | const value = entry[header as keyof typeof entry];
886 | const stringValue =
887 | typeof value === "string" ? value : JSON.stringify(value);
888 | return `"${stringValue.replace(/"/g, '""')}"`;
889 | });
890 | csvLines.push(row.join(","));
891 | }
892 |
893 | await fs.writeFile(filePath, csvLines.join("\n"), "utf8");
894 | return filePath;
895 | }
896 |
897 | private async exportToXML(
898 | outputPath: string,
899 | data: any,
900 | _options: ExportOptions,
901 | ): Promise<string> {
902 | const filePath = outputPath.endsWith(".xml")
903 | ? outputPath
904 | : `${outputPath}.xml`;
905 |
906 | const xmlData = this.convertToXML(data);
907 | await fs.writeFile(filePath, xmlData, "utf8");
908 | return filePath;
909 | }
910 |
911 | private async exportToYAML(
912 | outputPath: string,
913 | data: any,
914 | _options: ExportOptions,
915 | ): Promise<string> {
916 | const filePath = outputPath.endsWith(".yaml")
917 | ? outputPath
918 | : `${outputPath}.yaml`;
919 |
920 | // Simple YAML conversion - in production, use a proper YAML library
921 | const yamlData = this.convertToYAML(data);
922 | await fs.writeFile(filePath, yamlData, "utf8");
923 | return filePath;
924 | }
925 |
  /**
   * SQLite export is not implemented: it would need a native driver such
   * as better-sqlite3, which is not a dependency of this package.
   *
   * @throws always, naming the missing dependency
   */
  private async exportToSQLite(
    _outputPath: string,
    _data: any,
    _options: ExportOptions,
  ): Promise<string> {
    // This would require a SQLite library like better-sqlite3
    // For now, throw an error indicating additional dependencies needed
    throw new Error(
      "SQLite export requires additional dependencies (better-sqlite3)",
    );
  }
937 |
  /**
   * "Archive" export: writes memories, optional learning data, the
   * optional knowledge graph, and a manifest as separate JSON files in a
   * directory.
   *
   * NOTE(review): despite the ".tar" naming, no tarball is produced — the
   * return value is the DIRECTORY path, and the manifest checksums are
   * literal "sha256-placeholder" strings, not real digests. Callers
   * should not treat the manifest as integrity-verified.
   *
   * @returns the directory containing the archive's files
   */
  private async exportToArchive(
    outputPath: string,
    data: any,
    options: ExportOptions,
  ): Promise<string> {
    const archivePath = outputPath.endsWith(".tar")
      ? outputPath
      : `${outputPath}.tar`;

    // Create archive metadata
    const metadata: ArchiveMetadata = {
      version: this.version,
      created: new Date(),
      source: "DocuMCP Memory System",
      description: "Complete memory system export archive",
      manifest: {
        files: [],
        total: { files: 0, size: 0, entries: data.memories.length },
      },
      options,
    };

    // This would require archiving capabilities
    // For now, create multiple files and reference them in metadata
    const baseDir = archivePath.replace(".tar", "");
    await fs.mkdir(baseDir, { recursive: true });

    // Export memories as JSON
    const memoriesPath = `${baseDir}/memories.json`;
    await this.exportToJSON(memoriesPath, { memories: data.memories }, options);
    metadata.manifest.files.push({
      name: "memories.json",
      type: "memories",
      size: (await fs.stat(memoriesPath)).size,
      checksum: "sha256-placeholder",
      entries: data.memories.length,
    });

    // Export learning data if included
    if (data.learning) {
      const learningPath = `${baseDir}/learning.json`;
      await this.exportToJSON(learningPath, data.learning, options);
      metadata.manifest.files.push({
        name: "learning.json",
        type: "learning",
        size: (await fs.stat(learningPath)).size,
        checksum: "sha256-placeholder",
      });
    }

    // Export knowledge graph if included
    if (data.knowledgeGraph) {
      const kgPath = `${baseDir}/knowledge-graph.json`;
      await this.exportToJSON(kgPath, data.knowledgeGraph, options);
      metadata.manifest.files.push({
        name: "knowledge-graph.json",
        type: "knowledge-graph",
        size: (await fs.stat(kgPath)).size,
        checksum: "sha256-placeholder",
      });
    }

    // Write metadata (manifest totals are left at their initial values)
    const metadataPath = `${baseDir}/metadata.json`;
    await this.exportToJSON(metadataPath, metadata, options);

    return baseDir;
  }
1006 |
  /**
   * Applies compression to an exported file.
   *
   * NOTE(review): the gzip branch is a MOCK — it writes a literal
   * "GZIP_HEADER" line followed by the plaintext, NOT valid gzip data.
   * Replace with Node's zlib before relying on the output. "zip" and any
   * other scheme are skipped entirely (a "compression_skipped" event is
   * emitted and the input path is returned unchanged).
   *
   * @param filePath - file produced by the format exporter
   * @param compression - requested scheme ("gzip" | "zip" | "none")
   * @param targetPath - final output path; when set, the intermediate
   *        uncompressed file is deleted after "compression"
   * @returns the path of the (possibly unchanged) output file
   */
  private async applyCompression(
    filePath: string,
    compression: string,
    targetPath?: string,
  ): Promise<string> {
    if (compression === "gzip") {
      const compressedPath = targetPath || `${filePath}.gz`;
      const content = await fs.readFile(filePath, "utf8");
      // Simple mock compression - just add a header and write the content
      await fs.writeFile(compressedPath, `GZIP_HEADER\n${content}`, "utf8");

      // Clean up temp file if we used one
      if (targetPath && targetPath !== filePath) {
        await fs.unlink(filePath);
      }

      return compressedPath;
    }

    // For other compression types or 'none', return original path
    this.emit("compression_skipped", {
      reason: "Not implemented",
      compression,
    });
    return filePath;
  }
1033 |
  /**
   * Placeholder for at-rest encryption: currently a no-op that emits an
   * "encryption_skipped" event and returns the path unchanged, so
   * "encrypted" exports are actually written in the clear.
   */
  private async applyEncryption(
    filePath: string,
    encryption: any,
  ): Promise<string> {
    // This would require encryption capabilities
    // For now, return the original path
    this.emit("encryption_skipped", { reason: "Not implemented", encryption });
    return filePath;
  }
1043 |
1044 | private getIncludedComponents(options: ExportOptions): string[] {
1045 | const components = ["memories"];
1046 | if (options.includeMetadata) components.push("metadata");
1047 | if (options.includeLearning) components.push("learning");
1048 | if (options.includeKnowledgeGraph) components.push("knowledge-graph");
1049 | return components;
1050 | }
1051 |
1052 | private async detectFormat(filePath: string): Promise<string> {
1053 | const extension = filePath.split(".").pop()?.toLowerCase();
1054 |
1055 | switch (extension) {
1056 | case "json":
1057 | return "json";
1058 | case "jsonl":
1059 | return "jsonl";
1060 | case "csv":
1061 | return "csv";
1062 | case "xml":
1063 | return "xml";
1064 | case "yaml":
1065 | case "yml":
1066 | return "yaml";
1067 | case "db":
1068 | case "sqlite":
1069 | return "sqlite";
1070 | case "tar":
1071 | case "zip":
1072 | return "archive";
1073 | default: {
1074 | // Try to detect by content
1075 | const content = await fs.readFile(filePath, "utf8");
1076 | if (content.trim().startsWith("{") || content.trim().startsWith("[")) {
1077 | return "json";
1078 | }
1079 | if (content.includes("<?xml")) {
1080 | return "xml";
1081 | }
1082 | return "unknown";
1083 | }
1084 | }
1085 | }
1086 |
1087 | private async loadImportData(
1088 | filePath: string,
1089 | options: ImportOptions,
1090 | ): Promise<any> {
1091 | switch (options.format) {
1092 | case "json":
1093 | return JSON.parse(await fs.readFile(filePath, "utf8"));
1094 | case "jsonl":
1095 | return this.loadJSONLData(filePath);
1096 | case "csv":
1097 | return this.loadCSVData(filePath);
1098 | case "xml":
1099 | return this.loadXMLData(filePath);
1100 | case "yaml":
1101 | return this.loadYAMLData(filePath);
1102 | default:
1103 | throw new Error(`Unsupported import format: ${options.format}`);
1104 | }
1105 | }
1106 |
1107 | private async loadJSONLData(filePath: string): Promise<any> {
1108 | const content = await fs.readFile(filePath, "utf8");
1109 | const lines = content.trim().split("\n");
1110 |
1111 | const data: any = { memories: [], learning: null, knowledgeGraph: null };
1112 |
1113 | for (const line of lines) {
1114 | const parsed = JSON.parse(line);
1115 |
1116 | if (parsed.type === "learning") {
1117 | data.learning = parsed.data;
1118 | } else if (parsed.type === "knowledgeGraph") {
1119 | data.knowledgeGraph = parsed.data;
1120 | } else if (parsed.version) {
1121 | data.metadata = parsed;
1122 | } else {
1123 | data.memories.push(parsed);
1124 | }
1125 | }
1126 |
1127 | return data;
1128 | }
1129 |
1130 | private async loadCSVData(filePath: string): Promise<any> {
1131 | const content = await fs.readFile(filePath, "utf8");
1132 | const lines = content.trim().split("\n");
1133 | const headers = lines[0].split(",").map((h) => h.replace(/"/g, ""));
1134 |
1135 | const memories = [];
1136 | for (let i = 1; i < lines.length; i++) {
1137 | const values = this.parseCSVLine(lines[i]);
1138 | const entry: any = {};
1139 |
1140 | for (let j = 0; j < headers.length; j++) {
1141 | const header = headers[j];
1142 | const value = values[j];
1143 |
1144 | // Parse special fields
1145 | if (header === "tags") {
1146 | entry.tags = value ? value.split(";") : [];
1147 | } else if (header === "metadata") {
1148 | try {
1149 | entry.metadata = JSON.parse(value);
1150 | } catch {
1151 | entry.metadata = {};
1152 | }
1153 | } else if (header === "success") {
1154 | entry.data = entry.data || {};
1155 | entry.data.success = value === "true";
1156 | } else if (
1157 | [
1158 | "projectPath",
1159 | "projectId",
1160 | "language",
1161 | "framework",
1162 | "outcome",
1163 | ].includes(header)
1164 | ) {
1165 | entry.data = entry.data || {};
1166 | entry.data[header] = value;
1167 | } else {
1168 | entry[header] = value;
1169 | }
1170 | }
1171 |
1172 | memories.push(entry);
1173 | }
1174 |
1175 | return { memories };
1176 | }
1177 |
1178 | private parseCSVLine(line: string): string[] {
1179 | const values: string[] = [];
1180 | let current = "";
1181 | let inQuotes = false;
1182 |
1183 | for (let i = 0; i < line.length; i++) {
1184 | const char = line[i];
1185 |
1186 | if (char === '"') {
1187 | if (inQuotes && line[i + 1] === '"') {
1188 | current += '"';
1189 | i++;
1190 | } else {
1191 | inQuotes = !inQuotes;
1192 | }
1193 | } else if (char === "," && !inQuotes) {
1194 | values.push(current);
1195 | current = "";
1196 | } else {
1197 | current += char;
1198 | }
1199 | }
1200 |
1201 | values.push(current);
1202 | return values;
1203 | }
1204 |
  /**
   * Placeholder: XML import is intentionally unimplemented because it
   * would require an extra dependency (xml2js). Always throws.
   */
  private async loadXMLData(_filePath: string): Promise<any> {
    // This would require an XML parser
    throw new Error("XML import requires additional dependencies (xml2js)");
  }
1209 |
  /**
   * Placeholder: YAML import is intentionally unimplemented because it
   * would require an extra dependency (js-yaml). Always throws.
   */
  private async loadYAMLData(_filePath: string): Promise<any> {
    // This would require a YAML parser
    throw new Error("YAML import requires additional dependencies (js-yaml)");
  }
1214 |
1215 | private async validateImportData(
1216 | data: any,
1217 | options: ImportOptions,
1218 | ): Promise<{ valid: number; invalid: number; warnings: string[] }> {
1219 | const result = { valid: 0, invalid: 0, warnings: [] as string[] };
1220 |
1221 | if (!data.memories || !Array.isArray(data.memories)) {
1222 | result.warnings.push("No memories array found in import data");
1223 | return result;
1224 | }
1225 |
1226 | for (const entry of data.memories) {
1227 | if (this.validateMemoryEntry(entry, options.validation)) {
1228 | result.valid++;
1229 | } else {
1230 | result.invalid++;
1231 | }
1232 | }
1233 |
1234 | return result;
1235 | }
1236 |
1237 | private validateMemoryEntry(entry: any, validation: string): boolean {
1238 | // Check for completely missing or null required fields
1239 | if (
1240 | !entry.id ||
1241 | !entry.timestamp ||
1242 | entry.type === null ||
1243 | entry.type === undefined ||
1244 | entry.data === null
1245 | ) {
1246 | return false; // These are invalid regardless of validation level
1247 | }
1248 |
1249 | if (!entry.type) {
1250 | return validation !== "strict";
1251 | }
1252 |
1253 | if (validation === "strict") {
1254 | return Boolean(entry.data && typeof entry.data === "object");
1255 | }
1256 |
1257 | // For loose validation, still require data to be defined (not null)
1258 | if (validation === "loose" && entry.data === null) {
1259 | return false;
1260 | }
1261 |
1262 | return true;
1263 | }
1264 |
  /**
   * Import memory entries (plus optional learning / knowledge-graph
   * payloads) according to the given options. ID collisions with stored
   * entries are resolved per options.conflictResolution (skip /
   * overwrite / merge / rename). In dryRun mode nothing is written, but
   * the returned result still reports what would have happened.
   */
  private async processImportData(
    data: any,
    options: ImportOptions,
  ): Promise<ImportResult> {
    const result: ImportResult = {
      success: true,
      processed: 0,
      imported: 0,
      skipped: 0,
      errors: 0,
      errorDetails: [],
      conflicts: 0,
      validation: { valid: 0, invalid: 0, warnings: [] },
      summary: {
        newEntries: 0,
        updatedEntries: 0,
        duplicateEntries: 0,
        failedEntries: 0,
      },
      metadata: {
        importedAt: new Date(),
        source: "imported data",
        format: options.format,
        mode: options.mode,
      },
    };

    if (!data.memories || !Array.isArray(data.memories)) {
      result.success = false;
      result.errors = 1;
      result.errorDetails = ["No valid memories array found in import data"];
      return result;
    }

    for (const entry of data.memories) {
      result.processed++;

      try {
        // Apply transformations and mappings
        let transformedEntry = { ...entry };
        if (options.mapping || options.transformation?.enabled) {
          transformedEntry = this.applyDataTransformations(entry, options);
        }

        // Entries that fail validation are counted but do not abort the
        // remainder of the import.
        if (!this.validateMemoryEntry(transformedEntry, options.validation)) {
          result.validation.invalid++;
          result.errors++;
          result.summary.failedEntries++;
          result.errorDetails.push(
            `Invalid memory entry: ${
              transformedEntry.id || "unknown"
            } - validation failed`,
          );
          continue;
        }

        result.validation.valid++;

        // Check for conflicts with an already-stored entry of the same id
        const existing = await this.storage.get(transformedEntry.id);
        if (existing) {
          result.conflicts++;

          switch (options.conflictResolution) {
            case "skip":
              // Keep the stored entry; count the import as a duplicate.
              result.skipped++;
              result.summary.duplicateEntries++;
              continue;
            case "overwrite":
              // Replace the stored entry wholesale.
              if (!options.dryRun) {
                await this.storage.update(
                  transformedEntry.id,
                  transformedEntry,
                );
                result.imported++;
                result.summary.updatedEntries++;
              }
              break;
            case "merge":
              // Combine stored and imported entries (imported fields win).
              if (!options.dryRun) {
                const merged = this.mergeEntries(existing, transformedEntry);
                await this.storage.update(transformedEntry.id, merged);
                result.imported++;
                result.summary.updatedEntries++;
              }
              break;
            case "rename": {
              // Keep both: store the imported entry under a fresh id.
              const newId = `${transformedEntry.id}_imported_${Date.now()}`;
              if (!options.dryRun) {
                await this.storage.store({ ...transformedEntry, id: newId });
                result.imported++;
                result.summary.newEntries++;
              }
              break;
            }
          }
        } else {
          if (!options.dryRun) {
            await this.storage.store(transformedEntry);
            result.imported++;
            result.summary.newEntries++;
          }
        }
      } catch (error) {
        result.errors++;
        result.summary.failedEntries++;
        result.errorDetails.push(
          error instanceof Error ? error.message : String(error),
        );
      }
    }

    // Import learning data if present
    if (data.learning && !options.dryRun) {
      await this.importLearningData(data.learning);
    }

    // Import knowledge graph if present
    if (data.knowledgeGraph && !options.dryRun) {
      await this.importKnowledgeGraphData(data.knowledgeGraph);
    }

    return result;
  }
1389 |
1390 | private mergeEntries(
1391 | existing: MemoryEntry,
1392 | imported: MemoryEntry,
1393 | ): MemoryEntry {
1394 | return {
1395 | ...existing,
1396 | ...imported,
1397 | data: { ...existing.data, ...imported.data },
1398 | metadata: { ...existing.metadata, ...imported.metadata },
1399 | tags: [...new Set([...(existing.tags || []), ...(imported.tags || [])])],
1400 | timestamp: imported.timestamp || existing.timestamp,
1401 | };
1402 | }
1403 |
1404 | private async importLearningData(learningData: any): Promise<void> {
1405 | if (learningData.patterns && Array.isArray(learningData.patterns)) {
1406 | for (const pattern of learningData.patterns) {
1407 | // This would require methods to import patterns into the learning system
1408 | // For now, just emit an event
1409 | this.emit("learning_pattern_imported", pattern);
1410 | }
1411 | }
1412 | }
1413 |
1414 | private async importKnowledgeGraphData(kgData: any): Promise<void> {
1415 | if (kgData.nodes && Array.isArray(kgData.nodes)) {
1416 | for (const node of kgData.nodes) {
1417 | await this.knowledgeGraph.addNode(node);
1418 | }
1419 | }
1420 |
1421 | if (kgData.edges && Array.isArray(kgData.edges)) {
1422 | for (const edge of kgData.edges) {
1423 | await this.knowledgeGraph.addEdge(edge);
1424 | }
1425 | }
1426 | }
1427 |
1428 | private async createBackup(): Promise<string> {
1429 | const backupPath = `backup_${Date.now()}.json`;
1430 | const exportResult = await this.exportMemories(backupPath, {
1431 | format: "json",
1432 | includeMetadata: true,
1433 | includeLearning: true,
1434 | includeKnowledgeGraph: true,
1435 | });
1436 |
1437 | this.emit("backup_created", { path: exportResult.filePath });
1438 | return exportResult.filePath || backupPath;
1439 | }
1440 |
1441 | private convertToXML(data: any): string {
1442 | // Simple XML conversion - in production, use a proper XML library
1443 | const escapeXML = (str: string) =>
1444 | str
1445 | .replace(/&/g, "&")
1446 | .replace(/</g, "<")
1447 | .replace(/>/g, ">")
1448 | .replace(/"/g, """)
1449 | .replace(/'/g, "'");
1450 |
1451 | let xml = '<?xml version="1.0" encoding="UTF-8"?>\n<export>\n';
1452 | xml += ` <metadata>\n`;
1453 | xml += ` <version>${escapeXML(data.metadata.version)}</version>\n`;
1454 | xml += ` <exportedAt>${escapeXML(
1455 | data.metadata.exportedAt,
1456 | )}</exportedAt>\n`;
1457 | xml += ` <entries>${data.metadata.entries}</entries>\n`;
1458 | xml += ` </metadata>\n`;
1459 | xml += ` <memories>\n`;
1460 |
1461 | for (const memory of data.memories) {
1462 | xml += ` <memory>\n`;
1463 | xml += ` <id>${escapeXML(memory.id)}</id>\n`;
1464 | xml += ` <timestamp>${escapeXML(memory.timestamp)}</timestamp>\n`;
1465 | xml += ` <type>${escapeXML(memory.type)}</type>\n`;
1466 | xml += ` <data>${escapeXML(JSON.stringify(memory.data))}</data>\n`;
1467 | xml += ` </memory>\n`;
1468 | }
1469 |
1470 | xml += ` </memories>\n`;
1471 | xml += "</export>";
1472 |
1473 | return xml;
1474 | }
1475 |
1476 | private convertToYAML(data: any): string {
1477 | // Simple YAML conversion - in production, use a proper YAML library
1478 | const indent = (level: number) => " ".repeat(level);
1479 | const toYAML = (obj: any, level: number = 0): string => {
1480 | if (obj === null) return "null";
1481 | if (typeof obj === "boolean") return obj.toString();
1482 | if (typeof obj === "number") return obj.toString();
1483 | if (typeof obj === "string") return `"${obj.replace(/"/g, '\\"')}"`;
1484 |
1485 | if (Array.isArray(obj)) {
1486 | if (obj.length === 0) return "[]";
1487 | return (
1488 | "\n" +
1489 | obj
1490 | .map(
1491 | (item) => `${indent(level)}- ${toYAML(item, level + 1).trim()}`,
1492 | )
1493 | .join("\n")
1494 | );
1495 | }
1496 |
1497 | if (typeof obj === "object") {
1498 | const keys = Object.keys(obj);
1499 | if (keys.length === 0) return "{}";
1500 | return (
1501 | "\n" +
1502 | keys
1503 | .map(
1504 | (key) =>
1505 | `${indent(level)}${key}: ${toYAML(obj[key], level + 1).trim()}`,
1506 | )
1507 | .join("\n")
1508 | );
1509 | }
1510 |
1511 | return obj.toString();
1512 | };
1513 |
1514 | return `# DocuMCP Memory Export\n${toYAML(data)}`;
1515 | }
1516 |
1517 | // Additional helper methods for migration
1518 | private generateFieldMappings(
1519 | sourceSchema: any,
1520 | targetSchema: any,
1521 | ): Record<string, string> {
1522 | const mappings: Record<string, string> = {};
1523 |
1524 | // Simple field name matching - in production, use more sophisticated mapping
1525 | const sourceFields = Object.keys(sourceSchema.fields || {});
1526 | const targetFields = Object.keys(targetSchema.fields || {});
1527 |
1528 | for (const sourceField of sourceFields) {
1529 | // Direct match
1530 | if (targetFields.includes(sourceField)) {
1531 | mappings[sourceField] = sourceField;
1532 | continue;
1533 | }
1534 |
1535 | // Fuzzy matching
1536 | const similar = targetFields.find(
1537 | (tf) =>
1538 | tf.toLowerCase().includes(sourceField.toLowerCase()) ||
1539 | sourceField.toLowerCase().includes(tf.toLowerCase()),
1540 | );
1541 |
1542 | if (similar) {
1543 | mappings[sourceField] = similar;
1544 | }
1545 | }
1546 |
1547 | return mappings;
1548 | }
1549 |
1550 | private generateTransformations(
1551 | sourceSchema: any,
1552 | targetSchema: any,
1553 | mapping: Record<string, string>,
1554 | ): MigrationPlan["transformations"] {
1555 | const transformations: MigrationPlan["transformations"] = [];
1556 |
1557 | // Generate transformations based on field mappings and type differences
1558 | for (const [sourceField, targetField] of Object.entries(mapping)) {
1559 | const sourceType = sourceSchema.fields?.[sourceField]?.type;
1560 | const targetType = targetSchema.fields?.[targetField]?.type;
1561 |
1562 | if (sourceType !== targetType) {
1563 | transformations.push({
1564 | field: targetField,
1565 | type: "convert",
1566 | source: sourceField,
1567 | target: targetField,
1568 | operation: `${sourceType}_to_${targetType}`,
1569 | });
1570 | } else {
1571 | transformations.push({
1572 | field: targetField,
1573 | type: "rename",
1574 | source: sourceField,
1575 | target: targetField,
1576 | });
1577 | }
1578 | }
1579 |
1580 | return transformations;
1581 | }
1582 |
1583 | private generateValidationRules(
1584 | targetSchema: any,
1585 | ): MigrationPlan["validation"] {
1586 | const validation: MigrationPlan["validation"] = [];
1587 |
1588 | // Generate validation rules based on target schema
1589 | if (targetSchema.fields) {
1590 | for (const [field, config] of Object.entries(targetSchema.fields)) {
1591 | const rules: string[] = [];
1592 | const fieldConfig = config as any;
1593 |
1594 | if (fieldConfig.required) {
1595 | rules.push("required");
1596 | }
1597 |
1598 | if (fieldConfig.type) {
1599 | rules.push(`type:${fieldConfig.type}`);
1600 | }
1601 |
1602 | if (fieldConfig.format) {
1603 | rules.push(`format:${fieldConfig.format}`);
1604 | }
1605 |
1606 | validation.push({
1607 | field,
1608 | rules,
1609 | required: fieldConfig.required || false,
1610 | });
1611 | }
1612 | }
1613 |
1614 | return validation;
1615 | }
1616 |
1617 | private generatePostProcessingSteps(targetSchema: any): string[] {
1618 | const steps: string[] = [];
1619 |
1620 | // Generate post-processing steps
1621 | steps.push("rebuild_indices");
1622 | steps.push("update_references");
1623 | steps.push("validate_integrity");
1624 |
1625 | if (targetSchema.features?.learning) {
1626 | steps.push("retrain_models");
1627 | }
1628 |
1629 | if (targetSchema.features?.knowledgeGraph) {
1630 | steps.push("rebuild_graph");
1631 | }
1632 |
1633 | return steps;
1634 | }
1635 |
  /**
   * Best-effort loader for migration sources: returns parsed JSON when
   * the file is valid JSON, otherwise wraps the raw text as { raw }.
   */
  private async loadRawData(inputPath: string): Promise<any> {
    const content = await fs.readFile(inputPath, "utf8");
    try {
      return JSON.parse(content);
    } catch {
      return { raw: content };
    }
  }
1644 |
1645 | private async applyTransformations(
1646 | data: any,
1647 | plan: MigrationPlan,
1648 | ): Promise<any> {
1649 | const transformed = JSON.parse(JSON.stringify(data)); // Deep clone
1650 |
1651 | for (const transformation of plan.transformations) {
1652 | // Apply transformation based on type
1653 | switch (transformation.type) {
1654 | case "rename":
1655 | this.renameField(
1656 | transformed,
1657 | transformation.source as string,
1658 | transformation.target,
1659 | );
1660 | break;
1661 | case "convert":
1662 | this.convertField(
1663 | transformed,
1664 | transformation.source as string,
1665 | transformation.target,
1666 | transformation.operation,
1667 | );
1668 | break;
1669 | // Add more transformation types as needed
1670 | }
1671 | }
1672 |
1673 | return transformed;
1674 | }
1675 |
1676 | private renameField(obj: any, oldName: string, newName: string): void {
1677 | if (typeof obj !== "object" || obj === null) return;
1678 |
1679 | if (Array.isArray(obj)) {
1680 | obj.forEach((item) => this.renameField(item, oldName, newName));
1681 | } else {
1682 | if (oldName in obj) {
1683 | obj[newName] = obj[oldName];
1684 | delete obj[oldName];
1685 | }
1686 |
1687 | Object.values(obj).forEach((value) =>
1688 | this.renameField(value, oldName, newName),
1689 | );
1690 | }
1691 | }
1692 |
1693 | private convertField(
1694 | obj: any,
1695 | fieldName: string,
1696 | targetName: string,
1697 | operation?: string,
1698 | ): void {
1699 | if (typeof obj !== "object" || obj === null) return;
1700 |
1701 | if (Array.isArray(obj)) {
1702 | obj.forEach((item) =>
1703 | this.convertField(item, fieldName, targetName, operation),
1704 | );
1705 | } else {
1706 | if (fieldName in obj) {
1707 | const value = obj[fieldName];
1708 |
1709 | // Apply conversion based on operation
1710 | switch (operation) {
1711 | case "string_to_number":
1712 | obj[targetName] = Number(value);
1713 | break;
1714 | case "number_to_string":
1715 | obj[targetName] = String(value);
1716 | break;
1717 | case "array_to_string":
1718 | obj[targetName] = Array.isArray(value) ? value.join(",") : value;
1719 | break;
1720 | case "string_to_array":
1721 | obj[targetName] =
1722 | typeof value === "string" ? value.split(",") : value;
1723 | break;
1724 | default:
1725 | obj[targetName] = value;
1726 | }
1727 |
1728 | if (fieldName !== targetName) {
1729 | delete obj[fieldName];
1730 | }
1731 | }
1732 |
1733 | Object.values(obj).forEach((value) =>
1734 | this.convertField(value, fieldName, targetName, operation),
1735 | );
1736 | }
1737 | }
1738 |
  /**
   * Convert migrated legacy data into the standard import envelope
   * ({ metadata, memories }) expected by the import pipeline. Entries
   * already carrying both `data` and `metadata` pass through unchanged;
   * flat legacy entries are restructured field by field.
   */
  private convertToImportFormat(data: any, plan: MigrationPlan): any {
    // Convert transformed data to standard import format
    const memories = Array.isArray(data) ? data : data.memories || [data];

    // Convert old format to new MemoryEntry format
    const convertedMemories = memories.map((entry: any) => {
      // If already in new format, return as-is
      if (entry.data && entry.metadata) {
        return entry;
      }

      // Convert old flat format to new structured format, generating an
      // id/type/timestamp when the legacy entry lacks them.
      const converted: any = {
        id:
          entry.id ||
          `migrated_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`,
        type: entry.type || "analysis",
        timestamp: entry.timestamp || new Date().toISOString(),
        data: {},
        metadata: {},
      };

      // Move known fields to appropriate locations
      const dataFields = [
        "language",
        "recommendation",
        "framework",
        "outcome",
        "success",
      ];
      const metadataFields = [
        "project",
        "projectId",
        "repository",
        "ssg",
        "tags",
      ];

      for (const [key, value] of Object.entries(entry)) {
        if (["id", "type", "timestamp"].includes(key)) {
          // Already handled above
          continue;
        } else if (dataFields.includes(key)) {
          converted.data[key] = value;
        } else if (metadataFields.includes(key)) {
          if (key === "project") {
            converted.metadata.projectId = value; // Convert old 'project' field to 'projectId'
          } else {
            converted.metadata[key] = value;
          }
        } else {
          // Put unknown fields in data
          converted.data[key] = value;
        }
      }

      return converted;
    });

    return {
      metadata: {
        version: this.version,
        migrated: true,
        migrationPlan: plan.sourceSystem,
        importedAt: new Date().toISOString(),
      },
      memories: convertedMemories,
    };
  }
1808 |
1809 | private async executePostProcessing(steps: string[]): Promise<void> {
1810 | for (const step of steps) {
1811 | try {
1812 | switch (step) {
1813 | case "rebuild_indices":
1814 | await this.storage.rebuildIndex();
1815 | break;
1816 | case "update_references":
1817 | // Update cross-references in data
1818 | break;
1819 | case "validate_integrity":
1820 | // Validate data integrity
1821 | break;
1822 | case "retrain_models":
1823 | // Trigger learning system retraining
1824 | break;
1825 | case "rebuild_graph":
1826 | // Rebuild knowledge graph
1827 | break;
1828 | }
1829 |
1830 | this.emit("post_processing_step_completed", { step });
1831 | } catch (error) {
1832 | this.emit("post_processing_step_failed", {
1833 | step,
1834 | error: error instanceof Error ? error.message : String(error),
1835 | });
1836 | }
1837 | }
1838 | }
1839 |
1840 | private async loadSampleData(
1841 | sourcePath: string,
1842 | format: string,
1843 | ): Promise<any> {
1844 | // Load a small sample of data for validation
1845 | if (format === "json") {
1846 | const content = await fs.readFile(sourcePath, "utf8");
1847 | const data = JSON.parse(content);
1848 |
1849 | if (data.memories && Array.isArray(data.memories)) {
1850 | return { memories: data.memories.slice(0, 10) }; // First 10 entries
1851 | }
1852 |
1853 | return data;
1854 | }
1855 |
1856 | // For other formats, return basic structure info
1857 | return { format, sampleLoaded: true };
1858 | }
1859 |
1860 | private validateSchema(sampleData: any): string[] {
1861 | const issues: string[] = [];
1862 |
1863 | if (!sampleData.memories && !Array.isArray(sampleData)) {
1864 | issues.push("Expected memories array not found");
1865 | }
1866 |
1867 | const memories =
1868 | sampleData.memories || (Array.isArray(sampleData) ? sampleData : []);
1869 |
1870 | if (memories.length > 0) {
1871 | const firstEntry = memories[0];
1872 |
1873 | if (!firstEntry.id) {
1874 | issues.push("Memory entries missing required id field");
1875 | }
1876 |
1877 | if (!firstEntry.timestamp) {
1878 | issues.push("Memory entries missing required timestamp field");
1879 | }
1880 |
1881 | if (!firstEntry.type) {
1882 | issues.push("Memory entries missing required type field");
1883 | }
1884 |
1885 | if (!firstEntry.data) {
1886 | issues.push("Memory entries missing required data field");
1887 | }
1888 | }
1889 |
1890 | return issues;
1891 | }
1892 |
1893 | private validateDataIntegrity(sampleData: any): string[] {
1894 | const issues: string[] = [];
1895 |
1896 | const memories =
1897 | sampleData.memories || (Array.isArray(sampleData) ? sampleData : []);
1898 |
1899 | // Check for duplicate IDs
1900 | const ids = new Set();
1901 | const duplicates = new Set();
1902 |
1903 | for (const entry of memories) {
1904 | if (entry.id) {
1905 | if (ids.has(entry.id)) {
1906 | duplicates.add(entry.id);
1907 | } else {
1908 | ids.add(entry.id);
1909 | }
1910 | }
1911 | }
1912 |
1913 | if (duplicates.size > 0) {
1914 | issues.push(`Found ${duplicates.size} duplicate IDs`);
1915 | }
1916 |
1917 | // Check timestamp validity
1918 | let invalidTimestamps = 0;
1919 | for (const entry of memories) {
1920 | if (entry.timestamp && isNaN(new Date(entry.timestamp).getTime())) {
1921 | invalidTimestamps++;
1922 | }
1923 | }
1924 |
1925 | if (invalidTimestamps > 0) {
1926 | issues.push(`Found ${invalidTimestamps} invalid timestamps`);
1927 | }
1928 |
1929 | return issues;
1930 | }
1931 |
1932 | /**
1933 | * Apply field mappings and transformations to import data
1934 | */
1935 | private applyDataTransformations(entry: any, options: ImportOptions): any {
1936 | const transformed = JSON.parse(JSON.stringify(entry)); // Deep clone
1937 |
1938 | // Apply field mappings first
1939 | if (options.mapping) {
1940 | for (const [sourcePath, targetPath] of Object.entries(options.mapping)) {
1941 | const sourceValue = this.getValueByPath(transformed, sourcePath);
1942 | if (sourceValue !== undefined) {
1943 | this.setValueByPath(transformed, targetPath, sourceValue);
1944 | this.deleteValueByPath(transformed, sourcePath);
1945 | }
1946 | }
1947 | }
1948 |
1949 | // Apply transformations
1950 | if (options.transformation?.enabled && options.transformation.rules) {
1951 | for (const rule of options.transformation.rules) {
1952 | switch (rule.operation) {
1953 | case "transform":
1954 | if (rule.params?.value !== undefined) {
1955 | this.setValueByPath(transformed, rule.field, rule.params.value);
1956 | }
1957 | break;
1958 | case "convert":
1959 | // Apply conversion based on params
1960 | break;
1961 | }
1962 | }
1963 | }
1964 |
1965 | return transformed;
1966 | }
1967 |
1968 | /**
1969 | * Get value from object using dot notation path
1970 | */
1971 | private getValueByPath(obj: any, path: string): any {
1972 | return path.split(".").reduce((current, key) => current?.[key], obj);
1973 | }
1974 |
1975 | /**
1976 | * Set value in object using dot notation path
1977 | */
1978 | private setValueByPath(obj: any, path: string, value: any): void {
1979 | const keys = path.split(".");
1980 | const lastKey = keys.pop()!;
1981 | const target = keys.reduce((current, key) => {
1982 | if (!(key in current)) {
1983 | current[key] = {};
1984 | }
1985 | return current[key];
1986 | }, obj);
1987 | target[lastKey] = value;
1988 | }
1989 |
1990 | /**
1991 | * Delete value from object using dot notation path
1992 | */
1993 | private deleteValueByPath(obj: any, path: string): void {
1994 | const keys = path.split(".");
1995 | const lastKey = keys.pop()!;
1996 | const target = keys.reduce((current, key) => current?.[key], obj);
1997 | if (target && typeof target === "object") {
1998 | delete target[lastKey];
1999 | }
2000 | }
2001 | }
2002 |
```