This is page 28 of 33. Use http://codebase.md/tosin2013/documcp?lines=true&page={x} to view the full context.
# Directory Structure
```
├── .dockerignore
├── .eslintignore
├── .eslintrc.json
├── .github
│ ├── agents
│ │ ├── documcp-ast.md
│ │ ├── documcp-deploy.md
│ │ ├── documcp-memory.md
│ │ ├── documcp-test.md
│ │ └── documcp-tool.md
│ ├── copilot-instructions.md
│ ├── dependabot.yml
│ ├── ISSUE_TEMPLATE
│ │ ├── automated-changelog.md
│ │ ├── bug_report.md
│ │ ├── bug_report.yml
│ │ ├── documentation_issue.md
│ │ ├── feature_request.md
│ │ ├── feature_request.yml
│ │ ├── npm-publishing-fix.md
│ │ └── release_improvements.md
│ ├── PULL_REQUEST_TEMPLATE.md
│ ├── release-drafter.yml
│ └── workflows
│ ├── auto-merge.yml
│ ├── ci.yml
│ ├── codeql.yml
│ ├── dependency-review.yml
│ ├── deploy-docs.yml
│ ├── README.md
│ ├── release-drafter.yml
│ └── release.yml
├── .gitignore
├── .husky
│ ├── commit-msg
│ └── pre-commit
├── .linkcheck.config.json
├── .markdown-link-check.json
├── .nvmrc
├── .pre-commit-config.yaml
├── .versionrc.json
├── ARCHITECTURAL_CHANGES_SUMMARY.md
├── CHANGELOG.md
├── CODE_OF_CONDUCT.md
├── commitlint.config.js
├── CONTRIBUTING.md
├── docker-compose.docs.yml
├── Dockerfile.docs
├── docs
│ ├── .docusaurus
│ │ ├── docusaurus-plugin-content-docs
│ │ │ └── default
│ │ │ └── __mdx-loader-dependency.json
│ │ └── docusaurus-plugin-content-pages
│ │ └── default
│ │ └── __plugin.json
│ ├── adrs
│ │ ├── adr-0001-mcp-server-architecture.md
│ │ ├── adr-0002-repository-analysis-engine.md
│ │ ├── adr-0003-static-site-generator-recommendation-engine.md
│ │ ├── adr-0004-diataxis-framework-integration.md
│ │ ├── adr-0005-github-pages-deployment-automation.md
│ │ ├── adr-0006-mcp-tools-api-design.md
│ │ ├── adr-0007-mcp-prompts-and-resources-integration.md
│ │ ├── adr-0008-intelligent-content-population-engine.md
│ │ ├── adr-0009-content-accuracy-validation-framework.md
│ │ ├── adr-0010-mcp-resource-pattern-redesign.md
│ │ ├── adr-0011-ce-mcp-compatibility.md
│ │ ├── adr-0012-priority-scoring-system-for-documentation-drift.md
│ │ ├── adr-0013-release-pipeline-and-package-distribution.md
│ │ └── README.md
│ ├── api
│ │ ├── .nojekyll
│ │ ├── assets
│ │ │ ├── hierarchy.js
│ │ │ ├── highlight.css
│ │ │ ├── icons.js
│ │ │ ├── icons.svg
│ │ │ ├── main.js
│ │ │ ├── navigation.js
│ │ │ ├── search.js
│ │ │ └── style.css
│ │ ├── hierarchy.html
│ │ ├── index.html
│ │ ├── modules.html
│ │ └── variables
│ │ └── TOOLS.html
│ ├── assets
│ │ └── logo.svg
│ ├── CE-MCP-FINDINGS.md
│ ├── development
│ │ └── MCP_INSPECTOR_TESTING.md
│ ├── docusaurus.config.js
│ ├── explanation
│ │ ├── architecture.md
│ │ └── index.md
│ ├── guides
│ │ ├── link-validation.md
│ │ ├── playwright-integration.md
│ │ └── playwright-testing-workflow.md
│ ├── how-to
│ │ ├── analytics-setup.md
│ │ ├── change-watcher.md
│ │ ├── custom-domains.md
│ │ ├── documentation-freshness-tracking.md
│ │ ├── drift-priority-scoring.md
│ │ ├── github-pages-deployment.md
│ │ ├── index.md
│ │ ├── llm-integration.md
│ │ ├── local-testing.md
│ │ ├── performance-optimization.md
│ │ ├── prompting-guide.md
│ │ ├── repository-analysis.md
│ │ ├── seo-optimization.md
│ │ ├── site-monitoring.md
│ │ ├── troubleshooting.md
│ │ └── usage-examples.md
│ ├── index.md
│ ├── knowledge-graph.md
│ ├── package-lock.json
│ ├── package.json
│ ├── phase-2-intelligence.md
│ ├── reference
│ │ ├── api-overview.md
│ │ ├── cli.md
│ │ ├── configuration.md
│ │ ├── deploy-pages.md
│ │ ├── index.md
│ │ ├── mcp-tools.md
│ │ └── prompt-templates.md
│ ├── research
│ │ ├── cross-domain-integration
│ │ │ └── README.md
│ │ ├── domain-1-mcp-architecture
│ │ │ ├── index.md
│ │ │ └── mcp-performance-research.md
│ │ ├── domain-2-repository-analysis
│ │ │ └── README.md
│ │ ├── domain-3-ssg-recommendation
│ │ │ ├── index.md
│ │ │ └── ssg-performance-analysis.md
│ │ ├── domain-4-diataxis-integration
│ │ │ └── README.md
│ │ ├── domain-5-github-deployment
│ │ │ ├── github-pages-security-analysis.md
│ │ │ └── index.md
│ │ ├── domain-6-api-design
│ │ │ └── README.md
│ │ ├── README.md
│ │ ├── research-integration-summary-2025-01-14.md
│ │ ├── research-progress-template.md
│ │ └── research-questions-2025-01-14.md
│ ├── robots.txt
│ ├── sidebars.js
│ ├── sitemap.xml
│ ├── src
│ │ └── css
│ │ └── custom.css
│ └── tutorials
│ ├── development-setup.md
│ ├── environment-setup.md
│ ├── first-deployment.md
│ ├── getting-started.md
│ ├── index.md
│ ├── memory-workflows.md
│ └── user-onboarding.md
├── ISSUE_IMPLEMENTATION_SUMMARY.md
├── jest.config.js
├── LICENSE
├── Makefile
├── MCP_PHASE2_IMPLEMENTATION.md
├── mcp-config-example.json
├── mcp.json
├── package-lock.json
├── package.json
├── README.md
├── release.sh
├── scripts
│ └── check-package-structure.cjs
├── SECURITY.md
├── setup-precommit.sh
├── src
│ ├── benchmarks
│ │ └── performance.ts
│ ├── index.ts
│ ├── memory
│ │ ├── contextual-retrieval.ts
│ │ ├── deployment-analytics.ts
│ │ ├── enhanced-manager.ts
│ │ ├── export-import.ts
│ │ ├── freshness-kg-integration.ts
│ │ ├── index.ts
│ │ ├── integration.ts
│ │ ├── kg-code-integration.ts
│ │ ├── kg-health.ts
│ │ ├── kg-integration.ts
│ │ ├── kg-link-validator.ts
│ │ ├── kg-storage.ts
│ │ ├── knowledge-graph.ts
│ │ ├── learning.ts
│ │ ├── manager.ts
│ │ ├── multi-agent-sharing.ts
│ │ ├── pruning.ts
│ │ ├── schemas.ts
│ │ ├── storage.ts
│ │ ├── temporal-analysis.ts
│ │ ├── user-preferences.ts
│ │ └── visualization.ts
│ ├── prompts
│ │ └── technical-writer-prompts.ts
│ ├── scripts
│ │ └── benchmark.ts
│ ├── templates
│ │ └── playwright
│ │ ├── accessibility.spec.template.ts
│ │ ├── Dockerfile.template
│ │ ├── docs-e2e.workflow.template.yml
│ │ ├── link-validation.spec.template.ts
│ │ └── playwright.config.template.ts
│ ├── tools
│ │ ├── analyze-deployments.ts
│ │ ├── analyze-readme.ts
│ │ ├── analyze-repository.ts
│ │ ├── change-watcher.ts
│ │ ├── check-documentation-links.ts
│ │ ├── cleanup-agent-artifacts.ts
│ │ ├── deploy-pages.ts
│ │ ├── detect-gaps.ts
│ │ ├── evaluate-readme-health.ts
│ │ ├── generate-config.ts
│ │ ├── generate-contextual-content.ts
│ │ ├── generate-llm-context.ts
│ │ ├── generate-readme-template.ts
│ │ ├── generate-technical-writer-prompts.ts
│ │ ├── kg-health-check.ts
│ │ ├── manage-preferences.ts
│ │ ├── manage-sitemap.ts
│ │ ├── optimize-readme.ts
│ │ ├── populate-content.ts
│ │ ├── readme-best-practices.ts
│ │ ├── recommend-ssg.ts
│ │ ├── setup-playwright-tests.ts
│ │ ├── setup-structure.ts
│ │ ├── simulate-execution.ts
│ │ ├── sync-code-to-docs.ts
│ │ ├── test-local-deployment.ts
│ │ ├── track-documentation-freshness.ts
│ │ ├── update-existing-documentation.ts
│ │ ├── validate-content.ts
│ │ ├── validate-documentation-freshness.ts
│ │ ├── validate-readme-checklist.ts
│ │ └── verify-deployment.ts
│ ├── types
│ │ └── api.ts
│ ├── utils
│ │ ├── artifact-detector.ts
│ │ ├── ast-analyzer.ts
│ │ ├── change-watcher.ts
│ │ ├── code-scanner.ts
│ │ ├── content-extractor.ts
│ │ ├── drift-detector.ts
│ │ ├── execution-simulator.ts
│ │ ├── freshness-tracker.ts
│ │ ├── language-parsers-simple.ts
│ │ ├── llm-client.ts
│ │ ├── permission-checker.ts
│ │ ├── semantic-analyzer.ts
│ │ ├── sitemap-generator.ts
│ │ ├── usage-metadata.ts
│ │ └── user-feedback-integration.ts
│ └── workflows
│ └── documentation-workflow.ts
├── test-docs-local.sh
├── tests
│ ├── api
│ │ └── mcp-responses.test.ts
│ ├── benchmarks
│ │ └── performance.test.ts
│ ├── call-graph-builder.test.ts
│ ├── change-watcher-priority.integration.test.ts
│ ├── change-watcher.test.ts
│ ├── edge-cases
│ │ └── error-handling.test.ts
│ ├── execution-simulator.test.ts
│ ├── functional
│ │ └── tools.test.ts
│ ├── integration
│ │ ├── kg-documentation-workflow.test.ts
│ │ ├── knowledge-graph-workflow.test.ts
│ │ ├── mcp-readme-tools.test.ts
│ │ ├── memory-mcp-tools.test.ts
│ │ ├── readme-technical-writer.test.ts
│ │ └── workflow.test.ts
│ ├── memory
│ │ ├── contextual-retrieval.test.ts
│ │ ├── enhanced-manager.test.ts
│ │ ├── export-import.test.ts
│ │ ├── freshness-kg-integration.test.ts
│ │ ├── kg-code-integration.test.ts
│ │ ├── kg-health.test.ts
│ │ ├── kg-link-validator.test.ts
│ │ ├── kg-storage-validation.test.ts
│ │ ├── kg-storage.test.ts
│ │ ├── knowledge-graph-documentation-examples.test.ts
│ │ ├── knowledge-graph-enhanced.test.ts
│ │ ├── knowledge-graph.test.ts
│ │ ├── learning.test.ts
│ │ ├── manager-advanced.test.ts
│ │ ├── manager.test.ts
│ │ ├── mcp-resource-integration.test.ts
│ │ ├── mcp-tool-persistence.test.ts
│ │ ├── schemas-documentation-examples.test.ts
│ │ ├── schemas.test.ts
│ │ ├── storage.test.ts
│ │ ├── temporal-analysis.test.ts
│ │ └── user-preferences.test.ts
│ ├── performance
│ │ ├── memory-load-testing.test.ts
│ │ └── memory-stress-testing.test.ts
│ ├── prompts
│ │ ├── guided-workflow-prompts.test.ts
│ │ └── technical-writer-prompts.test.ts
│ ├── server.test.ts
│ ├── setup.ts
│ ├── tools
│ │ ├── all-tools.test.ts
│ │ ├── analyze-coverage.test.ts
│ │ ├── analyze-deployments.test.ts
│ │ ├── analyze-readme.test.ts
│ │ ├── analyze-repository.test.ts
│ │ ├── check-documentation-links.test.ts
│ │ ├── cleanup-agent-artifacts.test.ts
│ │ ├── deploy-pages-kg-retrieval.test.ts
│ │ ├── deploy-pages-tracking.test.ts
│ │ ├── deploy-pages.test.ts
│ │ ├── detect-gaps.test.ts
│ │ ├── evaluate-readme-health.test.ts
│ │ ├── generate-contextual-content.test.ts
│ │ ├── generate-llm-context.test.ts
│ │ ├── generate-readme-template.test.ts
│ │ ├── generate-technical-writer-prompts.test.ts
│ │ ├── kg-health-check.test.ts
│ │ ├── manage-sitemap.test.ts
│ │ ├── optimize-readme.test.ts
│ │ ├── readme-best-practices.test.ts
│ │ ├── recommend-ssg-historical.test.ts
│ │ ├── recommend-ssg-preferences.test.ts
│ │ ├── recommend-ssg.test.ts
│ │ ├── simple-coverage.test.ts
│ │ ├── sync-code-to-docs.test.ts
│ │ ├── test-local-deployment.test.ts
│ │ ├── tool-error-handling.test.ts
│ │ ├── track-documentation-freshness.test.ts
│ │ ├── validate-content.test.ts
│ │ ├── validate-documentation-freshness.test.ts
│ │ └── validate-readme-checklist.test.ts
│ ├── types
│ │ └── type-safety.test.ts
│ └── utils
│ ├── artifact-detector.test.ts
│ ├── ast-analyzer.test.ts
│ ├── content-extractor.test.ts
│ ├── drift-detector-diataxis.test.ts
│ ├── drift-detector-priority.test.ts
│ ├── drift-detector.test.ts
│ ├── freshness-tracker.test.ts
│ ├── llm-client.test.ts
│ ├── semantic-analyzer.test.ts
│ ├── sitemap-generator.test.ts
│ ├── usage-metadata.test.ts
│ └── user-feedback-integration.test.ts
├── tsconfig.json
└── typedoc.json
```
# Files
--------------------------------------------------------------------------------
/src/utils/drift-detector.ts:
--------------------------------------------------------------------------------
```typescript
1 | /**
2 | * Documentation Drift Detection System (Phase 3)
3 | *
4 | * Detects when code changes invalidate existing documentation
5 | * Provides automatic update suggestions based on code changes
6 | */
7 |
8 | import { promises as fs } from "fs";
9 | import path from "path";
10 | import { ASTAnalyzer, ASTAnalysisResult, CodeDiff } from "./ast-analyzer.js";
11 |
/** Result of drift analysis for a single source file. */
export interface DriftDetectionResult {
  filePath: string;
  hasDrift: boolean;
  /** Worst severity across all detected drifts ("none" when no drift). */
  severity: "none" | "low" | "medium" | "high" | "critical";
  drifts: DocumentationDrift[];
  suggestions: DriftSuggestion[];
  impactAnalysis: ImpactAnalysis;
}

/** One detected mismatch between code changes and existing docs. */
export interface DocumentationDrift {
  type: "outdated" | "incorrect" | "missing" | "breaking";
  /** Documentation files impacted by the change. */
  affectedDocs: string[];
  /** The code-level diffs that triggered this drift entry. */
  codeChanges: CodeDiff[];
  description: string;
  /** ISO-8601 timestamp of when the drift was detected. */
  detectedAt: string;
  severity: "low" | "medium" | "high" | "critical";
}

/** A proposed edit to bring one documentation section back in sync. */
export interface DriftSuggestion {
  docFile: string;
  /** Title of the affected section within the doc file. */
  section: string;
  currentContent: string;
  suggestedContent: string;
  /** Human-readable explanation of why the change is suggested. */
  reasoning: string;
  /** Confidence in the suggestion, 0-1. */
  confidence: number;
  /** True when the suggestion is safe to apply without human review. */
  autoApplicable: boolean;
}

/** Aggregate impact summary across all diffs for a file. */
export interface ImpactAnalysis {
  breakingChanges: number;
  majorChanges: number;
  minorChanges: number;
  affectedDocFiles: string[];
  estimatedUpdateEffort: "low" | "medium" | "high";
  requiresManualReview: boolean;
}
48 |
/** Point-in-time capture of a project's code and documentation state. */
export interface DriftSnapshot {
  projectPath: string;
  /** ISO-8601 creation time of the snapshot. */
  timestamp: string;
  /** Source file path -> AST analysis result. */
  files: Map<string, ASTAnalysisResult>;
  /** Documentation file path -> parsed documentation snapshot. */
  documentation: Map<string, DocumentationSnapshot>;
}

/** Parsed state of a single documentation file. */
export interface DocumentationSnapshot {
  filePath: string;
  /** SHA-256 hex digest of the raw file content. */
  contentHash: string;
  /** Source file paths referenced by this document. */
  referencedCode: string[];
  /** ISO-8601 mtime of the file when snapshotted. */
  lastUpdated: string;
  sections: DocumentationSection[];
}

/** One section of a documentation file. */
export interface DocumentationSection {
  title: string;
  content: string;
  referencedFunctions: string[];
  referencedClasses: string[];
  referencedTypes: string[];
  codeExamples: CodeExample[];
  startLine: number;
  endLine: number;
}

/** A code example embedded in documentation. */
export interface CodeExample {
  language: string;
  code: string;
  description: string;
  /** Code symbols (functions/classes/types) the example mentions. */
  referencedSymbols: string[];
  /** Diataxis documentation category, when it could be determined. */
  diataxisType?: "tutorial" | "how-to" | "reference" | "explanation";
  /** Hints used when validating the example against real code. */
  validationHints?: {
    expectedBehavior?: string;
    dependencies?: string[];
    contextRequired?: boolean;
  };
}
87 |
/**
 * Priority scoring for documentation drift
 */
export interface DriftPriorityScore {
  /** Weighted combination of all factors, 0-100. */
  overall: number; // 0-100
  factors: {
    codeComplexity: number; // 0-100
    usageFrequency: number; // 0-100
    changeMagnitude: number; // 0-100
    documentationCoverage: number; // 0-100
    staleness: number; // 0-100
    userFeedback: number; // 0-100
  };
  recommendation: "critical" | "high" | "medium" | "low";
  /** Short human-readable next step for addressing the drift. */
  suggestedAction: string;
}

/**
 * Configuration for priority scoring weights
 * (the defaults listed below sum to 1.0)
 */
export interface PriorityWeights {
  codeComplexity: number; // default: 0.20
  usageFrequency: number; // default: 0.25
  changeMagnitude: number; // default: 0.25
  documentationCoverage: number; // default: 0.15
  staleness: number; // default: 0.10
  userFeedback: number; // default: 0.05
}

/**
 * Extended drift detection result with priority scoring
 */
export interface PrioritizedDriftResult extends DriftDetectionResult {
  priorityScore?: DriftPriorityScore;
}

/**
 * Usage metadata for calculating usage frequency
 */
export interface UsageMetadata {
  filePath: string;
  functionCalls: Map<string, number>; // function name -> call count
  classInstantiations: Map<string, number>; // class name -> instantiation count
  imports: Map<string, number>; // symbol -> import count
}
133 |
/**
 * Main Drift Detector class
 *
 * Compares snapshots of a project's code and documentation over time,
 * detects drift, and produces update suggestions.
 */
export class DriftDetector {
  // Constants for code analysis
  private static readonly DESCRIPTION_LOOKBACK_LINES = 3;
  // Matches ES-module imports: import ... from "module"
  private static readonly IMPORT_REGEX =
    /import\s+.*?\s+from\s+["']([^"']+)["']/g;
  // Matches CommonJS requires: require("module")
  private static readonly REQUIRE_REGEX = /require\(["']([^"']+)["']\)/g;

  // Default priority weights; values sum to 1.0 (see PriorityWeights)
  private static readonly DEFAULT_WEIGHTS: PriorityWeights = {
    codeComplexity: 0.2,
    usageFrequency: 0.25,
    changeMagnitude: 0.25,
    documentationCoverage: 0.15,
    staleness: 0.1,
    userFeedback: 0.05,
  };

  private analyzer: ASTAnalyzer;
  private snapshotDir: string;
  private currentSnapshot: DriftSnapshot | null = null;
  private previousSnapshot: DriftSnapshot | null = null;
  private customWeights?: PriorityWeights;
  // Optional hook that supplies a user-feedback score for a drift result
  private userFeedbackIntegration?: (
    result: DriftDetectionResult,
  ) => Promise<number> | number;

  /**
   * @param projectPath - Root of the project to analyze
   * @param snapshotDir - Where snapshots are persisted; defaults to
   *   `<projectPath>/.documcp/snapshots`
   */
  constructor(projectPath: string, snapshotDir?: string) {
    this.analyzer = new ASTAnalyzer();
    this.snapshotDir =
      snapshotDir || path.join(projectPath, ".documcp", "snapshots");
  }
168 |
169 | /**
170 | * Initialize the drift detector
171 | */
172 | async initialize(): Promise<void> {
173 | await this.analyzer.initialize();
174 | await fs.mkdir(this.snapshotDir, { recursive: true });
175 | }
176 |
177 | /**
178 | * Create a snapshot of the current codebase and documentation
179 | */
180 | async createSnapshot(
181 | projectPath: string,
182 | docsPath: string,
183 | ): Promise<DriftSnapshot> {
184 | const files = new Map<string, ASTAnalysisResult>();
185 | const documentation = new Map<string, DocumentationSnapshot>();
186 |
187 | // Analyze source files
188 | const sourceFiles = await this.findSourceFiles(projectPath);
189 | for (const filePath of sourceFiles) {
190 | const analysis = await this.analyzer.analyzeFile(filePath);
191 | if (analysis) {
192 | files.set(filePath, analysis);
193 | }
194 | }
195 |
196 | // Analyze documentation files
197 | const docFiles = await this.findDocumentationFiles(docsPath);
198 | for (const docPath of docFiles) {
199 | const docSnapshot = await this.analyzeDocumentation(docPath);
200 | if (docSnapshot) {
201 | documentation.set(docPath, docSnapshot);
202 | }
203 | }
204 |
205 | const snapshot: DriftSnapshot = {
206 | projectPath,
207 | timestamp: new Date().toISOString(),
208 | files,
209 | documentation,
210 | };
211 |
212 | // Save snapshot
213 | await this.saveSnapshot(snapshot);
214 |
215 | return snapshot;
216 | }
217 |
/**
 * Detect drift between two snapshots.
 *
 * Compares every file present in the new snapshot against its analysis
 * in the old snapshot and emits one DriftDetectionResult per file whose
 * AST-level diff is non-empty.
 *
 * NOTE(review): files that exist only in the new snapshot are skipped
 * (no "needs documentation" result is produced yet), and files deleted
 * since the old snapshot are never visited at all — confirm whether
 * removed-file drift should also be reported.
 *
 * @param oldSnapshot - Baseline snapshot (earlier state)
 * @param newSnapshot - Current snapshot to compare against the baseline
 * @returns One result per changed file
 */
async detectDrift(
  oldSnapshot: DriftSnapshot,
  newSnapshot: DriftSnapshot,
): Promise<DriftDetectionResult[]> {
  const results: DriftDetectionResult[] = [];

  // Compare each file present in the new snapshot
  for (const [filePath, newAnalysis] of newSnapshot.files) {
    const oldAnalysis = oldSnapshot.files.get(filePath);

    if (!oldAnalysis) {
      // New file - check if documentation is needed
      continue;
    }

    // Detect code changes via AST-level diffing
    const codeDiffs = await this.analyzer.detectDrift(
      oldAnalysis,
      newAnalysis,
    );

    if (codeDiffs.length > 0) {
      // Find affected documentation
      const affectedDocs = this.findAffectedDocumentation(
        filePath,
        codeDiffs,
        newSnapshot.documentation,
      );

      // Report drift even if no documentation is affected
      // (missing documentation is also a type of drift)
      const driftResult = await this.analyzeDrift(
        filePath,
        codeDiffs,
        affectedDocs,
        oldSnapshot,
        newSnapshot,
      );

      results.push(driftResult);
    }
  }

  return results;
}
266 |
267 | /**
268 | * Analyze drift and generate suggestions
269 | */
270 | private async analyzeDrift(
271 | filePath: string,
272 | codeDiffs: CodeDiff[],
273 | affectedDocs: string[],
274 | oldSnapshot: DriftSnapshot,
275 | newSnapshot: DriftSnapshot,
276 | ): Promise<DriftDetectionResult> {
277 | const drifts: DocumentationDrift[] = [];
278 | const suggestions: DriftSuggestion[] = [];
279 |
280 | // Categorize drifts by severity
281 | const breakingChanges = codeDiffs.filter(
282 | (d) => d.impactLevel === "breaking",
283 | );
284 | const majorChanges = codeDiffs.filter((d) => d.impactLevel === "major");
285 | const minorChanges = codeDiffs.filter((d) => d.impactLevel === "minor");
286 |
287 | // Create drift entries
288 | for (const diff of codeDiffs) {
289 | const drift: DocumentationDrift = {
290 | type: this.determineDriftType(diff),
291 | affectedDocs,
292 | codeChanges: [diff],
293 | description: this.generateDriftDescription(diff),
294 | detectedAt: new Date().toISOString(),
295 | severity: this.mapImpactToSeverity(diff.impactLevel),
296 | };
297 |
298 | drifts.push(drift);
299 |
300 | // Generate suggestions for each affected doc
301 | for (const docPath of affectedDocs) {
302 | const docSnapshot = newSnapshot.documentation.get(docPath);
303 | if (docSnapshot) {
304 | const docSuggestions = await this.generateSuggestions(
305 | diff,
306 | docSnapshot,
307 | newSnapshot,
308 | );
309 | suggestions.push(...docSuggestions);
310 | }
311 | }
312 | }
313 |
314 | const impactAnalysis: ImpactAnalysis = {
315 | breakingChanges: breakingChanges.length,
316 | majorChanges: majorChanges.length,
317 | minorChanges: minorChanges.length,
318 | affectedDocFiles: affectedDocs,
319 | estimatedUpdateEffort: this.estimateUpdateEffort(drifts),
320 | requiresManualReview:
321 | breakingChanges.length > 0 || majorChanges.length > 3,
322 | };
323 |
324 | const severity = this.calculateOverallSeverity(drifts);
325 |
326 | return {
327 | filePath,
328 | hasDrift: drifts.length > 0,
329 | severity,
330 | drifts,
331 | suggestions,
332 | impactAnalysis,
333 | };
334 | }
335 |
336 | /**
337 | * Generate update suggestions for documentation
338 | */
339 | private async generateSuggestions(
340 | diff: CodeDiff,
341 | docSnapshot: DocumentationSnapshot,
342 | snapshot: DriftSnapshot,
343 | ): Promise<DriftSuggestion[]> {
344 | const suggestions: DriftSuggestion[] = [];
345 |
346 | // Find sections that reference the changed code
347 | for (const section of docSnapshot.sections) {
348 | const isAffected = this.isSectionAffected(section, diff);
349 |
350 | if (isAffected) {
351 | const suggestion = await this.createSuggestion(
352 | diff,
353 | docSnapshot,
354 | section,
355 | snapshot,
356 | );
357 |
358 | if (suggestion) {
359 | suggestions.push(suggestion);
360 | }
361 | }
362 | }
363 |
364 | return suggestions;
365 | }
366 |
367 | /**
368 | * Create a specific suggestion for a documentation section
369 | */
370 | private async createSuggestion(
371 | diff: CodeDiff,
372 | docSnapshot: DocumentationSnapshot,
373 | section: DocumentationSection,
374 | snapshot: DriftSnapshot,
375 | ): Promise<DriftSuggestion | null> {
376 | let suggestedContent = section.content;
377 | let reasoning = "";
378 | let confidence = 0.5;
379 | let autoApplicable = false;
380 |
381 | switch (diff.type) {
382 | case "removed":
383 | reasoning = `The ${diff.category} '${diff.name}' has been removed from the codebase. This section should be updated or removed.`;
384 | suggestedContent = this.generateRemovalSuggestion(section, diff);
385 | confidence = 0.8;
386 | autoApplicable = false;
387 | break;
388 |
389 | case "added":
390 | reasoning = `A new ${diff.category} '${diff.name}' has been added. Consider documenting it.`;
391 | suggestedContent = this.generateAdditionSuggestion(
392 | section,
393 | diff,
394 | snapshot,
395 | );
396 | confidence = 0.6;
397 | autoApplicable = false;
398 | break;
399 |
400 | case "modified":
401 | reasoning = `The ${diff.category} '${diff.name}' has been modified: ${diff.details}`;
402 | suggestedContent = this.generateModificationSuggestion(
403 | section,
404 | diff,
405 | snapshot,
406 | );
407 | confidence = 0.7;
408 | autoApplicable = diff.impactLevel === "patch";
409 | break;
410 | }
411 |
412 | return {
413 | docFile: docSnapshot.filePath,
414 | section: section.title,
415 | currentContent: section.content,
416 | suggestedContent,
417 | reasoning,
418 | confidence,
419 | autoApplicable,
420 | };
421 | }
422 |
423 | /**
424 | * Generate suggestion for removed code
425 | */
426 | private generateRemovalSuggestion(
427 | section: DocumentationSection,
428 | diff: CodeDiff,
429 | ): string {
430 | let content = section.content;
431 |
432 | // Remove references to the deleted symbol
433 | const symbolRegex = new RegExp(`\\b${diff.name}\\b`, "g");
434 | content = content.replace(symbolRegex, `~~${diff.name}~~ (removed)`);
435 |
436 | // Add deprecation notice
437 | const notice = `\n\n> **Note**: The \`${diff.name}\` ${diff.category} has been removed in the latest version.\n`;
438 | content = notice + content;
439 |
440 | return content;
441 | }
442 |
443 | /**
444 | * Generate suggestion for added code
445 | */
446 | private generateAdditionSuggestion(
447 | section: DocumentationSection,
448 | diff: CodeDiff,
449 | _snapshot: DriftSnapshot,
450 | ): string {
451 | let content = section.content;
452 |
453 | // Add new section for the added symbol
454 | const additionNotice = `\n\n## ${diff.name}\n\nA new ${diff.category} has been added.\n\n`;
455 |
456 | // Try to extract signature if available
457 | if (diff.newSignature) {
458 | content +=
459 | additionNotice + `\`\`\`typescript\n${diff.newSignature}\n\`\`\`\n`;
460 | } else {
461 | content +=
462 | additionNotice +
463 | `> **Documentation needed**: Please document the \`${diff.name}\` ${diff.category}.\n`;
464 | }
465 |
466 | return content;
467 | }
468 |
469 | /**
470 | * Generate suggestion for modified code
471 | */
472 | private generateModificationSuggestion(
473 | section: DocumentationSection,
474 | diff: CodeDiff,
475 | _snapshot: DriftSnapshot,
476 | ): string {
477 | let content = section.content;
478 |
479 | // Update signature references
480 | if (diff.oldSignature && diff.newSignature) {
481 | content = content.replace(diff.oldSignature, diff.newSignature);
482 | }
483 |
484 | // Add update notice
485 | const updateNotice = `\n\n> **Updated**: ${diff.details}\n`;
486 | content = updateNotice + content;
487 |
488 | return content;
489 | }
490 |
/**
 * Check if a section is affected by a code change.
 *
 * A section is affected when it references the changed symbol under the
 * matching category; unknown categories are never considered affected.
 */
private isSectionAffected(
  section: DocumentationSection,
  diff: CodeDiff,
): boolean {
  if (diff.category === "function") {
    return section.referencedFunctions.includes(diff.name);
  }
  if (diff.category === "class") {
    return section.referencedClasses.includes(diff.name);
  }
  if (diff.category === "interface" || diff.category === "type") {
    return section.referencedTypes.includes(diff.name);
  }
  return false;
}
510 |
511 | /**
512 | * Find documentation files that reference changed code
513 | */
514 | private findAffectedDocumentation(
515 | filePath: string,
516 | codeDiffs: CodeDiff[],
517 | documentation: Map<string, DocumentationSnapshot>,
518 | ): string[] {
519 | const affected: string[] = [];
520 |
521 | for (const [docPath, docSnapshot] of documentation) {
522 | // Check if doc references the changed file
523 | if (docSnapshot.referencedCode.includes(filePath)) {
524 | affected.push(docPath);
525 | continue;
526 | }
527 |
528 | // Check if doc references changed symbols
529 | for (const diff of codeDiffs) {
530 | for (const section of docSnapshot.sections) {
531 | if (this.isSectionAffected(section, diff)) {
532 | affected.push(docPath);
533 | break;
534 | }
535 | }
536 | }
537 | }
538 |
539 | return [...new Set(affected)];
540 | }
541 |
542 | /**
543 | * Analyze a documentation file
544 | */
545 | private async analyzeDocumentation(
546 | docPath: string,
547 | ): Promise<DocumentationSnapshot | null> {
548 | try {
549 | const content = await fs.readFile(docPath, "utf-8");
550 | const crypto = await import("crypto");
551 | const contentHash = crypto
552 | .createHash("sha256")
553 | .update(content)
554 | .digest("hex");
555 | const stats = await fs.stat(docPath);
556 |
557 | const sections = this.extractDocSections(content, docPath);
558 | const referencedCode = this.extractCodeReferences(content);
559 |
560 | return {
561 | filePath: docPath,
562 | contentHash,
563 | referencedCode,
564 | lastUpdated: stats.mtime.toISOString(),
565 | sections,
566 | };
567 | } catch (error) {
568 | console.warn(`Failed to analyze documentation ${docPath}:`, error);
569 | return null;
570 | }
571 | }
572 |
573 | /**
574 | * Generate validation hints based on Diataxis type
575 | */
576 | private generateValidationHints(
577 | diataxisType: "tutorial" | "how-to" | "reference" | "explanation",
578 | code: string,
579 | language: string,
580 | ): {
581 | expectedBehavior?: string;
582 | dependencies?: string[];
583 | contextRequired?: boolean;
584 | } {
585 | const hints: {
586 | expectedBehavior?: string;
587 | dependencies?: string[];
588 | contextRequired?: boolean;
589 | } = {};
590 |
591 | // Default: no dependencies for reference and explanation types
592 | let dependencies: string[] = [];
593 |
594 | // Extract dependencies for executable types (tutorial, how-to)
595 | if (diataxisType === "tutorial" || diataxisType === "how-to") {
596 | dependencies = this.extractDependencies(code, language);
597 | }
598 |
599 | switch (diataxisType) {
600 | case "tutorial":
601 | // Tutorials should have complete, executable examples
602 | hints.expectedBehavior = "Complete step-by-step execution flow";
603 | hints.contextRequired = false; // Should be self-contained
604 | hints.dependencies = dependencies;
605 | break;
606 |
607 | case "how-to":
608 | // How-to guides focus on solving specific problems
609 | hints.expectedBehavior = "Practical outcome achievable";
610 | hints.contextRequired = true; // May require setup
611 | hints.dependencies = dependencies;
612 | break;
613 |
614 | case "reference":
615 | // Reference documentation shows API usage
616 | hints.expectedBehavior = "API signatures match implementation";
617 | hints.contextRequired = false;
618 | hints.dependencies = dependencies;
619 | break;
620 |
621 | case "explanation":
622 | // Explanation examples illustrate concepts
623 | hints.expectedBehavior = "Concepts align with code behavior";
624 | hints.contextRequired = true;
625 | hints.dependencies = dependencies;
626 | break;
627 | }
628 |
629 | return hints;
630 | }
631 |
632 | /**
633 | * Extract dependencies from code
634 | */
635 | private extractDependencies(code: string, language: string): string[] {
636 | const dependencies: string[] = [];
637 |
638 | switch (language.toLowerCase()) {
639 | case "typescript":
640 | case "javascript":
641 | case "tsx":
642 | case "jsx": {
643 | // Extract import statements
644 | const importMatches = code.matchAll(DriftDetector.IMPORT_REGEX);
645 | for (const match of importMatches) {
646 | dependencies.push(match[1]);
647 | }
648 | // Extract require statements
649 | const requireMatches = code.matchAll(DriftDetector.REQUIRE_REGEX);
650 | for (const match of requireMatches) {
651 | dependencies.push(match[1]);
652 | }
653 | break;
654 | }
655 |
656 | case "python": {
657 | // Extract "from X import Y" statements
658 | const pyFromImportMatches = code.matchAll(/^from\s+(\S+)\s+import/gm);
659 | for (const match of pyFromImportMatches) {
660 | dependencies.push(match[1]);
661 | }
662 | // Extract simple "import X" statements
663 | const pySimpleImportMatches = code.matchAll(/^import\s+(\S+)/gm);
664 | for (const match of pySimpleImportMatches) {
665 | dependencies.push(match[1]);
666 | }
667 | break;
668 | }
669 |
670 | case "go": {
671 | // Extract import statements
672 | const goImportMatches = code.matchAll(/import\s+["']([^"']+)["']/g);
673 | for (const match of goImportMatches) {
674 | dependencies.push(match[1]);
675 | }
676 | break;
677 | }
678 |
679 | case "rust": {
680 | // Extract use statements
681 | const useMatches = code.matchAll(/use\s+([^;]+);/g);
682 | for (const match of useMatches) {
683 | dependencies.push(match[1].split("::")[0]);
684 | }
685 | break;
686 | }
687 |
688 | case "java": {
689 | // Extract import statements
690 | const javaImportMatches = code.matchAll(/import\s+([^;]+);/g);
691 | for (const match of javaImportMatches) {
692 | dependencies.push(match[1]);
693 | }
694 | break;
695 | }
696 | }
697 |
698 | return [...new Set(dependencies)];
699 | }
700 |
701 | /**
702 | * Detect Diataxis type from file path
703 | */
704 | private detectDiataxisTypeFromPath(
705 | filePath: string,
706 | ): "tutorial" | "how-to" | "reference" | "explanation" | undefined {
707 | const lowerPath = filePath.toLowerCase();
708 |
709 | if (
710 | lowerPath.includes("/tutorial") ||
711 | lowerPath.includes("/tutorials") ||
712 | lowerPath.includes("getting-started") ||
713 | lowerPath.includes("getting_started")
714 | ) {
715 | return "tutorial";
716 | }
717 |
718 | if (
719 | lowerPath.includes("/how-to") ||
720 | lowerPath.includes("/howto") ||
721 | lowerPath.includes("/guides") ||
722 | lowerPath.includes("/guide")
723 | ) {
724 | return "how-to";
725 | }
726 |
727 | if (
728 | lowerPath.includes("/reference") ||
729 | lowerPath.includes("/api") ||
730 | lowerPath.includes("/api-reference")
731 | ) {
732 | return "reference";
733 | }
734 |
735 | if (
736 | lowerPath.includes("/explanation") ||
737 | lowerPath.includes("/concept") ||
738 | lowerPath.includes("/architecture") ||
739 | lowerPath.includes("/background")
740 | ) {
741 | return "explanation";
742 | }
743 |
744 | return undefined;
745 | }
746 |
747 | /**
748 | * Detect Diataxis type from frontmatter
749 | */
750 | private detectDiataxisTypeFromFrontmatter(
751 | content: string,
752 | ): "tutorial" | "how-to" | "reference" | "explanation" | undefined {
753 | // Look for YAML frontmatter
754 | const frontmatterMatch = content.match(/^---\n([\s\S]*?)\n---/);
755 | if (!frontmatterMatch) {
756 | return undefined;
757 | }
758 |
759 | const frontmatter = frontmatterMatch[1];
760 |
761 | // Check for explicit diataxis type
762 | const diataxisMatch = frontmatter.match(
763 | /diataxis[_-]?type:\s*["']?(tutorial|how-to|reference|explanation)["']?/i,
764 | );
765 | if (diataxisMatch) {
766 | return diataxisMatch[1] as
767 | | "tutorial"
768 | | "how-to"
769 | | "reference"
770 | | "explanation";
771 | }
772 |
773 | // Check for category field
774 | const categoryMatch = frontmatter.match(
775 | /category:\s*["']?(tutorial|how-to|reference|explanation)["']?/i,
776 | );
777 | if (categoryMatch) {
778 | return categoryMatch[1] as
779 | | "tutorial"
780 | | "how-to"
781 | | "reference"
782 | | "explanation";
783 | }
784 |
785 | return undefined;
786 | }
787 |
788 | /**
789 | * Infer Diataxis type from section content and context
790 | */
791 | private inferDiataxisTypeFromContext(
792 | sectionTitle: string,
793 | sectionContent: string,
794 | ): "tutorial" | "how-to" | "reference" | "explanation" | undefined {
795 | const title = sectionTitle.toLowerCase();
796 | const content = sectionContent.toLowerCase();
797 |
798 | // Tutorial indicators
799 | const tutorialIndicators = [
800 | "getting started",
801 | "introduction",
802 | "step-by-step",
803 | "learning",
804 | "beginner",
805 | "first steps",
806 | "walkthrough",
807 | ];
808 | if (
809 | tutorialIndicators.some(
810 | (indicator) => title.includes(indicator) || content.includes(indicator),
811 | )
812 | ) {
813 | return "tutorial";
814 | }
815 |
816 | // How-to indicators
817 | const howToIndicators = [
818 | "how to",
819 | "how do i",
820 | "recipe",
821 | "problem",
822 | "solution",
823 | "task",
824 | "accomplish",
825 | ];
826 | if (
827 | howToIndicators.some(
828 | (indicator) => title.includes(indicator) || content.includes(indicator),
829 | )
830 | ) {
831 | return "how-to";
832 | }
833 |
834 | // Reference indicators
835 | const referenceIndicators = [
836 | "api",
837 | "reference",
838 | "parameters",
839 | "returns",
840 | "arguments",
841 | "signature",
842 | "type",
843 | "interface",
844 | ];
845 | if (
846 | referenceIndicators.some(
847 | (indicator) => title.includes(indicator) || content.includes(indicator),
848 | )
849 | ) {
850 | return "reference";
851 | }
852 |
853 | // Explanation indicators
854 | const explanationIndicators = [
855 | "architecture",
856 | "concept",
857 | "background",
858 | "why",
859 | "understand",
860 | "theory",
861 | "design",
862 | "overview",
863 | ];
864 | if (
865 | explanationIndicators.some(
866 | (indicator) => title.includes(indicator) || content.includes(indicator),
867 | )
868 | ) {
869 | return "explanation";
870 | }
871 |
872 | return undefined;
873 | }
874 |
  /**
   * Extract sections from documentation
   *
   * Splits a markdown document into heading-delimited sections and harvests
   * per-section metadata used by drift detection:
   * - function/class names referenced in headings and inline code spans
   * - fenced code examples, each with language, a nearby description,
   *   referenced symbols, and (when determinable) a Diataxis type
   *
   * Diataxis precedence: frontmatter declaration, then path convention,
   * then per-section keyword inference for individual code examples.
   *
   * @param content - The markdown content to parse
   * @param filePath - The file path used for Diataxis type detection
   * @returns Sections in document order with start/end line bounds
   */
  private extractDocSections(
    content: string,
    filePath: string,
  ): DocumentationSection[] {
    const sections: DocumentationSection[] = [];
    const lines = content.split("\n");
    let currentSection: Partial<DocumentationSection> | null = null;
    let currentContent: string[] = [];

    // Detect Diataxis type from path and frontmatter (frontmatter wins)
    const pathType = this.detectDiataxisTypeFromPath(filePath);
    const frontmatterType = this.detectDiataxisTypeFromFrontmatter(content);
    const documentType = frontmatterType || pathType;

    // NOTE: `i` is also advanced inside the body when a fenced code block is
    // consumed; the for-loop's own i++ then steps past the closing fence.
    for (let i = 0; i < lines.length; i++) {
      const line = lines[i];

      // Detect headings (any level, 1-6 hashes)
      const headingMatch = line.match(/^(#{1,6})\s+(.+)/);
      if (headingMatch) {
        // Save previous section before starting a new one
        if (currentSection) {
          currentSection.content = currentContent.join("\n");
          currentSection.endLine = i - 1;
          sections.push(currentSection as DocumentationSection);
        }

        const title = headingMatch[2];
        const referencedFunctions: string[] = [];
        const referencedClasses: string[] = [];

        // Extract function name from heading if it looks like a function signature
        // e.g., "## calculate(x: number): number" or "## myFunction()"
        const funcMatch = title.match(/^([a-z][A-Za-z0-9_]*)\s*\(/);
        if (funcMatch) {
          referencedFunctions.push(funcMatch[1]);
        }

        // Extract class name from heading if it starts with uppercase
        // (skipped when the heading already parsed as a function signature)
        const classMatch = title.match(/^([A-Z][A-Za-z0-9_]*)/);
        if (classMatch && !funcMatch) {
          referencedClasses.push(classMatch[1]);
        }

        // Start new section (content/endLine filled in when it closes)
        currentSection = {
          title,
          startLine: i,
          referencedFunctions,
          referencedClasses,
          referencedTypes: [],
          codeExamples: [],
        };
        currentContent = [];
      } else if (currentSection) {
        // Lines before the first heading are intentionally discarded.
        currentContent.push(line);

        // Extract code examples (fence-open line itself stays in
        // currentContent; the fenced body and closing fence do not)
        if (line.startsWith("```")) {
          const langMatch = line.match(/```(\w+)/);
          const language = langMatch ? langMatch[1] : "text";
          const codeLines: string[] = [];
          const codeStartLine = i;
          i++;

          // Collect code lines until the closing fence or end of input
          while (i < lines.length && !lines[i].startsWith("```")) {
            codeLines.push(lines[i]);
            i++;
          }

          const codeContent = codeLines.join("\n");

          // Look for description before the code block: the last non-blank,
          // non-heading line within the lookback window wins
          let description = "";
          for (
            let j = Math.max(
              0,
              codeStartLine - DriftDetector.DESCRIPTION_LOOKBACK_LINES,
            );
            j < codeStartLine;
            j++
          ) {
            const descLine = lines[j].trim();
            if (descLine && !descLine.startsWith("#")) {
              description = descLine;
            }
          }

          // Determine Diataxis type for this code example
          let codeExampleType = documentType;
          if (!codeExampleType && currentSection.title) {
            // Infer from section context if not determined from document
            codeExampleType = this.inferDiataxisTypeFromContext(
              currentSection.title,
              currentContent.join("\n"),
            );
          }

          const codeExample: CodeExample = {
            language,
            code: codeContent,
            description,
            referencedSymbols: this.extractSymbolsFromCode(codeContent),
            diataxisType: codeExampleType,
          };

          // Add validation hints based on Diataxis type
          if (codeExampleType) {
            codeExample.validationHints = this.generateValidationHints(
              codeExampleType,
              codeContent,
              language,
            );
          }

          currentSection.codeExamples!.push(codeExample);
        }

        // Extract inline code references (with or without parentheses for functions)
        const inlineCodeMatches = line.matchAll(
          /`([A-Za-z_][A-Za-z0-9_]*)\(\)?`/g,
        );
        for (const match of inlineCodeMatches) {
          const symbol = match[1];
          // Heuristic: CamelCase = class/type, camelCase = function
          if (/^[A-Z]/.test(symbol)) {
            if (!currentSection.referencedClasses!.includes(symbol)) {
              currentSection.referencedClasses!.push(symbol);
            }
          } else {
            if (!currentSection.referencedFunctions!.includes(symbol)) {
              currentSection.referencedFunctions!.push(symbol);
            }
          }
        }

        // Also extract identifiers without parentheses (same heuristic;
        // includes() guards keep entries unique across both scans)
        const plainIdentifiers = line.matchAll(/`([A-Za-z_][A-Za-z0-9_]*)`/g);
        for (const match of plainIdentifiers) {
          const symbol = match[1];
          if (/^[A-Z]/.test(symbol)) {
            if (!currentSection.referencedClasses!.includes(symbol)) {
              currentSection.referencedClasses!.push(symbol);
            }
          } else {
            if (!currentSection.referencedFunctions!.includes(symbol)) {
              currentSection.referencedFunctions!.push(symbol);
            }
          }
        }
      }
    }

    // Save last section (the loop only closes a section on the next heading)
    if (currentSection) {
      currentSection.content = currentContent.join("\n");
      currentSection.endLine = lines.length - 1;
      sections.push(currentSection as DocumentationSection);
    }

    return sections;
  }
1043 |
1044 | /**
1045 | * Extract code file references from documentation
1046 | */
1047 | private extractCodeReferences(content: string): string[] {
1048 | const references: string[] = [];
1049 |
1050 | // Extract from markdown links
1051 | const linkMatches = content.matchAll(
1052 | /\[.*?\]\((.*?\.(ts|js|py|go|rs|java|rb).*?)\)/g,
1053 | );
1054 | for (const match of linkMatches) {
1055 | references.push(match[1]);
1056 | }
1057 |
1058 | // Extract from inline code
1059 | const codeMatches = content.matchAll(
1060 | /`([^`]+\.(ts|js|py|go|rs|java|rb))`/g,
1061 | );
1062 | for (const match of codeMatches) {
1063 | references.push(match[1]);
1064 | }
1065 |
1066 | return [...new Set(references)];
1067 | }
1068 |
1069 | /**
1070 | * Extract symbols from code examples
1071 | */
1072 | private extractSymbolsFromCode(code: string): string[] {
1073 | const symbols: string[] = [];
1074 |
1075 | // Extract function calls
1076 | const functionMatches = code.matchAll(/\b([a-z][A-Za-z0-9_]*)\s*\(/g);
1077 | for (const match of functionMatches) {
1078 | symbols.push(match[1]);
1079 | }
1080 |
1081 | // Extract class/type references
1082 | const classMatches = code.matchAll(/\b([A-Z][A-Za-z0-9_]*)\b/g);
1083 | for (const match of classMatches) {
1084 | symbols.push(match[1]);
1085 | }
1086 |
1087 | return [...new Set(symbols)];
1088 | }
1089 |
1090 | /**
1091 | * Find all source files in project
1092 | */
1093 | private async findSourceFiles(projectPath: string): Promise<string[]> {
1094 | const files: string[] = [];
1095 | const extensions = [
1096 | ".ts",
1097 | ".tsx",
1098 | ".js",
1099 | ".jsx",
1100 | ".py",
1101 | ".go",
1102 | ".rs",
1103 | ".java",
1104 | ".rb",
1105 | ];
1106 |
1107 | const walk = async (dir: string) => {
1108 | try {
1109 | const entries = await fs.readdir(dir, { withFileTypes: true });
1110 |
1111 | for (const entry of entries) {
1112 | const fullPath = path.join(dir, entry.name);
1113 |
1114 | if (entry.isDirectory()) {
1115 | if (
1116 | !["node_modules", "dist", "build", ".git", ".next"].includes(
1117 | entry.name,
1118 | )
1119 | ) {
1120 | await walk(fullPath);
1121 | }
1122 | } else {
1123 | const ext = path.extname(entry.name);
1124 | if (extensions.includes(ext)) {
1125 | files.push(fullPath);
1126 | }
1127 | }
1128 | }
1129 | } catch (error) {
1130 | console.warn(`Failed to read directory ${dir}:`, error);
1131 | }
1132 | };
1133 |
1134 | await walk(projectPath);
1135 | return files;
1136 | }
1137 |
1138 | /**
1139 | * Find all documentation files
1140 | */
1141 | private async findDocumentationFiles(docsPath: string): Promise<string[]> {
1142 | const files: string[] = [];
1143 |
1144 | const walk = async (dir: string) => {
1145 | try {
1146 | const entries = await fs.readdir(dir, { withFileTypes: true });
1147 |
1148 | for (const entry of entries) {
1149 | const fullPath = path.join(dir, entry.name);
1150 |
1151 | if (entry.isDirectory()) {
1152 | await walk(fullPath);
1153 | } else if (
1154 | entry.name.endsWith(".md") ||
1155 | entry.name.endsWith(".mdx")
1156 | ) {
1157 | files.push(fullPath);
1158 | }
1159 | }
1160 | } catch (error) {
1161 | console.warn(`Failed to read documentation directory ${dir}:`, error);
1162 | }
1163 | };
1164 |
1165 | try {
1166 | await walk(docsPath);
1167 | } catch {
1168 | // Docs path doesn't exist
1169 | }
1170 |
1171 | return files;
1172 | }
1173 |
1174 | /**
1175 | * Save snapshot to disk
1176 | */
1177 | private async saveSnapshot(snapshot: DriftSnapshot): Promise<void> {
1178 | const timestamp = new Date().toISOString().replace(/:/g, "-");
1179 | const snapshotPath = path.join(
1180 | this.snapshotDir,
1181 | `snapshot-${timestamp}.json`,
1182 | );
1183 |
1184 | // Convert Maps to objects for JSON serialization
1185 | const serializable = {
1186 | projectPath: snapshot.projectPath,
1187 | timestamp: snapshot.timestamp,
1188 | files: Object.fromEntries(snapshot.files),
1189 | documentation: Object.fromEntries(snapshot.documentation),
1190 | };
1191 |
1192 | await fs.writeFile(snapshotPath, JSON.stringify(serializable, null, 2));
1193 | }
1194 |
1195 | /**
1196 | * Load the latest snapshot
1197 | */
1198 | async loadLatestSnapshot(): Promise<DriftSnapshot | null> {
1199 | try {
1200 | const files = await fs.readdir(this.snapshotDir);
1201 | const snapshotFiles = files
1202 | .filter((f) => f.startsWith("snapshot-"))
1203 | .sort()
1204 | .reverse();
1205 |
1206 | if (snapshotFiles.length === 0) return null;
1207 |
1208 | const latestPath = path.join(this.snapshotDir, snapshotFiles[0]);
1209 | const content = await fs.readFile(latestPath, "utf-8");
1210 | const data = JSON.parse(content);
1211 |
1212 | return {
1213 | projectPath: data.projectPath,
1214 | timestamp: data.timestamp,
1215 | files: new Map(Object.entries(data.files)),
1216 | documentation: new Map(Object.entries(data.documentation)),
1217 | };
1218 | } catch {
1219 | return null;
1220 | }
1221 | }
1222 |
1223 | // Helper methods
1224 |
1225 | private determineDriftType(
1226 | diff: CodeDiff,
1227 | ): "outdated" | "incorrect" | "missing" | "breaking" {
1228 | if (diff.impactLevel === "breaking") return "breaking";
1229 | if (diff.type === "removed") return "incorrect";
1230 | if (diff.type === "modified") return "outdated";
1231 | return "missing";
1232 | }
1233 |
1234 | private generateDriftDescription(diff: CodeDiff): string {
1235 | const action =
1236 | diff.type === "added"
1237 | ? "added"
1238 | : diff.type === "removed"
1239 | ? "removed"
1240 | : "modified";
1241 | return `${diff.category} '${diff.name}' was ${action}: ${diff.details}`;
1242 | }
1243 |
1244 | private mapImpactToSeverity(
1245 | impact: "breaking" | "major" | "minor" | "patch",
1246 | ): "low" | "medium" | "high" | "critical" {
1247 | switch (impact) {
1248 | case "breaking":
1249 | return "critical";
1250 | case "major":
1251 | return "high";
1252 | case "minor":
1253 | return "medium";
1254 | case "patch":
1255 | return "low";
1256 | }
1257 | }
1258 |
1259 | private estimateUpdateEffort(
1260 | drifts: DocumentationDrift[],
1261 | ): "low" | "medium" | "high" {
1262 | const critical = drifts.filter((d) => d.severity === "critical").length;
1263 | const high = drifts.filter((d) => d.severity === "high").length;
1264 |
1265 | if (critical > 0 || high > 5) return "high";
1266 | if (high > 0 || drifts.length > 10) return "medium";
1267 | return "low";
1268 | }
1269 |
1270 | private calculateOverallSeverity(
1271 | drifts: DocumentationDrift[],
1272 | ): "none" | "low" | "medium" | "high" | "critical" {
1273 | if (drifts.length === 0) return "none";
1274 |
1275 | const hasCritical = drifts.some((d) => d.severity === "critical");
1276 | if (hasCritical) return "critical";
1277 |
1278 | const hasHigh = drifts.some((d) => d.severity === "high");
1279 | if (hasHigh) return "high";
1280 |
1281 | const hasMedium = drifts.some((d) => d.severity === "medium");
1282 | if (hasMedium) return "medium";
1283 |
1284 | return "low";
1285 | }
1286 |
1287 | // Priority Scoring Methods
1288 |
1289 | /**
1290 | * Set custom weights for priority scoring
1291 | * Note: Weights don't need to sum to 1.0 - they are applied as-is in the weighted sum
1292 | */
1293 | setCustomWeights(weights: Partial<PriorityWeights>): void {
1294 | this.customWeights = {
1295 | ...DriftDetector.DEFAULT_WEIGHTS,
1296 | ...weights,
1297 | };
1298 | }
1299 |
1300 | /**
1301 | * Get current weights (custom or default)
1302 | */
1303 | getWeights(): PriorityWeights {
1304 | return this.customWeights || DriftDetector.DEFAULT_WEIGHTS;
1305 | }
1306 |
1307 | /**
1308 | * Calculate priority score for a drift detection result
1309 | */
1310 | calculatePriorityScore(
1311 | result: DriftDetectionResult,
1312 | snapshot: DriftSnapshot,
1313 | usageMetadata?: UsageMetadata,
1314 | ): DriftPriorityScore {
1315 | const weights = this.getWeights();
1316 |
1317 | // Calculate individual factors
1318 | const codeComplexity = this.calculateCodeComplexityScore(result, snapshot);
1319 | const usageFrequency = this.calculateUsageFrequencyScore(
1320 | result,
1321 | snapshot,
1322 | usageMetadata,
1323 | );
1324 | const changeMagnitude = this.calculateChangeMagnitudeScore(result);
1325 | const documentationCoverage = this.calculateDocumentationCoverageScore(
1326 | result,
1327 | snapshot,
1328 | );
1329 | const staleness = this.calculateStalenessScore(result, snapshot);
1330 | const userFeedback = this.calculateUserFeedbackScore(result);
1331 |
1332 | // Calculate weighted overall score
1333 | const overall =
1334 | codeComplexity * weights.codeComplexity +
1335 | usageFrequency * weights.usageFrequency +
1336 | changeMagnitude * weights.changeMagnitude +
1337 | documentationCoverage * weights.documentationCoverage +
1338 | staleness * weights.staleness +
1339 | userFeedback * weights.userFeedback;
1340 |
1341 | const recommendation = this.determineRecommendation(overall);
1342 | const suggestedAction = this.generateSuggestedAction(
1343 | recommendation,
1344 | result,
1345 | );
1346 |
1347 | return {
1348 | overall: Math.round(overall),
1349 | factors: {
1350 | codeComplexity: Math.round(codeComplexity),
1351 | usageFrequency: Math.round(usageFrequency),
1352 | changeMagnitude: Math.round(changeMagnitude),
1353 | documentationCoverage: Math.round(documentationCoverage),
1354 | staleness: Math.round(staleness),
1355 | userFeedback: Math.round(userFeedback),
1356 | },
1357 | recommendation,
1358 | suggestedAction,
1359 | };
1360 | }
1361 |
1362 | /**
1363 | * Calculate code complexity score (0-100)
1364 | * Higher complexity = higher priority
1365 | */
1366 | private calculateCodeComplexityScore(
1367 | result: DriftDetectionResult,
1368 | snapshot: DriftSnapshot,
1369 | ): number {
1370 | const fileAnalysis = snapshot.files.get(result.filePath);
1371 | if (!fileAnalysis) return 50; // Default moderate score
1372 |
1373 | // Use existing complexity metric from AST analysis
1374 | const complexity = fileAnalysis.complexity || 0;
1375 |
1376 | // Normalize complexity to 0-100 scale
1377 | // Assume complexity ranges from 0 (simple) to 50+ (very complex)
1378 | const normalizedComplexity = Math.min(complexity * 2, 100);
1379 |
1380 | // Adjust based on drift severity
1381 | const severityMultiplier =
1382 | result.severity === "critical"
1383 | ? 1.2
1384 | : result.severity === "high"
1385 | ? 1.1
1386 | : result.severity === "medium"
1387 | ? 1.0
1388 | : 0.9;
1389 |
1390 | return Math.min(normalizedComplexity * severityMultiplier, 100);
1391 | }
1392 |
  /**
   * Calculate usage frequency score (0-100)
   * More used APIs = higher priority
   *
   * With usage metadata, sums call/instantiation/import counts for every
   * changed symbol (capped at 100). Without it, falls back to heuristics:
   * export count, documentation references to the file, and a public-API
   * bonus.
   *
   * @param result - Drift result whose changed symbols are scored
   * @param snapshot - Snapshot providing export and doc-reference context
   * @param usageMetadata - Optional measured usage counts
   */
  private calculateUsageFrequencyScore(
    result: DriftDetectionResult,
    snapshot: DriftSnapshot,
    usageMetadata?: UsageMetadata,
  ): number {
    // Scoring constants for usage estimation
    const DEFAULT_SCORE = 60; // Moderate usage assumption
    const EXPORT_WEIGHT = 15; // Points per export (max ~60 for 4 exports)
    const EXPORT_MAX = 60; // Cap on export-based score
    const DOC_REF_WEIGHT = 25; // Points per doc reference (max ~40 for 2 refs)
    const DOC_REF_MAX = 40; // Cap on documentation reference score
    const PUBLIC_API_BONUS = 30; // Bonus for being exported (public API)

    if (!usageMetadata) {
      // Estimate based on exports and documentation references
      const fileAnalysis = snapshot.files.get(result.filePath);
      if (!fileAnalysis) return DEFAULT_SCORE;

      const exportCount = fileAnalysis.exports.length;
      const isPublicAPI = exportCount > 0;

      // Count documentation references (docs that mention this file at all)
      let docReferences = 0;
      for (const docSnapshot of snapshot.documentation.values()) {
        if (docSnapshot.referencedCode.includes(result.filePath)) {
          docReferences++;
        }
      }

      // Score based on heuristics; components are individually capped so a
      // single dimension cannot dominate
      const exportScore = Math.min(exportCount * EXPORT_WEIGHT, EXPORT_MAX);
      const referenceScore = Math.min(
        docReferences * DOC_REF_WEIGHT,
        DOC_REF_MAX,
      );
      const publicAPIBonus = isPublicAPI ? PUBLIC_API_BONUS : 0;

      return Math.min(exportScore + referenceScore + publicAPIBonus, 100);
    }

    // Use actual usage data if available
    let totalUsage = 0;
    for (const drift of result.drifts) {
      for (const diff of drift.codeChanges) {
        if (diff.category === "function") {
          totalUsage += usageMetadata.functionCalls.get(diff.name) || 0;
        } else if (diff.category === "class") {
          totalUsage += usageMetadata.classInstantiations.get(diff.name) || 0;
        }
        totalUsage += usageMetadata.imports.get(diff.name) || 0;
      }
    }

    // Normalize to 0-100 (assume 100+ usages is very high)
    return Math.min(totalUsage, 100);
  }
1453 |
1454 | /**
1455 | * Calculate change magnitude score (0-100)
1456 | * Larger changes = higher priority
1457 | */
1458 | private calculateChangeMagnitudeScore(result: DriftDetectionResult): number {
1459 | const { breakingChanges, majorChanges, minorChanges } =
1460 | result.impactAnalysis;
1461 |
1462 | // Weighted score for different change types
1463 | // Breaking changes are critical - even 1 breaking change should score high
1464 | if (breakingChanges > 0) {
1465 | return 100;
1466 | }
1467 |
1468 | const majorScore = majorChanges * 20; // Multiple major changes add up
1469 | const minorScore = minorChanges * 8; // Minor changes have some impact
1470 |
1471 | const totalScore = majorScore + minorScore;
1472 | return Math.min(totalScore, 100);
1473 | }
1474 |
1475 | /**
1476 | * Calculate documentation coverage score (0-100)
1477 | * Lower coverage = higher priority (inverted score)
1478 | */
1479 | private calculateDocumentationCoverageScore(
1480 | result: DriftDetectionResult,
1481 | snapshot: DriftSnapshot,
1482 | ): number {
1483 | const { affectedDocFiles } = result.impactAnalysis;
1484 |
1485 | // If there are affected docs, give a reasonable base score
1486 | // Documentation exists but may need updates
1487 | if (affectedDocFiles.length > 0) {
1488 | // Calculate how well the changed code is documented
1489 | let totalSymbols = 0;
1490 | let documentedSymbols = 0;
1491 |
1492 | for (const drift of result.drifts) {
1493 | for (const diff of drift.codeChanges) {
1494 | totalSymbols++;
1495 |
1496 | // Check if this symbol is documented
1497 | for (const docPath of affectedDocFiles) {
1498 | const docSnapshot = snapshot.documentation.get(docPath);
1499 | if (docSnapshot) {
1500 | for (const section of docSnapshot.sections) {
1501 | const isDocumented =
1502 | section.referencedFunctions.includes(diff.name) ||
1503 | section.referencedClasses.includes(diff.name) ||
1504 | section.referencedTypes.includes(diff.name);
1505 |
1506 | if (isDocumented) {
1507 | documentedSymbols++;
1508 | break;
1509 | }
1510 | }
1511 | }
1512 | }
1513 | }
1514 | }
1515 |
1516 | if (totalSymbols === 0) return 40; // Docs exist, moderate priority
1517 |
1518 | // Invert coverage ratio: low coverage = high priority
1519 | // But cap at 80 since docs do exist
1520 | const coverageRatio = documentedSymbols / totalSymbols;
1521 | return Math.round((1 - coverageRatio) * 80);
1522 | }
1523 |
1524 | // Missing documentation is high priority
1525 | return 90;
1526 | }
1527 |
1528 | /**
1529 | * Calculate staleness score (0-100)
1530 | * Older docs = higher priority
1531 | */
1532 | private calculateStalenessScore(
1533 | result: DriftDetectionResult,
1534 | snapshot: DriftSnapshot,
1535 | ): number {
1536 | const { affectedDocFiles } = result.impactAnalysis;
1537 |
1538 | if (affectedDocFiles.length === 0) return 50;
1539 |
1540 | let oldestDocAge = 0;
1541 |
1542 | for (const docPath of affectedDocFiles) {
1543 | const docSnapshot = snapshot.documentation.get(docPath);
1544 | if (docSnapshot) {
1545 | const lastUpdated = new Date(docSnapshot.lastUpdated);
1546 | const ageInDays =
1547 | (Date.now() - lastUpdated.getTime()) / (1000 * 60 * 60 * 24);
1548 | oldestDocAge = Math.max(oldestDocAge, ageInDays);
1549 | }
1550 | }
1551 |
1552 | // Score based on age: 0-7 days = low, 7-30 days = medium, 30+ days = high
1553 | if (oldestDocAge > 90) return 100;
1554 | if (oldestDocAge > 30) return 80;
1555 | if (oldestDocAge > 14) return 60;
1556 | if (oldestDocAge > 7) return 40;
1557 | return 20;
1558 | }
1559 |
  /**
   * Calculate user feedback score (0-100)
   * More reported issues = higher priority
   * Integrates with issue tracking systems when configured
   *
   * Synchronous path only: when the configured integration returns a
   * Promise, this method deliberately returns 0 rather than awaiting it —
   * callers needing async feedback use calculatePriorityScoreAsync().
   *
   * @param result - Drift result passed through to the integration
   * @returns Feedback score, or 0 when no synchronous integration applies
   */
  private calculateUserFeedbackScore(result: DriftDetectionResult): number {
    // If user feedback integration is configured, use it
    if (this.userFeedbackIntegration) {
      const score = this.userFeedbackIntegration(result);
      // Handle both sync and async returns
      if (score instanceof Promise) {
        // For async, return 0 immediately (caller should use async version)
        return 0;
      }
      return score;
    }
    return 0; // Default: no feedback integration
  }
1578 |
  /**
   * Set user feedback integration for calculating feedback scores
   * This allows external configuration of issue tracker integration
   *
   * @param integration - Callback receiving a drift result and returning a
   *   feedback score (0-100), either synchronously or as a Promise. Only the
   *   async scoring path awaits Promise-returning integrations.
   */
  setUserFeedbackIntegration(
    integration: (result: DriftDetectionResult) => Promise<number> | number,
  ): void {
    this.userFeedbackIntegration = integration;
  }
1588 |
1589 | /**
1590 | * Calculate priority score with async user feedback support
1591 | */
1592 | async calculatePriorityScoreAsync(
1593 | result: DriftDetectionResult,
1594 | snapshot: DriftSnapshot,
1595 | usageMetadata?: UsageMetadata,
1596 | ): Promise<DriftPriorityScore> {
1597 | const weights = this.getWeights();
1598 |
1599 | // Calculate individual factors
1600 | const codeComplexity = this.calculateCodeComplexityScore(result, snapshot);
1601 | const usageFrequency = this.calculateUsageFrequencyScore(
1602 | result,
1603 | snapshot,
1604 | usageMetadata,
1605 | );
1606 | const changeMagnitude = this.calculateChangeMagnitudeScore(result);
1607 | const documentationCoverage = this.calculateDocumentationCoverageScore(
1608 | result,
1609 | snapshot,
1610 | );
1611 | const staleness = this.calculateStalenessScore(result, snapshot);
1612 |
1613 | // Get user feedback asynchronously if integration is configured
1614 | let userFeedback = 0;
1615 | if (this.userFeedbackIntegration) {
1616 | const feedbackScore = this.userFeedbackIntegration(result);
1617 | userFeedback =
1618 | feedbackScore instanceof Promise ? await feedbackScore : feedbackScore;
1619 | }
1620 |
1621 | // Calculate weighted overall score
1622 | const overall =
1623 | codeComplexity * weights.codeComplexity +
1624 | usageFrequency * weights.usageFrequency +
1625 | changeMagnitude * weights.changeMagnitude +
1626 | documentationCoverage * weights.documentationCoverage +
1627 | staleness * weights.staleness +
1628 | userFeedback * weights.userFeedback;
1629 |
1630 | const recommendation = this.determineRecommendation(overall);
1631 | const suggestedAction = this.generateSuggestedAction(
1632 | recommendation,
1633 | result,
1634 | );
1635 |
1636 | return {
1637 | overall: Math.round(overall),
1638 | factors: {
1639 | codeComplexity: Math.round(codeComplexity),
1640 | usageFrequency: Math.round(usageFrequency),
1641 | changeMagnitude: Math.round(changeMagnitude),
1642 | documentationCoverage: Math.round(documentationCoverage),
1643 | staleness: Math.round(staleness),
1644 | userFeedback: Math.round(userFeedback),
1645 | },
1646 | recommendation,
1647 | suggestedAction,
1648 | };
1649 | }
1650 |
1651 | /**
1652 | * Determine priority recommendation based on overall score
1653 | */
1654 | private determineRecommendation(
1655 | score: number,
1656 | ): "critical" | "high" | "medium" | "low" {
1657 | if (score >= 80) return "critical";
1658 | if (score >= 60) return "high";
1659 | if (score >= 40) return "medium";
1660 | return "low";
1661 | }
1662 |
1663 | /**
1664 | * Generate suggested action based on priority level
1665 | */
1666 | private generateSuggestedAction(
1667 | recommendation: "critical" | "high" | "medium" | "low",
1668 | result: DriftDetectionResult,
1669 | ): string {
1670 | const affectedCount = result.impactAnalysis.affectedDocFiles.length;
1671 |
1672 | switch (recommendation) {
1673 | case "critical":
1674 | return `Update immediately: ${result.impactAnalysis.breakingChanges} breaking change(s) affecting ${affectedCount} documentation file(s). Review and update within hours.`;
1675 | case "high":
1676 | return `Update within 1 day: ${result.drifts.length} drift(s) detected in ${affectedCount} file(s). Schedule update soon.`;
1677 | case "medium":
1678 | return `Update within 1 week: ${result.drifts.length} drift(s) affecting ${affectedCount} file(s). Plan update in next sprint.`;
1679 | case "low":
1680 | return `Update when convenient: Minor drift detected. Consider batching with other low-priority updates.`;
1681 | }
1682 | }
1683 |
1684 | /**
1685 | * Detect drift with priority scoring (synchronous user feedback)
1686 | */
1687 | async detectDriftWithPriority(
1688 | oldSnapshot: DriftSnapshot,
1689 | newSnapshot: DriftSnapshot,
1690 | usageMetadata?: UsageMetadata,
1691 | ): Promise<PrioritizedDriftResult[]> {
1692 | const results = await this.detectDrift(oldSnapshot, newSnapshot);
1693 |
1694 | return results.map((result) => ({
1695 | ...result,
1696 | priorityScore: this.calculatePriorityScore(
1697 | result,
1698 | newSnapshot,
1699 | usageMetadata,
1700 | ),
1701 | }));
1702 | }
1703 |
1704 | /**
1705 | * Detect drift with priority scoring (async user feedback support)
1706 | */
1707 | async detectDriftWithPriorityAsync(
1708 | oldSnapshot: DriftSnapshot,
1709 | newSnapshot: DriftSnapshot,
1710 | usageMetadata?: UsageMetadata,
1711 | ): Promise<PrioritizedDriftResult[]> {
1712 | const results = await this.detectDrift(oldSnapshot, newSnapshot);
1713 |
1714 | // Calculate priority scores with async user feedback
1715 | const prioritizedResults = await Promise.all(
1716 | results.map(async (result) => ({
1717 | ...result,
1718 | priorityScore: await this.calculatePriorityScoreAsync(
1719 | result,
1720 | newSnapshot,
1721 | usageMetadata,
1722 | ),
1723 | })),
1724 | );
1725 |
1726 | return prioritizedResults;
1727 | }
1728 |
1729 | /**
1730 | * Get prioritized drift results sorted by priority score
1731 | */
1732 | async getPrioritizedDriftResults(
1733 | oldSnapshot: DriftSnapshot,
1734 | newSnapshot: DriftSnapshot,
1735 | usageMetadata?: UsageMetadata,
1736 | ): Promise<PrioritizedDriftResult[]> {
1737 | // Use async version if user feedback integration is configured
1738 | const useAsyncFeedback = this.userFeedbackIntegration !== undefined;
1739 |
1740 | const results = useAsyncFeedback
1741 | ? await this.detectDriftWithPriorityAsync(
1742 | oldSnapshot,
1743 | newSnapshot,
1744 | usageMetadata,
1745 | )
1746 | : await this.detectDriftWithPriority(
1747 | oldSnapshot,
1748 | newSnapshot,
1749 | usageMetadata,
1750 | );
1751 |
1752 | // Sort by overall score (descending - highest priority first)
1753 | return results.sort((a, b) => {
1754 | const scoreA = a.priorityScore?.overall ?? 0;
1755 | const scoreB = b.priorityScore?.overall ?? 0;
1756 | return scoreB - scoreA;
1757 | });
1758 | }
1759 | }
1760 |
```
--------------------------------------------------------------------------------
/src/memory/export-import.ts:
--------------------------------------------------------------------------------
```typescript
1 | /**
2 | * Memory Export/Import System for DocuMCP
3 | * Comprehensive data portability, backup, and migration capabilities
4 | */
5 |
6 | import { EventEmitter } from "events";
7 | import { promises as fs } from "fs";
8 | import { createWriteStream } from "fs";
9 | import { MemoryEntry, JSONLStorage } from "./storage.js";
10 | import { MemoryManager } from "./manager.js";
11 | import { IncrementalLearningSystem } from "./learning.js";
12 | import { KnowledgeGraph } from "./knowledge-graph.js";
13 | import { MemoryPruningSystem } from "./pruning.js";
14 |
/** Configuration for a memory export operation. */
export interface ExportOptions {
  format: "json" | "jsonl" | "csv" | "xml" | "yaml" | "sqlite" | "archive";
  compression?: "gzip" | "zip" | "none";
  includeMetadata: boolean;
  includeLearning: boolean;
  includeKnowledgeGraph: boolean;
  /** Optional entry filters; omitted fields mean "no restriction". */
  filters?: {
    types?: string[];
    dateRange?: { start: Date; end: Date };
    projects?: string[];
    tags?: string[];
    outcomes?: string[];
  };
  /** In-place anonymization of matching field names before serialization. */
  anonymize?: {
    enabled: boolean;
    fields: string[];
    method: "hash" | "remove" | "pseudonymize";
  };
  encryption?: {
    enabled: boolean;
    algorithm: "aes-256-gcm" | "aes-192-gcm" | "aes-128-gcm";
    password?: string;
  };
}

/** Configuration for a memory import operation. */
export interface ImportOptions {
  format: "json" | "jsonl" | "csv" | "xml" | "yaml" | "sqlite" | "archive";
  /** How imported entries combine with existing data. */
  mode: "merge" | "replace" | "append" | "update";
  /** "strict" aborts on any invalid entry; "loose" imports what it can. */
  validation: "strict" | "loose" | "none";
  conflictResolution: "skip" | "overwrite" | "merge" | "rename";
  /** Create a backup before mutating the store (skipped on dry runs). */
  backup: boolean;
  dryRun: boolean;
  mapping?: Record<string, string>; // Field mapping for different schemas
  transformation?: {
    enabled: boolean;
    rules: Array<{
      field: string;
      operation: "convert" | "transform" | "validate";
      params: any;
    }>;
  };
}

/** Outcome summary of an export operation. */
export interface ExportResult {
  success: boolean;
  filePath?: string;
  format: string;
  /** Output file size in bytes (0 on failure). */
  size: number;
  entries: number;
  metadata: {
    exportedAt: Date;
    version: string;
    source: string;
    includes: string[];
    compression?: string;
    encryption?: boolean;
  };
  warnings: string[];
  errors: string[];
}

/** Outcome summary of an import operation. */
export interface ImportResult {
  success: boolean;
  processed: number;
  imported: number;
  skipped: number;
  errors: number;
  errorDetails: string[]; // Detailed error messages
  conflicts: number;
  validation: {
    valid: number;
    invalid: number;
    warnings: string[];
  };
  summary: {
    newEntries: number;
    updatedEntries: number;
    duplicateEntries: number;
    failedEntries: number;
  };
  metadata: {
    importedAt: Date;
    source: string;
    format: string;
    mode: string;
  };
}

/** Declarative plan for migrating data from an external system into DocuMCP. */
export interface MigrationPlan {
  sourceSystem: string;
  targetSystem: string;
  /** source-field → target-field name mapping. */
  mapping: Record<string, string>;
  transformations: Array<{
    field: string;
    type: "rename" | "convert" | "merge" | "split" | "calculate";
    source: string | string[];
    target: string;
    operation?: string;
  }>;
  validation: Array<{
    field: string;
    rules: string[];
    required: boolean;
  }>;
  postProcessing: string[];
}

/** Manifest written alongside multi-file archive exports. */
export interface ArchiveMetadata {
  version: string;
  created: Date;
  source: string;
  description: string;
  manifest: {
    files: Array<{
      name: string;
      type: string;
      size: number;
      checksum: string;
      entries?: number;
    }>;
    total: {
      files: number;
      size: number;
      entries: number;
    };
  };
  options: ExportOptions;
}
143 |
144 | export class MemoryExportImportSystem extends EventEmitter {
145 | private storage: JSONLStorage;
146 | private manager: MemoryManager;
147 | private learningSystem: IncrementalLearningSystem;
148 | private knowledgeGraph: KnowledgeGraph;
149 | private pruningSystem?: MemoryPruningSystem;
150 | private readonly version = "1.0.0";
151 |
  /**
   * @param storage - Backing JSONL store holding raw memory entries
   * @param manager - Memory manager coordinating entry lifecycle
   * @param learningSystem - Source of patterns/statistics for learning export
   * @param knowledgeGraph - Source of nodes/edges for knowledge-graph export
   * @param pruningSystem - Optional; not required for export/import itself
   */
  constructor(
    storage: JSONLStorage,
    manager: MemoryManager,
    learningSystem: IncrementalLearningSystem,
    knowledgeGraph: KnowledgeGraph,
    pruningSystem?: MemoryPruningSystem,
  ) {
    super();
    this.storage = storage;
    this.manager = manager;
    this.learningSystem = learningSystem;
    this.knowledgeGraph = knowledgeGraph;
    this.pruningSystem = pruningSystem;
  }
166 |
  /**
   * Export memory data to specified format.
   *
   * Pipeline: filter entries → assemble export payload → anonymize →
   * serialize to the requested format → compress → encrypt. Errors are
   * caught and returned as a failed ExportResult (never thrown).
   *
   * NOTE(review): options are merged shallowly, so a caller passing a
   * partial nested object (e.g. `anonymize: { enabled: true }`) replaces
   * the whole default sub-object, dropping default `fields`/`method`.
   *
   * @param outputPath - Destination path; an extension is appended if missing
   * @param options - Partial overrides of the defaults below
   * @returns Result describing the written file, or a failure record
   */
  async exportMemories(
    outputPath: string,
    options: Partial<ExportOptions> = {},
  ): Promise<ExportResult> {
    const defaultOptions: ExportOptions = {
      format: "json",
      compression: "none",
      includeMetadata: true,
      includeLearning: true,
      includeKnowledgeGraph: true,
      anonymize: {
        enabled: false,
        fields: ["userId", "email", "personalInfo"],
        method: "hash",
      },
      encryption: {
        enabled: false,
        algorithm: "aes-256-gcm",
      },
    };

    const activeOptions = { ...defaultOptions, ...options };
    const startTime = Date.now();

    this.emit("export_started", { outputPath, options: activeOptions });

    try {
      // Get filtered entries
      const entries = await this.getFilteredEntries(activeOptions.filters);

      // Prepare export data
      const exportData = await this.prepareExportData(entries, activeOptions);

      // Apply anonymization if enabled (mutates exportData in place)
      if (activeOptions.anonymize?.enabled) {
        this.applyAnonymization(exportData, activeOptions.anonymize);
      }

      // Prepare output path - if compression is requested, use temp file first
      let actualOutputPath = outputPath;
      if (activeOptions.compression && activeOptions.compression !== "none") {
        // For compressed exports, export to temp file first
        if (outputPath.endsWith(".gz")) {
          actualOutputPath = outputPath.slice(0, -3); // Remove .gz suffix
        } else if (outputPath.endsWith(".zip")) {
          actualOutputPath = outputPath.slice(0, -4); // Remove .zip suffix
        }
      }

      // Export to specified format
      let filePath: string;
      let size = 0;

      switch (activeOptions.format) {
        case "json":
          filePath = await this.exportToJSON(
            actualOutputPath,
            exportData,
            activeOptions,
          );
          break;
        case "jsonl":
          filePath = await this.exportToJSONL(
            actualOutputPath,
            exportData,
            activeOptions,
          );
          break;
        case "csv":
          filePath = await this.exportToCSV(
            actualOutputPath,
            exportData,
            activeOptions,
          );
          break;
        case "xml":
          filePath = await this.exportToXML(
            actualOutputPath,
            exportData,
            activeOptions,
          );
          break;
        case "yaml":
          filePath = await this.exportToYAML(
            actualOutputPath,
            exportData,
            activeOptions,
          );
          break;
        case "sqlite":
          filePath = await this.exportToSQLite(
            actualOutputPath,
            exportData,
            activeOptions,
          );
          break;
        case "archive":
          filePath = await this.exportToArchive(
            actualOutputPath,
            exportData,
            activeOptions,
          );
          break;
        default:
          throw new Error(`Unsupported export format: ${activeOptions.format}`);
      }

      // Apply compression if specified (may move the file to outputPath)
      if (activeOptions.compression && activeOptions.compression !== "none") {
        filePath = await this.applyCompression(
          filePath,
          activeOptions.compression,
          outputPath,
        );
      }

      // Apply encryption if enabled (currently a no-op stub; see applyEncryption)
      if (activeOptions.encryption?.enabled) {
        filePath = await this.applyEncryption(
          filePath,
          activeOptions.encryption,
        );
      }

      // Get file size
      const stats = await fs.stat(filePath);
      size = stats.size;

      const result: ExportResult = {
        success: true,
        filePath,
        format: activeOptions.format,
        size,
        entries: entries.length,
        metadata: {
          exportedAt: new Date(),
          version: this.version,
          source: "DocuMCP Memory System",
          includes: this.getIncludedComponents(activeOptions),
          compression:
            activeOptions.compression !== "none"
              ? activeOptions.compression
              : undefined,
          encryption: activeOptions.encryption?.enabled,
        },
        warnings: [],
        errors: [],
      };

      this.emit("export_completed", {
        result,
        duration: Date.now() - startTime,
      });

      return result;
    } catch (error) {
      // Failures are reported via the result object and an event, not thrown.
      const errorMessage =
        error instanceof Error ? error.message : String(error);
      this.emit("export_error", { error: errorMessage });

      return {
        success: false,
        format: activeOptions.format,
        size: 0,
        entries: 0,
        metadata: {
          exportedAt: new Date(),
          version: this.version,
          source: "DocuMCP Memory System",
          includes: [],
        },
        warnings: [],
        errors: [errorMessage],
      };
    }
  }
346 |
  /**
   * Import memory data from specified source.
   *
   * Pipeline: optional backup → format sniff → load/parse → validate →
   * process. A format mismatch between sniffed and specified format only
   * emits a "format_mismatch" event; the specified format still wins.
   * Errors are caught and returned as a failed ImportResult (never thrown).
   *
   * @param inputPath - Path to the file to import
   * @param options - Partial overrides of the defaults below
   */
  async importMemories(
    inputPath: string,
    options: Partial<ImportOptions> = {},
  ): Promise<ImportResult> {
    const defaultOptions: ImportOptions = {
      format: "json",
      mode: "merge",
      validation: "strict",
      conflictResolution: "skip",
      backup: true,
      dryRun: false,
    };

    const activeOptions = { ...defaultOptions, ...options };
    const startTime = Date.now();

    this.emit("import_started", { inputPath, options: activeOptions });

    try {
      // Create backup if requested (skipped on dry runs — nothing is mutated)
      if (activeOptions.backup && !activeOptions.dryRun) {
        await this.createBackup();
      }

      // Detect and verify format
      const detectedFormat = await this.detectFormat(inputPath);
      if (detectedFormat !== activeOptions.format) {
        this.emit("format_mismatch", {
          detected: detectedFormat,
          specified: activeOptions.format,
        });
      }

      // Load and parse import data
      const importData = await this.loadImportData(inputPath, activeOptions);

      // Validate import data
      const validationResult = await this.validateImportData(
        importData,
        activeOptions,
      );

      // Strict validation aborts the whole import on any invalid entry.
      if (
        validationResult.invalid > 0 &&
        activeOptions.validation === "strict"
      ) {
        throw new Error(
          `Validation failed: ${validationResult.invalid} invalid entries`,
        );
      }

      // Process import data
      const result = await this.processImportData(importData, activeOptions);

      this.emit("import_completed", {
        result,
        duration: Date.now() - startTime,
      });

      return result;
    } catch (error) {
      const errorMessage =
        error instanceof Error ? error.message : String(error);
      this.emit("import_error", { error: errorMessage });

      return {
        success: false,
        processed: 0,
        imported: 0,
        skipped: 0,
        errors: 1,
        errorDetails: [errorMessage],
        conflicts: 0,
        validation: {
          valid: 0,
          invalid: 0,
          warnings: [],
        },
        summary: {
          newEntries: 0,
          updatedEntries: 0,
          duplicateEntries: 0,
          failedEntries: 0,
        },
        metadata: {
          importedAt: new Date(),
          source: inputPath,
          format: activeOptions.format,
          mode: activeOptions.mode,
        },
      };
    }
  }
443 |
444 | /**
445 | * Create migration plan between different systems
446 | */
447 | async createMigrationPlan(
448 | sourceSchema: any,
449 | targetSchema: any,
450 | options?: {
451 | autoMap?: boolean;
452 | preserveStructure?: boolean;
453 | customMappings?: Record<string, string>;
454 | },
455 | ): Promise<MigrationPlan> {
456 | const plan: MigrationPlan = {
457 | sourceSystem: sourceSchema.system || "Unknown",
458 | targetSystem: "DocuMCP",
459 | mapping: {},
460 | transformations: [],
461 | validation: [],
462 | postProcessing: [],
463 | };
464 |
465 | // Auto-generate field mappings
466 | if (options?.autoMap !== false) {
467 | plan.mapping = this.generateFieldMappings(sourceSchema, targetSchema);
468 | }
469 |
470 | // Apply custom mappings
471 | if (options?.customMappings) {
472 | Object.assign(plan.mapping, options.customMappings);
473 | }
474 |
475 | // Generate transformations
476 | plan.transformations = this.generateTransformations(
477 | sourceSchema,
478 | targetSchema,
479 | plan.mapping,
480 | );
481 |
482 | // Generate validation rules
483 | plan.validation = this.generateValidationRules(targetSchema);
484 |
485 | // Generate post-processing steps
486 | plan.postProcessing = this.generatePostProcessingSteps(targetSchema);
487 |
488 | return plan;
489 | }
490 |
  /**
   * Execute migration plan: load raw source data, apply the plan's
   * transformations, convert to the internal import shape, and run a
   * standard import with migration-oriented defaults (merge + strict).
   *
   * Unlike importMemories, failures here are re-thrown after emitting
   * "migration_error".
   *
   * @param inputPath - Path to the source-system data file
   * @param migrationPlan - Plan produced by createMigrationPlan
   * @param options - Optional overrides for the underlying import
   */
  async executeMigration(
    inputPath: string,
    migrationPlan: MigrationPlan,
    options?: Partial<ImportOptions>,
  ): Promise<ImportResult> {
    this.emit("migration_started", { inputPath, plan: migrationPlan });

    try {
      // Load source data
      const sourceData = await this.loadRawData(inputPath);

      // Apply transformations
      const transformedData = await this.applyTransformations(
        sourceData,
        migrationPlan,
      );

      // Convert to import format
      const importData = this.convertToImportFormat(
        transformedData,
        migrationPlan,
      );

      // Execute import with migration settings; caller options may override
      // everything except the transformation rules derived from the plan.
      const importOptions: ImportOptions = {
        format: "json",
        mode: "merge",
        validation: "strict",
        conflictResolution: "merge",
        backup: true,
        dryRun: false,
        ...options,
        transformation: {
          enabled: true,
          rules: migrationPlan.transformations.map((t) => ({
            field: t.target,
            operation: t.type as any,
            params: { source: t.source, operation: t.operation },
          })),
        },
      };

      const result = await this.processImportData(importData, importOptions);

      // Execute post-processing
      if (migrationPlan.postProcessing.length > 0) {
        await this.executePostProcessing(migrationPlan.postProcessing);
      }

      this.emit("migration_completed", { result });
      return result;
    } catch (error) {
      this.emit("migration_error", {
        error: error instanceof Error ? error.message : String(error),
      });
      throw error;
    }
  }
552 |
553 | /**
554 | * Get supported formats
555 | */
556 | getSupportedFormats(): {
557 | export: string[];
558 | import: string[];
559 | compression: string[];
560 | encryption: string[];
561 | } {
562 | return {
563 | export: ["json", "jsonl", "csv", "xml", "yaml", "sqlite", "archive"],
564 | import: ["json", "jsonl", "csv", "xml", "yaml", "sqlite", "archive"],
565 | compression: ["gzip", "zip", "none"],
566 | encryption: ["aes-256-gcm", "aes-192-gcm", "aes-128-gcm"],
567 | };
568 | }
569 |
  /**
   * Validate export/import compatibility of a data file before importing.
   *
   * Checks format support, schema shape, and basic data integrity on a
   * sample of the file. Never throws: unreadable/undetectable files come
   * back as { compatible: false } with the error text in `issues`.
   *
   * @param sourcePath - File to inspect
   * @param _targetSystem - Reserved for future multi-target support (unused)
   */
  async validateCompatibility(
    sourcePath: string,
    _targetSystem: string = "DocuMCP",
  ): Promise<{
    compatible: boolean;
    issues: string[];
    recommendations: string[];
    migrationRequired: boolean;
  }> {
    try {
      const format = await this.detectFormat(sourcePath);
      const sampleData = await this.loadSampleData(sourcePath, format);

      const issues: string[] = [];
      const recommendations: string[] = [];
      let compatible = true;
      let migrationRequired = false;

      // Check format compatibility
      if (!this.getSupportedFormats().import.includes(format)) {
        issues.push(`Unsupported format: ${format}`);
        compatible = false;
      }

      // Check schema compatibility (schema issues require migration, but
      // do not by themselves mark the file incompatible)
      const schemaIssues = this.validateSchema(sampleData);
      if (schemaIssues.length > 0) {
        issues.push(...schemaIssues);
        migrationRequired = true;
      }

      // Check data integrity
      const integrityIssues = this.validateDataIntegrity(sampleData);
      if (integrityIssues.length > 0) {
        issues.push(...integrityIssues);
        recommendations.push("Consider data cleaning before import");
      }

      // Generate recommendations
      if (migrationRequired) {
        recommendations.push("Create migration plan for schema transformation");
      }

      if (format === "csv") {
        recommendations.push(
          "Consider using JSON or JSONL for better data preservation",
        );
      }

      return {
        compatible,
        issues,
        recommendations,
        migrationRequired,
      };
    } catch (error) {
      return {
        compatible: false,
        issues: [error instanceof Error ? error.message : String(error)],
        recommendations: ["Verify file format and accessibility"],
        migrationRequired: false,
      };
    }
  }
637 |
638 | /**
639 | * Private helper methods
640 | */
641 | private async getFilteredEntries(
642 | filters?: ExportOptions["filters"],
643 | ): Promise<MemoryEntry[]> {
644 | let entries = await this.storage.getAll();
645 |
646 | if (!filters) return entries;
647 |
648 | // Apply type filter
649 | if (filters.types && filters.types.length > 0) {
650 | entries = entries.filter((entry) => filters.types!.includes(entry.type));
651 | }
652 |
653 | // Apply date range filter
654 | if (filters.dateRange) {
655 | entries = entries.filter((entry) => {
656 | const entryDate = new Date(entry.timestamp);
657 | return (
658 | entryDate >= filters.dateRange!.start &&
659 | entryDate <= filters.dateRange!.end
660 | );
661 | });
662 | }
663 |
664 | // Apply project filter
665 | if (filters.projects && filters.projects.length > 0) {
666 | entries = entries.filter((entry) =>
667 | filters.projects!.some(
668 | (project) =>
669 | entry.data.projectPath?.includes(project) ||
670 | entry.data.projectId === project,
671 | ),
672 | );
673 | }
674 |
675 | // Apply tags filter
676 | if (filters.tags && filters.tags.length > 0) {
677 | entries = entries.filter(
678 | (entry) => entry.tags?.some((tag) => filters.tags!.includes(tag)),
679 | );
680 | }
681 |
682 | // Apply outcomes filter
683 | if (filters.outcomes && filters.outcomes.length > 0) {
684 | entries = entries.filter(
685 | (entry) =>
686 | filters.outcomes!.includes(entry.data.outcome) ||
687 | (entry.data.success === true &&
688 | filters.outcomes!.includes("success")) ||
689 | (entry.data.success === false &&
690 | filters.outcomes!.includes("failure")),
691 | );
692 | }
693 |
694 | return entries;
695 | }
696 |
  /**
   * Assemble the full export payload: metadata header, the filtered memory
   * entries, and (optionally) learning patterns/statistics and the
   * knowledge graph's nodes/edges/statistics.
   *
   * @param entries - Already-filtered memory entries to export
   * @param options - Controls which optional sections are included
   */
  private async prepareExportData(
    entries: MemoryEntry[],
    options: ExportOptions,
  ): Promise<any> {
    const exportData: any = {
      metadata: {
        version: this.version,
        exportedAt: new Date().toISOString(),
        source: "DocuMCP Memory System",
        entries: entries.length,
        options: {
          includeMetadata: options.includeMetadata,
          includeLearning: options.includeLearning,
          includeKnowledgeGraph: options.includeKnowledgeGraph,
        },
      },
      memories: entries,
    };

    // Include learning data if requested
    if (options.includeLearning) {
      const patterns = await this.learningSystem.getPatterns();
      exportData.learning = {
        patterns,
        statistics: await this.learningSystem.getStatistics(),
      };
    }

    // Include knowledge graph if requested
    if (options.includeKnowledgeGraph) {
      const nodes = await this.knowledgeGraph.getAllNodes();
      const edges = await this.knowledgeGraph.getAllEdges();
      exportData.knowledgeGraph = {
        nodes,
        edges,
        statistics: await this.knowledgeGraph.getStatistics(),
      };
    }

    return exportData;
  }
738 |
739 | private applyAnonymization(
740 | data: any,
741 | anonymizeOptions: { fields: string[]; method: string },
742 | ): void {
743 | const anonymizeValue = (value: any, method: string): any => {
744 | if (typeof value !== "string") return value;
745 |
746 | switch (method) {
747 | case "hash":
748 | return this.hashValue(value);
749 | case "remove":
750 | return null;
751 | case "pseudonymize":
752 | return this.pseudonymizeValue(value);
753 | default:
754 | return value;
755 | }
756 | };
757 |
758 | const anonymizeObject = (obj: any): void => {
759 | for (const [key, value] of Object.entries(obj)) {
760 | if (anonymizeOptions.fields.includes(key)) {
761 | obj[key] = anonymizeValue(value, anonymizeOptions.method);
762 | } else if (typeof value === "object" && value !== null) {
763 | anonymizeObject(value);
764 | }
765 | }
766 | };
767 |
768 | anonymizeObject(data);
769 | }
770 |
771 | private hashValue(value: string): string {
772 | // Simple hash - in production, use a proper cryptographic hash
773 | let hash = 0;
774 | for (let i = 0; i < value.length; i++) {
775 | const char = value.charCodeAt(i);
776 | hash = (hash << 5) - hash + char;
777 | hash = hash & hash;
778 | }
779 | return `hash_${Math.abs(hash).toString(36)}`;
780 | }
781 |
782 | private pseudonymizeValue(_value: string): string {
783 | // Simple pseudonymization - in production, use proper techniques
784 | const prefixes = ["user", "project", "system", "item"];
785 | const suffix = Math.random().toString(36).substr(2, 8);
786 | const prefix = prefixes[Math.floor(Math.random() * prefixes.length)];
787 | return `${prefix}_${suffix}`;
788 | }
789 |
790 | private async exportToJSON(
791 | outputPath: string,
792 | data: any,
793 | _options: ExportOptions,
794 | ): Promise<string> {
795 | const jsonData = JSON.stringify(data, null, 2);
796 | // Handle compression-aware file paths (e.g., file.json.gz)
797 | let filePath = outputPath;
798 | if (!outputPath.includes(".json")) {
799 | filePath = `${outputPath}.json`;
800 | }
801 | await fs.writeFile(filePath, jsonData, "utf8");
802 | return filePath;
803 | }
804 |
  /**
   * Stream the payload as JSON Lines: metadata record first, then one line
   * per memory entry, then optional learning and knowledge-graph records
   * (tagged with a "type" field so the importer can route them).
   *
   * Resolves with the written path on stream finish; rejects on any
   * stream error.
   */
  private async exportToJSONL(
    outputPath: string,
    data: any,
    _options: ExportOptions,
  ): Promise<string> {
    const filePath = outputPath.endsWith(".jsonl")
      ? outputPath
      : `${outputPath}.jsonl`;

    return new Promise((resolve, reject) => {
      const writeStream = createWriteStream(filePath);

      writeStream.on("error", (error) => {
        reject(error);
      });

      writeStream.on("finish", () => {
        resolve(filePath);
      });

      // Write metadata as first line
      writeStream.write(JSON.stringify(data.metadata) + "\n");

      // Write each memory entry as a separate line
      for (const entry of data.memories) {
        writeStream.write(JSON.stringify(entry) + "\n");
      }

      // Write learning data if included
      if (data.learning) {
        writeStream.write(
          JSON.stringify({ type: "learning", data: data.learning }) + "\n",
        );
      }

      // Write knowledge graph if included
      if (data.knowledgeGraph) {
        writeStream.write(
          JSON.stringify({
            type: "knowledgeGraph",
            data: data.knowledgeGraph,
          }) + "\n",
        );
      }

      writeStream.end();
    });
  }
853 |
854 | private async exportToCSV(
855 | outputPath: string,
856 | data: any,
857 | _options: ExportOptions,
858 | ): Promise<string> {
859 | const filePath = outputPath.endsWith(".csv")
860 | ? outputPath
861 | : `${outputPath}.csv`;
862 |
863 | // Flatten memory entries for CSV format
864 | const flattenedEntries = data.memories.map((entry: MemoryEntry) => ({
865 | id: entry.id,
866 | timestamp: entry.timestamp,
867 | type: entry.type,
868 | projectPath: entry.data.projectPath || "",
869 | projectId: entry.data.projectId || "",
870 | language: entry.data.language || "",
871 | framework: entry.data.framework || "",
872 | outcome: entry.data.outcome || "",
873 | success: entry.data.success || false,
874 | tags: entry.tags?.join(";") || "",
875 | metadata: JSON.stringify(entry.metadata || {}),
876 | }));
877 |
878 | // Generate CSV headers
879 | const headers = Object.keys(flattenedEntries[0] || {});
880 | const csvLines = [headers.join(",")];
881 |
882 | // Generate CSV rows
883 | for (const entry of flattenedEntries) {
884 | const row = headers.map((header) => {
885 | const value = entry[header as keyof typeof entry];
886 | const stringValue =
887 | typeof value === "string" ? value : JSON.stringify(value);
888 | return `"${stringValue.replace(/"/g, '""')}"`;
889 | });
890 | csvLines.push(row.join(","));
891 | }
892 |
893 | await fs.writeFile(filePath, csvLines.join("\n"), "utf8");
894 | return filePath;
895 | }
896 |
897 | private async exportToXML(
898 | outputPath: string,
899 | data: any,
900 | _options: ExportOptions,
901 | ): Promise<string> {
902 | const filePath = outputPath.endsWith(".xml")
903 | ? outputPath
904 | : `${outputPath}.xml`;
905 |
906 | const xmlData = this.convertToXML(data);
907 | await fs.writeFile(filePath, xmlData, "utf8");
908 | return filePath;
909 | }
910 |
911 | private async exportToYAML(
912 | outputPath: string,
913 | data: any,
914 | _options: ExportOptions,
915 | ): Promise<string> {
916 | const filePath = outputPath.endsWith(".yaml")
917 | ? outputPath
918 | : `${outputPath}.yaml`;
919 |
920 | // Simple YAML conversion - in production, use a proper YAML library
921 | const yamlData = this.convertToYAML(data);
922 | await fs.writeFile(filePath, yamlData, "utf8");
923 | return filePath;
924 | }
925 |
  /**
   * SQLite export is intentionally unimplemented: it would require a
   * native driver (better-sqlite3) that this package does not depend on.
   * @throws Error always
   */
  private async exportToSQLite(
    _outputPath: string,
    _data: any,
    _options: ExportOptions,
  ): Promise<string> {
    // This would require a SQLite library like better-sqlite3
    // For now, throw an error indicating additional dependencies needed
    throw new Error(
      "SQLite export requires additional dependencies (better-sqlite3)",
    );
  }
937 |
  /**
   * "Archive" export: writes each section (memories, learning, knowledge
   * graph) plus a manifest as separate JSON files into a directory.
   *
   * NOTE(review): despite the ".tar" naming, no tarball is produced — the
   * return value is the directory path, and manifest checksums are
   * placeholders ("sha256-placeholder"), not real digests.
   *
   * @returns The directory containing the exported files
   */
  private async exportToArchive(
    outputPath: string,
    data: any,
    options: ExportOptions,
  ): Promise<string> {
    const archivePath = outputPath.endsWith(".tar")
      ? outputPath
      : `${outputPath}.tar`;

    // Create archive metadata
    const metadata: ArchiveMetadata = {
      version: this.version,
      created: new Date(),
      source: "DocuMCP Memory System",
      description: "Complete memory system export archive",
      manifest: {
        files: [],
        total: { files: 0, size: 0, entries: data.memories.length },
      },
      options,
    };

    // This would require archiving capabilities
    // For now, create multiple files and reference them in metadata
    const baseDir = archivePath.replace(".tar", "");
    await fs.mkdir(baseDir, { recursive: true });

    // Export memories as JSON
    const memoriesPath = `${baseDir}/memories.json`;
    await this.exportToJSON(memoriesPath, { memories: data.memories }, options);
    metadata.manifest.files.push({
      name: "memories.json",
      type: "memories",
      size: (await fs.stat(memoriesPath)).size,
      checksum: "sha256-placeholder",
      entries: data.memories.length,
    });

    // Export learning data if included
    if (data.learning) {
      const learningPath = `${baseDir}/learning.json`;
      await this.exportToJSON(learningPath, data.learning, options);
      metadata.manifest.files.push({
        name: "learning.json",
        type: "learning",
        size: (await fs.stat(learningPath)).size,
        checksum: "sha256-placeholder",
      });
    }

    // Export knowledge graph if included
    if (data.knowledgeGraph) {
      const kgPath = `${baseDir}/knowledge-graph.json`;
      await this.exportToJSON(kgPath, data.knowledgeGraph, options);
      metadata.manifest.files.push({
        name: "knowledge-graph.json",
        type: "knowledge-graph",
        size: (await fs.stat(kgPath)).size,
        checksum: "sha256-placeholder",
      });
    }

    // Write metadata
    const metadataPath = `${baseDir}/metadata.json`;
    await this.exportToJSON(metadataPath, metadata, options);

    return baseDir;
  }
1006 |
  /**
   * Apply compression to an exported file.
   *
   * NOTE(review): "gzip" here is a MOCK — it prepends a "GZIP_HEADER" text
   * line rather than producing real gzip output. Replacing it with
   * zlib.gzip would break any counterpart that expects this header; fix
   * both sides together. "zip" is unimplemented and falls through.
   *
   * @param filePath - File to compress (deleted if a temp file was used)
   * @param compression - "gzip" | "zip" | "none"
   * @param targetPath - Final destination for the compressed output
   * @returns Path of the (possibly unchanged) output file
   */
  private async applyCompression(
    filePath: string,
    compression: string,
    targetPath?: string,
  ): Promise<string> {
    if (compression === "gzip") {
      const compressedPath = targetPath || `${filePath}.gz`;
      const content = await fs.readFile(filePath, "utf8");
      // Simple mock compression - just add a header and write the content
      await fs.writeFile(compressedPath, `GZIP_HEADER\n${content}`, "utf8");

      // Clean up temp file if we used one
      if (targetPath && targetPath !== filePath) {
        await fs.unlink(filePath);
      }

      return compressedPath;
    }

    // For other compression types or 'none', return original path
    this.emit("compression_skipped", {
      reason: "Not implemented",
      compression,
    });
    return filePath;
  }
1033 |
  /**
   * Encryption stub: currently a no-op that emits "encryption_skipped" and
   * returns the file unchanged. Callers requesting encryption therefore
   * still receive PLAINTEXT output — do not rely on this for sensitive data.
   */
  private async applyEncryption(
    filePath: string,
    encryption: any,
  ): Promise<string> {
    // This would require encryption capabilities
    // For now, return the original path
    this.emit("encryption_skipped", { reason: "Not implemented", encryption });
    return filePath;
  }
1043 |
1044 | private getIncludedComponents(options: ExportOptions): string[] {
1045 | const components = ["memories"];
1046 | if (options.includeMetadata) components.push("metadata");
1047 | if (options.includeLearning) components.push("learning");
1048 | if (options.includeKnowledgeGraph) components.push("knowledge-graph");
1049 | return components;
1050 | }
1051 |
1052 | private async detectFormat(filePath: string): Promise<string> {
1053 | const extension = filePath.split(".").pop()?.toLowerCase();
1054 |
1055 | switch (extension) {
1056 | case "json":
1057 | return "json";
1058 | case "jsonl":
1059 | return "jsonl";
1060 | case "csv":
1061 | return "csv";
1062 | case "xml":
1063 | return "xml";
1064 | case "yaml":
1065 | case "yml":
1066 | return "yaml";
1067 | case "db":
1068 | case "sqlite":
1069 | return "sqlite";
1070 | case "tar":
1071 | case "zip":
1072 | return "archive";
1073 | default: {
1074 | // Try to detect by content
1075 | const content = await fs.readFile(filePath, "utf8");
1076 | if (content.trim().startsWith("{") || content.trim().startsWith("[")) {
1077 | return "json";
1078 | }
1079 | if (content.includes("<?xml")) {
1080 | return "xml";
1081 | }
1082 | return "unknown";
1083 | }
1084 | }
1085 | }
1086 |
  /**
   * Load and parse the import file according to the format declared in
   * options (not the sniffed format).
   *
   * NOTE(review): "sqlite" and "archive" are advertised as importable by
   * getSupportedFormats() but are rejected here — align the two, or
   * implement the missing loaders.
   */
  private async loadImportData(
    filePath: string,
    options: ImportOptions,
  ): Promise<any> {
    switch (options.format) {
      case "json":
        return JSON.parse(await fs.readFile(filePath, "utf8"));
      case "jsonl":
        return this.loadJSONLData(filePath);
      case "csv":
        return this.loadCSVData(filePath);
      case "xml":
        return this.loadXMLData(filePath);
      case "yaml":
        return this.loadYAMLData(filePath);
      default:
        throw new Error(`Unsupported import format: ${options.format}`);
    }
  }
1106 |
1107 | private async loadJSONLData(filePath: string): Promise<any> {
1108 | const content = await fs.readFile(filePath, "utf8");
1109 | const lines = content.trim().split("\n");
1110 |
1111 | const data: any = { memories: [], learning: null, knowledgeGraph: null };
1112 |
1113 | for (const line of lines) {
1114 | const parsed = JSON.parse(line);
1115 |
1116 | if (parsed.type === "learning") {
1117 | data.learning = parsed.data;
1118 | } else if (parsed.type === "knowledgeGraph") {
1119 | data.knowledgeGraph = parsed.data;
1120 | } else if (parsed.version) {
1121 | data.metadata = parsed;
1122 | } else {
1123 | data.memories.push(parsed);
1124 | }
1125 | }
1126 |
1127 | return data;
1128 | }
1129 |
1130 | private async loadCSVData(filePath: string): Promise<any> {
1131 | const content = await fs.readFile(filePath, "utf8");
1132 | const lines = content.trim().split("\n");
1133 | const headers = lines[0].split(",").map((h) => h.replace(/"/g, ""));
1134 |
1135 | const memories = [];
1136 | for (let i = 1; i < lines.length; i++) {
1137 | const values = this.parseCSVLine(lines[i]);
1138 | const entry: any = {};
1139 |
1140 | for (let j = 0; j < headers.length; j++) {
1141 | const header = headers[j];
1142 | const value = values[j];
1143 |
1144 | // Parse special fields
1145 | if (header === "tags") {
1146 | entry.tags = value ? value.split(";") : [];
1147 | } else if (header === "metadata") {
1148 | try {
1149 | entry.metadata = JSON.parse(value);
1150 | } catch {
1151 | entry.metadata = {};
1152 | }
1153 | } else if (header === "success") {
1154 | entry.data = entry.data || {};
1155 | entry.data.success = value === "true";
1156 | } else if (
1157 | [
1158 | "projectPath",
1159 | "projectId",
1160 | "language",
1161 | "framework",
1162 | "outcome",
1163 | ].includes(header)
1164 | ) {
1165 | entry.data = entry.data || {};
1166 | entry.data[header] = value;
1167 | } else {
1168 | entry[header] = value;
1169 | }
1170 | }
1171 |
1172 | memories.push(entry);
1173 | }
1174 |
1175 | return { memories };
1176 | }
1177 |
1178 | private parseCSVLine(line: string): string[] {
1179 | const values: string[] = [];
1180 | let current = "";
1181 | let inQuotes = false;
1182 |
1183 | for (let i = 0; i < line.length; i++) {
1184 | const char = line[i];
1185 |
1186 | if (char === '"') {
1187 | if (inQuotes && line[i + 1] === '"') {
1188 | current += '"';
1189 | i++;
1190 | } else {
1191 | inQuotes = !inQuotes;
1192 | }
1193 | } else if (char === "," && !inQuotes) {
1194 | values.push(current);
1195 | current = "";
1196 | } else {
1197 | current += char;
1198 | }
1199 | }
1200 |
1201 | values.push(current);
1202 | return values;
1203 | }
1204 |
  /**
   * XML import is intentionally unimplemented: it would need an XML
   * parser (e.g. xml2js), which is not a dependency of this package.
   * @throws Always throws to signal the missing capability.
   */
  private async loadXMLData(_filePath: string): Promise<any> {
    // This would require an XML parser
    throw new Error("XML import requires additional dependencies (xml2js)");
  }
1209 |
  /**
   * YAML import is intentionally unimplemented: it would need a YAML
   * parser (e.g. js-yaml), which is not a dependency of this package.
   * @throws Always throws to signal the missing capability.
   */
  private async loadYAMLData(_filePath: string): Promise<any> {
    // This would require a YAML parser
    throw new Error("YAML import requires additional dependencies (js-yaml)");
  }
1214 |
1215 | private async validateImportData(
1216 | data: any,
1217 | options: ImportOptions,
1218 | ): Promise<{ valid: number; invalid: number; warnings: string[] }> {
1219 | const result = { valid: 0, invalid: 0, warnings: [] as string[] };
1220 |
1221 | if (!data.memories || !Array.isArray(data.memories)) {
1222 | result.warnings.push("No memories array found in import data");
1223 | return result;
1224 | }
1225 |
1226 | for (const entry of data.memories) {
1227 | if (this.validateMemoryEntry(entry, options.validation)) {
1228 | result.valid++;
1229 | } else {
1230 | result.invalid++;
1231 | }
1232 | }
1233 |
1234 | return result;
1235 | }
1236 |
1237 | private validateMemoryEntry(entry: any, validation: string): boolean {
1238 | // Check for completely missing or null required fields
1239 | if (
1240 | !entry.id ||
1241 | !entry.timestamp ||
1242 | entry.type === null ||
1243 | entry.type === undefined ||
1244 | entry.data === null
1245 | ) {
1246 | return false; // These are invalid regardless of validation level
1247 | }
1248 |
1249 | if (!entry.type) {
1250 | return validation !== "strict";
1251 | }
1252 |
1253 | if (validation === "strict") {
1254 | return Boolean(entry.data && typeof entry.data === "object");
1255 | }
1256 |
1257 | // For loose validation, still require data to be defined (not null)
1258 | if (validation === "loose" && entry.data === null) {
1259 | return false;
1260 | }
1261 |
1262 | return true;
1263 | }
1264 |
  /**
   * Core import loop: transforms, validates, and persists each incoming
   * memory entry, resolving ID conflicts according to
   * options.conflictResolution, then imports any learning and
   * knowledge-graph payloads. In dry-run mode nothing is written, but
   * processed/validation counters are still tallied.
   *
   * NOTE(review): in dry-run mode the imported/updated/new counters for
   * the write paths stay at 0 (the increments sit inside `!dryRun`
   * guards) while `skipped` still increments — confirm this asymmetry
   * is intended.
   *
   * @param data - Parsed import payload ({ memories, learning?, knowledgeGraph? }).
   * @param options - Import behavior (format, mode, validation level, conflict policy, dryRun).
   * @returns Aggregated counters, conflict totals, and error details for the run.
   */
  private async processImportData(
    data: any,
    options: ImportOptions,
  ): Promise<ImportResult> {
    const result: ImportResult = {
      success: true,
      processed: 0,
      imported: 0,
      skipped: 0,
      errors: 0,
      errorDetails: [],
      conflicts: 0,
      validation: { valid: 0, invalid: 0, warnings: [] },
      summary: {
        newEntries: 0,
        updatedEntries: 0,
        duplicateEntries: 0,
        failedEntries: 0,
      },
      metadata: {
        importedAt: new Date(),
        source: "imported data",
        format: options.format,
        mode: options.mode,
      },
    };

    // Without a memories array there is nothing to import — fail fast.
    if (!data.memories || !Array.isArray(data.memories)) {
      result.success = false;
      result.errors = 1;
      result.errorDetails = ["No valid memories array found in import data"];
      return result;
    }

    for (const entry of data.memories) {
      result.processed++;

      try {
        // Apply transformations and mappings
        let transformedEntry = { ...entry };
        if (options.mapping || options.transformation?.enabled) {
          transformedEntry = this.applyDataTransformations(entry, options);
        }

        // Entries failing validation are counted but never written.
        if (!this.validateMemoryEntry(transformedEntry, options.validation)) {
          result.validation.invalid++;
          result.errors++;
          result.summary.failedEntries++;
          result.errorDetails.push(
            `Invalid memory entry: ${
              transformedEntry.id || "unknown"
            } - validation failed`,
          );
          continue;
        }

        result.validation.valid++;

        // Check for conflicts
        const existing = await this.storage.get(transformedEntry.id);
        if (existing) {
          result.conflicts++;

          // Conflict policy decides whether the incoming entry skips,
          // replaces, merges into, or is stored alongside the existing one.
          switch (options.conflictResolution) {
            case "skip":
              result.skipped++;
              result.summary.duplicateEntries++;
              continue;
            case "overwrite":
              if (!options.dryRun) {
                await this.storage.update(
                  transformedEntry.id,
                  transformedEntry,
                );
                result.imported++;
                result.summary.updatedEntries++;
              }
              break;
            case "merge":
              if (!options.dryRun) {
                const merged = this.mergeEntries(existing, transformedEntry);
                await this.storage.update(transformedEntry.id, merged);
                result.imported++;
                result.summary.updatedEntries++;
              }
              break;
            case "rename": {
              // Keep both: store the incoming entry under a fresh, timestamped id.
              const newId = `${transformedEntry.id}_imported_${Date.now()}`;
              if (!options.dryRun) {
                await this.storage.store({ ...transformedEntry, id: newId });
                result.imported++;
                result.summary.newEntries++;
              }
              break;
            }
          }
        } else {
          // No conflict: plain insert.
          if (!options.dryRun) {
            await this.storage.store(transformedEntry);
            result.imported++;
            result.summary.newEntries++;
          }
        }
      } catch (error) {
        // Per-entry failures are recorded but do not abort the batch.
        result.errors++;
        result.summary.failedEntries++;
        result.errorDetails.push(
          error instanceof Error ? error.message : String(error),
        );
      }
    }

    // Import learning data if present
    if (data.learning && !options.dryRun) {
      await this.importLearningData(data.learning);
    }

    // Import knowledge graph if present
    if (data.knowledgeGraph && !options.dryRun) {
      await this.importKnowledgeGraphData(data.knowledgeGraph);
    }

    return result;
  }
1389 |
1390 | private mergeEntries(
1391 | existing: MemoryEntry,
1392 | imported: MemoryEntry,
1393 | ): MemoryEntry {
1394 | return {
1395 | ...existing,
1396 | ...imported,
1397 | data: { ...existing.data, ...imported.data },
1398 | metadata: { ...existing.metadata, ...imported.metadata },
1399 | tags: [...new Set([...(existing.tags || []), ...(imported.tags || [])])],
1400 | timestamp: imported.timestamp || existing.timestamp,
1401 | };
1402 | }
1403 |
1404 | private async importLearningData(learningData: any): Promise<void> {
1405 | if (learningData.patterns && Array.isArray(learningData.patterns)) {
1406 | for (const pattern of learningData.patterns) {
1407 | // This would require methods to import patterns into the learning system
1408 | // For now, just emit an event
1409 | this.emit("learning_pattern_imported", pattern);
1410 | }
1411 | }
1412 | }
1413 |
1414 | private async importKnowledgeGraphData(kgData: any): Promise<void> {
1415 | if (kgData.nodes && Array.isArray(kgData.nodes)) {
1416 | for (const node of kgData.nodes) {
1417 | await this.knowledgeGraph.addNode(node);
1418 | }
1419 | }
1420 |
1421 | if (kgData.edges && Array.isArray(kgData.edges)) {
1422 | for (const edge of kgData.edges) {
1423 | await this.knowledgeGraph.addEdge(edge);
1424 | }
1425 | }
1426 | }
1427 |
1428 | private async createBackup(): Promise<string> {
1429 | const backupPath = `backup_${Date.now()}.json`;
1430 | const exportResult = await this.exportMemories(backupPath, {
1431 | format: "json",
1432 | includeMetadata: true,
1433 | includeLearning: true,
1434 | includeKnowledgeGraph: true,
1435 | });
1436 |
1437 | this.emit("backup_created", { path: exportResult.filePath });
1438 | return exportResult.filePath || backupPath;
1439 | }
1440 |
1441 | private convertToXML(data: any): string {
1442 | // Simple XML conversion - in production, use a proper XML library
1443 | const escapeXML = (str: string) =>
1444 | str
1445 | .replace(/&/g, "&")
1446 | .replace(/</g, "<")
1447 | .replace(/>/g, ">")
1448 | .replace(/"/g, """)
1449 | .replace(/'/g, "'");
1450 |
1451 | let xml = '<?xml version="1.0" encoding="UTF-8"?>\n<export>\n';
1452 | xml += ` <metadata>\n`;
1453 | xml += ` <version>${escapeXML(data.metadata.version)}</version>\n`;
1454 | xml += ` <exportedAt>${escapeXML(
1455 | data.metadata.exportedAt,
1456 | )}</exportedAt>\n`;
1457 | xml += ` <entries>${data.metadata.entries}</entries>\n`;
1458 | xml += ` </metadata>\n`;
1459 | xml += ` <memories>\n`;
1460 |
1461 | for (const memory of data.memories) {
1462 | xml += ` <memory>\n`;
1463 | xml += ` <id>${escapeXML(memory.id)}</id>\n`;
1464 | xml += ` <timestamp>${escapeXML(memory.timestamp)}</timestamp>\n`;
1465 | xml += ` <type>${escapeXML(memory.type)}</type>\n`;
1466 | xml += ` <data>${escapeXML(JSON.stringify(memory.data))}</data>\n`;
1467 | xml += ` </memory>\n`;
1468 | }
1469 |
1470 | xml += ` </memories>\n`;
1471 | xml += "</export>";
1472 |
1473 | return xml;
1474 | }
1475 |
  /**
   * Serialize export data to a minimal YAML document with a comment
   * header. Hand-rolled recursive emitter: scalars are rendered inline;
   * arrays and objects emit a leading newline followed by one
   * indented line per item/key.
   *
   * NOTE(review): string escaping only handles double quotes — values
   * containing backslashes or newlines would produce invalid YAML;
   * acceptable for the simple exports this serves, but use a proper
   * YAML library in production.
   */
  private convertToYAML(data: any): string {
    // Simple YAML conversion - in production, use a proper YAML library
    const indent = (level: number) => " ".repeat(level);
    const toYAML = (obj: any, level: number = 0): string => {
      // Scalar cases render inline.
      if (obj === null) return "null";
      if (typeof obj === "boolean") return obj.toString();
      if (typeof obj === "number") return obj.toString();
      if (typeof obj === "string") return `"${obj.replace(/"/g, '\\"')}"`;

      if (Array.isArray(obj)) {
        if (obj.length === 0) return "[]";
        // Nested values are trimmed so their own leading newline/indent
        // does not double up after the "- " marker.
        return (
          "\n" +
          obj
            .map(
              (item) => `${indent(level)}- ${toYAML(item, level + 1).trim()}`,
            )
            .join("\n")
        );
      }

      if (typeof obj === "object") {
        const keys = Object.keys(obj);
        if (keys.length === 0) return "{}";
        return (
          "\n" +
          keys
            .map(
              (key) =>
                `${indent(level)}${key}: ${toYAML(obj[key], level + 1).trim()}`,
            )
            .join("\n")
        );
      }

      // Fallback for remaining primitives (e.g. bigint, symbol).
      return obj.toString();
    };

    return `# DocuMCP Memory Export\n${toYAML(data)}`;
  }
1516 |
1517 | // Additional helper methods for migration
1518 | private generateFieldMappings(
1519 | sourceSchema: any,
1520 | targetSchema: any,
1521 | ): Record<string, string> {
1522 | const mappings: Record<string, string> = {};
1523 |
1524 | // Simple field name matching - in production, use more sophisticated mapping
1525 | const sourceFields = Object.keys(sourceSchema.fields || {});
1526 | const targetFields = Object.keys(targetSchema.fields || {});
1527 |
1528 | for (const sourceField of sourceFields) {
1529 | // Direct match
1530 | if (targetFields.includes(sourceField)) {
1531 | mappings[sourceField] = sourceField;
1532 | continue;
1533 | }
1534 |
1535 | // Fuzzy matching
1536 | const similar = targetFields.find(
1537 | (tf) =>
1538 | tf.toLowerCase().includes(sourceField.toLowerCase()) ||
1539 | sourceField.toLowerCase().includes(tf.toLowerCase()),
1540 | );
1541 |
1542 | if (similar) {
1543 | mappings[sourceField] = similar;
1544 | }
1545 | }
1546 |
1547 | return mappings;
1548 | }
1549 |
1550 | private generateTransformations(
1551 | sourceSchema: any,
1552 | targetSchema: any,
1553 | mapping: Record<string, string>,
1554 | ): MigrationPlan["transformations"] {
1555 | const transformations: MigrationPlan["transformations"] = [];
1556 |
1557 | // Generate transformations based on field mappings and type differences
1558 | for (const [sourceField, targetField] of Object.entries(mapping)) {
1559 | const sourceType = sourceSchema.fields?.[sourceField]?.type;
1560 | const targetType = targetSchema.fields?.[targetField]?.type;
1561 |
1562 | if (sourceType !== targetType) {
1563 | transformations.push({
1564 | field: targetField,
1565 | type: "convert",
1566 | source: sourceField,
1567 | target: targetField,
1568 | operation: `${sourceType}_to_${targetType}`,
1569 | });
1570 | } else {
1571 | transformations.push({
1572 | field: targetField,
1573 | type: "rename",
1574 | source: sourceField,
1575 | target: targetField,
1576 | });
1577 | }
1578 | }
1579 |
1580 | return transformations;
1581 | }
1582 |
1583 | private generateValidationRules(
1584 | targetSchema: any,
1585 | ): MigrationPlan["validation"] {
1586 | const validation: MigrationPlan["validation"] = [];
1587 |
1588 | // Generate validation rules based on target schema
1589 | if (targetSchema.fields) {
1590 | for (const [field, config] of Object.entries(targetSchema.fields)) {
1591 | const rules: string[] = [];
1592 | const fieldConfig = config as any;
1593 |
1594 | if (fieldConfig.required) {
1595 | rules.push("required");
1596 | }
1597 |
1598 | if (fieldConfig.type) {
1599 | rules.push(`type:${fieldConfig.type}`);
1600 | }
1601 |
1602 | if (fieldConfig.format) {
1603 | rules.push(`format:${fieldConfig.format}`);
1604 | }
1605 |
1606 | validation.push({
1607 | field,
1608 | rules,
1609 | required: fieldConfig.required || false,
1610 | });
1611 | }
1612 | }
1613 |
1614 | return validation;
1615 | }
1616 |
1617 | private generatePostProcessingSteps(targetSchema: any): string[] {
1618 | const steps: string[] = [];
1619 |
1620 | // Generate post-processing steps
1621 | steps.push("rebuild_indices");
1622 | steps.push("update_references");
1623 | steps.push("validate_integrity");
1624 |
1625 | if (targetSchema.features?.learning) {
1626 | steps.push("retrain_models");
1627 | }
1628 |
1629 | if (targetSchema.features?.knowledgeGraph) {
1630 | steps.push("rebuild_graph");
1631 | }
1632 |
1633 | return steps;
1634 | }
1635 |
1636 | private async loadRawData(inputPath: string): Promise<any> {
1637 | const content = await fs.readFile(inputPath, "utf8");
1638 | try {
1639 | return JSON.parse(content);
1640 | } catch {
1641 | return { raw: content };
1642 | }
1643 | }
1644 |
1645 | private async applyTransformations(
1646 | data: any,
1647 | plan: MigrationPlan,
1648 | ): Promise<any> {
1649 | const transformed = JSON.parse(JSON.stringify(data)); // Deep clone
1650 |
1651 | for (const transformation of plan.transformations) {
1652 | // Apply transformation based on type
1653 | switch (transformation.type) {
1654 | case "rename":
1655 | this.renameField(
1656 | transformed,
1657 | transformation.source as string,
1658 | transformation.target,
1659 | );
1660 | break;
1661 | case "convert":
1662 | this.convertField(
1663 | transformed,
1664 | transformation.source as string,
1665 | transformation.target,
1666 | transformation.operation,
1667 | );
1668 | break;
1669 | // Add more transformation types as needed
1670 | }
1671 | }
1672 |
1673 | return transformed;
1674 | }
1675 |
1676 | private renameField(obj: any, oldName: string, newName: string): void {
1677 | if (typeof obj !== "object" || obj === null) return;
1678 |
1679 | if (Array.isArray(obj)) {
1680 | obj.forEach((item) => this.renameField(item, oldName, newName));
1681 | } else {
1682 | if (oldName in obj) {
1683 | obj[newName] = obj[oldName];
1684 | delete obj[oldName];
1685 | }
1686 |
1687 | Object.values(obj).forEach((value) =>
1688 | this.renameField(value, oldName, newName),
1689 | );
1690 | }
1691 | }
1692 |
  /**
   * Recursively convert a field's value throughout an object tree,
   * writing the result under targetName and deleting the source key
   * when the names differ (mutates in place).
   *
   * Supported operations: string_to_number, number_to_string,
   * array_to_string (comma join), string_to_array (comma split);
   * anything else copies the value unchanged.
   *
   * NOTE(review): the recursion over Object.values runs after the
   * conversion at the current level, so it also visits the value just
   * written at targetName — if that value is an object containing
   * fieldName it gets converted too. Confirm that re-visit is intended.
   * Cyclic structures are not supported.
   */
  private convertField(
    obj: any,
    fieldName: string,
    targetName: string,
    operation?: string,
  ): void {
    if (typeof obj !== "object" || obj === null) return;

    if (Array.isArray(obj)) {
      obj.forEach((item) =>
        this.convertField(item, fieldName, targetName, operation),
      );
    } else {
      if (fieldName in obj) {
        const value = obj[fieldName];

        // Apply conversion based on operation
        switch (operation) {
          case "string_to_number":
            obj[targetName] = Number(value);
            break;
          case "number_to_string":
            obj[targetName] = String(value);
            break;
          case "array_to_string":
            obj[targetName] = Array.isArray(value) ? value.join(",") : value;
            break;
          case "string_to_array":
            obj[targetName] =
              typeof value === "string" ? value.split(",") : value;
            break;
          default:
            // Unknown/absent operation: plain move/copy without conversion.
            obj[targetName] = value;
        }

        if (fieldName !== targetName) {
          delete obj[fieldName];
        }
      }

      Object.values(obj).forEach((value) =>
        this.convertField(value, fieldName, targetName, operation),
      );
    }
  }
1738 |
1739 | private convertToImportFormat(data: any, plan: MigrationPlan): any {
1740 | // Convert transformed data to standard import format
1741 | const memories = Array.isArray(data) ? data : data.memories || [data];
1742 |
1743 | // Convert old format to new MemoryEntry format
1744 | const convertedMemories = memories.map((entry: any) => {
1745 | // If already in new format, return as-is
1746 | if (entry.data && entry.metadata) {
1747 | return entry;
1748 | }
1749 |
1750 | // Convert old flat format to new structured format
1751 | const converted: any = {
1752 | id:
1753 | entry.id ||
1754 | `migrated_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`,
1755 | type: entry.type || "analysis",
1756 | timestamp: entry.timestamp || new Date().toISOString(),
1757 | data: {},
1758 | metadata: {},
1759 | };
1760 |
1761 | // Move known fields to appropriate locations
1762 | const dataFields = [
1763 | "language",
1764 | "recommendation",
1765 | "framework",
1766 | "outcome",
1767 | "success",
1768 | ];
1769 | const metadataFields = [
1770 | "project",
1771 | "projectId",
1772 | "repository",
1773 | "ssg",
1774 | "tags",
1775 | ];
1776 |
1777 | for (const [key, value] of Object.entries(entry)) {
1778 | if (["id", "type", "timestamp"].includes(key)) {
1779 | // Already handled above
1780 | continue;
1781 | } else if (dataFields.includes(key)) {
1782 | converted.data[key] = value;
1783 | } else if (metadataFields.includes(key)) {
1784 | if (key === "project") {
1785 | converted.metadata.projectId = value; // Convert old 'project' field to 'projectId'
1786 | } else {
1787 | converted.metadata[key] = value;
1788 | }
1789 | } else {
1790 | // Put unknown fields in data
1791 | converted.data[key] = value;
1792 | }
1793 | }
1794 |
1795 | return converted;
1796 | });
1797 |
1798 | return {
1799 | metadata: {
1800 | version: this.version,
1801 | migrated: true,
1802 | migrationPlan: plan.sourceSystem,
1803 | importedAt: new Date().toISOString(),
1804 | },
1805 | memories: convertedMemories,
1806 | };
1807 | }
1808 |
1809 | private async executePostProcessing(steps: string[]): Promise<void> {
1810 | for (const step of steps) {
1811 | try {
1812 | switch (step) {
1813 | case "rebuild_indices":
1814 | await this.storage.rebuildIndex();
1815 | break;
1816 | case "update_references":
1817 | // Update cross-references in data
1818 | break;
1819 | case "validate_integrity":
1820 | // Validate data integrity
1821 | break;
1822 | case "retrain_models":
1823 | // Trigger learning system retraining
1824 | break;
1825 | case "rebuild_graph":
1826 | // Rebuild knowledge graph
1827 | break;
1828 | }
1829 |
1830 | this.emit("post_processing_step_completed", { step });
1831 | } catch (error) {
1832 | this.emit("post_processing_step_failed", {
1833 | step,
1834 | error: error instanceof Error ? error.message : String(error),
1835 | });
1836 | }
1837 | }
1838 | }
1839 |
1840 | private async loadSampleData(
1841 | sourcePath: string,
1842 | format: string,
1843 | ): Promise<any> {
1844 | // Load a small sample of data for validation
1845 | if (format === "json") {
1846 | const content = await fs.readFile(sourcePath, "utf8");
1847 | const data = JSON.parse(content);
1848 |
1849 | if (data.memories && Array.isArray(data.memories)) {
1850 | return { memories: data.memories.slice(0, 10) }; // First 10 entries
1851 | }
1852 |
1853 | return data;
1854 | }
1855 |
1856 | // For other formats, return basic structure info
1857 | return { format, sampleLoaded: true };
1858 | }
1859 |
1860 | private validateSchema(sampleData: any): string[] {
1861 | const issues: string[] = [];
1862 |
1863 | if (!sampleData.memories && !Array.isArray(sampleData)) {
1864 | issues.push("Expected memories array not found");
1865 | }
1866 |
1867 | const memories =
1868 | sampleData.memories || (Array.isArray(sampleData) ? sampleData : []);
1869 |
1870 | if (memories.length > 0) {
1871 | const firstEntry = memories[0];
1872 |
1873 | if (!firstEntry.id) {
1874 | issues.push("Memory entries missing required id field");
1875 | }
1876 |
1877 | if (!firstEntry.timestamp) {
1878 | issues.push("Memory entries missing required timestamp field");
1879 | }
1880 |
1881 | if (!firstEntry.type) {
1882 | issues.push("Memory entries missing required type field");
1883 | }
1884 |
1885 | if (!firstEntry.data) {
1886 | issues.push("Memory entries missing required data field");
1887 | }
1888 | }
1889 |
1890 | return issues;
1891 | }
1892 |
1893 | private validateDataIntegrity(sampleData: any): string[] {
1894 | const issues: string[] = [];
1895 |
1896 | const memories =
1897 | sampleData.memories || (Array.isArray(sampleData) ? sampleData : []);
1898 |
1899 | // Check for duplicate IDs
1900 | const ids = new Set();
1901 | const duplicates = new Set();
1902 |
1903 | for (const entry of memories) {
1904 | if (entry.id) {
1905 | if (ids.has(entry.id)) {
1906 | duplicates.add(entry.id);
1907 | } else {
1908 | ids.add(entry.id);
1909 | }
1910 | }
1911 | }
1912 |
1913 | if (duplicates.size > 0) {
1914 | issues.push(`Found ${duplicates.size} duplicate IDs`);
1915 | }
1916 |
1917 | // Check timestamp validity
1918 | let invalidTimestamps = 0;
1919 | for (const entry of memories) {
1920 | if (entry.timestamp && isNaN(new Date(entry.timestamp).getTime())) {
1921 | invalidTimestamps++;
1922 | }
1923 | }
1924 |
1925 | if (invalidTimestamps > 0) {
1926 | issues.push(`Found ${invalidTimestamps} invalid timestamps`);
1927 | }
1928 |
1929 | return issues;
1930 | }
1931 |
1932 | /**
1933 | * Apply field mappings and transformations to import data
1934 | */
1935 | private applyDataTransformations(entry: any, options: ImportOptions): any {
1936 | const transformed = JSON.parse(JSON.stringify(entry)); // Deep clone
1937 |
1938 | // Apply field mappings first
1939 | if (options.mapping) {
1940 | for (const [sourcePath, targetPath] of Object.entries(options.mapping)) {
1941 | const sourceValue = this.getValueByPath(transformed, sourcePath);
1942 | if (sourceValue !== undefined) {
1943 | this.setValueByPath(transformed, targetPath, sourceValue);
1944 | this.deleteValueByPath(transformed, sourcePath);
1945 | }
1946 | }
1947 | }
1948 |
1949 | // Apply transformations
1950 | if (options.transformation?.enabled && options.transformation.rules) {
1951 | for (const rule of options.transformation.rules) {
1952 | switch (rule.operation) {
1953 | case "transform":
1954 | if (rule.params?.value !== undefined) {
1955 | this.setValueByPath(transformed, rule.field, rule.params.value);
1956 | }
1957 | break;
1958 | case "convert":
1959 | // Apply conversion based on params
1960 | break;
1961 | }
1962 | }
1963 | }
1964 |
1965 | return transformed;
1966 | }
1967 |
1968 | /**
1969 | * Get value from object using dot notation path
1970 | */
1971 | private getValueByPath(obj: any, path: string): any {
1972 | return path.split(".").reduce((current, key) => current?.[key], obj);
1973 | }
1974 |
1975 | /**
1976 | * Set value in object using dot notation path
1977 | */
1978 | private setValueByPath(obj: any, path: string, value: any): void {
1979 | const keys = path.split(".");
1980 | const lastKey = keys.pop()!;
1981 | const target = keys.reduce((current, key) => {
1982 | if (!(key in current)) {
1983 | current[key] = {};
1984 | }
1985 | return current[key];
1986 | }, obj);
1987 | target[lastKey] = value;
1988 | }
1989 |
1990 | /**
1991 | * Delete value from object using dot notation path
1992 | */
1993 | private deleteValueByPath(obj: any, path: string): void {
1994 | const keys = path.split(".");
1995 | const lastKey = keys.pop()!;
1996 | const target = keys.reduce((current, key) => current?.[key], obj);
1997 | if (target && typeof target === "object") {
1998 | delete target[lastKey];
1999 | }
2000 | }
2001 | }
2002 |
```