# Directory Structure

```
├── .cargo
│   └── config.toml
├── .env
├── .gitignore
├── .vscode
│   ├── extensions.json
│   └── settings.json
├── Cargo.lock
├── Cargo.toml
├── data
│   └── .keep
├── documents
│   ├── .DS_Store
│   └── .keep
├── how-to-use.md
├── Makefile
├── README.md
├── setup.md
└── src
    ├── embeddings.rs
    ├── main.rs
    ├── mcp_server.rs
    └── rag_engine.rs
```

# Files

--------------------------------------------------------------------------------
/data/.keep:
--------------------------------------------------------------------------------

```

```

--------------------------------------------------------------------------------
/documents/.keep:
--------------------------------------------------------------------------------

```

```

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------

```
/target
/logs

documents/*.pdf
data/chunks.json

!documents/.keep
!data/.keep

```

--------------------------------------------------------------------------------
/.env:
--------------------------------------------------------------------------------

```
DATA_DIR=./data
DOCUMENTS_DIR=./documents
LOG_DIR=./logs
LOG_LEVEL=info
LOG_MAX_MB=5

# Development and debugging configuration (uncomment to enable)
# DEVELOPMENT=true
# DEV=true
# CONSOLE_LOGS=true

```

--------------------------------------------------------------------------------
/.vscode/extensions.json:
--------------------------------------------------------------------------------

```json
{
    "recommendations": [
        "rust-lang.rust-analyzer",
        "vadimcn.vscode-lldb",
        "serayuzgur.crates"
    ]
}

```

--------------------------------------------------------------------------------
/.cargo/config.toml:
--------------------------------------------------------------------------------

```toml
[alias]
c = "check"
cc = "clippy"
ccf = "clippy --fix --allow-dirty --allow-staged"
ccd = ["clippy", "--", "-D", "warnings"]
f = "fmt"
t = "test"
r = "run"
b = "build"
br = "build --release"

[build]
rustflags = ["-D", "warnings"]

```

--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------

```toml
[package]
name = "rust-local-rag"
version = "0.1.0"
edition = "2024"
authors = ["Mehmet Koray Sariteke"]
description = "A local RAG (Retrieval-Augmented Generation) MCP server built with Rust"
license = "MIT"
readme = "README.md"
repository = "https://github.com/yourusername/rust-local-rag"
keywords = ["rag", "mcp", "llm", "embeddings", "pdf"]
categories = ["command-line-utilities"]

[[bin]]
name = "rust-local-rag"
path = "src/main.rs"

[dependencies]
rmcp = { version = "0.1", features = ["transport-io"] }
rmcp-macros = "0.1"
schemars = "0.9"
tokio = { version = "1", features = ["full"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
reqwest = { version = "0.12", features = ["json"] }
anyhow = "1.0"
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = [
    "fmt",
    "json",
    "time",
    "env-filter",
] }
uuid = { version = "1.17", features = ["v4"] }
walkdir = "2.5"
dotenv = "0.15"

```

--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------

```json
{
    "rust-analyzer.server.path": "rust-analyzer",
    "rust-analyzer.cargo.buildScripts.enable": true,
    "rust-analyzer.checkOnSave.command": "cargo check",
    "rust-analyzer.completion.addCallParentheses": true,
    "rust-analyzer.completion.addCallArgumentSnippets": true,
    "rust-analyzer.inlayHints.enable": true,
    "rust-analyzer.inlayHints.parameterHints.enable": true,
    "rust-analyzer.inlayHints.typeHints.enable": true,
    "rust-analyzer.lens.enable": true,
    "rust-analyzer.lens.methodReferences": true,
    "rust-analyzer.hover.actions.enable": true,
    "editor.formatOnSave": true,
    "[rust]": {
        "editor.defaultFormatter": "rust-lang.rust-analyzer",
        "editor.tabSize": 4,
        "editor.insertSpaces": true
    },
    "rust-analyzer.diagnostics.enable": true,
    "rust-analyzer.procMacro.enable": true,
    "rust-analyzer.cargo.allFeatures": true,
    "rust-analyzer.cargo.loadOutDirsFromCheck": true,
    "rust-analyzer.cargo.runBuildScripts": true,
    "rust-analyzer.workspace.symbol.search.scope": "workspace_and_dependencies",
    "rust-analyzer.workspace.symbol.search.kind": "all_symbols",
    "rust-analyzer.check.command": "clippy",
    "rust-analyzer.check.allTargets": false,
    "rust-analyzer.checkOnSave": true,
    "rust-analyzer.cargo.autoreload": true
}

```

--------------------------------------------------------------------------------
/src/embeddings.rs:
--------------------------------------------------------------------------------

```rust
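//! Embedding generation via a local Ollama server.
//!
//! Talks to Ollama's `/api/embeddings` endpoint (default: http://localhost:11434)
//! using the `nomic-embed-text` model, and verifies the connection on startup.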
use anyhow::Result;
use serde::{Deserialize, Serialize};

#[derive(Serialize)]
struct OllamaEmbeddingRequest {
    model: String,
    prompt: String,
}

#[derive(Deserialize)]
struct OllamaEmbeddingResponse {
    embedding: Vec<f32>,
}

pub struct EmbeddingService {
    client: reqwest::Client,
    ollama_url: String,
    model: String,
}

impl EmbeddingService {
    pub async fn new() -> Result<Self> {
        let service = Self {
            client: reqwest::Client::new(),
            ollama_url: "http://localhost:11434".to_string(),
            model: "nomic-embed-text".to_string(),
        };

        service.test_connection().await?;

        Ok(service)
    }

    pub async fn get_embedding(&self, text: &str) -> Result<Vec<f32>> {
        let request = OllamaEmbeddingRequest {
            model: self.model.clone(),
            prompt: text.to_string(),
        };

        let response = self
            .client
            .post(format!("{}/api/embeddings", self.ollama_url))
            .json(&request)
            .send()
            .await?;

        if !response.status().is_success() {
            return Err(anyhow::anyhow!(
                "Ollama API error: {} - {}",
                response.status(),
                response.text().await.unwrap_or_default()
            ));
        }

        let embedding_response: OllamaEmbeddingResponse = response.json().await?;
        Ok(embedding_response.embedding)
    }

    async fn test_connection(&self) -> Result<()> {
        let response = self
            .client
            .get(format!("{}/api/tags", self.ollama_url))
            .send()
            .await?;

        if !response.status().is_success() {
            return Err(anyhow::anyhow!(
                "Cannot connect to Ollama at {}. Make sure Ollama is running.",
                self.ollama_url
            ));
        }

        tracing::info!("Successfully connected to Ollama at {}", self.ollama_url);
        Ok(())
    }
}

```

--------------------------------------------------------------------------------
/setup.md:
--------------------------------------------------------------------------------

```markdown
# Rust Local RAG - Setup for Claude Desktop

## Prerequisites

### 1. Install Rust
```bash
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
source ~/.cargo/env
```

### 2. Install Ollama
```bash
# macOS
brew install ollama

# Linux  
curl -fsSL https://ollama.com/install.sh | sh

# Start Ollama
ollama serve

# Install embedding model
ollama pull nomic-embed-text
```

### 3. Install Poppler (for PDF parsing)
```bash
# macOS
brew install poppler

# Linux (Ubuntu/Debian)
sudo apt-get install poppler-utils

# Linux (CentOS/RHEL)
sudo yum install poppler-utils
```

## Setup

### 1. Build and Install
```bash
# Clone and build
git clone <repository-url>
cd rust-local-rag
cargo build --release

# Install globally
cargo install --path .
```

### 2. Create Directories and Add Documents
```bash
# Create required directories
mkdir -p ~/Documents/data
mkdir -p ~/Documents/rag
mkdir -p /tmp/rust-local-rag

# Add your PDF documents
cp your-pdfs/*.pdf ~/Documents/rag/
```

## Claude Desktop Integration

### 1. Find Claude Desktop Config
- **macOS**: `~/Library/Application Support/Claude/claude_desktop_config.json`
- **Windows**: `%APPDATA%\Claude\claude_desktop_config.json`
- **Linux**: `~/.config/Claude/claude_desktop_config.json`

### 2. Add This Configuration
```json
{
    "mcpServers": {
        "rust-local-rag": {
            "command": "/Users/yourusername/.cargo/bin/rust-local-rag",
            "env": {
                "DATA_DIR": "/Users/yourusername/Documents/data",
                "DOCUMENTS_DIR": "/Users/yourusername/Documents/rag",
                "LOG_DIR": "/tmp/rust-local-rag",
                "LOG_LEVEL": "info",
                "LOG_MAX_MB": "10"
            }
        }
    }
}
```

**Important**: Replace `yourusername` with your actual username, or use absolute paths specific to your system.

### 3. Find Your Actual Paths
```bash
# Find your cargo bin directory
echo "$HOME/.cargo/bin/rust-local-rag"

# Verify the binary exists
which rust-local-rag
```

### 4. Restart Claude Desktop

## How PDF Processing Works

The application automatically:

1. **Scans the documents directory** on startup for PDF files
2. **Extracts text** using poppler's `pdftotext` utility
3. **Chunks the text** into ~500-word segments (see the sketch below)
4. **Generates embeddings** using Ollama's `nomic-embed-text` model
5. **Stores embeddings** in the data directory for fast retrieval
6. **Indexes documents** for semantic search
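
The chunking step is a simple word-based split. A minimal sketch of the same logic as `chunk_text` in `src/rag_engine.rs`:

```rust
// Word-based chunking as used by the engine: `chunk_size` words per chunk
// (the engine calls this with 500).
fn chunk_text(text: &str, chunk_size: usize) -> Vec<String> {
    text.split_whitespace()
        .collect::<Vec<_>>()
        .chunks(chunk_size)
        .map(|words| words.join(" "))
        .filter(|c| !c.trim().is_empty())
        .collect()
}

fn main() {
    let chunks = chunk_text("one two three four five six", 3);
    assert_eq!(chunks, vec!["one two three", "four five six"]);
}
```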

Supported formats:
- PDF files (via poppler)
- Text extraction preserves basic formatting
- Each document is split into searchable chunks

## Troubleshooting

### Installation Issues
1. **Rust not found**: Restart terminal after installing Rust
2. **Ollama connection failed**: Ensure `ollama serve` is running
3. **Poppler not found**: Verify installation with `pdftotext --version`

### Claude Desktop Issues
1. **Binary not found**: Check path with `which rust-local-rag`
2. **Permission denied**: Ensure directories are writable
3. **No documents indexed**: Check PDF files exist in `DOCUMENTS_DIR`
4. **Connection failed**: Check logs in `LOG_DIR` directory

### PDF Processing Issues
1. **Text extraction failed**: Ensure PDFs are not password-protected or corrupted
2. **Empty results**: Some PDFs may be image-only (scanned documents)
3. **Slow indexing**: Large documents take time to process on first run

### Log Files
Check application logs for detailed error information:
```bash
# View latest logs
tail -f /tmp/rust-local-rag/rust-local-rag.log

# Check for errors
grep -i error /tmp/rust-local-rag/rust-local-rag.log
```

That's it! Your documents will be automatically indexed and searchable in Claude Desktop.

```

--------------------------------------------------------------------------------
/src/mcp_server.rs:
--------------------------------------------------------------------------------

```rust
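//! MCP server layer: exposes the RAG engine to MCP clients (e.g. Claude
//! Desktop) as three tools: `search_documents`, `list_documents`, `get_stats`.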
use anyhow::Result;
use rmcp::{
    Error as McpError, ServerHandler, ServiceExt, model::*, schemars, tool, transport::stdio,
};

use std::sync::Arc;
use tokio::sync::RwLock;

use crate::rag_engine::RagEngine;

#[derive(Debug, serde::Deserialize, schemars::JsonSchema)]
pub struct SearchRequest {
    #[schemars(description = "The search query")]
    pub query: String,
    #[schemars(description = "Number of results to return (default: 5)")]
    pub top_k: Option<usize>,
}

#[derive(Clone)]
pub struct RagMcpServer {
    rag_state: Arc<RwLock<RagEngine>>,
}

#[tool(tool_box)]
impl RagMcpServer {
    pub fn new(rag_state: Arc<RwLock<RagEngine>>) -> Self {
        Self { rag_state }
    }

    #[tool(description = "Search through uploaded documents using semantic similarity")]
    async fn search_documents(
        &self,
        #[tool(aggr)] SearchRequest { query, top_k }: SearchRequest,
    ) -> Result<CallToolResult, McpError> {
        let top_k = top_k.unwrap_or(5);
        let engine = self.rag_state.read().await;

        match engine.search(&query, top_k).await {
            Ok(results) => {
                let formatted_results = if results.is_empty() {
                    "No results found.".to_string()
                } else {
                    results
                        .iter()
                        .enumerate()
                        .map(|(i, result)| {
                            format!(
                                "**Result {}** (Score: {:.3}) [{}] (Chunk: {})\n{}\n",
                                i + 1,
                                result.score,
                                result.document,
                                result.chunk_id,
                                result.text
                            )
                        })
                        .collect::<Vec<_>>()
                        .join("\n---\n\n")
                };

                Ok(CallToolResult::success(vec![Content::text(format!(
                    "Found {} results for '{}':\n\n{}",
                    results.len(),
                    query,
                    formatted_results
                ))]))
            }
            Err(e) => Ok(CallToolResult {
                content: vec![Content::text(format!("Search error: {}", e))],
                is_error: Some(true),
            }),
        }
    }

    #[tool(description = "List all uploaded documents")]
    async fn list_documents(&self) -> Result<CallToolResult, McpError> {
        let engine = self.rag_state.read().await;
        let documents = engine.list_documents();

        let response = if documents.is_empty() {
            "No documents uploaded yet.".to_string()
        } else {
            format!(
                "Uploaded documents ({}):\n{}",
                documents.len(),
                documents
                    .iter()
                    .enumerate()
                    .map(|(i, doc)| format!("{}. {}", i + 1, doc))
                    .collect::<Vec<_>>()
                    .join("\n")
            )
        };

        Ok(CallToolResult::success(vec![Content::text(response)]))
    }

    #[tool(description = "Get RAG system statistics")]
    async fn get_stats(&self) -> Result<CallToolResult, McpError> {
        let engine = self.rag_state.read().await;
        let stats = engine.get_stats();

        Ok(CallToolResult::success(vec![Content::text(format!(
            "RAG System Stats:\n{}",
            serde_json::to_string_pretty(&stats).unwrap()
        ))]))
    }
}

#[tool(tool_box)]
impl ServerHandler for RagMcpServer {
    fn get_info(&self) -> ServerInfo {
        ServerInfo {
            protocol_version: ProtocolVersion::V_2024_11_05,
            capabilities: ServerCapabilities::builder().enable_tools().build(),
            server_info: Implementation {
                name: "rust-rag-server".to_string(),
                version: "0.1.0".to_string(),
            },
            instructions: Some(
                "A Rust-based RAG server for document search and analysis.".to_string(),
            ),
        }
    }
}

pub async fn start_mcp_server(rag_state: Arc<RwLock<RagEngine>>) -> Result<()> {
    tracing::info!("Starting MCP server");

    let server = RagMcpServer::new(rag_state);
    let service = server.serve(stdio()).await?;

    service.waiting().await?;

    Ok(())
}

```

--------------------------------------------------------------------------------
/src/main.rs:
--------------------------------------------------------------------------------

```rust
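//! Entry point: loads `.env`, configures logging (console in development,
//! JSON file otherwise), spawns background document indexing, and serves
//! MCP over stdio.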
use anyhow::Result;
use std::sync::Arc;
use tokio::sync::RwLock;
use tracing_subscriber::EnvFilter;

mod embeddings;
mod mcp_server;
mod rag_engine;

use rag_engine::RagEngine;

fn get_data_dir() -> String {
    std::env::var("DATA_DIR").unwrap_or_else(|_| "./data".to_string())
}

fn get_documents_dir() -> String {
    std::env::var("DOCUMENTS_DIR").unwrap_or_else(|_| "./documents".to_string())
}

fn get_log_dir() -> String {
    std::env::var("LOG_DIR").unwrap_or_else(|_| {
        if std::path::Path::new("/var/log").exists() && is_writable("/var/log") {
            "/var/log/rust-local-rag".to_string()
        } else {
            "./logs".to_string()
        }
    })
}

fn get_log_level() -> String {
    std::env::var("LOG_LEVEL").unwrap_or_else(|_| "info".to_string())
}

fn get_log_max_mb() -> u64 {
    std::env::var("LOG_MAX_MB")
        .ok()
        .and_then(|s| s.parse().ok())
        .unwrap_or(5)
}

fn is_writable(path: &str) -> bool {
    std::fs::OpenOptions::new()
        .create(true)
        .write(true)
        .open(format!("{}/test_write", path))
        .map(|_| {
            let _ = std::fs::remove_file(format!("{}/test_write", path));
            true
        })
        .unwrap_or(false)
}

fn setup_logging() -> Result<()> {
    let log_dir = get_log_dir();
    let log_level = get_log_level();
    let log_max_mb = get_log_max_mb();

    std::fs::create_dir_all(&log_dir)?;

    let env_filter =
        EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new(&log_level));

    let is_development = std::env::var("DEVELOPMENT").is_ok() || std::env::var("DEV").is_ok();
    let force_console = std::env::var("CONSOLE_LOGS").is_ok();

    if is_development || force_console {
        tracing_subscriber::fmt()
            .with_env_filter(env_filter)
            .compact()
            .init();
        tracing::info!("Development mode: logging to console");
    } else {
        let log_file = format!("{}/rust-local-rag.log", log_dir);
        let file_appender = std::fs::OpenOptions::new()
            .create(true)
            .append(true)
            .open(&log_file)?;

        tracing_subscriber::fmt()
            .with_env_filter(env_filter)
            .with_writer(file_appender)
            .json()
            .init();
    }

    tracing::info!("Logging initialized");
    tracing::info!("Log directory: {}", log_dir);
    tracing::info!("Log level: {}", log_level);
    tracing::info!("Log max size: {}MB (auto-truncate)", log_max_mb);
    tracing::info!("Development mode: {}", is_development || force_console);

    Ok(())
}

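/// Spawns a background task that checks the log file every 5 minutes and
/// truncates it once it grows past `max_mb` (a simple size cap, not rotation).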
async fn start_log_cleanup_task(log_dir: String, max_mb: u64) {
    let max_bytes = max_mb * 1024 * 1024;
    let log_file = format!("{}/rust-local-rag.log", log_dir);

    tokio::spawn(async move {
        let mut interval = tokio::time::interval(tokio::time::Duration::from_secs(300));

        loop {
            interval.tick().await;

            if let Ok(metadata) = std::fs::metadata(&log_file) {
                if metadata.len() > max_bytes {
                    if let Err(e) = std::fs::write(
                        &log_file,
                        format!("[LOG TRUNCATED - Size exceeded {}MB]\n", max_mb),
                    ) {
                        eprintln!("Failed to truncate log file: {}", e);
                    }
                }
            }
        }
    });
}

#[tokio::main]
async fn main() -> Result<()> {
    if let Err(e) = dotenv::dotenv() {
        eprintln!("Warning: Could not load .env file: {}", e);
    }
    setup_logging()?;

    let data_dir = get_data_dir();
    let documents_dir = get_documents_dir();
    let log_dir = get_log_dir();
    let log_max_mb = get_log_max_mb();

    tokio::fs::create_dir_all(&data_dir).await?;
    tokio::fs::create_dir_all(&documents_dir).await?;

    start_log_cleanup_task(log_dir, log_max_mb).await;
    tracing::info!("Started automatic log cleanup task (max: {}MB)", log_max_mb);

    let rag_engine = RagEngine::new(&data_dir).await?;
    let rag_state = Arc::new(RwLock::new(rag_engine));

    let document_loading_state = rag_state.clone();
    let docs_dir = documents_dir.clone();
    tokio::spawn(async move {
        tracing::info!("Starting document loading in background...");
        let mut engine = document_loading_state.write().await;
        if let Err(e) = engine.load_documents_from_dir(&docs_dir).await {
            tracing::error!("Failed to load documents: {}", e);
        } else {
            tracing::info!("Document loading completed successfully");
        }
    });

    tracing::info!("Starting MCP server (stdin/stdout mode)");
    tracing::info!("Data directory: {}", data_dir);
    tracing::info!("Documents directory: {}", documents_dir);

    mcp_server::start_mcp_server(rag_state).await?;

    Ok(())
}

```

--------------------------------------------------------------------------------
/how-to-use.md:
--------------------------------------------------------------------------------

```markdown
# Rust Local RAG - Claude Desktop Usage

## Claude Desktop Configuration Template

```json
{
    "mcpServers": {
        "rust-local-rag": {
            "command": "/Users/yourusername/.cargo/bin/rust-local-rag",
            "env": {
                "DATA_DIR": "/Users/yourusername/Documents/data",
                "DOCUMENTS_DIR": "/Users/yourusername/Documents/rag",
                "LOG_DIR": "/tmp/rust-local-rag",
                "LOG_LEVEL": "info",
                "LOG_MAX_MB": "10"
            }
        }
    }
}
```

**Important**: Replace `yourusername` with your actual username. Use absolute paths for reliable operation.

## Environment Variables

| Variable | Description | Default |
|----------|-------------|---------|
| `DATA_DIR` | Embeddings storage directory | `./data` |
| `DOCUMENTS_DIR` | PDF documents directory | `./documents` |
| `LOG_DIR` | Log files directory | `./logs` |
| `LOG_LEVEL` | Logging level (error/warn/info/debug) | `info` |
| `LOG_MAX_MB` | Log file size limit in MB | `5` |
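
Each variable is read at startup with a plain `std::env` lookup that falls back to the default, as in `src/main.rs`:

```rust
// Pattern used for every setting in src/main.rs.
fn get_data_dir() -> String {
    std::env::var("DATA_DIR").unwrap_or_else(|_| "./data".to_string())
}
```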

## Adding Documents

### 1. Add PDFs to Documents Directory
```bash
# Copy PDFs to your documents directory
cp your-file.pdf ~/Documents/rag/

# Or move multiple files
mv /path/to/pdfs/*.pdf ~/Documents/rag/
```

### 2. Restart Claude Desktop
The application will automatically:
- Detect new PDF files
- Extract text using poppler
- Generate embeddings
- Index documents for search

## Available MCP Tools

When configured, Claude Desktop can use these tools:

### 1. Search Documents
Search through your documents using semantic similarity.
- **Tool**: `search_documents`
- **Parameters**: `query` (string), `top_k` (optional number, default: 5)

### 2. List Documents  
Get a list of all indexed documents.
- **Tool**: `list_documents`
- **Parameters**: None

### 3. Get Statistics
View RAG system statistics and status.
- **Tool**: `get_stats` 
- **Parameters**: None
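
Claude Desktop constructs these calls for you, but for reference a `search_documents` invocation is an MCP `tools/call` request roughly like this (illustrative values):

```json
{
    "jsonrpc": "2.0",
    "id": 1,
    "method": "tools/call",
    "params": {
        "name": "search_documents",
        "arguments": { "query": "database optimization", "top_k": 3 }
    }
}
```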

## Usage in Claude

Once configured, you can ask Claude to:

### Document Search Examples
- "Search my documents for information about machine learning"
- "What does my documentation say about API authentication?"
- "Find references to database optimization in my PDFs"

### Document Management Examples  
- "List all the documents you can access"
- "Show me statistics about the document index"
- "How many documents do you have indexed?"

### Analysis Examples
- "Summarize the key points from documents about project requirements"
- "Compare what different documents say about security best practices"
- "Find common themes across all my documentation"

## PDF Processing Details

### Supported PDF Types
- ✅ **Text-based PDFs**: Searchable text content
- ✅ **Mixed content**: PDFs with both text and images
- ⚠️ **Scanned PDFs**: Image-only documents (limited text extraction)
- ❌ **Password-protected**: Encrypted PDFs cannot be processed

### Text Extraction Process
1. **PDF to Text**: Uses poppler's `pdftotext` for reliable extraction
2. **Text Chunking**: Splits documents into ~500-word segments
3. **Embedding Generation**: Creates vector embeddings using Ollama
4. **Indexing**: Stores embeddings for fast semantic search
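
Search then ranks chunks by cosine similarity between the query embedding and each stored chunk embedding. The scoring function from `src/rag_engine.rs`:

```rust
// Cosine similarity used to score chunks against the query embedding.
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() {
        return 0.0;
    }
    let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
    let norm_a = a.iter().map(|x| x * x).sum::<f32>().sqrt();
    let norm_b = b.iter().map(|x| x * x).sum::<f32>().sqrt();
    if norm_a == 0.0 || norm_b == 0.0 { 0.0 } else { dot / (norm_a * norm_b) }
}
```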

### Performance Notes
- **First-time indexing**: May take several minutes for large document collections
- **Subsequent startups**: Uses cached embeddings for fast loading
- **Memory usage**: Scales with document collection size
- **Search speed**: Sub-second search responses after indexing

## Troubleshooting

### MCP Server Issues
1. **Server not connecting**:
   ```bash
   # Check binary exists and is executable
   which rust-local-rag
   ls -la ~/.cargo/bin/rust-local-rag
   ```

2. **Check Claude Desktop logs**:
   - **macOS**: `~/Library/Logs/Claude/mcp*.log`
   - **Windows**: `%APPDATA%\Claude\Logs\mcp*.log`
   - **Linux**: `~/.local/share/Claude/logs/mcp*.log`

### Document Processing Issues
1. **Documents not found**:
   ```bash
   # Verify directory exists and contains PDFs
   ls -la ~/Documents/rag/
   file ~/Documents/rag/*.pdf
   ```

2. **PDF processing failures**:
   ```bash
   # Test poppler installation
   pdftotext --version
   
   # Test PDF text extraction manually
   pdftotext ~/Documents/rag/sample.pdf -
   ```

3. **Empty search results**:
   - Check if documents were successfully indexed
   - Verify Ollama is running (`ollama serve`)
   - Check embedding model is installed (`ollama list`)

### Log Analysis
```bash
# View real-time logs
tail -f /tmp/rust-local-rag/rust-local-rag.log

# Search for errors
grep -iE "error|failed|panic" /tmp/rust-local-rag/rust-local-rag.log

# Check document loading
grep -i "document" /tmp/rust-local-rag/rust-local-rag.log
```

### Configuration Validation
```bash
# Test configuration with manual run
DATA_DIR="$HOME/Documents/data" \
DOCUMENTS_DIR="$HOME/Documents/rag" \
LOG_DIR="/tmp/rust-local-rag" \
LOG_LEVEL="debug" \
rust-local-rag
```

## Performance Optimization

### For Large Document Collections
- Use SSD storage for `DATA_DIR` 
- Increase `LOG_MAX_MB` for detailed logging
- Consider splitting large PDFs into smaller files
- Monitor memory usage during initial indexing

### For Faster Searches
- Keep Ollama running continuously
- Use specific search terms rather than broad queries
- Adjust `top_k` parameter based on needs (lower = faster)

## Security Considerations

- Documents are processed locally (no external API calls)
- Embeddings stored locally in `DATA_DIR`
- Ollama runs locally for embedding generation
- No document content sent to external services

```

--------------------------------------------------------------------------------
/src/rag_engine.rs:
--------------------------------------------------------------------------------

```rust
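//! Core RAG engine: PDF text extraction via `pdftotext`, word-based chunking,
//! Ollama embeddings, cosine-similarity search, and JSON persistence in
//! `DATA_DIR/chunks.json`.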
use anyhow::Result;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use uuid::Uuid;
use walkdir::WalkDir;

use crate::embeddings::EmbeddingService;

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DocumentChunk {
    pub id: String,
    pub document_name: String,
    pub text: String,
    pub embedding: Vec<f32>,
    pub chunk_index: usize,
}

#[derive(Debug, Clone)]
pub struct SearchResult {
    pub text: String,
    pub score: f32,
    pub document: String,
    pub chunk_id: String,
}

pub struct RagEngine {
    chunks: HashMap<String, DocumentChunk>,
    embedding_service: EmbeddingService,
    data_dir: String,
}

impl RagEngine {
    pub async fn new(data_dir: &str) -> Result<Self> {
        let embedding_service = EmbeddingService::new().await?;

        let mut engine = Self {
            chunks: HashMap::new(),
            embedding_service,
            data_dir: data_dir.to_string(),
        };

        if let Err(e) = engine.load_from_disk().await {
            tracing::warn!("Could not load existing data: {}", e);
        }

        Ok(engine)
    }

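    /// Extracts text from a PDF, splits it into 500-word chunks, embeds each
    /// chunk via Ollama, and persists the index. Chunks shorter than 10
    /// characters are skipped; re-adding a document replaces its old chunks.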
    pub async fn add_document(&mut self, filename: &str, data: &[u8]) -> Result<usize> {
        tracing::info!("Processing document: {}", filename);

        let text = self.extract_pdf_text(data)?;
        if text.trim().is_empty() {
            return Err(anyhow::anyhow!("No text extracted from PDF"));
        }

        let chunks = self.chunk_text(&text, 500);
        tracing::info!("Created {} chunks for {}", chunks.len(), filename);

        self.chunks
            .retain(|_, chunk| chunk.document_name != filename);

        let mut chunk_count = 0;
        for (i, chunk_text) in chunks.into_iter().enumerate() {
            if chunk_text.trim().len() < 10 {
                continue;
            }

            tracing::debug!("Generating embedding for chunk {} of {}", i + 1, filename);
            let embedding = self.embedding_service.get_embedding(&chunk_text).await?;

            let chunk = DocumentChunk {
                id: Uuid::new_v4().to_string(),
                document_name: filename.to_string(),
                text: chunk_text,
                embedding,
                chunk_index: i,
            };

            self.chunks.insert(chunk.id.clone(), chunk);
            chunk_count += 1;
        }

        self.save_to_disk().await?;

        tracing::info!(
            "Successfully processed {} chunks for {}",
            chunk_count,
            filename
        );
        Ok(chunk_count)
    }

    pub async fn search(&self, query: &str, top_k: usize) -> Result<Vec<SearchResult>> {
        if self.chunks.is_empty() {
            return Ok(vec![]);
        }

        tracing::debug!("Searching for: '{}'", query);

        let query_embedding = self.embedding_service.get_embedding(query).await?;

        let mut scores: Vec<(f32, &DocumentChunk)> = self
            .chunks
            .values()
            .map(|chunk| {
                let similarity = cosine_similarity(&query_embedding, &chunk.embedding);
                (similarity, chunk)
            })
            .collect();

        // total_cmp gives a total order, so NaN similarities cannot panic the sort
        scores.sort_by(|a, b| b.0.total_cmp(&a.0));

        Ok(scores
            .into_iter()
            .take(top_k)
            .map(|(score, chunk)| SearchResult {
                text: chunk.text.clone(),
                score,
                document: chunk.document_name.clone(),
                chunk_id: chunk.id.clone(),
            })
            .collect())
    }

    pub fn list_documents(&self) -> Vec<String> {
        let mut docs: Vec<String> = self
            .chunks
            .values()
            .map(|chunk| chunk.document_name.clone())
            .collect::<std::collections::HashSet<_>>()
            .into_iter()
            .collect();
        docs.sort();
        docs
    }

    pub fn get_stats(&self) -> serde_json::Value {
        let doc_count = self.list_documents().len();
        let chunk_count = self.chunks.len();

        serde_json::json!({
            "documents": doc_count,
            "chunks": chunk_count,
            "status": "ready"
        })
    }

    pub async fn load_documents_from_dir(&mut self, dir: &str) -> Result<()> {
        for entry in WalkDir::new(dir).into_iter().filter_map(|e| e.ok()) {
            let path = entry.path();
            let is_pdf = path
                .extension()
                .and_then(|s| s.to_str())
                .is_some_and(|ext| ext.eq_ignore_ascii_case("pdf"));
            if is_pdf {
                let filename = path.file_name().unwrap().to_str().unwrap();

                if self.chunks.values().any(|c| c.document_name == filename) {
                    tracing::info!("Document {} already processed, skipping", filename);
                    continue;
                }

                match tokio::fs::read(&path).await {
                    Ok(data) => {
                        tracing::info!("Loading document: {}", filename);
                        match self.add_document(filename, &data).await {
                            Ok(chunk_count) => {
                                tracing::info!(
                                    "Successfully processed {} with {} chunks",
                                    filename,
                                    chunk_count
                                );
                            }
                            Err(e) => {
                                tracing::warn!("Skipping {}: {}", filename, e);
                            }
                        }
                    }
                    Err(e) => {
                        tracing::error!("Failed to read {}: {}", filename, e);
                    }
                }
            }
        }

        Ok(())
    }

    fn extract_pdf_text(&self, data: &[u8]) -> Result<String> {
        tracing::info!("Extracting PDF text using pdftotext system binary");
        self.extract_pdf_with_pdftotext(data)
    }

    fn extract_pdf_with_pdftotext(&self, data: &[u8]) -> Result<String> {
        use std::process::Command;

        let temp_dir = std::env::temp_dir();
        let temp_file = temp_dir.join(format!("temp_pdf_{}.pdf", std::process::id()));

        std::fs::write(&temp_file, data)
            .map_err(|e| anyhow::anyhow!("Failed to write temp PDF: {}", e))?;

        let output = Command::new("pdftotext")
            .arg("-layout")
            .arg("-enc")
            .arg("UTF-8")
            .arg(&temp_file)
            .arg("-")
            .output();
        let _ = std::fs::remove_file(&temp_file);

        match output {
            Ok(output) if output.status.success() => {
                let text = String::from_utf8_lossy(&output.stdout).to_string();
                let text_chars = text.chars().count();

                if text.trim().is_empty() {
                    tracing::warn!("pdftotext extracted 0 characters");
                    Err(anyhow::anyhow!("pdftotext produced no text output"))
                } else {
                    tracing::info!("✅ pdftotext extracted {} characters", text_chars);
                    Ok(text)
                }
            }
            Ok(output) => {
                let error_msg = String::from_utf8_lossy(&output.stderr);
                tracing::warn!("pdftotext failed with error: {}", error_msg);
                Err(anyhow::anyhow!("pdftotext failed: {}", error_msg))
            }
            Err(e) => {
                tracing::warn!("Failed to run pdftotext command: {}", e);
                Err(anyhow::anyhow!(
                    "pdftotext command failed: {} (is poppler installed?)",
                    e
                ))
            }
        }
    }

    fn chunk_text(&self, text: &str, chunk_size: usize) -> Vec<String> {
        let words: Vec<&str> = text.split_whitespace().collect();
        let mut chunks = Vec::new();

        for chunk in words.chunks(chunk_size) {
            let chunk_text = chunk.join(" ");
            if !chunk_text.trim().is_empty() {
                chunks.push(chunk_text);
            }
        }

        chunks
    }

    async fn save_to_disk(&self) -> Result<()> {
        let path = format!("{}/chunks.json", self.data_dir);
        let data = serde_json::to_string_pretty(&self.chunks)?;
        tokio::fs::write(path, data).await?;
        tracing::debug!("Saved {} chunks to disk", self.chunks.len());
        Ok(())
    }

    async fn load_from_disk(&mut self) -> Result<()> {
        let path = format!("{}/chunks.json", self.data_dir);
        if tokio::fs::try_exists(&path).await? {
            let data = tokio::fs::read_to_string(path).await?;
            self.chunks = serde_json::from_str(&data)?;
            tracing::info!("Loaded {} chunks from disk", self.chunks.len());
        }
        Ok(())
    }
}

fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() {
        return 0.0;
    }

    let dot_product: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
    let norm_a: f32 = a.iter().map(|x| x * x).sum::<f32>().sqrt();
    let norm_b: f32 = b.iter().map(|x| x * x).sum::<f32>().sqrt();

    if norm_a == 0.0 || norm_b == 0.0 {
        0.0
    } else {
        dot_product / (norm_a * norm_b)
    }
}

```