This is page 1 of 4. Use http://codebase.md/wrale/mcp-server-tree-sitter?page={x} to view the full context. # Directory Structure ``` ├── .codestateignore ├── .github │ └── workflows │ ├── ci.yml │ └── release.yml ├── .gitignore ├── .python-version ├── CONTRIBUTING.md ├── docs │ ├── architecture.md │ ├── cli.md │ ├── config.md │ ├── diagnostics.md │ ├── logging.md │ ├── requirements │ │ └── logging.md │ └── tree-sitter-type-safety.md ├── FEATURES.md ├── LICENSE ├── Makefile ├── NOTICE ├── pyproject.toml ├── README.md ├── ROADMAP.md ├── scripts │ └── implementation-search.sh ├── src │ └── mcp_server_tree_sitter │ ├── __init__.py │ ├── __main__.py │ ├── api.py │ ├── bootstrap │ │ ├── __init__.py │ │ └── logging_bootstrap.py │ ├── cache │ │ ├── __init__.py │ │ └── parser_cache.py │ ├── capabilities │ │ ├── __init__.py │ │ └── server_capabilities.py │ ├── config.py │ ├── context.py │ ├── di.py │ ├── exceptions.py │ ├── language │ │ ├── __init__.py │ │ ├── query_templates.py │ │ ├── registry.py │ │ └── templates │ │ ├── __init__.py │ │ ├── apl.py │ │ ├── c.py │ │ ├── cpp.py │ │ ├── go.py │ │ ├── java.py │ │ ├── javascript.py │ │ ├── julia.py │ │ ├── kotlin.py │ │ ├── python.py │ │ ├── rust.py │ │ ├── swift.py │ │ └── typescript.py │ ├── logging_config.py │ ├── models │ │ ├── __init__.py │ │ ├── ast_cursor.py │ │ ├── ast.py │ │ └── project.py │ ├── prompts │ │ ├── __init__.py │ │ └── code_patterns.py │ ├── server.py │ ├── testing │ │ ├── __init__.py │ │ └── pytest_diagnostic.py │ ├── tools │ │ ├── __init__.py │ │ ├── analysis.py │ │ ├── ast_operations.py │ │ ├── debug.py │ │ ├── file_operations.py │ │ ├── project.py │ │ ├── query_builder.py │ │ ├── registration.py │ │ └── search.py │ └── utils │ ├── __init__.py │ ├── context │ │ ├── __init__.py │ │ └── mcp_context.py │ ├── file_io.py │ ├── path.py │ ├── security.py │ ├── tree_sitter_helpers.py │ └── tree_sitter_types.py ├── tests │ ├── __init__.py │ ├── .gitignore │ ├── conftest.py │ ├── test_ast_cursor.py │ ├── test_basic.py │ ├── test_cache_config.py │ ├── test_cli_arguments.py │ ├── test_config_behavior.py │ ├── test_config_manager.py │ ├── test_context.py │ ├── test_debug_flag.py │ ├── test_di.py │ ├── test_diagnostics │ │ ├── __init__.py │ │ ├── test_ast_parsing.py │ │ ├── test_ast.py │ │ ├── test_cursor_ast.py │ │ ├── test_language_pack.py │ │ ├── test_language_registry.py │ │ └── test_unpacking_errors.py │ ├── test_env_config.py │ ├── test_failure_modes.py │ ├── test_file_operations.py │ ├── test_helpers.py │ ├── test_language_listing.py │ ├── test_logging_bootstrap.py │ ├── test_logging_config_di.py │ ├── test_logging_config.py │ ├── test_logging_early_init.py │ ├── test_logging_env_vars.py │ ├── test_logging_handlers.py │ ├── test_makefile_targets.py │ ├── test_mcp_context.py │ ├── test_models_ast.py │ ├── test_persistent_server.py │ ├── test_project_persistence.py │ ├── test_query_result_handling.py │ ├── test_registration.py │ ├── test_rust_compatibility.py │ ├── test_server_capabilities.py │ ├── test_server.py │ ├── test_symbol_extraction.py │ ├── test_tree_sitter_helpers.py │ ├── test_yaml_config_di.py │ └── test_yaml_config.py ├── TODO.md └── uv.lock ``` # Files -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- ``` 3.12 ``` -------------------------------------------------------------------------------- /.codestateignore: -------------------------------------------------------------------------------- 
``` uv.lock ``` -------------------------------------------------------------------------------- /tests/.gitignore: -------------------------------------------------------------------------------- ``` # Reports *.json ``` -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- ``` # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ cover/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder .pybuilder/ target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv # For a library or package, you might want to ignore these files since the code is # intended to run in multiple environments; otherwise, check them in: # .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. # However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock # UV # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. # This is especially recommended for binary packages to ensure reproducibility, and is more # commonly ignored for libraries. #uv.lock # poetry # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. # This is especially recommended for binary packages to ensure reproducibility, and is more # commonly ignored for libraries. # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control #poetry.lock # pdm # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. #pdm.lock # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it # in version control. # https://pdm.fming.dev/latest/usage/project/#working-with-version-control .pdm.toml .pdm-python .pdm-build/ # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # pytype static type analyzer .pytype/ # Cython debug symbols cython_debug/ # PyCharm # JetBrains specific template is maintained in a separate JetBrains.gitignore that can # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ # Ruff stuff: .ruff_cache/ # PyPI configuration file .pypirc # etc. results/ diagnostic_results/ *.json ``` -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- ```markdown [](https://mseep.ai/app/wrale-mcp-server-tree-sitter) # MCP Tree-sitter Server A Model Context Protocol (MCP) server that provides code analysis capabilities using tree-sitter, designed to give AI assistants intelligent access to codebases with appropriate context management. Claude Desktop is the reference implementation target. <a href="https://glama.ai/mcp/servers/@wrale/mcp-server-tree-sitter"> <img width="380" height="200" src="https://glama.ai/mcp/servers/@wrale/mcp-server-tree-sitter/badge" alt="mcp-server-tree-sitter MCP server" /> </a> ## Features - 🔍 **Flexible Exploration**: Examine code at multiple levels of granularity - 🧠 **Context Management**: Provides just enough information without overwhelming the context window - 🌐 **Language Agnostic**: Supports many programming languages including Python, JavaScript, TypeScript, Go, Rust, C, C++, Swift, Java, Kotlin, Julia, and APL via tree-sitter-language-pack - 🌳 **Structure-Aware**: Uses AST-based understanding with efficient cursor-based traversal - 🔎 **Searchable**: Find specific patterns using text search and tree-sitter queries - 🔄 **Caching**: Optimized performance through parse tree caching - 🔑 **Symbol Extraction**: Extract and analyze functions, classes, and other code symbols - 📊 **Dependency Analysis**: Identify and analyze code dependencies and relationships - 🧩 **State Persistence**: Maintains project registrations and cached data between invocations - 🔒 **Secure**: Built-in security boundaries and input validation For a comprehensive list of all available commands, their current implementation status, and detailed feature matrix, please refer to the [FEATURES.md](FEATURES.md) document. ## Installation ### Prerequisites - Python 3.10+ - Tree-sitter language parsers for your preferred languages ### Basic Installation ```bash pip install mcp-server-tree-sitter ``` ### Development Installation ```bash git clone https://github.com/wrale/mcp-server-tree-sitter.git cd mcp-server-tree-sitter pip install -e ".[dev,languages]" ``` ## Quick Start ### Running with Claude Desktop You can make the server available in Claude Desktop either through the MCP CLI or by manually configuring Claude Desktop. 
#### Using MCP CLI

Register the server with Claude Desktop:

```bash
mcp install mcp_server_tree_sitter.server:mcp --name "tree_sitter"
```

#### Manual Configuration

Alternatively, you can manually configure Claude Desktop:

1. Open your Claude Desktop configuration file:
   - macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`
   - Windows: `%APPDATA%\Claude\claude_desktop_config.json`

   Create the file if it doesn't exist.

2. Add the server to the `mcpServers` section:

   ```json
   {
       "mcpServers": {
           "tree_sitter": {
               "command": "python",
               "args": [
                   "-m",
                   "mcp_server_tree_sitter.server"
               ]
           }
       }
   }
   ```

   Alternatively, if using uv or another package manager:

   ```json
   {
       "mcpServers": {
           "tree_sitter": {
               "command": "uv",
               "args": [
                   "--directory",
                   "/ABSOLUTE/PATH/TO/YOUR/PROJECT",
                   "run",
                   "-m",
                   "mcp_server_tree_sitter.server"
               ]
           }
       }
   }
   ```

   Note: Make sure to replace `/ABSOLUTE/PATH/TO/YOUR/PROJECT` with the actual absolute path to your project directory.

3. Save the file and restart Claude Desktop.

The MCP tools icon (hammer) will appear in Claude Desktop's interface once you have properly configured at least one MCP server. You can then access the `tree_sitter` server's functionality by clicking on this icon.

### Configuring with Released Version

If you prefer not to install the package from PyPI manually or clone the repository, you can use the following Claude Desktop configuration instead:

1. Open your Claude Desktop configuration file (same location as above).

2. Add the tree-sitter server to the `mcpServers` section:

   ```json
   {
       "mcpServers": {
           "tree_sitter": {
               "command": "uvx",
               "args": [
                   "--directory",
                   "/ABSOLUTE/PATH/TO/YOUR/PROJECT",
                   "mcp-server-tree-sitter"
               ]
           }
       }
   }
   ```

3. Save the file and restart Claude Desktop.

This method uses `uvx` to run the published PyPI package directly, which is the recommended approach for the released version. The server doesn't require any additional parameters to run in its basic configuration.

## State Persistence

The MCP Tree-sitter Server maintains state between invocations. This means:

- Projects stay registered until explicitly removed or the server is restarted
- Parse trees are cached according to configuration settings
- Language information is retained throughout the server's lifetime

This persistence is maintained in-memory during the server's lifetime using singleton patterns for key components.
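For illustration, the following minimal sketch (using the in-process API helpers described under "Direct Python Usage" below; the project path is a placeholder) shows that a registration made once remains visible to later calls in the same process:

```python
# Minimal sketch of in-process persistence. The path below is a placeholder.
from mcp_server_tree_sitter.api import list_projects, register_project

register_project(path="/path/to/your/project", name="my-project")

# Later calls in the same server process still see the registration,
# because the project registry is a process-wide singleton.
print([p["name"] for p in list_projects()])  # expected to include "my-project"
```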
### Running as a standalone server

There are several ways to run the server:

#### Using the MCP CLI directly:

```bash
python -m mcp run mcp_server_tree_sitter.server
```

#### Using Makefile targets:

```bash
# Show available targets
make

# Run the server with default settings
make mcp-run

# Show help information
make mcp-run ARGS="--help"

# Show version information
make mcp-run ARGS="--version"

# Run with custom configuration file
make mcp-run ARGS="--config /path/to/config.yaml"

# Enable debug logging
make mcp-run ARGS="--debug"

# Disable parse tree caching
make mcp-run ARGS="--disable-cache"
```

#### Using the installed script:

```bash
# Run the server with default settings
mcp-server-tree-sitter

# Show help information
mcp-server-tree-sitter --help

# Show version information
mcp-server-tree-sitter --version

# Run with custom configuration file
mcp-server-tree-sitter --config /path/to/config.yaml

# Enable debug logging
mcp-server-tree-sitter --debug

# Disable parse tree caching
mcp-server-tree-sitter --disable-cache
```

### Using with the MCP Inspector

Using the MCP CLI directly:

```bash
python -m mcp dev mcp_server_tree_sitter.server
```

Or using the Makefile target:

```bash
make mcp-dev
```

You can also pass arguments:

```bash
make mcp-dev ARGS="--debug"
```

## Usage

### Register a Project

First, register a project to analyze:

```
register_project_tool(path="/path/to/your/project", name="my-project")
```

### Explore Files

List files in the project:

```
list_files(project="my-project", pattern="**/*.py")
```

View file content:

```
get_file(project="my-project", path="src/main.py")
```

### Analyze Code Structure

Get the syntax tree:

```
get_ast(project="my-project", path="src/main.py", max_depth=3)
```

Extract symbols:

```
get_symbols(project="my-project", path="src/main.py")
```

### Search Code

Search for text:

```
find_text(project="my-project", pattern="function", file_pattern="**/*.py")
```

Run tree-sitter queries:

```
run_query(
    project="my-project",
    query='(function_definition name: (identifier) @function.name)',
    language="python"
)
```

### Analyze Complexity

```
analyze_complexity(project="my-project", path="src/main.py")
```

## Direct Python Usage

While the primary intended use is through the MCP server, you can also use the library directly in Python code:

```python
# Import from the API module
from mcp_server_tree_sitter.api import (
    register_project, list_projects, get_config, get_language_registry
)

# Register a project
project_info = register_project(
    path="/path/to/project", name="my-project", description="Description"
)

# List projects
projects = list_projects()

# Get configuration
config = get_config()

# Access components through dependency injection
from mcp_server_tree_sitter.di import get_container
container = get_container()
project_registry = container.project_registry
language_registry = container.language_registry
```

## Configuration

Create a YAML configuration file:

```yaml
cache:
  enabled: true        # Enable/disable caching (default: true)
  max_size_mb: 100     # Maximum cache size in MB (default: 100)
  ttl_seconds: 300     # Cache entry time-to-live in seconds (default: 300)

security:
  max_file_size_mb: 5  # Maximum file size to process in MB (default: 5)
  excluded_dirs:       # Directories to exclude from processing
    - .git
    - node_modules
    - __pycache__
  allowed_extensions:  # Optional list of allowed file extensions
    # - py
    # - js
    # Leave empty or omit for all extensions

language:
  default_max_depth: 5   # Default max depth for AST traversal (default: 5)
  preferred_languages:   # List of languages to pre-load at startup for faster performance
    - python             # Pre-loading reduces latency for first operations
    - javascript

log_level: INFO            # Logging level (DEBUG, INFO, WARNING, ERROR)
max_results_default: 100   # Default maximum results for search operations
```

Load it with:

```
configure(config_path="/path/to/config.yaml")
```

### Logging Configuration

The server's logging verbosity can be controlled using environment variables:

```bash
# Enable detailed debug logging
export MCP_TS_LOG_LEVEL=DEBUG

# Use normal informational logging (default)
export MCP_TS_LOG_LEVEL=INFO

# Only show warning and error messages
export MCP_TS_LOG_LEVEL=WARNING
```

For comprehensive information about logging configuration, please refer to the [logging documentation](docs/logging.md). For details on the command-line interface, see the [CLI documentation](docs/cli.md).

### About preferred_languages

The `preferred_languages` setting controls which language parsers are pre-loaded at server startup rather than on-demand. This provides several benefits:

- **Faster initial analysis**: No delay when first analyzing a file of a pre-loaded language
- **Early error detection**: Issues with parsers are discovered at startup, not during use
- **Predictable memory allocation**: Memory for frequently used parsers is allocated upfront

By default, all parsers are loaded on-demand when first needed. For optimal performance, specify the languages you use most frequently in your projects.

You can also configure specific settings:

```
configure(cache_enabled=True, max_file_size_mb=10, log_level="DEBUG")
```

Or use environment variables:

```bash
export MCP_TS_CACHE_MAX_SIZE_MB=256
export MCP_TS_LOG_LEVEL=DEBUG
export MCP_TS_CONFIG_PATH=/path/to/config.yaml
```

Environment variables use the format `MCP_TS_SECTION_SETTING` (e.g., `MCP_TS_CACHE_MAX_SIZE_MB`) for section settings, or `MCP_TS_SETTING` (e.g., `MCP_TS_LOG_LEVEL`) for top-level settings.

Configuration values are applied in this order of precedence:

1. Environment variables (highest)
2. Values set via `configure()` calls
3. YAML configuration file
4. Default values (lowest)

The server will look for configuration in:

1. Path specified in `configure()` call
2. Path specified by `MCP_TS_CONFIG_PATH` environment variable
3. Default location: `~/.config/tree-sitter/config.yaml`

## For Developers

### Diagnostic Capabilities

The MCP Tree-sitter Server includes a diagnostic framework to help identify and fix issues:

```bash
# Run diagnostic tests
make test-diagnostics

# CI-friendly version (won't fail the build on diagnostic issues)
make test-diagnostics-ci
```

Diagnostic tests provide detailed information about the server's behavior and can help isolate specific issues. For more information about the diagnostic framework, please see the [diagnostics documentation](docs/diagnostics.md).

### Type Safety Considerations

The MCP Tree-sitter Server maintains type safety when interfacing with tree-sitter libraries through careful design patterns and protocols. If you're extending the codebase, please review the [type safety guide](docs/tree-sitter-type-safety.md) for important information about handling tree-sitter API variations.
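As a purely illustrative sketch of the general idea (not the project's actual definitions, which live in `utils/tree_sitter_types.py`), structural typing via `typing.Protocol` is one way to write helpers against tree-sitter objects without binding to a concrete node class; `NodeLike` and `count_nodes` below are hypothetical names:

```python
# Illustrative only: a structural type for "anything that looks like a tree-sitter node".
from typing import Protocol, Sequence, runtime_checkable


@runtime_checkable
class NodeLike(Protocol):
    """Minimal surface a hypothetical traversal helper relies on."""

    type: str
    children: Sequence["NodeLike"]


def count_nodes(node: NodeLike) -> int:
    """Count nodes in a subtree without importing a concrete Node class."""
    return 1 + sum(count_nodes(child) for child in node.children)
```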
## Available Resources The server provides the following MCP resources: - `project://{project}/files` - List all files in a project - `project://{project}/files/{pattern}` - List files matching a pattern - `project://{project}/file/{path}` - Get file content - `project://{project}/file/{path}/lines/{start}-{end}` - Get specific lines from a file - `project://{project}/ast/{path}` - Get the AST for a file - `project://{project}/ast/{path}/depth/{depth}` - Get the AST with custom depth ## Available Tools The server provides tools for: - Project management: `register_project_tool`, `list_projects_tool`, `remove_project_tool` - Language management: `list_languages`, `check_language_available` - File operations: `list_files`, `get_file`, `get_file_metadata` - AST analysis: `get_ast`, `get_node_at_position` - Code search: `find_text`, `run_query` - Symbol extraction: `get_symbols`, `find_usage` - Project analysis: `analyze_project`, `get_dependencies`, `analyze_complexity` - Query building: `get_query_template_tool`, `list_query_templates_tool`, `build_query`, `adapt_query`, `get_node_types` - Similar code detection: `find_similar_code` - Cache management: `clear_cache` - Configuration diagnostics: `diagnose_config` See [FEATURES.md](FEATURES.md) for detailed information about each tool's implementation status, dependencies, and usage examples. ## Available Prompts The server provides the following MCP prompts: - `code_review` - Create a prompt for reviewing code - `explain_code` - Create a prompt for explaining code - `explain_tree_sitter_query` - Explain tree-sitter query syntax - `suggest_improvements` - Create a prompt for suggesting code improvements - `project_overview` - Create a prompt for a project overview analysis ## License MIT ``` -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- ```markdown # Contributing to MCP Tree-sitter Server Thank you for your interest in contributing to MCP Tree-sitter Server! This guide will help you understand our development process and coding standards. ## Development Setup 1. Clone the repository: ```bash git clone https://github.com/organization/mcp-server-tree-sitter.git cd mcp-server-tree-sitter ``` 2. Install with development dependencies: ```bash make install-dev ``` 3. Install language parsers (optional): ```bash make install-languages ``` ## Code Style and Standards We follow a strict set of coding standards to maintain consistency throughout the codebase: ### Python Style - We use [Black](https://black.readthedocs.io/) for code formatting with a line length of 88 characters - We use [Ruff](https://github.com/charliermarsh/ruff) for linting - We use [MyPy](https://mypy.readthedocs.io/) for static type checking ### Exception Handling - Use specific exception types rather than catching generic exceptions when possible - When re-raising exceptions, use the `from` clause to preserve the stack trace: ```python try: # Some code except SomeError as e: raise CustomError("Meaningful message") from e ``` ### Testing - Write tests for all new functionality - Run tests before submitting: ```bash make test ``` ### Documentation - Document all functions, classes, and modules using docstrings - Follow the Google Python Style Guide for docstrings - Include type hints for all function parameters and return values ## Development Workflow 1. 
Create a branch for your feature or bugfix: ```bash git checkout -b feature/your-feature-name ``` 2. Make your changes and ensure they pass linting and tests: ```bash make format make lint make test ``` 3. Commit your changes with a clear message describing the change 4. Submit a pull request to the main repository ## Running the Server You can run the server in different modes: - For development and testing: ```bash make mcp-dev ``` - For direct execution: ```bash make mcp-run ``` - To install in Claude Desktop: ```bash make mcp-install ``` ## Project Architecture The project follows a modular architecture: - `config.py` - Configuration management - `language/` - Tree-sitter language handling - `models/` - Data models for AST and projects - `cache/` - Caching mechanisms - `resources/` - MCP resources (files, AST) - `tools/` - MCP tools (search, analysis, etc.) - `utils/` - Utility functions - `prompts/` - MCP prompts - `server.py` - FastMCP server implementation ## Seeking Help If you have questions or need help, please open an issue or contact the maintainers. ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/tools/__init__.py: -------------------------------------------------------------------------------- ```python """MCP tool components.""" ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/prompts/__init__.py: -------------------------------------------------------------------------------- ```python """MCP prompt components.""" ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/models/__init__.py: -------------------------------------------------------------------------------- ```python """Data models for MCP server.""" ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/cache/__init__.py: -------------------------------------------------------------------------------- ```python """Cache components for MCP server.""" ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/utils/__init__.py: -------------------------------------------------------------------------------- ```python """Utility functions for MCP server.""" ``` -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- ```python """Test package for mcp-server-tree-sitter.""" ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/language/__init__.py: -------------------------------------------------------------------------------- ```python """Language handling components for MCP server.""" ``` -------------------------------------------------------------------------------- /tests/test_diagnostics/__init__.py: -------------------------------------------------------------------------------- ```python """Pytest-based diagnostic tests for mcp-server-tree-sitter.""" ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/capabilities/__init__.py: -------------------------------------------------------------------------------- ```python """MCP capability declarations.""" from .server_capabilities import register_capabilities __all__ = ["register_capabilities"] ``` 
-------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/utils/context/__init__.py: -------------------------------------------------------------------------------- ```python """Context handling utilities for MCP operations.""" from .mcp_context import MCPContext, ProgressScope __all__ = ["MCPContext", "ProgressScope"] ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/testing/__init__.py: -------------------------------------------------------------------------------- ```python """Testing utilities for mcp-server-tree-sitter.""" from .pytest_diagnostic import DiagnosticData, diagnostic __all__ = ["DiagnosticData", "diagnostic"] ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/bootstrap/__init__.py: -------------------------------------------------------------------------------- ```python """Bootstrap package for early initialization dependencies. This package contains modules that should be imported and initialized before any other modules in the project to ensure proper setup of core services. """ # Import logging bootstrap module to ensure it's available from . import logging_bootstrap # Export key functions for convenience from .logging_bootstrap import get_log_level_from_env, get_logger, update_log_levels __all__ = ["get_logger", "update_log_levels", "get_log_level_from_env", "logging_bootstrap"] ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/__init__.py: -------------------------------------------------------------------------------- ```python """MCP Server for Tree-sitter - Code analysis capabilities using tree-sitter. This module provides a Model Context Protocol server that gives LLMs like Claude intelligent access to codebases with appropriate context management. """ # Import bootstrap package first to ensure core services are set up # before any other modules are imported from . import bootstrap as bootstrap # noqa: F401 - Import needed for initialization # Logging is now configured via the bootstrap.logging_bootstrap module # The bootstrap module automatically calls configure_root_logger() when imported __version__ = "0.1.0" ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/language/templates/__init__.py: -------------------------------------------------------------------------------- ```python """Language-specific query templates collection.""" from typing import Dict from . 
import ( apl, c, cpp, go, java, javascript, julia, kotlin, python, rust, swift, typescript, ) # Combine all language templates QUERY_TEMPLATES: Dict[str, Dict[str, str]] = { "python": python.TEMPLATES, "javascript": javascript.TEMPLATES, "typescript": typescript.TEMPLATES, "go": go.TEMPLATES, "rust": rust.TEMPLATES, "c": c.TEMPLATES, "cpp": cpp.TEMPLATES, "swift": swift.TEMPLATES, "java": java.TEMPLATES, "kotlin": kotlin.TEMPLATES, "julia": julia.TEMPLATES, "apl": apl.TEMPLATES, } ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/language/templates/apl.py: -------------------------------------------------------------------------------- ```python """Query templates for APL language.""" TEMPLATES = { "functions": """ (function_definition name: (identifier) @function.name body: (block) @function.body) @function.def """, "namespaces": """ (namespace_declaration name: (identifier) @namespace.name) @namespace.def """, "variables": """ (assignment left: (identifier) @variable.name) @variable.def """, "imports": """ (import_statement module: (identifier) @import.module) @import """, "operators": """ (operator_definition operator: (_) @operator.sym body: (block) @operator.body) @operator.def """, "classes": """ (class_definition name: (identifier) @class.name body: (block) @class.body) @class.def """, } ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/language/query_templates.py: -------------------------------------------------------------------------------- ```python """Query templates for common code patterns by language.""" from typing import Any, Dict, Optional from .templates import QUERY_TEMPLATES def get_query_template(language: str, template_name: str) -> Optional[str]: """ Get a query template for a language. Args: language: Language identifier template_name: Template name Returns: Query string or None if not found """ language_templates = QUERY_TEMPLATES.get(language) if language_templates: return language_templates.get(template_name) return None def list_query_templates(language: Optional[str] = None) -> Dict[str, Any]: """ List available query templates. Args: language: Optional language to filter by Returns: Dictionary of templates by language """ if language: return {language: QUERY_TEMPLATES.get(language, {})} return QUERY_TEMPLATES ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/logging_config.py: -------------------------------------------------------------------------------- ```python """Logging configuration for MCP Tree-sitter Server. This module is maintained for backwards compatibility. All functionality has been moved to the bootstrap.logging_bootstrap module, which is the canonical source for logging configuration. All imports from this module should be updated to use: from mcp_server_tree_sitter.bootstrap import get_logger, update_log_levels """ # Import the bootstrap module's logging components to maintain backwards compatibility from .bootstrap.logging_bootstrap import ( LOG_LEVEL_MAP, configure_root_logger, get_log_level_from_env, get_logger, update_log_levels, ) # Re-export all the functions and constants for backwards compatibility __all__ = ["LOG_LEVEL_MAP", "configure_root_logger", "get_log_level_from_env", "get_logger", "update_log_levels"] # The bootstrap module already calls configure_root_logger() when imported, # so we don't need to call it again here. 
``` -------------------------------------------------------------------------------- /scripts/implementation-search.sh: -------------------------------------------------------------------------------- ```bash #!/bin/bash # implementation-search.sh - Script to spot check implementation patterns # Enable strict mode set -euo pipefail # Check if search term is provided if [ $# -eq 0 ]; then echo "Usage: $0 <search_term>" exit 1 fi # Directories to exclude EXCLUDE_DIRS=( ".venv" ".git" "./diagnostic_results" "./.pytest_cache" "./.ruff_cache" "./.mypy_cache" "./tests/__pycache__" "./__pycache__" "./src/mcp_server_tree_sitter/__pycache__" "./src/*/bootstrap/__pycache__" "./src/*/__pycache__" ) # Files to exclude EXCLUDE_FILES=( "./.gitignore" "./TODO.md" "./FEATURES.md" ) # Build exclude arguments for grep EXCLUDE_ARGS="" for dir in "${EXCLUDE_DIRS[@]}"; do EXCLUDE_ARGS+="--exclude-dir=${dir} " done for file in "${EXCLUDE_FILES[@]}"; do EXCLUDE_ARGS+="--exclude=${file} " done # Run grep with all exclusions grep -r "${1}" . ${EXCLUDE_ARGS} --binary-files=without-match ``` -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- ```python """Pytest configuration for mcp-server-tree-sitter tests.""" import pytest # Import and register the diagnostic plugin pytest_plugins = ["mcp_server_tree_sitter.testing.pytest_diagnostic"] @pytest.fixture(autouse=True, scope="function") def reset_project_registry(): """Reset the project registry between tests. This prevents tests from interfering with each other when using the project registry, which is a singleton that persists across tests. """ # Import here to avoid circular imports from mcp_server_tree_sitter.di import get_container # Get registry through DI container container = get_container() registry = container.project_registry # Store original projects to restore after test original_projects = dict(registry._projects) # Clear for this test registry._projects.clear() yield # Restore original projects registry._projects.clear() registry._projects.update(original_projects) ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/language/templates/c.py: -------------------------------------------------------------------------------- ```python """Query templates for C language.""" TEMPLATES = { "functions": """ (function_definition declarator: (function_declarator declarator: (identifier) @function.name)) @function.def (declaration declarator: (function_declarator declarator: (identifier) @function.name)) @function.decl """, "structs": """ (struct_specifier name: (type_identifier) @struct.name) @struct.def (union_specifier name: (type_identifier) @union.name) @union.def (enum_specifier name: (type_identifier) @enum.name) @enum.def """, "imports": """ (preproc_include) @import (preproc_include path: (string_literal) @import.system) @import.system (preproc_include path: (system_lib_string) @import.system) @import.system """, "macros": """ (preproc_function_def name: (identifier) @macro.name) @macro.def """, } ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/exceptions.py: -------------------------------------------------------------------------------- ```python """Exception classes for mcp-server-tree-sitter.""" class MCPTreeSitterError(Exception): """Base exception for mcp-server-tree-sitter.""" pass class 
LanguageError(MCPTreeSitterError): """Errors related to tree-sitter languages.""" pass class LanguageNotFoundError(LanguageError): """Raised when a language parser is not available.""" pass class LanguageInstallError(LanguageError): """Raised when language installation fails.""" pass class ParsingError(MCPTreeSitterError): """Errors during parsing.""" pass class ProjectError(MCPTreeSitterError): """Errors related to project management.""" pass class FileAccessError(MCPTreeSitterError): """Errors accessing project files.""" pass class QueryError(MCPTreeSitterError): """Errors related to tree-sitter queries.""" pass class SecurityError(MCPTreeSitterError): """Security-related errors.""" pass class CacheError(MCPTreeSitterError): """Errors related to caching.""" pass ``` -------------------------------------------------------------------------------- /tests/test_di.py: -------------------------------------------------------------------------------- ```python """Tests for the dependency injection container.""" from mcp_server_tree_sitter.di import get_container def test_container_singleton(): """Test that get_container returns the same instance each time.""" container1 = get_container() container2 = get_container() assert container1 is container2 def test_register_custom_dependency(): """Test registering and retrieving a custom dependency.""" container = get_container() # Register a custom dependency test_value = {"test": "value"} container.register_dependency("test_dependency", test_value) # Retrieve it retrieved = container.get_dependency("test_dependency") assert retrieved is test_value def test_core_dependencies_initialized(): """Test that core dependencies are automatically initialized.""" container = get_container() assert container.config_manager is not None assert container.project_registry is not None assert container.language_registry is not None assert container.tree_cache is not None ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/language/templates/javascript.py: -------------------------------------------------------------------------------- ```python """Query templates for JavaScript.""" TEMPLATES = { "functions": """ (function_declaration name: (identifier) @function.name parameters: (formal_parameters) @function.params body: (statement_block) @function.body) @function.def (arrow_function parameters: (formal_parameters) @function.params body: (_) @function.body) @function.def """, "classes": """ (class_declaration name: (identifier) @class.name body: (class_body) @class.body) @class.def """, "imports": """ (import_statement) @import (import_statement source: (string) @import.source specifier: (_) @import.specifier) @import.full """, "function_calls": """ (call_expression function: (identifier) @call.function arguments: (arguments) @call.args) @call """, "assignments": """ (variable_declarator name: (_) @assign.target value: (_) @assign.value) @assign """, } ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/language/templates/rust.py: -------------------------------------------------------------------------------- ```python """Query templates for Rust.""" TEMPLATES = { "functions": """ (function_item name: (identifier) @function.name parameters: (parameters) @function.params body: (block) @function.body) @function.def """, "structs": """ (struct_item name: (type_identifier) @struct.name body: (field_declaration_list) @struct.body) @struct.def """, 
"enums": """ (enum_item name: (type_identifier) @enum.name body: (enum_variant_list) @enum.body) @enum.def """, "imports": """ (use_declaration) @import (use_declaration (identifier) @import.name) @import.direct (use_declaration (scoped_identifier path: (_) @import.path name: (identifier) @import.name)) @import.scoped (use_declaration (scoped_use_list path: (_) @import.path)) @import.list """, "traits": """ (trait_item name: (type_identifier) @trait.name) @trait.def """, "impls": """ (impl_item trait: (_)? @impl.trait type: (_) @impl.type) @impl.def """, } ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/language/templates/python.py: -------------------------------------------------------------------------------- ```python """Query templates for Python.""" TEMPLATES = { "functions": """ (function_definition name: (identifier) @function.name parameters: (parameters) @function.params body: (block) @function.body) @function.def """, "classes": """ (class_definition name: (identifier) @class.name body: (block) @class.body) @class.def """, "imports": """ (import_statement name: (dotted_name) @import.module) @import (import_from_statement module_name: (dotted_name) @import.from name: (dotted_name) @import.item) @import ;; Handle aliased imports with 'as' keyword (import_from_statement module_name: (dotted_name) @import.from name: (aliased_import name: (dotted_name) @import.item alias: (identifier) @import.alias)) @import """, "function_calls": """ (call function: (identifier) @call.function arguments: (argument_list) @call.args) @call """, "assignments": """ (assignment left: (_) @assign.target right: (_) @assign.value) @assign """, } ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/language/templates/go.py: -------------------------------------------------------------------------------- ```python """Query templates for Go.""" TEMPLATES = { "functions": """ (function_declaration name: (identifier) @function.name parameters: (parameter_list) @function.params body: (block) @function.body) @function.def (method_declaration name: (field_identifier) @method.name parameters: (parameter_list) @method.params body: (block) @method.body) @method.def """, "structs": """ (type_declaration (type_spec name: (type_identifier) @struct.name type: (struct_type) @struct.body)) @struct.def (type_declaration (type_spec name: (type_identifier) @type.name type: (_) @type.body)) @type.def """, "imports": """ (import_declaration) @import (import_declaration (import_spec_list (import_spec) @import.spec)) @import.list (import_declaration (import_spec_list (import_spec path: (_) @import.path))) @import.path_list (import_declaration (import_spec path: (_) @import.path)) @import.single """, "interfaces": """ (type_declaration (type_spec name: (type_identifier) @interface.name type: (interface_type) @interface.body)) @interface.def """, } ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/language/templates/cpp.py: -------------------------------------------------------------------------------- ```python """Query templates for C++ language.""" TEMPLATES = { "functions": """ (function_definition declarator: (function_declarator declarator: (identifier) @function.name)) @function.def (declaration declarator: (function_declarator declarator: (identifier) @function.name)) @function.decl (method_definition declarator: (function_declarator 
declarator: (field_identifier) @method.name)) @method.def """, "classes": """ (class_specifier name: (type_identifier) @class.name) @class.def """, "structs": """ (struct_specifier name: (type_identifier) @struct.name) @struct.def (union_specifier name: (type_identifier) @union.name) @union.def (enum_specifier name: (type_identifier) @enum.name) @enum.def """, "imports": """ (preproc_include) @import (preproc_include path: (string_literal) @import.path) @import.user (preproc_include path: (system_lib_string) @import.path) @import.system (namespace_definition name: (namespace_identifier) @import.namespace) @import.namespace_def """, "templates": """ (template_declaration) @template.def (template_declaration declaration: (class_specifier name: (type_identifier) @template.class)) @template.class_def """, } ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/language/templates/java.py: -------------------------------------------------------------------------------- ```python """Query templates for Java language.""" TEMPLATES = { "functions": """ (method_declaration name: (identifier) @function.name parameters: (formal_parameters) @function.params body: (block) @function.body) @function.def (constructor_declaration name: (identifier) @constructor.name parameters: (formal_parameters) @constructor.params body: (block) @constructor.body) @constructor.def """, "classes": """ (class_declaration name: (identifier) @class.name body: (class_body) @class.body) @class.def """, "interfaces": """ (interface_declaration name: (identifier) @interface.name body: (class_body) @interface.body) @interface.def """, "imports": """ (import_declaration) @import (import_declaration name: (qualified_name) @import.name) @import.qualified (import_declaration name: (qualified_name name: (identifier) @import.class)) @import.class (import_declaration asterisk: "*") @import.wildcard """, "annotations": """ (annotation name: (identifier) @annotation.name) @annotation (annotation_type_declaration name: (identifier) @annotation.type_name) @annotation.type """, "enums": """ (enum_declaration name: (identifier) @enum.name body: (enum_body) @enum.body) @enum.def """, } ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/language/templates/julia.py: -------------------------------------------------------------------------------- ```python """Query templates for Julia language.""" TEMPLATES = { "functions": """ (function_definition name: (identifier) @function.name) @function.def (function_definition name: (identifier) @function.name parameters: (parameter_list) @function.params body: (block) @function.body) @function.def (short_function_definition name: (identifier) @function.name) @function.short_def """, "modules": """ (module_definition name: (identifier) @module.name body: (block) @module.body) @module.def """, "structs": """ (struct_definition name: (identifier) @struct.name body: (block) @struct.body) @struct.def (mutable_struct_definition name: (identifier) @struct.name body: (block) @struct.body) @struct.mutable_def """, "imports": """ (import_statement) @import (import_statement name: (identifier) @import.name) @import.simple (using_statement) @using (using_statement name: (identifier) @using.name) @using.simple (import_statement name: (dot_expression) @import.qualified) @import.qualified """, "macros": """ (macro_definition name: (identifier) @macro.name body: (block) @macro.body) @macro.def """, 
"abstractTypes": """ (abstract_definition name: (identifier) @abstract.name) @abstract.def """, } ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/language/templates/swift.py: -------------------------------------------------------------------------------- ```python """Query templates for Swift language.""" TEMPLATES = { "functions": """ (function_declaration name: (identifier) @function.name) @function.def (function_declaration name: (identifier) @function.name body: (code_block) @function.body) @function.def """, "classes": """ (class_declaration name: (type_identifier) @class.name) @class.def (class_declaration name: (type_identifier) @class.name body: (class_body) @class.body) @class.def """, "structs": """ (struct_declaration name: (type_identifier) @struct.name) @struct.def (struct_declaration name: (type_identifier) @struct.name body: (struct_body) @struct.body) @struct.def """, "imports": """ (import_declaration) @import (import_declaration path: (identifier) @import.path) @import.simple (import_declaration path: (_) @import.path) @import.complex """, "protocols": """ (protocol_declaration name: (type_identifier) @protocol.name) @protocol.def (protocol_declaration name: (type_identifier) @protocol.name body: (protocol_body) @protocol.body) @protocol.def """, "extensions": """ (extension_declaration name: (type_identifier) @extension.name) @extension.def (extension_declaration name: (type_identifier) @extension.name body: (extension_body) @extension.body) @extension.def """, } ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/language/templates/typescript.py: -------------------------------------------------------------------------------- ```python """Query templates for TypeScript.""" TEMPLATES = { "functions": """ (function_declaration name: (identifier) @function.name parameters: (formal_parameters) @function.params body: (statement_block) @function.body) @function.def (arrow_function parameters: (formal_parameters) @function.params body: (_) @function.body) @function.def (method_definition name: (property_identifier) @method.name parameters: (formal_parameters) @method.params body: (statement_block) @method.body) @method.def """, "classes": """ (class_declaration name: (type_identifier) @class.name body: (class_body) @class.body) @class.def """, "interfaces": """ (interface_declaration name: (type_identifier) @interface.name body: (object_type) @interface.body) @interface.def (type_alias_declaration name: (type_identifier) @alias.name value: (_) @alias.value) @alias.def """, "imports": """ (import_statement) @import (import_statement source: (string) @import.source) @import.source_only (import_statement source: (string) @import.source specifier: (named_imports (import_specifier name: (identifier) @import.name))) @import.named (import_statement source: (string) @import.source specifier: (namespace_import name: (identifier) @import.namespace)) @import.namespace """, } ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/language/templates/kotlin.py: -------------------------------------------------------------------------------- ```python """Query templates for Kotlin language.""" TEMPLATES = { "functions": """ (function_declaration name: (simple_identifier) @function.name) @function.def (function_declaration name: (simple_identifier) @function.name function_body: (function_body) @function.body) 
@function.def """, "classes": """ (class_declaration name: (simple_identifier) @class.name) @class.def (class_declaration name: (simple_identifier) @class.name class_body: (class_body) @class.body) @class.def """, "interfaces": """ (interface_declaration name: (simple_identifier) @interface.name) @interface.def (interface_declaration name: (simple_identifier) @interface.name class_body: (class_body) @interface.body) @interface.def """, "imports": """ (import_header) @import (import_header identifier: (identifier) @import.id) @import.simple (import_header identifier: (dot_qualified_expression) @import.qualified) @import.qualified (import_header import_alias: (import_alias name: (simple_identifier) @import.alias)) @import.aliased """, "properties": """ (property_declaration variable_declaration: (variable_declaration simple_identifier: (simple_identifier) @property.name)) @property.def """, "dataClasses": """ (class_declaration type: (type_modifiers (type_modifier "data" @data_class.modifier)) name: (simple_identifier) @data_class.name) @data_class.def """, } ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/di.py: -------------------------------------------------------------------------------- ```python """Dependency injection container for MCP Tree-sitter Server. This module provides a central container for managing all application dependencies, replacing the global variables and singletons previously used throughout the codebase. """ from typing import Any, Dict # Import logging from bootstrap package from .bootstrap import get_logger from .cache.parser_cache import TreeCache from .config import ConfigurationManager, ServerConfig from .language.registry import LanguageRegistry from .models.project import ProjectRegistry logger = get_logger(__name__) class DependencyContainer: """Container for all application dependencies.""" def __init__(self) -> None: """Initialize container with all core dependencies.""" logger.debug("Initializing dependency container") # Create core dependencies self.config_manager = ConfigurationManager() self._config = self.config_manager.get_config() self.project_registry = ProjectRegistry() self.language_registry = LanguageRegistry() self.tree_cache = TreeCache( max_size_mb=self._config.cache.max_size_mb, ttl_seconds=self._config.cache.ttl_seconds ) # Storage for any additional dependencies self._additional: Dict[str, Any] = {} def get_config(self) -> ServerConfig: """Get the current configuration.""" # Always get the latest from the config manager config = self.config_manager.get_config() return config def register_dependency(self, name: str, instance: Any) -> None: """Register an additional dependency.""" self._additional[name] = instance def get_dependency(self, name: str) -> Any: """Get a registered dependency.""" return self._additional.get(name) # Create the single container instance - this will be the ONLY global container = DependencyContainer() def get_container() -> DependencyContainer: """Get the dependency container.""" return container ``` -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- ```yaml name: Release on: release: types: [published] permissions: contents: read id-token: write jobs: release: runs-on: ubuntu-latest timeout-minutes: 5 steps: - uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v5 with: python-version: 
"3.12" - name: Install uv run: | curl -LsSf https://astral.sh/uv/install.sh | sh echo "$HOME/.cargo/bin" >> $GITHUB_PATH - name: Install development dependencies run: | uv venv source .venv/bin/activate uv pip install -e ".[dev]" - name: Run comprehensive tests run: | source .venv/bin/activate # Run linting and formatting ruff check . ruff format . --check mypy src/mcp_server_tree_sitter # Run all tests (regular + diagnostics) pytest tests pytest tests/test_diagnostics/ -v env: PYTHONPATH: ${{ github.workspace }}/src - name: Ensure diagnostic results directory exists if: always() run: mkdir -p diagnostic_results - name: Create placeholder if needed if: always() run: | if [ -z "$(ls -A diagnostic_results 2>/dev/null)" ]; then echo '{"info": "No diagnostic results generated"}' > diagnostic_results/placeholder.json fi - name: Archive diagnostic results if: always() uses: actions/upload-artifact@v4 with: name: diagnostic-results-release path: diagnostic_results/ retention-days: 7 if-no-files-found: warn - name: Install build dependencies run: | source .venv/bin/activate uv pip install build twine - name: Build package run: | source .venv/bin/activate python -m build - name: Test wheel run: | python -m pip install dist/*.whl mcp-server-tree-sitter --help - name: Publish to PyPI uses: pypa/gh-action-pypi-publish@release/v1 ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/utils/security.py: -------------------------------------------------------------------------------- ```python """Security utilities for mcp-server-tree-sitter.""" import logging from pathlib import Path from typing import Union from ..api import get_config from ..exceptions import SecurityError def validate_file_access(file_path: Union[str, Path], project_root: Union[str, Path]) -> None: """ Validate a file can be safely accessed. 
Args: file_path: Path to validate project_root: Project root directory Raises: SecurityError: If path fails validation """ # Always get a fresh config for each validation config = get_config() logger = logging.getLogger(__name__) path_obj = Path(file_path) root_obj = Path(project_root) # Normalize paths to prevent directory traversal try: normalized_path = path_obj.resolve() normalized_root = root_obj.resolve() except (ValueError, OSError) as e: raise SecurityError(f"Invalid path: {e}") from e # Check if path is inside project root if not str(normalized_path).startswith(str(normalized_root)): raise SecurityError(f"Access denied: {file_path} is outside project root") # Check excluded directories for excluded in config.security.excluded_dirs: if excluded in normalized_path.parts: raise SecurityError(f"Access denied to excluded directory: {excluded}") # Check file extension if restriction is enabled if config.security.allowed_extensions and path_obj.suffix.lower()[1:] not in config.security.allowed_extensions: raise SecurityError(f"File type not allowed: {path_obj.suffix}") # Check file size if it exists if normalized_path.exists() and normalized_path.is_file(): file_size_mb = normalized_path.stat().st_size / (1024 * 1024) max_file_size_mb = config.security.max_file_size_mb logger.debug(f"File size check: {file_size_mb:.2f}MB, limit: {max_file_size_mb}MB") if file_size_mb > max_file_size_mb: raise SecurityError(f"File too large: {file_size_mb:.2f}MB exceeds limit of {max_file_size_mb}MB") ``` -------------------------------------------------------------------------------- /tests/test_persistent_server.py: -------------------------------------------------------------------------------- ```python """Tests for the persistent MCP server implementation.""" import tempfile from mcp_server_tree_sitter.models.project import ProjectRegistry from mcp_server_tree_sitter.server import ( mcp, ) # Was previously importing from persistent_server # Use the actual project registry for persistence tests project_registry = ProjectRegistry() def test_persistent_mcp_instance() -> None: """Test that the persistent MCP instance works properly.""" # Simply check that the instance exists assert mcp is not None assert mcp.name == "tree_sitter" def test_persistent_project_registration() -> None: """Test that project registration persists across different functions.""" # We can't directly clear projects in the new design # Instead, let's just work with existing ones # Create a temporary directory with tempfile.TemporaryDirectory() as temp_dir: project_name = "persistent_test" # Register a project directly using the registry project = project_registry.register_project(project_name, temp_dir) # Verify it was registered assert project.name == project_name all_projects = project_registry.list_projects() project_names = [p["name"] for p in all_projects] assert project_name in project_names # Get the project again to verify persistence project2 = project_registry.get_project(project_name) assert project2.name == project_name # List projects to verify it's included projects = project_registry.list_projects() assert any(p["name"] == project_name for p in projects) def test_project_registry_singleton() -> None: """Test that project_registry is a singleton that persists.""" # Check singleton behavior registry1 = ProjectRegistry() registry2 = ProjectRegistry() # Should be the same instance assert registry1 is registry2 # Get projects from both registries projects1 = registry1.list_projects() projects2 = registry2.list_projects() # 
Should have the same number of projects assert len(projects1) == len(projects2) ``` -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- ```toml [build-system] requires = ["hatchling"] build-backend = "hatchling.build" [project] name = "mcp-server-tree-sitter" version = "0.5.1" description = "MCP Server for Tree-sitter code analysis" readme = "README.md" requires-python = ">=3.10" license = {text = "MIT"} authors = [ {name = "Wrale LTD", email = "[email protected]"} ] classifiers = [ "Development Status :: 3 - Alpha", "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", ] dependencies = [ "mcp[cli]>=0.12.0", "tree-sitter>=0.20.0", "tree-sitter-language-pack>=0.6.1", "pyyaml>=6.0", "pydantic>=2.0.0", "types-pyyaml>=6.0.12.20241230", ] [project.optional-dependencies] dev = [ "pytest>=7.0.0", "pytest-cov>=4.0.0", "ruff>=0.0.262", "mypy>=1.2.0", ] # Language support (now included via tree-sitter-language-pack) languages = [ # No individual languages needed as tree-sitter-language-pack provides all ] [project.urls] "Homepage" = "https://github.com/wrale/mcp-server-tree-sitter" "Bug Tracker" = "https://github.com/wrale/mcp-server-tree-sitter/issues" [project.scripts] mcp-server-tree-sitter = "mcp_server_tree_sitter.server:main" [tool.hatch.build.targets.wheel] packages = ["src/mcp_server_tree_sitter"] [tool.pytest.ini_options] testpaths = ["tests"] python_files = "test_*.py" python_classes = "Test*" python_functions = "test_*" markers = [ "diagnostic: mark test as producing diagnostic information", ] [tool.mypy] python_version = "3.10" warn_return_any = true warn_unused_configs = true disallow_untyped_defs = true disallow_incomplete_defs = true [[tool.mypy.overrides]] module = "tree_sitter.*" ignore_missing_imports = true [[tool.mypy.overrides]] module = "tests.*" disallow_untyped_defs = false disallow_incomplete_defs = false check_untyped_defs = false warn_return_any = false warn_no_return = false [tool.ruff] line-length = 120 target-version = "py310" [tool.ruff.lint] select = ["E", "F", "I", "W", "B"] ``` -------------------------------------------------------------------------------- /docs/cli.md: -------------------------------------------------------------------------------- ```markdown # MCP Tree-sitter Server CLI Guide This document explains the command-line interface (CLI) for the MCP Tree-sitter Server, including available options and usage patterns. 
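For quick reference, the most common invocation combines a configuration file with debug logging; both options, and the rest of the flags, are described in detail below.

```bash
# Assumes the package is installed and that config.yaml exists in the current directory
mcp-server-tree-sitter --config ./config.yaml --debug
```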
## Command-Line Arguments The MCP Tree-sitter Server provides a command-line interface with several options: ```bash mcp-server-tree-sitter [options] ``` ### Available Options | Option | Description | |--------|-------------| | `--help` | Show help message and exit | | `--version` | Show version information and exit | | `--config CONFIG` | Path to configuration file | | `--debug` | Enable debug logging | | `--disable-cache` | Disable parse tree caching | ### Examples Display help information: ```bash mcp-server-tree-sitter --help ``` Show version information: ```bash mcp-server-tree-sitter --version ``` Run with a custom configuration file: ```bash mcp-server-tree-sitter --config /path/to/config.yaml ``` Enable debug logging: ```bash mcp-server-tree-sitter --debug ``` Disable parse tree caching: ```bash mcp-server-tree-sitter --disable-cache ``` ## Running with MCP The server can also be run using the MCP command-line interface: ```bash # Run the server mcp run mcp_server_tree_sitter.server # Run with the MCP Inspector mcp dev mcp_server_tree_sitter.server ``` You can pass the same arguments to these commands: ```bash # Enable debug logging mcp run mcp_server_tree_sitter.server --debug # Use a custom configuration file with the inspector mcp dev mcp_server_tree_sitter.server --config /path/to/config.yaml ``` ## Using Makefile Targets For convenience, the project provides Makefile targets for common operations: ```bash # Show available targets make # Run the server with default settings make mcp-run # Run with specific arguments make mcp-run ARGS="--debug --config /path/to/config.yaml" # Run with the inspector make mcp-dev ARGS="--debug" ``` ## Environment Variables The server also supports configuration through environment variables: ```bash # Set log level export MCP_TS_LOG_LEVEL=DEBUG # Set configuration file path export MCP_TS_CONFIG_PATH=/path/to/config.yaml # Run the server mcp-server-tree-sitter ``` See the [Configuration Guide](./config.md) for more details on environment variables and configuration options. ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/utils/path.py: -------------------------------------------------------------------------------- ```python """Path utilities for mcp-server-tree-sitter.""" import os from pathlib import Path from typing import Union def normalize_path(path: Union[str, Path], ensure_absolute: bool = False) -> Path: """ Normalize a path for cross-platform compatibility. Args: path: Path string or object ensure_absolute: If True, raises ValueError for relative paths Returns: Normalized Path object """ path_obj = Path(path).expanduser().resolve() if ensure_absolute and not path_obj.is_absolute(): raise ValueError(f"Path must be absolute: {path}") return path_obj def safe_relative_path(path: Union[str, Path], base: Union[str, Path]) -> Path: """ Safely get a relative path that prevents directory traversal attacks. Args: path: Target path base: Base directory that should contain the path Returns: Relative path object Raises: ValueError: If path attempts to escape base directory """ base_path = normalize_path(base) target_path = normalize_path(path) # Ensure target is within base try: relative = target_path.relative_to(base_path) # Check for directory traversal if ".." 
in str(relative).split(os.sep): raise ValueError(f"Path contains forbidden directory traversal: {path}") return relative except ValueError as e: raise ValueError(f"Path {path} is not within base directory {base}") from e def get_project_root(path: Union[str, Path]) -> Path: """ Attempt to determine project root from a file path by looking for common markers. Args: path: Path to start from (file or directory) Returns: Path to likely project root """ path_obj = normalize_path(path) # If path is a file, start from its directory if path_obj.is_file(): path_obj = path_obj.parent # Look for common project indicators markers = [ ".git", "pyproject.toml", "setup.py", "package.json", "Cargo.toml", "CMakeLists.txt", ".svn", "Makefile", ] # Start from path and go up directories until a marker is found current = path_obj while current != current.parent: # Stop at filesystem root for marker in markers: if (current / marker).exists(): return current current = current.parent # If no marker found, return original directory return path_obj ``` -------------------------------------------------------------------------------- /tests/test_basic.py: -------------------------------------------------------------------------------- ```python """Basic tests for mcp-server-tree-sitter.""" import tempfile from mcp_server_tree_sitter.config import ServerConfig from mcp_server_tree_sitter.language.registry import LanguageRegistry from mcp_server_tree_sitter.models.project import ProjectRegistry def test_config_default() -> None: """Test that default configuration is loaded.""" # Create a default configuration config = ServerConfig() # Check defaults assert config.cache.enabled is True assert config.cache.max_size_mb == 100 assert config.security.max_file_size_mb == 5 assert ".git" in config.security.excluded_dirs def test_project_registry() -> None: """Test project registry functionality.""" registry = ProjectRegistry() # Create a temporary directory with tempfile.TemporaryDirectory() as temp_dir: # Register a project project = registry.register_project("test", temp_dir) # Check project details assert project.name == "test" # Use os.path.samefile to compare paths instead of string comparison # This handles platform-specific path normalization # (e.g., /tmp -> /private/tmp on macOS) import os assert os.path.samefile(str(project.root_path), temp_dir) # List projects projects = registry.list_projects() assert len(projects) == 1 assert projects[0]["name"] == "test" # Get project project2 = registry.get_project("test") assert project2.name == "test" # Remove project registry.remove_project("test") projects = registry.list_projects() assert len(projects) == 0 def test_language_registry() -> None: """Test language registry functionality.""" registry = LanguageRegistry() # Test language detection assert registry.language_for_file("test.py") == "python" assert registry.language_for_file("script.js") == "javascript" assert registry.language_for_file("style.css") == "css" # Test available languages languages = registry.list_available_languages() assert isinstance(languages, list) # Test installable languages (should be empty now with language-pack) installable = registry.list_installable_languages() assert isinstance(installable, list) assert len(installable) == 0 # No languages need to be separately installed if __name__ == "__main__": # Run tests test_config_default() test_project_registry() test_language_registry() print("All tests passed!") ``` -------------------------------------------------------------------------------- 
/tests/test_language_listing.py: -------------------------------------------------------------------------------- ```python """Test for language listing functionality.""" from mcp_server_tree_sitter.language.registry import LanguageRegistry from tests.test_helpers import check_language_available, list_languages def test_list_available_languages() -> None: """Test that list_available_languages returns languages correctly.""" registry = LanguageRegistry() # Get available languages available_languages = registry.list_available_languages() # Check for common languages we expect to be available expected_languages = [ "python", "javascript", "typescript", "c", "cpp", "go", "rust", ] # Assert that we have languages available assert len(available_languages) > 0, "No languages available" # Assert that we find at least some of our expected languages for lang in expected_languages: assert lang in available_languages, f"Expected language {lang} not in available languages" def test_language_api_consistency() -> None: """Test consistency between language detection and language listing.""" registry = LanguageRegistry() # Test with a few common languages test_languages = [ "python", "javascript", "typescript", "c", "cpp", "go", "rust", ] # Check each language both through is_language_available and list_available_languages available_languages = registry.list_available_languages() for lang in test_languages: is_available = registry.is_language_available(lang) is_listed = lang in available_languages # Both methods should return the same result assert is_available == is_listed, f"Inconsistency for {lang}: available={is_available}, listed={is_listed}" def test_server_language_tools() -> None: """Test the server language tools.""" # Test list_languages languages_result = list_languages() assert "available" in languages_result, "Missing 'available' key in list_languages result" assert isinstance(languages_result["available"], list), "'available' should be a list" assert len(languages_result["available"]) > 0, "No languages available" # Test each language with check_language_available for lang in ["python", "javascript", "typescript"]: result = check_language_available(lang) assert result["status"] == "success", f"Language {lang} should be available" assert "message" in result, "Missing 'message' key in check_language_available result" if __name__ == "__main__": test_list_available_languages() test_language_api_consistency() test_server_language_tools() print("All tests passed!") ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/__main__.py: -------------------------------------------------------------------------------- ```python """Main entry point for mcp-server-tree-sitter.""" import argparse import os import sys from .bootstrap import get_logger, update_log_levels from .config import load_config from .context import global_context from .server import mcp # Get a properly configured logger logger = get_logger(__name__) def main() -> int: """Run the server with optional arguments.""" # Parse command line arguments parser = argparse.ArgumentParser(description="MCP Tree-sitter Server - Code analysis with tree-sitter") parser.add_argument("--config", help="Path to configuration file") parser.add_argument("--debug", action="store_true", help="Enable debug logging") parser.add_argument("--disable-cache", action="store_true", help="Disable parse tree caching") parser.add_argument("--version", action="store_true", help="Show version and exit") args = 
parser.parse_args() # Handle version display if args.version: import importlib.metadata try: version = importlib.metadata.version("mcp-server-tree-sitter") print(f"mcp-server-tree-sitter version {version}") except importlib.metadata.PackageNotFoundError: print("mcp-server-tree-sitter (version unknown - package not installed)") return 0 # Set up logging level if args.debug: # Set environment variable first for consistency os.environ["MCP_TS_LOG_LEVEL"] = "DEBUG" # Then update log levels update_log_levels("DEBUG") logger.debug("Debug logging enabled") # Load configuration try: config = load_config(args.config) # Update global context with config if args.config: global_context.config_manager.load_from_file(args.config) else: # Update individual settings from config global_context.config_manager.update_value("cache.enabled", config.cache.enabled) global_context.config_manager.update_value("cache.max_size_mb", config.cache.max_size_mb) global_context.config_manager.update_value("security.max_file_size_mb", config.security.max_file_size_mb) global_context.config_manager.update_value("language.default_max_depth", config.language.default_max_depth) logger.debug("Configuration loaded successfully") except Exception as e: logger.error(f"Error loading configuration: {e}") return 1 # Run the server try: logger.info("Starting MCP Tree-sitter Server (with state persistence)") mcp.run() except KeyboardInterrupt: logger.info("Server stopped by user") except Exception as e: logger.error(f"Error running server: {e}") return 1 return 0 if __name__ == "__main__": sys.exit(main()) ``` -------------------------------------------------------------------------------- /tests/test_ast_cursor.py: -------------------------------------------------------------------------------- ```python """Test the cursor-based AST implementation.""" import tempfile from pathlib import Path from mcp_server_tree_sitter.language.registry import LanguageRegistry from mcp_server_tree_sitter.models.ast_cursor import node_to_dict_cursor from mcp_server_tree_sitter.utils.file_io import read_binary_file from mcp_server_tree_sitter.utils.tree_sitter_helpers import create_parser, parse_source def test_cursor_based_ast() -> None: """Test that the cursor-based AST node_to_dict function works.""" # Create a temporary test file with tempfile.NamedTemporaryFile(suffix=".py", mode="w+") as f: f.write("def hello():\n print('Hello, world!')\n\nhello()\n") f.flush() file_path = Path(f.name) # Set up language registry registry = LanguageRegistry() language = registry.language_for_file(file_path.name) assert language is not None, "Could not detect language for test file" language_obj = registry.get_language(language) # Parse the file parser = create_parser(language_obj) source_bytes = read_binary_file(file_path) tree = parse_source(source_bytes, parser) # Get AST using cursor-based approach cursor_ast = node_to_dict_cursor(tree.root_node, source_bytes, max_depth=3) # Basic validation assert "id" in cursor_ast, "AST should include node ID" assert cursor_ast["type"] == "module", "Root node should be a module" assert "children" in cursor_ast, "AST should include children" assert len(cursor_ast["children"]) > 0, "AST should have at least one child" # Check function definition if cursor_ast["children"]: function_node = cursor_ast["children"][0] assert function_node["type"] == "function_definition", "Expected function definition" # Check if children are properly included assert "children" in function_node, "Function should have children" assert 
function_node["children_count"] > 0, "Function should have children" # Verify some function components exist function_children_types = [child["type"] for child in function_node["children"]] assert "identifier" in function_children_types, "Function should have identifier" # Verify text extraction works if available if "text" in function_node: # Check for 'hello' in the text, handling both string and bytes if isinstance(function_node["text"], bytes): assert b"hello" in function_node["text"], "Function text should contain 'hello'" else: assert "hello" in function_node["text"], "Function text should contain 'hello'" if __name__ == "__main__": test_cursor_based_ast() print("All tests passed!") ``` -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- ```yaml name: CI on: push: branches: [ main ] pull_request: branches: [ main ] jobs: test: runs-on: ubuntu-latest strategy: matrix: python-version: ["3.12"] install-method: ["uv", "uvx"] steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install uv run: | curl -LsSf https://astral.sh/uv/install.sh | sh echo "$HOME/.cargo/bin" >> $GITHUB_PATH - name: Install dependencies with uv if: matrix.install-method == 'uv' run: | uv venv source .venv/bin/activate uv pip install -e ".[dev]" which ruff which python - name: Install globally with uvx (system-wide) if: matrix.install-method == 'uvx' run: | python -m pip install -e ".[dev]" which ruff which python - name: Run checks and tests (uv) if: matrix.install-method == 'uv' run: | source .venv/bin/activate # Linting and formatting ruff check . ruff format . --check mypy src/mcp_server_tree_sitter # Run all tests including diagnostics pytest tests pytest tests/test_diagnostics/ -v env: PYTHONPATH: ${{ github.workspace }}/src - name: Run checks and tests (system) if: matrix.install-method == 'uvx' run: | # Linting and formatting ruff check . ruff format . 
--check mypy src/mcp_server_tree_sitter # Run all tests including diagnostics pytest tests pytest tests/test_diagnostics/ -v env: PYTHONPATH: ${{ github.workspace }}/src - name: Ensure diagnostic results directory exists if: always() run: mkdir -p diagnostic_results - name: Create placeholder if needed if: always() run: | if [ -z "$(ls -A diagnostic_results 2>/dev/null)" ]; then echo '{"info": "No diagnostic results generated"}' > diagnostic_results/placeholder.json fi - name: Archive diagnostic results if: always() uses: actions/upload-artifact@v4 with: name: diagnostic-results-${{ matrix.install-method }} path: diagnostic_results/ retention-days: 7 if-no-files-found: warn verify-uvx: runs-on: ubuntu-latest timeout-minutes: 5 steps: - uses: actions/checkout@v4 - name: Set up Python 3.12 uses: actions/setup-python@v5 with: python-version: "3.12" - name: Install build dependencies run: | python -m pip install build python -m pip install uv - name: Build package run: python -m build - name: Install and verify run: | python -m pip install dist/*.whl mcp-server-tree-sitter --help ``` -------------------------------------------------------------------------------- /tests/test_cli_arguments.py: -------------------------------------------------------------------------------- ```python """Tests for command-line argument handling.""" import subprocess import sys from unittest.mock import patch import pytest from mcp_server_tree_sitter.server import main def test_help_flag_does_not_start_server(): """Test that --help flag prints help and doesn't start the server.""" # Use subprocess to test the actual command result = subprocess.run( [sys.executable, "-m", "mcp_server_tree_sitter", "--help"], capture_output=True, text=True, check=False, ) # Check that it exited successfully assert result.returncode == 0 # Check that the help text was printed assert "MCP Tree-sitter Server" in result.stdout assert "--help" in result.stdout assert "--config" in result.stdout # Server should not have started - no startup messages assert "Starting MCP Tree-sitter Server" not in result.stdout def test_version_flag_exits_without_starting_server(): """Test that --version shows version and exits without starting the server.""" result = subprocess.run( [sys.executable, "-m", "mcp_server_tree_sitter", "--version"], capture_output=True, text=True, check=False, ) # Check that it exited successfully assert result.returncode == 0 # Check that the version was printed assert "mcp-server-tree-sitter version" in result.stdout # Server should not have started assert "Starting MCP Tree-sitter Server" not in result.stdout def test_direct_script_help_flag(): """Test that mcp-server-tree-sitter --help works correctly when called as a script.""" # This uses a mock to avoid actually calling the script binary with ( patch("sys.argv", ["mcp-server-tree-sitter", "--help"]), patch("argparse.ArgumentParser.parse_args") as mock_parse_args, # We don't actually need to use mock_exit in the test, # but we still want to patch sys.exit to prevent actual exits patch("sys.exit"), ): # Mock the ArgumentParser.parse_args to simulate --help behavior # When --help is used, argparse exits with code 0 after printing help mock_parse_args.side_effect = SystemExit(0) # This should catch the SystemExit raised by parse_args with pytest.raises(SystemExit) as excinfo: main() # Verify it's exiting with code 0 (success) assert excinfo.value.code == 0 def test_entry_point_implementation(): """Verify that the entry point properly uses argparse for argument handling.""" 
import inspect from mcp_server_tree_sitter.server import main # Get the source code of the main function source = inspect.getsource(main) # Check that it's using argparse assert "argparse.ArgumentParser" in source assert "parse_args" in source # Check for proper handling of key flags assert "--help" in source or "automatically" in source # argparse adds --help automatically assert "--version" in source ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/tools/debug.py: -------------------------------------------------------------------------------- ```python """Debug tools for diagnosing configuration issues.""" from pathlib import Path from typing import Any, Dict import yaml from ..config import ServerConfig, update_config_from_new from ..context import global_context def diagnose_yaml_config(config_path: str) -> Dict[str, Any]: """Diagnose issues with YAML configuration loading. Args: config_path: Path to YAML config file Returns: Dictionary with diagnostic information """ result = { "file_path": config_path, "exists": False, "readable": False, "yaml_valid": False, "parsed_data": None, "config_before": None, "config_after": None, "error": None, } # Check if file exists path_obj = Path(config_path) result["exists"] = path_obj.exists() if not result["exists"]: result["error"] = f"File does not exist: {config_path}" return result # Check if file is readable try: with open(path_obj, "r") as f: content = f.read() result["readable"] = True result["file_content"] = content except Exception as e: result["error"] = f"Error reading file: {str(e)}" return result # Try to parse YAML try: config_data = yaml.safe_load(content) result["yaml_valid"] = True result["parsed_data"] = config_data except Exception as e: result["error"] = f"Error parsing YAML: {str(e)}" return result # Check if parsed data is None or empty if config_data is None: result["error"] = "YAML parser returned None (file empty or contains only comments)" return result if not isinstance(config_data, dict): result["error"] = f"YAML parser returned non-dict: {type(config_data)}" return result # Try creating a new config try: # Get current config current_config = global_context.get_config() result["config_before"] = { "cache.max_size_mb": current_config.cache.max_size_mb, "security.max_file_size_mb": current_config.security.max_file_size_mb, "language.default_max_depth": current_config.language.default_max_depth, } # Create new config from parsed data new_config = ServerConfig(**config_data) # Before update result["new_config"] = { "cache.max_size_mb": new_config.cache.max_size_mb, "security.max_file_size_mb": new_config.security.max_file_size_mb, "language.default_max_depth": new_config.language.default_max_depth, } # Update config update_config_from_new(current_config, new_config) # After update result["config_after"] = { "cache.max_size_mb": current_config.cache.max_size_mb, "security.max_file_size_mb": current_config.security.max_file_size_mb, "language.default_max_depth": current_config.language.default_max_depth, } except Exception as e: result["error"] = f"Error updating config: {str(e)}" return result return result ``` -------------------------------------------------------------------------------- /tests/test_makefile_targets.py: -------------------------------------------------------------------------------- ```python """Tests for Makefile targets to ensure they execute correctly.""" import os import re import subprocess from pathlib import Path def 
test_makefile_target_syntax(): """Test that critical Makefile targets are correctly formed.""" # Get the Makefile content makefile_path = Path(__file__).parent.parent / "Makefile" with open(makefile_path, "r") as f: makefile_content = f.read() # Test mcp targets - they should use uv run mcp directly mcp_target_pattern = r"mcp-(run|dev|install):\n\t\$\(UV\) run mcp" mcp_targets = re.findall(mcp_target_pattern, makefile_content) # We should find at least 3 matches (run, dev, install) assert len(mcp_targets) >= 3, "Missing proper mcp invocation in Makefile targets" # Check for correct server module reference assert "$(PACKAGE).server" in makefile_content, "Server module reference is incorrect" # Custom test for mcp-run mcp_run_pattern = r"mcp-run:.*\n\t\$\(UV\) run mcp run \$\(PACKAGE\)\.server" assert re.search(mcp_run_pattern, makefile_content), "mcp-run target is incorrectly formed" # Test that help is the default target assert ".PHONY: all help" in makefile_content, "help is not properly declared as .PHONY" assert "help: show-help" in makefile_content, "help is not properly set as default target" def test_makefile_target_execution(): """Test that Makefile targets execute correctly when invoked with --help.""" # We'll only try the --help flag since we don't want to actually start the server # Skip if not in a development environment if not os.path.exists("Makefile"): print("Skipping test_makefile_target_execution: Makefile not found") return # Skip this test in CI environment if os.environ.get("CI") == "true" or os.environ.get("GITHUB_ACTIONS") == "true": print("Skipping test_makefile_target_execution in CI environment") return # Test mcp-run with --help try: # Use the make target with --help appended to see if it resolves correctly # We capture stderr because sometimes help messages go there result = subprocess.run( ["make", "mcp-run", "ARGS=--help"], capture_output=True, text=True, timeout=5, # Don't let this run too long check=False, env={**os.environ, "MAKEFLAGS": ""}, # Clear any inherited make flags ) # The run shouldn't fail catastrophically assert "File not found" not in result.stderr, "mcp-run can't find the module" # We expect to see help text in the output (stdout or stderr) output = result.stdout + result.stderr has_usage = "usage:" in output.lower() or "mcp run" in output # We don't fail the test if the help check fails - this is more of a warning # since the environment might not be set up to run make directly if not has_usage: print("WARNING: Couldn't verify mcp-run --help output; environment may not be properly configured") except (subprocess.SubprocessError, FileNotFoundError) as e: # Don't fail the test if we can't run make print(f"WARNING: Couldn't execute make command; skipping execution check: {e}") ``` -------------------------------------------------------------------------------- /tests/test_env_config.py: -------------------------------------------------------------------------------- ```python """Tests for environment variable configuration overrides.""" import os import tempfile import pytest import yaml from mcp_server_tree_sitter.config import ConfigurationManager @pytest.fixture def temp_yaml_file(): """Create a temporary YAML file with test configuration.""" with tempfile.NamedTemporaryFile(suffix=".yaml", mode="w+", delete=False) as temp_file: test_config = { "cache": {"enabled": True, "max_size_mb": 256, "ttl_seconds": 3600}, "security": {"max_file_size_mb": 10, "excluded_dirs": [".git", "node_modules", "__pycache__", ".cache"]}, "language": 
{"auto_install": True, "default_max_depth": 7}, } yaml.dump(test_config, temp_file) temp_file.flush() temp_file_path = temp_file.name yield temp_file_path # Clean up os.unlink(temp_file_path) def test_env_overrides_defaults(monkeypatch): """Environment variables should override hard-coded defaults.""" # Using single underscore format that matches current implementation monkeypatch.setenv("MCP_TS_CACHE_MAX_SIZE_MB", "512") mgr = ConfigurationManager() cfg = mgr.get_config() assert cfg.cache.max_size_mb == 512, "Environment variable should override default value" # ensure other defaults stay intact assert cfg.security.max_file_size_mb == 5 assert cfg.language.default_max_depth == 5 def test_env_overrides_yaml(temp_yaml_file, monkeypatch): """Environment variables should take precedence over YAML values.""" # YAML sets 256; env var must win with 1024 # Using single underscore format that matches current implementation monkeypatch.setenv("MCP_TS_CACHE_MAX_SIZE_MB", "1024") # Also set a security env var to verify multiple variables work monkeypatch.setenv("MCP_TS_SECURITY_MAX_FILE_SIZE_MB", "15") mgr = ConfigurationManager() # First load the YAML file mgr.load_from_file(temp_yaml_file) # Get the loaded config cfg = mgr.get_config() # Verify environment variables override YAML settings assert cfg.cache.max_size_mb == 1024, "Environment variable should override YAML values" assert cfg.security.max_file_size_mb == 15, "Environment variable should override YAML values" # But YAML values that aren't overridden by env vars should remain assert cfg.cache.ttl_seconds == 3600 assert cfg.language.default_max_depth == 7 assert cfg.language.auto_install is True def test_log_level_env_var(monkeypatch): """Test the specific MCP_TS_LOG_LEVEL variable that was the original issue.""" monkeypatch.setenv("MCP_TS_LOG_LEVEL", "DEBUG") mgr = ConfigurationManager() cfg = mgr.get_config() assert cfg.log_level == "DEBUG", "Log level should be set from environment variable" def test_invalid_env_var_handling(monkeypatch): """Test that invalid environment variable values don't crash the system.""" # Set an invalid value for an integer field monkeypatch.setenv("MCP_TS_CACHE_MAX_SIZE_MB", "not_a_number") # This should not raise an exception mgr = ConfigurationManager() cfg = mgr.get_config() # The default value should be used assert cfg.cache.max_size_mb == 100, "Invalid values should fall back to defaults" ``` -------------------------------------------------------------------------------- /docs/tree-sitter-type-safety.md: -------------------------------------------------------------------------------- ```markdown # Tree-sitter Type Safety Guide This document explains our approach to type safety when interfacing with the tree-sitter library and why certain type-checking suppressions are necessary. ## Background The MCP Tree-sitter Server maintains type safety through Python's type hints and mypy verification. However, when interfacing with external libraries like tree-sitter, we encounter challenges: 1. Tree-sitter's Python bindings have inconsistent API signatures across versions 2. Tree-sitter objects don't always match our protocol definitions 3. The library may work at runtime but fail static type checking ## Type Suppression Strategy We use targeted `# type: ignore` comments to handle specific scenarios where mypy can't verify correctness, but our runtime code handles the variations properly. 
### Examples of Necessary Type Suppressions #### Parser Interface Variations Some versions of tree-sitter use `set_language()` while others use `language` as the attribute/method: ```python try: parser.set_language(safe_language) # type: ignore except AttributeError: if hasattr(parser, 'language'): # Use the language method if available parser.language = safe_language # type: ignore else: # Fallback to setting the attribute directly parser.language = safe_language # type: ignore ``` #### Node Handling Safety For cursor navigation and tree traversal, we need to handle potential `None` values: ```python def visit(node: Optional[Node], field_name: Optional[str], depth: int) -> bool: if node is None: return False # Continue with node operations... ``` ## Guidelines for Using Type Suppressions 1. **Be specific**: Always use `# type: ignore` on the exact line with the issue, not for entire blocks or files 2. **Add comments**: Explain why the suppression is necessary 3. **Try alternatives first**: Only use suppressions after trying to fix the actual type issue 4. **Include runtime checks**: Always pair suppressions with runtime checks (try/except, if hasattr, etc.) ## Our Pattern for Library Compatibility We follow a consistent pattern for tree-sitter API compatibility: 1. **Define Protocols**: Use Protocol classes to define expected interfaces 2. **Safe Type Casting**: Use wrapper functions like `ensure_node()` to safely cast objects 3. **Feature Detection**: Use `hasattr()` checks before accessing attributes 4. **Fallback Mechanisms**: Provide multiple ways to accomplish the same task 5. **Graceful Degradation**: Handle missing features by providing simplified alternatives ## Testing Approach Even with type suppressions, we ensure correctness through: 1. Comprehensive test coverage for different tree-sitter operations 2. Tests with and without tree-sitter installed to verify fallback mechanisms 3. Runtime verification of object capabilities before operations ## When to Update Type Suppressions Review and potentially remove type suppressions when: 1. Upgrading minimum supported tree-sitter version 2. Refactoring the interface to the tree-sitter library 3. Adding new wrapper functions that can handle type variations 4. Improving Protocol definitions to better match runtime behavior By following these guidelines, we maintain a balance between static type safety and runtime flexibility when working with the tree-sitter library. ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/api.py: -------------------------------------------------------------------------------- ```python """API functions for accessing container dependencies. This module provides function-based access to dependencies managed by the container, helping to break circular import chains and simplify access. 
""" import logging from typing import Any, Dict, List, Optional from .di import get_container from .exceptions import ProjectError logger = logging.getLogger(__name__) def get_project_registry() -> Any: """Get the project registry.""" return get_container().project_registry def get_language_registry() -> Any: """Get the language registry.""" return get_container().language_registry def get_tree_cache() -> Any: """Get the tree cache.""" return get_container().tree_cache def get_config() -> Any: """Get the current configuration.""" return get_container().get_config() def get_config_manager() -> Any: """Get the configuration manager.""" return get_container().config_manager def register_project(path: str, name: Optional[str] = None, description: Optional[str] = None) -> Dict[str, Any]: """Register a project.""" project_registry = get_project_registry() language_registry = get_language_registry() try: # Register project project = project_registry.register_project(name or path, path, description) # Scan for languages project.scan_files(language_registry) project_dict = project.to_dict() # Add type annotations result: Dict[str, Any] = { "name": project_dict["name"], "root_path": project_dict["root_path"], "description": project_dict["description"], "languages": project_dict["languages"], "last_scan_time": project_dict["last_scan_time"], } return result except Exception as e: raise ProjectError(f"Failed to register project: {e}") from e def list_projects() -> List[Dict[str, Any]]: """List all registered projects.""" projects_list = get_project_registry().list_projects() # Convert to explicitly typed list result: List[Dict[str, Any]] = [] for project in projects_list: result.append( { "name": project["name"], "root_path": project["root_path"], "description": project["description"], "languages": project["languages"], "last_scan_time": project["last_scan_time"], } ) return result def remove_project(name: str) -> Dict[str, str]: """Remove a registered project.""" get_project_registry().remove_project(name) return {"status": "success", "message": f"Project '{name}' removed"} def clear_cache(project: Optional[str] = None, file_path: Optional[str] = None) -> Dict[str, str]: """Clear the parse tree cache.""" tree_cache = get_tree_cache() if project and file_path: # Get file path project_registry = get_project_registry() project_obj = project_registry.get_project(project) abs_path = project_obj.get_file_path(file_path) # Clear cache tree_cache.invalidate(abs_path) return {"status": "success", "message": f"Cache cleared for {file_path} in {project}"} else: # Clear all tree_cache.invalidate() return {"status": "success", "message": "Cache cleared"} ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/tools/project.py: -------------------------------------------------------------------------------- ```python """Project management tools for MCP server.""" from typing import Any, Dict, List, Optional from ..api import get_language_registry, get_project_registry from ..exceptions import ProjectError def register_project(path: str, name: Optional[str] = None, description: Optional[str] = None) -> Dict[str, Any]: """ Register a project for code analysis. 
Args: path: Path to the project directory name: Optional name for the project (defaults to directory name) description: Optional description Returns: Project information """ # Get dependencies from API project_registry = get_project_registry() language_registry = get_language_registry() try: # Register project project = project_registry.register_project(name or path, path, description) # Scan for languages project.scan_files(language_registry) project_dict = project.to_dict() # Add type annotations for clarity result: Dict[str, Any] = { "name": project_dict["name"], "root_path": project_dict["root_path"], "description": project_dict["description"], "languages": project_dict["languages"], "last_scan_time": project_dict["last_scan_time"], } return result except Exception as e: raise ProjectError(f"Failed to register project: {e}") from e def get_project(name: str) -> Dict[str, Any]: """ Get project information. Args: name: Project name Returns: Project information """ # Get dependency from API project_registry = get_project_registry() try: project = project_registry.get_project(name) project_dict = project.to_dict() # Add type annotations for clarity result: Dict[str, Any] = { "name": project_dict["name"], "root_path": project_dict["root_path"], "description": project_dict["description"], "languages": project_dict["languages"], "last_scan_time": project_dict["last_scan_time"], } return result except Exception as e: raise ProjectError(f"Failed to get project: {e}") from e def list_projects() -> List[Dict[str, Any]]: """ List all registered projects. Returns: List of project information """ # Get dependency from API project_registry = get_project_registry() projects_list = project_registry.list_projects() # Explicitly create a typed list result: List[Dict[str, Any]] = [] for project in projects_list: result.append( { "name": project["name"], "root_path": project["root_path"], "description": project["description"], "languages": project["languages"], "last_scan_time": project["last_scan_time"], } ) return result def remove_project(name: str) -> Dict[str, str]: """ Remove a project. 
Args: name: Project name Returns: Success message """ # Get dependency from API project_registry = get_project_registry() try: project_registry.remove_project(name) return {"status": "success", "message": f"Project '{name}' removed"} except Exception as e: raise ProjectError(f"Failed to remove project: {e}") from e ``` -------------------------------------------------------------------------------- /tests/test_diagnostics/test_ast.py: -------------------------------------------------------------------------------- ```python """Example of using pytest with diagnostic plugin for testing.""" import tempfile from pathlib import Path import pytest from mcp_server_tree_sitter.api import get_project_registry from mcp_server_tree_sitter.language.registry import LanguageRegistry from tests.test_helpers import get_ast, register_project_tool # Load the diagnostic fixture pytest.importorskip("mcp_server_tree_sitter.testing") @pytest.fixture def test_project(): """Create a temporary test project with a sample file.""" # Set up a temporary directory with tempfile.TemporaryDirectory() as temp_dir: project_path = Path(temp_dir) # Create a test file test_file = project_path / "test.py" with open(test_file, "w") as f: f.write("def hello():\n print('Hello, world!')\n\nhello()\n") # Register project project_name = "diagnostic_test_project" register_project_tool(path=str(project_path), name=project_name) # Yield the project info yield {"name": project_name, "path": project_path, "file": "test.py"} # Clean up project_registry = get_project_registry() try: project_registry.remove_project(project_name) except Exception: pass @pytest.mark.diagnostic def test_ast_failure(test_project, diagnostic) -> None: """Test the get_ast functionality.""" # Add test details to diagnostic data diagnostic.add_detail("project", test_project["name"]) diagnostic.add_detail("file", test_project["file"]) try: # Try to get the AST ast_result = get_ast( project=test_project["name"], path=test_project["file"], max_depth=3, include_text=True, ) # Add the result to diagnostics diagnostic.add_detail("ast_result", str(ast_result)) # This assertion would fail if there's an issue with AST parsing assert "tree" in ast_result, "AST result should contain a tree" # Check that the tree doesn't contain an error if isinstance(ast_result["tree"], dict) and "error" in ast_result["tree"]: raise AssertionError(f"AST tree contains an error: {ast_result['tree']['error']}") except Exception as e: # Record the error in diagnostics diagnostic.add_error("AstParsingError", str(e)) # Create the artifact artifact = { "error_type": type(e).__name__, "error_message": str(e), "project": test_project["name"], "file": test_project["file"], } diagnostic.add_artifact("ast_failure", artifact) # Re-raise to fail the test raise @pytest.mark.diagnostic def test_language_detection(diagnostic) -> None: """Test language detection functionality.""" registry = LanguageRegistry() # Test a few common file extensions test_files = { "test.py": "python", "test.js": "javascript", "test.ts": "typescript", "test.unknown": None, } results = {} failures = [] for filename, expected in test_files.items(): detected = registry.language_for_file(filename) match = detected == expected results[filename] = {"detected": detected, "expected": expected, "match": match} if not match: failures.append(filename) # Add all results to diagnostic data diagnostic.add_detail("detection_results", results) if failures: diagnostic.add_detail("failed_files", failures) # Check results with proper assertions for 
filename, expected in test_files.items(): assert registry.language_for_file(filename) == expected, f"Language detection failed for {filename}" ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/tools/ast_operations.py: -------------------------------------------------------------------------------- ```python """AST operation tools for MCP server.""" import logging from typing import Any, Dict, Optional from ..exceptions import FileAccessError, ParsingError from ..models.ast import node_to_dict from ..utils.file_io import read_binary_file from ..utils.security import validate_file_access from ..utils.tree_sitter_helpers import ( parse_source, ) logger = logging.getLogger(__name__) def get_file_ast( project: Any, path: str, language_registry: Any, tree_cache: Any, max_depth: Optional[int] = None, include_text: bool = True, ) -> Dict[str, Any]: """ Get the AST for a file. Args: project: Project object path: File path (relative to project root) language_registry: Language registry tree_cache: Tree cache instance max_depth: Maximum depth to traverse the tree include_text: Whether to include node text Returns: AST as a nested dictionary Raises: FileAccessError: If file access fails ParsingError: If parsing fails """ abs_path = project.get_file_path(path) try: validate_file_access(abs_path, project.root_path) except Exception as e: raise FileAccessError(f"Access denied: {e}") from e language = language_registry.language_for_file(path) if not language: raise ParsingError(f"Could not detect language for {path}") tree, source_bytes = parse_file(abs_path, language, language_registry, tree_cache) return { "file": path, "language": language, "tree": node_to_dict( tree.root_node, source_bytes, include_children=True, include_text=include_text, max_depth=max_depth if max_depth is not None else 5, ), } def parse_file(file_path: Any, language: str, language_registry: Any, tree_cache: Any) -> tuple[Any, bytes]: """ Parse a file using tree-sitter. Args: file_path: Path to file language: Language identifier language_registry: Language registry tree_cache: Tree cache instance Returns: (Tree, source_bytes) tuple Raises: ParsingError: If parsing fails """ # Always check the cache first, even if caching is disabled # This ensures cache misses are tracked correctly in tests cached = tree_cache.get(file_path, language) if cached: tree, bytes_data = cached return tree, bytes_data try: # Parse the file using helper parser = language_registry.get_parser(language) # Use source directly with parser to avoid parser vs. language confusion source_bytes = read_binary_file(file_path) tree = parse_source(source_bytes, parser) result_tuple = (tree, source_bytes) # Cache the tree only if caching is enabled is_cache_enabled = False try: # Get cache enabled state from tree_cache is_cache_enabled = tree_cache._is_cache_enabled() except Exception: # Fallback to instance value if method not available is_cache_enabled = getattr(tree_cache, "enabled", False) # Store in cache only if enabled if is_cache_enabled: tree_cache.put(file_path, language, tree, source_bytes) return result_tuple except Exception as e: raise ParsingError(f"Error parsing {file_path}: {e}") from e def find_node_at_position(root_node: Any, row: int, column: int) -> Optional[Any]: """ Find the most specific node at a given position. 
Args: root_node: Root node to search from row: Row (line) number, 0-based column: Column number, 0-based Returns: Node at position or None if not found """ from ..models.ast import find_node_at_position as find_node return find_node(root_node, row, column) ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/utils/file_io.py: -------------------------------------------------------------------------------- ```python """Utilities for safe file operations. This module provides safe file I/O operations with proper encoding handling and consistent interfaces for both text and binary operations. """ from pathlib import Path from typing import List, Optional, Tuple, Union def read_text_file(path: Union[str, Path]) -> List[str]: """ Safely read a text file with proper encoding handling. Args: path: Path to the file Returns: List of lines from the file """ with open(str(path), "r", encoding="utf-8", errors="replace") as f: return f.readlines() def read_binary_file(path: Union[str, Path]) -> bytes: """ Safely read a binary file. Args: path: Path to the file Returns: File contents as bytes """ with open(str(path), "rb") as f: return f.read() def get_file_content_and_lines(path: Union[str, Path]) -> Tuple[bytes, List[str]]: """ Get both binary content and text lines from a file. Args: path: Path to the file Returns: Tuple of (binary_content, text_lines) """ binary_content = read_binary_file(path) text_lines = read_text_file(path) return binary_content, text_lines def is_line_comment(line: str, comment_prefix: str) -> bool: """ Check if a line is a comment. Args: line: The line to check comment_prefix: Comment prefix character(s) Returns: True if the line is a comment """ return line.strip().startswith(comment_prefix) def count_comment_lines(lines: List[str], comment_prefix: str) -> int: """ Count comment lines in a file. Args: lines: List of lines to check comment_prefix: Comment prefix character(s) Returns: Number of comment lines """ return sum(1 for line in lines if is_line_comment(line, comment_prefix)) def get_comment_prefix(language: str) -> Optional[str]: """ Get the comment prefix for a language. Args: language: Language identifier Returns: Comment prefix or None if unknown """ # Language-specific comment detection comment_starters = { "python": "#", "javascript": "//", "typescript": "//", "java": "//", "c": "//", "cpp": "//", "go": "//", "ruby": "#", "rust": "//", "php": "//", "swift": "//", "kotlin": "//", "scala": "//", "bash": "#", "shell": "#", "yaml": "#", "html": "<!--", "css": "/*", "scss": "//", "sass": "//", "sql": "--", } return comment_starters.get(language) def parse_file_with_encoding(path: Union[str, Path], encoding: str = "utf-8") -> Tuple[bytes, List[str]]: """ Parse a file with explicit encoding handling, returning both binary and text. Args: path: Path to the file encoding: Text encoding to use Returns: Tuple of (binary_content, decoded_lines) """ binary_content = read_binary_file(path) # Now decode the binary content with the specified encoding text = binary_content.decode(encoding, errors="replace") lines = text.splitlines(True) # Keep line endings return binary_content, lines def read_file_lines(path: Union[str, Path], start_line: int = 0, max_lines: Optional[int] = None) -> List[str]: """ Read specific lines from a file. 
Args: path: Path to the file start_line: First line to include (0-based) max_lines: Maximum number of lines to return Returns: List of requested lines """ with open(str(path), "r", encoding="utf-8", errors="replace") as f: # Skip lines before start_line for _ in range(start_line): next(f, None) # Read up to max_lines if max_lines is not None: return [f.readline() for _ in range(max_lines)] else: return f.readlines() ``` -------------------------------------------------------------------------------- /TODO.md: -------------------------------------------------------------------------------- ```markdown # MCP Tree-sitter Server: TODO Board This Kanban board tracks tasks specifically focused on improving partially working commands and implementing missing features. ## In Progress ### High Priority --- #### Fix Similar Code Detection - **Description**: Improve the `find_similar_code` command to reliably return results - **Tasks**: - [ ] Debug why command completes but doesn't return results - [ ] Optimize similarity threshold and matching algorithm - [ ] Add more detailed logging for troubleshooting - [ ] Create comprehensive test cases with expected results - **Acceptance Criteria**: - Command reliably returns similar code snippets when they exist - Appropriate feedback when no similar code is found - Documentation updated with examples and recommended thresholds - **Complexity**: Medium - **Dependencies**: None #### Complete Tree Editing and Incremental Parsing - **Description**: Extend AST functionality to support tree manipulation - **Tasks**: - [ ] Implement tree editing operations (insert, delete, replace nodes) - [ ] Add incremental parsing to efficiently update trees after edits - [ ] Ensure node IDs remain consistent during tree manipulations - **Acceptance Criteria**: - Trees can be modified through API calls - Incremental parsing reduces parse time for small changes - Proper error handling for invalid modifications - **Complexity**: High - **Dependencies**: None ### Medium Priority --- #### Implement UTF-16 Support - **Description**: Add encoding detection and support for UTF-16 - **Tasks**: - [ ] Implement encoding detection for input files - [ ] Add UTF-16 to UTF-8 conversion for parser compatibility - [ ] Handle position mapping between different encodings - **Acceptance Criteria**: - Correctly parse and handle UTF-16 encoded files - Maintain accurate position information in different encodings - Test suite includes UTF-16 encoded files - **Complexity**: Medium - **Dependencies**: None #### Add Read Callable Support - **Description**: Implement custom read strategies for efficient large file handling - **Tasks**: - [ ] Create streaming parser interface for large files - [ ] Implement memory-efficient parsing strategy - [ ] Add support for custom read handlers - **Acceptance Criteria**: - Successfully parse files larger than memory constraints - Performance tests show acceptable parsing speed - Documentation on how to use custom read strategies - **Complexity**: High - **Dependencies**: None ## Ready for Review ### High Priority --- #### Complete MCP Context Progress Reporting - **Description**: Implement progress reporting for long-running operations - **Tasks**: - [ ] Add progress tracking to all long-running operations - [ ] Implement progress callbacks in the MCP context - [ ] Update API to report progress percentage - **Acceptance Criteria**: - Long-running operations report progress - Progress is visible to the user - Cancellation is possible for operations in progress - 
**Complexity**: Low - **Dependencies**: None ## Done *No tasks completed yet* ## Backlog ### Low Priority --- #### Add Image Handling Support - **Description**: Implement support for returning images/visualizations from tools - **Tasks**: - [ ] Create image generation utilities for AST visualization - [ ] Add support for returning images in MCP responses - [ ] Implement SVG or PNG export of tree structures - **Acceptance Criteria**: - Tools can return visual representations of code structures - AST visualizations can be generated and returned - **Complexity**: Medium - **Dependencies**: None --- ## Task Metadata ### Priority Levels - **High**: Critical for core functionality, should be addressed immediately - **Medium**: Important for comprehensive feature set, address after high priority items - **Low**: Nice to have, address when resources permit ### Complexity Levels - **Low**: Estimated 1-2 days of work - **Medium**: Estimated 3-5 days of work - **High**: Estimated 1-2 weeks of work ``` -------------------------------------------------------------------------------- /tests/test_config_manager.py: -------------------------------------------------------------------------------- ```python """Tests for the new ConfigurationManager class.""" import os import tempfile import pytest import yaml # Import will fail initially until we implement the class @pytest.fixture def temp_yaml_file(): """Create a temporary YAML file with test configuration.""" with tempfile.NamedTemporaryFile(suffix=".yaml", mode="w+", delete=False) as temp_file: test_config = { "cache": {"enabled": True, "max_size_mb": 256, "ttl_seconds": 3600}, "security": {"max_file_size_mb": 10, "excluded_dirs": [".git", "node_modules", "__pycache__", ".cache"]}, "language": {"auto_install": True, "default_max_depth": 7}, } yaml.dump(test_config, temp_file) temp_file.flush() temp_file_path = temp_file.name yield temp_file_path # Clean up os.unlink(temp_file_path) def test_config_manager_initialization(): """Test that ConfigurationManager initializes with default config.""" # This test will fail until we implement ConfigurationManager from mcp_server_tree_sitter.config import ConfigurationManager manager = ConfigurationManager() config = manager.get_config() # Check default values assert config.cache.max_size_mb == 100 assert config.security.max_file_size_mb == 5 assert config.language.default_max_depth == 5 def test_config_manager_load_from_file(temp_yaml_file): """Test loading configuration from a file.""" # This test will fail until we implement ConfigurationManager from mcp_server_tree_sitter.config import ConfigurationManager manager = ConfigurationManager() manager.load_from_file(temp_yaml_file) config = manager.get_config() # Check loaded values assert config.cache.max_size_mb == 256 assert config.security.max_file_size_mb == 10 assert config.language.default_max_depth == 7 def test_config_manager_update_values(): """Test updating individual configuration values.""" # This test will fail until we implement ConfigurationManager from mcp_server_tree_sitter.config import ConfigurationManager manager = ConfigurationManager() # Update values manager.update_value("cache.max_size_mb", 512) manager.update_value("security.max_file_size_mb", 20) # Check updated values config = manager.get_config() assert config.cache.max_size_mb == 512 assert config.security.max_file_size_mb == 20 def test_config_manager_to_dict(): """Test converting configuration to dictionary.""" # This test will fail until we implement ConfigurationManager from 
mcp_server_tree_sitter.config import ConfigurationManager manager = ConfigurationManager() config_dict = manager.to_dict() # Check dictionary structure assert "cache" in config_dict assert "security" in config_dict assert "language" in config_dict assert config_dict["cache"]["max_size_mb"] == 100 def test_env_overrides_defaults(monkeypatch): """Environment variables should override hard-coded defaults.""" monkeypatch.setenv("MCP_TS_CACHE_MAX_SIZE_MB", "512") from mcp_server_tree_sitter.config import ConfigurationManager mgr = ConfigurationManager() cfg = mgr.get_config() assert cfg.cache.max_size_mb == 512, "Environment variable should override default value" # ensure other defaults stay intact assert cfg.security.max_file_size_mb == 5 assert cfg.language.default_max_depth == 5 def test_env_overrides_yaml(temp_yaml_file, monkeypatch): """Environment variables should take precedence over YAML values.""" # YAML sets 256; env var must win with 1024 monkeypatch.setenv("MCP_TS_CACHE_MAX_SIZE_MB", "1024") monkeypatch.setenv("MCP_TS_SECURITY_MAX_FILE_SIZE_MB", "15") from mcp_server_tree_sitter.config import ConfigurationManager mgr = ConfigurationManager() mgr.load_from_file(temp_yaml_file) cfg = mgr.get_config() assert cfg.cache.max_size_mb == 1024, "Environment variable should override YAML value" assert cfg.security.max_file_size_mb == 15, "Environment variable should override YAML value" ``` -------------------------------------------------------------------------------- /tests/test_logging_early_init.py: -------------------------------------------------------------------------------- ```python """Test that logging configuration is applied early in application lifecycle.""" import importlib import logging import os from unittest.mock import MagicMock, patch def test_early_init_in_package(): """Test that logging is configured before other modules are imported.""" # Rather than mocking which won't work well with imports, # we'll check the actual package __init__.py file content import inspect import mcp_server_tree_sitter # Get the source code of the package __init__.py init_source = inspect.getsource(mcp_server_tree_sitter) # Verify bootstrap import is present and comes before other imports assert "from . import bootstrap" in init_source, "bootstrap should be imported in __init__.py" # Check the bootstrap/__init__.py to ensure it imports logging_bootstrap import mcp_server_tree_sitter.bootstrap bootstrap_init_source = inspect.getsource(mcp_server_tree_sitter.bootstrap) assert "from . 
import logging_bootstrap" in bootstrap_init_source, "bootstrap init should import logging_bootstrap" # Check that bootstrap's __all__ includes logging functions assert "get_logger" in mcp_server_tree_sitter.bootstrap.__all__, "get_logger should be exported by bootstrap" assert "update_log_levels" in mcp_server_tree_sitter.bootstrap.__all__, ( "update_log_levels should be exported by bootstrap" ) def test_configure_is_called_at_import(): """Test that the configure_root_logger is called when bootstrap is imported.""" # Mock the root logger configuration function with patch("logging.basicConfig") as mock_basic_config: # Force reload of the module to trigger initialization import mcp_server_tree_sitter.bootstrap.logging_bootstrap importlib.reload(mcp_server_tree_sitter.bootstrap.logging_bootstrap) # Verify logging.basicConfig was called mock_basic_config.assert_called_once() def test_environment_vars_processed_early(): """Test that environment variables are processed before logger configuration.""" # Test the function directly rather than trying to mock it # Save current environment variable value original_env = os.environ.get("MCP_TS_LOG_LEVEL", None) try: # Test with DEBUG level os.environ["MCP_TS_LOG_LEVEL"] = "DEBUG" from mcp_server_tree_sitter.bootstrap.logging_bootstrap import get_log_level_from_env # Verify function returns correct level assert get_log_level_from_env() == logging.DEBUG, "Should return DEBUG level from environment" # Test with INFO level - this time specify module differently to avoid NameError os.environ["MCP_TS_LOG_LEVEL"] = "INFO" # First import the module import importlib import mcp_server_tree_sitter.bootstrap.logging_bootstrap as bootstrap_logging # Then reload it to pick up the new environment variable importlib.reload(bootstrap_logging) # Verify the function returns the new level assert bootstrap_logging.get_log_level_from_env() == logging.INFO, "Should return INFO level from environment" finally: # Restore environment if original_env is None: del os.environ["MCP_TS_LOG_LEVEL"] else: os.environ["MCP_TS_LOG_LEVEL"] = original_env def test_handlers_synchronized_at_init(): """Test that handler levels are synchronized at initialization.""" # Mock handlers on the root logger mock_handler = MagicMock() root_logger = logging.getLogger() original_handlers = root_logger.handlers try: # Add mock handler and capture original handlers root_logger.handlers = [mock_handler] # Set environment variable with patch.dict(os.environ, {"MCP_TS_LOG_LEVEL": "DEBUG"}): # Mock the get_log_level_from_env function to control return value with patch("mcp_server_tree_sitter.bootstrap.logging_bootstrap.get_log_level_from_env") as mock_get_level: mock_get_level.return_value = logging.DEBUG # Force reload to trigger initialization import mcp_server_tree_sitter.bootstrap.logging_bootstrap importlib.reload(mcp_server_tree_sitter.bootstrap.logging_bootstrap) # Verify handler level was set mock_handler.setLevel.assert_called_with(logging.DEBUG) finally: # Restore original handlers root_logger.handlers = original_handlers ``` -------------------------------------------------------------------------------- /tests/test_diagnostics/test_language_registry.py: -------------------------------------------------------------------------------- ```python """Pytest-based diagnostic tests for language registry functionality.""" import pytest from mcp_server_tree_sitter.language.registry import LanguageRegistry @pytest.mark.diagnostic def test_language_detection(diagnostic) -> None: """Test language detection 
functionality.""" registry = LanguageRegistry() # Test a few common file extensions test_files = { "test.py": "python", "test.js": "javascript", "test.ts": "typescript", "test.go": "go", "test.cpp": "cpp", "test.c": "c", "test.rs": "rust", "test.unknown": None, } results = {} failures = [] for filename, expected in test_files.items(): detected = registry.language_for_file(filename) match = detected == expected results[filename] = {"detected": detected, "expected": expected, "match": match} if not match: failures.append(filename) # Add all results to diagnostic data diagnostic.add_detail("detection_results", results) if failures: diagnostic.add_detail("failed_files", failures) # Check results with proper assertions for filename, expected in test_files.items(): assert registry.language_for_file(filename) == expected, f"Language detection failed for {filename}" @pytest.mark.diagnostic def test_language_list_empty(diagnostic) -> None: """Test that list_languages returns languages correctly.""" registry = LanguageRegistry() # Get available languages available_languages = registry.list_available_languages() installable_languages = registry.list_installable_languages() # Add results to diagnostic data diagnostic.add_detail("available_languages", available_languages) diagnostic.add_detail("installable_languages", installable_languages) # Check for common languages we expect to be available expected_languages = [ "python", "javascript", "typescript", "c", "cpp", "go", "rust", ] for lang in expected_languages: if lang not in available_languages: diagnostic.add_error( "LanguageNotAvailable", f"Expected language {lang} not in available languages", ) # Assert that some languages are available assert len(available_languages) > 0, "No languages available" # Assert that we find at least some of our expected languages common_languages = set(expected_languages) & set(available_languages) assert len(common_languages) > 0, "None of the expected languages are available" @pytest.mark.diagnostic def test_language_detection_vs_listing(diagnostic) -> None: """Test discrepancy between language detection and language listing.""" registry = LanguageRegistry() # Test with a few common languages test_languages = [ "python", "javascript", "typescript", "c", "cpp", "go", "rust", ] results = {} for lang in test_languages: try: # Check if language is available if registry.is_language_available(lang): results[lang] = { "available": True, "language_object": bool(registry.get_language(lang) is not None), "reason": "", } else: results[lang] = { "available": False, "reason": "Not available in language-pack", "language_object": False, } except Exception as e: results[lang] = {"available": False, "error": str(e), "language_object": False} # Check if languages reported as available appear in list_languages available_languages = registry.list_available_languages() # Add results to diagnostic data diagnostic.add_detail("language_results", results) diagnostic.add_detail("available_languages", available_languages) # Compare detection vs listing discrepancies = [] for lang, result in results.items(): if result.get("available", False) and lang not in available_languages: discrepancies.append(lang) if discrepancies: diagnostic.add_error( "LanguageInconsistency", f"Languages available but not in list_languages: {discrepancies}", ) # For diagnostic purposes, not all assertions should fail # This checks if there are any available languages successful_languages = [lang for lang, result in results.items() if result.get("available", False)] 
assert len(successful_languages) > 0, "No languages could be successfully installed" ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/context.py: -------------------------------------------------------------------------------- ```python """Context class for managing dependency injection. This module provides a ServerContext class to manage dependencies and provide a cleaner interface for interacting with the application's components while supporting dependency injection. """ from typing import Any, Dict, List, Optional # Import logging from bootstrap package from .bootstrap import get_logger, update_log_levels from .cache.parser_cache import TreeCache from .config import ConfigurationManager, ServerConfig from .di import get_container from .exceptions import ProjectError from .language.registry import LanguageRegistry from .models.project import ProjectRegistry logger = get_logger(__name__) class ServerContext: """Context for managing application state with dependency injection.""" def __init__( self, config_manager: Optional[ConfigurationManager] = None, project_registry: Optional[ProjectRegistry] = None, language_registry: Optional[LanguageRegistry] = None, tree_cache: Optional[TreeCache] = None, ): """ Initialize with optional components. If components are not provided, they will be fetched from the global container. """ container = get_container() self.config_manager = config_manager or container.config_manager self.project_registry = project_registry or container.project_registry self.language_registry = language_registry or container.language_registry self.tree_cache = tree_cache or container.tree_cache def get_config(self) -> ServerConfig: """Get the current configuration.""" return self.config_manager.get_config() # Project management methods def register_project( self, path: str, name: Optional[str] = None, description: Optional[str] = None ) -> Dict[str, Any]: """Register a project for code analysis.""" try: # Register project project = self.project_registry.register_project(name or path, path, description) # Scan for languages project.scan_files(self.language_registry) return project.to_dict() except Exception as e: raise ProjectError(f"Failed to register project: {e}") from e def list_projects(self) -> List[Dict[str, Any]]: """List all registered projects.""" return self.project_registry.list_projects() def remove_project(self, name: str) -> Dict[str, str]: """Remove a registered project.""" self.project_registry.remove_project(name) return {"status": "success", "message": f"Project '{name}' removed"} # Cache management methods def clear_cache(self, project: Optional[str] = None, file_path: Optional[str] = None) -> Dict[str, str]: """Clear the parse tree cache.""" if project and file_path: # Get file path project_obj = self.project_registry.get_project(project) abs_path = project_obj.get_file_path(file_path) # Clear cache self.tree_cache.invalidate(abs_path) return {"status": "success", "message": f"Cache cleared for {file_path} in {project}"} else: # Clear all self.tree_cache.invalidate() return {"status": "success", "message": "Cache cleared"} # Configuration management methods def configure( self, config_path: Optional[str] = None, cache_enabled: Optional[bool] = None, max_file_size_mb: Optional[int] = None, log_level: Optional[str] = None, ) -> Dict[str, Any]: """Configure the server.""" # Load config if path provided if config_path: logger.info(f"Configuring server with YAML config from: {config_path}") 
self.config_manager.load_from_file(config_path) # Update specific settings if cache_enabled is not None: logger.info(f"Setting cache.enabled to {cache_enabled}") self.config_manager.update_value("cache.enabled", cache_enabled) self.tree_cache.set_enabled(cache_enabled) if max_file_size_mb is not None: logger.info(f"Setting security.max_file_size_mb to {max_file_size_mb}") self.config_manager.update_value("security.max_file_size_mb", max_file_size_mb) if log_level is not None: logger.info(f"Setting log_level to {log_level}") self.config_manager.update_value("log_level", log_level) # Apply log level using centralized bootstrap function update_log_levels(log_level) logger.debug(f"Applied log level {log_level} to mcp_server_tree_sitter loggers") # Return current config as dict return self.config_manager.to_dict() # Create a global context instance for convenience global_context = ServerContext() def get_global_context() -> ServerContext: """Get the global server context.""" return global_context ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/bootstrap/logging_bootstrap.py: -------------------------------------------------------------------------------- ```python """Bootstrap module for logging configuration with minimal dependencies. This module is imported first in the initialization sequence to ensure logging is configured before any other modules are imported. It has no dependencies on other modules in the project to avoid import cycles. This is the CANONICAL implementation of logging configuration. If you need to modify how logging is configured, make changes here and nowhere else. """ import logging import os from typing import Dict, Union # Numeric values corresponding to log level names LOG_LEVEL_MAP: Dict[str, int] = { "DEBUG": logging.DEBUG, "INFO": logging.INFO, "WARNING": logging.WARNING, "ERROR": logging.ERROR, "CRITICAL": logging.CRITICAL, } def get_log_level_from_env() -> int: """ Get log level from environment variable MCP_TS_LOG_LEVEL. Returns: int: Logging level value (e.g., logging.DEBUG, logging.INFO) """ env_level = os.environ.get("MCP_TS_LOG_LEVEL", "INFO").upper() return LOG_LEVEL_MAP.get(env_level, logging.INFO) def configure_root_logger() -> None: """ Configure the root logger based on environment variables. This should be called at the earliest possible point in the application. """ log_level = get_log_level_from_env() # Configure the root logger with proper format and level logging.basicConfig(level=log_level, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") # Ensure the root logger for our package is also set correctly pkg_logger = logging.getLogger("mcp_server_tree_sitter") pkg_logger.setLevel(log_level) # Ensure all handlers have the correct level for handler in logging.root.handlers: handler.setLevel(log_level) # Ensure propagation is preserved pkg_logger.propagate = True # Ensure all existing loggers' handlers are synchronized for name in logging.root.manager.loggerDict: if name.startswith("mcp_server_tree_sitter"): logger = logging.getLogger(name) # Only synchronize handler levels, don't set logger level for handler in logger.handlers: handler.setLevel(logger.getEffectiveLevel()) def update_log_levels(level_name: Union[str, int]) -> None: """ Update the root package logger level and synchronize handler levels. This function sets the level of the root package logger only. Child loggers will inherit this level unless they have their own explicit level settings. 
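
    For example (illustrative call): update_log_levels("DEBUG") or
    update_log_levels(logging.DEBUG) sets the "mcp_server_tree_sitter" logger
    to DEBUG, and a child logger such as "mcp_server_tree_sitter.server" then
    reports at DEBUG through normal inheritance unless it has an explicit
    level of its own.
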
Handler levels are updated to match their logger's effective level. Args: level_name: Log level name (DEBUG, INFO, etc.) or numeric value """ # Convert string level name to numeric value if needed if isinstance(level_name, str): level_value = LOG_LEVEL_MAP.get(level_name.upper(), logging.INFO) else: level_value = level_name # Update ONLY the root package logger level pkg_logger = logging.getLogger("mcp_server_tree_sitter") pkg_logger.setLevel(level_value) # Update all handlers on the root package logger for handler in pkg_logger.handlers: handler.setLevel(level_value) # Also update the root logger for consistency - this helps with debug flag handling # when the module is already imported root_logger = logging.getLogger() root_logger.setLevel(level_value) for handler in root_logger.handlers: handler.setLevel(level_value) # Synchronize handler levels with their logger's effective level # for all existing loggers in our package hierarchy for name in logging.root.manager.loggerDict: if name == "mcp_server_tree_sitter" or name.startswith("mcp_server_tree_sitter."): logger = logging.getLogger(name) # DO NOT set the logger's level explicitly to maintain hierarchy # Only synchronize handler levels with the logger's effective level for handler in logger.handlers: handler.setLevel(logger.getEffectiveLevel()) # Ensure propagation is preserved logger.propagate = True def get_logger(name: str) -> logging.Logger: """ Get a properly configured logger with appropriate level. Args: name: Logger name, typically __name__ Returns: logging.Logger: Configured logger """ logger = logging.getLogger(name) # Only set level explicitly for the root package logger # Child loggers will inherit levels as needed if name == "mcp_server_tree_sitter": log_level = get_log_level_from_env() logger.setLevel(log_level) # Ensure all handlers have the correct level for handler in logger.handlers: handler.setLevel(log_level) else: # For child loggers, ensure handlers match their effective level # without setting the logger level explicitly effective_level = logger.getEffectiveLevel() for handler in logger.handlers: handler.setLevel(effective_level) # Ensure propagation is enabled logger.propagate = True return logger # Run the root logger configuration when this module is imported configure_root_logger() ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/utils/context/mcp_context.py: -------------------------------------------------------------------------------- ```python """Context handling for MCP operations with progress reporting.""" import logging from contextlib import contextmanager from typing import Any, Generator, Optional, TypeVar logger = logging.getLogger(__name__) T = TypeVar("T") class ProgressScope: """Scope for tracking progress of an operation.""" def __init__(self, context: "MCPContext", total: int, description: str): """ Initialize a progress scope. Args: context: The parent MCPContext total: Total number of steps description: Description of the operation """ self.context = context self.total = total self.description = description self.current = 0 def update(self, step: int = 1) -> None: """ Update progress by a number of steps. Args: step: Number of steps to add to progress """ self.current += step if self.current > self.total: self.current = self.total self.context.report_progress(self.current, self.total) def set_progress(self, current: int) -> None: """ Set progress to a specific value. 
Args: current: Current progress value """ self.current = max(0, min(current, self.total)) self.context.report_progress(self.current, self.total) class MCPContext: """Context for MCP operations with progress reporting.""" def __init__(self, ctx: Optional[Any] = None): """ Initialize context with optional MCP context. Args: ctx: MCP context object, if available """ self.ctx = ctx self.total_steps = 0 self.current_step = 0 def report_progress(self, current: int, total: int) -> None: """ Report progress to the MCP client. Args: current: Current progress value total: Total steps """ self.current_step = current self.total_steps = total if self.ctx and hasattr(self.ctx, "report_progress"): # Use MCP context if available try: self.ctx.report_progress(current, total) except Exception as e: logger.warning(f"Failed to report progress: {e}") else: # Log progress if no MCP context if total > 0: percentage = int((current / total) * 100) logger.debug(f"Progress: {percentage}% ({current}/{total})") def info(self, message: str) -> None: """ Log an info message. Args: message: Message to log """ logger.info(message) if self.ctx and hasattr(self.ctx, "info"): try: self.ctx.info(message) except Exception as e: logger.warning(f"Failed to send info message: {e}") def warning(self, message: str) -> None: """ Log a warning message. Args: message: Message to log """ logger.warning(message) if self.ctx and hasattr(self.ctx, "warning"): try: self.ctx.warning(message) except Exception as e: logger.warning(f"Failed to send warning message: {e}") def error(self, message: str) -> None: """ Log an error message. Args: message: Message to log """ logger.error(message) if self.ctx and hasattr(self.ctx, "error"): try: self.ctx.error(message) except Exception as e: logger.warning(f"Failed to send error message: {e}") @contextmanager def progress_scope(self, total: int, description: str) -> Generator[ProgressScope, None, None]: """ Context manager for tracking progress of an operation. Args: total: Total number of steps description: Description of the operation Yields: ProgressScope object for updating progress """ try: self.info(f"Starting: {description}") scope = ProgressScope(self, total, description) scope.update(0) # Set initial progress to 0 yield scope finally: if scope.current < scope.total: scope.set_progress(scope.total) # Ensure we complete the progress self.info(f"Completed: {description}") def with_mcp_context(self, ctx: Any) -> "MCPContext": """ Create a new context with the given MCP context. Args: ctx: MCP context object Returns: New MCPContext with the given MCP context """ return MCPContext(ctx) @staticmethod def from_mcp_context(ctx: Optional[Any]) -> "MCPContext": """ Create a context from an MCP context. Args: ctx: MCP context object or None Returns: New MCPContext """ return MCPContext(ctx) def try_get_mcp_context(self) -> Optional[Any]: """ Get the wrapped MCP context if available. Returns: MCP context or None """ return self.ctx ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/capabilities/server_capabilities.py: -------------------------------------------------------------------------------- ```python """Server capability declarations for MCP integration.""" import logging from typing import Any, Dict, List logger = logging.getLogger(__name__) def register_capabilities(mcp_server: Any) -> None: """ Register MCP server capabilities. 
Args: mcp_server: MCP server instance """ # Use dependency injection instead of global context from ..di import get_container # Get container and dependencies container = get_container() config_manager = container.config_manager config = config_manager.get_config() # FastMCP may not have capability method, so we'll skip this for now # @mcp_server.capability("prompts.listChanged") def handle_prompts_list_changed() -> Dict[str, Any]: """Handle prompt template management events.""" logger.debug("Received prompts.listChanged event") return {"status": "success"} # @mcp_server.capability("resources.subscribe") def handle_resources_subscribe(resource_uri: str) -> Dict[str, Any]: """ Handle resource subscription requests. Args: resource_uri: Resource URI to subscribe to Returns: Subscription response """ logger.debug(f"Received subscription request for {resource_uri}") return {"status": "success", "resource": resource_uri} # @mcp_server.capability("resources.listChanged") def handle_resources_list_changed() -> Dict[str, Any]: """Handle resource discovery events.""" logger.debug("Received resources.listChanged event") return {"status": "success"} # @mcp_server.capability("tools.listChanged") def handle_tools_list_changed() -> Dict[str, Any]: """Handle tool discovery events.""" logger.debug("Received tools.listChanged event") return {"status": "success"} # @mcp_server.capability("logging") def handle_logging(level: str, message: str) -> Dict[str, Any]: """ Handle logging configuration. Args: level: Log level message: Log message Returns: Logging response """ log_levels = { "debug": logging.DEBUG, "info": logging.INFO, "warning": logging.WARNING, "error": logging.ERROR, } log_level = log_levels.get(level.lower(), logging.INFO) logger.log(log_level, f"MCP: {message}") return {"status": "success"} # @mcp_server.capability("completion") def handle_completion(text: str, position: int) -> Dict[str, Any]: """ Handle argument completion suggestions. 
        Args:
            text: Current input text
            position: Cursor position in text

        Returns:
            Completion suggestions
        """
        # Simple completion for commonly used arguments
        suggestions: List[Dict[str, str]] = []

        # Extract the current word being typed, accepting alphanumerics and
        # underscores and stopping at the start of the string
        current_word = ""
        i = position - 1
        while i >= 0 and (text[i].isalnum() or text[i] == "_"):
            current_word = text[i] + current_word
            i -= 1

        # Project name suggestions
        if current_word and "project" in text[:position].lower():
            # Use container's project registry
            project_registry = container.project_registry
            for project_dict in project_registry.list_projects():
                project_name = project_dict["name"]
                if project_name.startswith(current_word):
                    suggestions.append(
                        {
                            "text": project_name,
                            "description": f"Project: {project_name}",
                        }
                    )

        # Language suggestions
        if current_word and "language" in text[:position].lower():
            # Use container's language registry
            language_registry = container.language_registry
            for language in language_registry.list_available_languages():
                if language.startswith(current_word):
                    suggestions.append({"text": language, "description": f"Language: {language}"})

        # Config suggestions
        if current_word and "config" in text[:position].lower():
            if "cache_enabled".startswith(current_word):
                suggestions.append(
                    {
                        "text": "cache_enabled",
                        "description": f"Cache enabled: {config.cache.enabled}",
                    }
                )
            if "max_file_size_mb".startswith(current_word):
                # Store in variable to avoid line length error
                size_mb = config.security.max_file_size_mb
                suggestions.append(
                    {
                        "text": "max_file_size_mb",
                        "description": f"Max file size: {size_mb} MB",
                    }
                )
            if "log_level".startswith(current_word):
                suggestions.append(
                    {
                        "text": "log_level",
                        "description": f"Log level: {config.log_level}",
                    }
                )

        return {"suggestions": suggestions}

    # Ensure capabilities are accessible to tests
    if hasattr(mcp_server, "capabilities"):
        mcp_server.capabilities["logging"] = handle_logging
        mcp_server.capabilities["completion"] = handle_completion
```

--------------------------------------------------------------------------------
/docs/architecture.md:
--------------------------------------------------------------------------------

```markdown
# Architecture Overview

This document provides an overview of the MCP Tree-sitter Server's architecture, focusing on key components and design patterns.

## Core Architecture

The MCP Tree-sitter Server follows a structured architecture with the following components:

1. **Bootstrap Layer**: Core initialization systems that must be available to all modules with minimal dependencies
2. **Configuration Layer**: Configuration management with environment variable support
3. **Dependency Injection Container**: Central container for managing and accessing services
4. **Tree-sitter Integration**: Interfaces with the tree-sitter library for parsing and analysis
5. **MCP Protocol Layer**: Handles interactions with the Model Context Protocol

## Bootstrap Layer

The bootstrap layer handles critical initialization tasks that must happen before anything else:

```
src/mcp_server_tree_sitter/bootstrap/
├── __init__.py           # Exports key bootstrap functions
└── logging_bootstrap.py  # Canonical logging configuration
```

This layer is imported first in the package's `__init__.py` and has minimal dependencies. The bootstrap module ensures that core services like logging are properly initialized and globally available to all modules.

**Key Design Principle**: Each component in the bootstrap layer must have minimal dependencies to avoid import cycles and ensure reliable initialization.
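
The sketch below (illustrative, assuming only what the test suite asserts about `__init__.py`: that `from . import bootstrap` precedes the other imports and that `bootstrap` exports `get_logger`) shows what this ordering looks like at the package entry point. Importing `bootstrap` runs its module-level `configure_root_logger()` as a side effect, so every later import sees logging already configured.

```python
# Minimal sketch of src/mcp_server_tree_sitter/__init__.py (illustrative only)
from . import bootstrap  # must be first: configures the root logger on import

logger = bootstrap.get_logger(__name__)

# Remaining imports (config, server, context, ...) can now rely on logging
# being configured.
```
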
## Dependency Injection Pattern Instead of using global variables (which was the approach in earlier versions), the application now uses a structured dependency injection pattern: 1. **DependencyContainer**: The `DependencyContainer` class holds all application components and services 2. **ServerContext**: A context class provides a clean interface for interacting with dependencies 3. **Access Functions**: API functions like `get_logger()` and `update_log_levels()` provide easy access to functionality This approach has several benefits: - Cleaner testing with the ability to mock dependencies - Better encapsulation of implementation details - Reduced global state and improved thread safety - Clearer dependency relationships between components ## Logging Design Logging follows a hierarchical model using Python's standard `logging` module: 1. **Root Package Logger**: Only the root package logger (`mcp_server_tree_sitter`) has its level explicitly set 2. **Child Loggers**: Child loggers inherit their level from the root package logger 3. **Handler Synchronization**: Handler levels are synchronized with their logger's effective level **Canonical Implementation**: The logging system is defined in a single location - `bootstrap/logging_bootstrap.py`. Other modules import from this module to ensure consistent behavior. ### Logging Functions The bootstrap module provides these key logging functions: ```python # Get log level from environment variable get_log_level_from_env() # Configure the root logger configure_root_logger() # Get a properly configured logger get_logger(name) # Update log levels update_log_levels(level_name) ``` ## Configuration System The configuration system uses a layered approach: 1. **Environment Variables**: Highest precedence (e.g., `MCP_TS_LOG_LEVEL=DEBUG`) 2. **Explicit Updates**: Updates made via `update_value()` calls 3. **YAML Configuration**: Settings from YAML configuration files 4. **Default Values**: Fallback defaults defined in model classes The `ConfigurationManager` is responsible for loading, managing, and applying configuration, while a `ServerConfig` model encapsulates the actual configuration settings. ## Project and Language Management Projects and languages are managed by registry classes: 1. **ProjectRegistry**: Maintains active project registrations 2. **LanguageRegistry**: Manages tree-sitter language parsers These registries are accessed through the dependency container or context, providing a clean interface for operations. ## Use of Builder and Factory Patterns The server uses several design patterns for cleaner code: 1. **Builder Pattern**: Used for constructing complex objects like `Project` instances 2. **Factory Methods**: Used to create tree-sitter parsers and queries 3. **Singleton Pattern**: Used for the dependency container to ensure consistent state ## Lifecycle Management The server's lifecycle is managed in a structured way: 1. **Bootstrap Phase**: Initializes logging and critical systems (from `__init__.py`) 2. **Configuration Phase**: Loads configuration from files and environment 3. **Dependency Initialization**: Sets up all dependencies in the container 4. **Server Setup**: Configures MCP tools and capabilities 5. **Running Phase**: Processes requests from the MCP client 6. **Shutdown**: Gracefully handles shutdown and cleanup ## Error Handling Strategy The server implements a layered error handling approach: 1. **Custom Exceptions**: Defined in `exceptions.py` for specific error cases 2. 
**Function-Level Handlers**: Most low-level functions do error handling 3. **Tool-Level Handlers**: MCP tools handle errors and return structured responses 4. **Global Exception Handling**: FastMCP provides top-level error handling ## Future Architecture Improvements Planned architectural improvements include: 1. **Complete Decoupling**: Further reduce dependencies between components 2. **Module Structure Refinement**: Better organize modules by responsibility 3. **Configuration Caching**: Optimize configuration access patterns 4. **Async Support**: Add support for asynchronous operations 5. **Plugin Architecture**: Support for extensibility through plugins ``` -------------------------------------------------------------------------------- /tests/test_logging_bootstrap.py: -------------------------------------------------------------------------------- ```python """Tests for the logging bootstrap module.""" import importlib import logging import pytest def test_bootstrap_imported_first(): """Test that bootstrap is imported in __init__.py before anything else.""" # Get the content of __init__.py import inspect import mcp_server_tree_sitter init_source = inspect.getsource(mcp_server_tree_sitter) # Check that bootstrap is imported before any other modules bootstrap_import_index = init_source.find("from . import bootstrap") assert bootstrap_import_index > 0, "bootstrap should be imported in __init__.py" # Check that bootstrap is imported before any other significant imports other_imports = [ "from . import config", "from . import server", "from . import context", ] for other_import in other_imports: other_import_index = init_source.find(other_import) if other_import_index > 0: assert bootstrap_import_index < other_import_index, f"bootstrap should be imported before {other_import}" def test_logging_config_forwards_to_bootstrap(): """Test that logging_config.py forwards to bootstrap.logging_bootstrap.""" # Import both modules from mcp_server_tree_sitter import logging_config from mcp_server_tree_sitter.bootstrap import logging_bootstrap # Verify that key functions are the same objects assert logging_config.get_logger is logging_bootstrap.get_logger assert logging_config.update_log_levels is logging_bootstrap.update_log_levels assert logging_config.get_log_level_from_env is logging_bootstrap.get_log_level_from_env assert logging_config.configure_root_logger is logging_bootstrap.configure_root_logger assert logging_config.LOG_LEVEL_MAP is logging_bootstrap.LOG_LEVEL_MAP def test_key_modules_use_bootstrap(): """Test that key modules import logging utilities from bootstrap.""" # Import key modules modules_to_check = [ "mcp_server_tree_sitter.server", "mcp_server_tree_sitter.config", "mcp_server_tree_sitter.context", "mcp_server_tree_sitter.di", "mcp_server_tree_sitter.__main__", ] # Import bootstrap for comparison # Check each module for module_name in modules_to_check: try: # Import the module module = importlib.import_module(module_name) # Check if the module has a logger attribute if hasattr(module, "logger"): # Check where the logger comes from by examining the code import inspect source = inspect.getsource(module) # Look for bootstrap import pattern bootstrap_import = "from .bootstrap import get_logger" in source legacy_import = "from .logging_config import get_logger" in source # If module uses logging_config, it should be forwarding to bootstrap assert bootstrap_import or not legacy_import, f"{module_name} should import get_logger from bootstrap" except (ImportError, AttributeError) as e: 
pytest.skip(f"Couldn't check {module_name}: {e}") def test_log_level_update_consistency(): """Test that all log level updates use bootstrap's implementation.""" # Create test loggers and handlers root_logger = logging.getLogger("mcp_server_tree_sitter") original_level = root_logger.level child_logger = logging.getLogger("mcp_server_tree_sitter.test_logging_bootstrap") child_handler = logging.StreamHandler() child_handler.setLevel(logging.WARNING) child_logger.addHandler(child_handler) try: # Import and use bootstrap's update_log_levels from mcp_server_tree_sitter.bootstrap import update_log_levels # Set a known state before testing root_logger.setLevel(logging.INFO) child_logger.setLevel(logging.NOTSET) # Apply the update update_log_levels("DEBUG") # Verify effects on root logger assert root_logger.level == logging.DEBUG, "Root logger level should be updated" # Verify effects on child logger assert child_logger.level == logging.NOTSET, "Child logger level should not be changed" assert child_logger.getEffectiveLevel() == logging.DEBUG, "Child logger should inherit level from root" # Explicitly synchronize the handler level by calling update_log_levels again update_log_levels("DEBUG") # Now check the handler level assert child_handler.level == logging.DEBUG, "Handler level should be synchronized" finally: # Clean up root_logger.setLevel(original_level) child_logger.removeHandler(child_handler) def test_no_duplicate_log_level_implementations(): """Test that only the bootstrap implementation of update_log_levels exists.""" # Import bootstrap's update_log_levels for reference from mcp_server_tree_sitter.bootstrap.logging_bootstrap import update_log_levels as bootstrap_update # Import the re-exported function from logging_config from mcp_server_tree_sitter.logging_config import update_log_levels as config_update # Verify the re-exported function is the same object as the original assert config_update is bootstrap_update, "logging_config should re-export the same function object" # Get the module from context # We test the identity of the imported function rather than checking source code # which is more brittle from mcp_server_tree_sitter.context import update_log_levels as context_update # If context.py properly imports from bootstrap or logging_config, # all three should be the same object assert context_update is bootstrap_update, "context should import update_log_levels from bootstrap" ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/models/ast_cursor.py: -------------------------------------------------------------------------------- ```python """AST representation models using cursor-based traversal.""" from typing import Any, Dict, Optional from ..utils.tree_sitter_helpers import ( get_node_text, walk_tree, ) from ..utils.tree_sitter_types import Node, ensure_node def node_to_dict_cursor( node: Any, source_bytes: Optional[bytes] = None, include_children: bool = True, include_text: bool = True, max_depth: int = 5, ) -> Dict[str, Any]: """ Convert a tree-sitter node to a dictionary using cursor-based traversal. This implementation avoids stack overflow issues for large ASTs by using cursor-based traversal instead of recursion. 
Args: node: Tree-sitter Node object source_bytes: Source code bytes include_children: Whether to include children nodes include_text: Whether to include node text max_depth: Maximum depth to traverse Returns: Dictionary representation of the node """ safe_node = ensure_node(node) # Create a map to track node IDs node_map: Dict[int, Dict[str, Any]] = {} # Function to generate unique ID for a node def get_node_id(node: Node) -> int: return hash((node.start_byte, node.end_byte, node.type)) # Initialize the root node data root_id = get_node_id(safe_node) root_data = { "id": root_id, "type": safe_node.type, "start_point": { "row": safe_node.start_point[0], "column": safe_node.start_point[1], }, "end_point": {"row": safe_node.end_point[0], "column": safe_node.end_point[1]}, "start_byte": safe_node.start_byte, "end_byte": safe_node.end_byte, "named": safe_node.is_named, "children_count": safe_node.child_count, } # Only include children list if we're including children if include_children: root_data["children"] = [] # Add text if requested if source_bytes and include_text: try: root_data["text"] = get_node_text(safe_node, source_bytes) except Exception as e: root_data["text_error"] = str(e) # Add root to node map node_map[root_id] = root_data # Skip child processing if not requested or at max depth if not include_children or max_depth <= 0: return root_data # Get cursor at root cursor = walk_tree(safe_node) # Track current node data, parent stack, and depth current_data = root_data parent_stack = [] current_depth = 0 # Process a node and add it to node_map def process_node(current_node: Node, parent_data: Dict[str, Any], depth: int) -> Dict[str, Any]: node_id = get_node_id(current_node) # Return existing node data if already processed if node_id in node_map: return node_map[node_id] # Create node data node_data = { "id": node_id, "type": current_node.type, "start_point": { "row": current_node.start_point[0], "column": current_node.start_point[1], }, "end_point": { "row": current_node.end_point[0], "column": current_node.end_point[1], }, "start_byte": current_node.start_byte, "end_byte": current_node.end_byte, "named": current_node.is_named, } # Add text if requested if source_bytes and include_text: try: node_data["text"] = get_node_text(current_node, source_bytes) except Exception as e: node_data["text_error"] = str(e) # Set children count node_data["children_count"] = current_node.child_count # Only add children list if we're including children if include_children: if depth < max_depth: node_data["children"] = [] else: node_data["truncated"] = True # Add to node map node_map[node_id] = node_data # Add to parent's children list if parent_data and "children" in parent_data: parent_data["children"].append(node_data) parent_data["children_count"] = len(parent_data["children"]) return node_data # Traversal state visited_children = False # Main traversal loop while True: # Try to visit children if not already visited and depth allows if not visited_children and current_depth < max_depth: if cursor.goto_first_child(): # Process the child node current_depth += 1 parent_stack.append(current_data) # Ensure node is not None before processing if cursor.node is not None: current_data = process_node(cursor.node, current_data, current_depth) else: visited_children = True continue else: # No children visited_children = True # Try next sibling if children visited elif cursor.goto_next_sibling(): # Ensure node is not None before processing if cursor.node is not None: current_data = process_node(cursor.node, 
parent_stack[-1], current_depth) else: visited_children = True visited_children = False continue # Go back to parent if no more siblings elif parent_stack: cursor.goto_parent() current_data = parent_stack.pop() current_depth -= 1 visited_children = True # If we're back at root level and finished all children, we're done if not parent_stack: break else: # No more nodes to process break return root_data ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/utils/tree_sitter_types.py: -------------------------------------------------------------------------------- ```python """Type handling utilities for tree-sitter. This module provides type definitions and safety wrappers for the tree-sitter library to ensure type safety with or without the library installed. """ from typing import Any, Protocol, TypeVar, cast # Define protocols for tree-sitter types class LanguageProtocol(Protocol): """Protocol for Tree-sitter Language class.""" def query(self, query_string: str) -> Any: ... class ParserProtocol(Protocol): """Protocol for Tree-sitter Parser class.""" def set_language(self, language: Any) -> None: ... def language(self, language: Any) -> None: ... # Alternative name for set_language def parse(self, bytes_input: bytes) -> Any: ... class TreeProtocol(Protocol): """Protocol for Tree-sitter Tree class.""" @property def root_node(self) -> Any: ... class NodeProtocol(Protocol): """Protocol for Tree-sitter Node class.""" @property def children(self) -> list[Any]: ... @property def named_children(self) -> list[Any]: ... @property def child_count(self) -> int: ... @property def named_child_count(self) -> int: ... @property def start_point(self) -> tuple[int, int]: ... @property def end_point(self) -> tuple[int, int]: ... @property def start_byte(self) -> int: ... @property def end_byte(self) -> int: ... @property def type(self) -> str: ... @property def is_named(self) -> bool: ... @property def parent(self) -> Any: ... @property def children_by_field_name(self) -> dict[str, list[Any]]: ... def walk(self) -> Any: ... class CursorProtocol(Protocol): """Protocol for Tree-sitter Cursor class.""" @property def node(self) -> Any: ... def goto_first_child(self) -> bool: ... def goto_next_sibling(self) -> bool: ... def goto_parent(self) -> bool: ... 
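
# Example (illustrative): because these are structural Protocols, helper code
# can be typed against them without importing tree_sitter directly, e.g.:
#
#     def node_span(node: NodeProtocol) -> tuple[int, int]:
#         return (node.start_byte, node.end_byte)
#
# Any object exposing the required attributes satisfies the annotation,
# including the Dummy* fallbacks defined below.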
# Type variables for type safety T = TypeVar("T") # Try to import actual tree-sitter types try: from tree_sitter import Language as _Language from tree_sitter import Node as _Node from tree_sitter import Parser as _Parser from tree_sitter import Tree as _Tree from tree_sitter import TreeCursor as _TreeCursor # Export actual types if available Language = _Language Parser = _Parser Tree = _Tree Node = _Node TreeCursor = _TreeCursor HAS_TREE_SITTER = True except ImportError: # Create stub classes if tree-sitter is not available HAS_TREE_SITTER = False class DummyLanguage: """Dummy implementation when tree-sitter is not available.""" def __init__(self, *args: Any, **kwargs: Any) -> None: pass def query(self, query_string: str) -> Any: """Dummy query method.""" return None class DummyParser: """Dummy implementation when tree-sitter is not available.""" def set_language(self, language: Any) -> None: """Dummy set_language method.""" pass def language(self, language: Any) -> None: """Dummy language method (alternative to set_language).""" pass def parse(self, bytes_input: bytes) -> Any: """Dummy parse method.""" return None class DummyNode: """Dummy implementation when tree-sitter is not available.""" @property def children(self) -> list[Any]: return [] @property def named_children(self) -> list[Any]: return [] @property def child_count(self) -> int: return 0 @property def named_child_count(self) -> int: return 0 @property def start_point(self) -> tuple[int, int]: return (0, 0) @property def end_point(self) -> tuple[int, int]: return (0, 0) @property def start_byte(self) -> int: return 0 @property def end_byte(self) -> int: return 0 @property def type(self) -> str: return "" @property def is_named(self) -> bool: return False @property def parent(self) -> Any: return None @property def children_by_field_name(self) -> dict[str, list[Any]]: return {} def walk(self) -> Any: return DummyTreeCursor() class DummyTreeCursor: """Dummy implementation when tree-sitter is not available.""" @property def node(self) -> Any: return DummyNode() def goto_first_child(self) -> bool: return False def goto_next_sibling(self) -> bool: return False def goto_parent(self) -> bool: return False class DummyTree: """Dummy implementation when tree-sitter is not available.""" @property def root_node(self) -> Any: return DummyNode() # Export dummy types for type checking # Declare dummy types for when tree-sitter is not available Language = DummyLanguage # type: ignore Parser = DummyParser # type: ignore Tree = DummyTree # type: ignore Node = DummyNode # type: ignore TreeCursor = DummyTreeCursor # type: ignore # Helper function to safely cast to tree-sitter types def ensure_language(obj: Any) -> "Language": """Safely cast to Language type.""" return cast(Language, obj) def ensure_parser(obj: Any) -> "Parser": """Safely cast to Parser type.""" return cast(Parser, obj) def ensure_tree(obj: Any) -> "Tree": """Safely cast to Tree type.""" return cast(Tree, obj) def ensure_node(obj: Any) -> "Node": """Safely cast to Node type.""" return cast(Node, obj) def ensure_cursor(obj: Any) -> "TreeCursor": """Safely cast to TreeCursor type.""" return cast(TreeCursor, obj) ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/tools/query_builder.py: -------------------------------------------------------------------------------- ```python """Tools for building and manipulating tree-sitter queries.""" from typing import Dict, List from ..language.query_templates import 
get_query_template def get_template(language: str, pattern: str) -> str: """ Get a query template with optional parameter replacement. Args: language: Language identifier pattern: Template name or custom pattern Returns: Query string """ # Check if this is a template name template = get_query_template(language, pattern) if template: return template # Otherwise return as-is return pattern def build_compound_query(language: str, patterns: List[str], combine: str = "or") -> str: """ Build a compound query from multiple patterns. Args: language: Language identifier patterns: List of pattern names or custom patterns combine: How to combine patterns ("or" or "and") Returns: Combined query string """ queries = [] for pattern in patterns: template = get_template(language, pattern) if template: queries.append(template) # For 'or' we can just concatenate if combine.lower() == "or": return "\n".join(queries) # For 'and' we need to add predicates # This is a simplified implementation combined = "\n".join(queries) combined += "\n\n;; Add your #match predicates here to require combinations" return combined def adapt_query(query: str, from_language: str, to_language: str) -> Dict[str, str]: """ Adapt a query from one language to another. Args: query: Original query string from_language: Source language to_language: Target language Returns: Dictionary with adapted query and metadata """ adapted = adapt_query_for_language(query, from_language, to_language) return { "original_language": from_language, "target_language": to_language, "original_query": query, "adapted_query": adapted, } def adapt_query_for_language(query: str, from_language: str, to_language: str) -> str: """ Try to adapt a query from one language to another. Args: query: Original query from_language: Source language to_language: Target language Returns: Adapted query string Note: This is a simplified implementation that assumes similar node types. A real implementation would need language-specific translations. """ translations = { # Python -> JavaScript ("python", "javascript"): { "function_definition": "function_declaration", "class_definition": "class_declaration", "block": "statement_block", "parameters": "formal_parameters", "argument_list": "arguments", "import_statement": "import_statement", "call": "call_expression", }, # JavaScript -> Python ("javascript", "python"): { "function_declaration": "function_definition", "class_declaration": "class_definition", "statement_block": "block", "formal_parameters": "parameters", "arguments": "argument_list", "call_expression": "call", }, # Add more language pairs... } pair = (from_language, to_language) if pair in translations: trans_dict = translations[pair] for src, dst in trans_dict.items(): # Simple string replacement query = query.replace(f"({src}", f"({dst}") return query def describe_node_types(language: str) -> Dict[str, str]: """ Get descriptions of common node types for a language. Args: language: Language identifier Returns: Dictionary of node type -> description """ # This would ideally be generated from tree-sitter grammar definitions descriptions = { "python": { "module": "The root node of a Python file", "function_definition": "A function definition with name and params", # Shortened for line length "class_definition": "A class definition with name and body", "import_statement": "An import statement", "import_from_statement": "A from ... import ... 
statement", "assignment": "An assignment statement", "call": "A function call with function name and arguments", "identifier": "An identifier (name)", "string": "A string literal", "integer": "An integer literal", "float": "A floating-point literal", "block": "A block of code (indented statements)", "if_statement": "An if statement with condition and body", "for_statement": "A for loop with target, iterable, and body", "while_statement": "A while loop with condition and body", }, "javascript": { "program": "The root node of a JavaScript file", "function_declaration": "A function declaration with name and params", "arrow_function": "An arrow function with parameters and body", "class_declaration": "A class declaration with name and body", "import_statement": "An import statement", "export_statement": "An export statement", "variable_declaration": "A variable declaration", "call_expression": "A function call with function and arguments", "identifier": "An identifier (name)", "string": "A string literal", "number": "A numeric literal", "statement_block": "A block of statements", "if_statement": "An if statement with condition and consequence", "for_statement": "A for loop", "while_statement": "A while loop with condition and body", }, # Add more languages... } return descriptions.get(language, {}) ``` -------------------------------------------------------------------------------- /tests/test_diagnostics/test_language_pack.py: -------------------------------------------------------------------------------- ```python """Pytest-based diagnostic tests for tree-sitter language pack integration.""" import sys import pytest @pytest.mark.diagnostic def test_tree_sitter_import(diagnostic) -> None: """Test basic import of tree-sitter library.""" try: # Try to import the tree-sitter library import tree_sitter # Record basic functionality information results = { "version": getattr(tree_sitter, "__version__", "Unknown"), "has_language": hasattr(tree_sitter, "Language"), "has_parser": hasattr(tree_sitter, "Parser"), "has_tree": hasattr(tree_sitter, "Tree"), "has_node": hasattr(tree_sitter, "Node"), "dir_contents": dir(tree_sitter), } diagnostic.add_detail("tree_sitter_info", results) # Check if Parser can be initialized try: _ = tree_sitter.Parser() diagnostic.add_detail("can_create_parser", True) except Exception as e: diagnostic.add_detail("can_create_parser", False) diagnostic.add_error("ParserCreationError", str(e)) # Verify the basic components are available assert hasattr(tree_sitter, "Language"), "tree_sitter should have Language class" assert hasattr(tree_sitter, "Parser"), "tree_sitter should have Parser class" assert hasattr(tree_sitter, "Tree"), "tree_sitter should have Tree class" assert hasattr(tree_sitter, "Node"), "tree_sitter should have Node class" except ImportError as e: diagnostic.add_error("ImportError", str(e)) pytest.fail(f"Failed to import tree_sitter: {e}") except Exception as e: diagnostic.add_error("UnexpectedError", str(e)) raise @pytest.mark.diagnostic def test_language_pack_import(diagnostic) -> None: """Test basic import of tree-sitter-language-pack.""" try: # Try to import the tree-sitter-language-pack import tree_sitter_language_pack # Check if bindings are available bindings_available = hasattr(tree_sitter_language_pack, "bindings") version = getattr(tree_sitter_language_pack, "__version__", "Unknown") results = { "version": version, "bindings_available": bindings_available, "dir_contents": dir(tree_sitter_language_pack), } diagnostic.add_detail("language_pack_info", 
results) # Test basic assertions assert hasattr(tree_sitter_language_pack, "get_language"), ( "tree_sitter_language_pack should have get_language function" ) assert hasattr(tree_sitter_language_pack, "get_parser"), ( "tree_sitter_language_pack should have get_parser function" ) except ImportError as e: diagnostic.add_error("ImportError", str(e)) pytest.fail(f"Failed to import tree_sitter_language_pack: {e}") except Exception as e: diagnostic.add_error("UnexpectedError", str(e)) raise @pytest.mark.diagnostic def test_language_binding_available(diagnostic) -> None: """Test if specific language bindings are available.""" test_languages = [ "python", "javascript", "typescript", "c", "cpp", "go", "rust", ] language_results = {} try: # Use find_spec to check if the module is available import importlib.util has_pack = importlib.util.find_spec("tree_sitter_language_pack") is not None diagnostic.add_detail("has_language_pack", has_pack) # If we have the language_pack, we'll try to use it later # through _get_language_binding() for language in test_languages: try: # Try to get the binding for this language binding_result = _get_language_binding(language) language_results[language] = binding_result except Exception as e: language_results[language] = { "status": "error", "error": str(e), } diagnostic.add_detail("language_results", language_results) # Check that at least some languages are available successful_languages = [lang for lang, result in language_results.items() if result.get("status") == "success"] if not successful_languages: diagnostic.add_error("NoLanguagesAvailable", "None of the test languages are available") assert len(successful_languages) > 0, "No languages are available" except ImportError: diagnostic.add_error("ImportError", "tree_sitter_language_pack not available") pytest.fail("tree_sitter_language_pack not available") except Exception as e: diagnostic.add_error("UnexpectedError", str(e)) raise def _get_language_binding(language_name) -> dict: """Helper method to test getting a language binding from the language pack.""" try: from tree_sitter_language_pack import get_language, get_parser # Get language (may raise exception) language = get_language(language_name) # Try to get a parser parser = get_parser(language_name) return { "status": "success", "language_available": language is not None, "parser_available": parser is not None, "language_type": type(language).__name__ if language else None, "parser_type": type(parser).__name__ if parser else None, } except Exception as e: return { "status": "error", "error_type": type(e).__name__, "error_message": str(e), } @pytest.mark.diagnostic def test_python_environment(diagnostic) -> None: """Test the Python environment to help diagnose issues.""" env_info = { "python_version": sys.version, "python_path": sys.executable, "sys_path": sys.path, "modules": sorted(list(sys.modules.keys())), } diagnostic.add_detail("python_environment", env_info) diagnostic.add_detail("environment_captured", True) ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/server.py: -------------------------------------------------------------------------------- ```python """MCP server implementation for Tree-sitter with dependency injection.""" import os from typing import Any, Dict, Optional, Tuple from mcp.server.fastmcp import FastMCP from .bootstrap import get_logger, update_log_levels from .config import ServerConfig from .di import DependencyContainer, get_container # Create server instance mcp = 
FastMCP("tree_sitter") # Set up logger logger = get_logger(__name__) def configure_with_context( container: DependencyContainer, config_path: Optional[str] = None, cache_enabled: Optional[bool] = None, max_file_size_mb: Optional[int] = None, log_level: Optional[str] = None, ) -> Tuple[Dict[str, Any], ServerConfig]: """Configure the server with explicit context. Args: container: DependencyContainer instance config_path: Path to YAML config file cache_enabled: Whether to enable parse tree caching max_file_size_mb: Maximum file size in MB log_level: Logging level (DEBUG, INFO, WARNING, ERROR) Returns: Tuple of (configuration dict, ServerConfig object) """ # Get initial config for comparison config_manager = container.config_manager tree_cache = container.tree_cache initial_config = config_manager.get_config() logger.info( f"Initial configuration: " f"cache.max_size_mb = {initial_config.cache.max_size_mb}, " f"security.max_file_size_mb = {initial_config.security.max_file_size_mb}, " f"language.default_max_depth = {initial_config.language.default_max_depth}" ) # Load config if path provided if config_path: logger.info(f"Configuring server with YAML config from: {config_path}") # Log absolute path to ensure we're looking at the right file abs_path = os.path.abspath(config_path) logger.info(f"Absolute path: {abs_path}") # Check if the file exists before trying to load it if not os.path.exists(abs_path): logger.error(f"Config file does not exist: {abs_path}") config_manager.load_from_file(abs_path) # Log configuration after loading YAML intermediate_config = config_manager.get_config() logger.info( f"Configuration after loading YAML: " f"cache.max_size_mb = {intermediate_config.cache.max_size_mb}, " f"security.max_file_size_mb = {intermediate_config.security.max_file_size_mb}, " f"language.default_max_depth = {intermediate_config.language.default_max_depth}" ) # Update specific settings if provided if cache_enabled is not None: logger.info(f"Setting cache.enabled to {cache_enabled}") config_manager.update_value("cache.enabled", cache_enabled) tree_cache.set_enabled(cache_enabled) if max_file_size_mb is not None: logger.info(f"Setting security.max_file_size_mb to {max_file_size_mb}") config_manager.update_value("security.max_file_size_mb", max_file_size_mb) if log_level is not None: logger.info(f"Setting log_level to {log_level}") config_manager.update_value("log_level", log_level) # Apply log level using already imported update_log_levels update_log_levels(log_level) logger.debug(f"Applied log level {log_level} to mcp_server_tree_sitter loggers") # Get final configuration config = config_manager.get_config() logger.info( f"Final configuration: " f"cache.max_size_mb = {config.cache.max_size_mb}, " f"security.max_file_size_mb = {config.security.max_file_size_mb}, " f"language.default_max_depth = {config.language.default_max_depth}" ) # Return current config as dict and the actual config object config_dict = config_manager.to_dict() return config_dict, config def main() -> None: """Run the server with command-line argument handling""" import argparse import sys # Parse command line arguments parser = argparse.ArgumentParser(description="MCP Tree-sitter Server - Code analysis with tree-sitter") parser.add_argument("--config", help="Path to configuration file") parser.add_argument("--debug", action="store_true", help="Enable debug logging") parser.add_argument("--disable-cache", action="store_true", help="Disable parse tree caching") parser.add_argument("--version", action="store_true", help="Show 
version and exit") # Parse arguments - this handles --help automatically args = parser.parse_args() # Handle version display if args.version: import importlib.metadata try: version = importlib.metadata.version("mcp-server-tree-sitter") print(f"mcp-server-tree-sitter version {version}") except importlib.metadata.PackageNotFoundError: print("mcp-server-tree-sitter (version unknown - package not installed)") sys.exit(0) # Set up debug logging if requested if args.debug: # Set environment variable first for consistency os.environ["MCP_TS_LOG_LEVEL"] = "DEBUG" # Then update log levels update_log_levels("DEBUG") logger.debug("Debug logging enabled") # Get the container container = get_container() # Configure with provided options if args.config: logger.info(f"Loading configuration from {args.config}") container.config_manager.load_from_file(args.config) if args.disable_cache: logger.info("Disabling parse tree cache as requested") container.config_manager.update_value("cache.enabled", False) container.tree_cache.set_enabled(False) # Register capabilities and tools from .capabilities import register_capabilities from .tools.registration import register_tools register_capabilities(mcp) register_tools(mcp, container) # Load configuration from environment config = container.get_config() # Update tree cache settings from config container.tree_cache.set_max_size_mb(config.cache.max_size_mb) container.tree_cache.set_enabled(config.cache.enabled) # Run the server logger.info("Starting MCP Tree-sitter Server") mcp.run() if __name__ == "__main__": main() ``` -------------------------------------------------------------------------------- /tests/test_project_persistence.py: -------------------------------------------------------------------------------- ```python """Tests for project registry persistence between MCP tool calls.""" import tempfile import threading from mcp_server_tree_sitter.api import get_project_registry from mcp_server_tree_sitter.models.project import ProjectRegistry from tests.test_helpers import register_project_tool def test_project_registry_singleton() -> None: """Test that project_registry is a singleton that persists.""" # Get the project registry from API project_registry = get_project_registry() # We can't directly clear projects in the new design # Instead, we'll check the current projects and try to avoid conflicts current_projects = project_registry.list_projects() # We'll just assert that we know the current state assert isinstance(current_projects, list) # Register a project with tempfile.TemporaryDirectory() as temp_dir: project_name = "test_project" project_registry.register_project(project_name, temp_dir) # Verify project was registered all_projects = project_registry.list_projects() project_names = [p["name"] for p in all_projects] assert project_name in project_names # Create a new registry instance new_registry = ProjectRegistry() # Because ProjectRegistry uses a class-level singleton pattern, # this should be the same instance all_projects = new_registry.list_projects() project_names = [p["name"] for p in all_projects] assert project_name in project_names def test_mcp_tool_persistence() -> None: """Test that projects persist using the project functions.""" # Get the project registry from API project_registry = get_project_registry() # We can't directly clear projects in the new design # Instead, let's work with the existing state with tempfile.TemporaryDirectory() as temp_dir: # Register a project using the function directly project_name = "test_persistence" 
register_project_tool(temp_dir, project_name) # Verify it exists in the registry all_projects = project_registry.list_projects() project_names = [p["name"] for p in all_projects] assert project_name in project_names # Try to get the project directly project = project_registry.get_project(project_name) assert project.name == project_name def test_project_registry_threads() -> None: """Test that project registry works correctly across threads.""" # Get the project registry from API project_registry = get_project_registry() # We can't directly clear projects in the new design # Instead, let's work with the existing state with tempfile.TemporaryDirectory() as temp_dir: project_name = "thread_test" # Function to run in a thread def thread_func() -> None: # This should use the same registry instance registry = ProjectRegistry() registry.register_project(f"{project_name}_thread", temp_dir) # Register a project in the main thread project_registry.register_project(project_name, temp_dir) # Start a thread to register another project thread = threading.Thread(target=thread_func) thread.start() thread.join() # Both projects should be in the registry all_projects = project_registry.list_projects() project_names = [p["name"] for p in all_projects] assert project_name in project_names assert f"{project_name}_thread" in project_names def test_server_lifecycle() -> None: """Test that project registry survives server "restarts".""" # Get the project registry from API project_registry = get_project_registry() # We can't directly clear projects in the new design # Instead, let's work with the existing state with tempfile.TemporaryDirectory() as temp_dir: project_name = "lifecycle_test" # Register a project register_project_tool(temp_dir, project_name) # Verify it exists all_projects = project_registry.list_projects() project_names = [p["name"] for p in all_projects] assert project_name in project_names # Simulate server restart by importing modules again # Note: This doesn't actually restart anything, it just tests # that the singleton pattern works as expected with imports import importlib import mcp_server_tree_sitter.api importlib.reload(mcp_server_tree_sitter.api) # Get the project registry from the reloaded module from mcp_server_tree_sitter.api import get_project_registry as new_get_project_registry new_project_registry = new_get_project_registry() # The registry should still contain our project all_projects = new_project_registry.list_projects() project_names = [p["name"] for p in all_projects] assert project_name in project_names def test_project_persistence_in_mcp_server() -> None: """Test that project registry survives server "restarts".""" # Get the project registry from API project_registry = get_project_registry() # We can't directly clear projects in the new design # Instead, let's work with the existing state with tempfile.TemporaryDirectory() as temp_dir: project_name = "lifecycle_test" # Register a project register_project_tool(temp_dir, project_name) # Verify it exists all_projects = project_registry.list_projects() project_names = [p["name"] for p in all_projects] assert project_name in project_names # Simulate server restart by importing modules again import importlib import mcp_server_tree_sitter.tools.project importlib.reload(mcp_server_tree_sitter.tools.project) # Get the project registry again test_registry = get_project_registry() # The registry should still contain our project all_projects = test_registry.list_projects() project_names = [p["name"] for p in all_projects] assert 
project_name in project_names if __name__ == "__main__": # Run tests test_project_registry_singleton() test_mcp_tool_persistence() test_project_registry_threads() test_server_lifecycle() test_project_persistence_in_mcp_server() print("All tests passed!") ``` -------------------------------------------------------------------------------- /tests/test_logging_config.py: -------------------------------------------------------------------------------- ```python """Tests for log level configuration settings. This file is being kept as an integration test but has been updated to fully use DI. """ import io import logging import tempfile from contextlib import contextmanager from pathlib import Path import pytest from mcp_server_tree_sitter.di import get_container from tests.test_helpers import configure, get_ast, register_project_tool, temp_config @contextmanager def capture_logs(logger_name="mcp_server_tree_sitter"): """ Context manager to capture logs from a specific logger. Args: logger_name: Name of the logger to capture Returns: StringIO object containing captured logs """ # Get the logger logger = logging.getLogger(logger_name) # Save original level, handlers, and propagate value original_level = logger.level original_handlers = logger.handlers.copy() original_propagate = logger.propagate # Create a StringIO object to capture logs log_capture = io.StringIO() handler = logging.StreamHandler(log_capture) formatter = logging.Formatter("%(levelname)s:%(name)s:%(message)s") handler.setFormatter(formatter) # Clear handlers and add our capture handler logger.handlers = [handler] # Disable propagation to parent loggers to avoid duplicate messages # and ensure our log level settings take effect logger.propagate = False try: yield log_capture finally: # Restore original handlers, level, and propagate setting logger.handlers = original_handlers logger.setLevel(original_level) logger.propagate = original_propagate @pytest.fixture def test_project(): """Create a temporary test project with a sample file.""" with tempfile.TemporaryDirectory() as temp_dir: project_path = Path(temp_dir) # Create a simple Python file test_file = project_path / "test.py" with open(test_file, "w") as f: f.write("def hello():\n print('Hello, world!')\n\nhello()\n") # Register the project project_name = "logging_test_project" try: register_project_tool(path=str(project_path), name=project_name) except Exception: # If registration fails, try with a more unique name import time project_name = f"logging_test_project_{int(time.time())}" register_project_tool(path=str(project_path), name=project_name) yield {"name": project_name, "path": str(project_path), "file": "test.py"} def test_log_level_setting(test_project): """Test that log_level setting controls logging verbosity.""" # Root logger for the package logger_name = "mcp_server_tree_sitter" # Get container for checking values later container = get_container() original_log_level = container.get_config().log_level try: # Test with DEBUG level with temp_config(**{"log_level": "DEBUG"}): # Apply configuration configure(log_level="DEBUG") # Capture logs during an operation with capture_logs(logger_name) as log_capture: # Don't force the root logger level - it should be set by configure # logging.getLogger(logger_name).setLevel(logging.DEBUG) # Perform an operation that generates logs get_ast(project=test_project["name"], path=test_project["file"]) # Check captured logs logs = log_capture.getvalue() print(f"DEBUG logs: {logs}") # Should contain DEBUG level messages assert "DEBUG:" in 
logs, "DEBUG level messages should be present" # Test with INFO level (less verbose) with temp_config(**{"log_level": "INFO"}): # Apply configuration configure(log_level="INFO") # Capture logs during an operation with capture_logs(logger_name) as log_capture: # The root logger level should be set by configure to INFO # No need to manually set it # Generate a debug log that should be filtered logger = logging.getLogger(f"{logger_name}.test") logger.debug("This debug message should be filtered out") # Generate an info log that should be included logger.info("This info message should be included") logs = log_capture.getvalue() print(f"INFO logs: {logs}") # Should not contain the DEBUG message but should contain INFO assert "This debug message should be filtered out" not in logs, "DEBUG messages should be filtered" assert "This info message should be included" in logs, "INFO messages should be included" finally: # Restore original log level container.config_manager.update_value("log_level", original_log_level) def test_log_level_in_yaml_config(): """Test that log_level can be configured via YAML.""" # Create a temporary YAML file with tempfile.NamedTemporaryFile(suffix=".yaml", mode="w+", delete=False) as temp_file: # Write a configuration with explicit log level temp_file.write(""" log_level: DEBUG cache: enabled: true max_size_mb: 100 """) temp_file.flush() temp_file_path = temp_file.name try: # Get container for checking values later container = get_container() original_log_level = container.get_config().log_level try: # Load the configuration result = configure(config_path=temp_file_path) # Verify the log level was set correctly assert result["log_level"] == "DEBUG", "Log level should be set from YAML" # Verify it's applied to loggers with capture_logs("mcp_server_tree_sitter") as log_capture: logger = logging.getLogger("mcp_server_tree_sitter.test") logger.debug("Test debug message") logs = log_capture.getvalue() assert "Test debug message" in logs, "DEBUG log level should be applied" finally: # Restore original log level container.config_manager.update_value("log_level", original_log_level) finally: # Clean up import os os.unlink(temp_file_path) ``` -------------------------------------------------------------------------------- /ROADMAP.md: -------------------------------------------------------------------------------- ```markdown # MCP Tree-sitter Server Roadmap This document outlines the planned improvements and future features for the MCP Tree-sitter Server project. CRITICAL: When a task is done, update this document to mark it done. However, you must ensure it is done for all files/subjects present in the repo. DO NOT mark a task done simply because a subset of the targeted files/subjects have been handled. Mark it [WIP] in that case. ## Short-term Goals ### Code Quality - ✅ Fix linting issues identified by ruff - ✅ Improve exception handling using proper `from` clause - ✅ Remove unused variables and improve code organization - ✅ Implement TreeCursor API support with proper type handling - ✅ Add incremental parsing support - ✅ Add MCP Progress Reporting - ✅ Add Server Capabilities Declaration - [ ] Add mcp server start flag(s) for enabling (allow list approach) and disabling (block list approach) a list of features. Only one approach may be applied at a time. The default should be minimal allowed, for now. 
  Add meta features such as stable, wip, advanced, and basic.
- ✅ Add mcp server start flag(s) for ensuring language packs are installed - Resolved by tree-sitter-language-pack integration
- [ ] Add mcp server start flag(s) for ensuring the project is configured beforehand.
- [ ] Achieve 100% type hinting coverage (and ensure this is enforced by our linting)
- [ ] Improve docstring coverage and quality (Don't thrash on updating docs that are already good) (HOLD pending other work)
- [ ] Split files until the longest .py file is less than 500 lines (unless that breaks functionality, in which case do not)

### Testing

- ✅ Create and maintain tests for AST functionality, query execution, and symbol extraction
- 🔄 [WIP] Create additional tests for context utilities, incremental parsing, and cursor traversal
- [ ] Increase unit test coverage to 100% and begin enforcing that in pre-commit and CI
- [ ] Add integration tests for MCP server functionality (HOLD pending other work)
- [ ] Create automated testing workflow with GitHub Actions (unit, integration, static, etc.) (HOLD pending other work)

### Documentation (HOLD)

- ✅ Create CONTRIBUTING.md with developer guidelines
- 🔄 [WIP] Create a docs/user-guide.md with more examples and clearer installation instructions. Link to it from README.md
- [ ] Add detailed API documentation in docs/api-guide.md
- 🔄 [WIP] Create usage tutorials and examples -- focus only on Claude Desktop for now.

## Medium-term Goals (HOLD)

### Feature Improvements

- ✅ Add support for more tree-sitter languages by implementing https://github.com/Goldziher/tree-sitter-language-pack/
- ✅ Add support for query execution with proper result handling
- [ ] Improve query building tools with more sophisticated matching options (HOLD because we could cripple the codebase with complexity)
- [ ] Implement more advanced code analysis metrics (HOLD because we could cripple the codebase with complexity)
- [ ] Enhance caching system with better invalidation strategy (HOLD because we could cripple the codebase with complexity)

### User Experience

- [ ] Create a web-based UI for visualizing ASTs and running queries (HOLD because Claude's experience is more important)
- [ ] Add CLI commands for common operations (HOLD because Claude runs commands by a different channel)
- ✅ Implement progress reporting for long-running operations
- [ ] Add configuration presets for different use cases (HOLD because we could cripple the codebase with complexity)

### Security

- [ ] Add comprehensive input validation (HOLD because we could cripple the codebase with complexity)
- [ ] Implement access control for multi-user environments (HOLD because we could cripple the codebase with complexity)
- [ ] Add sandbox mode for running untrusted queries (HOLD because we could cripple the codebase with complexity)

## Long-term Goals (HOLD)

### Advanced Features

- [ ] Implement semantic analysis capabilities (HOLD because we need stability first)
- [ ] Add code transformation tools (HOLD because we need stability first)
- [ ] Support cross-language analysis (HOLD because we need stability first)

### Integration

- [ ] Create plugins for popular IDEs (VS Code, IntelliJ) (HOLD because we need stability first)
- [ ] Implement integration with CI/CD pipelines (HOLD because we need stability first)
- [ ] Add support for other LLM frameworks beyond MCP (HOLD because we need stability first)

### Performance

- [ ] Optimize for large codebases (> 1M LOC) (HOLD because we need stability first)
- [ ] Implement distributed analysis for very large projects (HOLD because we need stability first)
- [ ] Add streaming responses for large result sets (HOLD because we need stability first)

## Completed Implementations

### MCP Context Handling

- Added `utils/context/mcp_context.py` with progress tracking capabilities
- Implemented `MCPContext` class with progress reporting
- Created `ProgressScope` for structured operation tracking
- Added context information passing to analysis tools

### TreeCursor API Support

- Enhanced `utils/tree_sitter_types.py` with TreeCursor protocol
- Added efficient cursor-based tree traversal in `utils/tree_sitter_helpers.py`
- Implemented collector pattern using cursors to efficiently find nodes

### Incremental Parsing

- Added support for tree editing in `utils/tree_sitter_helpers.py`
- Enhanced cache to track tree modifications in `cache/parser_cache.py`
- Implemented changed_ranges detection for optimization

### Server Capabilities Declaration

- Created `capabilities/server_capabilities.py` for capability declaration
- Implemented required MCP server capabilities
- Added support for completion suggestions
- Added structured logging integration

## Features and Ideas

Below are some ideas and feature requests being considered:

1. **Semantic Diff**: Show semantic differences between code versions rather than just text diffs (HOLD because we need stability first)
2. **Code Quality Metrics**: Integrate with code quality metrics and linters (HOLD because we need stability first)
3. **Interactive Query Builder**: Visual tool to build and test tree-sitter queries (HOLD because we need stability first)
4. **Code Completion**: Use tree-sitter for more intelligent code completion suggestions (HOLD because we need stability first)
5. **Visualization Export**: Export AST visualizations to various formats (SVG, PNG, etc.) (HOLD because we need stability first)
```

--------------------------------------------------------------------------------
/tests/test_logging_config_di.py:
--------------------------------------------------------------------------------

```python
"""Tests for log level configuration settings with dependency injection."""

import io
import logging
import tempfile
from contextlib import contextmanager
from pathlib import Path

import pytest

from mcp_server_tree_sitter.di import get_container
from tests.test_helpers import configure, get_ast, register_project_tool, temp_config


@contextmanager
def capture_logs(logger_name="mcp_server_tree_sitter"):
    """
    Context manager to capture logs from a specific logger.
Args: logger_name: Name of the logger to capture Returns: StringIO object containing captured logs """ # Get the logger logger = logging.getLogger(logger_name) # Save original level and handlers original_level = logger.level original_handlers = logger.handlers.copy() # Create a StringIO object to capture logs log_capture = io.StringIO() handler = logging.StreamHandler(log_capture) formatter = logging.Formatter("%(levelname)s:%(name)s:%(message)s") handler.setFormatter(formatter) # Clear handlers and add our capture handler logger.handlers = [handler] try: yield log_capture finally: # Restore original handlers and level logger.handlers = original_handlers logger.setLevel(original_level) @pytest.fixture def test_project(): """Create a temporary test project with a sample file.""" with tempfile.TemporaryDirectory() as temp_dir: project_path = Path(temp_dir) # Create a simple Python file test_file = project_path / "test.py" with open(test_file, "w") as f: f.write("def hello():\n print('Hello, world!')\n\nhello()\n") # Register the project project_name = "logging_test_project" try: register_project_tool(path=str(project_path), name=project_name) except Exception: # If registration fails, try with a more unique name import time project_name = f"logging_test_project_{int(time.time())}" register_project_tool(path=str(project_path), name=project_name) yield {"name": project_name, "path": str(project_path), "file": "test.py"} def test_log_level_setting_di(test_project): """Test that log_level setting controls logging verbosity.""" # Root logger for the package logger_name = "mcp_server_tree_sitter" # Get container for checking values later container = get_container() original_log_level = container.get_config().log_level try: # Test with DEBUG level with temp_config(**{"log_level": "DEBUG"}): # Apply configuration configure(log_level="DEBUG") # Capture logs during an operation with capture_logs(logger_name) as log_capture: # Force the root logger to debug level logging.getLogger(logger_name).setLevel(logging.DEBUG) # Perform an operation that generates logs get_ast(project=test_project["name"], path=test_project["file"]) # Check captured logs logs = log_capture.getvalue() print(f"DEBUG logs: {logs}") # Should contain DEBUG level messages assert "DEBUG:" in logs, "DEBUG level messages should be present" # Test with INFO level (less verbose) with temp_config(**{"log_level": "INFO"}): # Apply configuration configure(log_level="INFO") # Capture logs during an operation with capture_logs(logger_name) as log_capture: # Important: Set the root logger to INFO instead of DEBUG # to ensure proper level filtering root_logger = logging.getLogger(logger_name) root_logger.setLevel(logging.INFO) # Set the handler level for the logger for handler in root_logger.handlers: handler.setLevel(logging.INFO) # Create a test logger logger = logging.getLogger(f"{logger_name}.test") # Make sure it inherits from the root logger logger.setLevel(logging.NOTSET) # Generate a debug log that should be filtered logger.debug("This debug message should be filtered out") # Generate an info log that should be included logger.info("This info message should be included") logs = log_capture.getvalue() print(f"INFO logs: {logs}") # Should not contain the DEBUG message but should contain INFO assert "This debug message should be filtered out" not in logs, "DEBUG messages should be filtered" assert "This info message should be included" in logs, "INFO messages should be included" finally: # Restore original log level 
container.config_manager.update_value("log_level", original_log_level) def test_log_level_in_yaml_config_di(): """Test that log_level can be configured via YAML.""" # Create a temporary YAML file with tempfile.NamedTemporaryFile(suffix=".yaml", mode="w+", delete=False) as temp_file: # Write a configuration with explicit log level temp_file.write(""" log_level: DEBUG cache: enabled: true max_size_mb: 100 """) temp_file.flush() temp_file_path = temp_file.name try: # Get container for checking values later container = get_container() original_log_level = container.get_config().log_level try: # Load the configuration result = configure(config_path=temp_file_path) # Verify the log level was set correctly assert result["log_level"] == "DEBUG", "Log level should be set from YAML" # Verify it's applied to loggers with capture_logs("mcp_server_tree_sitter") as log_capture: logger = logging.getLogger("mcp_server_tree_sitter.test") logger.debug("Test debug message") logs = log_capture.getvalue() assert "Test debug message" in logs, "DEBUG log level should be applied" finally: # Restore original log level container.config_manager.update_value("log_level", original_log_level) finally: # Clean up import os os.unlink(temp_file_path) ``` -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/models/project.py: -------------------------------------------------------------------------------- ```python """Project model for MCP server.""" import os import threading import time from pathlib import Path from typing import Any, Dict, List, Optional, Set from ..exceptions import ProjectError from ..utils.path import get_project_root, normalize_path class Project: """Represents a project for code analysis.""" def __init__(self, name: str, path: Path, description: Optional[str] = None): self.name = name self.root_path = path self.description = description self.languages: Dict[str, int] = {} # Language -> file count self.last_scan_time = 0 self.scan_lock = threading.Lock() def to_dict(self) -> Dict[str, Any]: """Convert to dictionary representation.""" return { "name": self.name, "root_path": str(self.root_path), "description": self.description, "languages": self.languages, "last_scan_time": self.last_scan_time, } def scan_files(self, language_registry: Any, force: bool = False) -> Dict[str, int]: """ Scan project files and identify languages. Args: language_registry: LanguageRegistry instance force: Whether to force rescan Returns: Dictionary of language -> file count """ # Skip scan if it was done recently and not forced if not force and time.time() - self.last_scan_time < 60: # 1 minute return self.languages with self.scan_lock: languages: Dict[str, int] = {} scanned: Set[str] = set() for root, _, files in os.walk(self.root_path): # Skip hidden directories if any(part.startswith(".") for part in Path(root).parts): continue for file in files: # Skip hidden files if file.startswith("."): continue file_path = os.path.join(root, file) rel_path = os.path.relpath(file_path, self.root_path) # Skip already scanned files if rel_path in scanned: continue language = language_registry.language_for_file(file) if language: languages[language] = languages.get(language, 0) + 1 scanned.add(rel_path) self.languages = languages self.last_scan_time = int(time.time()) return languages def get_file_path(self, relative_path: str) -> Path: """ Get absolute file path from project-relative path. 
        Args:
            relative_path: Path relative to project root

        Returns:
            Absolute Path

        Raises:
            ProjectError: If path is outside project root
        """
        # Normalize relative path to avoid directory traversal
        norm_path = normalize_path(self.root_path / relative_path)

        # Check path is inside project
        if not str(norm_path).startswith(str(self.root_path)):
            raise ProjectError(f"Path '{relative_path}' is outside project root")

        return norm_path


class ProjectRegistry:
    """Manages projects for code analysis."""

    # Class variables for singleton pattern
    _instance: Optional["ProjectRegistry"] = None
    _global_lock = threading.RLock()

    def __new__(cls) -> "ProjectRegistry":
        """Implement singleton pattern with proper locking."""
        with cls._global_lock:
            if cls._instance is None:
                instance = super(ProjectRegistry, cls).__new__(cls)
                # We need to set attributes on the instance, not the class
                instance._projects = {}
                cls._instance = instance
            return cls._instance

    def __init__(self) -> None:
        """Initialize the registry only once."""
        # The actual initialization is done in __new__ to ensure it happens exactly once
        if not hasattr(self, "_projects"):
            self._projects: Dict[str, Project] = {}

    def register_project(self, name: str, path: str, description: Optional[str] = None) -> Project:
        """
        Register a new project.

        Args:
            name: Project name
            path: Project path
            description: Optional project description

        Returns:
            Registered Project

        Raises:
            ProjectError: If project already exists or path is invalid
        """
        with self._global_lock:
            if name in self._projects:
                raise ProjectError(f"Project '{name}' already exists")

            try:
                norm_path = normalize_path(path, ensure_absolute=True)
                if not norm_path.exists():
                    raise ProjectError(f"Path does not exist: {path}")
                if not norm_path.is_dir():
                    raise ProjectError(f"Path is not a directory: {path}")

                # Try to find project root
                project_root = get_project_root(norm_path)
                project = Project(name, project_root, description)
                self._projects[name] = project
                return project
            except Exception as e:
                raise ProjectError(f"Failed to register project: {e}") from e

    def get_project(self, name: str) -> Project:
        """
        Get a project by name.

        Args:
            name: Project name

        Returns:
            Project

        Raises:
            ProjectError: If project doesn't exist
        """
        with self._global_lock:
            if name not in self._projects:
                raise ProjectError(f"Project '{name}' not found")
            project = self._projects[name]
            return project

    def list_projects(self) -> List[Dict[str, Any]]:
        """
        List all registered projects.

        Returns:
            List of project dictionaries
        """
        with self._global_lock:
            return [project.to_dict() for project in self._projects.values()]

    def remove_project(self, name: str) -> None:
        """
        Remove a project.

        Args:
            name: Project name

        Raises:
            ProjectError: If project doesn't exist
        """
        with self._global_lock:
            if name not in self._projects:
                raise ProjectError(f"Project '{name}' not found")
            del self._projects[name]
```
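
`ProjectRegistry` is a process-wide singleton: every `ProjectRegistry()` call, and the registry returned by `get_project_registry()` in the API module, resolves to the same object, which is what the persistence tests earlier on this page rely on. The snippet below is a minimal usage sketch, not part of the repository; it assumes the package is installed so the import path used by the tests resolves, and the `"demo"` project name and description are made up for illustration.

```python
# Illustrative sketch only (not repository code): exercising the
# ProjectRegistry singleton shown above. The "demo" name is hypothetical.
import tempfile

from mcp_server_tree_sitter.models.project import ProjectRegistry

with tempfile.TemporaryDirectory() as temp_dir:
    registry = ProjectRegistry()

    # register_project validates that the path exists and is a directory,
    # resolves the project root, and stores a Project under the given name.
    registry.register_project("demo", temp_dir, description="scratch project")

    # A second "instance" is the same object, so it sees the same projects.
    assert ProjectRegistry() is registry
    names = [p["name"] for p in registry.list_projects()]
    assert "demo" in names

    # Look up and remove the project by name; unknown names raise ProjectError.
    project = registry.get_project("demo")
    print(project.to_dict())
    registry.remove_project("demo")
```

Because the registry state lives on the class rather than on any one instance, it survives re-imports and is shared across threads; that is also why the tests above avoid trying to clear it and instead tolerate whatever projects are already registered.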