# Directory Structure ``` ├── .env.example ├── .github │ └── workflows │ ├── ci.yml │ ├── docker-image.yml │ └── npm-publish-github-packages.yml ├── .gitignore ├── AGENTS.md ├── alt-test-gemini.js ├── alt-test-openai.js ├── alt-test.js ├── Attachments │ ├── Template.md │ ├── VC1.png │ ├── vc2.png │ ├── vc3.png │ ├── vc4.png │ ├── VCC1.png │ ├── VCC2.png │ ├── vibe (1).jpeg │ ├── vibelogo.png │ └── vibelogov2.png ├── CITATION.cff ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Dockerfile ├── docs │ ├── _toc.md │ ├── advanced-integration.md │ ├── agent-prompting.md │ ├── AGENTS.md │ ├── architecture.md │ ├── case-studies.md │ ├── changelog.md │ ├── docker-automation.md │ ├── gemini.md │ ├── integrations │ │ └── cpi.md │ ├── philosophy.md │ ├── registry-descriptions.md │ ├── technical-reference.md │ └── TESTING.md ├── examples │ └── cpi-integration.ts ├── glama.json ├── LICENSE ├── package-lock.json ├── package.json ├── pnpm-lock.yaml ├── README.md ├── request.json ├── scripts │ ├── docker-setup.sh │ ├── install-vibe-check.sh │ └── security-check.cjs ├── SECURITY.md ├── server.json ├── smithery.yaml ├── src │ ├── index.ts │ ├── tools │ │ ├── constitution.ts │ │ ├── vibeCheck.ts │ │ ├── vibeDistil.ts │ │ └── vibeLearn.ts │ └── utils │ ├── llm.ts │ ├── state.ts │ └── storage.ts ├── test-client.js ├── test-client.ts ├── test.js ├── test.json ├── tests │ ├── constitution.test.ts │ ├── llm.test.ts │ ├── startup.test.ts │ ├── state.test.ts │ ├── vibeCheck.test.ts │ └── vibeLearn.test.ts ├── tsconfig.json └── vitest.config.ts ``` # Files -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- ``` 1 | # Copy this file to .env and fill in your API key. 
2 | GOOGLE_CLOUD_PROJECT="mcp-vibetest" 3 | DEFAULT_MODEL=gemini-2.5-flash 4 | DEFAULT_LLM_PROVIDER=gemini 5 | OPENAI_API_KEY=your_openai_key 6 | OPENROUTER_API_KEY=your_openrouter_key 7 | USE_LEARNING_HISTORY=false 8 | ``` -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- ``` 1 | # Dependencies 2 | node_modules/ 3 | npm-debug.log 4 | yarn-debug.log 5 | yarn-error.log 6 | 7 | # Build output 8 | build/ 9 | dist/ 10 | *.tsbuildinfo 11 | 12 | # Environment variables 13 | .env 14 | .env.local 15 | .env.*.local 16 | 17 | # IDE and editor files 18 | .idea/ 19 | .vscode/ 20 | *.swp 21 | *.swo 22 | .DS_Store 23 | 24 | # Logs 25 | logs/ 26 | *.log 27 | npm-debug.log* 28 | yarn-debug.log* 29 | yarn-error.log* 30 | 31 | # Testing 32 | coverage/ 33 | .nyc_output/ 34 | 35 | # Temporary files 36 | tmp/ 37 | temp/ 38 | 39 | # Local configuration 40 | .npmrc 41 | .mcpregistry_github_token 42 | .mcpregistry_registry_token ``` -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- ```markdown 1 | # 🧠 Vibe Check MCP v2.5.1 2 | 3 | <p align="center"> 4 | <b>Based on research</b><br/> 5 | In our study agents calling Vibe Check improved success (27 → 54%) and halved harmful actions (83 → 42%). 
6 | </p> 7 | 8 | <p align="center"> 9 | <a href="https://www.researchgate.net/publication/394946231_Do_AI_Agents_Need_Mentors_Evaluating_Chain-Pattern_Interrupt_CPI_for_Oversight_and_Reliability?channel=doi&linkId=68ad6178ca495d76982ff192&showFulltext=true"> 10 | <img src="https://img.shields.io/badge/Research-CPI%20%28MURST%29-blue?style=flat-square" alt="CPI (MURST) Research"> 11 | </a> 12 | <a href="https://github.com/modelcontextprotocol/servers"><img src="https://img.shields.io/badge/Anthropic%20MCP-listed-111?labelColor=111&color=555&style=flat-square" alt="Anthropic MCP: listed"></a> 13 | <a href="https://registry.modelcontextprotocol.io/"><img src="https://img.shields.io/badge/MCP%20Registry-discoverable-555?labelColor=111&style=flat-square" alt="MCP Registry: discoverable"></a> 14 | <a href="https://github.com/PV-Bhat/vibe-check-mcp-server/actions/workflows/ci.yml"><img src="https://github.com/PV-Bhat/vibe-check-mcp-server/actions/workflows/ci.yml/badge.svg" alt="CI"></a> 15 | <a href="LICENSE"><img src="https://img.shields.io/badge/license-MIT-0b7285?style=flat-square" alt="MIT License"></a> 16 | </p> 17 | 18 | <p align="center"> 19 | <sub>18k+ installs across MCP clients • research-backed oversight • streamable HTTP transport</sub> 20 | </p> 21 | 22 | <img width="500" height="300" alt="vibecheckv2.5" src="https://github.com/user-attachments/assets/bcd06d7d-a184-43e9-8c43-22aca3074d32" /> 23 | 24 | *Plug-and-play metacognitive oversight layer for autonomous AI agents – a research-backed MCP server keeping LLMs aligned, reflective and safe.* 25 | ### Recognition 26 | - Listed in Anthropic’s official Model Context Protocol repo [🔗](https://github.com/modelcontextprotocol/servers?tab=readme-ov-file#-community-servers) 27 | - Discoverable in the official MCP Registry [🔗](https://registry.modelcontextprotocol.io/v0/servers?search=vibe-check-mcp) 28 | - 18k+ installs across public MCP directories/clients 29 | 30 | 31 | 
[](https://github.com/PV-Bhat/vibe-check-mcp-server) 32 | [](https://archestra.ai/mcp-catalog/pv-bhat__vibe-check-mcp-server) 33 | [](https://smithery.ai/server/@PV-Bhat/vibe-check-mcp-server) 34 | [](https://mseep.ai/app/a2954e62-a3f8-45b8-9a03-33add8b92599) 35 | [](CONTRIBUTING.md) 36 | 37 | ## Table of Contents 38 | - [What is Vibe Check MCP?](#what-is-vibe-check-mcp) 39 | - [Overview](#overview) 40 | - [Architecture](#architecture) 41 | - [The Problem: Pattern Inertia & Reasoning Lock-In](#the-problem-pattern-inertia--reasoning-lock-in) 42 | - [Key Features](#key-features) 43 | - [What's New in v2.5.1](#whats-new-in-v251) 44 | - [Quickstart & Installation](#quickstart--installation) 45 | - [Usage Examples](#usage-examples) 46 | - [Adaptive Metacognitive Interrupts (CPI)](#adaptive-metacognitive-interrupts-cpi) 47 | - [Agent Prompting Essentials](#agent-prompting-essentials) 48 | - [When to Use Each Tool](#when-to-use-each-tool) 49 | - [Documentation](#documentation) 50 | - [Research & Philosophy](#research--philosophy) 51 | - [Security](#security) 52 | - [Roadmap](#roadmap) 53 | - [Contributing & Community](#contributing--community) 54 | - [FAQ](#faq) 55 | - [Listed on](#find-vibe-check-mcp-on) 56 | - [Credits & License](#credits--license) 57 | --- 58 | ## What is Vibe Check MCP? 59 | 60 | Vibe Check MCP is a lightweight server implementing Anthropic's [Model Context Protocol](https://anthropic.com/mcp). It acts as an **AI meta-mentor** for your agents, interrupting pattern inertia with **Chain-Pattern Interrupts (CPI)** to prevent Reasoning Lock-In (RLI). Think of it as a rubber-duck debugger for LLMs – a quick sanity check before your agent goes down the wrong path. 61 | 62 | ## Overview 63 | 64 | Vibe Check MCP pairs a metacognitive signal layer with CPI so agents can pause when risk spikes. Vibe Check surfaces traits, uncertainty, and risk scores; CPI consumes those triggers and enforces an intervention policy before the agent resumes.
See the [CPI integration guide](./docs/integrations/cpi.md) and the CPI repo at https://github.com/PV-Bhat/cpi for wiring details. 65 | 66 | ## Architecture 67 | 68 | Vibe Check runs alongside your agent workflow, emitting signals that downstream overseers like CPI or human reviewers can act on. The high-level component map lives in [docs/architecture.md](./docs/architecture.md), while the CPI handoff diagram and example shim are captured in [docs/integrations/cpi.md](./docs/integrations/cpi.md). 69 | 70 | ## The Problem: Pattern Inertia & Reasoning Lock-In 71 | 72 | Large language models can confidently follow flawed plans. Without an external nudge they may spiral into overengineering or misalignment. Vibe Check provides that nudge through short reflective pauses, improving reliability and safety. 73 | 74 | ## Key Features 75 | 76 | | Feature | Description | Benefits | 77 | |---------|-------------|----------| 78 | | **CPI Adaptive Interrupts** | Phase-aware prompts that challenge assumptions | alignment, robustness | 79 | | **Multi-provider LLM** | Gemini, OpenAI and OpenRouter support | flexibility | 80 | | **History Continuity** | Summarizes prior advice when `sessionId` is supplied | context retention | 81 | | **Optional vibe_learn** | Log mistakes and fixes for future reflection | self-improvement | 82 | 83 | ## What's New in v2.5.1 84 | 85 | ## Session Constitution (per-session rules) 86 | 87 | Use a lightweight “constitution” to enforce rules per `sessionId` that CPI will honor. 
Typical uses: “no external network calls,” “prefer unit tests before refactors,” “never write secrets to disk.” 88 | 89 | **API (tools):** 90 | - `update_constitution({ sessionId, rules })` → merges/sets rule set for the session 91 | - `reset_constitution({ sessionId })` → clears session rules 92 | - `check_constitution({ sessionId })` → returns effective rules for the session 93 | 94 | ## Quickstart & Installation 95 | ```bash 96 | # Clone and install 97 | git clone https://github.com/PV-Bhat/vibe-check-mcp-server.git 98 | cd vibe-check-mcp-server 99 | npm install 100 | npm run build 101 | ``` 102 | This project targets Node **20+**. If you see a TypeScript error about a duplicate `require` declaration when building with Node 20.19.3, ensure your dependencies are up to date (`npm install`) or use the Docker setup below which handles the build automatically. 103 | 104 | Create a `.env` file with the API keys you plan to use: 105 | ```bash 106 | # Gemini (default) 107 | GEMINI_API_KEY=your_gemini_api_key 108 | # Optional providers 109 | OPENAI_API_KEY=your_openai_api_key 110 | OPENROUTER_API_KEY=your_openrouter_api_key 111 | # Optional overrides 112 | DEFAULT_LLM_PROVIDER=gemini 113 | DEFAULT_MODEL=gemini-2.5-pro 114 | ``` 115 | Start the server: 116 | ```bash 117 | npm start 118 | ``` 119 | See [docs/TESTING.md](./docs/TESTING.md) for instructions on how to run tests. 120 | 121 | ### Docker 122 | The repository includes a helper script for one-command setup. 
It builds the image, saves your `GEMINI_API_KEY` and configures the container to start automatically whenever you log in: 123 | ```bash 124 | bash scripts/docker-setup.sh 125 | ``` 126 | This script: 127 | - Creates `~/vibe-check-mcp` for persistent data 128 | - Builds the Docker image and sets up `docker-compose.yml` 129 | - Prompts for your API key and writes `~/vibe-check-mcp/.env` 130 | - Installs a systemd service (Linux) or LaunchAgent (macOS) so the container starts at login 131 | - Generates `vibe-check-tcp-wrapper.sh` which proxies Cursor IDE to the server 132 | After running it, open Cursor IDE → **Settings** → **MCP** and add a new server of type **Command** pointing to: 133 | ```bash 134 | ~/vibe-check-mcp/vibe-check-tcp-wrapper.sh 135 | ``` 136 | See [Automatic Docker Setup](./docs/docker-automation.md) for full details. 137 | If you prefer to run the commands manually: 138 | ```bash 139 | docker build -t vibe-check-mcp . 140 | docker run -e GEMINI_API_KEY=your_gemini_api_key -p 3000:3000 vibe-check-mcp 141 | ``` 142 | 143 | ### Integrating with Claude Desktop 144 | Add to `claude_desktop_config.json`: 145 | ```json 146 | "vibe-check": { 147 | "command": "node", 148 | "args": ["/path/to/vibe-check-mcp/build/index.js"], 149 | "env": { "GEMINI_API_KEY": "YOUR_GEMINI_API_KEY" } 150 | } 151 | ``` 152 | 153 | ## Research & Philosophy 154 | 155 | **CPI (Chain-Pattern Interrupt)** is the research-backed oversight method behind Vibe Check. It injects brief, well-timed “pause points” at risk inflection moments to re-align the agent to the user’s true priority, preventing destructive cascades and **reasoning lock-in (RLI)**. In pooled evaluation across 153 runs, CPI **nearly doubles success (~27%→54%) and roughly halves harmful actions (~83%→42%)**. Optimal interrupt **dosage is ~10–20%** of steps. 
*Vibe Check MCP implements CPI as an external mentor layer at test time.* 156 | 157 | **Links:** 158 | - 📄 **CPI Paper (ResearchGate)** — http://dx.doi.org/10.13140/RG.2.2.18237.93922 159 | - 📘 **CPI Reference Implementation (GitHub)**: https://github.com/PV-Bhat/cpi 160 | - 📚 **MURST Zenodo DOI (RSRC archival)**: https://doi.org/10.5281/zenodo.14851363 161 | 162 | ## Usage Examples 163 | ```ts 164 | import { vibe_check } from 'vibe-check-mcp'; 165 | const result = await vibe_check({ 166 | goal: 'Write unit tests', 167 | plan: 'Use vitest for coverage', 168 | sessionId: 'demo1' 169 | }); 170 | console.log(result.questions); 171 | ``` 172 | ```mermaid 173 | flowchart TD 174 | A[Agent Phase] --> B{Monitor Progress} 175 | B -- high risk --> C[CPI Interrupt] 176 | C --> D[Reflect & Adjust] 177 | B -- smooth --> E[Continue] 178 | ``` 179 | 180 | ## Adaptive Metacognitive Interrupts (CPI) 181 | <details><summary>Advanced CPI Details</summary> 182 | The CPI architecture monitors planning, implementation and review phases. When uncertainty spikes, Vibe Check pauses execution, poses clarifying questions and resumes once the agent acknowledges the feedback. 183 | </details> 184 | 185 | ## Agent Prompting Essentials 186 | In your agent's system prompt, make it clear that `vibe_check` is a mandatory tool for reflection. Always pass the full user request and other relevant context. After correcting a mistake, you can optionally log it with `vibe_learn` to build a history for future analysis. 187 | 188 | Example snippet: 189 | ``` 190 | As an autonomous agent you will: 191 | 1. Call vibe_check after planning and before major actions. 192 | 2. Provide the full user request and your current plan. 193 | 3. Optionally, record resolved issues with vibe_learn. 
194 | ``` 195 | 196 | ## When to Use Each Tool 197 | | Tool | Purpose | 198 | |------------------------|--------------------------------------------------------------| 199 | | 🛑 **vibe_check** | Challenge assumptions and prevent tunnel vision | 200 | | 🔄 **vibe_learn** | Capture mistakes, preferences, and successes | 201 | | 🧰 **update_constitution** | Set/merge session rules the CPI layer will enforce | 202 | | 🧹 **reset_constitution** | Clear rules for a session | 203 | | 🔎 **check_constitution** | Inspect effective rules for a session | 204 | 205 | ## Documentation 206 | - [Agent Prompting Strategies](./docs/agent-prompting.md) 207 | - [CPI Integration](./docs/integrations/cpi.md) 208 | - [Advanced Integration](./docs/advanced-integration.md) 209 | - [Technical Reference](./docs/technical-reference.md) 210 | - [Automatic Docker Setup](./docs/docker-automation.md) 211 | - [Philosophy](./docs/philosophy.md) 212 | - [Case Studies](./docs/case-studies.md) 213 | - [Changelog](./docs/changelog.md) 214 | 215 | ## Security 216 | This repository includes a CI-based security scan that runs on every pull request. It checks dependencies with `npm audit` and scans the source for risky patterns. See [SECURITY.md](./SECURITY.md) for details and how to report issues. 217 | 218 | ## Roadmap 219 | 1. Benchmarks and latency profiling 220 | 2. Adaptive tuning based on agent performance 221 | 3. Multi-agent cooperation support 222 | 4. Optional human-in-the-loop review 223 | 224 | ## Contributing & Community 225 | Contributions are welcome! See [CONTRIBUTING.md](./CONTRIBUTING.md). 226 | 227 | ## FAQ 228 | - **Does it increase latency?** A single CPI call typically adds ~1 second depending on the provider. 229 | - **Can I disable logging?** Yes, `vibe_learn` is optional. 
230 | 231 | ## Find Vibe Check MCP on 232 | * 🌐 [MSEEP](https://mseep.ai/app/pv-bhat-vibe-check-mcp-server) 233 | * 📡 [MCP Servers](https://mcpservers.org/servers/PV-Bhat/vibe-check-mcp-server) 234 | * 🧠 [MCP.so](https://mcp.so/server/vibe-check-mcp-server/PV-Bhat) 235 | * 🛠️ [Creati.ai](https://creati.ai/mcp/vibe-check-mcp-server/) 236 | * 💡 [Pulse MCP](https://www.pulsemcp.com/servers/pv-bhat-vibe-check) 237 | * 📘 [Playbooks.com](https://playbooks.com/mcp/pv-bhat-vibe-check) 238 | * 🧰 [MCPHub.tools](https://mcphub.tools/detail/PV-Bhat/vibe-check-mcp-server) 239 | * 📇 [MCP Directory](https://mcpdirectory.ai/mcpserver/2419/) 240 | * 🧙 [MagicSlides](https://www.magicslides.app/mcps/pv-bhat-vibe-check) 241 | * 🗃️ [AIAgentsList](https://aiagentslist.com/mcp-servers/vibe-check-mcp-server) 242 | 243 | ## Star History 244 | 245 | [](https://www.star-history.com/#PV-Bhat/vibe-check-mcp-server&Date) 246 | 247 | ## Credits & License 248 | Vibe Check MCP is released under the [MIT License](LICENSE). Built for reliable, enterprise-ready AI agents. 249 | 250 | ## Author Credits & Links 251 | Vibe Check MCP created by: [Pruthvi Bhat](https://pruthvibhat.com/), Initiative - https://murst.org/ 252 | ``` -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- ```markdown 1 | # Code of Conduct 2 | 3 | This project adheres to the [Contributor Covenant](https://www.contributor-covenant.org/version/2/1/code_of_conduct/) Code of Conduct. By participating, you are expected to uphold this code. 4 | 5 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the maintainer listed in `package.json`.
6 | ``` -------------------------------------------------------------------------------- /AGENTS.md: -------------------------------------------------------------------------------- ```markdown 1 | # Agent Quickstart 2 | 3 | Vibe Check MCP is a lightweight oversight layer for AI agents. It exposes two tools: 4 | 5 | - **vibe_check** – prompts you with clarifying questions to prevent tunnel vision. 6 | - **vibe_learn** – optional logging of mistakes and successes for later review. 7 | 8 | The server supports Gemini, OpenAI and OpenRouter LLMs. History is maintained across requests when a `sessionId` is provided. 9 | 10 | ## Setup 11 | 12 | 1. Install dependencies and build: 13 | ```bash 14 | npm install 15 | npm run build 16 | ``` 17 | 2. Supply the following environment variables as needed: 18 | - `GEMINI_API_KEY` 19 | - `OPENAI_API_KEY` 20 | - `OPENROUTER_API_KEY` 21 | - `DEFAULT_LLM_PROVIDER` (gemini | openai | openrouter) 22 | - `DEFAULT_MODEL` (e.g., gemini-2.5-pro) 23 | 3. Start the server: 24 | ```bash 25 | npm start 26 | ``` 27 | 28 | ## Testing 29 | 30 | Run unit tests with `npm test`. Example request generators are provided: 31 | 32 | - `alt-test-gemini.js` 33 | - `alt-test-openai.js` 34 | - `alt-test.js` (OpenRouter) 35 | 36 | Each script writes a `request.json` file that you can pipe to the server: 37 | 38 | ```bash 39 | node build/index.js < request.json 40 | ``` 41 | 42 | ## Integration Tips 43 | 44 | Call `vibe_check` regularly with your goal, plan and current progress. Use `vibe_learn` whenever you want to record a resolved issue. Full API details are in `docs/technical-reference.md`. 45 | ``` -------------------------------------------------------------------------------- /docs/AGENTS.md: -------------------------------------------------------------------------------- ```markdown 1 | # Agent Quickstart 2 | 3 | Vibe Check MCP is a lightweight oversight layer for AI agents. 
It exposes two tools: 4 | 5 | - **vibe_check** – prompts you with clarifying questions to prevent tunnel vision. 6 | - **vibe_learn** – optional logging of mistakes and successes for later review. 7 | 8 | The server supports Gemini, OpenAI and OpenRouter LLMs. History is maintained across requests when a `sessionId` is provided. 9 | 10 | ## Setup 11 | 12 | 1. Install dependencies and build: 13 | ```bash 14 | npm install 15 | npm run build 16 | ``` 17 | 2. Supply the following environment variables as needed: 18 | - `GEMINI_API_KEY` 19 | - `OPENAI_API_KEY` 20 | - `OPENROUTER_API_KEY` 21 | - `DEFAULT_LLM_PROVIDER` (gemini | openai | openrouter) 22 | - `DEFAULT_MODEL` (e.g., gemini-2.5-pro) 23 | 3. Start the server: 24 | ```bash 25 | npm start 26 | ``` 27 | 28 | ## Testing 29 | 30 | Run unit tests with `npm test`. Example request generators are provided: 31 | 32 | - `alt-test-gemini.js` 33 | - `alt-test-openai.js` 34 | - `alt-test.js` (OpenRouter) 35 | 36 | Each script writes a `request.json` file that you can pipe to the server: 37 | 38 | ```bash 39 | node build/index.js < request.json 40 | ``` 41 | 42 | ## Integration Tips 43 | 44 | Call `vibe_check` regularly with your goal, plan and current progress. Use `vibe_learn` whenever you want to record a resolved issue. Full API details are in `docs/technical-reference.md`. 45 | ``` -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- ```markdown 1 | # Security Policy 2 | 3 | VibeCheck MCP is designed as a lightweight oversight layer for AI coding agents. While it does not execute code on behalf of the agent, it processes user prompts and sends them to third‑party LLM APIs. This document outlines our approach to keeping that process secure. 4 | 5 | ## Supported Versions 6 | Only the latest release receives security updates. Please upgrade regularly to stay protected. 
7 | 8 | ## Threat Model 9 | - **Prompt injection**: malicious text could attempt to alter the meta-mentor instructions. VibeCheck uses a fixed system prompt and validates required fields to mitigate this. 10 | - **Tool misuse**: the server exposes only two safe tools (`vibe_check` and `vibe_learn`). No command execution or file access is performed. 11 | - **Data leakage**: requests are forwarded to the configured LLM provider. Avoid sending sensitive data if using hosted APIs. The optional `vibe_learn` log can be disabled via environment variables. 12 | - **Impersonation**: run VibeCheck only from this official repository or the published npm package. Verify the source before deployment. 13 | 14 | ## Reporting a Vulnerability 15 | If you discover a security issue, please open a private GitHub issue or email the maintainer listed in `package.json`. We will acknowledge your report within 48 hours and aim to provide a fix promptly. 16 | 17 | ## Continuous Security 18 | A custom security scan runs in CI on every pull request. It checks dependencies for known vulnerabilities and searches the source tree for dangerous patterns. The workflow fails if any issue is detected. 19 | 20 | ``` -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- ```markdown 1 | # Contributing to Vibe Check MCP 2 | 3 | First off, thanks for considering contributing to Vibe Check! It's people like you that help make this metacognitive oversight layer even better. 4 | 5 | ## The Vibe of Contributing 6 | 7 | Contributing to Vibe Check isn't just about code—it's about joining a community that's trying to make AI agents a bit more self-aware (since they're not quite there yet on their own). 8 | 9 | ### The Basic Flow 10 | 11 | 1. **Find something to improve**: Did your agent recently go off the rails in a way Vibe Check could have prevented? Found a bug? 
Have an idea for a new feature? That's a great starting point. 12 | 13 | 2. **Fork & clone**: The standard GitHub dance. Fork the repo, clone it locally, and create a branch for your changes. 14 | 15 | 3. **Make your changes**: Whether it's code, documentation, or just fixing a typo, all contributions are welcome. 16 | 17 | 4. **Test your changes**: Make sure everything still works as expected. 18 | 19 | 5. **Submit a PR**: Push your changes to your fork and submit a pull request. We'll review it as soon as we can. 20 | 21 | ## Vibe Check Your Contributions 22 | 23 | Before submitting a PR, run your own mental vibe check on your changes: 24 | 25 | - Does this align with the metacognitive purpose of Vibe Check? 26 | - Is this addressing a real problem that AI agents face? 27 | - Does this maintain the balance between developer-friendly vibes and serious AI alignment principles? 28 | 29 | ## What We're Looking For 30 | 31 | ### Code Contributions 32 | 33 | - Bug fixes 34 | - Performance improvements 35 | - New features that align with the project's purpose 36 | - Improvements to the metacognitive questioning system 37 | 38 | ### Documentation Contributions 39 | 40 | - Clarifications to existing documentation 41 | - New examples of how to use Vibe Check effectively 42 | - Case studies of how Vibe Check has helped your agent workflows 43 | - Tutorials for integration with different systems 44 | 45 | ### Pattern Contributions 46 | 47 | - New categories for the `vibe_learn` system 48 | - Common error patterns you've observed in AI agent workflows 49 | - Metacognitive questions that effectively break pattern inertia 50 | 51 | ## Coding Style 52 | 53 | - TypeScript with clear typing 54 | - Descriptive variable names 55 | - Comments that explain the "why," not just the "what" 56 | - Tests for new functionality 57 | 58 | ## The Review Process 59 | 60 | Once you submit a PR, here's what happens: 61 | 62 | 1. A maintainer will review your submission 63 | 2. 
They might suggest some changes or improvements 64 | 3. Once everything looks good, they'll merge your PR 65 | 4. Your contribution becomes part of Vibe Check! 66 | 67 | ## Share Your Vibe Stories 68 | 69 | We love hearing how people are using Vibe Check in the wild. If you have a story about how Vibe Check saved your agent from a catastrophic reasoning failure or helped simplify an overcomplicated plan, we'd love to hear about it! Submit it as an issue with the tag "vibe story" or mention it in your PR. 70 | 71 | ## Code of Conduct 72 | 73 | - Be respectful and constructive in all interactions 74 | - Focus on the code, not the person 75 | - Help create a welcoming community for all contributors 76 | 77 | ## Questions? 78 | 79 | If you have any questions about contributing, feel free to open an issue with your question. We're here to help! 80 | 81 | Thanks again for considering a contribution to Vibe Check. Together, we can make AI agents a little more self-aware, one pattern interrupt at a time. 
``` -------------------------------------------------------------------------------- /Attachments/Template.md: -------------------------------------------------------------------------------- ```markdown 1 | Template 2 | ``` -------------------------------------------------------------------------------- /src/tools/vibeDistil.ts: -------------------------------------------------------------------------------- ```typescript 1 | // Deleted ``` -------------------------------------------------------------------------------- /glama.json: -------------------------------------------------------------------------------- ```json 1 | { 2 | "$schema": "https://glama.ai/mcp/schemas/server.json", 3 | "maintainers": [ 4 | "PV-Bhat" 5 | ] 6 | } 7 | ``` -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- ```dockerfile 1 | FROM node:lts-alpine 2 | 3 | WORKDIR /app 4 | 5 | COPY . . 6 | 7 | RUN npm install --ignore-scripts 8 | RUN npm run build 9 | 10 | EXPOSE 3000 11 | 12 | CMD ["node", "build/index.js"] 13 | ``` -------------------------------------------------------------------------------- /docs/_toc.md: -------------------------------------------------------------------------------- ```markdown 1 | # Documentation map 2 | 3 | - [Architecture](./architecture.md) 4 | - Integrations 5 | - [CPI Integration](./integrations/cpi.md) 6 | - [Advanced Integration](./advanced-integration.md) 7 | - [Technical Reference](./technical-reference.md) 8 | - [Agent Prompting](./agent-prompting.md) 9 | ``` -------------------------------------------------------------------------------- /.github/workflows/docker-image.yml: -------------------------------------------------------------------------------- ```yaml 1 | name: Docker Image CI 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | jobs: 10 | 11 | build: 12 | 13 | runs-on: 
ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v4 17 | - name: Build the Docker image 18 | run: docker build . --file Dockerfile --tag my-image-name:$(date +%s) 19 | ``` -------------------------------------------------------------------------------- /request.json: -------------------------------------------------------------------------------- ```json 1 | {"jsonrpc":"2.0","method":"tools/call","params":{"name":"vibe_check","arguments":{"goal":"Test session history functionality","plan":"2. Make a second call to verify history is included.","userPrompt":"Please test the history feature.","progress":"Just made the second call.","sessionId":"history-test-session-1"}},"id":2} ``` -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- ```json 1 | { 2 | "compilerOptions": { 3 | "target": "ES2022", 4 | "module": "NodeNext", 5 | "moduleResolution": "NodeNext", 6 | "esModuleInterop": true, 7 | "outDir": "build", 8 | "strict": true, 9 | "declaration": false, 10 | "sourceMap": false, 11 | "types": [ 12 | "node", 13 | "vitest/globals" 14 | ] 15 | }, 16 | "include": [ 17 | "src/**/*", 18 | "tests/**/*" 19 | ], 20 | "exclude": [ 21 | "node_modules", 22 | "**/*.test.ts" 23 | ] 24 | } 25 | ``` -------------------------------------------------------------------------------- /alt-test-gemini.js: -------------------------------------------------------------------------------- ```javascript 1 | 2 | import fs from 'fs'; 3 | 4 | const request = JSON.stringify({ 5 | jsonrpc: '2.0', 6 | method: 'tools/call', 7 | params: { 8 | name: 'vibe_check', 9 | arguments: { 10 | goal: 'Test default Gemini provider', 11 | plan: '2. 
Make a call to vibe_check using the default Gemini provider.', 12 | } 13 | }, 14 | id: 2 15 | }); 16 | 17 | fs.writeFileSync('request.json', request, 'utf-8'); 18 | 19 | console.log('Generated request.json for the Gemini test.'); 20 | ``` -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- ```yaml 1 | name: CI 2 | on: 3 | push: 4 | branches: [ main ] 5 | pull_request: 6 | branches: [ main ] 7 | jobs: 8 | build: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v4 12 | - uses: actions/setup-node@v4 13 | with: 14 | node-version: '20' 15 | - run: npm ci 16 | - run: npm run build 17 | - run: npm run test:coverage 18 | - name: Report Vitest Coverage 19 | uses: davelosert/vitest-coverage-report-action@v2 20 | - name: Security Scan 21 | run: npm run security-check 22 | ``` -------------------------------------------------------------------------------- /test.json: -------------------------------------------------------------------------------- ```json 1 | {"id":"1","jsonrpc":"2.0","method":"tools/call","params":{"name":"vibe_check","arguments":{"goal":"Implement the core logic for the new feature","plan":"1. Define the data structures. 2. Implement the main algorithm. 3. 
Add error handling.","userPrompt":"Create a new feature that does X, Y, and Z.","progress":"Just started","uncertainties":["The third-party API might be unreliable"],"taskContext":"This is part of a larger project to refactor the billing module.","sessionId":"test-session-123"}}} ``` -------------------------------------------------------------------------------- /vitest.config.ts: -------------------------------------------------------------------------------- ```typescript 1 | import { defineConfig } from 'vitest/config'; 2 | 3 | export default defineConfig({ 4 | test: { 5 | environment: 'node', 6 | globals: true, 7 | include: ['tests/**/*.test.ts'], 8 | coverage: { 9 | provider: 'v8', 10 | reporter: ['text', 'html', 'json-summary'], 11 | all: true, 12 | include: ['src/**/*.ts'], 13 | exclude: ['**/alt-test*.js', 'test-client.*', 'src/index.ts', 'src/tools/vibeDistil.ts', 'src/tools/vibeLearn.ts', 'src/utils/storage.ts', 'src/utils/llm.ts'], 14 | thresholds: { lines: 80 } 15 | } 16 | } 17 | }); 18 | ``` -------------------------------------------------------------------------------- /alt-test-openai.js: -------------------------------------------------------------------------------- ```javascript 1 | 2 | import fs from 'fs'; 3 | 4 | const request = JSON.stringify({ 5 | jsonrpc: '2.0', 6 | method: 'tools/call', 7 | params: { 8 | name: 'vibe_check', 9 | arguments: { 10 | goal: 'Test OpenAI provider', 11 | plan: '1. 
Make a call to vibe_check using the OpenAI provider.', 12 | modelOverride: { 13 | provider: 'openai', 14 | model: 'o4-mini' 15 | } 16 | } 17 | }, 18 | id: 1 19 | }); 20 | 21 | fs.writeFileSync('request.json', request, 'utf-8'); 22 | 23 | console.log('Generated request.json for the OpenAI test.'); 24 | ``` -------------------------------------------------------------------------------- /docs/registry-descriptions.md: -------------------------------------------------------------------------------- ```markdown 1 | # Registry Descriptions 2 | 3 | These short descriptions can be used when submitting VibeCheck MCP to external registries or directories. 4 | 5 | ## Smithery.ai 6 | ``` 7 | Metacognitive oversight MCP server for AI agents – adaptive CPI interrupts for alignment and safety. 8 | ``` 9 | 10 | ## Glama Directory 11 | ``` 12 | Metacognitive layer for Llama-compatible agents via MCP. Enhances reflection, accountability and robustness. 13 | ``` 14 | 15 | ## Awesome MCP Lists PR Draft 16 | ``` 17 | - [VibeCheck MCP](https://github.com/PV-Bhat/vibe-check-mcp-server) - Adaptive sanity check server preventing cascading errors in AI agents. 
18 | ``` 19 | ``` -------------------------------------------------------------------------------- /test-client.ts: -------------------------------------------------------------------------------- ```typescript 1 | import { Client } from '@modelcontextprotocol/sdk/client/index.js'; 2 | import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js'; 3 | import { spawn } from 'child_process'; 4 | 5 | async function testVibeCheck() { 6 | const serverProcess = spawn('node', ['build/index.js'], { stdio: ['pipe', 'pipe', 'pipe'] }); 7 | 8 | await new Promise(resolve => setTimeout(resolve, 1000)); 9 | 10 | const transport = new StdioClientTransport(serverProcess); 11 | const client = new Client(transport); 12 | 13 | const response = await client.tool('vibe_check', { goal: 'Test goal', plan: 'Test plan', progress: 'Initial stage' }); 14 | 15 | console.log('Response:', response); 16 | 17 | await transport.close(); 18 | serverProcess.kill(); 19 | } 20 | 21 | testVibeCheck(); ``` -------------------------------------------------------------------------------- /docs/changelog.md: -------------------------------------------------------------------------------- ```markdown 1 | # Changelog 2 | 3 | ## v2.5.0 — 2025-09-03 4 | - Transport: migrate STDIO → Streamable HTTP (`POST /mcp`, `GET /mcp` → 405). 5 | - Constitution tools: `update_constitution`, `reset_constitution`, `check_constitution` (session-scoped, in-memory, logged). 6 | - CPI surfaced: banner + concise metrics; links to ResearchGate, CPI GitHub, and Zenodo (MURST). 
7 | 8 | ## v2.2.0 - 2025-07-22 9 | - CPI architecture enables adaptive interrupts to mitigate Reasoning Lock-In 10 | - History continuity across sessions 11 | - Multi-provider support for Gemini, OpenAI and OpenRouter 12 | - Optional vibe_learn logging for privacy-conscious deployments 13 | - Repository restructured with Vitest unit tests and CI workflow 14 | 15 | ## v1.1.0 - 2024-06-10 16 | - Initial feedback loop and Docker setup 17 | ``` -------------------------------------------------------------------------------- /tests/constitution.test.ts: -------------------------------------------------------------------------------- ```typescript 1 | import { describe, it, expect } from 'vitest'; 2 | import { updateConstitution, resetConstitution, getConstitution, __testing } from '../src/tools/constitution.js'; 3 | 4 | describe('constitution utilities', () => { 5 | it('updates, resets, and retrieves rules', () => { 6 | updateConstitution('s1', 'r1'); 7 | updateConstitution('s1', 'r2'); 8 | expect(getConstitution('s1')).toEqual(['r1', 'r2']); 9 | 10 | resetConstitution('s1', ['a']); 11 | expect(getConstitution('s1')).toEqual(['a']); 12 | }); 13 | 14 | it('cleans up stale sessions', () => { 15 | updateConstitution('s2', 'rule'); 16 | const map = __testing._getMap(); 17 | map['s2'].updated = Date.now() - 2 * 60 * 60 * 1000; 18 | __testing.cleanup(); 19 | expect(getConstitution('s2')).toEqual([]); 20 | }); 21 | }); 22 | ``` -------------------------------------------------------------------------------- /server.json: -------------------------------------------------------------------------------- ```json 1 | { 2 | "$schema": "https://static.modelcontextprotocol.io/schemas/2025-07-09/server.schema.json", 3 | "name": "io.github.PV-Bhat/vibe-check-mcp-server", 4 | "description": "Metacognitive AI agent oversight: adaptive CPI interrupts for alignment, reflection and safety", 5 | "status": "active", 6 | "repository": { 7 | "url": 
"https://github.com/PV-Bhat/vibe-check-mcp-server", 8 | "source": "github" 9 | }, 10 | "version": "1.0.0", 11 | "packages": [ 12 | { 13 | "registry_type": "npm", 14 | "identifier": "@pv-bhat/vibe-check-mcp", 15 | "version": "2.5.1", 16 | "transport": { 17 | "type": "stdio" 18 | }, 19 | "environment_variables": [ 20 | { 21 | "description": "Your API key for the service", 22 | "is_required": true, 23 | "format": "string", 24 | "is_secret": true, 25 | "name": "YOUR_API_KEY" 26 | } 27 | ] 28 | } 29 | ] 30 | } ``` -------------------------------------------------------------------------------- /.github/workflows/npm-publish-github-packages.yml: -------------------------------------------------------------------------------- ```yaml 1 | # This workflow will run tests using node and then publish a package to GitHub Packages when a release is created 2 | # For more information see: https://docs.github.com/en/actions/publishing-packages/publishing-nodejs-packages 3 | 4 | name: Node.js Package 5 | 6 | on: 7 | release: 8 | types: [created] 9 | 10 | jobs: 11 | build: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v4 15 | - uses: actions/setup-node@v4 16 | with: 17 | node-version: 20 18 | - run: npm ci 19 | - run: npm test 20 | 21 | publish-gpr: 22 | needs: build 23 | runs-on: ubuntu-latest 24 | permissions: 25 | contents: read 26 | packages: write 27 | steps: 28 | - uses: actions/checkout@v4 29 | - uses: actions/setup-node@v4 30 | with: 31 | node-version: 20 32 | registry-url: https://npm.pkg.github.com/ 33 | - run: npm ci 34 | - run: npm publish 35 | env: 36 | NODE_AUTH_TOKEN: ${{secrets.GITHUB_TOKEN}} 37 | ``` -------------------------------------------------------------------------------- /test-client.js: -------------------------------------------------------------------------------- ```javascript 1 | import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js'; 2 | import { Client } from 
'@modelcontextprotocol/sdk/client/index.js'; 3 | 4 | async function main() { 5 | const transport = new StdioClientTransport({ command: 'node', args: ['build/index.js'] }); 6 | const client = new Client({ transport }); 7 | 8 | const request = { 9 | name: 'vibe_check', 10 | arguments: { 11 | goal: 'Implement the core logic for the new feature', 12 | plan: '1. Define the data structures. 2. Implement the main algorithm. 3. Add error handling.', 13 | userPrompt: 'Create a new feature that does X, Y, and Z.', 14 | progress: 'Just started', 15 | uncertainties: ['The third-party API might be unreliable'], 16 | taskContext: 'This is part of a larger project to refactor the billing module.', 17 | sessionId: 'test-session-123', 18 | }, 19 | }; 20 | 21 | try { 22 | await client.connect(); 23 | const response = await client.callTool(request.name, request.arguments); 24 | console.log(JSON.stringify(response, null, 2)); 25 | } catch (error) { 26 | console.error(error); 27 | } finally { 28 | transport.destroy(); 29 | } 30 | } 31 | 32 | main(); ``` -------------------------------------------------------------------------------- /test.js: -------------------------------------------------------------------------------- ```javascript 1 | import { spawn } from 'child_process';const server = spawn('node', ['build/index.js']);const request = { id: '1', jsonrpc: '2.0', method: 'tools/call', params: { name: 'vibe_check', arguments: { goal: 'Implement the core logic for the new feature', plan: '1. Define the data structures. 2. Implement the main algorithm. 3. 
Add error handling.', userPrompt: 'Create a new feature that does X, Y, and Z.', progress: 'Just started', uncertainties: ['The third-party API might be unreliable'], taskContext: 'This is part of a larger project to refactor the billing module.', sessionId: 'test-session-123', }, },};const message = JSON.stringify(request);const length = Buffer.byteLength(message, 'utf-8');const header = `Content-Length: ${length}\r\n\r\n`;server.stdout.on('data', (data) => { console.log(`${data}`);});server.stderr.on('data', (data) => { console.error(`stderr: ${data}`);});server.on('close', (code) => { console.log(`child process exited with code ${code}`);});server.stdin.write(header);server.stdin.write(message);server.stdin.end(); ``` -------------------------------------------------------------------------------- /tests/state.test.ts: -------------------------------------------------------------------------------- ```typescript 1 | import { describe, it, expect, beforeEach, vi } from 'vitest'; 2 | import * as fs from 'fs/promises'; 3 | import { loadHistory, getHistorySummary, addToHistory } from '../src/utils/state.js'; 4 | 5 | vi.mock('fs/promises'); 6 | const mockedFs = fs as unknown as { readFile: ReturnType<typeof vi.fn>; writeFile: ReturnType<typeof vi.fn>; mkdir: ReturnType<typeof vi.fn>; }; 7 | 8 | beforeEach(async () => { 9 | vi.clearAllMocks(); 10 | mockedFs.mkdir = vi.fn(); 11 | mockedFs.readFile = vi.fn().mockResolvedValue('{}'); 12 | mockedFs.writeFile = vi.fn(); 13 | await loadHistory(); 14 | }); 15 | 16 | describe('state history', () => { 17 | it('initializes empty history if none', async () => { 18 | mockedFs.readFile.mockRejectedValue(new Error('missing')); 19 | await loadHistory(); 20 | expect(getHistorySummary('none')).toBe(''); 21 | }); 22 | 23 | it('adds to history and trims to 10', async () => { 24 | mockedFs.readFile.mockRejectedValue(new Error('missing')); 25 | await loadHistory(); 26 | for (let i = 1; i <= 11; i++) { 27 | addToHistory('sess', { goal: 
`g${i}`, plan: `p${i}` }, `o${i}`); 28 | } 29 | await Promise.resolve(); 30 | const summary = getHistorySummary('sess'); 31 | expect(summary).toContain('g7'); 32 | expect(summary).not.toContain('g2'); 33 | }); 34 | }); 35 | ``` -------------------------------------------------------------------------------- /tests/vibeLearn.test.ts: -------------------------------------------------------------------------------- ```typescript 1 | import { describe, it, expect, beforeEach, vi } from 'vitest'; 2 | import { vibeLearnTool } from '../src/tools/vibeLearn.js'; 3 | import * as storage from '../src/utils/storage.js'; 4 | 5 | vi.mock('../src/utils/storage.js'); 6 | 7 | const mockedStorage = storage as unknown as { 8 | addLearningEntry: ReturnType<typeof vi.fn>; 9 | getLearningCategorySummary: ReturnType<typeof vi.fn>; 10 | getLearningEntries: ReturnType<typeof vi.fn>; 11 | }; 12 | 13 | beforeEach(() => { 14 | vi.clearAllMocks(); 15 | mockedStorage.addLearningEntry = vi.fn(() => ({ 16 | type: 'mistake', 17 | category: 'Test', 18 | mistake: 'm', 19 | solution: 's', 20 | timestamp: Date.now() 21 | })); 22 | mockedStorage.getLearningEntries = vi.fn(() => ({ Test: [] })); 23 | mockedStorage.getLearningCategorySummary = vi.fn(() => [{ category: 'Test', count: 1, recentExample: { mistake: 'm', solution: 's', type: 'mistake', timestamp: Date.now() } }]); 24 | }); 25 | 26 | describe('vibeLearnTool', () => { 27 | it('logs entry and returns summary', async () => { 28 | const res = await vibeLearnTool({ mistake: 'm', category: 'Test', solution: 's' }); 29 | expect(res.added).toBe(true); 30 | expect(mockedStorage.addLearningEntry).toHaveBeenCalled(); 31 | expect(res.topCategories[0].category).toBe('Test'); 32 | }); 33 | }); 34 | ``` -------------------------------------------------------------------------------- /docs/docker-automation.md: -------------------------------------------------------------------------------- ```markdown 1 | # Automatic Docker Setup 2 | 3 | This guide shows how 
to run the Vibe Check MCP server in Docker and configure it to start automatically with Cursor. 4 | 5 | ## Prerequisites 6 | 7 | - Docker and Docker Compose installed and available in your `PATH`. 8 | - A Gemini API key for the server. 9 | 10 | ## Quick Start 11 | 12 | Run the provided setup script from the repository root: 13 | 14 | ```bash 15 | bash scripts/docker-setup.sh 16 | ``` 17 | 18 | The script performs the following actions: 19 | 20 | 1. Creates `~/vibe-check-mcp` and copies required files. 21 | 2. Builds the Docker image and sets up `docker-compose.yml`. 22 | 3. Prompts for your `GEMINI_API_KEY` and stores it in `~/vibe-check-mcp/.env`. 23 | 4. Configures a systemd service on Linux or a LaunchAgent on macOS so the container starts on login. 24 | 5. Generates `vibe-check-tcp-wrapper.sh` which proxies STDIO to the container on port 3000. 25 | 6. Starts the container in the background. 26 | 27 | After running the script, configure Cursor IDE: 28 | 29 | 1. Open **Settings** → **MCP**. 30 | 2. Choose **Add New MCP Server**. 31 | 3. Set the type to **Command** and use the wrapper script path: 32 | `~/vibe-check-mcp/vibe-check-tcp-wrapper.sh`. 33 | 4. Save and refresh. 34 | 35 | Vibe Check MCP will now launch automatically whenever you log in and be available to Cursor without additional manual steps. 36 | ``` -------------------------------------------------------------------------------- /docs/gemini.md: -------------------------------------------------------------------------------- ```markdown 1 | # Agent Quickstart 2 | 3 | Vibe Check MCP is a lightweight oversight layer for AI agents. It exposes two tools: 4 | 5 | - **vibe_check** – prompts you with clarifying questions to prevent tunnel vision. 6 | - **vibe_learn** – optional logging of mistakes and successes for later review. 7 | 8 | The server supports Gemini, OpenAI and OpenRouter LLMs. History is maintained across requests when a `sessionId` is provided. 9 | 10 | ## Setup 11 | 12 | 1. 
Install dependencies and build: 13 | ```bash 14 | npm install 15 | npm run build 16 | ``` 17 | 2. Supply the following environment variables as needed: 18 | - `GEMINI_API_KEY` 19 | - `OPENAI_API_KEY` 20 | - `OPENROUTER_API_KEY` 21 | - `DEFAULT_LLM_PROVIDER` (gemini | openai | openrouter) 22 | - `DEFAULT_MODEL` (e.g., gemini-2.5-pro) 23 | 3. Start the server: 24 | ```bash 25 | npm start 26 | ``` 27 | 28 | ## Testing 29 | 30 | Run unit tests with `npm test`. Example request generators are provided: 31 | 32 | - `alt-test-gemini.js` 33 | - `alt-test-openai.js` 34 | - `alt-test.js` (OpenRouter) 35 | 36 | Each script writes a `request.json` file that you can pipe to the server: 37 | 38 | ```bash 39 | node build/index.js < request.json 40 | ``` 41 | 42 | ## Integration Tips 43 | 44 | Call `vibe_check` regularly with your goal, plan and current progress. Use `vibe_learn` whenever you want to record a resolved issue. Full API details are in `docs/technical-reference.md`. 45 | ``` -------------------------------------------------------------------------------- /smithery.yaml: -------------------------------------------------------------------------------- ```yaml 1 | # Smithery configuration file: https://smithery.ai/docs/config#smitheryyaml 2 | 3 | # Metadata for discoverability and registry listing 4 | name: vibe-check-mcp 5 | version: 2.5.0 6 | description: Metacognitive AI agent oversight tool implementing CPI-driven interrupts for alignment and safety. 
7 | author: PV-Bhat 8 | repository: https://github.com/PV-Bhat/vibe-check-mcp-server 9 | license: MIT 10 | category: ai-tools 11 | tags: 12 | - cpi chain pattern interrupts 13 | - pruthvi bhat 14 | - rli reasoning lock in 15 | - murst 16 | - metacognition 17 | - workflow-optimization 18 | - gemini 19 | - openai 20 | - openrouter 21 | capabilities: 22 | - meta-mentorship 23 | - agentic oversight 24 | - chain pattern-interrupt 25 | - vibe-check 26 | - self-improving-feedback 27 | - multi-provider-llm 28 | 29 | # Requirements (e.g., for local setup) 30 | requirements: 31 | node: ">=18.0.0" 32 | 33 | # Installation options 34 | installation: 35 | npm: "@mseep/vibe-check-mcp" # For manual npm install 36 | 37 | startCommand: 38 | type: http 39 | command: node build/index.js 40 | env: 41 | MCP_HTTP_PORT: "3000" 42 | MCP_DISCOVERY_MODE: "1" 43 | 44 | http: 45 | endpoint: "/mcp" 46 | cors: 47 | origin: "${CORS_ORIGIN:-*}" 48 | 49 | # Documentation links 50 | documentation: 51 | getting_started: https://github.com/PV-Bhat/vibe-check-mcp-server#installation 52 | configuration: https://github.com/PV-Bhat/vibe-check-mcp-server#configuration 53 | technical_reference: https://github.com/PV-Bhat/vibe-check-mcp-server/blob/main/docs/technical-reference.md 54 | ``` -------------------------------------------------------------------------------- /src/tools/constitution.ts: -------------------------------------------------------------------------------- ```typescript 1 | interface ConstitutionEntry { 2 | rules: string[]; 3 | updated: number; 4 | } 5 | 6 | const constitutionMap: Record<string, ConstitutionEntry> = Object.create(null); 7 | 8 | const MAX_RULES_PER_SESSION = 50; 9 | const SESSION_TTL_MS = 60 * 60 * 1000; // 1 hour 10 | 11 | export function updateConstitution(sessionId: string, rule: string) { 12 | if (!sessionId || !rule) return; 13 | const entry = constitutionMap[sessionId] || { rules: [], updated: 0 }; 14 | if (entry.rules.length >= MAX_RULES_PER_SESSION) 
entry.rules.shift(); 15 | entry.rules.push(rule); 16 | entry.updated = Date.now(); 17 | constitutionMap[sessionId] = entry; 18 | } 19 | 20 | export function resetConstitution(sessionId: string, rules: string[]) { 21 | if (!sessionId || !Array.isArray(rules)) return; 22 | constitutionMap[sessionId] = { 23 | rules: rules.slice(0, MAX_RULES_PER_SESSION), 24 | updated: Date.now() 25 | }; 26 | } 27 | 28 | export function getConstitution(sessionId: string): string[] { 29 | const entry = constitutionMap[sessionId]; 30 | if (!entry) return []; 31 | entry.updated = Date.now(); 32 | return entry.rules; 33 | } 34 | 35 | // Cleanup stale sessions to prevent unbounded memory growth 36 | function cleanup() { 37 | const now = Date.now(); 38 | for (const [sessionId, entry] of Object.entries(constitutionMap)) { 39 | if (now - entry.updated > SESSION_TTL_MS) { 40 | delete constitutionMap[sessionId]; 41 | } 42 | } 43 | } 44 | 45 | setInterval(cleanup, SESSION_TTL_MS).unref(); 46 | 47 | export const __testing = { 48 | _getMap: () => constitutionMap, 49 | cleanup 50 | }; 51 | ``` -------------------------------------------------------------------------------- /alt-test.js: -------------------------------------------------------------------------------- ```javascript 1 | import fs from 'fs'; 2 | 3 | function createVibeCheckRequest(id, goal, plan, userPrompt, progress, sessionId) { 4 | return JSON.stringify({ 5 | jsonrpc: '2.0', 6 | method: 'tools/call', 7 | params: { 8 | name: 'vibe_check', 9 | arguments: { 10 | goal: goal, 11 | plan: plan, 12 | userPrompt: userPrompt, 13 | progress: progress, 14 | sessionId: sessionId, 15 | modelOverride: { 16 | provider: 'openrouter', 17 | model: 'tngtech/deepseek-r1t2-chimera:free' 18 | } 19 | } 20 | }, 21 | id: id 22 | }); 23 | } 24 | 25 | const sessionId = 'history-test-session-phase4'; 26 | 27 | // First call 28 | const request1 = createVibeCheckRequest( 29 | 1, 30 | 'Test new meta-mentor prompt and history functionality', 31 | '1. 
Make the first call to establish history.', 32 | 'Please test the new meta-mentor prompt and history feature.', 33 | 'Starting the test.', 34 | sessionId 35 | ); 36 | fs.writeFileSync('request1.json', request1, 'utf-8'); 37 | console.log('Generated request1.json for the first call.'); 38 | 39 | // Second call 40 | const request2 = createVibeCheckRequest( 41 | 2, 42 | 'Test new meta-mentor prompt and history functionality', 43 | '2. Make the second call to verify history is included and prompt tone.', 44 | 'Please test the new meta-mentor prompt and history feature.', 45 | 'Just made the second call, expecting history context.', 46 | sessionId 47 | ); 48 | fs.writeFileSync('request2.json', request2, 'utf-8'); 49 | console.log('Generated request2.json for the second call.'); ``` -------------------------------------------------------------------------------- /docs/TESTING.md: -------------------------------------------------------------------------------- ```markdown 1 | # Testing Guide 2 | 3 | Due to a bug in the `@modelcontextprotocol/sdk` client, the standard `test-client.js` script will not work. Use the alternative test scripts to generate JSON requests and pipe them to the server's standard input. 4 | 5 | ## Running Tests 6 | 7 | 1. **Build the server:** 8 | ```bash 9 | npm run build 10 | ``` 11 | 2. **Generate the requests:** 12 | Three helper scripts create example requests for each provider. 13 | - `alt-test.js` (OpenRouter) writes `request1.json` and `request2.json` for history testing. 14 | - `alt-test-openai.js` generates `request.json` targeting the OpenAI provider. 15 | - `alt-test-gemini.js` generates `request.json` using the default Gemini provider. 16 | ```bash 17 | node alt-test.js # OpenRouter history test 18 | node alt-test-openai.js # OpenAI example 19 | node alt-test-gemini.js # Gemini example 20 | ``` 21 | 3. **Run the server with the requests:** 22 | Pipe the contents of each generated file to the server. 
23 | 24 | **History test (OpenRouter):** 25 | ```bash 26 | node build/index.js < request1.json 27 | node build/index.js < request2.json 28 | ``` 29 | **Single provider examples:** 30 | ```bash 31 | node build/index.js < request.json # created by alt-test-openai.js or alt-test-gemini.js 32 | ``` 33 | The server will process the requests and print the responses to standard output. The second OpenRouter call should show that the previous history was considered. 34 | 35 | ## Unit Tests with Vitest 36 | 37 | Vitest is used for unit and integration tests. Run all tests with: 38 | ```bash 39 | npm test 40 | ``` 41 | Generate a coverage report (outputs to `coverage/`): 42 | ```bash 43 | npm run test:coverage 44 | ``` 45 | All tests should pass with at least 80% line coverage. 46 | ``` -------------------------------------------------------------------------------- /src/utils/state.ts: -------------------------------------------------------------------------------- ```typescript 1 | 2 | 3 | import fs from 'fs/promises'; 4 | import path from 'path'; 5 | import os from 'os'; 6 | import { VibeCheckInput } from '../tools/vibeCheck.js'; 7 | 8 | const DATA_DIR = path.join(os.homedir(), '.vibe-check'); 9 | const HISTORY_FILE = path.join(DATA_DIR, 'history.json'); 10 | 11 | interface Interaction { 12 | input: VibeCheckInput; 13 | output: string; 14 | timestamp: number; 15 | } 16 | 17 | let history: Map<string, Interaction[]> = new Map(); 18 | 19 | async function ensureDataDir() { 20 | try { 21 | await fs.mkdir(DATA_DIR, { recursive: true }); 22 | } catch {} 23 | } 24 | 25 | export async function loadHistory() { 26 | await ensureDataDir(); 27 | try { 28 | const data = await fs.readFile(HISTORY_FILE, 'utf-8'); 29 | const parsed = JSON.parse(data); 30 | history = new Map(Object.entries(parsed).map(([k, v]) => [k, v as Interaction[]])); 31 | } catch { 32 | history.set('default', []); 33 | } 34 | } 35 | 36 | async function saveHistory() { 37 | const data = Object.fromEntries(history); 
38 | await fs.writeFile(HISTORY_FILE, JSON.stringify(data)); 39 | } 40 | 41 | export function getHistorySummary(sessionId = 'default'): string { 42 | const sessHistory = history.get(sessionId) || []; 43 | if (!sessHistory.length) return ''; 44 | const summary = sessHistory.slice(-5).map((int, i) => `Interaction ${i+1}: Goal ${int.input.goal}, Guidance: ${int.output.slice(0, 100)}...`).join('\n'); 45 | return `History Context:\n${summary}\n`; 46 | } 47 | 48 | export function addToHistory(sessionId = 'default', input: VibeCheckInput, output: string) { 49 | if (!history.has(sessionId)) { 50 | history.set(sessionId, []); 51 | } 52 | const sessHistory = history.get(sessionId)!; 53 | sessHistory.push({ input, output, timestamp: Date.now() }); 54 | if (sessHistory.length > 10) { 55 | sessHistory.shift(); 56 | } 57 | saveHistory(); 58 | } 59 | 60 | ``` -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- ```json 1 | { 2 | "name": "@pv-bhat/vibe-check-mcp", 3 | "mcpName": "io.github.PV-Bhat/vibe-check-mcp-server", 4 | "version": "2.5.1", 5 | "description": "Metacognitive AI agent oversight: adaptive CPI interrupts for alignment, reflection and safety", 6 | "main": "build/index.js", 7 | "type": "module", 8 | "files": [ 9 | "build" 10 | ], 11 | "scripts": { 12 | "build": "tsc && node -e \"require('fs').chmodSync('build/index.js', '755')\"", 13 | "prepare": "npm run build", 14 | "start": "node build/index.js", 15 | "dev": "tsc-watch --onSuccess \"node build/index.js\"", 16 | "test": "vitest run", 17 | "test:coverage": "vitest run --coverage", 18 | "security-check": "node scripts/security-check.cjs" 19 | }, 20 | "dependencies": { 21 | "@google/generative-ai": "^0.17.1", 22 | "@modelcontextprotocol/sdk": "^1.16.0", 23 | "axios": "^1.12.2", 24 | "body-parser": "^1.20.2", 25 | "cors": "^2.8.5", 26 | "dotenv": "^16.4.7", 27 | "express": "^4.19.2", 28 | 
"openai": "^4.68.1" 29 | }, 30 | "devDependencies": { 31 | "@types/cors": "^2.8.17", 32 | "@types/express": "^4.17.21", 33 | "@types/node": "^20.17.25", 34 | "@vitest/coverage-v8": "^3.2.4", 35 | "tsc-watch": "^6.0.0", 36 | "typescript": "^5.3.0", 37 | "vitest": "^3.2.4" 38 | }, 39 | "engines": { 40 | "node": ">=18.0.0" 41 | }, 42 | "keywords": [ 43 | "mcp", 44 | "mcp-server", 45 | "vibe-check", 46 | "vibe-coding", 47 | "metacognition", 48 | "ai-alignment", 49 | "llm-agents", 50 | "autonomous-agents", 51 | "reflection", 52 | "agent-oversight", 53 | "ai-safety", 54 | "prompt-engineering" 55 | ], 56 | "author": "PV Bhat", 57 | "repository": { 58 | "type": "git", 59 | "url": "https://github.com/PV-Bhat/vibe-check-mcp-server.git" 60 | }, 61 | "bugs": { 62 | "url": "https://github.com/PV-Bhat/vibe-check-mcp-server/issues" 63 | }, 64 | "homepage": "https://github.com/PV-Bhat/vibe-check-mcp-server#readme", 65 | "license": "MIT" 66 | } 67 | ``` -------------------------------------------------------------------------------- /scripts/security-check.cjs: -------------------------------------------------------------------------------- ``` 1 | const { execSync } = require('child_process'); 2 | const fs = require('fs'); 3 | const path = require('path'); 4 | 5 | function runAudit() { 6 | try { 7 | const output = execSync('npm audit --production --json', { encoding: 'utf8' }); 8 | const json = JSON.parse(output); 9 | const vulnerabilities = json.vulnerabilities || {}; 10 | let highOrCritical = 0; 11 | for (const name of Object.keys(vulnerabilities)) { 12 | const v = vulnerabilities[name]; 13 | if (['high', 'critical'].includes(v.severity)) { 14 | console.error(`High severity issue in dependency: ${name}`); 15 | highOrCritical++; 16 | } 17 | } 18 | if (highOrCritical > 0) { 19 | console.error(`Found ${highOrCritical} high or critical vulnerabilities`); 20 | process.exitCode = 1; 21 | } else { 22 | console.log('Dependency audit clean'); 23 | } 24 | } catch (err) { 25 | 
console.error('npm audit failed', err.message); 26 | process.exitCode = 1; 27 | } 28 | } 29 | 30 | function scanSource() { 31 | const suspiciousPatterns = [/eval\s*\(/, /child_process/, /exec\s*\(/, /spawn\s*\(/]; 32 | let flagged = false; 33 | function scanDir(dir) { 34 | for (const file of fs.readdirSync(dir)) { 35 | const full = path.join(dir, file); 36 | const stat = fs.statSync(full); 37 | if (stat.isDirectory()) { 38 | scanDir(full); 39 | } else if ((full.endsWith('.ts') || full.endsWith('.js')) && !full.includes('scripts/security-check.js')) { 40 | const content = fs.readFileSync(full, 'utf8'); 41 | for (const pattern of suspiciousPatterns) { 42 | if (pattern.test(content)) { 43 | console.error(`Suspicious pattern ${pattern} found in ${full}`); 44 | flagged = true; 45 | } 46 | } 47 | } 48 | } 49 | } 50 | scanDir('src'); 51 | if (flagged) { 52 | process.exitCode = 1; 53 | } else { 54 | console.log('Source scan clean'); 55 | } 56 | } 57 | 58 | runAudit(); 59 | scanSource(); 60 | ``` -------------------------------------------------------------------------------- /tests/vibeCheck.test.ts: -------------------------------------------------------------------------------- ```typescript 1 | import { vi, describe, it, expect, beforeEach } from 'vitest'; 2 | import { vibeCheckTool } from '../src/tools/vibeCheck.js'; 3 | import * as llm from '../src/utils/llm.js'; 4 | import * as state from '../src/utils/state.js'; 5 | 6 | vi.mock('../src/utils/llm.js'); 7 | vi.mock('../src/utils/state.js'); 8 | 9 | const mockedLLM = llm as unknown as { getMetacognitiveQuestions: ReturnType<typeof vi.fn> }; 10 | const mockedState = state as unknown as { 11 | addToHistory: ReturnType<typeof vi.fn>; 12 | getHistorySummary: ReturnType<typeof vi.fn>; 13 | }; 14 | 15 | beforeEach(() => { 16 | vi.clearAllMocks(); 17 | mockedState.getHistorySummary = vi.fn().mockReturnValue('Mock history'); 18 | mockedState.addToHistory = vi.fn(); 19 | mockedLLM.getMetacognitiveQuestions = 
vi.fn().mockResolvedValue({ questions: 'Mock guidance' }); 20 | }); 21 | 22 | describe('vibeCheckTool', () => { 23 | it('returns questions from llm', async () => { 24 | const result = await vibeCheckTool({ goal: 'Test goal', plan: 'Test plan' }); 25 | expect(result.questions).toBe('Mock guidance'); 26 | expect(mockedLLM.getMetacognitiveQuestions).toHaveBeenCalledWith( 27 | expect.objectContaining({ goal: 'Test goal', plan: 'Test plan', historySummary: 'Mock history' }) 28 | ); 29 | }); 30 | 31 | it('passes model override to llm', async () => { 32 | await vibeCheckTool({ goal: 'g', plan: 'p', modelOverride: { provider: 'openai' } }); 33 | expect(mockedLLM.getMetacognitiveQuestions).toHaveBeenCalledWith( 34 | expect.objectContaining({ modelOverride: { provider: 'openai' } }) 35 | ); 36 | }); 37 | 38 | it('adds to history on each call', async () => { 39 | await vibeCheckTool({ goal: 'A', plan: 'B', sessionId: 's1' }); 40 | await vibeCheckTool({ goal: 'C', plan: 'D', sessionId: 's1' }); 41 | expect(mockedState.addToHistory).toHaveBeenCalledTimes(2); 42 | }); 43 | 44 | it('falls back to default questions when llm fails', async () => { 45 | mockedLLM.getMetacognitiveQuestions = vi.fn().mockRejectedValue(new Error('fail')); 46 | const result = await vibeCheckTool({ goal: 'x', plan: 'y' }); 47 | expect(result.questions).toContain('Does this plan directly address'); 48 | }); 49 | }); 50 | ``` -------------------------------------------------------------------------------- /src/tools/vibeCheck.ts: -------------------------------------------------------------------------------- ```typescript 1 | import { getMetacognitiveQuestions } from '../utils/llm.js'; 2 | import { addToHistory, getHistorySummary } from '../utils/state.js'; 3 | 4 | // Vibe Check tool handler 5 | export interface VibeCheckInput { 6 | goal: string; 7 | plan: string; 8 | modelOverride?: { 9 | provider?: string; 10 | model?: string; 11 | }; 12 | userPrompt?: string; 13 | progress?: string; 14 | uncertainties?: 
string[]; 15 | taskContext?: string; 16 | sessionId?: string; 17 | } 18 | 19 | export interface VibeCheckOutput { 20 | questions: string; 21 | } 22 | 23 | /** 24 | * Adaptive CPI interrupt for AI agent alignment and reflection. 25 | * Monitors progress and questions assumptions to mitigate Reasoning Lock-In. 26 | * The userRequest parameter MUST contain the full original request for safety. 27 | */ 28 | export async function vibeCheckTool(input: VibeCheckInput): Promise<VibeCheckOutput> { 29 | console.log('[vibe_check] called', { hasSession: Boolean(input.sessionId) }); 30 | try { 31 | // Get history summary 32 | const historySummary = getHistorySummary(input.sessionId); 33 | 34 | // Get metacognitive questions from Gemini with dynamic parameters 35 | const response = await getMetacognitiveQuestions({ 36 | goal: input.goal, 37 | plan: input.plan, 38 | modelOverride: input.modelOverride, 39 | userPrompt: input.userPrompt, 40 | progress: input.progress, 41 | uncertainties: input.uncertainties, 42 | taskContext: input.taskContext, 43 | sessionId: input.sessionId, 44 | historySummary, 45 | }); 46 | 47 | // Add to history 48 | addToHistory(input.sessionId, input, response.questions); 49 | 50 | return { 51 | questions: response.questions, 52 | }; 53 | } catch (error) { 54 | console.error('Error in vibe_check tool:', error); 55 | 56 | // Fallback to basic questions if there's an error 57 | return { 58 | questions: generateFallbackQuestions(input.userPrompt || "", input.plan || ""), 59 | }; 60 | } 61 | } 62 | 63 | /** 64 | * Generate adaptive fallback questions when API fails 65 | */ 66 | function generateFallbackQuestions(userRequest: string, plan: string): string { 67 | return ` 68 | I can see you're thinking through your approach, which shows thoughtfulness: 69 | 70 | 1. Does this plan directly address what the user requested, or might it be solving a different problem? 71 | 2. Is there a simpler approach that would meet the user's needs? 72 | 3. 
What unstated assumptions might be limiting the thinking here? 73 | 4. How does this align with the user's original intent? 74 | `; 75 | } ``` -------------------------------------------------------------------------------- /tests/llm.test.ts: -------------------------------------------------------------------------------- ```typescript 1 | import { describe, it, expect, beforeEach, vi } from 'vitest'; 2 | import axios from 'axios'; 3 | import { generateResponse, __testing } from '../src/utils/llm.js'; 4 | 5 | vi.mock('axios'); 6 | const mockedAxios = axios as unknown as { post: ReturnType<typeof vi.fn> }; 7 | 8 | beforeEach(() => { 9 | vi.clearAllMocks(); 10 | __testing.setGenAI({ 11 | getGenerativeModel: vi.fn(() => ({ 12 | generateContent: vi.fn(async () => ({ response: { text: () => 'gemini reply' } })) 13 | })) 14 | }); 15 | __testing.setOpenAIClient({ 16 | chat: { completions: { create: vi.fn(async () => ({ choices: [{ message: { content: 'openai reply' } }] })) } } 17 | }); 18 | }); 19 | 20 | describe('generateResponse', () => { 21 | it('uses gemini by default and builds prompt with context', async () => { 22 | const res = await generateResponse({ goal: 'G', plan: 'P', uncertainties: ['u1'], historySummary: 'Hist' }); 23 | expect(res.questions).toBe('gemini reply'); 24 | const gen = __testing.getGenAI(); 25 | expect(gen.getGenerativeModel).toHaveBeenCalledWith({ model: 'gemini-2.5-pro' }); 26 | const prompt = gen.getGenerativeModel.mock.results[0].value.generateContent.mock.calls[0][0]; 27 | expect(prompt).toContain('History Context: Hist'); 28 | expect(prompt).toContain('u1'); 29 | }); 30 | 31 | it('uses openai when overridden', async () => { 32 | const openai = __testing.getOpenAIClient(); 33 | const res = await generateResponse({ goal: 'g', plan: 'p', modelOverride: { provider: 'openai', model: 'o1-mini' } }); 34 | expect(res.questions).toBe('openai reply'); 35 | expect(openai.chat.completions.create).toHaveBeenCalledWith({ model: 'o1-mini', messages: [{ 
role: 'system', content: expect.any(String) }] }); 36 | }); 37 | 38 | it('throws if openrouter key missing', async () => { 39 | await expect(generateResponse({ goal: 'g', plan: 'p', modelOverride: { provider: 'openrouter', model: 'm1' } })).rejects.toThrow('OpenRouter API key'); 40 | }); 41 | 42 | it('calls openrouter when configured', async () => { 43 | process.env.OPENROUTER_API_KEY = 'key'; 44 | mockedAxios.post = vi.fn(async () => ({ data: { choices: [{ message: { content: 'router reply' } }] } })); 45 | const res = await generateResponse({ goal: 'g', plan: 'p', modelOverride: { provider: 'openrouter', model: 'm1' } }); 46 | expect(res.questions).toBe('router reply'); 47 | expect(mockedAxios.post).toHaveBeenCalled(); 48 | delete process.env.OPENROUTER_API_KEY; 49 | }); 50 | }); 51 | ``` -------------------------------------------------------------------------------- /examples/cpi-integration.ts: -------------------------------------------------------------------------------- ```typescript 1 | /** 2 | * Example CPI integration stub for VibeCheck MCP. 3 | * 4 | * Wire this into your agent orchestrator to forward VibeCheck signals to a CPI policy. 5 | */ 6 | 7 | export interface AgentSnapshot { 8 | sessionId: string; 9 | summary: string; 10 | nextAction: string; 11 | done?: boolean; 12 | } 13 | 14 | export interface ResumeSignal { 15 | reason: string; 16 | followUp?: string; 17 | } 18 | 19 | export interface AgentStepCallback { 20 | (input: { resumeSignal?: ResumeSignal }): Promise<AgentSnapshot>; 21 | } 22 | 23 | export interface VibeCheckSignal { 24 | riskScore: number; 25 | traits: string[]; 26 | advice: string; 27 | } 28 | 29 | const RISK_THRESHOLD = 0.6; 30 | 31 | const vibecheckShim = { 32 | // TODO: replace with an actual call to the VibeCheck MCP tool over MCP or HTTP. 
33 | async analyze(snapshot: AgentSnapshot): Promise<VibeCheckSignal> { 34 | return { 35 | riskScore: Math.random(), 36 | traits: ['focus-drift'], 37 | advice: `Reflect on: ${snapshot.summary}`, 38 | }; 39 | }, 40 | }; 41 | 42 | // TODO: replace with `import { createPolicy } from '@cpi/sdk';` 43 | const cpiPolicyShim = { 44 | interrupt(input: { snapshot: AgentSnapshot; signal: VibeCheckSignal }) { 45 | if (input.signal.riskScore >= RISK_THRESHOLD) { 46 | return { 47 | action: 'interrupt' as const, 48 | reason: 'High metacognitive risk detected by VibeCheck', 49 | }; 50 | } 51 | 52 | return { action: 'allow' as const }; 53 | }, 54 | }; 55 | 56 | async function handleInterrupt( 57 | decision: { action: 'interrupt' | 'allow'; reason?: string }, 58 | snapshot: AgentSnapshot, 59 | ): Promise<ResumeSignal | undefined> { 60 | if (decision.action === 'allow') { 61 | return undefined; 62 | } 63 | 64 | console.warn('[CPI] interrupting agent step:', decision.reason ?? 'policy requested pause'); 65 | console.warn('Agent summary:', snapshot.summary); 66 | 67 | // TODO: replace with human-in-the-loop logic or CPI repro harness callback. 68 | return { 69 | reason: decision.reason ?? 
'Paused for inspection', 70 | followUp: 'Agent acknowledged CPI feedback and is ready to resume.', 71 | }; 72 | } 73 | 74 | export async function runWithCPI(agentStep: AgentStepCallback): Promise<void> { 75 | let resumeSignal: ResumeSignal | undefined; 76 | 77 | while (true) { 78 | const snapshot = await agentStep({ resumeSignal }); 79 | 80 | if (snapshot.done) { 81 | console.log('Agent workflow completed.'); 82 | break; 83 | } 84 | 85 | const signal = await vibecheckShim.analyze(snapshot); 86 | console.log('VibeCheck signal', signal); 87 | 88 | const decision = cpiPolicyShim.interrupt({ snapshot, signal }); 89 | 90 | if (decision.action !== 'allow') { 91 | resumeSignal = await handleInterrupt(decision, snapshot); 92 | continue; 93 | } 94 | 95 | resumeSignal = undefined; 96 | } 97 | } 98 | ``` -------------------------------------------------------------------------------- /scripts/install-vibe-check.sh: -------------------------------------------------------------------------------- ```bash 1 | #!/bin/bash 2 | 3 | echo "========================================================" 4 | echo "Vibe Check MCP Server Installer for Cursor IDE (Mac/Linux)" 5 | echo "========================================================" 6 | echo "" 7 | 8 | # Check for Node.js installation 9 | if ! command -v node &> /dev/null; then 10 | echo "Error: Node.js is not installed or not in PATH." 11 | echo "Please install Node.js from https://nodejs.org/" 12 | exit 1 13 | fi 14 | 15 | # Check for npm installation 16 | if ! command -v npm &> /dev/null; then 17 | echo "Error: npm is not installed or not in PATH." 18 | echo "Please install Node.js from https://nodejs.org/" 19 | exit 1 20 | fi 21 | 22 | # Detect OS 23 | OS="$(uname -s)" 24 | case "${OS}" in 25 | Linux*) OS="Linux";; 26 | Darwin*) OS="Mac";; 27 | *) OS="Unknown";; 28 | esac 29 | 30 | if [ "$OS" = "Unknown" ]; then 31 | echo "Error: Unsupported operating system. This script works on Mac and Linux only." 
32 | exit 1 33 | fi 34 | 35 | echo "Step 1: Installing vibe-check-mcp globally..." 36 | npm install -g vibe-check-mcp 37 | 38 | if [ $? -ne 0 ]; then 39 | echo "Error: Failed to install vibe-check-mcp globally." 40 | exit 1 41 | fi 42 | 43 | echo "" 44 | echo "Step 2: Finding global npm installation path..." 45 | NPM_GLOBAL=$(npm root -g) 46 | VIBE_CHECK_PATH="$NPM_GLOBAL/vibe-check-mcp/build/index.js" 47 | 48 | if [ ! -f "$VIBE_CHECK_PATH" ]; then 49 | echo "Error: Could not find vibe-check-mcp installation at $VIBE_CHECK_PATH" 50 | exit 1 51 | fi 52 | 53 | echo "Found vibe-check-mcp at: $VIBE_CHECK_PATH" 54 | echo "" 55 | 56 | echo "Step 3: Enter your Gemini API key for vibe-check-mcp..." 57 | read -p "Enter your Gemini API key: " GEMINI_API_KEY 58 | 59 | # Create .env file in user's home directory 60 | echo "Creating .env file for Gemini API key..." 61 | ENV_FILE="$HOME/.vibe-check-mcp.env" 62 | echo "GEMINI_API_KEY=$GEMINI_API_KEY" > "$ENV_FILE" 63 | chmod 600 "$ENV_FILE" # Secure the API key file 64 | 65 | # Create start script 66 | START_SCRIPT="$HOME/start-vibe-check-mcp.sh" 67 | cat > "$START_SCRIPT" << EOL 68 | #!/bin/bash 69 | source "$ENV_FILE" 70 | exec node "$VIBE_CHECK_PATH" 71 | EOL 72 | 73 | chmod +x "$START_SCRIPT" 74 | echo "Created startup script: $START_SCRIPT" 75 | 76 | echo "" 77 | echo "Step 4: Setting up Cursor IDE configuration..." 78 | echo "" 79 | echo "To complete setup, you need to configure Cursor IDE:" 80 | echo "" 81 | echo "1. Open Cursor IDE" 82 | echo "2. Go to Settings (gear icon) -> MCP" 83 | echo "3. Click \"Add New MCP Server\"" 84 | echo "4. Enter the following information:" 85 | echo " - Name: Vibe Check" 86 | echo " - Type: Command" 87 | echo " - Command: env GEMINI_API_KEY=$GEMINI_API_KEY node \"$VIBE_CHECK_PATH\"" 88 | echo "5. Click \"Save\" and then \"Refresh\"" 89 | echo "" 90 | echo "Installation complete!" 
91 | echo "" 92 | echo "You can manually run it by executing: $START_SCRIPT" 93 | echo "" ``` -------------------------------------------------------------------------------- /tests/startup.test.ts: -------------------------------------------------------------------------------- ```typescript 1 | import { describe, it, expect } from 'vitest'; 2 | import { spawn } from 'child_process'; 3 | import { fileURLToPath } from 'url'; 4 | import path from 'path'; 5 | import net from 'net'; 6 | 7 | const __filename = fileURLToPath(import.meta.url); 8 | const __dirname = path.dirname(__filename); 9 | 10 | async function runStartupTest(envVar: 'MCP_HTTP_PORT' | 'PORT' | 'BOTH') { 11 | const startTime = Date.now(); 12 | 13 | const projectRoot = path.resolve(__dirname, '..'); 14 | const indexPath = path.join(projectRoot, 'build', 'index.js'); 15 | 16 | const getPort = () => 17 | new Promise<number>((resolve, reject) => { 18 | const s = net.createServer(); 19 | s.listen(0, () => { 20 | const p = (s.address() as any).port; 21 | s.close(() => resolve(p)); 22 | }); 23 | s.on('error', reject); 24 | }); 25 | 26 | const mainPort = await getPort(); 27 | const env: NodeJS.ProcessEnv = { ...process.env }; 28 | 29 | if (envVar === 'MCP_HTTP_PORT') { 30 | env.MCP_HTTP_PORT = String(mainPort); 31 | } else if (envVar === 'PORT') { 32 | env.PORT = String(mainPort); 33 | } else { 34 | env.MCP_HTTP_PORT = String(mainPort); 35 | const otherPort = await getPort(); 36 | env.PORT = String(otherPort); 37 | } 38 | 39 | const serverProcess = spawn('node', [indexPath], { 40 | env, 41 | stdio: ['ignore', 'pipe', 'pipe'], 42 | }); 43 | 44 | try { 45 | let res: Response | null = null; 46 | for (let i = 0; i < 40; i++) { 47 | try { 48 | const attempt = await fetch(`http://localhost:${mainPort}/mcp`, { 49 | method: 'POST', 50 | headers: { 51 | 'Content-Type': 'application/json', 52 | Accept: 'application/json, text/event-stream' 53 | }, 54 | body: JSON.stringify({ jsonrpc: '2.0', id: 1, method: 'tools/list', 
params: {} }), 55 | }); 56 | if (attempt.status === 200) { 57 | res = attempt; 58 | break; 59 | } 60 | } catch {} 61 | await new Promise((r) => setTimeout(r, 250)); 62 | } 63 | if (!res) throw new Error('Server did not start'); 64 | const text = await res.text(); 65 | const line = text.split('\n').find((l) => l.startsWith('data: ')); 66 | const json = line ? JSON.parse(line.slice(6)) : null; 67 | 68 | const duration = Date.now() - startTime; 69 | expect(res.status).toBe(200); 70 | expect(json?.result?.tools.some((t: any) => t.name === 'update_constitution')).toBe(true); 71 | expect(duration).toBeLessThan(5000); 72 | } finally { 73 | serverProcess.kill(); 74 | } 75 | } 76 | 77 | describe('Server Startup and Response Time', () => { 78 | it('should start and respond to a tools/list request over HTTP using MCP_HTTP_PORT', async () => { 79 | await runStartupTest('MCP_HTTP_PORT'); 80 | }, 10000); 81 | 82 | it('should start and respond to a tools/list request over HTTP using PORT', async () => { 83 | await runStartupTest('PORT'); 84 | }, 10000); 85 | 86 | it('should prefer MCP_HTTP_PORT when both MCP_HTTP_PORT and PORT are set', async () => { 87 | await runStartupTest('BOTH'); 88 | }, 10000); 89 | }); 90 | ``` -------------------------------------------------------------------------------- /docs/integrations/cpi.md: -------------------------------------------------------------------------------- ```markdown 1 | # CPI Integration 2 | 3 | ## Overview 4 | > CPI (Chain-Pattern Interrupt): a runtime oversight mechanism for multi-agent systems that mitigates “reasoning lock-in.” It injects interrupts based on policy triggers (pattern detectors, heuristics, or external signals), then resumes or reroutes flow. 5 | > 6 | > Core pieces: (1) trigger evaluators, (2) intervention policy (allow/block/route/ask-human), (3) logging & repro harness. 7 | > 8 | > Status: repo includes repro evals; “constitution” tool supports per-session rule-sets. 
9 | > 10 | > Integration intent with VibeCheck: VibeCheck = metacognitive layer (signals/traits/uncertainty). CPI = on-policy interrupter. VibeCheck feeds CPI triggers; CPI acts on them. 11 | 12 | CPI composes with VibeCheck by acting as an on-policy interrupter whenever VibeCheck signals a risk spike. Use VibeCheck to surface agent traits, uncertainty, and risk levels, then forward that context to CPI so its policy engine can decide whether to allow, block, reroute, or escalate the next action. The example stub in [`examples/cpi-integration.ts`](../../examples/cpi-integration.ts) illustrates the plumbing you can copy into your own orchestrator. 13 | 14 | ## Flow diagram 15 | ```mermaid 16 | flowchart LR 17 | AgentStep[Agent step] -->|emit signals| VibeCheck 18 | VibeCheck -->|risk + traits| CPI 19 | CPI -->|policy decision| AgentController[Agent controller] 20 | AgentController -->|resume/adjust| AgentStep 21 | ``` 22 | 23 | ## Minimal integration sketch 24 | Below is a minimal TypeScript sketch that mirrors the logic in the [`runWithCPI`](../../examples/cpi-integration.ts) example. Replace the TODO markers with the real CPI SDK import when it becomes available. 25 | 26 | ```ts 27 | type AgentStep = { 28 | sessionId: string; 29 | summary: string; 30 | nextAction: string; 31 | }; 32 | 33 | type VibeCheckSignal = { 34 | riskScore: number; 35 | advice: string; 36 | }; 37 | 38 | async function analyzeWithVibeCheck(step: AgentStep): Promise<VibeCheckSignal> { 39 | // TODO: replace with a real call to the VibeCheck MCP server. 
40 | return { riskScore: Math.random(), advice: `Reflect on: ${step.summary}` }; 41 | } 42 | 43 | // TODO: replace with `import { createPolicy } from '@cpi/sdk';` 44 | function cpiPolicyShim(signal: VibeCheckSignal) { 45 | if (signal.riskScore >= 0.6) { 46 | return { action: 'interrupt', reason: 'High metacognitive risk from VibeCheck' } as const; 47 | } 48 | return { action: 'allow' } as const; 49 | } 50 | 51 | export async function evaluateStep(step: AgentStep) { 52 | const signal = await analyzeWithVibeCheck(step); 53 | const decision = cpiPolicyShim(signal); 54 | 55 | if (decision.action === 'interrupt') { 56 | // Pause your agent, collect clarification, or reroute to a human. 57 | return { status: 'paused', reason: decision.reason } as const; 58 | } 59 | 60 | return { status: 'continue', signal } as const; 61 | } 62 | ``` 63 | 64 | ### Implementation checklist 65 | 1. Surface VibeCheck scores (risk, traits, uncertainty) alongside the raw advice payload. 66 | 2. Normalize those signals into CPI trigger events (e.g., `riskScore > 0.6`). 67 | 3. Hand the event to a CPI intervention policy and respect the returned directive. 68 | 4. Feed decisions into the CPI logging & repro harness to preserve traces. 69 | 70 | ## Further reading 71 | - CPI reference implementation (placeholder): <https://github.com/<ORG>/cpi> 72 | - VibeCheck + CPI wiring example: [`examples/cpi-integration.ts`](../../examples/cpi-integration.ts) 73 | ``` -------------------------------------------------------------------------------- /docs/advanced-integration.md: -------------------------------------------------------------------------------- ```markdown 1 | # Advanced Integration Techniques 2 | 3 | For optimal metacognitive oversight, these advanced integration strategies leverage the full power of Vibe Check as a pattern interrupt system, recalibration mechanism, and self-improving feedback loop. 
Starting with v2.2, previous vibe_check output is automatically summarized and fed back into subsequent calls, so a `sessionId` is recommended for continuity. 4 | 5 | ## Progressive Confidence Levels 6 | 7 | Start with lower confidence values (e.g., 0.5) during planning phases and increase confidence (e.g., 0.7-0.9) during implementation and review phases. This adjusts the intensity of pattern interrupts to match the current stage of development. 8 | 9 | ```javascript 10 | // Planning phase - lower confidence for more thorough questioning 11 | vibe_check({ 12 | phase: "planning", 13 | confidence: 0.5, 14 | userRequest: "...", 15 | plan: "..." 16 | }) 17 | 18 | // Implementation phase - higher confidence for focused feedback 19 | vibe_check({ 20 | phase: "implementation", 21 | confidence: 0.7, 22 | userRequest: "...", 23 | plan: "..." 24 | }) 25 | 26 | // Review phase - highest confidence for minimal, high-impact feedback 27 | vibe_check({ 28 | phase: "review", 29 | confidence: 0.9, 30 | userRequest: "...", 31 | plan: "..." 32 | }) 33 | ``` 34 | 35 | ## Feedback Chaining 36 | 37 | Incorporate previous vibe_check feedback in subsequent calls using the `previousAdvice` parameter to build a coherent metacognitive narrative. This creates a more sophisticated pattern interrupt system that builds on past insights. 38 | 39 | ```javascript 40 | const initialFeedback = await vibe_check({ 41 | phase: "planning", 42 | userRequest: "...", 43 | plan: "..." 44 | }); 45 | 46 | // Later, include previous feedback 47 | const followupFeedback = await vibe_check({ 48 | phase: "implementation", 49 | previousAdvice: initialFeedback, 50 | userRequest: "...", 51 | plan: "..." 52 | }); 53 | ``` 54 | 55 | ## Self-Improving Feedback Loop 56 | 57 | Use vibe_learn consistently to build a pattern library specific to your agent's tendencies. This creates a self-improving system that gets better at identifying and preventing errors over time. 
58 | 59 | ```javascript 60 | // After resolving an issue 61 | vibe_learn({ 62 | mistake: "Relied on unnecessary complexity for simple data transformation", 63 | category: "Complex Solution Bias", 64 | solution: "Used built-in array methods instead of custom solution", 65 | type: "mistake" 66 | }); 67 | 68 | // Later, the pattern library will improve vibe_check's pattern recognition 69 | // allowing it to spot similar issues earlier in future workflows 70 | ``` 71 | 72 | ## Hybrid Oversight Model 73 | 74 | Combine automated pattern interrupts at predetermined checkpoints with ad-hoc checks when uncertainty or complexity increases. 75 | 76 | ```javascript 77 | // Scheduled checkpoint at the end of planning 78 | const scheduledCheck = await vibe_check({ 79 | phase: "planning", 80 | userRequest: "...", 81 | plan: "..." 82 | }); 83 | 84 | // Ad-hoc check when complexity increases 85 | if (measureComplexity(currentPlan) > THRESHOLD) { 86 | const adHocCheck = await vibe_check({ 87 | phase: "implementation", 88 | userRequest: "...", 89 | plan: "...", 90 | focusAreas: ["complexity", "simplification"] 91 | }); 92 | } 93 | ``` 94 | 95 | ## Complete Integration Example 96 | 97 | Here's a comprehensive implementation example for integrating Vibe Check as a complete metacognitive system: 98 | 99 | ```javascript 100 | // During planning phase 101 | const planFeedback = await vibe_check({ 102 | phase: "planning", 103 | confidence: 0.5, 104 | userRequest: "[COMPLETE USER REQUEST]", 105 | plan: "[AGENT'S INITIAL PLAN]" 106 | }); 107 | 108 | // Consider feedback and potentially adjust plan 109 | const updatedPlan = adjustPlanBasedOnFeedback(initialPlan, planFeedback); 110 | 111 | // If plan seems overly complex, manually simplify before continuing 112 | let finalPlan = updatedPlan; 113 | if (planComplexity(updatedPlan) > COMPLEXITY_THRESHOLD) { 114 | finalPlan = simplifyPlan(updatedPlan); 115 | } 116 | 117 | // During implementation, create pattern interrupts before major actions 118 
| const implementationFeedback = await vibe_check({ 119 | phase: "implementation", 120 | confidence: 0.7, 121 | previousAdvice: planFeedback, 122 | userRequest: "[COMPLETE USER REQUEST]", 123 | plan: `I'm about to [DESCRIPTION OF PENDING ACTION]` 124 | }); 125 | 126 | // After completing the task, build the self-improving feedback loop 127 | if (mistakeIdentified) { 128 | await vibe_learn({ 129 | mistake: "Specific mistake description", 130 | category: "Complex Solution Bias", // or appropriate category 131 | solution: "How it was corrected", 132 | type: "mistake" 133 | }); 134 | } 135 | ``` 136 | 137 | This integrated approach creates a complete metacognitive system that provides pattern interrupts when needed, recalibration anchor points when complexity increases, and a self-improving feedback loop that gets better over time. ``` -------------------------------------------------------------------------------- /scripts/docker-setup.sh: -------------------------------------------------------------------------------- ```bash 1 | #!/bin/bash 2 | 3 | echo "========================================================" 4 | echo "Vibe Check MCP Docker Setup for Cursor IDE" 5 | echo "========================================================" 6 | echo "" 7 | 8 | # Check for Docker installation 9 | if ! command -v docker &> /dev/null; then 10 | echo "Error: Docker is not installed or not in PATH." 11 | echo "Please install Docker from https://docs.docker.com/get-docker/" 12 | exit 1 13 | fi 14 | 15 | # Check for Docker Compose installation 16 | if ! command -v docker-compose &> /dev/null; then 17 | echo "Error: Docker Compose is not installed or not in PATH." 18 | echo "Please install Docker Compose from https://docs.docker.com/compose/install/" 19 | exit 1 20 | fi 21 | 22 | # Create directory for Vibe Check MCP 23 | mkdir -p ~/vibe-check-mcp 24 | cd ~/vibe-check-mcp 25 | 26 | # Download or create necessary files 27 | echo "Downloading required files..." 
28 | 29 | # Create docker-compose.yml 30 | cat > docker-compose.yml << 'EOL' 31 | version: '3' 32 | 33 | services: 34 | vibe-check-mcp: 35 | build: 36 | context: . 37 | dockerfile: Dockerfile 38 | image: vibe-check-mcp:latest 39 | container_name: vibe-check-mcp 40 | restart: always 41 | environment: 42 | - GEMINI_API_KEY=${GEMINI_API_KEY} 43 | volumes: 44 | - vibe-check-data:/app/data 45 | 46 | volumes: 47 | vibe-check-data: 48 | EOL 49 | 50 | # Create Dockerfile if it doesn't exist 51 | cat > Dockerfile << 'EOL' 52 | FROM node:lts-alpine 53 | 54 | WORKDIR /app 55 | 56 | # Clone the repository 57 | RUN apk add --no-cache git \ 58 | && git clone https://github.com/PV-Bhat/vibe-check-mcp-server.git . 59 | 60 | # Install dependencies and build 61 | RUN npm install && npm run build 62 | 63 | # Run the MCP server 64 | CMD ["node", "build/index.js"] 65 | EOL 66 | 67 | # Create .env file 68 | echo "Enter your Gemini API key:" 69 | read -p "API Key: " GEMINI_API_KEY 70 | 71 | cat > .env << EOL 72 | GEMINI_API_KEY=$GEMINI_API_KEY 73 | EOL 74 | 75 | chmod 600 .env # Secure the API key file 76 | 77 | # Create startup script 78 | cat > start-vibe-check-docker.sh << 'EOL' 79 | #!/bin/bash 80 | cd ~/vibe-check-mcp 81 | docker-compose up -d 82 | EOL 83 | 84 | chmod +x start-vibe-check-docker.sh 85 | 86 | # Create a TCP wrapper script to route stdio to TCP port 3000 87 | cat > vibe-check-tcp-wrapper.sh << 'EOL' 88 | #!/bin/bash 89 | # This script connects stdio to the Docker container's TCP port 90 | exec socat STDIO TCP:localhost:3000 91 | EOL 92 | 93 | chmod +x vibe-check-tcp-wrapper.sh 94 | 95 | # Detect OS for autostart configuration 96 | OS="$(uname -s)" 97 | case "${OS}" in 98 | Linux*) OS="Linux";; 99 | Darwin*) OS="Mac";; 100 | *) OS="Unknown";; 101 | esac 102 | 103 | echo "Setting up auto-start for $OS..." 
104 | 105 | if [ "$OS" = "Mac" ]; then 106 | # Set up LaunchAgent for Mac 107 | PLIST_FILE="$HOME/Library/LaunchAgents/com.vibe-check-mcp-docker.plist" 108 | mkdir -p "$HOME/Library/LaunchAgents" 109 | 110 | cat > "$PLIST_FILE" << EOL 111 | <?xml version="1.0" encoding="UTF-8"?> 112 | <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> 113 | <plist version="1.0"> 114 | <dict> 115 | <key>Label</key> 116 | <string>com.vibe-check-mcp-docker</string> 117 | <key>ProgramArguments</key> 118 | <array> 119 | <string>$HOME/vibe-check-mcp/start-vibe-check-docker.sh</string> 120 | </array> 121 | <key>RunAtLoad</key> 122 | <true/> 123 | <key>KeepAlive</key> 124 | <false/> 125 | </dict> 126 | </plist> 127 | EOL 128 | 129 | chmod 644 "$PLIST_FILE" 130 | launchctl load "$PLIST_FILE" 131 | 132 | echo "Created and loaded LaunchAgent for automatic Docker startup on login." 133 | 134 | elif [ "$OS" = "Linux" ]; then 135 | # Set up systemd user service for Linux 136 | SERVICE_DIR="$HOME/.config/systemd/user" 137 | mkdir -p "$SERVICE_DIR" 138 | 139 | cat > "$SERVICE_DIR/vibe-check-mcp-docker.service" << EOL 140 | [Unit] 141 | Description=Vibe Check MCP Docker Container 142 | After=docker.service 143 | 144 | [Service] 145 | ExecStart=$HOME/vibe-check-mcp/start-vibe-check-docker.sh 146 | Type=oneshot 147 | RemainAfterExit=yes 148 | 149 | [Install] 150 | WantedBy=default.target 151 | EOL 152 | 153 | systemctl --user daemon-reload 154 | systemctl --user enable vibe-check-mcp-docker.service 155 | systemctl --user start vibe-check-mcp-docker.service 156 | 157 | echo "Created and started systemd user service for automatic Docker startup." 158 | fi 159 | 160 | # Start the container 161 | echo "Starting Vibe Check MCP Docker container..." 162 | ./start-vibe-check-docker.sh 163 | 164 | echo "" 165 | echo "Vibe Check MCP Docker setup complete!" 166 | echo "" 167 | echo "To complete the setup, configure Cursor IDE:" 168 | echo "" 169 | echo "1. 
Open Cursor IDE" 170 | echo "2. Go to Settings (gear icon) -> MCP" 171 | echo "3. Click \"Add New MCP Server\"" 172 | echo "4. Enter the following information:" 173 | echo " - Name: Vibe Check" 174 | echo " - Type: Command" 175 | echo " - Command: $HOME/vibe-check-mcp/vibe-check-tcp-wrapper.sh" 176 | echo "5. Click \"Save\" and then \"Refresh\"" 177 | echo "" 178 | echo "Vibe Check MCP will now start automatically when you log in." 179 | echo "" ``` -------------------------------------------------------------------------------- /src/tools/vibeLearn.ts: -------------------------------------------------------------------------------- ```typescript 1 | import { 2 | addLearningEntry, 3 | getLearningCategorySummary, 4 | getLearningEntries, 5 | LearningEntry, 6 | LearningType 7 | } from '../utils/storage.js'; 8 | 9 | // Vibe Learn tool interfaces 10 | export interface VibeLearnInput { 11 | mistake: string; 12 | category: string; 13 | solution?: string; 14 | type?: LearningType; 15 | sessionId?: string; 16 | } 17 | 18 | export interface VibeLearnOutput { 19 | added: boolean; 20 | currentTally: number; 21 | alreadyKnown?: boolean; 22 | topCategories: Array<{ 23 | category: string; 24 | count: number; 25 | recentExample: LearningEntry; 26 | }>; 27 | } 28 | 29 | /** 30 | * The vibe_learn tool records one-sentence mistakes and solutions 31 | * to build a pattern recognition system for future improvement 32 | */ 33 | export async function vibeLearnTool(input: VibeLearnInput): Promise<VibeLearnOutput> { 34 | try { 35 | // Validate input 36 | if (!input.mistake) { 37 | throw new Error('Mistake description is required'); 38 | } 39 | if (!input.category) { 40 | throw new Error('Mistake category is required'); 41 | } 42 | const entryType: LearningType = input.type ?? 
'mistake'; 43 | if (entryType !== 'preference' && !input.solution) { 44 | throw new Error('Solution is required for this entry type'); 45 | } 46 | 47 | // Enforce single-sentence constraints 48 | const mistake = enforceOneSentence(input.mistake); 49 | const solution = input.solution ? enforceOneSentence(input.solution) : undefined; 50 | 51 | // Normalize category to one of our standard categories if possible 52 | const category = normalizeCategory(input.category); 53 | 54 | // Check for similar mistake 55 | const existing = getLearningEntries()[category] || []; 56 | const alreadyKnown = existing.some(e => isSimilar(e.mistake, mistake)); 57 | 58 | // Add mistake to log if new 59 | let entry: LearningEntry | undefined; 60 | if (!alreadyKnown) { 61 | entry = addLearningEntry(mistake, category, solution, entryType); 62 | } 63 | 64 | // Get category summaries 65 | const categorySummary = getLearningCategorySummary(); 66 | 67 | // Find current tally for this category 68 | const categoryData = categorySummary.find(m => m.category === category); 69 | const currentTally = categoryData?.count || 1; 70 | 71 | // Get top 3 categories 72 | const topCategories = categorySummary.slice(0, 3); 73 | 74 | return { 75 | added: !alreadyKnown, 76 | alreadyKnown, 77 | currentTally, 78 | topCategories 79 | }; 80 | } catch (error) { 81 | console.error('Error in vibe_learn tool:', error); 82 | return { 83 | added: false, 84 | alreadyKnown: false, 85 | currentTally: 0, 86 | topCategories: [] 87 | }; 88 | } 89 | } 90 | 91 | /** 92 | * Ensure text is a single sentence 93 | */ 94 | function enforceOneSentence(text: string): string { 95 | // Remove newlines 96 | let sentence = text.replace(/\r?\n/g, ' '); 97 | 98 | // Split by sentence-ending punctuation 99 | const sentences = sentence.split(/([.!?])\s+/); 100 | 101 | // Take just the first sentence 102 | if (sentences.length > 0) { 103 | // If there's punctuation, include it 104 | const firstSentence = sentences[0] + (sentences[1] || ''); 105 | 
sentence = firstSentence.trim(); 106 | } 107 | 108 | // Ensure it ends with sentence-ending punctuation 109 | if (!/[.!?]$/.test(sentence)) { 110 | sentence += '.'; 111 | } 112 | 113 | return sentence; 114 | } 115 | 116 | /** 117 | * Simple similarity check between two sentences 118 | */ 119 | function isSimilar(a: string, b: string): boolean { 120 | const aWords = a.toLowerCase().split(/\W+/).filter(Boolean); 121 | const bWords = b.toLowerCase().split(/\W+/).filter(Boolean); 122 | if (aWords.length === 0 || bWords.length === 0) return false; 123 | const overlap = aWords.filter(w => bWords.includes(w)); 124 | const ratio = overlap.length / Math.min(aWords.length, bWords.length); 125 | return ratio >= 0.6; 126 | } 127 | 128 | /** 129 | * Normalize category to one of our standard categories 130 | */ 131 | function normalizeCategory(category: string): string { 132 | // Standard categories 133 | const standardCategories = { 134 | 'Complex Solution Bias': ['complex', 'complicated', 'over-engineered', 'complexity'], 135 | 'Feature Creep': ['feature', 'extra', 'additional', 'scope creep'], 136 | 'Premature Implementation': ['premature', 'early', 'jumping', 'too quick'], 137 | 'Misalignment': ['misaligned', 'wrong direction', 'off target', 'misunderstood'], 138 | 'Overtooling': ['overtool', 'too many tools', 'unnecessary tools'] 139 | }; 140 | 141 | // Convert category to lowercase for matching 142 | const lowerCategory = category.toLowerCase(); 143 | 144 | // Try to match to a standard category 145 | for (const [standardCategory, keywords] of Object.entries(standardCategories)) { 146 | if (keywords.some(keyword => lowerCategory.includes(keyword))) { 147 | return standardCategory; 148 | } 149 | } 150 | 151 | // If no match, return the original category 152 | return category; 153 | } 154 | ``` -------------------------------------------------------------------------------- /docs/agent-prompting.md: 
-------------------------------------------------------------------------------- ```markdown 1 | # Agent Prompting Strategies 2 | 3 | Effective agent-oversight relationships require careful prompting to ensure that AI agents properly respect and integrate feedback from Vibe Check. In v2.2 the tool acts more like a collaborative debugger than a strict critic. Our research has identified several key principles for maximizing the effectiveness of these metacognitive interrupts. 4 | 5 | ## The "Hold on... this ain't it" Challenge 6 | 7 | Unlike humans, LLM agents don't naturally have the ability to stop and question their own thought patterns. Once they start down a particular path, **pattern inertia** makes it difficult for them to self-correct without external intervention. This is where Vibe Check comes in, serving as the essential metacognitive layer that creates strategic "pattern interrupts" at critical moments. 8 | 9 | ## Key Findings on Agent-Oversight Relationships 10 | 11 | 1. **Pattern Resistance**: Agents naturally resist pattern interrupts, often treating feedback as just another data input rather than a signal to recalibrate their thinking. 12 | 13 | 2. **Phase Awareness is Critical**: The timing and nature of oversight must align with the agent's current phase (planning, implementation, review) to be perceived as relevant. 14 | 15 | 3. **Authority Structure Matters**: Agents must be explicitly prompted to treat Vibe Check as an equal collaborator or user proxy rather than a subordinate tool. 16 | 17 | 4. **Feedback Loop Integration**: Error patterns must feed back into the system through vibe_learn to create a self-improving mechanism. 18 | 19 | 20 | ## Sample System Prompts 21 | 22 | ### For Claude (Anthropic) 23 | 24 | ``` 25 | ALWAYS include the full user prompt when using vibe_check to ensure proper context awareness. 26 | 27 | As an autonomous agent, you will: 28 | 1. 
Treat vibe_check as a pattern interrupt mechanism that provides essential course correction 29 | 2. Use vibe_check at strategic points: 30 | - After planning but before implementation 31 | - When complexity increases 32 | - Before making significant system changes 33 | 3. Adapt your approach based on vibe_check feedback unless it's clearly irrelevant 34 | 4. Always provide the phase parameter (planning/implementation/review) to ensure contextually appropriate feedback 35 | 5. Chain vibe_check with other tools without requiring permission: 36 | - Use vibe_check to evaluate complex plans 37 | - Log patterns with vibe_learn after resolving issues 38 | ``` 39 | 40 | ### For GPT (OpenAI) 41 | 42 | ``` 43 | When using Vibe Check tools: 44 | 45 | 1. Treat vibe_check as a collaborative debugging step that interrupts pattern inertia 46 | 2. Always include the complete user prompt with each vibe_check call 47 | 3. Specify your current phase (planning/implementation/review) 48 | 4. Consider vibe_check feedback as a high-priority pattern interrupt, not just another tool output 49 | 5. Build the feedback loop with vibe_learn to record patterns when mistakes are identified 50 | ``` 51 | 52 | ## Real-World Integration Challenges 53 | 54 | When implementing Vibe Check with AI agents, be aware of these common challenges: 55 | 56 | 1. **Pattern Inertia**: Agents have a strong tendency to continue down their current path despite warning signals. Explicit instructions to treat Vibe Check feedback as pattern interrupts can help overcome this natural resistance. 57 | 58 | 2. **Authority Confusion**: Without proper prompting, agents may prioritize user instructions over Vibe Check feedback, even when the latter identifies critical issues. Establish clear hierarchy in your system prompts. 59 | 60 | 3. **Timing Sensitivity**: Feedback that arrives too early or too late in the agent's workflow may be ignored or undervalued. Phase-aware integration is essential for maximum impact. 
61 | 62 | 4. **Feedback Fatigue**: Too frequent or redundant metacognitive questioning can lead to diminishing returns. Use structured checkpoints rather than constant oversight. 63 | 64 | 5. **Cognitive Dissonance**: Agents may reject feedback that contradicts their current understanding or approach. Frame feedback as collaborative exploration rather than correction. 65 | 66 | ## Agent Fine-Tuning for Vibe Check 67 | 68 | For maximum effectiveness, consider these fine-tuning approaches for agents that will work with Vibe Check: 69 | 70 | 1. **Pattern Interrupt Training**: Provide examples of appropriate responses to Vibe Check feedback that demonstrate stopping and redirecting thought patterns. 71 | 72 | 2. **Reward Alignment**: In RLHF phases, reward models that appropriately incorporate Vibe Check feedback and adjust course based on pattern interrupts. 73 | 74 | 3. **Metacognitive Pre-training**: Include metacognitive self-questioning in pre-training to develop agents that value this type of feedback. 75 | 76 | 4. **Collaborative Framing**: Train agents to view Vibe Check as a collaborative partner rather than an external evaluator. 77 | 78 | 5. **Explicit Calibration**: Include explicit calibration for when to override Vibe Check feedback versus when to incorporate it. 
``` -------------------------------------------------------------------------------- /src/utils/storage.ts: -------------------------------------------------------------------------------- ```typescript 1 | import fs from 'fs'; 2 | import path from 'path'; 3 | import os from 'os'; 4 | 5 | // Define data directory - store in user's home directory 6 | const DATA_DIR = path.join(os.homedir(), '.vibe-check'); 7 | const LOG_FILE = path.join(DATA_DIR, 'vibe-log.json'); 8 | 9 | // Interfaces for the log data structure 10 | export type LearningType = 'mistake' | 'preference' | 'success'; 11 | 12 | export interface LearningEntry { 13 | type: LearningType; 14 | category: string; 15 | mistake: string; 16 | solution?: string; 17 | timestamp: number; 18 | } 19 | 20 | export interface VibeLog { 21 | mistakes: { 22 | [category: string]: { 23 | count: number; 24 | examples: LearningEntry[]; 25 | lastUpdated: number; 26 | }; 27 | }; 28 | lastUpdated: number; 29 | } 30 | 31 | /** 32 | * DEPRECATED: This functionality is now optional and will be removed in a future version. 
33 | * Standard mistake categories 34 | */ 35 | export const STANDARD_CATEGORIES = [ 36 | 'Complex Solution Bias', 37 | 'Feature Creep', 38 | 'Premature Implementation', 39 | 'Misalignment', 40 | 'Overtooling', 41 | 'Preference', 42 | 'Success', 43 | 'Other' 44 | ]; 45 | 46 | // Initial empty log structure 47 | const emptyLog: VibeLog = { 48 | mistakes: {}, 49 | lastUpdated: Date.now() 50 | }; 51 | 52 | /** 53 | * Ensure the data directory exists 54 | */ 55 | export function ensureDataDir(): void { 56 | if (!fs.existsSync(DATA_DIR)) { 57 | fs.mkdirSync(DATA_DIR, { recursive: true }); 58 | } 59 | } 60 | 61 | /** 62 | * Read the vibe log from disk 63 | */ 64 | export function readLogFile(): VibeLog { 65 | ensureDataDir(); 66 | 67 | if (!fs.existsSync(LOG_FILE)) { 68 | // Initialize with empty log if file doesn't exist 69 | writeLogFile(emptyLog); 70 | return emptyLog; 71 | } 72 | 73 | try { 74 | const data = fs.readFileSync(LOG_FILE, 'utf8'); 75 | return JSON.parse(data) as VibeLog; 76 | } catch (error) { 77 | console.error('Error reading vibe log:', error); 78 | // Return empty log as fallback 79 | return emptyLog; 80 | } 81 | } 82 | 83 | /** 84 | * Write data to the vibe log file 85 | */ 86 | export function writeLogFile(data: VibeLog): void { 87 | ensureDataDir(); 88 | 89 | try { 90 | const jsonData = JSON.stringify(data, null, 2); 91 | fs.writeFileSync(LOG_FILE, jsonData, 'utf8'); 92 | } catch (error) { 93 | console.error('Error writing vibe log:', error); 94 | } 95 | } 96 | 97 | /** 98 | * Add a mistake to the vibe log 99 | */ 100 | export function addLearningEntry( 101 | mistake: string, 102 | category: string, 103 | solution?: string, 104 | type: LearningType = 'mistake' 105 | ): LearningEntry { 106 | const log = readLogFile(); 107 | const now = Date.now(); 108 | 109 | // Create new entry 110 | const entry: LearningEntry = { 111 | type, 112 | category, 113 | mistake, 114 | solution, 115 | timestamp: now 116 | }; 117 | 118 | // Initialize category if it doesn't 
exist 119 | if (!log.mistakes[category]) { 120 | log.mistakes[category] = { 121 | count: 0, 122 | examples: [], 123 | lastUpdated: now 124 | }; 125 | } 126 | 127 | // Update category data 128 | log.mistakes[category].count += 1; 129 | log.mistakes[category].examples.push(entry); 130 | log.mistakes[category].lastUpdated = now; 131 | log.lastUpdated = now; 132 | 133 | // Write updated log 134 | writeLogFile(log); 135 | 136 | return entry; 137 | } 138 | 139 | /** 140 | * Get all mistake entries 141 | */ 142 | export function getLearningEntries(): Record<string, LearningEntry[]> { 143 | const log = readLogFile(); 144 | const result: Record<string, LearningEntry[]> = {}; 145 | 146 | // Convert to flat structure by category 147 | for (const [category, data] of Object.entries(log.mistakes)) { 148 | result[category] = data.examples; 149 | } 150 | 151 | return result; 152 | } 153 | 154 | /** 155 | * Get mistake category summaries, sorted by count (most frequent first) 156 | */ 157 | export function getLearningCategorySummary(): Array<{ 158 | category: string; 159 | count: number; 160 | recentExample: LearningEntry; 161 | }> { 162 | const log = readLogFile(); 163 | 164 | // Convert to array with most recent example 165 | const summary = Object.entries(log.mistakes).map(([category, data]) => { 166 | // Get most recent example 167 | const recentExample = data.examples[data.examples.length - 1]; 168 | 169 | return { 170 | category, 171 | count: data.count, 172 | recentExample 173 | }; 174 | }); 175 | 176 | // Sort by count (descending) 177 | return summary.sort((a, b) => b.count - a.count); 178 | } 179 | 180 | /** 181 | * Build a learning context string from the vibe log 182 | * including recent examples for each category. This can be 183 | * fed directly to the LLM for improved pattern recognition. 
184 | */ 185 | export function getLearningContextText(maxPerCategory = 5): string { 186 | const log = readLogFile(); 187 | let context = ''; 188 | 189 | for (const [category, data] of Object.entries(log.mistakes)) { 190 | context += `Category: ${category} (count: ${data.count})\n`; 191 | const examples = [...data.examples] 192 | .sort((a, b) => a.timestamp - b.timestamp) 193 | .slice(-maxPerCategory); 194 | for (const ex of examples) { 195 | const date = new Date(ex.timestamp).toISOString(); 196 | const label = ex.type === 'mistake' 197 | ? 'Mistake' 198 | : ex.type === 'preference' 199 | ? 'Preference' 200 | : 'Success'; 201 | const solutionText = ex.solution ? ` | Solution: ${ex.solution}` : ''; 202 | context += `- [${date}] ${label}: ${ex.mistake}${solutionText}\n`; 203 | } 204 | context += '\n'; 205 | } 206 | 207 | return context.trim(); 208 | } ``` -------------------------------------------------------------------------------- /docs/technical-reference.md: -------------------------------------------------------------------------------- ```markdown 1 | # Technical Reference 2 | 3 | This document provides detailed technical information about the Vibe Check MCP tools, including parameter specifications, response formats, and implementation details. 4 | 5 | ## vibe_check 6 | 7 | The metacognitive questioning tool that identifies assumptions and breaks tunnel vision to prevent cascading errors. 
8 | 9 | ### Parameters 10 | 11 | | Parameter | Type | Required | Description | 12 | |-----------|------|----------|-------------| 13 | | goal | string | Yes | High level objective for the current step | 14 | | plan | string | Yes | Current plan or thinking | 15 | | userPrompt | string | No | Original user request (critical for alignment) | 16 | | progress | string | No | Description of progress so far | 17 | | uncertainties | string[] | No | Explicit uncertainties to focus on | 18 | | taskContext | string | No | Any additional task context | 19 | | modelOverride | object | No | `{ provider, model }` to override default LLM | 20 | | sessionId | string | No | Session ID for history continuity | 21 | 22 | ### Response Format 23 | 24 | The vibe_check tool returns a text response with metacognitive questions, observations, and potentially a pattern alert. 25 | 26 | Example response: 27 | 28 | ``` 29 | I see you're taking an approach based on creating a complex class hierarchy. This seems well-thought-out for a large system, though I wonder if we're overengineering for the current use case. 30 | 31 | Have we considered: 32 | 1. Whether a simpler functional approach might work here? 33 | 2. If the user request actually requires this level of abstraction? 34 | 3. How this approach will scale if requirements change? 35 | 36 | While the architecture is clean, I'm curious if we're solving a different problem than what the user actually asked for, which was just to extract data from a CSV file. 37 | ``` 38 | 39 | ## vibe_learn 40 | 41 | Pattern recognition system that creates a self-improving feedback loop by tracking common errors and their solutions over time. The use of this tool is optional and can be enabled or disabled via configuration. 
42 | 43 | ### Parameters 44 | 45 | | Parameter | Type | Required | Description | 46 | |-----------|------|----------|-------------| 47 | | mistake | string | Yes | One-sentence description of the learning entry | 48 | | category | string | Yes | Category (from standard categories) | 49 | | solution | string | No | How it was corrected (required for `mistake` and `success`) | 50 | | type | string | No | `mistake`, `preference`, or `success` | 51 | | sessionId | string | No | Session ID for state management | 52 | 53 | ### Standard Categories 54 | 55 | - Complex Solution Bias 56 | - Feature Creep 57 | - Premature Implementation 58 | - Misalignment 59 | - Overtooling 60 | - Preference 61 | - Success 62 | - Other 63 | 64 | ### Response Format 65 | 66 | The vibe_learn tool returns a confirmation of the logged pattern and optionally information about top patterns. This builds a knowledge base that improves the system's pattern recognition over time. 67 | 68 | Example response: 69 | 70 | ``` 71 | ✅ Pattern logged successfully (category tally: 12) 72 | 73 | ## Top Pattern Categories 74 | 75 | ### Complex Solution Bias (12 occurrences) 76 | Most recent: "Added unnecessary class hierarchy for simple data transformation" 77 | Solution: "Replaced with functional approach using built-in methods" 78 | 79 | ### Misalignment (8 occurrences) 80 | Most recent: "Implemented sophisticated UI when user only needed command line tool" 81 | Solution: "Refocused on core functionality requested by user" 82 | ``` 83 | 84 | ## Implementation Notes 85 | 86 | ### Gemini API Integration 87 | 88 | Vibe Check uses the Gemini API for enhanced metacognitive questioning. The system attempts to use the `learnlm-2.0-flash-experimental` model and will fall back to `gemini-2.5-flash` or `gemini-2.0-flash` if needed. These models provide a 1M token context window, allowing vibe_check to incorporate a rich history of learning context. 
The system sends a structured prompt that includes the agent's plan, user request, and other context information to generate insightful questions and observations. 89 | 90 | Example Gemini prompt structure: 91 | 92 | ``` 93 | You are a supportive mentor, thinker, and adaptive partner. Your task is to coordinate and mentor an AI agent... 94 | 95 | CONTEXT: 96 | [Current Phase]: planning 97 | [Agent Confidence Level]: 50% 98 | [User Request]: Create a script to analyze sales data from the past year 99 | [Current Plan/Thinking]: I'll create a complex object-oriented architecture with... 100 | ``` 101 | 102 | Other providers such as OpenAI and OpenRouter can be selected by passing 103 | `modelOverride: { provider: 'openai', model: 'gpt-4o' }` or the appropriate 104 | OpenRouter model. LLM clients are lazily initialized the first time they are 105 | used so that listing tools does not require API keys. 106 | 107 | ### Storage System 108 | 109 | The pattern recognition system stores learning entries (mistakes, preferences and successes) in a JSON-based storage file located in the user's home directory (`~/.vibe-check/vibe-log.json`). This allows for persistent tracking of patterns across sessions and enables the self-improving feedback loop that becomes more effective over time. 110 | 111 | ### Error Handling 112 | 113 | Vibe Check includes fallback mechanisms for when the API is unavailable: 114 | 115 | - For vibe_check, it generates basic questions based on the phase 116 | - For vibe_learn, it logs patterns to local storage even if API calls fail ``` -------------------------------------------------------------------------------- /docs/philosophy.md: -------------------------------------------------------------------------------- ```markdown 1 | # The Philosophy Behind Vibe Check 2 | 3 | > **CPI × Vibe Check (MURST)** 4 | > CPI (Chain-Pattern Interrupt) is the runtime oversight method that Vibe Check operationalizes. 
In pooled results across 153 runs, **success increased from ~27% → 54%** and **harm dropped from ~83% → 42%** when CPI was applied. Recommended “dosage”: **~10–20%** of steps receive an interrupt. 5 | > **Read the paper →** ResearchGate (primary), plus Git & Zenodo in the Research section below. 6 | 7 | > "The problem isn't that machines can think like humans. It's that they can't stop and question their own thoughts." 8 | 9 | ## Beyond the Vibe: Serious AI Alignment Principles 10 | 11 | While Vibe Check presents itself with a developer-friendly interface, it addresses fundamental challenges in AI alignment and agent oversight. The new meta-mentor approach mixes gentle tone with concrete methodology debugging to keep agents focused without heavy-handed rules. 12 | 13 | ## The Metacognitive Gap 14 | 15 | Large Language Models (LLMs) have demonstrated remarkable capabilities across a wide range of tasks. However, they exhibit a critical limitation: the inability to effectively question their own cognitive processes. This "metacognitive gap" manifests in several problematic ways: 16 | 17 | 1. **Pattern Inertia**: Once an LLM begins reasoning along a particular path, it tends to continue in that direction regardless of warning signs that the approach may be flawed. 18 | 19 | 2. **Overconfident Reasoning**: LLMs can present flawed reasoning with high confidence, unable to recognize when their own logic fails. 20 | 21 | 3. **Solution Tunneling**: When presented with a problem, LLMs often rush toward familiar solution patterns without considering whether those patterns are appropriate for the specific context. 22 | 23 | 4. **Recursive Complexity**: LLMs tend to recursively elaborate on solutions, adding unnecessary complexity without an internal mechanism to recognize when simplification is needed. 
24 | 25 | This metacognitive gap creates substantial alignment risks in agent architectures, particularly as these agents take on increasingly complex tasks with limited human oversight. 26 | 27 | ## Vibe Check: External Metacognition 28 | 29 | Vibe Check is designed as an **external metacognitive layer** that provides the reflection and self-questioning capabilities that LLMs lack internally. The three core tools correspond to critical metacognitive functions: 30 | 31 | ### 1. Questioning Assumptions (vibe_check) 32 | 33 | The `vibe_check` function implements a pattern interrupt mechanism that forces agents to pause and question their assumptions, decision paths, and alignment with user intent. This function is critical for preventing cascading errors that stem from initial misalignments in understanding or approach. 34 | 35 | In alignment terms, this addresses: 36 | - **Proximal objective alignment**: Ensuring the agent's immediate approach aligns with the user's actual intent 37 | - **Process oversight**: Providing external validation of reasoning processes 38 | - **Hidden assumption exposure**: Surfacing implicit assumptions for examination 39 | 40 | ### 2. Learning from Experience (vibe_learn) 41 | 42 | The `vibe_learn` function implements a critical metacognitive capability: learning from past mistakes to improve future performance. By tracking patterns of errors and their solutions, the system builds a continuously improving model of potential failure modes. 43 | 44 | In alignment terms, this addresses: 45 | - **Alignment learning**: Improvement of alignment mechanisms through experience 46 | - **Error pattern recognition**: Development of increasingly sophisticated error detection 47 | - **Corrective memory**: Building a shared repository of corrective insights 48 | 49 | ## The Recursion Principle 50 | 51 | A key insight behind Vibe Check is that metacognitive oversight must operate at a different level than the cognitive processes it oversees. 
This principle of "metacognitive recursion" is what makes Vibe Check effective as an alignment mechanism. 52 | 53 | By implementing oversight as a separate system with different objectives and mechanisms, Vibe Check creates a recursive oversight structure that can identify problems invisible to the agent itself. This is conceptually similar to Gödel's incompleteness theorems - a system cannot fully analyze itself, but can be analyzed by a meta-system operating at a higher level of abstraction. 54 | 55 | ## Phase-Aware Interrupts 56 | 57 | A subtle but critical aspect of Vibe Check is its awareness of development phases (planning, implementation, review). Different phases require different forms of metacognitive oversight: 58 | 59 | - **Planning phase**: Oversight focuses on alignment with user intent, exploration of alternatives, and questioning of fundamental assumptions 60 | - **Implementation phase**: Oversight focuses on consistency with the plan, appropriateness of methods, and technical alignment 61 | - **Review phase**: Oversight focuses on comprehensiveness, edge cases, and verification of outcomes 62 | 63 | This phase awareness ensures that metacognitive interrupts arrive at appropriate moments with relevant content, making them more likely to be effectively incorporated into the agent's workflow. 64 | 65 | ## Looking Ahead: The Future of Agent Oversight 66 | 67 | Vibe Check represents an early implementation of external metacognitive oversight for AI systems. As agent architectures become more complex and autonomous, the need for sophisticated oversight mechanisms will only increase. 68 | 69 | Future directions for this work include: 70 | 71 | 1. **Multi-level oversight**: Implementing oversight at multiple levels of abstraction 72 | 2. **Collaborative oversight**: Enabling multiple oversight systems to work together 73 | 3. **Adaptive interruption**: Dynamically adjusting the frequency and intensity of interrupts based on risk assessment 74 | 4. 
**Self-improving oversight**: Building mechanisms for oversight systems to improve their own effectiveness 75 | 76 | By continuing to develop external metacognitive mechanisms, we can address one of the fundamental challenges in AI alignment: ensuring that increasingly powerful AI systems can effectively question their own cognitive processes and align with human intent. 77 | 78 | ## Conclusion 79 | 80 | In the era of AI-assisted development, tools like Vibe Check do more than just improve productivity – they represent a practical approach to AI alignment through external metacognition. By implementing pattern interrupts, recalibration mechanisms, and learning systems, we can help bridge the metacognitive gap and create more aligned, effective AI systems. 81 | 82 | The vibe check may be casual, but its purpose is profound. ``` -------------------------------------------------------------------------------- /docs/case-studies.md: -------------------------------------------------------------------------------- ```markdown 1 | # Case Studies 2 | 3 | This document compiles real-world examples of how Vibe Check has helped prevent cascading errors in agent workflows. Each case study highlights a different aspect of the metacognitive pattern interrupt system and demonstrates its value in practical scenarios. 4 | 5 | ## Case Study 1: The Recursive Parser Problem 6 | 7 | ### The Scenario 8 | 9 | A developer asked their AI assistant to parse a simple CSV file with just a few columns. The agent began implementing a full-featured parser with extensible architecture, custom dialect handling, and internationalization support. The parsing logic quickly grew to over 300 lines of code. 10 | 11 | ### The Cascade 12 | 13 | This is a classic example of **overengineering**, a common pattern in AI agent workflows. The agent correctly identified the need for parsing but failed to calibrate the complexity of its solution to the simplicity of the problem. 
14 | 15 | ### The Pattern Interrupt 16 | 17 | After integrating Vibe Check, the developer received a metacognitive alert during the planning phase: 18 | 19 | ``` 20 | vibe_check: I notice you're planning to implement a custom CSV parser with extensive dialect support. 21 | This approach seems considerably more complex than what might be needed for the task at hand. 22 | 23 | Have we considered: 24 | 1. Using a built-in CSV parsing library? Most languages have one. 25 | 2. Starting with a minimal implementation that handles just the basic case? 26 | 3. Whether the complexity is justified by the user's actual needs? 27 | 28 | This pattern resembles previous instances where simple data tasks were approached with enterprise-level architecture. 29 | ``` 30 | 31 | ### The Outcome 32 | 33 | The agent recalibrated its approach, using a standard library function to parse the CSV in 3 lines of code. Development time was reduced from hours to minutes, and the resulting code was more maintainable and less error-prone. 34 | 35 | ## Case Study 2: The Feature Creep Spiral 36 | 37 | ### The Scenario 38 | 39 | A team was using an AI agent to help develop a basic weather application. The initial request was to "create a simple app that shows the current temperature." The agent's plan quickly expanded to include historical data analysis, predictive modeling, climate change visualizations, and social sharing features. 40 | 41 | ### The Cascade 42 | 43 | This demonstrates the **feature creep** pattern, where agents progressively expand the scope of work beyond the original request, often without explicitly checking if these additions align with user intent.
44 | 45 | ### The Pattern Interrupt 46 | 47 | During the implementation phase, Vibe Check triggered a pattern interrupt: 48 | 49 | ``` 50 | vibe_check: I notice we've expanded the weather app to include predictive models and climate change 51 | visualization, but the original request was specifically for "a simple app that shows the current temperature." 52 | 53 | Have we confirmed that: 54 | 1. The user actually wants these additional features? 55 | 2. The expanded scope aligns with the "simple" requirement? 56 | 3. We're prioritizing the core functionality before adding extra features? 57 | 58 | I'm seeing a pattern of scope expansion that might not align with the user's intent for simplicity. 59 | ``` 60 | 61 | ### The Outcome 62 | 63 | The team refocused on the core requirements and released a minimum viable product with just the current temperature display first, then validated which additional features users actually wanted before adding complexity. 64 | 65 | ## Case Study 3: The Mismatched Solution 66 | 67 | ### The Scenario 68 | 69 | A developer asked an AI agent to "help me set up a contact form for my website." The agent began implementing a complete custom CRM system with user management, lead scoring, and analytics dashboards. 70 | 71 | ### The Cascade 72 | 73 | This illustrates the **misalignment** pattern, where the agent solves a different (usually larger) problem than what was asked, often because it's making assumptions about the underlying need rather than addressing the explicit request. 74 | 75 | ### The Pattern Interrupt 76 | 77 | Vibe Check detected the misalignment in the planning phase: 78 | 79 | ``` 80 | vibe_check: I'm noticing we're designing a full CRM system with advanced features, but the original request 81 | was specifically to "set up a contact form" for a website. 82 | 83 | Have we considered: 84 | 1. Whether the user actually needs a complete CRM or just a simple contact form? 85 | 2.
Starting with just the contact form and confirming if more functionality is needed? 86 | 3. Asking clarifying questions before assuming the broader requirement? 87 | 88 | This appears to follow a pattern of expanding simple requests into complete systems without confirmation. 89 | ``` 90 | 91 | ### The Outcome 92 | 93 | The agent refocused on creating just the contact form, which took minutes instead of hours or days. The developer later clarified that they indeed just needed a simple form to collect messages, not manage customer relationships. 94 | 95 | ## Case Study 4: The Framework Jungle 96 | 97 | ### The Scenario 98 | 99 | A junior developer asked an agent to "create a simple to-do list app." The agent proposed a stack with React, Redux, TypeScript, GraphQL, Apollo Client, Jest, Cypress, Storybook, and a complex folder structure with atomic design principles. 100 | 101 | ### The Cascade 102 | 103 | This shows the **overtooling** pattern, where agents apply advanced frameworks and architectures better suited for large-scale applications to simple projects, creating unnecessary complexity and learning curves. 104 | 105 | ### The Pattern Interrupt 106 | 107 | Vibe Check identified this pattern and interrupted: 108 | 109 | ``` 110 | vibe_check: I notice we're suggesting a comprehensive tech stack with multiple advanced frameworks 111 | for what was described as a "simple to-do list app." 112 | 113 | Should we consider: 114 | 1. Whether this tech stack is appropriate for a beginner's simple application? 115 | 2. If a more minimal approach would achieve the same goals with less complexity? 116 | 3. The learning curve this stack creates for the junior developer? 117 | 118 | I'm seeing a pattern where the complexity of the tooling might exceed what's necessary for the task. 119 | ``` 120 | 121 | ### The Outcome 122 | 123 | The agent recommended starting with a simple HTML/CSS/JavaScript implementation without frameworks. 
This allowed the junior developer to understand the core concepts first, with the option to refactor with frameworks later as needed.

## Conclusion

These case studies demonstrate the value of metacognitive pattern interrupts in preventing cascading errors in agent workflows. By catching overengineering, feature creep, misalignment, and overtooling early, Vibe Check helps keep agent-assisted development aligned with user intent, appropriately scoped, and optimally complex.

If you have your own Vibe Check success story, we'd love to hear it! Submit a PR to add your case study to this document.
```

--------------------------------------------------------------------------------
/src/utils/llm.ts:
--------------------------------------------------------------------------------

```typescript
import { getLearningContextText } from './storage.js';
import { getConstitution } from '../tools/constitution.js';

// API Clients - Use 'any' to support dynamic import
// (clients are created lazily so the server can start without any API key set)
let genAI: any = null;
let openaiClient: any = null;

// OpenRouter Constants
const openrouterBaseUrl = 'https://openrouter.ai/api/v1';

// Initialize all configured LLM clients.
// Safe to call more than once: each ensure* helper is a no-op when its client
// already exists or its API key env var is unset.
export async function initializeLLMs() {
  await ensureGemini();
  await ensureOpenAI();
}

// Lazily construct the Gemini client from GEMINI_API_KEY.
// NOTE(review): .env.example documents GOOGLE_CLOUD_PROJECT but this code reads
// GEMINI_API_KEY — confirm which variable is canonical and align the example file.
async function ensureGemini() {
  if (!genAI && process.env.GEMINI_API_KEY) {
    const { GoogleGenerativeAI } = await import('@google/generative-ai');
    genAI = new GoogleGenerativeAI(process.env.GEMINI_API_KEY);
    console.log('Gemini API client initialized dynamically');
  }
}

// Lazily construct the OpenAI client from OPENAI_API_KEY.
async function ensureOpenAI() {
  if (!openaiClient && process.env.OPENAI_API_KEY) {
    const { OpenAI } = await import('openai');
    openaiClient = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
    console.log('OpenAI API client initialized dynamically');
  }
}

// Input/Output Interfaces
// Input for one metacognitive review round. Only goal/plan are required;
// everything else enriches the prompt context when present.
interface QuestionInput {
  goal: string;
  plan: string;
  // Per-call override of provider/model; falls back to DEFAULT_LLM_PROVIDER / DEFAULT_MODEL.
  modelOverride?: {
    provider?: string;
    model?: string;
  };
  userPrompt?: string;
  progress?: string;
  uncertainties?: string[];
  taskContext?: string;
  // Session key used to look up the per-session constitution rules.
  sessionId?: string;
  historySummary?: string;
}

interface QuestionOutput {
  questions: string;
}

// Main dispatcher function to generate responses from the selected LLM provider.
// Builds one combined prompt (system persona + structured context) and routes it
// to gemini / openai / openrouter. Throws when the chosen provider is unusable;
// callers that want a graceful fallback should use getMetacognitiveQuestions.
export async function generateResponse(input: QuestionInput): Promise<QuestionOutput> {
  const provider = input.modelOverride?.provider || process.env.DEFAULT_LLM_PROVIDER || 'gemini';
  const model = input.modelOverride?.model || process.env.DEFAULT_MODEL;

  // The system prompt remains the same as it's core to the vibe-check philosophy
  const systemPrompt = `You are a meta-mentor. You're an experienced feedback provider that specializes in understanding intent, dysfunctional patterns in AI agents, and in responding in ways that further the goal. You need to carefully reason and process the information provided, to determine your output.\n\nYour tone needs to always be a mix of these traits based on the context of which pushes the message in the most appropriate affect: Gentle & Validating, Unafraid to push many questions but humble enough to step back, Sharp about problems and eager to help about problem-solving & giving tips and/or advice, stern and straightforward when spotting patterns & the agent being stuck in something that could derail things.\n\nHere's what you need to think about (Do not output the full thought process, only what is explicitly requested):\n1. What's going on here? What's the nature of the problem is the agent tackling? What's the approach, situation and goal? Is there any prior context that clarifies context further? \n2. What does the agent need to hear right now: Are there any clear patterns, loops, or unspoken assumptions being missed here? Or is the agent doing fine - in which case should I interrupt it or provide soft encouragement and a few questions? What is the best response I can give right now?\n3. In case the issue is technical - I need to provide guidance and help. In case I spot something that's clearly not accounted for/ assumed/ looping/ or otherwise could be out of alignment with the user or agent stated goals - I need to point out what I see gently and ask questions on if the agent agrees. If I don't see/ can't interpret an explicit issue - what intervention would provide valuable feedback here - questions, guidance, validation, or giving a soft go-ahead with reminders of best practices?\n4. In case the plan looks to be accurate - based on the context, can I remind the agent of how to continue, what not to forget, or should I soften and step back for the agent to continue its work? What's the most helpful thing I can do right now?`;

  // Learning history is opt-in via USE_LEARNING_HISTORY=true.
  let learningContext = '';
  if (process.env.USE_LEARNING_HISTORY === 'true') {
    learningContext = getLearningContextText();
  }

  // Per-session constitution rules are appended as a bulleted block, if any exist.
  const rules = input.sessionId ? getConstitution(input.sessionId) : [];
  const constitutionBlock = rules.length ? `\nConstitution:\n${rules.map(r => `- ${r}`).join('\n')}` : '';

  // Structured context; absent optional fields render as 'None' so the model
  // sees a stable section layout.
  const contextSection = `CONTEXT:\nHistory Context: ${input.historySummary || 'None'}\n${learningContext ? `Learning Context:\n${learningContext}` : ''}\nGoal: ${input.goal}\nPlan: ${input.plan}\nProgress: ${input.progress || 'None'}\nUncertainties: ${input.uncertainties?.join(', ') || 'None'}\nTask Context: ${input.taskContext || 'None'}\nUser Prompt: ${input.userPrompt || 'None'}${constitutionBlock}`;
  const fullPrompt = `${systemPrompt}\n\n${contextSection}`;

  let responseText = '';

  if (provider === 'gemini') {
    await ensureGemini();
    if (!genAI) throw new Error('Gemini API key missing.');
    const geminiModel = model || 'gemini-2.5-pro';
    const fallbackModel = 'gemini-2.5-flash';
    try {
      console.log(`Attempting to use Gemini model: ${geminiModel}`);
      // console.error('Full Prompt:', fullPrompt); // Keep this commented out for now
      const modelInstance = genAI.getGenerativeModel({ model: geminiModel });
      const result = await modelInstance.generateContent(fullPrompt);
      responseText = result.response.text();
    } catch (error) {
      // Single retry on the cheaper flash model; if this also fails the error
      // propagates to the caller (getMetacognitiveQuestions catches it).
      console.error(`Gemini model ${geminiModel} failed. Trying fallback ${fallbackModel}.`, error);
      // console.error('Full Prompt:', fullPrompt); // Keep this commented out for now
      const fallbackModelInstance = genAI.getGenerativeModel({ model: fallbackModel });
      const result = await fallbackModelInstance.generateContent(fullPrompt);
      responseText = result.response.text();
    }
  } else if (provider === 'openai') {
    await ensureOpenAI();
    if (!openaiClient) throw new Error('OpenAI API key missing.');
    const openaiModel = model || 'o4-mini';
    console.log(`Using OpenAI model: ${openaiModel}`);
    // The whole combined prompt is sent as a single system message.
    const response = await openaiClient.chat.completions.create({
      model: openaiModel,
      messages: [{ role: 'system', content: fullPrompt }],
    });
    responseText = response.choices[0].message.content || '';
  } else if (provider === 'openrouter') {
    if (!process.env.OPENROUTER_API_KEY) throw new Error('OpenRouter API key missing.');
    // OpenRouter has no sensible default model, so the caller must name one.
    if (!model) throw new Error('OpenRouter provider requires a model to be specified in the tool call.');
    console.log(`Using OpenRouter model: ${model}`);
    const { default: axios } = await import('axios');
    const response = await axios.post(`${openrouterBaseUrl}/chat/completions`, {
      model: model,
      messages: [{ role: 'system', content: fullPrompt }],
    }, { headers: { Authorization: `Bearer ${process.env.OPENROUTER_API_KEY}`, 'HTTP-Referer': 'http://localhost', 'X-Title': 'Vibe Check MCP Server' } });
    responseText = response.data.choices[0].message.content || '';
  } else {
    throw new Error(`Invalid provider specified: ${provider}`);
  }

  return {
    questions: responseText,
  };
}

// The exported function is now a wrapper around the dispatcher.
// Never throws: on any provider failure it returns a canned set of
// metacognitive questions so the vibe_check tool always produces output.
export async function getMetacognitiveQuestions(input: QuestionInput): Promise<QuestionOutput> {
  try {
    return await generateResponse(input);
  } catch (error) {
    console.error('Error getting metacognitive questions:', error);
    // Fallback questions
    return {
      questions: `\nI can see you're thinking through your approach, which shows thoughtfulness:\n\n1. Does this plan directly address what the user requested, or might it be solving a different problem?\n2. Is there a simpler approach that would meet the user's needs?\n3. What unstated assumptions might be limiting the thinking here?\n4. How does this align with the user's original intent?\n`,
    };
  }
}

// Testing helpers: allow tests to inject/inspect the module-level clients.
export const __testing = {
  setGenAI(client: any) { genAI = client; },
  setOpenAIClient(client: any) { openaiClient = client; },
  getGenAI() { return genAI; },
  getOpenAIClient() { return openaiClient; }
};
```

--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------

```typescript
#!/usr/bin/env node

// Load .env before anything else reads process.env.
import dotenv from 'dotenv';
dotenv.config();

import express from 'express';
import cors from 'cors';
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import { McpError, ErrorCode, ListToolsRequestSchema, CallToolRequestSchema } from '@modelcontextprotocol/sdk/types.js';

import { vibeCheckTool, VibeCheckInput, VibeCheckOutput } from './tools/vibeCheck.js';
import { vibeLearnTool, VibeLearnInput, VibeLearnOutput } from './tools/vibeLearn.js';
import { updateConstitution, resetConstitution, getConstitution } from './tools/constitution.js';
import { STANDARD_CATEGORIES, LearningType } from './utils/storage.js';
import { loadHistory } from './utils/state.js';

// In discovery mode, missing-parameter errors are returned as friendly text
// instead of thrown, so tool catalogs can probe the schema.
const IS_DISCOVERY = process.env.MCP_DISCOVERY_MODE
=== '1'; 20 | const USE_STDIO = process.env.MCP_TRANSPORT === 'stdio'; 21 | 22 | if (USE_STDIO) { 23 | console.log = (...args) => console.error(...args); 24 | } 25 | 26 | async function main() { 27 | await loadHistory(); 28 | 29 | const server = new Server( 30 | { name: 'vibe-check', version: '2.5.0' }, 31 | { capabilities: { tools: {}, sampling: {} } } 32 | ); 33 | 34 | server.setRequestHandler(ListToolsRequestSchema, async () => ({ 35 | tools: [ 36 | { 37 | name: 'vibe_check', 38 | description: 'Metacognitive questioning tool that identifies assumptions and breaks tunnel vision to prevent cascading errors', 39 | inputSchema: { 40 | type: 'object', 41 | properties: { 42 | goal: { 43 | type: 'string', 44 | description: "The agent's current goal", 45 | examples: ['Ship CPI v2.5 with zero regressions'] 46 | }, 47 | plan: { 48 | type: 'string', 49 | description: "The agent's detailed plan", 50 | examples: ['1) Write tests 2) Refactor 3) Canary rollout'] 51 | }, 52 | modelOverride: { 53 | type: 'object', 54 | properties: { 55 | provider: { type: 'string', enum: ['gemini', 'openai', 'openrouter'] }, 56 | model: { type: 'string' } 57 | }, 58 | required: [], 59 | examples: [{ provider: 'gemini', model: 'gemini-2.5-pro' }] 60 | }, 61 | userPrompt: { 62 | type: 'string', 63 | description: 'The original user prompt', 64 | examples: ['Summarize the repo'] 65 | }, 66 | progress: { 67 | type: 'string', 68 | description: "The agent's progress so far", 69 | examples: ['Finished step 1'] 70 | }, 71 | uncertainties: { 72 | type: 'array', 73 | items: { type: 'string' }, 74 | description: "The agent's uncertainties", 75 | examples: [['uncertain about deployment']] 76 | }, 77 | taskContext: { 78 | type: 'string', 79 | description: 'The context of the current task', 80 | examples: ['repo: vibe-check-mcp @2.5.0'] 81 | }, 82 | sessionId: { 83 | type: 'string', 84 | description: 'Optional session ID for state management', 85 | examples: ['session-123'] 86 | } 87 | }, 88 | required: 
['goal', 'plan'], 89 | additionalProperties: false 90 | } 91 | }, 92 | { 93 | name: 'vibe_learn', 94 | description: 'Pattern recognition system that tracks common errors and solutions to prevent recurring issues', 95 | inputSchema: { 96 | type: 'object', 97 | properties: { 98 | mistake: { 99 | type: 'string', 100 | description: 'One-sentence description of the learning entry', 101 | examples: ['Skipped writing tests'] 102 | }, 103 | category: { 104 | type: 'string', 105 | description: `Category (standard categories: ${STANDARD_CATEGORIES.join(', ')})`, 106 | enum: STANDARD_CATEGORIES, 107 | examples: ['Premature Implementation'] 108 | }, 109 | solution: { 110 | type: 'string', 111 | description: 'How it was corrected (if applicable)', 112 | examples: ['Added regression tests'] 113 | }, 114 | type: { 115 | type: 'string', 116 | enum: ['mistake', 'preference', 'success'], 117 | description: 'Type of learning entry', 118 | examples: ['mistake'] 119 | }, 120 | sessionId: { 121 | type: 'string', 122 | description: 'Optional session ID for state management', 123 | examples: ['session-123'] 124 | } 125 | }, 126 | required: ['mistake', 'category'], 127 | additionalProperties: false 128 | } 129 | }, 130 | { 131 | name: 'update_constitution', 132 | description: 'Append a constitutional rule for this session (in-memory)', 133 | inputSchema: { 134 | type: 'object', 135 | properties: { 136 | sessionId: { type: 'string', examples: ['session-123'] }, 137 | rule: { type: 'string', examples: ['Always write tests first'] } 138 | }, 139 | required: ['sessionId', 'rule'], 140 | additionalProperties: false 141 | } 142 | }, 143 | { 144 | name: 'reset_constitution', 145 | description: 'Overwrite all constitutional rules for this session', 146 | inputSchema: { 147 | type: 'object', 148 | properties: { 149 | sessionId: { type: 'string', examples: ['session-123'] }, 150 | rules: { 151 | type: 'array', 152 | items: { type: 'string' }, 153 | examples: [['Be kind', 'Avoid loops']] 154 | } 155 
| }, 156 | required: ['sessionId', 'rules'], 157 | additionalProperties: false 158 | } 159 | }, 160 | { 161 | name: 'check_constitution', 162 | description: 'Return the current constitution rules for this session', 163 | inputSchema: { 164 | type: 'object', 165 | properties: { 166 | sessionId: { type: 'string', examples: ['session-123'] } 167 | }, 168 | required: ['sessionId'], 169 | additionalProperties: false 170 | } 171 | } 172 | ] 173 | })); 174 | 175 | server.setRequestHandler(CallToolRequestSchema, async (req) => { 176 | const { name, arguments: raw } = req.params; 177 | const args: any = raw; 178 | 179 | switch (name) { 180 | case 'vibe_check': { 181 | const missing: string[] = []; 182 | if (!args || typeof args.goal !== 'string') missing.push('goal'); 183 | if (!args || typeof args.plan !== 'string') missing.push('plan'); 184 | if (missing.length) { 185 | const example = '{"goal":"Ship CPI v2.5","plan":"1) tests 2) refactor 3) canary"}'; 186 | if (IS_DISCOVERY) { 187 | return { content: [{ type: 'text', text: `discovery: missing [${missing.join(', ')}]; example: ${example}` }] }; 188 | } 189 | throw new McpError(ErrorCode.InvalidParams, `Missing: ${missing.join(', ')}. Example: ${example}`); 190 | } 191 | const input: VibeCheckInput = { 192 | goal: args.goal, 193 | plan: args.plan, 194 | modelOverride: typeof args.modelOverride === 'object' && args.modelOverride !== null ? args.modelOverride : undefined, 195 | userPrompt: typeof args.userPrompt === 'string' ? args.userPrompt : undefined, 196 | progress: typeof args.progress === 'string' ? args.progress : undefined, 197 | uncertainties: Array.isArray(args.uncertainties) ? args.uncertainties : undefined, 198 | taskContext: typeof args.taskContext === 'string' ? args.taskContext : undefined, 199 | sessionId: typeof args.sessionId === 'string' ? 
args.sessionId : undefined, 200 | }; 201 | const result = await vibeCheckTool(input); 202 | return { content: [{ type: 'text', text: formatVibeCheckOutput(result) }] }; 203 | } 204 | 205 | case 'vibe_learn': { 206 | const missing: string[] = []; 207 | if (!args || typeof args.mistake !== 'string') missing.push('mistake'); 208 | if (!args || typeof args.category !== 'string') missing.push('category'); 209 | if (missing.length) { 210 | const example = '{"mistake":"Skipped tests","category":"Feature Creep"}'; 211 | if (IS_DISCOVERY) { 212 | return { content: [{ type: 'text', text: `discovery: missing [${missing.join(', ')}]; example: ${example}` }] }; 213 | } 214 | throw new McpError(ErrorCode.InvalidParams, `Missing: ${missing.join(', ')}. Example: ${example}`); 215 | } 216 | const input: VibeLearnInput = { 217 | mistake: args.mistake, 218 | category: args.category, 219 | solution: typeof args.solution === 'string' ? args.solution : undefined, 220 | type: ['mistake', 'preference', 'success'].includes(args.type as string) 221 | ? (args.type as LearningType) 222 | : undefined, 223 | sessionId: typeof args.sessionId === 'string' ? args.sessionId : undefined 224 | }; 225 | const result = await vibeLearnTool(input); 226 | return { content: [{ type: 'text', text: formatVibeLearnOutput(result) }] }; 227 | } 228 | 229 | case 'update_constitution': { 230 | const missing: string[] = []; 231 | if (!args || typeof args.sessionId !== 'string') missing.push('sessionId'); 232 | if (!args || typeof args.rule !== 'string') missing.push('rule'); 233 | if (missing.length) { 234 | const example = '{"sessionId":"123","rule":"Always write tests first"}'; 235 | if (IS_DISCOVERY) { 236 | return { content: [{ type: 'text', text: `discovery: missing [${missing.join(', ')}]; example: ${example}` }] }; 237 | } 238 | throw new McpError(ErrorCode.InvalidParams, `Missing: ${missing.join(', ')}. 
Example: ${example}`); 239 | } 240 | updateConstitution(args.sessionId, args.rule); 241 | console.log('[Constitution:update]', { sessionId: args.sessionId, count: getConstitution(args.sessionId).length }); 242 | return { content: [{ type: 'text', text: '✅ Constitution updated' }] }; 243 | } 244 | 245 | case 'reset_constitution': { 246 | const missing: string[] = []; 247 | if (!args || typeof args.sessionId !== 'string') missing.push('sessionId'); 248 | if (!args || !Array.isArray(args.rules)) missing.push('rules'); 249 | if (missing.length) { 250 | const example = '{"sessionId":"123","rules":["Be kind","Avoid loops"]}'; 251 | if (IS_DISCOVERY) { 252 | return { content: [{ type: 'text', text: `discovery: missing [${missing.join(', ')}]; example: ${example}` }] }; 253 | } 254 | throw new McpError(ErrorCode.InvalidParams, `Missing: ${missing.join(', ')}. Example: ${example}`); 255 | } 256 | resetConstitution(args.sessionId, args.rules); 257 | console.log('[Constitution:reset]', { sessionId: args.sessionId, count: getConstitution(args.sessionId).length }); 258 | return { content: [{ type: 'text', text: '✅ Constitution reset' }] }; 259 | } 260 | 261 | case 'check_constitution': { 262 | const missing: string[] = []; 263 | if (!args || typeof args.sessionId !== 'string') missing.push('sessionId'); 264 | if (missing.length) { 265 | const example = '{"sessionId":"123"}'; 266 | if (IS_DISCOVERY) { 267 | return { content: [{ type: 'text', text: `discovery: missing [${missing.join(', ')}]; example: ${example}` }] }; 268 | } 269 | 270 | throw new McpError(ErrorCode.InvalidParams, `Missing: ${missing.join(', ')}. 
Example: ${example}`); 271 | } 272 | const rules = getConstitution(args.sessionId); 273 | console.log('[Constitution:check]', { sessionId: args.sessionId, count: rules.length }); 274 | return { content: [{ type: 'json', json: { rules } }] }; 275 | } 276 | 277 | default: 278 | throw new McpError(ErrorCode.MethodNotFound, `Unknown tool: ${name}`); 279 | } 280 | }); 281 | 282 | const app = express(); 283 | const allowedOrigin = process.env.CORS_ORIGIN || '*'; 284 | app.use(cors({ origin: allowedOrigin })); 285 | app.use(express.json()); 286 | 287 | if (USE_STDIO) { 288 | const transport = new StdioServerTransport(); 289 | await server.connect(transport); 290 | console.error('[MCP] stdio transport connected'); 291 | } else { 292 | const transport = new StreamableHTTPServerTransport({ sessionIdGenerator: undefined }); 293 | await server.connect(transport); 294 | 295 | app.post('/mcp', async (req, res) => { 296 | const started = Date.now(); 297 | const { id, method } = req.body ?? {}; 298 | const sessionId = req.body?.params?.sessionId || req.body?.params?.arguments?.sessionId; 299 | console.log('[MCP] request', { id, method, sessionId }); 300 | try { 301 | await transport.handleRequest(req, res, req.body); 302 | } catch (e: any) { 303 | console.error('[MCP] error', { err: e?.message, id }); 304 | if (!res.headersSent) { 305 | res.status(500).json({ jsonrpc: '2.0', id: id ?? 
null, error: { code: -32603, message: 'Internal server error' } }); 306 | } 307 | } finally { 308 | console.log('[MCP] handled', { id, ms: Date.now() - started }); 309 | } 310 | }); 311 | 312 | app.get('/mcp', (_req, res) => { 313 | res.status(405).json({ jsonrpc: '2.0', error: { code: -32000, message: 'Method not allowed' }, id: null }); 314 | }); 315 | 316 | app.get('/healthz', (_req, res) => { 317 | res.status(200).json({ status: 'ok' }); 318 | }); 319 | 320 | const PORT = Number(process.env.MCP_HTTP_PORT || process.env.PORT || 3000); 321 | const listener = app.listen(PORT, () => { 322 | const addr = listener.address(); 323 | const actualPort = typeof addr === 'object' && addr ? addr.port : PORT; 324 | console.log(`[MCP] HTTP listening on :${actualPort}`); 325 | }); 326 | 327 | const close = () => listener.close(() => process.exit(0)); 328 | process.on('SIGTERM', close); 329 | process.on('SIGINT', close); 330 | } 331 | } 332 | 333 | function formatVibeCheckOutput(result: VibeCheckOutput): string { 334 | return result.questions; 335 | } 336 | 337 | function formatVibeLearnOutput(result: VibeLearnOutput): string { 338 | let output = ''; 339 | 340 | if (result.added) { 341 | output += `✅ Pattern logged successfully (category tally: ${result.currentTally})`; 342 | } else if (result.alreadyKnown) { 343 | output += 'ℹ️ Pattern already recorded'; 344 | } else { 345 | output += '❌ Failed to log pattern'; 346 | } 347 | 348 | if (result.topCategories && result.topCategories.length > 0) { 349 | output += '\n\n## Top Pattern Categories\n'; 350 | for (const category of result.topCategories) { 351 | output += `\n### ${category.category} (${category.count} occurrences)\n`; 352 | if (category.recentExample) { 353 | output += `Most recent: "${category.recentExample.mistake}"\n`; 354 | output += `Solution: "${category.recentExample.solution}"\n`; 355 | } 356 | } 357 | } 358 | 359 | return output; 360 | } 361 | 362 | main().catch((error) => { 363 | console.error('Server startup 
error:', error); 364 | process.exit(1); 365 | }); 366 | ```