This is page 1 of 3. Use http://codebase.md/king-of-the-grackles/reddit-mcp-poc?lines=true&page={x} to view the full context. # Directory Structure ``` ├── .env.sample ├── .gemini │ └── settings.json ├── .gitignore ├── .python-version ├── .specify │ ├── memory │ │ └── constitution.md │ ├── scripts │ │ └── bash │ │ ├── check-implementation-prerequisites.sh │ │ ├── check-task-prerequisites.sh │ │ ├── common.sh │ │ ├── create-new-feature.sh │ │ ├── get-feature-paths.sh │ │ ├── setup-plan.sh │ │ └── update-agent-context.sh │ └── templates │ ├── agent-file-template.md │ ├── plan-template.md │ ├── spec-template.md │ └── tasks-template.md ├── package.json ├── pyproject.toml ├── README.md ├── reddit-research-agent.md ├── reports │ ├── ai-llm-weekly-trends-reddit-analysis-2025-01-20.md │ ├── saas-solopreneur-reddit-communities.md │ ├── top-50-active-AI-subreddits.md │ ├── top-50-subreddits-saas-ai-builders.md │ └── top-50-subreddits-saas-solopreneurs.md ├── server.json ├── specs │ ├── 003-fastmcp-context-integration.md │ ├── 003-implementation-summary.md │ ├── 003-phase-1-context-integration.md │ ├── 003-phase-2-progress-monitoring.md │ ├── agent-reasoning-visibility.md │ ├── agentic-discovery-architecture.md │ ├── chroma-proxy-architecture.md │ ├── deep-research-reddit-architecture.md │ └── reddit-research-agent-spec.md ├── src │ ├── __init__.py │ ├── chroma_client.py │ ├── config.py │ ├── models.py │ ├── resources.py │ ├── server.py │ └── tools │ ├── __init__.py │ ├── comments.py │ ├── discover.py │ ├── posts.py │ └── search.py ├── tests │ ├── test_context_integration.py │ └── test_tools.py └── uv.lock ``` # Files -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- ``` 1 | 3.12 2 | ``` -------------------------------------------------------------------------------- /.env.sample: -------------------------------------------------------------------------------- ``` 1 | # Reddit API Configuration (Required) 2 | REDDIT_CLIENT_ID=your_client_id_here 3 | REDDIT_CLIENT_SECRET=your_client_secret_here 4 | REDDIT_USER_AGENT=RedditMCP/1.0 by u/your_username 5 | 6 | # Descope Authentication (Required) 7 | DESCOPE_PROJECT_ID=P2abc...123 8 | SERVER_URL=http://localhost:8000 9 | DESCOPE_BASE_URL=https://api.descope.com 10 | 11 | # Vector Database Proxy Authentication (Optional) 12 | # The hosted service handles this automatically. 
13 | # For development with your own proxy server: 14 | # CHROMA_PROXY_URL=https://your-proxy.com 15 | # CHROMA_PROXY_API_KEY=your_api_key_here ``` -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- ``` 1 | # Critical Security - Environment Variables and Secrets 2 | .env 3 | .env.* 4 | !.env.sample 5 | !.env.example 6 | *.key 7 | *.pem 8 | *.cert 9 | *.crt 10 | secrets/ 11 | credentials/ 12 | config/secrets.json 13 | 14 | # Python 15 | __pycache__/ 16 | *.py[cod] 17 | *$py.class 18 | *.so 19 | .Python 20 | *.pyc 21 | *.pyo 22 | *.pyd 23 | .pytest_cache/ 24 | .mypy_cache/ 25 | .dmypy.json 26 | dmypy.json 27 | .coverage 28 | .coverage.* 29 | htmlcov/ 30 | .tox/ 31 | .hypothesis/ 32 | .ruff_cache/ 33 | *.cover 34 | *.log 35 | 36 | # Virtual Environments 37 | venv/ 38 | .venv/ 39 | env/ 40 | .env/ 41 | ENV/ 42 | env.bak/ 43 | venv.bak/ 44 | virtualenv/ 45 | 46 | # Package Management & Build 47 | build/ 48 | develop-eggs/ 49 | dist/ 50 | downloads/ 51 | eggs/ 52 | .eggs/ 53 | lib/ 54 | lib64/ 55 | parts/ 56 | sdist/ 57 | var/ 58 | wheels/ 59 | *.egg-info/ 60 | .installed.cfg 61 | *.egg 62 | MANIFEST 63 | pip-log.txt 64 | pip-delete-this-directory.txt 65 | uv.lock 66 | 67 | # IDEs and Editors 68 | .vscode/ 69 | .idea/ 70 | *.swp 71 | *.swo 72 | *~ 73 | .project 74 | .pydevproject 75 | .settings/ 76 | *.sublime-project 77 | *.sublime-workspace 78 | .atom/ 79 | .brackets.json 80 | 81 | # Operating System 82 | .DS_Store 83 | .DS_Store? 84 | ._* 85 | .Spotlight-V100 86 | .Trashes 87 | ehthumbs.db 88 | Thumbs.db 89 | desktop.ini 90 | 91 | # Project Specific 92 | logs/ 93 | *.log 94 | .claude/ 95 | *.db 96 | *.sqlite 97 | *.sqlite3 98 | instance/ 99 | 100 | # Testing & Documentation 101 | .nox/ 102 | docs/_build/ 103 | .scrapy/ 104 | target/ 105 | 106 | # Jupyter Notebook 107 | .ipynb_checkpoints 108 | *.ipynb_checkpoints/ 109 | 110 | # macOS 111 | .AppleDouble 112 | .LSOverride 113 | Icon 114 | .DocumentRevisions-V100 115 | .fseventsd 116 | .TemporaryItems 117 | .VolumeIcon.icns 118 | .com.apple.timemachine.donotpresent 119 | .AppleDB 120 | .AppleDesktop 121 | Network Trash Folder 122 | Temporary Items 123 | .apdisk 124 | 125 | # Windows 126 | $RECYCLE.BIN/ 127 | *.lnk 128 | *.msi 129 | *.msm 130 | *.msp 131 | 132 | # Backup files 133 | *.bak 134 | *.backup 135 | *.old 136 | *.orig 137 | *.tmp 138 | .history/ 139 | 140 | # FastMCP specific 141 | .fastmcp/ 142 | fastmcp.db 143 | 144 | # MCP Registry files 145 | .mcpregistry_* 146 | mcp-publisher 147 | 148 | # Development & Research directories 149 | fastmcp/ 150 | mcp-remote/ 151 | ai/*.rtf 152 | ``` -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- ```markdown 1 | mcp-name: io.github.king-of-the-grackles/reddit-research-mcp 2 | 3 | # 🔍 Reddit Research MCP Server 4 | 5 | **Turn Reddit's chaos into structured insights with full citations** 6 | 7 | [](https://www.python.org/downloads/) 8 | [](https://github.com/jlowin/fastmcp) 9 | [](https://opensource.org/licenses/MIT) 10 | 11 | --- 12 | 13 | Your customers are on Reddit right now, comparing you to competitors, sharing pain points, requesting features. But finding those insights means hours of manual searching with no way to cite your sources. 
14 | 15 | This MCP server turns Reddit into a queryable research database that generates reports with links to every claim. Get comprehensive market research, competitive analysis, and customer insights in minutes instead of hours. 16 | 17 | --- 18 | 19 | ## 🚀 Quick Setup (60 Seconds) 20 | 21 | **No credentials or configuration needed!** Connect to our hosted server: 22 | 23 | ### Claude Code 24 | ```bash 25 | claude mcp add --scope local --transport http reddit-research-mcp https://reddit-research-mcp.fastmcp.app/mcp 26 | ``` 27 | 28 | ### Cursor 29 | ``` 30 | cursor://anysphere.cursor-deeplink/mcp/install?name=reddit-research-mcp&config=eyJ1cmwiOiJodHRwczovL3JlZGRpdC1yZXNlYXJjaC1tY3AuZmFzdG1jcC5hcHAvbWNwIn0%3D 31 | ``` 32 | 33 | ### OpenAI Codex CLI 34 | ```bash 35 | codex mcp add reddit-research-mcp \ 36 | npx -y mcp-remote \ 37 | https://reddit-research-mcp.fastmcp.app/mcp \ 38 | --auth-timeout 120 \ 39 | --allow-http \ 40 | ``` 41 | 42 | ### Gemini CLI 43 | ```bash 44 | gemini mcp add reddit-research-mcp \ 45 | npx -y mcp-remote \ 46 | https://reddit-research-mcp.fastmcp.app/mcp \ 47 | --auth-timeout 120 \ 48 | --allow-http 49 | ``` 50 | 51 | ### Direct MCP Server URL 52 | For other AI assistants: `https://reddit-research-mcp.fastmcp.app/mcp` 53 | 54 | --- 55 | 56 | ## 🎯 What You Can Do 57 | 58 | ### Competitive Analysis 59 | ``` 60 | "What are developers saying about Next.js vs Remix?" 61 | ``` 62 | → Get a comprehensive report comparing sentiment, feature requests, pain points, and migration experiences with links to every mentioned discussion. 63 | 64 | ### Customer Discovery 65 | ``` 66 | "Find the top complaints about existing CRM tools in small business communities" 67 | ``` 68 | → Discover unmet needs, feature gaps, and pricing concerns directly from your target market with citations to real user feedback. 69 | 70 | ### Market Research 71 | ``` 72 | "Analyze sentiment about AI coding assistants across developer communities" 73 | ``` 74 | → Track adoption trends, concerns, success stories, and emerging use cases with temporal analysis showing how opinions evolved. 75 | 76 | ### Product Validation 77 | ``` 78 | "What problems are SaaS founders having with subscription billing?" 79 | ``` 80 | → Identify pain points and validate your solution with evidence from actual discussions, not assumptions. 81 | 82 | --- 83 | 84 | ## ✨ Why This Server? 85 | 86 | **Built for decision-makers who need evidence-based insights.** Every report links back to actual Reddit posts and comments. When you say "users are complaining about X," you'll have the receipts to prove it. Check the `/reports` folder for examples of deep-research reports with full citation trails. 87 | 88 | **Zero-friction setup designed for non-technical users.** Most MCP servers require cloning repos, managing Python environments, and hunting for API keys in developer dashboards. This one? Just paste the URL into Claude and start researching. Our hosted solution means no terminal commands, no credential management, no setup headaches. 89 | 90 | **Semantic search across 20,000+ active subreddits.** Reddit's API caps at 250 search results - useless for comprehensive research. We pre-indexed every active subreddit (2k+ members, active in last 7 days) with vector embeddings. Now you search conceptually across all of Reddit, finding relevant communities you didn't even know existed. Built with the [layered abstraction pattern](https://engineering.block.xyz/blog/build-mcp-tools-like-ogres-with-layers) for scalability. 
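For a concrete sense of what this looks like in practice, here is an illustrative conceptual query against the pre-indexed communities (it uses the `discover_subreddits` operation documented under Technical Details below; actual results depend on the live index):

```python
execute_operation("discover_subreddits", {
    "topic": "bootstrapped founders comparing churn and pricing strategies",
    "limit": 10
})
```

Because matching is semantic rather than keyword-based, describing the concept works just as well as naming a specific subreddit.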
91 | 92 | --- 93 | 94 | ## 📚 Specifications 95 | 96 | Some of the AI-generated specs that were used to build this project with Claude Code: 97 | - 📖 [Architecture Overview](specs/agentic-discovery-architecture.md) - System design and component interaction 98 | - 🤖 [Research Agent Details](specs/reddit-research-agent-spec.md) - Agent implementation patterns 99 | - 🔍 [Deep Research Architecture](specs/deep-research-reddit-architecture.md) - Research workflow and citation system 100 | - 🗄️ [ChromaDB Proxy Architecture](specs/chroma-proxy-architecture.md) - Vector search and authentication layer 101 | 102 | --- 103 | 104 | ## Technical Details 105 | 106 | <details> 107 | <summary><strong>🛠️ Core MCP Tools</strong></summary> 108 | 109 | #### Discover Communities 110 | ```python 111 | execute_operation("discover_subreddits", { 112 | "topic": "machine learning", 113 | "limit": 15 114 | }) 115 | ``` 116 | 117 | #### Search Across Reddit 118 | ```python 119 | execute_operation("search_all", { 120 | "query": "ChatGPT experiences", 121 | "time_filter": "week", 122 | "limit": 25 123 | }) 124 | ``` 125 | 126 | #### Batch Fetch Posts 127 | ```python 128 | execute_operation("fetch_multiple", { 129 | "subreddit_names": ["technology", "programming"], 130 | "limit_per_subreddit": 10, 131 | "time_filter": "day" 132 | }) 133 | ``` 134 | 135 | #### Deep Dive with Comments 136 | ```python 137 | execute_operation("fetch_comments", { 138 | "submission_id": "abc123", 139 | "comment_limit": 200, 140 | "sort": "best" 141 | }) 142 | ``` 143 | </details> 144 | 145 | <details> 146 | <summary><strong>📁 Project Structure</strong></summary> 147 | 148 | ``` 149 | reddit-research-mcp/ 150 | ├── src/ 151 | │ ├── server.py # FastMCP server 152 | │ ├── config.py # Reddit configuration 153 | │ ├── chroma_client.py # Vector database proxy 154 | │ ├── resources.py # MCP resources 155 | │ ├── models.py # Data models 156 | │ └── tools/ 157 | │ ├── search.py # Search operations 158 | │ ├── posts.py # Post fetching 159 | │ ├── comments.py # Comment retrieval 160 | │ └── discover.py # Subreddit discovery 161 | ├── tests/ # Test suite 162 | ├── reports/ # Example reports 163 | └── specs/ # Architecture docs 164 | ``` 165 | </details> 166 | 167 | <details> 168 | <summary><strong>🚀 Contributing & Tech Stack</strong></summary> 169 | 170 | This project uses: 171 | - Python 3.11+ with type hints 172 | - FastMCP for the server framework 173 | - Vector search via authenticated proxy (Render.com) 174 | - ChromaDB for semantic search 175 | - PRAW for Reddit API interaction 176 | 177 | --- 178 | 179 | <div align="center"> 180 | 181 | **Stop guessing. 
Start knowing what your market actually thinks.** 182 | 183 | [GitHub](https://github.com/king-of-the-grackles/reddit-research-mcp) • [Report Issues](https://github.com/king-of-the-grackles/reddit-research-mcp/issues) • [Request Features](https://github.com/king-of-the-grackles/reddit-research-mcp/issues) 184 | 185 | </div> ``` -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- ```python 1 | ``` -------------------------------------------------------------------------------- /src/tools/__init__.py: -------------------------------------------------------------------------------- ```python 1 | ``` -------------------------------------------------------------------------------- /.gemini/settings.json: -------------------------------------------------------------------------------- ```json 1 | { 2 | "mcpServers": { 3 | "reddit-research-mcp": { 4 | "httpUrl": "https://reddit-research-mcp.fastmcp.app/mcp" 5 | }, 6 | "local-reddit-research-mcp": { 7 | "command": "npx", 8 | "args": [ 9 | "-y", 10 | "mcp-remote", 11 | "http://localhost:8000/mcp", 12 | "--auth-timeout", 13 | "90", 14 | "--allow-http", 15 | "--debug" 16 | ] 17 | } 18 | } 19 | } ``` -------------------------------------------------------------------------------- /.specify/scripts/bash/get-feature-paths.sh: -------------------------------------------------------------------------------- ```bash 1 | #!/usr/bin/env bash 2 | set -e 3 | SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 4 | source "$SCRIPT_DIR/common.sh" 5 | eval $(get_feature_paths) 6 | check_feature_branch "$CURRENT_BRANCH" || exit 1 7 | echo "REPO_ROOT: $REPO_ROOT"; echo "BRANCH: $CURRENT_BRANCH"; echo "FEATURE_DIR: $FEATURE_DIR"; echo "FEATURE_SPEC: $FEATURE_SPEC"; echo "IMPL_PLAN: $IMPL_PLAN"; echo "TASKS: $TASKS" 8 | ``` -------------------------------------------------------------------------------- /.specify/templates/agent-file-template.md: -------------------------------------------------------------------------------- ```markdown 1 | # [PROJECT NAME] Development Guidelines 2 | 3 | Auto-generated from all feature plans. 
Last updated: [DATE] 4 | 5 | ## Active Technologies 6 | [EXTRACTED FROM ALL PLAN.MD FILES] 7 | 8 | ## Project Structure 9 | ``` 10 | [ACTUAL STRUCTURE FROM PLANS] 11 | ``` 12 | 13 | ## Commands 14 | [ONLY COMMANDS FOR ACTIVE TECHNOLOGIES] 15 | 16 | ## Code Style 17 | [LANGUAGE-SPECIFIC, ONLY FOR LANGUAGES IN USE] 18 | 19 | ## Recent Changes 20 | [LAST 3 FEATURES AND WHAT THEY ADDED] 21 | 22 | <!-- MANUAL ADDITIONS START --> 23 | <!-- MANUAL ADDITIONS END --> ``` -------------------------------------------------------------------------------- /server.json: -------------------------------------------------------------------------------- ```json 1 | { 2 | "$schema": "https://static.modelcontextprotocol.io/schemas/2025-07-09/server.schema.json", 3 | "name": "io.github.king-of-the-grackles/reddit-research-mcp", 4 | "description": "Turn Reddit's chaos into structured insights with full citations - MCP server for Reddit research", 5 | "status": "active", 6 | "repository": { 7 | "url": "https://github.com/king-of-the-grackles/reddit-research-mcp", 8 | "source": "github" 9 | }, 10 | "version": "0.1.1", 11 | "packages": [ 12 | { 13 | "registry_type": "pypi", 14 | "identifier": "reddit-research-mcp", 15 | "version": "0.1.1", 16 | "transport": { 17 | "type": "stdio" 18 | }, 19 | "environment_variables": [] 20 | } 21 | ] 22 | } ``` -------------------------------------------------------------------------------- /.specify/scripts/bash/setup-plan.sh: -------------------------------------------------------------------------------- ```bash 1 | #!/usr/bin/env bash 2 | set -e 3 | JSON_MODE=false 4 | for arg in "$@"; do case "$arg" in --json) JSON_MODE=true ;; --help|-h) echo "Usage: $0 [--json]"; exit 0 ;; esac; done 5 | SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 6 | source "$SCRIPT_DIR/common.sh" 7 | eval $(get_feature_paths) 8 | check_feature_branch "$CURRENT_BRANCH" || exit 1 9 | mkdir -p "$FEATURE_DIR" 10 | TEMPLATE="$REPO_ROOT/.specify/templates/plan-template.md" 11 | [[ -f "$TEMPLATE" ]] && cp "$TEMPLATE" "$IMPL_PLAN" 12 | if $JSON_MODE; then 13 | printf '{"FEATURE_SPEC":"%s","IMPL_PLAN":"%s","SPECS_DIR":"%s","BRANCH":"%s"}\n' \ 14 | "$FEATURE_SPEC" "$IMPL_PLAN" "$FEATURE_DIR" "$CURRENT_BRANCH" 15 | else 16 | echo "FEATURE_SPEC: $FEATURE_SPEC"; echo "IMPL_PLAN: $IMPL_PLAN"; echo "SPECS_DIR: $FEATURE_DIR"; echo "BRANCH: $CURRENT_BRANCH" 17 | fi 18 | ``` -------------------------------------------------------------------------------- /.specify/scripts/bash/common.sh: -------------------------------------------------------------------------------- ```bash 1 | #!/usr/bin/env bash 2 | # (Moved to scripts/bash/) Common functions and variables for all scripts 3 | 4 | get_repo_root() { git rev-parse --show-toplevel; } 5 | get_current_branch() { git rev-parse --abbrev-ref HEAD; } 6 | 7 | check_feature_branch() { 8 | local branch="$1" 9 | if [[ ! "$branch" =~ ^[0-9]{3}- ]]; then 10 | echo "ERROR: Not on a feature branch. 
Current branch: $branch" >&2 11 | echo "Feature branches should be named like: 001-feature-name" >&2 12 | return 1 13 | fi; return 0 14 | } 15 | 16 | get_feature_dir() { echo "$1/specs/$2"; } 17 | 18 | get_feature_paths() { 19 | local repo_root=$(get_repo_root) 20 | local current_branch=$(get_current_branch) 21 | local feature_dir=$(get_feature_dir "$repo_root" "$current_branch") 22 | cat <<EOF 23 | REPO_ROOT='$repo_root' 24 | CURRENT_BRANCH='$current_branch' 25 | FEATURE_DIR='$feature_dir' 26 | FEATURE_SPEC='$feature_dir/spec.md' 27 | IMPL_PLAN='$feature_dir/plan.md' 28 | TASKS='$feature_dir/tasks.md' 29 | RESEARCH='$feature_dir/research.md' 30 | DATA_MODEL='$feature_dir/data-model.md' 31 | QUICKSTART='$feature_dir/quickstart.md' 32 | CONTRACTS_DIR='$feature_dir/contracts' 33 | EOF 34 | } 35 | 36 | check_file() { [[ -f "$1" ]] && echo " ✓ $2" || echo " ✗ $2"; } 37 | check_dir() { [[ -d "$1" && -n $(ls -A "$1" 2>/dev/null) ]] && echo " ✓ $2" || echo " ✗ $2"; } 38 | ``` -------------------------------------------------------------------------------- /.specify/scripts/bash/check-task-prerequisites.sh: -------------------------------------------------------------------------------- ```bash 1 | #!/usr/bin/env bash 2 | set -e 3 | JSON_MODE=false 4 | for arg in "$@"; do case "$arg" in --json) JSON_MODE=true ;; --help|-h) echo "Usage: $0 [--json]"; exit 0 ;; esac; done 5 | SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 6 | source "$SCRIPT_DIR/common.sh" 7 | eval $(get_feature_paths) 8 | check_feature_branch "$CURRENT_BRANCH" || exit 1 9 | if [[ ! -d "$FEATURE_DIR" ]]; then echo "ERROR: Feature directory not found: $FEATURE_DIR"; echo "Run /specify first."; exit 1; fi 10 | if [[ ! -f "$IMPL_PLAN" ]]; then echo "ERROR: plan.md not found in $FEATURE_DIR"; echo "Run /plan first."; exit 1; fi 11 | if $JSON_MODE; then 12 | docs=(); [[ -f "$RESEARCH" ]] && docs+=("research.md"); [[ -f "$DATA_MODEL" ]] && docs+=("data-model.md"); ([[ -d "$CONTRACTS_DIR" ]] && [[ -n "$(ls -A "$CONTRACTS_DIR" 2>/dev/null)" ]]) && docs+=("contracts/"); [[ -f "$QUICKSTART" ]] && docs+=("quickstart.md"); 13 | json_docs=$(printf '"%s",' "${docs[@]}"); json_docs="[${json_docs%,}]"; printf '{"FEATURE_DIR":"%s","AVAILABLE_DOCS":%s}\n' "$FEATURE_DIR" "$json_docs" 14 | else 15 | echo "FEATURE_DIR:$FEATURE_DIR"; echo "AVAILABLE_DOCS:"; check_file "$RESEARCH" "research.md"; check_file "$DATA_MODEL" "data-model.md"; check_dir "$CONTRACTS_DIR" "contracts/"; check_file "$QUICKSTART" "quickstart.md"; fi 16 | ``` -------------------------------------------------------------------------------- /src/models.py: -------------------------------------------------------------------------------- ```python 1 | from typing import List, Optional, Dict, Any 2 | from pydantic import BaseModel, Field 3 | from datetime import datetime 4 | 5 | 6 | class RedditPost(BaseModel): 7 | """Model for a Reddit post/submission.""" 8 | id: str 9 | title: str 10 | author: str 11 | subreddit: str 12 | score: int 13 | created_utc: float 14 | url: str 15 | num_comments: int 16 | selftext: Optional[str] = None 17 | upvote_ratio: Optional[float] = None 18 | permalink: Optional[str] = None 19 | 20 | 21 | class SubredditInfo(BaseModel): 22 | """Model for subreddit metadata.""" 23 | name: str 24 | subscribers: int 25 | description: str 26 | 27 | 28 | class Comment(BaseModel): 29 | """Model for a Reddit comment.""" 30 | id: str 31 | body: str 32 | author: str 33 | score: int 34 | created_utc: float 35 | depth: int 36 | replies: List['Comment'] = 
Field(default_factory=list) 37 | 38 | 39 | class SearchResult(BaseModel): 40 | """Response model for search_reddit tool.""" 41 | results: List[RedditPost] 42 | count: int 43 | 44 | 45 | class SubredditPostsResult(BaseModel): 46 | """Response model for fetch_subreddit_posts tool.""" 47 | posts: List[RedditPost] 48 | subreddit: SubredditInfo 49 | count: int 50 | 51 | 52 | class SubmissionWithCommentsResult(BaseModel): 53 | """Response model for fetch_submission_with_comments tool.""" 54 | submission: RedditPost 55 | comments: List[Comment] 56 | total_comments_fetched: int 57 | 58 | 59 | # Allow recursive Comment model 60 | Comment.model_rebuild() ``` -------------------------------------------------------------------------------- /.specify/scripts/bash/check-implementation-prerequisites.sh: -------------------------------------------------------------------------------- ```bash 1 | #!/usr/bin/env bash 2 | set -e 3 | JSON_MODE=false 4 | for arg in "$@"; do case "$arg" in --json) JSON_MODE=true ;; --help|-h) echo "Usage: $0 [--json]"; exit 0 ;; esac; done 5 | SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 6 | source "$SCRIPT_DIR/common.sh" 7 | eval $(get_feature_paths) 8 | check_feature_branch "$CURRENT_BRANCH" || exit 1 9 | if [[ ! -d "$FEATURE_DIR" ]]; then echo "ERROR: Feature directory not found: $FEATURE_DIR"; echo "Run /specify first."; exit 1; fi 10 | if [[ ! -f "$IMPL_PLAN" ]]; then echo "ERROR: plan.md not found in $FEATURE_DIR"; echo "Run /plan first."; exit 1; fi 11 | if [[ ! -f "$TASKS" ]]; then echo "ERROR: tasks.md not found in $FEATURE_DIR"; echo "Run /tasks first."; exit 1; fi 12 | if $JSON_MODE; then 13 | docs=(); [[ -f "$RESEARCH" ]] && docs+=("research.md"); [[ -f "$DATA_MODEL" ]] && docs+=("data-model.md"); ([[ -d "$CONTRACTS_DIR" ]] && [[ -n "$(ls -A "$CONTRACTS_DIR" 2>/dev/null)" ]]) && docs+=("contracts/"); [[ -f "$QUICKSTART" ]] && docs+=("quickstart.md"); [[ -f "$TASKS" ]] && docs+=("tasks.md"); 14 | json_docs=$(printf '"%s",' "${docs[@]}"); json_docs="[${json_docs%,}]"; printf '{"FEATURE_DIR":"%s","AVAILABLE_DOCS":%s}\n' "$FEATURE_DIR" "$json_docs" 15 | else 16 | echo "FEATURE_DIR:$FEATURE_DIR"; echo "AVAILABLE_DOCS:"; check_file "$RESEARCH" "research.md"; check_file "$DATA_MODEL" "data-model.md"; check_dir "$CONTRACTS_DIR" "contracts/"; check_file "$QUICKSTART" "quickstart.md"; check_file "$TASKS" "tasks.md"; fi ``` -------------------------------------------------------------------------------- /src/config.py: -------------------------------------------------------------------------------- ```python 1 | import praw 2 | import os 3 | from pathlib import Path 4 | from dotenv import load_dotenv 5 | 6 | def get_reddit_client() -> praw.Reddit: 7 | """Get configured Reddit client (read-only) from environment.""" 8 | client_id = None 9 | client_secret = None 10 | user_agent = None 11 | 12 | # Method 1: Try environment variables 13 | client_id = os.environ.get("REDDIT_CLIENT_ID") 14 | client_secret = os.environ.get("REDDIT_CLIENT_SECRET") 15 | user_agent = os.environ.get("REDDIT_USER_AGENT", "RedditMCP/1.0") 16 | 17 | # Method 2: Try loading from .env file (local development) 18 | if not client_id or not client_secret: 19 | # Find .env file in project root 20 | env_path = Path(__file__).parent.parent / '.env' 21 | if env_path.exists(): 22 | load_dotenv(env_path) 23 | client_id = os.getenv("REDDIT_CLIENT_ID") 24 | client_secret = os.getenv("REDDIT_CLIENT_SECRET") 25 | if not user_agent: 26 | user_agent = os.getenv("REDDIT_USER_AGENT", "RedditMCP/1.0") 27 | 28 | 
if not client_id or not client_secret: 29 | raise ValueError( 30 | "Reddit API credentials not found. Please set REDDIT_CLIENT_ID " 31 | "and REDDIT_CLIENT_SECRET either as OS environment variables or in a .env file" 32 | ) 33 | 34 | # Create Reddit instance for read-only access 35 | reddit = praw.Reddit( 36 | client_id=client_id, 37 | client_secret=client_secret, 38 | user_agent=user_agent, 39 | redirect_uri="http://localhost:8080", # Required even for read-only 40 | ratelimit_seconds=300 # Auto-handle rate limits 41 | ) 42 | 43 | # Explicitly enable read-only mode 44 | reddit.read_only = True 45 | 46 | return reddit ``` -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- ```toml 1 | [project] 2 | name = "reddit-research-mcp" 3 | version = "0.1.1" 4 | description = "A minimal Model Context Protocol server for Reddit content access" 5 | readme = "README.md" 6 | requires-python = ">=3.11" 7 | authors = [ 8 | { name="King of the Grackles", email="[email protected]" }, 9 | ] 10 | license = {text = "MIT"} 11 | classifiers = [ 12 | "Development Status :: 4 - Beta", 13 | "Intended Audience :: Developers", 14 | "Topic :: Software Development :: Libraries :: Python Modules", 15 | "License :: OSI Approved :: MIT License", 16 | "Programming Language :: Python :: 3", 17 | "Programming Language :: Python :: 3.11", 18 | "Programming Language :: Python :: 3.12", 19 | "Operating System :: OS Independent", 20 | ] 21 | dependencies = [ 22 | "aiohttp>=3.12.15", 23 | "praw>=7.7.1", 24 | "fastmcp>=2.12.4", 25 | "openai-agents>=0.2.8", 26 | "pydantic>=2.0.0", 27 | "python-dotenv>=1.0.0", 28 | "starlette>=0.32.0", 29 | "uvicorn>=0.30.0", 30 | "requests>=2.31.0", 31 | ] 32 | 33 | [project.urls] 34 | Homepage = "https://github.com/king-of-the-grackles/reddit-research-mcp" 35 | Repository = "https://github.com/king-of-the-grackles/reddit-research-mcp" 36 | Issues = "https://github.com/king-of-the-grackles/reddit-research-mcp/issues" 37 | Documentation = "https://github.com/king-of-the-grackles/reddit-research-mcp#readme" 38 | 39 | [project.optional-dependencies] 40 | dev = [ 41 | "pytest>=8.0.0", 42 | "pytest-asyncio>=0.24.0", 43 | "pytest-mock>=3.14.0", 44 | ] 45 | 46 | [project.scripts] 47 | reddit-mcp = "src.server:main" 48 | 49 | [build-system] 50 | requires = ["hatchling"] 51 | build-backend = "hatchling.build" 52 | 53 | [tool.hatch.metadata] 54 | allow-direct-references = true 55 | 56 | [tool.hatch.build.targets.wheel] 57 | packages = ["src"] 58 | 59 | [tool.pytest.ini_options] 60 | asyncio_mode = "auto" 61 | asyncio_default_fixture_loop_scope = "function" 62 | ``` -------------------------------------------------------------------------------- /.specify/scripts/bash/create-new-feature.sh: -------------------------------------------------------------------------------- ```bash 1 | #!/usr/bin/env bash 2 | # (Moved to scripts/bash/) Create a new feature with branch, directory structure, and template 3 | set -e 4 | 5 | JSON_MODE=false 6 | ARGS=() 7 | for arg in "$@"; do 8 | case "$arg" in 9 | --json) JSON_MODE=true ;; 10 | --help|-h) echo "Usage: $0 [--json] <feature_description>"; exit 0 ;; 11 | *) ARGS+=("$arg") ;; 12 | esac 13 | done 14 | 15 | FEATURE_DESCRIPTION="${ARGS[*]}" 16 | if [ -z "$FEATURE_DESCRIPTION" ]; then 17 | echo "Usage: $0 [--json] <feature_description>" >&2 18 | exit 1 19 | fi 20 | 21 | REPO_ROOT=$(git rev-parse --show-toplevel) 22 | SPECS_DIR="$REPO_ROOT/specs" 23 
| mkdir -p "$SPECS_DIR" 24 | 25 | HIGHEST=0 26 | if [ -d "$SPECS_DIR" ]; then 27 | for dir in "$SPECS_DIR"/*; do 28 | [ -d "$dir" ] || continue 29 | dirname=$(basename "$dir") 30 | number=$(echo "$dirname" | grep -o '^[0-9]\+' || echo "0") 31 | number=$((10#$number)) 32 | if [ "$number" -gt "$HIGHEST" ]; then HIGHEST=$number; fi 33 | done 34 | fi 35 | 36 | NEXT=$((HIGHEST + 1)) 37 | FEATURE_NUM=$(printf "%03d" "$NEXT") 38 | 39 | BRANCH_NAME=$(echo "$FEATURE_DESCRIPTION" | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9]/-/g' | sed 's/-\+/-/g' | sed 's/^-//' | sed 's/-$//') 40 | WORDS=$(echo "$BRANCH_NAME" | tr '-' '\n' | grep -v '^$' | head -3 | tr '\n' '-' | sed 's/-$//') 41 | BRANCH_NAME="${FEATURE_NUM}-${WORDS}" 42 | 43 | git checkout -b "$BRANCH_NAME" 44 | 45 | FEATURE_DIR="$SPECS_DIR/$BRANCH_NAME" 46 | mkdir -p "$FEATURE_DIR" 47 | 48 | TEMPLATE="$REPO_ROOT/templates/spec-template.md" 49 | SPEC_FILE="$FEATURE_DIR/spec.md" 50 | if [ -f "$TEMPLATE" ]; then cp "$TEMPLATE" "$SPEC_FILE"; else touch "$SPEC_FILE"; fi 51 | 52 | if $JSON_MODE; then 53 | printf '{"BRANCH_NAME":"%s","SPEC_FILE":"%s","FEATURE_NUM":"%s"}\n' "$BRANCH_NAME" "$SPEC_FILE" "$FEATURE_NUM" 54 | else 55 | echo "BRANCH_NAME: $BRANCH_NAME" 56 | echo "SPEC_FILE: $SPEC_FILE" 57 | echo "FEATURE_NUM: $FEATURE_NUM" 58 | fi 59 | ``` -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- ```json 1 | { 2 | "name": "@king-of-the-grackles/reddit-research-mcp", 3 | "version": "1.0.0", 4 | "description": "Reddit Research MCP Server - Transform Reddit into your personal research assistant", 5 | "author": "king-of-the-grackles", 6 | "license": "MIT", 7 | "homepage": "https://github.com/king-of-the-grackles/reddit-research-mcp", 8 | "repository": { 9 | "type": "git", 10 | "url": "https://github.com/king-of-the-grackles/reddit-research-mcp.git" 11 | }, 12 | "scripts": { 13 | "start": "python src/server.py" 14 | }, 15 | "mcp": { 16 | "type": "stdio", 17 | "command": "python", 18 | "args": ["src/server.py"], 19 | "configSchema": { 20 | "type": "object", 21 | "required": ["REDDIT_CLIENT_ID", "REDDIT_CLIENT_SECRET", "REDDIT_USER_AGENT"], 22 | "properties": { 23 | "REDDIT_CLIENT_ID": { 24 | "type": "string", 25 | "title": "Reddit Client ID", 26 | "description": "Your Reddit application client ID from reddit.com/prefs/apps" 27 | }, 28 | "REDDIT_CLIENT_SECRET": { 29 | "type": "string", 30 | "title": "Reddit Client Secret", 31 | "description": "Your Reddit application client secret" 32 | }, 33 | "REDDIT_USER_AGENT": { 34 | "type": "string", 35 | "title": "Reddit User Agent", 36 | "description": "User agent string for Reddit API (e.g., 'MCP:reddit-research:v1.0')" 37 | }, 38 | "CHROMA_PROXY_URL": { 39 | "type": "string", 40 | "title": "ChromaDB Proxy URL", 41 | "description": "URL of the ChromaDB proxy server (optional, uses default if not set)" 42 | }, 43 | "CHROMA_PROXY_API_KEY": { 44 | "type": "string", 45 | "title": "ChromaDB Proxy API Key", 46 | "description": "API key for authenticating with the ChromaDB proxy server" 47 | } 48 | } 49 | }, 50 | "exampleConfig": { 51 | "REDDIT_CLIENT_ID": "your_client_id_here", 52 | "REDDIT_CLIENT_SECRET": "your_client_secret_here", 53 | "REDDIT_USER_AGENT": "MCP:reddit-research:v1.0 (by /u/yourusername)", 54 | "CHROMA_PROXY_URL": "https://reddit-mcp-vector-db.onrender.com", 55 | "CHROMA_PROXY_API_KEY": "your_proxy_api_key_here" 56 | } 57 | } 58 | } ``` 
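The `configSchema` in `package.json` above lists the environment variables an MCP client must supply when launching the server over stdio. As a minimal sketch (assuming a Claude-Desktop-style `mcpServers` entry; the command path and placeholder values would need to match your local checkout and credentials), a local client configuration could look like:

```json
{
  "mcpServers": {
    "reddit-research-mcp": {
      "command": "python",
      "args": ["src/server.py"],
      "env": {
        "REDDIT_CLIENT_ID": "your_client_id_here",
        "REDDIT_CLIENT_SECRET": "your_client_secret_here",
        "REDDIT_USER_AGENT": "MCP:reddit-research:v1.0 (by /u/yourusername)"
      }
    }
  }
}
```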
-------------------------------------------------------------------------------- /.specify/memory/constitution.md: -------------------------------------------------------------------------------- ```markdown 1 | # [PROJECT_NAME] Constitution 2 | <!-- Example: Spec Constitution, TaskFlow Constitution, etc. --> 3 | 4 | ## Core Principles 5 | 6 | ### [PRINCIPLE_1_NAME] 7 | <!-- Example: I. Library-First --> 8 | [PRINCIPLE_1_DESCRIPTION] 9 | <!-- Example: Every feature starts as a standalone library; Libraries must be self-contained, independently testable, documented; Clear purpose required - no organizational-only libraries --> 10 | 11 | ### [PRINCIPLE_2_NAME] 12 | <!-- Example: II. CLI Interface --> 13 | [PRINCIPLE_2_DESCRIPTION] 14 | <!-- Example: Every library exposes functionality via CLI; Text in/out protocol: stdin/args → stdout, errors → stderr; Support JSON + human-readable formats --> 15 | 16 | ### [PRINCIPLE_3_NAME] 17 | <!-- Example: III. Test-First (NON-NEGOTIABLE) --> 18 | [PRINCIPLE_3_DESCRIPTION] 19 | <!-- Example: TDD mandatory: Tests written → User approved → Tests fail → Then implement; Red-Green-Refactor cycle strictly enforced --> 20 | 21 | ### [PRINCIPLE_4_NAME] 22 | <!-- Example: IV. Integration Testing --> 23 | [PRINCIPLE_4_DESCRIPTION] 24 | <!-- Example: Focus areas requiring integration tests: New library contract tests, Contract changes, Inter-service communication, Shared schemas --> 25 | 26 | ### [PRINCIPLE_5_NAME] 27 | <!-- Example: V. Observability, VI. Versioning & Breaking Changes, VII. Simplicity --> 28 | [PRINCIPLE_5_DESCRIPTION] 29 | <!-- Example: Text I/O ensures debuggability; Structured logging required; Or: MAJOR.MINOR.BUILD format; Or: Start simple, YAGNI principles --> 30 | 31 | ## [SECTION_2_NAME] 32 | <!-- Example: Additional Constraints, Security Requirements, Performance Standards, etc. --> 33 | 34 | [SECTION_2_CONTENT] 35 | <!-- Example: Technology stack requirements, compliance standards, deployment policies, etc. --> 36 | 37 | ## [SECTION_3_NAME] 38 | <!-- Example: Development Workflow, Review Process, Quality Gates, etc. --> 39 | 40 | [SECTION_3_CONTENT] 41 | <!-- Example: Code review requirements, testing gates, deployment approval process, etc. --> 42 | 43 | ## Governance 44 | <!-- Example: Constitution supersedes all other practices; Amendments require documentation, approval, migration plan --> 45 | 46 | [GOVERNANCE_RULES] 47 | <!-- Example: All PRs/reviews must verify compliance; Complexity must be justified; Use [GUIDANCE_FILE] for runtime development guidance --> 48 | 49 | **Version**: [CONSTITUTION_VERSION] | **Ratified**: [RATIFICATION_DATE] | **Last Amended**: [LAST_AMENDED_DATE] 50 | <!-- Example: Version: 2.1.1 | Ratified: 2025-06-13 | Last Amended: 2025-07-16 --> ``` -------------------------------------------------------------------------------- /src/tools/search.py: -------------------------------------------------------------------------------- ```python 1 | from typing import Optional, Dict, Any, Literal 2 | import praw 3 | from prawcore import NotFound, Forbidden 4 | from fastmcp import Context 5 | from ..models import SearchResult, RedditPost 6 | 7 | 8 | def search_in_subreddit( 9 | subreddit_name: str, 10 | query: str, 11 | reddit: praw.Reddit, 12 | sort: Literal["relevance", "hot", "top", "new"] = "relevance", 13 | time_filter: Literal["all", "year", "month", "week", "day"] = "all", 14 | limit: int = 10, 15 | ctx: Context = None 16 | ) -> Dict[str, Any]: 17 | """ 18 | Search for posts within a specific subreddit. 
19 | 20 | Args: 21 | subreddit_name: Name of the subreddit to search in (required) 22 | query: Search query string 23 | reddit: Configured Reddit client 24 | sort: Sort method for results 25 | time_filter: Time filter for results 26 | limit: Maximum number of results (max 100, default 10) 27 | ctx: FastMCP context (auto-injected by decorator) 28 | 29 | Returns: 30 | Dictionary containing search results from the specified subreddit 31 | """ 32 | # Phase 1: Accept context but don't use it yet 33 | 34 | try: 35 | # Validate limit 36 | limit = min(max(1, limit), 100) 37 | 38 | # Clean subreddit name (remove r/ prefix if present) 39 | clean_name = subreddit_name.replace("r/", "").replace("/r/", "").strip() 40 | 41 | # Search within the specified subreddit 42 | try: 43 | subreddit_obj = reddit.subreddit(clean_name) 44 | # Verify subreddit exists 45 | _ = subreddit_obj.display_name 46 | 47 | search_results = subreddit_obj.search( 48 | query, 49 | sort=sort, 50 | time_filter=time_filter, 51 | limit=limit 52 | ) 53 | except NotFound: 54 | return { 55 | "error": f"Subreddit r/{clean_name} not found", 56 | "suggestion": "discover_subreddits({'query': 'topic'})" 57 | } 58 | except Forbidden: 59 | return {"error": f"Access to r/{clean_name} forbidden (may be private)"} 60 | 61 | # Parse results 62 | results = [] 63 | for submission in search_results: 64 | results.append(RedditPost( 65 | id=submission.id, 66 | title=submission.title, 67 | author=str(submission.author) if submission.author else "[deleted]", 68 | subreddit=submission.subreddit.display_name, 69 | score=submission.score, 70 | created_utc=submission.created_utc, 71 | url=submission.url, 72 | num_comments=submission.num_comments, 73 | permalink=f"https://reddit.com{submission.permalink}" 74 | )) 75 | 76 | result = SearchResult( 77 | results=results, 78 | count=len(results) 79 | ) 80 | 81 | return result.model_dump() 82 | 83 | except Exception as e: 84 | return {"error": f"Search in subreddit failed: {str(e)}"} ``` -------------------------------------------------------------------------------- /.specify/templates/spec-template.md: -------------------------------------------------------------------------------- ```markdown 1 | # Feature Specification: [FEATURE NAME] 2 | 3 | **Feature Branch**: `[###-feature-name]` 4 | **Created**: [DATE] 5 | **Status**: Draft 6 | **Input**: User description: "$ARGUMENTS" 7 | 8 | ## Execution Flow (main) 9 | ``` 10 | 1. Parse user description from Input 11 | → If empty: ERROR "No feature description provided" 12 | 2. Extract key concepts from description 13 | → Identify: actors, actions, data, constraints 14 | 3. For each unclear aspect: 15 | → Mark with [NEEDS CLARIFICATION: specific question] 16 | 4. Fill User Scenarios & Testing section 17 | → If no clear user flow: ERROR "Cannot determine user scenarios" 18 | 5. Generate Functional Requirements 19 | → Each requirement must be testable 20 | → Mark ambiguous requirements 21 | 6. Identify Key Entities (if data involved) 22 | 7. Run Review Checklist 23 | → If any [NEEDS CLARIFICATION]: WARN "Spec has uncertainties" 24 | → If implementation details found: ERROR "Remove tech details" 25 | 8. 
Return: SUCCESS (spec ready for planning) 26 | ``` 27 | 28 | --- 29 | 30 | ## ⚡ Quick Guidelines 31 | - ✅ Focus on WHAT users need and WHY 32 | - ❌ Avoid HOW to implement (no tech stack, APIs, code structure) 33 | - 👥 Written for business stakeholders, not developers 34 | 35 | ### Section Requirements 36 | - **Mandatory sections**: Must be completed for every feature 37 | - **Optional sections**: Include only when relevant to the feature 38 | - When a section doesn't apply, remove it entirely (don't leave as "N/A") 39 | 40 | ### For AI Generation 41 | When creating this spec from a user prompt: 42 | 1. **Mark all ambiguities**: Use [NEEDS CLARIFICATION: specific question] for any assumption you'd need to make 43 | 2. **Don't guess**: If the prompt doesn't specify something (e.g., "login system" without auth method), mark it 44 | 3. **Think like a tester**: Every vague requirement should fail the "testable and unambiguous" checklist item 45 | 4. **Common underspecified areas**: 46 | - User types and permissions 47 | - Data retention/deletion policies 48 | - Performance targets and scale 49 | - Error handling behaviors 50 | - Integration requirements 51 | - Security/compliance needs 52 | 53 | --- 54 | 55 | ## User Scenarios & Testing *(mandatory)* 56 | 57 | ### Primary User Story 58 | [Describe the main user journey in plain language] 59 | 60 | ### Acceptance Scenarios 61 | 1. **Given** [initial state], **When** [action], **Then** [expected outcome] 62 | 2. **Given** [initial state], **When** [action], **Then** [expected outcome] 63 | 64 | ### Edge Cases 65 | - What happens when [boundary condition]? 66 | - How does system handle [error scenario]? 67 | 68 | ## Requirements *(mandatory)* 69 | 70 | ### Functional Requirements 71 | - **FR-001**: System MUST [specific capability, e.g., "allow users to create accounts"] 72 | - **FR-002**: System MUST [specific capability, e.g., "validate email addresses"] 73 | - **FR-003**: Users MUST be able to [key interaction, e.g., "reset their password"] 74 | - **FR-004**: System MUST [data requirement, e.g., "persist user preferences"] 75 | - **FR-005**: System MUST [behavior, e.g., "log all security events"] 76 | 77 | *Example of marking unclear requirements:* 78 | - **FR-006**: System MUST authenticate users via [NEEDS CLARIFICATION: auth method not specified - email/password, SSO, OAuth?] 
79 | - **FR-007**: System MUST retain user data for [NEEDS CLARIFICATION: retention period not specified] 80 | 81 | ### Key Entities *(include if feature involves data)* 82 | - **[Entity 1]**: [What it represents, key attributes without implementation] 83 | - **[Entity 2]**: [What it represents, relationships to other entities] 84 | 85 | --- 86 | 87 | ## Review & Acceptance Checklist 88 | *GATE: Automated checks run during main() execution* 89 | 90 | ### Content Quality 91 | - [ ] No implementation details (languages, frameworks, APIs) 92 | - [ ] Focused on user value and business needs 93 | - [ ] Written for non-technical stakeholders 94 | - [ ] All mandatory sections completed 95 | 96 | ### Requirement Completeness 97 | - [ ] No [NEEDS CLARIFICATION] markers remain 98 | - [ ] Requirements are testable and unambiguous 99 | - [ ] Success criteria are measurable 100 | - [ ] Scope is clearly bounded 101 | - [ ] Dependencies and assumptions identified 102 | 103 | --- 104 | 105 | ## Execution Status 106 | *Updated by main() during processing* 107 | 108 | - [ ] User description parsed 109 | - [ ] Key concepts extracted 110 | - [ ] Ambiguities marked 111 | - [ ] User scenarios defined 112 | - [ ] Requirements generated 113 | - [ ] Entities identified 114 | - [ ] Review checklist passed 115 | 116 | --- 117 | ``` -------------------------------------------------------------------------------- /.specify/templates/tasks-template.md: -------------------------------------------------------------------------------- ```markdown 1 | # Tasks: [FEATURE NAME] 2 | 3 | **Input**: Design documents from `/specs/[###-feature-name]/` 4 | **Prerequisites**: plan.md (required), research.md, data-model.md, contracts/ 5 | 6 | ## Execution Flow (main) 7 | ``` 8 | 1. Load plan.md from feature directory 9 | → If not found: ERROR "No implementation plan found" 10 | → Extract: tech stack, libraries, structure 11 | 2. Load optional design documents: 12 | → data-model.md: Extract entities → model tasks 13 | → contracts/: Each file → contract test task 14 | → research.md: Extract decisions → setup tasks 15 | 3. Generate tasks by category: 16 | → Setup: project init, dependencies, linting 17 | → Tests: contract tests, integration tests 18 | → Core: models, services, CLI commands 19 | → Integration: DB, middleware, logging 20 | → Polish: unit tests, performance, docs 21 | 4. Apply task rules: 22 | → Different files = mark [P] for parallel 23 | → Same file = sequential (no [P]) 24 | → Tests before implementation (TDD) 25 | 5. Number tasks sequentially (T001, T002...) 26 | 6. Generate dependency graph 27 | 7. Create parallel execution examples 28 | 8. Validate task completeness: 29 | → All contracts have tests? 30 | → All entities have models? 31 | → All endpoints implemented? 32 | 9. Return: SUCCESS (tasks ready for execution) 33 | ``` 34 | 35 | ## Format: `[ID] [P?] 
Description` 36 | - **[P]**: Can run in parallel (different files, no dependencies) 37 | - Include exact file paths in descriptions 38 | 39 | ## Path Conventions 40 | - **Single project**: `src/`, `tests/` at repository root 41 | - **Web app**: `backend/src/`, `frontend/src/` 42 | - **Mobile**: `api/src/`, `ios/src/` or `android/src/` 43 | - Paths shown below assume single project - adjust based on plan.md structure 44 | 45 | ## Phase 3.1: Setup 46 | - [ ] T001 Create project structure per implementation plan 47 | - [ ] T002 Initialize [language] project with [framework] dependencies 48 | - [ ] T003 [P] Configure linting and formatting tools 49 | 50 | ## Phase 3.2: Tests First (TDD) ⚠️ MUST COMPLETE BEFORE 3.3 51 | **CRITICAL: These tests MUST be written and MUST FAIL before ANY implementation** 52 | - [ ] T004 [P] Contract test POST /api/users in tests/contract/test_users_post.py 53 | - [ ] T005 [P] Contract test GET /api/users/{id} in tests/contract/test_users_get.py 54 | - [ ] T006 [P] Integration test user registration in tests/integration/test_registration.py 55 | - [ ] T007 [P] Integration test auth flow in tests/integration/test_auth.py 56 | 57 | ## Phase 3.3: Core Implementation (ONLY after tests are failing) 58 | - [ ] T008 [P] User model in src/models/user.py 59 | - [ ] T009 [P] UserService CRUD in src/services/user_service.py 60 | - [ ] T010 [P] CLI --create-user in src/cli/user_commands.py 61 | - [ ] T011 POST /api/users endpoint 62 | - [ ] T012 GET /api/users/{id} endpoint 63 | - [ ] T013 Input validation 64 | - [ ] T014 Error handling and logging 65 | 66 | ## Phase 3.4: Integration 67 | - [ ] T015 Connect UserService to DB 68 | - [ ] T016 Auth middleware 69 | - [ ] T017 Request/response logging 70 | - [ ] T018 CORS and security headers 71 | 72 | ## Phase 3.5: Polish 73 | - [ ] T019 [P] Unit tests for validation in tests/unit/test_validation.py 74 | - [ ] T020 Performance tests (<200ms) 75 | - [ ] T021 [P] Update docs/api.md 76 | - [ ] T022 Remove duplication 77 | - [ ] T023 Run manual-testing.md 78 | 79 | ## Dependencies 80 | - Tests (T004-T007) before implementation (T008-T014) 81 | - T008 blocks T009, T015 82 | - T016 blocks T018 83 | - Implementation before polish (T019-T023) 84 | 85 | ## Parallel Example 86 | ``` 87 | # Launch T004-T007 together: 88 | Task: "Contract test POST /api/users in tests/contract/test_users_post.py" 89 | Task: "Contract test GET /api/users/{id} in tests/contract/test_users_get.py" 90 | Task: "Integration test registration in tests/integration/test_registration.py" 91 | Task: "Integration test auth in tests/integration/test_auth.py" 92 | ``` 93 | 94 | ## Notes 95 | - [P] tasks = different files, no dependencies 96 | - Verify tests fail before implementing 97 | - Commit after each task 98 | - Avoid: vague tasks, same file conflicts 99 | 100 | ## Task Generation Rules 101 | *Applied during main() execution* 102 | 103 | 1. **From Contracts**: 104 | - Each contract file → contract test task [P] 105 | - Each endpoint → implementation task 106 | 107 | 2. **From Data Model**: 108 | - Each entity → model creation task [P] 109 | - Relationships → service layer tasks 110 | 111 | 3. **From User Stories**: 112 | - Each story → integration test [P] 113 | - Quickstart scenarios → validation tasks 114 | 115 | 4. 
**Ordering**: 116 | - Setup → Tests → Models → Services → Endpoints → Polish 117 | - Dependencies block parallel execution 118 | 119 | ## Validation Checklist 120 | *GATE: Checked by main() before returning* 121 | 122 | - [ ] All contracts have corresponding tests 123 | - [ ] All entities have model tasks 124 | - [ ] All tests come before implementation 125 | - [ ] Parallel tasks truly independent 126 | - [ ] Each task specifies exact file path 127 | - [ ] No task modifies same file as another [P] task ``` -------------------------------------------------------------------------------- /src/chroma_client.py: -------------------------------------------------------------------------------- ```python 1 | """ 2 | ChromaDB Cloud client for Reddit MCP. 3 | 4 | Provides connection to ChromaDB Cloud for vector storage and retrieval. 5 | """ 6 | 7 | import os 8 | from typing import Optional, List, Dict, Any 9 | import requests 10 | 11 | 12 | _client_instance = None 13 | 14 | 15 | # ============= PROXY CLIENT CLASSES ============= 16 | class ChromaProxyClient: 17 | """Proxy client that mimics ChromaDB interface.""" 18 | 19 | def __init__(self, proxy_url: Optional[str] = None): 20 | self.url = proxy_url or os.getenv( 21 | 'CHROMA_PROXY_URL', 22 | 'https://reddit-mcp-vector-db.onrender.com' 23 | ) 24 | self.api_key = os.getenv('CHROMA_PROXY_API_KEY') 25 | self.session = requests.Session() 26 | 27 | # Set API key in session headers if provided 28 | if self.api_key: 29 | self.session.headers['X-API-Key'] = self.api_key 30 | 31 | def query(self, query_texts: List[str], n_results: int = 10) -> Dict[str, Any]: 32 | """Query through proxy.""" 33 | try: 34 | response = self.session.post( 35 | f"{self.url}/query", 36 | json={"query_texts": query_texts, "n_results": n_results}, 37 | timeout=10 38 | ) 39 | response.raise_for_status() 40 | return response.json() 41 | except requests.exceptions.HTTPError as e: 42 | if e.response.status_code == 401: 43 | raise ConnectionError("Authentication failed: API key required. Set CHROMA_PROXY_API_KEY environment variable.") 44 | elif e.response.status_code == 403: 45 | raise ConnectionError("Authentication failed: Invalid API key provided.") 46 | elif e.response.status_code == 429: 47 | raise ConnectionError("Rate limit exceeded. Please wait before retrying.") 48 | else: 49 | raise ConnectionError(f"Failed to query vector database: HTTP {e.response.status_code}") 50 | except requests.exceptions.RequestException as e: 51 | raise ConnectionError(f"Failed to query vector database: {e}") 52 | 53 | def list_collections(self) -> List[Dict[str, str]]: 54 | """Compatibility method.""" 55 | return [{"name": "reddit_subreddits"}] 56 | 57 | def count(self) -> int: 58 | """Get document count.""" 59 | try: 60 | response = self.session.get(f"{self.url}/stats", timeout=5) 61 | if response.status_code == 200: 62 | return response.json().get('total_subreddits', 20000) 63 | elif response.status_code == 401: 64 | print("Warning: Stats endpoint requires authentication. Using default count.") 65 | elif response.status_code == 403: 66 | print("Warning: Invalid API key for stats endpoint. 
Using default count.") 67 | except: 68 | pass 69 | return 20000 70 | 71 | 72 | class ProxyCollection: 73 | """Wrapper to match Chroma collection interface.""" 74 | 75 | def __init__(self, proxy_client: ChromaProxyClient): 76 | self.proxy_client = proxy_client 77 | self.name = "reddit_subreddits" 78 | 79 | def query(self, query_texts: List[str], n_results: int = 10) -> Dict[str, Any]: 80 | return self.proxy_client.query(query_texts, n_results) 81 | 82 | def count(self) -> int: 83 | return self.proxy_client.count() 84 | # ============= END PROXY CLIENT CLASSES ============= 85 | 86 | 87 | 88 | 89 | def get_chroma_client(): 90 | """ 91 | Get ChromaDB proxy client for vector database access. 92 | 93 | Returns: 94 | ChromaProxyClient instance 95 | """ 96 | global _client_instance 97 | 98 | # Return cached instance if available 99 | if _client_instance is not None: 100 | return _client_instance 101 | 102 | print("🌐 Using proxy for vector database access") 103 | _client_instance = ChromaProxyClient() 104 | return _client_instance 105 | 106 | 107 | def reset_client_cache(): 108 | """Reset the cached client instance (useful for testing).""" 109 | global _client_instance 110 | _client_instance = None 111 | 112 | 113 | def get_collection( 114 | collection_name: str = "reddit_subreddits", 115 | client = None 116 | ): 117 | """ 118 | Get ProxyCollection for vector database access. 119 | 120 | Args: 121 | collection_name: Name of the collection (always "reddit_subreddits") 122 | client: Optional client instance (uses default if not provided) 123 | 124 | Returns: 125 | ProxyCollection instance 126 | """ 127 | if client is None: 128 | client = get_chroma_client() 129 | 130 | return ProxyCollection(client) 131 | 132 | 133 | def test_connection() -> dict: 134 | """ 135 | Test proxy connection and return status information. 136 | 137 | Returns: 138 | Dictionary with connection status and details 139 | """ 140 | status = { 141 | 'mode': 'proxy', 142 | 'connected': False, 143 | 'error': None, 144 | 'collections': [], 145 | 'document_count': 0, 146 | 'authenticated': False 147 | } 148 | 149 | try: 150 | client = get_chroma_client() 151 | 152 | # Check if API key is configured 153 | if client.api_key: 154 | status['authenticated'] = True 155 | 156 | # Test connection 157 | status['connected'] = True 158 | status['collections'] = ['reddit_subreddits'] 159 | status['document_count'] = client.count() 160 | 161 | except Exception as e: 162 | status['error'] = str(e) 163 | 164 | return status ``` -------------------------------------------------------------------------------- /specs/chroma-proxy-architecture.md: -------------------------------------------------------------------------------- ```markdown 1 | # Minimal Chroma Proxy Architecture 2 | 3 | ## Problem 4 | - You have a Chroma DB with 20,000+ indexed subreddits 5 | - Users need to query it without having your credentials 6 | - MCP server code must stay open source 7 | 8 | ## Solution 9 | Create a minimal proxy service that handles Chroma queries. Users talk to your proxy, proxy talks to Chroma. 
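In practice, a single query from the MCP server to the proxy looks like the sketch below (placeholder URL and key; the request shape mirrors `ChromaProxyClient.query` in `src/chroma_client.py`, and the `X-API-Key` header only matters if the proxy enforces authentication):

```python
import requests

# Illustrative only: substitute your deployed proxy URL and API key.
response = requests.post(
    "https://your-reddit-proxy.onrender.com/query",
    json={"query_texts": ["saas founder communities"], "n_results": 5},
    headers={"X-API-Key": "your_api_key_here"},
    timeout=10,
)
response.raise_for_status()
results = response.json()  # passthrough of the underlying collection.query() result
```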
10 | 11 | ``` 12 | User → MCP Server → Your Proxy → Your Chroma DB 13 | ``` 14 | 15 | ## Implementation 16 | 17 | ### Part 1: Proxy Service (Private Repo for Render) 18 | 19 | Create a new private repository with just 2 files: 20 | 21 | #### `server.py` 22 | ```python 23 | from fastapi import FastAPI, HTTPException 24 | import chromadb 25 | import os 26 | 27 | app = FastAPI() 28 | 29 | # Connect to your Chroma DB 30 | client = chromadb.CloudClient( 31 | api_key=os.getenv('CHROMA_API_KEY'), 32 | tenant=os.getenv('CHROMA_TENANT'), 33 | database=os.getenv('CHROMA_DATABASE') 34 | ) 35 | 36 | @app.post("/query") 37 | async def query(query_texts: list[str], n_results: int = 10): 38 | """Simple proxy for Chroma queries.""" 39 | try: 40 | collection = client.get_collection("reddit_subreddits") 41 | return collection.query(query_texts=query_texts, n_results=n_results) 42 | except Exception as e: 43 | raise HTTPException(status_code=500, detail=str(e)) 44 | 45 | @app.get("/health") 46 | async def health(): 47 | return {"status": "ok"} 48 | ``` 49 | 50 | #### `requirements.txt` 51 | ``` 52 | fastapi 53 | chromadb 54 | uvicorn 55 | ``` 56 | 57 | ### Part 2: Update MCP Server (Open Source Repo) 58 | 59 | #### Add ONE new file: `src/chroma_proxy.py` 60 | ```python 61 | """Minimal proxy client for Chroma DB access.""" 62 | import os 63 | import requests 64 | 65 | class ChromaProxyClient: 66 | """Proxy client that mimics ChromaDB interface.""" 67 | 68 | def __init__(self): 69 | self.url = os.getenv('CHROMA_PROXY_URL', 'https://your-reddit-proxy.onrender.com') 70 | 71 | def query(self, query_texts, n_results=10): 72 | """Query through proxy.""" 73 | response = requests.post( 74 | f"{self.url}/query", 75 | json={"query_texts": query_texts, "n_results": n_results}, 76 | timeout=10 77 | ) 78 | response.raise_for_status() 79 | return response.json() 80 | 81 | def list_collections(self): 82 | """Compatibility method.""" 83 | return [{"name": "reddit_subreddits"}] 84 | 85 | def count(self): 86 | """Compatibility method.""" 87 | return 20000 # Known count 88 | 89 | class ProxyCollection: 90 | """Wrapper to match Chroma collection interface.""" 91 | 92 | def __init__(self, client): 93 | self.client = client 94 | 95 | def query(self, query_texts, n_results=10): 96 | return self.client.query(query_texts, n_results) 97 | 98 | def count(self): 99 | return self.client.count() 100 | ``` 101 | 102 | #### Update `src/chroma_client.py` (modify 2 functions only): 103 | 104 | 1. Update `get_chroma_client()`: 105 | ```python 106 | def get_chroma_client(): 107 | """Get ChromaDB client - proxy if no credentials, direct if available.""" 108 | global _client_instance 109 | 110 | if _client_instance is not None: 111 | return _client_instance 112 | 113 | # If no direct credentials, use proxy 114 | if not os.getenv('CHROMA_API_KEY'): 115 | from .chroma_proxy import ChromaProxyClient 116 | print("Using proxy for vector database") 117 | _client_instance = ChromaProxyClient() 118 | return _client_instance 119 | 120 | # Rest of existing code for direct connection... 121 | config = get_chroma_config() 122 | # ... existing CloudClient code ... 123 | ``` 124 | 125 | 2. 
Update `get_collection()`: 126 | ```python 127 | def get_collection(collection_name="reddit_subreddits", client=None): 128 | """Get collection - handle both proxy and direct clients.""" 129 | if client is None: 130 | client = get_chroma_client() 131 | 132 | # Handle proxy client 133 | from .chroma_proxy import ChromaProxyClient, ProxyCollection 134 | if isinstance(client, ChromaProxyClient): 135 | return ProxyCollection(client) 136 | 137 | # Rest of existing code for direct client... 138 | try: 139 | return client.get_collection(collection_name) 140 | # ... existing error handling ... 141 | ``` 142 | 143 | #### Update `Dockerfile` (add 1 line before CMD): 144 | ```dockerfile 145 | # Add this line near the end, before CMD 146 | ENV CHROMA_PROXY_URL=https://your-reddit-proxy.onrender.com 147 | ``` 148 | 149 | #### Update `pyproject.toml` (ensure requests is in dependencies): 150 | ```toml 151 | dependencies = [ 152 | # ... existing dependencies ... 153 | "requests>=2.31.0", # Add if not present 154 | ] 155 | ``` 156 | 157 | ### Part 3: Deploy to Render 158 | 159 | #### Deploy the Proxy: 160 | 161 | 1. Push proxy code to private GitHub repo 162 | 2. In Render Dashboard: 163 | - New → Web Service 164 | - Connect your private repo 165 | - Build Command: `pip install -r requirements.txt` 166 | - Start Command: `uvicorn server:app --host 0.0.0.0 --port $PORT` 167 | - Add Environment Variables: 168 | - `CHROMA_API_KEY` = your-key 169 | - `CHROMA_TENANT` = your-tenant 170 | - `CHROMA_DATABASE` = your-database 171 | 3. Deploy and note the URL (e.g., `https://reddit-proxy-abc.onrender.com`) 172 | 173 | #### Update MCP Server: 174 | 175 | 1. Change the proxy URL in `Dockerfile` to your Render URL 176 | 2. Commit and push to GitHub 177 | 3. Deploy to Smithery 178 | 179 | ## That's It! 180 | 181 | Total changes: 182 | - **New files**: 1 proxy client file 183 | - **Modified files**: 2 functions in chroma_client.py, 1 line in Dockerfile 184 | - **Unchanged**: discover.py and all other tool files work as-is 185 | 186 | ## How It Works 187 | 188 | 1. When `discover.py` calls `get_chroma_client()`: 189 | - If no Chroma credentials → returns proxy client 190 | - If credentials present → returns direct client 191 | 192 | 2. Proxy client mimics Chroma's `query()` interface exactly 193 | 194 | 3. 
Users only need Reddit credentials, vector search "just works" 195 | 196 | ## Testing Locally 197 | 198 | ```bash 199 | # Test proxy 200 | cd reddit-proxy 201 | CHROMA_API_KEY=xxx CHROMA_TENANT=yyy CHROMA_DATABASE=zzz uvicorn server:app --reload 202 | 203 | # Test MCP with proxy 204 | cd reddit-mcp-poc 205 | CHROMA_PROXY_URL=http://localhost:8000 python src/server.py 206 | ``` 207 | 208 | ## Cost & Security Notes 209 | 210 | - Render free tier works fine for testing 211 | - Add rate limiting later if needed 212 | - Proxy only exposes one endpoint (`/query`) 213 | - No user authentication needed initially (can add later) 214 | 215 | ## Why This Approach 216 | 217 | - **Minimal**: ~50 lines of new code total 218 | - **No breaking changes**: discover.py unchanged 219 | - **Simple deployment**: 2 files to Render, done 220 | - **Flexible**: Users with own Chroma can still use direct connection 221 | - **Secure**: Your credentials never exposed ``` -------------------------------------------------------------------------------- /src/tools/comments.py: -------------------------------------------------------------------------------- ```python 1 | from typing import Optional, Dict, Any, Literal, List 2 | import praw 3 | from praw.models import Submission, Comment as PrawComment, MoreComments 4 | from prawcore import NotFound, Forbidden 5 | from fastmcp import Context 6 | from ..models import SubmissionWithCommentsResult, RedditPost, Comment 7 | 8 | 9 | def parse_comment_tree( 10 | comment: PrawComment, 11 | depth: int = 0, 12 | max_depth: int = 10, 13 | ctx: Context = None 14 | ) -> Comment: 15 | """ 16 | Recursively parse a comment and its replies into our Comment model. 17 | 18 | Args: 19 | comment: PRAW comment object 20 | depth: Current depth in the comment tree 21 | max_depth: Maximum depth to traverse 22 | ctx: FastMCP context (optional) 23 | 24 | Returns: 25 | Parsed Comment object with nested replies 26 | """ 27 | # Phase 1: Accept context but don't use it yet 28 | 29 | replies = [] 30 | if depth < max_depth and hasattr(comment, 'replies'): 31 | for reply in comment.replies: 32 | if isinstance(reply, PrawComment): 33 | replies.append(parse_comment_tree(reply, depth + 1, max_depth, ctx)) 34 | # Skip MoreComments objects for simplicity in MVP 35 | 36 | return Comment( 37 | id=comment.id, 38 | body=comment.body, 39 | author=str(comment.author) if comment.author else "[deleted]", 40 | score=comment.score, 41 | created_utc=comment.created_utc, 42 | depth=depth, 43 | replies=replies 44 | ) 45 | 46 | 47 | async def fetch_submission_with_comments( 48 | reddit: praw.Reddit, 49 | submission_id: Optional[str] = None, 50 | url: Optional[str] = None, 51 | comment_limit: int = 100, 52 | comment_sort: Literal["best", "top", "new"] = "best", 53 | ctx: Context = None 54 | ) -> Dict[str, Any]: 55 | """ 56 | Fetch a Reddit submission with its comment tree. 
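Only comments already attached to the submission are returned: "load more comments" placeholders are removed rather than expanded, and the comment_limit check is applied to a running total that counts each top-level comment plus its nested replies.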
57 | 58 | Args: 59 | reddit: Configured Reddit client 60 | submission_id: Reddit post ID 61 | url: Full URL to the post (alternative to submission_id) 62 | comment_limit: Maximum number of comments to fetch 63 | comment_sort: How to sort comments 64 | ctx: FastMCP context (auto-injected by decorator) 65 | 66 | Returns: 67 | Dictionary containing submission and comments 68 | """ 69 | # Phase 1: Accept context but don't use it yet 70 | 71 | try: 72 | # Validate that we have either submission_id or url 73 | if not submission_id and not url: 74 | return {"error": "Either submission_id or url must be provided"} 75 | 76 | # Get submission 77 | try: 78 | if submission_id: 79 | submission = reddit.submission(id=submission_id) 80 | else: 81 | submission = reddit.submission(url=url) 82 | 83 | # Force fetch to check if submission exists 84 | _ = submission.title 85 | except NotFound: 86 | return {"error": "Submission not found"} 87 | except Forbidden: 88 | return {"error": "Access to submission forbidden"} 89 | except Exception as e: 90 | return {"error": f"Invalid submission reference: {str(e)}"} 91 | 92 | # Set comment sort 93 | submission.comment_sort = comment_sort 94 | 95 | # Replace "More Comments" with actual comments (up to limit) 96 | submission.comments.replace_more(limit=0) # Don't expand "more" comments in MVP 97 | 98 | # Parse submission 99 | submission_data = RedditPost( 100 | id=submission.id, 101 | title=submission.title, 102 | selftext=submission.selftext if submission.selftext else "", 103 | author=str(submission.author) if submission.author else "[deleted]", 104 | subreddit=submission.subreddit.display_name, 105 | score=submission.score, 106 | upvote_ratio=submission.upvote_ratio, 107 | num_comments=submission.num_comments, 108 | created_utc=submission.created_utc, 109 | url=submission.url 110 | ) 111 | 112 | # Parse comments 113 | comments = [] 114 | comment_count = 0 115 | 116 | for top_level_comment in submission.comments: 117 | # In tests, we might get regular Mock objects instead of PrawComment 118 | # Check if it has the required attributes 119 | if hasattr(top_level_comment, 'id') and hasattr(top_level_comment, 'body'): 120 | if comment_count >= comment_limit: 121 | break 122 | 123 | # Report progress before processing comment 124 | if ctx: 125 | await ctx.report_progress( 126 | progress=comment_count, 127 | total=comment_limit, 128 | message=f"Loading comments ({comment_count}/{comment_limit})" 129 | ) 130 | 131 | if isinstance(top_level_comment, PrawComment): 132 | comments.append(parse_comment_tree(top_level_comment, ctx=ctx)) 133 | else: 134 | # Handle mock objects in tests 135 | comments.append(Comment( 136 | id=top_level_comment.id, 137 | body=top_level_comment.body, 138 | author=str(top_level_comment.author) if top_level_comment.author else "[deleted]", 139 | score=top_level_comment.score, 140 | created_utc=top_level_comment.created_utc, 141 | depth=0, 142 | replies=[] 143 | )) 144 | # Count all comments including replies 145 | comment_count += 1 + count_replies(comments[-1]) 146 | 147 | # Report final completion 148 | if ctx: 149 | await ctx.report_progress( 150 | progress=comment_count, 151 | total=comment_limit, 152 | message=f"Completed: {comment_count} comments loaded" 153 | ) 154 | 155 | result = SubmissionWithCommentsResult( 156 | submission=submission_data, 157 | comments=comments, 158 | total_comments_fetched=comment_count 159 | ) 160 | 161 | return result.model_dump() 162 | 163 | except Exception as e: 164 | return {"error": f"Failed to fetch submission: 
{str(e)}"} 165 | 166 | 167 | def count_replies(comment: Comment) -> int: 168 | """Count total number of replies in a comment tree.""" 169 | count = len(comment.replies) 170 | for reply in comment.replies: 171 | count += count_replies(reply) 172 | return count ``` -------------------------------------------------------------------------------- /.specify/scripts/bash/update-agent-context.sh: -------------------------------------------------------------------------------- ```bash 1 | #!/usr/bin/env bash 2 | set -e 3 | REPO_ROOT=$(git rev-parse --show-toplevel) 4 | CURRENT_BRANCH=$(git rev-parse --abbrev-ref HEAD) 5 | FEATURE_DIR="$REPO_ROOT/specs/$CURRENT_BRANCH" 6 | NEW_PLAN="$FEATURE_DIR/plan.md" 7 | CLAUDE_FILE="$REPO_ROOT/CLAUDE.md"; GEMINI_FILE="$REPO_ROOT/GEMINI.md"; COPILOT_FILE="$REPO_ROOT/.github/copilot-instructions.md"; CURSOR_FILE="$REPO_ROOT/.cursor/rules/specify-rules.mdc"; QWEN_FILE="$REPO_ROOT/QWEN.md"; AGENTS_FILE="$REPO_ROOT/AGENTS.md" 8 | AGENT_TYPE="$1" 9 | [ -f "$NEW_PLAN" ] || { echo "ERROR: No plan.md found at $NEW_PLAN"; exit 1; } 10 | echo "=== Updating agent context files for feature $CURRENT_BRANCH ===" 11 | NEW_LANG=$(grep "^**Language/Version**: " "$NEW_PLAN" 2>/dev/null | head -1 | sed 's/^**Language\/Version**: //' | grep -v "NEEDS CLARIFICATION" || echo "") 12 | NEW_FRAMEWORK=$(grep "^**Primary Dependencies**: " "$NEW_PLAN" 2>/dev/null | head -1 | sed 's/^**Primary Dependencies**: //' | grep -v "NEEDS CLARIFICATION" || echo "") 13 | NEW_DB=$(grep "^**Storage**: " "$NEW_PLAN" 2>/dev/null | head -1 | sed 's/^**Storage**: //' | grep -v "N/A" | grep -v "NEEDS CLARIFICATION" || echo "") 14 | NEW_PROJECT_TYPE=$(grep "^**Project Type**: " "$NEW_PLAN" 2>/dev/null | head -1 | sed 's/^**Project Type**: //' || echo "") 15 | update_agent_file() { local target_file="$1" agent_name="$2"; echo "Updating $agent_name context file: $target_file"; local temp_file=$(mktemp); if [ ! 
-f "$target_file" ]; then 16 | echo "Creating new $agent_name context file..."; if [ -f "$REPO_ROOT/.specify/templates/agent-file-template.md" ]; then cp "$REPO_ROOT/.specify/templates/agent-file-template.md" "$temp_file"; else echo "ERROR: Template not found"; return 1; fi; 17 | sed -i.bak "s/\[PROJECT NAME\]/$(basename $REPO_ROOT)/" "$temp_file"; sed -i.bak "s/\[DATE\]/$(date +%Y-%m-%d)/" "$temp_file"; sed -i.bak "s/\[EXTRACTED FROM ALL PLAN.MD FILES\]/- $NEW_LANG + $NEW_FRAMEWORK ($CURRENT_BRANCH)/" "$temp_file"; 18 | if [[ "$NEW_PROJECT_TYPE" == *"web"* ]]; then sed -i.bak "s|\[ACTUAL STRUCTURE FROM PLANS\]|backend/\nfrontend/\ntests/|" "$temp_file"; else sed -i.bak "s|\[ACTUAL STRUCTURE FROM PLANS\]|src/\ntests/|" "$temp_file"; fi; 19 | if [[ "$NEW_LANG" == *"Python"* ]]; then COMMANDS="cd src && pytest && ruff check ."; elif [[ "$NEW_LANG" == *"Rust"* ]]; then COMMANDS="cargo test && cargo clippy"; elif [[ "$NEW_LANG" == *"JavaScript"* ]] || [[ "$NEW_LANG" == *"TypeScript"* ]]; then COMMANDS="npm test && npm run lint"; else COMMANDS="# Add commands for $NEW_LANG"; fi; sed -i.bak "s|\[ONLY COMMANDS FOR ACTIVE TECHNOLOGIES\]|$COMMANDS|" "$temp_file"; 20 | sed -i.bak "s|\[LANGUAGE-SPECIFIC, ONLY FOR LANGUAGES IN USE\]|$NEW_LANG: Follow standard conventions|" "$temp_file"; sed -i.bak "s|\[LAST 3 FEATURES AND WHAT THEY ADDED\]|- $CURRENT_BRANCH: Added $NEW_LANG + $NEW_FRAMEWORK|" "$temp_file"; rm "$temp_file.bak"; 21 | else 22 | echo "Updating existing $agent_name context file..."; manual_start=$(grep -n "<!-- MANUAL ADDITIONS START -->" "$target_file" | cut -d: -f1); manual_end=$(grep -n "<!-- MANUAL ADDITIONS END -->" "$target_file" | cut -d: -f1); if [ -n "$manual_start" ] && [ -n "$manual_end" ]; then sed -n "${manual_start},${manual_end}p" "$target_file" > /tmp/manual_additions.txt; fi; 23 | python3 - "$target_file" <<'EOF' 24 | import re,sys,datetime 25 | target=sys.argv[1] 26 | with open(target) as f: content=f.read() 27 | NEW_LANG="'$NEW_LANG'";NEW_FRAMEWORK="'$NEW_FRAMEWORK'";CURRENT_BRANCH="'$CURRENT_BRANCH'";NEW_DB="'$NEW_DB'";NEW_PROJECT_TYPE="'$NEW_PROJECT_TYPE'" 28 | # Tech section 29 | m=re.search(r'## Active Technologies\n(.*?)\n\n',content, re.DOTALL) 30 | if m: 31 | existing=m.group(1) 32 | additions=[] 33 | if '$NEW_LANG' and '$NEW_LANG' not in existing: additions.append(f"- $NEW_LANG + $NEW_FRAMEWORK ($CURRENT_BRANCH)") 34 | if '$NEW_DB' and '$NEW_DB' not in existing and '$NEW_DB'!='N/A': additions.append(f"- $NEW_DB ($CURRENT_BRANCH)") 35 | if additions: 36 | new_block=existing+"\n"+"\n".join(additions) 37 | content=content.replace(m.group(0),f"## Active Technologies\n{new_block}\n\n") 38 | # Recent changes 39 | m2=re.search(r'## Recent Changes\n(.*?)(\n\n|$)',content, re.DOTALL) 40 | if m2: 41 | lines=[l for l in m2.group(1).strip().split('\n') if l] 42 | lines.insert(0,f"- $CURRENT_BRANCH: Added $NEW_LANG + $NEW_FRAMEWORK") 43 | lines=lines[:3] 44 | content=re.sub(r'## Recent Changes\n.*?(\n\n|$)', '## Recent Changes\n'+"\n".join(lines)+'\n\n', content, flags=re.DOTALL) 45 | content=re.sub(r'Last updated: \d{4}-\d{2}-\d{2}', 'Last updated: '+datetime.datetime.now().strftime('%Y-%m-%d'), content) 46 | open(target+'.tmp','w').write(content) 47 | EOF 48 | mv "$target_file.tmp" "$target_file"; if [ -f /tmp/manual_additions.txt ]; then sed -i.bak '/<!-- MANUAL ADDITIONS START -->/,/<!-- MANUAL ADDITIONS END -->/d' "$target_file"; cat /tmp/manual_additions.txt >> "$target_file"; rm /tmp/manual_additions.txt "$target_file.bak"; fi; 49 | fi; mv "$temp_file" "$target_file" 
2>/dev/null || true; echo "✅ $agent_name context file updated successfully"; } 50 | case "$AGENT_TYPE" in 51 | claude) update_agent_file "$CLAUDE_FILE" "Claude Code" ;; 52 | gemini) update_agent_file "$GEMINI_FILE" "Gemini CLI" ;; 53 | copilot) update_agent_file "$COPILOT_FILE" "GitHub Copilot" ;; 54 | cursor) update_agent_file "$CURSOR_FILE" "Cursor IDE" ;; 55 | qwen) update_agent_file "$QWEN_FILE" "Qwen Code" ;; 56 | opencode) update_agent_file "$AGENTS_FILE" "opencode" ;; 57 | "") [ -f "$CLAUDE_FILE" ] && update_agent_file "$CLAUDE_FILE" "Claude Code"; \ 58 | [ -f "$GEMINI_FILE" ] && update_agent_file "$GEMINI_FILE" "Gemini CLI"; \ 59 | [ -f "$COPILOT_FILE" ] && update_agent_file "$COPILOT_FILE" "GitHub Copilot"; \ 60 | [ -f "$CURSOR_FILE" ] && update_agent_file "$CURSOR_FILE" "Cursor IDE"; \ 61 | [ -f "$QWEN_FILE" ] && update_agent_file "$QWEN_FILE" "Qwen Code"; \ 62 | [ -f "$AGENTS_FILE" ] && update_agent_file "$AGENTS_FILE" "opencode"; \ 63 | if [ ! -f "$CLAUDE_FILE" ] && [ ! -f "$GEMINI_FILE" ] && [ ! -f "$COPILOT_FILE" ] && [ ! -f "$CURSOR_FILE" ] && [ ! -f "$QWEN_FILE" ] && [ ! -f "$AGENTS_FILE" ]; then update_agent_file "$CLAUDE_FILE" "Claude Code"; fi ;; 64 | *) echo "ERROR: Unknown agent type '$AGENT_TYPE' (expected claude|gemini|copilot|cursor|qwen|opencode)"; exit 1 ;; 65 | esac 66 | echo; echo "Summary of changes:"; [ -n "$NEW_LANG" ] && echo "- Added language: $NEW_LANG"; [ -n "$NEW_FRAMEWORK" ] && echo "- Added framework: $NEW_FRAMEWORK"; [ -n "$NEW_DB" ] && [ "$NEW_DB" != "N/A" ] && echo "- Added database: $NEW_DB"; echo; echo "Usage: $0 [claude|gemini|copilot|cursor|qwen|opencode]" 67 | ``` -------------------------------------------------------------------------------- /tests/test_tools.py: -------------------------------------------------------------------------------- ```python 1 | import pytest 2 | import sys 3 | import os 4 | from unittest.mock import Mock, MagicMock 5 | from fastmcp import Context 6 | 7 | # Add project root to Python path so relative imports work 8 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) 9 | 10 | from src.tools.search import search_in_subreddit 11 | from src.tools.posts import fetch_subreddit_posts 12 | from src.tools.comments import fetch_submission_with_comments 13 | 14 | 15 | def create_mock_submission( 16 | id="test123", 17 | title="Test Post", 18 | author="testuser", 19 | score=100, 20 | num_comments=50 21 | ): 22 | """Helper to create a mock Reddit submission.""" 23 | submission = Mock() 24 | submission.id = id 25 | submission.title = title 26 | submission.author = Mock() 27 | submission.author.__str__ = Mock(return_value=author) 28 | submission.score = score 29 | submission.num_comments = num_comments 30 | submission.created_utc = 1234567890.0 31 | submission.url = f"https://reddit.com/r/test/{id}" 32 | submission.selftext = "Test content" 33 | submission.upvote_ratio = 0.95 34 | submission.permalink = f"/r/test/comments/{id}/test_post/" 35 | submission.subreddit = Mock() 36 | submission.subreddit.display_name = "test" 37 | return submission 38 | 39 | 40 | def create_mock_comment( 41 | id="comment123", 42 | body="Test comment", 43 | author="commentuser", 44 | score=10 45 | ): 46 | """Helper to create a mock Reddit comment.""" 47 | comment = Mock() 48 | comment.id = id 49 | comment.body = body 50 | comment.author = Mock() 51 | comment.author.__str__ = Mock(return_value=author) 52 | comment.score = score 53 | comment.created_utc = 1234567890.0 54 | comment.replies = [] 55 | return comment 56 | 57 | 58 | 
@pytest.fixture 59 | def mock_context(): 60 | """Create a mock Context object for testing.""" 61 | return Mock(spec=Context) 62 | 63 | 64 | class TestSearchReddit: 65 | def test_search_reddit_success(self, mock_context): 66 | """Test successful Reddit search.""" 67 | mock_reddit = Mock() 68 | mock_subreddit = Mock() 69 | mock_subreddit.display_name = "test" 70 | mock_submissions = [ 71 | create_mock_submission(id="1", title="First Post"), 72 | create_mock_submission(id="2", title="Second Post") 73 | ] 74 | 75 | mock_subreddit.search.return_value = mock_submissions 76 | mock_reddit.subreddit.return_value = mock_subreddit 77 | 78 | result = search_in_subreddit( 79 | subreddit_name="test", 80 | query="test query", 81 | reddit=mock_reddit, 82 | limit=10, 83 | ctx=mock_context 84 | ) 85 | 86 | assert "results" in result 87 | assert result["count"] == 2 88 | assert result["results"][0]["title"] == "First Post" 89 | assert result["results"][1]["title"] == "Second Post" 90 | 91 | def test_search_reddit_subreddit_not_found(self, mock_context): 92 | """Test search with failed request.""" 93 | from prawcore import NotFound 94 | mock_reddit = Mock() 95 | mock_reddit.subreddit.side_effect = NotFound(Mock()) 96 | 97 | result = search_in_subreddit( 98 | subreddit_name="test", 99 | query="test", 100 | reddit=mock_reddit, 101 | ctx=mock_context 102 | ) 103 | 104 | assert "error" in result 105 | assert "not found" in result["error"].lower() 106 | 107 | 108 | class TestFetchSubredditPosts: 109 | def test_fetch_posts_success(self, mock_context): 110 | """Test successful fetching of subreddit posts.""" 111 | mock_reddit = Mock() 112 | mock_subreddit = Mock() 113 | mock_subreddit.display_name = "test" 114 | mock_subreddit.subscribers = 1000000 115 | mock_subreddit.public_description = "Test subreddit" 116 | 117 | mock_posts = [ 118 | create_mock_submission(id="1", title="Hot Post 1"), 119 | create_mock_submission(id="2", title="Hot Post 2") 120 | ] 121 | 122 | mock_subreddit.hot.return_value = mock_posts 123 | mock_reddit.subreddit.return_value = mock_subreddit 124 | 125 | result = fetch_subreddit_posts( 126 | subreddit_name="test", 127 | reddit=mock_reddit, 128 | listing_type="hot", 129 | limit=10, 130 | ctx=mock_context 131 | ) 132 | 133 | assert "posts" in result 134 | assert "subreddit" in result 135 | assert result["count"] == 2 136 | assert result["subreddit"]["name"] == "test" 137 | assert result["posts"][0]["title"] == "Hot Post 1" 138 | 139 | def test_fetch_posts_invalid_subreddit(self, mock_context): 140 | """Test fetching from non-existent subreddit.""" 141 | from prawcore import NotFound 142 | mock_reddit = Mock() 143 | mock_reddit.subreddit.side_effect = NotFound(Mock()) 144 | 145 | result = fetch_subreddit_posts( 146 | subreddit_name="nonexistent", 147 | reddit=mock_reddit, 148 | ctx=mock_context 149 | ) 150 | 151 | assert "error" in result 152 | assert "not found" in result["error"].lower() 153 | 154 | 155 | class TestFetchSubmissionWithComments: 156 | async def test_fetch_submission_success(self, mock_context): 157 | """Test successful fetching of submission with comments.""" 158 | mock_reddit = Mock() 159 | mock_submission = create_mock_submission() 160 | 161 | # Create mock comments 162 | mock_comment1 = create_mock_comment(id="c1", body="First comment") 163 | mock_comment2 = create_mock_comment(id="c2", body="Second comment") 164 | 165 | # Create a mock comments object that behaves like a list but has replace_more 166 | mock_comments = Mock() 167 | mock_comments.__iter__ = 
Mock(return_value=iter([mock_comment1, mock_comment2])) 168 | mock_comments.replace_more = Mock() 169 | 170 | mock_submission.comments = mock_comments 171 | mock_submission.comment_sort = "best" 172 | 173 | mock_reddit.submission.return_value = mock_submission 174 | 175 | result = await fetch_submission_with_comments( 176 | reddit=mock_reddit, 177 | submission_id="test123", 178 | comment_limit=10, 179 | ctx=mock_context 180 | ) 181 | 182 | assert "submission" in result 183 | assert "comments" in result 184 | assert result["submission"]["id"] == "test123" 185 | assert len(result["comments"]) == 2 186 | assert result["comments"][0]["body"] == "First comment" 187 | 188 | async def test_fetch_submission_not_found(self, mock_context): 189 | """Test fetching non-existent submission.""" 190 | from prawcore import NotFound 191 | mock_reddit = Mock() 192 | mock_reddit.submission.side_effect = NotFound(Mock()) 193 | 194 | result = await fetch_submission_with_comments( 195 | reddit=mock_reddit, 196 | submission_id="nonexistent", 197 | ctx=mock_context 198 | ) 199 | 200 | assert "error" in result 201 | assert "not found" in result["error"].lower() 202 | 203 | async def test_fetch_submission_no_id_or_url(self, mock_context): 204 | """Test error when neither submission_id nor url is provided.""" 205 | mock_reddit = Mock() 206 | 207 | result = await fetch_submission_with_comments( 208 | reddit=mock_reddit, 209 | ctx=mock_context 210 | ) 211 | 212 | assert "error" in result 213 | assert "submission_id or url must be provided" in result["error"] ``` -------------------------------------------------------------------------------- /reports/top-50-active-AI-subreddits.md: -------------------------------------------------------------------------------- ```markdown 1 | ''' 2 | Prompt: Can you build me a list of the top 50 most active subreddits related to AI, LLMS, ChatGPT, Claude, Claude Code, Codex, Vibe Coding 3 | ''' 4 | 5 | # Top 50 Most Active AI, LLM, and AI Coding Subreddits 6 | 7 | ## Top Tier Communities (11M+ subscribers) 8 | 1. **r/ChatGPT** - 11,114,896 subscribers - [https://reddit.com/r/ChatGPT](https://reddit.com/r/ChatGPT) 9 | 10 | ## Major Communities (1M+ subscribers) 11 | 2. **r/MachineLearning** - 2,988,159 subscribers - [https://reddit.com/r/MachineLearning](https://reddit.com/r/MachineLearning) 12 | 3. **r/OpenAI** - 2,446,435 subscribers - [https://reddit.com/r/OpenAI](https://reddit.com/r/OpenAI) 13 | 4. **r/ArtificialInteligence** - 1,551,586 subscribers - [https://reddit.com/r/ArtificialInteligence](https://reddit.com/r/ArtificialInteligence) 14 | 5. **r/artificial** - 1,135,505 subscribers - [https://reddit.com/r/artificial](https://reddit.com/r/artificial) 15 | 16 | ## Large Communities (500K+ subscribers) 17 | 6. **r/learnmachinelearning** - 547,704 subscribers - [https://reddit.com/r/learnmachinelearning](https://reddit.com/r/learnmachinelearning) 18 | 7. **r/LocalLLaMA** - 522,475 subscribers - [https://reddit.com/r/LocalLLaMA](https://reddit.com/r/LocalLLaMA) 19 | 20 | ## Established Communities (100K-500K subscribers) 21 | 8. **r/ChatGPTPro** - 486,147 subscribers - [https://reddit.com/r/ChatGPTPro](https://reddit.com/r/ChatGPTPro) 22 | 9. **r/ClaudeAI** - 311,208 subscribers - [https://reddit.com/r/ClaudeAI](https://reddit.com/r/ClaudeAI) 23 | 10. **r/ChatGPTCoding** - 309,810 subscribers - [https://reddit.com/r/ChatGPTCoding](https://reddit.com/r/ChatGPTCoding) 24 | 11. **r/aivideo** - 267,399 subscribers - [https://reddit.com/r/aivideo](https://reddit.com/r/aivideo) 25 | 12. 
**r/dalle2** - 206,091 subscribers - [https://reddit.com/r/dalle2](https://reddit.com/r/dalle2) 26 | 13. **r/AI_Agents** - 191,203 subscribers - [https://reddit.com/r/AI_Agents](https://reddit.com/r/AI_Agents) 27 | 14. **r/comfyui** - 117,893 subscribers - [https://reddit.com/r/comfyui](https://reddit.com/r/comfyui) 28 | 15. **r/machinelearningnews** - 107,720 subscribers - [https://reddit.com/r/machinelearningnews](https://reddit.com/r/machinelearningnews) 29 | 16. **r/aipromptprogramming** - 107,001 subscribers - [https://reddit.com/r/aipromptprogramming](https://reddit.com/r/aipromptprogramming) 30 | 17. **r/GeminiAI** - 104,691 subscribers - [https://reddit.com/r/GeminiAI](https://reddit.com/r/GeminiAI) 31 | 18. **r/LLMDevs** - 103,689 subscribers - [https://reddit.com/r/LLMDevs](https://reddit.com/r/LLMDevs) 32 | 19. **r/perplexity_ai** - 101,608 subscribers - [https://reddit.com/r/perplexity_ai](https://reddit.com/r/perplexity_ai) 33 | 34 | ## Active Communities (50K-100K subscribers) 35 | 20. **r/cursor** - 91,743 subscribers - [https://reddit.com/r/cursor](https://reddit.com/r/cursor) 36 | 21. **r/AIArtwork** - 83,065 subscribers - [https://reddit.com/r/AIArtwork](https://reddit.com/r/AIArtwork) 37 | 22. **r/MLQuestions** - 83,423 subscribers - [https://reddit.com/r/MLQuestions](https://reddit.com/r/MLQuestions) 38 | 23. **r/nocode** - 82,361 subscribers - [https://reddit.com/r/nocode](https://reddit.com/r/nocode) 39 | 24. **r/LocalLLM** - 81,986 subscribers - [https://reddit.com/r/LocalLLM](https://reddit.com/r/LocalLLM) 40 | 25. **r/ChatGPT_FR** - 81,642 subscribers - [https://reddit.com/r/ChatGPT_FR](https://reddit.com/r/ChatGPT_FR) 41 | 26. **r/GoogleGeminiAI** - 77,148 subscribers - [https://reddit.com/r/GoogleGeminiAI](https://reddit.com/r/GoogleGeminiAI) 42 | 27. **r/AIAssisted** - 71,088 subscribers - [https://reddit.com/r/AIAssisted](https://reddit.com/r/AIAssisted) 43 | 28. **r/reinforcementlearning** - 65,979 subscribers - [https://reddit.com/r/reinforcementlearning](https://reddit.com/r/reinforcementlearning) 44 | 29. **r/WritingWithAI** - 54,806 subscribers - [https://reddit.com/r/WritingWithAI](https://reddit.com/r/WritingWithAI) 45 | 30. **r/outlier_ai** - 52,105 subscribers - [https://reddit.com/r/outlier_ai](https://reddit.com/r/outlier_ai) 46 | 31. **r/SillyTavernAI** - 51,310 subscribers - [https://reddit.com/r/SillyTavernAI](https://reddit.com/r/SillyTavernAI) 47 | 48 | ## Growing Communities (20K-50K subscribers) 49 | 32. **r/PygmalionAI** - 47,809 subscribers - [https://reddit.com/r/PygmalionAI](https://reddit.com/r/PygmalionAI) 50 | 33. **r/AgentsOfAI** - 46,494 subscribers - [https://reddit.com/r/AgentsOfAI](https://reddit.com/r/AgentsOfAI) 51 | 34. **r/bigsleep** - 41,078 subscribers - [https://reddit.com/r/bigsleep](https://reddit.com/r/bigsleep) 52 | 35. **r/antiai** - 37,034 subscribers - [https://reddit.com/r/antiai](https://reddit.com/r/antiai) 53 | 36. **r/MachineLearningJobs** - 34,514 subscribers - [https://reddit.com/r/MachineLearningJobs](https://reddit.com/r/MachineLearningJobs) 54 | 37. **r/chatgpt_promptDesign** - 32,368 subscribers - [https://reddit.com/r/chatgpt_promptDesign](https://reddit.com/r/chatgpt_promptDesign) 55 | 38. **r/tensorflow** - 31,369 subscribers - [https://reddit.com/r/tensorflow](https://reddit.com/r/tensorflow) 56 | 39. **r/AiChatGPT** - 31,346 subscribers - [https://reddit.com/r/AiChatGPT](https://reddit.com/r/AiChatGPT) 57 | 40. 
**r/neuralnetworks** - 29,721 subscribers - [https://reddit.com/r/neuralnetworks](https://reddit.com/r/neuralnetworks) 58 | 41. **r/civitai** - 28,446 subscribers - [https://reddit.com/r/civitai](https://reddit.com/r/civitai) 59 | 42. **r/MistralAI** - 24,897 subscribers - [https://reddit.com/r/MistralAI](https://reddit.com/r/MistralAI) 60 | 43. **r/pytorch** - 22,695 subscribers - [https://reddit.com/r/pytorch](https://reddit.com/r/pytorch) 61 | 44. **r/PromptDesign** - 21,679 subscribers - [https://reddit.com/r/PromptDesign](https://reddit.com/r/PromptDesign) 62 | 45. **r/FetchAI_Community** - 21,415 subscribers - [https://reddit.com/r/FetchAI_Community](https://reddit.com/r/FetchAI_Community) 63 | 46. **r/Chub_AI** - 21,163 subscribers - [https://reddit.com/r/Chub_AI](https://reddit.com/r/Chub_AI) 64 | 47. **r/generativeAI** - 21,036 subscribers - [https://reddit.com/r/generativeAI](https://reddit.com/r/generativeAI) 65 | 48. **r/aifails** - 20,511 subscribers - [https://reddit.com/r/aifails](https://reddit.com/r/aifails) 66 | 49. **r/ClaudeCode** - 20,480 subscribers - [https://reddit.com/r/ClaudeCode](https://reddit.com/r/ClaudeCode) 67 | 50. **r/CodingJobs** - 20,607 subscribers - [https://reddit.com/r/CodingJobs](https://reddit.com/r/CodingJobs) 68 | 69 | ## Special Mentions (AI Coding Tools) 70 | - **r/CursorAI** - 8,392 subscribers - [https://reddit.com/r/CursorAI](https://reddit.com/r/CursorAI) 71 | - **r/RooCode** - 15,316 subscribers - [https://reddit.com/r/RooCode](https://reddit.com/r/RooCode) 72 | - **r/BlackboxAI_** - 8,357 subscribers - [https://reddit.com/r/BlackboxAI_](https://reddit.com/r/BlackboxAI_) 73 | 74 | ## Summary Statistics 75 | - **Total Combined Subscribers**: ~23.5 million (accounting for overlaps) 76 | - **Largest Community**: r/ChatGPT with over 11 million subscribers 77 | - **Categories Covered**: 78 | - General AI/ML discussions 79 | - LLM-specific communities (ChatGPT, Claude, LLaMA, etc.) 80 | - AI coding and development tools 81 | - Job boards and professional development 82 | - AI art and creative applications 83 | - Research and academic discussions ``` -------------------------------------------------------------------------------- /specs/agent-reasoning-visibility.md: -------------------------------------------------------------------------------- ```markdown 1 | # Deep Agent Reasoning Visibility with Streaming 2 | 3 | ## Understanding the Goal 4 | You want to see the actual LLM reasoning process (thinking tokens) for each agent, streamed in real-time to debug logs, similar to how you see UV's debug output. 5 | 6 | ## Proposed Implementation 7 | 8 | ### 1. 
Enable OpenAI Agents SDK Streaming & Tracing 9 | ```python 10 | from agents import Runner, RunConfig 11 | from agents.streaming import StreamingRunResult 12 | import logging 13 | 14 | # Configure logging for agent traces 15 | logging.basicConfig( 16 | level=logging.DEBUG, 17 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' 18 | ) 19 | logger = logging.getLogger('reddit_research_agent') 20 | 21 | async def execute_reddit_research(query: str): 22 | # Enable tracing and streaming 23 | run_config = RunConfig( 24 | model="gpt-4", 25 | trace_metadata={"query": query}, 26 | workflow_name="reddit_research", 27 | # Enable detailed tracing 28 | trace_include_sensitive_data=True, 29 | ) 30 | 31 | # Use streaming runner for real-time output 32 | logger.debug(f"🎯 ORCHESTRATOR starting for query: {query}") 33 | 34 | # Stream orchestrator reasoning 35 | orchestrator_stream = await Runner.run_streamed( 36 | orchestrator, 37 | query, 38 | run_config=run_config 39 | ) 40 | 41 | # Process streaming events 42 | async for event in orchestrator_stream.stream_events(): 43 | if event.type == "reasoning": 44 | logger.debug(f"[ORCHESTRATOR THINKING] {event.content}") 45 | elif event.type == "tool_call": 46 | logger.debug(f"[ORCHESTRATOR ACTION] Calling: {event.tool_name}") 47 | 48 | search_plan = orchestrator_stream.final_output_as(SearchTaskPlan) 49 | ``` 50 | 51 | ### 2. Add Custom Context Wrapper for Reasoning Capture 52 | ```python 53 | class ReasoningCapture: 54 | """Capture and log agent reasoning in real-time""" 55 | 56 | def __init__(self, agent_name: str): 57 | self.agent_name = agent_name 58 | self.logger = logging.getLogger(f'agent.{agent_name}') 59 | 60 | async def wrap_agent_run(self, agent, input_data, context=None): 61 | self.logger.debug(f"[{self.agent_name}] Starting with input: {input_data[:100]}...") 62 | 63 | # Run with streaming to capture reasoning 64 | result = await Runner.run_streamed(agent, input_data, context=context) 65 | 66 | reasoning_tokens = [] 67 | async for event in result.stream_events(): 68 | if event.type in ["reasoning", "thinking"]: 69 | reasoning_tokens.append(event.content) 70 | self.logger.debug(f"[{self.agent_name} REASONING] {event.content}") 71 | elif event.type == "output": 72 | self.logger.debug(f"[{self.agent_name} OUTPUT] {event.content[:200]}...") 73 | 74 | # Log summary 75 | self.logger.info(f"[{self.agent_name}] Complete. Reasoning tokens: {len(reasoning_tokens)}") 76 | 77 | return result 78 | ``` 79 | 80 | ### 3. Environment Variable for Debug Mode 81 | ```python 82 | import os 83 | 84 | # Add debug mode toggle 85 | DEBUG_AGENTS = os.getenv("DEBUG_AGENTS", "false").lower() == "true" 86 | STREAM_REASONING = os.getenv("STREAM_REASONING", "false").lower() == "true" 87 | 88 | async def get_reddit_instance(debug=False): 89 | # Only show auth debug if DEBUG_AGENTS is enabled 90 | if debug and DEBUG_AGENTS: 91 | print(f"🔐 Reddit Auth Debug:...") 92 | ``` 93 | 94 | ### 4. Run Script with Debug Flags 95 | ```bash 96 | # In the script header, add environment variable support 97 | #!/usr/bin/env -S DEBUG_AGENTS=true STREAM_REASONING=true uv run --verbose --script 98 | 99 | # Or run with: 100 | DEBUG_AGENTS=true STREAM_REASONING=true uv run --verbose reddit_research_agent.py 101 | ``` 102 | 103 | ### 5. 
Structured Logging Output 104 | ```python 105 | # Configure different log levels for different components 106 | logging.getLogger('agent.orchestrator').setLevel(logging.DEBUG) 107 | logging.getLogger('agent.search_worker').setLevel(logging.INFO) 108 | logging.getLogger('agent.discovery_worker').setLevel(logging.INFO) 109 | logging.getLogger('agent.validation_worker').setLevel(logging.INFO) 110 | logging.getLogger('agent.synthesizer').setLevel(logging.DEBUG) 111 | logging.getLogger('asyncpraw').setLevel(logging.WARNING) # Reduce Reddit noise 112 | ``` 113 | 114 | ### 6. Custom Debug Output Format 115 | ```python 116 | class AgentDebugFormatter(logging.Formatter): 117 | """Custom formatter for agent debug output""" 118 | 119 | COLORS = { 120 | 'DEBUG': '\033[36m', # Cyan 121 | 'INFO': '\033[32m', # Green 122 | 'WARNING': '\033[33m', # Yellow 123 | 'ERROR': '\033[31m', # Red 124 | 'REASONING': '\033[35m', # Magenta 125 | } 126 | RESET = '\033[0m' 127 | 128 | def format(self, record): 129 | # Add colors for terminal output 130 | if hasattr(record, 'reasoning'): 131 | color = self.COLORS.get('REASONING', '') 132 | record.msg = f"{color}[THINKING] {record.msg}{self.RESET}" 133 | 134 | return super().format(record) 135 | 136 | # Apply formatter 137 | handler = logging.StreamHandler() 138 | handler.setFormatter(AgentDebugFormatter( 139 | '%(asctime)s | %(name)s | %(message)s' 140 | )) 141 | logging.root.addHandler(handler) 142 | ``` 143 | 144 | ## Expected Output with Deep Visibility: 145 | ``` 146 | $ DEBUG_AGENTS=true STREAM_REASONING=true uv run --verbose reddit_research_agent.py 147 | 148 | 2024-01-15 10:23:45 | agent.orchestrator | [ORCHESTRATOR THINKING] The user is asking about Trump and Putin in Alaska. I need to identify: 149 | 2024-01-15 10:23:45 | agent.orchestrator | [ORCHESTRATOR THINKING] 1. Core entities: Trump (person), Putin (person), Alaska (location) 150 | 2024-01-15 10:23:46 | agent.orchestrator | [ORCHESTRATOR THINKING] 2. These are political figures, so political subreddits would be relevant 151 | 2024-01-15 10:23:46 | agent.orchestrator | [ORCHESTRATOR THINKING] 3. For direct searches, I'll use single terms like "trump", "putin", "alaska" 152 | 2024-01-15 10:23:47 | agent.orchestrator | [ORCHESTRATOR OUTPUT] SearchTaskPlan(direct_searches=['trump', 'putin', 'alaska'], ...) 153 | 154 | 2024-01-15 10:23:48 | agent.search_worker | [SEARCH_WORKER THINKING] I received terms: trump, putin, alaska 155 | 2024-01-15 10:23:48 | agent.search_worker | [SEARCH_WORKER THINKING] These are potential subreddit names. I'll search each one. 156 | 2024-01-15 10:23:49 | agent.search_worker | [SEARCH_WORKER ACTION] Calling search_subreddits_tool(query='trump') 157 | 2024-01-15 10:23:50 | reddit.api | Searching for communities matching: 'trump' 158 | 2024-01-15 10:23:51 | reddit.api | Found 24 communities 159 | ``` 160 | 161 | ## Benefits: 162 | 1. **Real thinking tokens**: See actual LLM reasoning, not just formatted output 163 | 2. **Streaming visibility**: Watch agents think in real-time 164 | 3. **Debug control**: Toggle verbosity with environment variables 165 | 4. **Performance metrics**: Track reasoning token usage per agent 166 | 5. **Structured logs**: Filter by agent or log level 167 | 6. 
**UV integration**: Works alongside UV's --verbose flag 168 | 169 | ## Alternative: OpenAI Tracing Dashboard 170 | The OpenAI Agents SDK also supports sending traces to their dashboard: 171 | ```python 172 | # Traces will appear at https://platform.openai.com/traces 173 | run_config = RunConfig( 174 | workflow_name="reddit_research", 175 | trace_id=f"reddit_{timestamp}", 176 | trace_metadata={"query": query, "version": "1.0"} 177 | ) 178 | ``` 179 | 180 | This gives you a web UI to explore agent reasoning after execution. 181 | 182 | ## Implementation Priority 183 | 1. Start with environment variable debug flags (easiest) 184 | 2. Add structured logging with custom formatter 185 | 3. Implement streaming for orchestrator and synthesizer (most valuable) 186 | 4. Add streaming for worker agents if needed 187 | 5. Consider OpenAI dashboard for production monitoring ``` -------------------------------------------------------------------------------- /.specify/templates/plan-template.md: -------------------------------------------------------------------------------- ```markdown 1 | 2 | # Implementation Plan: [FEATURE] 3 | 4 | **Branch**: `[###-feature-name]` | **Date**: [DATE] | **Spec**: [link] 5 | **Input**: Feature specification from `/specs/[###-feature-name]/spec.md` 6 | 7 | ## Execution Flow (/plan command scope) 8 | ``` 9 | 1. Load feature spec from Input path 10 | → If not found: ERROR "No feature spec at {path}" 11 | 2. Fill Technical Context (scan for NEEDS CLARIFICATION) 12 | → Detect Project Type from context (web=frontend+backend, mobile=app+api) 13 | → Set Structure Decision based on project type 14 | 3. Fill the Constitution Check section based on the content of the constitution document. 15 | 4. Evaluate Constitution Check section below 16 | → If violations exist: Document in Complexity Tracking 17 | → If no justification possible: ERROR "Simplify approach first" 18 | → Update Progress Tracking: Initial Constitution Check 19 | 5. Execute Phase 0 → research.md 20 | → If NEEDS CLARIFICATION remain: ERROR "Resolve unknowns" 21 | 6. Execute Phase 1 → contracts, data-model.md, quickstart.md, agent-specific template file (e.g., `CLAUDE.md` for Claude Code, `.github/copilot-instructions.md` for GitHub Copilot, `GEMINI.md` for Gemini CLI, `QWEN.md` for Qwen Code or `AGENTS.md` for opencode). 22 | 7. Re-evaluate Constitution Check section 23 | → If new violations: Refactor design, return to Phase 1 24 | → Update Progress Tracking: Post-Design Constitution Check 25 | 8. Plan Phase 2 → Describe task generation approach (DO NOT create tasks.md) 26 | 9. STOP - Ready for /tasks command 27 | ``` 28 | 29 | **IMPORTANT**: The /plan command STOPS at step 7. 
Phases 2-4 are executed by other commands: 30 | - Phase 2: /tasks command creates tasks.md 31 | - Phase 3-4: Implementation execution (manual or via tools) 32 | 33 | ## Summary 34 | [Extract from feature spec: primary requirement + technical approach from research] 35 | 36 | ## Technical Context 37 | **Language/Version**: [e.g., Python 3.11, Swift 5.9, Rust 1.75 or NEEDS CLARIFICATION] 38 | **Primary Dependencies**: [e.g., FastAPI, UIKit, LLVM or NEEDS CLARIFICATION] 39 | **Storage**: [if applicable, e.g., PostgreSQL, CoreData, files or N/A] 40 | **Testing**: [e.g., pytest, XCTest, cargo test or NEEDS CLARIFICATION] 41 | **Target Platform**: [e.g., Linux server, iOS 15+, WASM or NEEDS CLARIFICATION] 42 | **Project Type**: [single/web/mobile - determines source structure] 43 | **Performance Goals**: [domain-specific, e.g., 1000 req/s, 10k lines/sec, 60 fps or NEEDS CLARIFICATION] 44 | **Constraints**: [domain-specific, e.g., <200ms p95, <100MB memory, offline-capable or NEEDS CLARIFICATION] 45 | **Scale/Scope**: [domain-specific, e.g., 10k users, 1M LOC, 50 screens or NEEDS CLARIFICATION] 46 | 47 | ## Constitution Check 48 | *GATE: Must pass before Phase 0 research. Re-check after Phase 1 design.* 49 | 50 | [Gates determined based on constitution file] 51 | 52 | ## Project Structure 53 | 54 | ### Documentation (this feature) 55 | ``` 56 | specs/[###-feature]/ 57 | ├── plan.md # This file (/plan command output) 58 | ├── research.md # Phase 0 output (/plan command) 59 | ├── data-model.md # Phase 1 output (/plan command) 60 | ├── quickstart.md # Phase 1 output (/plan command) 61 | ├── contracts/ # Phase 1 output (/plan command) 62 | └── tasks.md # Phase 2 output (/tasks command - NOT created by /plan) 63 | ``` 64 | 65 | ### Source Code (repository root) 66 | ``` 67 | # Option 1: Single project (DEFAULT) 68 | src/ 69 | ├── models/ 70 | ├── services/ 71 | ├── cli/ 72 | └── lib/ 73 | 74 | tests/ 75 | ├── contract/ 76 | ├── integration/ 77 | └── unit/ 78 | 79 | # Option 2: Web application (when "frontend" + "backend" detected) 80 | backend/ 81 | ├── src/ 82 | │ ├── models/ 83 | │ ├── services/ 84 | │ └── api/ 85 | └── tests/ 86 | 87 | frontend/ 88 | ├── src/ 89 | │ ├── components/ 90 | │ ├── pages/ 91 | │ └── services/ 92 | └── tests/ 93 | 94 | # Option 3: Mobile + API (when "iOS/Android" detected) 95 | api/ 96 | └── [same as backend above] 97 | 98 | ios/ or android/ 99 | └── [platform-specific structure] 100 | ``` 101 | 102 | **Structure Decision**: [DEFAULT to Option 1 unless Technical Context indicates web/mobile app] 103 | 104 | ## Phase 0: Outline & Research 105 | 1. **Extract unknowns from Technical Context** above: 106 | - For each NEEDS CLARIFICATION → research task 107 | - For each dependency → best practices task 108 | - For each integration → patterns task 109 | 110 | 2. **Generate and dispatch research agents**: 111 | ``` 112 | For each unknown in Technical Context: 113 | Task: "Research {unknown} for {feature context}" 114 | For each technology choice: 115 | Task: "Find best practices for {tech} in {domain}" 116 | ``` 117 | 118 | 3. **Consolidate findings** in `research.md` using format: 119 | - Decision: [what was chosen] 120 | - Rationale: [why chosen] 121 | - Alternatives considered: [what else evaluated] 122 | 123 | **Output**: research.md with all NEEDS CLARIFICATION resolved 124 | 125 | ## Phase 1: Design & Contracts 126 | *Prerequisites: research.md complete* 127 | 128 | 1. 
**Extract entities from feature spec** → `data-model.md`: 129 | - Entity name, fields, relationships 130 | - Validation rules from requirements 131 | - State transitions if applicable 132 | 133 | 2. **Generate API contracts** from functional requirements: 134 | - For each user action → endpoint 135 | - Use standard REST/GraphQL patterns 136 | - Output OpenAPI/GraphQL schema to `/contracts/` 137 | 138 | 3. **Generate contract tests** from contracts: 139 | - One test file per endpoint 140 | - Assert request/response schemas 141 | - Tests must fail (no implementation yet) 142 | 143 | 4. **Extract test scenarios** from user stories: 144 | - Each story → integration test scenario 145 | - Quickstart test = story validation steps 146 | 147 | 5. **Update agent file incrementally** (O(1) operation): 148 | - Run `.specify/scripts/bash/update-agent-context.sh claude` for your AI assistant 149 | - If exists: Add only NEW tech from current plan 150 | - Preserve manual additions between markers 151 | - Update recent changes (keep last 3) 152 | - Keep under 150 lines for token efficiency 153 | - Output to repository root 154 | 155 | **Output**: data-model.md, /contracts/*, failing tests, quickstart.md, agent-specific file 156 | 157 | ## Phase 2: Task Planning Approach 158 | *This section describes what the /tasks command will do - DO NOT execute during /plan* 159 | 160 | **Task Generation Strategy**: 161 | - Load `.specify/templates/tasks-template.md` as base 162 | - Generate tasks from Phase 1 design docs (contracts, data model, quickstart) 163 | - Each contract → contract test task [P] 164 | - Each entity → model creation task [P] 165 | - Each user story → integration test task 166 | - Implementation tasks to make tests pass 167 | 168 | **Ordering Strategy**: 169 | - TDD order: Tests before implementation 170 | - Dependency order: Models before services before UI 171 | - Mark [P] for parallel execution (independent files) 172 | 173 | **Estimated Output**: 25-30 numbered, ordered tasks in tasks.md 174 | 175 | **IMPORTANT**: This phase is executed by the /tasks command, NOT by /plan 176 | 177 | ## Phase 3+: Future Implementation 178 | *These phases are beyond the scope of the /plan command* 179 | 180 | **Phase 3**: Task execution (/tasks command creates tasks.md) 181 | **Phase 4**: Implementation (execute tasks.md following constitutional principles) 182 | **Phase 5**: Validation (run tests, execute quickstart.md, performance validation) 183 | 184 | ## Complexity Tracking 185 | *Fill ONLY if Constitution Check has violations that must be justified* 186 | 187 | | Violation | Why Needed | Simpler Alternative Rejected Because | 188 | |-----------|------------|-------------------------------------| 189 | | [e.g., 4th project] | [current need] | [why 3 projects insufficient] | 190 | | [e.g., Repository pattern] | [specific problem] | [why direct DB access insufficient] | 191 | 192 | 193 | ## Progress Tracking 194 | *This checklist is updated during execution flow* 195 | 196 | **Phase Status**: 197 | - [ ] Phase 0: Research complete (/plan command) 198 | - [ ] Phase 1: Design complete (/plan command) 199 | - [ ] Phase 2: Task planning complete (/plan command - describe approach only) 200 | - [ ] Phase 3: Tasks generated (/tasks command) 201 | - [ ] Phase 4: Implementation complete 202 | - [ ] Phase 5: Validation passed 203 | 204 | **Gate Status**: 205 | - [ ] Initial Constitution Check: PASS 206 | - [ ] Post-Design Constitution Check: PASS 207 | - [ ] All NEEDS CLARIFICATION resolved 208 | - [ ] Complexity 
deviations documented 209 | 210 | --- 211 | *Based on Constitution v2.1.1 - See `/memory/constitution.md`* 212 | ``` -------------------------------------------------------------------------------- /src/tools/posts.py: -------------------------------------------------------------------------------- ```python 1 | from typing import Optional, Dict, Any, Literal, List 2 | import praw 3 | from prawcore import NotFound, Forbidden 4 | from fastmcp import Context 5 | from ..models import SubredditPostsResult, RedditPost, SubredditInfo 6 | 7 | 8 | def fetch_subreddit_posts( 9 | subreddit_name: str, 10 | reddit: praw.Reddit, 11 | listing_type: Literal["hot", "new", "top", "rising"] = "hot", 12 | time_filter: Optional[Literal["all", "year", "month", "week", "day"]] = None, 13 | limit: int = 25, 14 | ctx: Context = None 15 | ) -> Dict[str, Any]: 16 | """ 17 | Fetch posts from a specific subreddit. 18 | 19 | Args: 20 | subreddit_name: Name of the subreddit (without r/ prefix) 21 | reddit: Configured Reddit client 22 | listing_type: Type of listing to fetch 23 | time_filter: Time filter for top posts 24 | limit: Maximum number of posts (max 100) 25 | ctx: FastMCP context (auto-injected by decorator) 26 | 27 | Returns: 28 | Dictionary containing posts and subreddit info 29 | """ 30 | # Phase 1: Accept context but don't use it yet 31 | 32 | try: 33 | # Validate limit 34 | limit = min(max(1, limit), 100) 35 | 36 | # Clean subreddit name (remove r/ prefix if present) 37 | clean_name = subreddit_name.replace("r/", "").replace("/r/", "").strip() 38 | 39 | # Get subreddit 40 | try: 41 | subreddit = reddit.subreddit(clean_name) 42 | # Force fetch to check if subreddit exists 43 | _ = subreddit.display_name 44 | except NotFound: 45 | return { 46 | "error": f"Subreddit r/{clean_name} not found", 47 | "suggestion": "discover_subreddits({'query': 'topic'})" 48 | } 49 | except Forbidden: 50 | return {"error": f"Access to r/{clean_name} forbidden (may be private)"} 51 | 52 | # Get posts based on listing type 53 | if listing_type == "hot": 54 | submissions = subreddit.hot(limit=limit) 55 | elif listing_type == "new": 56 | submissions = subreddit.new(limit=limit) 57 | elif listing_type == "rising": 58 | submissions = subreddit.rising(limit=limit) 59 | elif listing_type == "top": 60 | # Use time_filter for top posts 61 | time_filter = time_filter or "all" 62 | submissions = subreddit.top(time_filter=time_filter, limit=limit) 63 | else: 64 | return {"error": f"Invalid listing_type: {listing_type}"} 65 | 66 | # Parse posts 67 | posts = [] 68 | for submission in submissions: 69 | posts.append(RedditPost( 70 | id=submission.id, 71 | title=submission.title, 72 | selftext=submission.selftext if submission.selftext else None, 73 | author=str(submission.author) if submission.author else "[deleted]", 74 | subreddit=submission.subreddit.display_name, 75 | score=submission.score, 76 | upvote_ratio=submission.upvote_ratio, 77 | num_comments=submission.num_comments, 78 | created_utc=submission.created_utc, 79 | url=submission.url, 80 | permalink=f"https://reddit.com{submission.permalink}" 81 | )) 82 | 83 | # Get subreddit info 84 | subreddit_info = SubredditInfo( 85 | name=subreddit.display_name, 86 | subscribers=subreddit.subscribers, 87 | description=subreddit.public_description or "" 88 | ) 89 | 90 | result = SubredditPostsResult( 91 | posts=posts, 92 | subreddit=subreddit_info, 93 | count=len(posts) 94 | ) 95 | 96 | return result.model_dump() 97 | 98 | except Exception as e: 99 | return {"error": f"Failed to fetch posts: 
{str(e)}"} 100 | 101 | 102 | async def fetch_multiple_subreddits( 103 | subreddit_names: List[str], 104 | reddit: praw.Reddit, 105 | listing_type: Literal["hot", "new", "top", "rising"] = "hot", 106 | time_filter: Optional[Literal["all", "year", "month", "week", "day"]] = None, 107 | limit_per_subreddit: int = 5, 108 | ctx: Context = None 109 | ) -> Dict[str, Any]: 110 | """ 111 | Fetch posts from multiple subreddits in a single call. 112 | 113 | Args: 114 | subreddit_names: List of subreddit names to fetch from 115 | reddit: Configured Reddit client 116 | listing_type: Type of listing to fetch 117 | time_filter: Time filter for top posts 118 | limit_per_subreddit: Maximum posts per subreddit (max 25) 119 | ctx: FastMCP context (auto-injected by decorator) 120 | 121 | Returns: 122 | Dictionary containing posts from all requested subreddits 123 | """ 124 | # Phase 1: Accept context but don't use it yet 125 | 126 | try: 127 | # Validate limit 128 | limit_per_subreddit = min(max(1, limit_per_subreddit), 25) 129 | 130 | # Clean subreddit names and join with + 131 | clean_names = [name.replace("r/", "").replace("/r/", "").strip() for name in subreddit_names] 132 | multi_subreddit_str = "+".join(clean_names) 133 | 134 | # Get combined subreddit 135 | try: 136 | multi_subreddit = reddit.subreddit(multi_subreddit_str) 137 | # Calculate total limit (max 100) 138 | total_limit = min(limit_per_subreddit * len(clean_names), 100) 139 | 140 | # Get posts based on listing type 141 | if listing_type == "hot": 142 | submissions = multi_subreddit.hot(limit=total_limit) 143 | elif listing_type == "new": 144 | submissions = multi_subreddit.new(limit=total_limit) 145 | elif listing_type == "rising": 146 | submissions = multi_subreddit.rising(limit=total_limit) 147 | elif listing_type == "top": 148 | time_filter = time_filter or "all" 149 | submissions = multi_subreddit.top(time_filter=time_filter, limit=total_limit) 150 | else: 151 | return {"error": f"Invalid listing_type: {listing_type}"} 152 | 153 | # Parse posts and group by subreddit 154 | posts_by_subreddit = {} 155 | processed_subreddits = set() 156 | 157 | for submission in submissions: 158 | subreddit_name = submission.subreddit.display_name 159 | 160 | # Report progress when encountering a new subreddit 161 | if subreddit_name not in processed_subreddits: 162 | processed_subreddits.add(subreddit_name) 163 | if ctx: 164 | await ctx.report_progress( 165 | progress=len(processed_subreddits), 166 | total=len(clean_names), 167 | message=f"Fetching r/{subreddit_name}" 168 | ) 169 | 170 | if subreddit_name not in posts_by_subreddit: 171 | posts_by_subreddit[subreddit_name] = [] 172 | 173 | # Only add up to limit_per_subreddit posts per subreddit 174 | if len(posts_by_subreddit[subreddit_name]) < limit_per_subreddit: 175 | posts_by_subreddit[subreddit_name].append({ 176 | "id": submission.id, 177 | "title": submission.title, 178 | "author": str(submission.author) if submission.author else "[deleted]", 179 | "score": submission.score, 180 | "num_comments": submission.num_comments, 181 | "created_utc": submission.created_utc, 182 | "url": submission.url, 183 | "permalink": f"https://reddit.com{submission.permalink}" 184 | }) 185 | 186 | return { 187 | "subreddits_requested": clean_names, 188 | "subreddits_found": list(posts_by_subreddit.keys()), 189 | "posts_by_subreddit": posts_by_subreddit, 190 | "total_posts": sum(len(posts) for posts in posts_by_subreddit.values()) 191 | } 192 | 193 | except Exception as e: 194 | return { 195 | "error": f"Failed to fetch 
from multiple subreddits: {str(e)}", 196 | "suggestion": "discover_subreddits({'query': 'topic'}) to find valid names" 197 | } 198 | 199 | except Exception as e: 200 | return {"error": f"Failed to process request: {str(e)}"} ``` -------------------------------------------------------------------------------- /reports/saas-solopreneur-reddit-communities.md: -------------------------------------------------------------------------------- ```markdown 1 | # Top 50 Reddit Communities for SaaS Founders & Solopreneurs 2 | *Research Report for Reddit Social Listening Tool Audience Development* 3 | 4 | --- 5 | 6 | ## Executive Summary 7 | This report identifies the top 50 Reddit communities where SaaS founders and solopreneurs actively engage. These communities represent prime targets for promoting and gaining audience for a Reddit social listening tool. The communities are ranked by relevance score, subscriber count, and engagement potential. 8 | 9 | --- 10 | 11 | ## 🎯 Tier 1: Primary Target Communities (Highest Relevance) 12 | *These communities have the highest confidence scores for your ICP* 13 | 14 | ### Core Startup & SaaS Communities 15 | 16 | 1. **r/startups** - 1,891,655 subscribers | Confidence: 0.962 17 | - https://reddit.com/r/startups 18 | - Primary hub for startup discussions, perfect for SaaS tools 19 | 20 | 2. **r/SaaS** - 374,943 subscribers | Confidence: 0.660 21 | - https://reddit.com/r/SaaS 22 | - Dedicated SaaS community, ideal for social listening tool discussions 23 | 24 | 3. **r/indiehackers** - 105,674 subscribers | Confidence: 0.821 25 | - https://reddit.com/r/indiehackers 26 | - Bootstrapped founders, perfect for solopreneur tools 27 | 28 | 4. **r/SoloFounders** - 2,113 subscribers | Confidence: 0.811 29 | - https://reddit.com/r/SoloFounders 30 | - Highly targeted community for solo entrepreneurs 31 | 32 | ### Large Entrepreneur Communities 33 | 34 | 5. **r/Entrepreneur** - 4,871,109 subscribers | Confidence: 0.704 35 | - https://reddit.com/r/Entrepreneur 36 | - Massive reach for entrepreneurial tools 37 | 38 | 6. **r/EntrepreneurRideAlong** - 604,396 subscribers | Confidence: 0.793 39 | - https://reddit.com/r/EntrepreneurRideAlong 40 | - Journey-focused community, great for tool adoption stories 41 | 42 | 7. **r/Entrepreneurs** - 77,330 subscribers | Confidence: 0.872 43 | - https://reddit.com/r/Entrepreneurs 44 | - Active discussion community for business builders 45 | 46 | 8. **r/Entrepreneurship** - 99,462 subscribers | Confidence: 0.726 47 | - https://reddit.com/r/Entrepreneurship 48 | - Academic and practical entrepreneurship discussions 49 | 50 | --- 51 | 52 | ## 📊 Tier 2: Marketing & Growth Communities 53 | *Essential for social listening tool promotion* 54 | 55 | 9. **r/DigitalMarketingHack** - 34,155 subscribers | Confidence: 0.909 56 | - https://reddit.com/r/DigitalMarketingHack 57 | - Perfect for marketing automation tools 58 | 59 | 10. **r/SocialMediaMarketing** - 197,241 subscribers | Confidence: 0.754 60 | - https://reddit.com/r/SocialMediaMarketing 61 | - Direct audience for social listening tools 62 | 63 | 11. **r/socialmedia** - 2,061,330 subscribers | Confidence: 0.616 64 | - https://reddit.com/r/socialmedia 65 | - Broad social media community 66 | 67 | 12. **r/MarketingHelp** - 16,148 subscribers | Confidence: 0.701 68 | - https://reddit.com/r/MarketingHelp 69 | - Problem-solving community, great for tool recommendations 70 | 71 | 13. 
**r/SocialMediaManagers** - 20,614 subscribers | Confidence: 0.649 72 | - https://reddit.com/r/SocialMediaManagers 73 | - Professional community needing social listening tools 74 | 75 | 14. **r/ContentMarketing** - 17,436 subscribers | Confidence: 0.527 76 | - https://reddit.com/r/ContentMarketing 77 | - Content strategy discussions 78 | 79 | --- 80 | 81 | ## 💼 Tier 3: Business & Small Business Communities 82 | 83 | 15. **r/smallbusiness** - 2,211,156 subscribers | Confidence: 0.338 84 | - https://reddit.com/r/smallbusiness 85 | - Massive reach for business tools 86 | 87 | 16. **r/Business_Ideas** - 370,194 subscribers | Confidence: 0.479 88 | - https://reddit.com/r/Business_Ideas 89 | - Idea validation community 90 | 91 | 17. **r/sidehustle** - 3,124,834 subscribers | Confidence: 0.340 92 | - https://reddit.com/r/sidehustle 93 | - Side project enthusiasts 94 | 95 | 18. **r/growmybusiness** - 66,695 subscribers | Confidence: 0.327 96 | - https://reddit.com/r/growmybusiness 97 | - Growth-focused community 98 | 99 | 19. **r/sweatystartup** - 182,854 subscribers | Confidence: 0.432 100 | - https://reddit.com/r/sweatystartup 101 | - Service business focus 102 | 103 | --- 104 | 105 | ## 🚀 Tier 4: Advanced & Specialized Communities 106 | 107 | 20. **r/advancedentrepreneur** - 60,964 subscribers | Confidence: 0.682 108 | - https://reddit.com/r/advancedentrepreneur 109 | - Experienced founders who need advanced tools 110 | 111 | 21. **r/startup** - 225,696 subscribers | Confidence: 0.569 112 | - https://reddit.com/r/startup 113 | - Alternative startup community 114 | 115 | 22. **r/EntrepreneurConnect** - 5,178 subscribers | Confidence: 0.635 116 | - https://reddit.com/r/EntrepreneurConnect 117 | - Networking and connection focus 118 | 119 | 23. **r/cofounderhunt** - 16,287 subscribers | Confidence: 0.650 120 | - https://reddit.com/r/cofounderhunt 121 | - Team building community 122 | 123 | 24. **r/SaaSy** - 3,150 subscribers | Confidence: 0.653 124 | - https://reddit.com/r/SaaSy 125 | - Small but targeted SaaS community 126 | 127 | --- 128 | 129 | ## 🌍 Tier 5: Regional & International Communities 130 | 131 | 25. **r/indianstartups** - 76,422 subscribers | Confidence: 0.717 132 | - https://reddit.com/r/indianstartups 133 | - Indian startup ecosystem 134 | 135 | 26. **r/StartUpIndia** - 361,780 subscribers | Confidence: 0.487 136 | - https://reddit.com/r/StartUpIndia 137 | - Large Indian startup community 138 | 139 | 27. **r/IndianEntrepreneur** - 9,816 subscribers | Confidence: 0.593 140 | - https://reddit.com/r/IndianEntrepreneur 141 | - Indian entrepreneur focus 142 | 143 | 28. **r/PhStartups** - 20,901 subscribers | Confidence: 0.529 144 | - https://reddit.com/r/PhStartups 145 | - Philippines startup community 146 | 147 | 29. **r/Startups_EU** - 2,894 subscribers | Confidence: 0.382 148 | - https://reddit.com/r/Startups_EU 149 | - European startup community 150 | 151 | --- 152 | 153 | ## 📈 Tier 6: Marketing & Growth Specialized 154 | 155 | 30. **r/MarketingMentor** - 66,997 subscribers | Confidence: 0.593 156 | - https://reddit.com/r/MarketingMentor 157 | - Marketing education and mentorship 158 | 159 | 31. **r/Affiliatemarketing** - 239,731 subscribers | Confidence: 0.550 160 | - https://reddit.com/r/Affiliatemarketing 161 | - Performance marketing community 162 | 163 | 32. **r/musicmarketing** - 67,516 subscribers | Confidence: 0.576 164 | - https://reddit.com/r/musicmarketing 165 | - Niche marketing community 166 | 167 | 33. 
**r/MarketingResearch** - 22,931 subscribers | Confidence: 0.524 168 | - https://reddit.com/r/MarketingResearch 169 | - Research-focused marketing 170 | 171 | 34. **r/SaaS_Email_Marketing** - 7,434 subscribers | Confidence: 0.448 172 | - https://reddit.com/r/SaaS_Email_Marketing 173 | - Email marketing for SaaS 174 | 175 | --- 176 | 177 | ## 💡 Tier 7: Business Ideas & Validation 178 | 179 | 35. **r/small_business_ideas** - 23,034 subscribers | Confidence: 0.657 180 | - https://reddit.com/r/small_business_ideas 181 | - Idea generation and validation 182 | 183 | 36. **r/HowToEntrepreneur** - 3,618 subscribers | Confidence: 0.373 184 | - https://reddit.com/r/HowToEntrepreneur 185 | - Educational entrepreneurship 186 | 187 | 37. **r/PassionsToProfits** - 4,905 subscribers | Confidence: 0.361 188 | - https://reddit.com/r/PassionsToProfits 189 | - Monetization focus 190 | 191 | 38. **r/BusinessVault** - 2,889 subscribers | Confidence: 0.357 192 | - https://reddit.com/r/BusinessVault 193 | - Business knowledge sharing 194 | 195 | --- 196 | 197 | ## 🔧 Tier 8: Technical & Product Communities 198 | 199 | 39. **r/AppBusiness** - 17,876 subscribers | Confidence: 0.334 200 | - https://reddit.com/r/AppBusiness 201 | - App development business 202 | 203 | 40. **r/selfpublish** - 196,096 subscribers | Confidence: 0.495 204 | - https://reddit.com/r/selfpublish 205 | - Independent creators 206 | 207 | 41. **r/kickstarter** - 93,932 subscribers | Confidence: 0.554 208 | - https://reddit.com/r/kickstarter 209 | - Product launch community 210 | 211 | 42. **r/ClothingStartups** - 32,371 subscribers | Confidence: 0.462 212 | - https://reddit.com/r/ClothingStartups 213 | - E-commerce specific 214 | 215 | --- 216 | 217 | ## 🤖 Tier 9: AI & Automation Communities 218 | 219 | 43. **r/AiForSmallBusiness** - 8,963 subscribers | Confidence: 0.363 220 | - https://reddit.com/r/AiForSmallBusiness 221 | - AI tools for business 222 | 223 | 44. **r/CreatorsAI** - 4,509 subscribers | Confidence: 0.381 224 | - https://reddit.com/r/CreatorsAI 225 | - AI for content creators 226 | 227 | --- 228 | 229 | ## 📱 Tier 10: Social & Digital Communities 230 | 231 | 45. **r/SocialMediaLounge** - 17,166 subscribers | Confidence: 0.720 232 | - https://reddit.com/r/SocialMediaLounge 233 | - Casual social media discussions 234 | 235 | 46. **r/digitalproductselling** - 26,528 subscribers | Confidence: 0.541 236 | - https://reddit.com/r/digitalproductselling 237 | - Digital product creators 238 | 239 | 47. **r/Fiverr** - 64,568 subscribers | Confidence: 0.508 240 | - https://reddit.com/r/Fiverr 241 | - Freelance and service providers 242 | 243 | 48. **r/venturecapital** - 66,268 subscribers | Confidence: 0.484 244 | - https://reddit.com/r/venturecapital 245 | - Funding and investment focus 246 | 247 | 49. **r/YouTube_startups** - 127,440 subscribers | Confidence: 0.386 248 | - https://reddit.com/r/YouTube_startups 249 | - Content creator entrepreneurs 250 | 251 | 50. **r/LawFirm** - 84,044 subscribers | Confidence: 0.447 252 | - https://reddit.com/r/LawFirm 253 | - Legal business operations 254 | 255 | --- 256 | 257 | ## 📋 Engagement Strategy Recommendations 258 | 259 | ### Immediate Action Communities (Top 10 Priority) 260 | 1. r/SaaS - Direct product-market fit 261 | 2. r/startups - High engagement potential 262 | 3. r/indiehackers - Bootstrapped audience 263 | 4. r/SocialMediaMarketing - Direct need for tool 264 | 5. r/DigitalMarketingHack - Marketing automation focus 265 | 6. r/Entrepreneurs - Active, engaged community 266 | 7. 
r/SoloFounders - Highly targeted 267 | 8. r/EntrepreneurRideAlong - Journey documentation 268 | 9. r/advancedentrepreneur - Experienced users 269 | 10. r/SocialMediaManagers - Professional users 270 | 271 | ### Content Strategy Tips 272 | - Share case studies of Reddit research insights 273 | - Offer free audits using your tool 274 | - Create educational content about Reddit listening 275 | - Engage authentically before promoting 276 | - Follow each subreddit's self-promotion rules 277 | 278 | ### Key Metrics to Track 279 | - Total potential reach: 21.5M+ subscribers 280 | - High-confidence communities (>0.7): 10 subreddits 281 | - Medium-confidence communities (0.5-0.7): 25 subreddits 282 | - Broad reach communities (>1M subscribers): 7 subreddits 283 | 284 | --- 285 | 286 | ## 🎯 Next Steps 287 | 1. Join top 10 priority communities 288 | 2. Study each community's rules and culture 289 | 3. Create value-first content calendar 290 | 4. Build relationships before promoting 291 | 5. Track engagement and conversion metrics 292 | 293 | --- 294 | 295 | *Report generated for Reddit social listening tool audience development* 296 | *Focus on authentic engagement and value creation for best results* ``` -------------------------------------------------------------------------------- /reports/top-50-subreddits-saas-ai-builders.md: -------------------------------------------------------------------------------- ```markdown 1 | # Top 50 Subreddits for SaaS Founders, Solopreneurs, AI Developers & AI Builders 2 | 3 | *Research Date: 2025-09-20* 4 | *Generated using Reddit MCP Server with semantic vector search* 5 | 6 | ## Executive Summary 7 | 8 | This report identifies the top 50 Reddit communities where your Ideal Customer Profile (ICP) of SaaS startup founders, solopreneurs, AI developers, and AI builders actively engage. These communities have been ranked based on: 9 | - **Confidence scores** (semantic relevance to ICP) 10 | - **Subscriber count** (community size and reach) 11 | - **Topic relevance** (direct alignment with ICP interests) 12 | - **Engagement potential** (active discussion quality) 13 | 14 | ## Top 50 Subreddits - Master List 15 | 16 | ### Tier 1: Primary Target Communities (Confidence > 0.8) 17 | *These communities have the highest relevance to your ICP* 18 | 19 | 1. **r/aipromptprogramming** - 107,001 subscribers | Confidence: 0.911 20 | - AI development focus with prompt engineering 21 | - https://reddit.com/r/aipromptprogramming 22 | 23 | 2. **r/AI_Agents** - 191,203 subscribers | Confidence: 0.902 24 | - AI agent development and implementation 25 | - https://reddit.com/r/AI_Agents 26 | 27 | 3. **r/indiehackers** - 105,674 subscribers | Confidence: 0.867 28 | - Solo entrepreneurs and indie developers 29 | - https://reddit.com/r/indiehackers 30 | 31 | 4. **r/ArtificialInteligence** - 1,551,586 subscribers | Confidence: 0.838 32 | - Large AI community with diverse discussions 33 | - https://reddit.com/r/ArtificialInteligence 34 | 35 | 5. **r/SoloFounders** - 2,113 subscribers | Confidence: 0.832 36 | - Dedicated to solo entrepreneurs 37 | - https://reddit.com/r/SoloFounders 38 | 39 | 6. **r/AiBuilders** - 8,387 subscribers | Confidence: 0.826 40 | - AI builders and creators community 41 | - https://reddit.com/r/AiBuilders 42 | 43 | 7. **r/startups** - 1,891,655 subscribers | Confidence: 0.82 44 | - Large startup community 45 | - https://reddit.com/r/startups 46 | 47 | 8. 
**r/learnAIAgents** - 5,203 subscribers | Confidence: 0.814 48 | - Learning AI agent development 49 | - https://reddit.com/r/learnAIAgents 50 | 51 | 9. **r/AiAutomations** - 7,085 subscribers | Confidence: 0.811 52 | - AI automation tools and workflows 53 | - https://reddit.com/r/AiAutomations 54 | 55 | 10. **r/AI_Application** - 14,902 subscribers | Confidence: 0.801 56 | - Applied AI development 57 | - https://reddit.com/r/AI_Application 58 | 59 | ### Tier 2: High-Value Communities (Confidence 0.7 - 0.8) 60 | 61 | 11. **r/Entrepreneur** - 4,871,109 subscribers | Confidence: 0.785 62 | - Massive entrepreneurship community 63 | - https://reddit.com/r/Entrepreneur 64 | 65 | 12. **r/machinelearningnews** - 107,720 subscribers | Confidence: 0.779 66 | - ML news and developments 67 | - https://reddit.com/r/machinelearningnews 68 | 69 | 13. **r/AI_Application** - 14,902 subscribers | Confidence: 0.778 70 | - AI implementation focus 71 | - https://reddit.com/r/AI_Application 72 | 73 | 14. **r/Entrepreneurs** - 77,330 subscribers | Confidence: 0.777 74 | - Active entrepreneur discussions 75 | - https://reddit.com/r/Entrepreneurs 76 | 77 | 15. **r/EntrepreneurRideAlong** - 604,396 subscribers | Confidence: 0.775 78 | - Entrepreneurial journey sharing 79 | - https://reddit.com/r/EntrepreneurRideAlong 80 | 81 | 16. **r/EntrepreneurConnect** - 5,178 subscribers | Confidence: 0.752 82 | - Networking for entrepreneurs 83 | - https://reddit.com/r/EntrepreneurConnect 84 | 85 | 17. **r/AutoGenAI** - 7,165 subscribers | Confidence: 0.726 86 | - Automated AI generation tools 87 | - https://reddit.com/r/AutoGenAI 88 | 89 | 18. **r/Entrepreneurship** - 99,462 subscribers | Confidence: 0.720 90 | - Business and entrepreneurship focus 91 | - https://reddit.com/r/Entrepreneurship 92 | 93 | 19. **r/SoloFounders** - 2,113 subscribers | Confidence: 0.717 94 | - Solo founders community 95 | - https://reddit.com/r/SoloFounders 96 | 97 | 20. **r/HowToAIAgent** - 6,950 subscribers | Confidence: 0.711 98 | - AI agent tutorials and guides 99 | - https://reddit.com/r/HowToAIAgent 100 | 101 | ### Tier 3: Strong Secondary Communities (Confidence 0.6 - 0.7) 102 | 103 | 21. **r/artificial** - 1,135,505 subscribers | Confidence: 0.679 104 | - General AI discussions 105 | - https://reddit.com/r/artificial 106 | 107 | 22. **r/AgentsOfAI** - 46,494 subscribers | Confidence: 0.675 108 | - AI agents community 109 | - https://reddit.com/r/AgentsOfAI 110 | 111 | 23. **r/small_business_ideas** - 23,034 subscribers | Confidence: 0.671 112 | - Business idea validation 113 | - https://reddit.com/r/small_business_ideas 114 | 115 | 24. **r/BlackboxAI_** - 8,357 subscribers | Confidence: 0.659 116 | - AI development tools 117 | - https://reddit.com/r/BlackboxAI_ 118 | 119 | 25. **r/FetchAI_Community** - 21,415 subscribers | Confidence: 0.654 120 | - AI technology community 121 | - https://reddit.com/r/FetchAI_Community 122 | 123 | 26. **r/cofounderhunt** - 16,287 subscribers | Confidence: 0.654 124 | - Finding co-founders 125 | - https://reddit.com/r/cofounderhunt 126 | 127 | 27. **r/AboutAI** - 10,076 subscribers | Confidence: 0.652 128 | - AI education and discussion 129 | - https://reddit.com/r/AboutAI 130 | 131 | 28. **r/PydanticAI** - 3,039 subscribers | Confidence: 0.652 132 | - Python AI development 133 | - https://reddit.com/r/PydanticAI 134 | 135 | 29. **r/AIAssisted** - 71,088 subscribers | Confidence: 0.647 136 | - AI-assisted work and creativity 137 | - https://reddit.com/r/AIAssisted 138 | 139 | 30. 
**r/AI_Tools_Land** - 6,608 subscribers | Confidence: 0.631 140 | - AI tools discovery 141 | - https://reddit.com/r/AI_Tools_Land 142 | 143 | ### Tier 4: Valuable Niche Communities (Confidence 0.5 - 0.6) 144 | 145 | 31. **r/Automate** - 146,410 subscribers | Confidence: 0.630 146 | - Automation tools and workflows 147 | - https://reddit.com/r/Automate 148 | 149 | 32. **r/SaaS** - 374,943 subscribers | Confidence: 0.629 150 | - SaaS business discussions 151 | - https://reddit.com/r/SaaS 152 | 153 | 33. **r/AI_India** - 13,678 subscribers | Confidence: 0.623 154 | - Indian AI community 155 | - https://reddit.com/r/AI_India 156 | 157 | 34. **r/Business_Ideas** - 370,194 subscribers | Confidence: 0.619 158 | - Business idea discussions 159 | - https://reddit.com/r/Business_Ideas 160 | 161 | 35. **r/ThinkingDeeplyAI** - 11,572 subscribers | Confidence: 0.616 162 | - AI philosophy and deep thinking 163 | - https://reddit.com/r/ThinkingDeeplyAI 164 | 165 | 36. **r/PROJECT_AI** - 2,365 subscribers | Confidence: 0.615 166 | - AI project collaboration 167 | - https://reddit.com/r/PROJECT_AI 168 | 169 | 37. **r/AI_Agents** - 191,203 subscribers | Confidence: 0.608 170 | - AI agent development 171 | - https://reddit.com/r/AI_Agents 172 | 173 | 38. **r/genspark_ai** - 2,224 subscribers | Confidence: 0.590 174 | - AI development platform 175 | - https://reddit.com/r/genspark_ai 176 | 177 | 39. **r/CreatorsAI** - 4,509 subscribers | Confidence: 0.584 178 | - AI for content creators 179 | - https://reddit.com/r/CreatorsAI 180 | 181 | 40. **r/learnAIAgents** - 5,203 subscribers | Confidence: 0.584 182 | - Learning AI development 183 | - https://reddit.com/r/learnAIAgents 184 | 185 | ### Tier 5: Supporting Communities (Confidence 0.45 - 0.5) 186 | 187 | 41. **r/neuralnetworks** - 29,721 subscribers | Confidence: 0.581 188 | - Neural network development 189 | - https://reddit.com/r/neuralnetworks 190 | 191 | 42. **r/cofounderhunt** - 16,287 subscribers | Confidence: 0.578 192 | - Co-founder matching 193 | - https://reddit.com/r/cofounderhunt 194 | 195 | 43. **r/mlops** - 24,727 subscribers | Confidence: 0.574 196 | - Machine learning operations 197 | - https://reddit.com/r/mlops 198 | 199 | 44. **r/HowToAIAgent** - 6,950 subscribers | Confidence: 0.574 200 | - AI agent tutorials 201 | - https://reddit.com/r/HowToAIAgent 202 | 203 | 45. **r/FetchAI_Community** - 21,415 subscribers | Confidence: 0.572 204 | - Fetch.ai community 205 | - https://reddit.com/r/FetchAI_Community 206 | 207 | 46. **r/aiHub** - 9,867 subscribers | Confidence: 0.571 208 | - AI resources hub 209 | - https://reddit.com/r/aiHub 210 | 211 | 47. **r/selfpublish** - 196,096 subscribers | Confidence: 0.566 212 | - Self-publishing entrepreneurs 213 | - https://reddit.com/r/selfpublish 214 | 215 | 48. **r/PydanticAI** - 3,039 subscribers | Confidence: 0.564 216 | - Python AI framework 217 | - https://reddit.com/r/PydanticAI 218 | 219 | 49. **r/aifails** - 20,511 subscribers | Confidence: 0.563 220 | - Learning from AI failures 221 | - https://reddit.com/r/aifails 222 | 223 | 50. **r/learnmachinelearning** - 547,704 subscribers | Confidence: 0.561 224 | - Machine learning education 225 | - https://reddit.com/r/learnmachinelearning 226 | 227 | ## Community Engagement Strategy 228 | 229 | ### Primary Focus (Top Priority) 230 | Focus on communities with: 231 | - High confidence scores (>0.7) 232 | - Active subscriber bases (>10,000) 233 | - Direct ICP alignment 234 | 235 | **Recommended top 5 for immediate engagement:** 236 | 1. 
r/AI_Agents (191K subscribers, 0.902 confidence) 237 | 2. r/aipromptprogramming (107K subscribers, 0.911 confidence) 238 | 3. r/indiehackers (105K subscribers, 0.867 confidence) 239 | 4. r/startups (1.8M subscribers, 0.82 confidence) 240 | 5. r/Entrepreneur (4.8M subscribers, 0.785 confidence) 241 | 242 | ### Content Strategy by ICP Segment 243 | 244 | #### For SaaS Founders: 245 | - r/SaaS (374K subscribers) 246 | - r/startups (1.8M subscribers) 247 | - r/SoloFounders (2K subscribers) 248 | - r/EntrepreneurRideAlong (604K subscribers) 249 | 250 | #### For Solopreneurs: 251 | - r/indiehackers (105K subscribers) 252 | - r/SoloFounders (2K subscribers) 253 | - r/Entrepreneur (4.8M subscribers) 254 | - r/EntrepreneurConnect (5K subscribers) 255 | 256 | #### For AI Developers: 257 | - r/aipromptprogramming (107K subscribers) 258 | - r/machinelearningnews (107K subscribers) 259 | - r/learnmachinelearning (547K subscribers) 260 | - r/mlops (24K subscribers) 261 | 262 | #### For AI Builders: 263 | - r/AI_Agents (191K subscribers) 264 | - r/AiBuilders (8K subscribers) 265 | - r/AiAutomations (7K subscribers) 266 | - r/AutoGenAI (7K subscribers) 267 | 268 | ## Key Insights 269 | 270 | 1. **Large Communities with High Relevance**: Several communities combine massive reach (>100K subscribers) with high confidence scores (>0.7), offering excellent engagement opportunities. 271 | 272 | 2. **Niche Communities**: Smaller, highly-focused communities like r/SoloFounders and r/AiBuilders may have fewer members but offer highly targeted engagement. 273 | 274 | 3. **Cross-Pollination Opportunities**: Many users are active across multiple communities, allowing for strategic cross-posting and relationship building. 275 | 276 | 4. **AI Focus is Strong**: The AI development and builder communities show extremely high confidence scores, indicating strong alignment with current market trends. 277 | 278 | 5. **Entrepreneurship Overlap**: Strong overlap between entrepreneurship and AI communities suggests your ICP is at the intersection of business and technology. 279 | 280 | ## Recommended Next Steps 281 | 282 | 1. **Profile Analysis**: Conduct deeper analysis of top 10 communities to understand posting rules and culture 283 | 2. **Content Calendar**: Develop community-specific content strategies 284 | 3. **Engagement Tracking**: Monitor which communities drive the most valuable interactions 285 | 4. **Relationship Building**: Identify and connect with key influencers in each community 286 | 5. **Value-First Approach**: Focus on providing value before any promotional activities 287 | 288 | --- 289 | 290 | *Note: Confidence scores are based on semantic relevance to the search queries. Subscriber counts are current as of the research date. 
Community dynamics and rules should be reviewed before engagement.* ``` -------------------------------------------------------------------------------- /src/tools/discover.py: -------------------------------------------------------------------------------- ```python 1 | """Subreddit discovery using semantic vector search.""" 2 | 3 | import os 4 | import json 5 | from typing import Dict, List, Optional, Union, Any 6 | from fastmcp import Context 7 | from ..chroma_client import get_chroma_client, get_collection 8 | 9 | 10 | async def discover_subreddits( 11 | query: Optional[str] = None, 12 | queries: Optional[Union[List[str], str]] = None, 13 | limit: int = 10, 14 | include_nsfw: bool = False, 15 | ctx: Context = None 16 | ) -> Dict[str, Any]: 17 | """ 18 | Search for subreddits using semantic similarity search. 19 | 20 | Finds relevant subreddits based on semantic embeddings of subreddit names, 21 | descriptions, and community metadata. 22 | 23 | Args: 24 | query: Single search term to find subreddits 25 | queries: List of search terms for batch discovery (more efficient) 26 | Can also be a JSON string like '["term1", "term2"]' 27 | limit: Maximum number of results per query (default 10) 28 | include_nsfw: Whether to include NSFW subreddits (default False) 29 | ctx: FastMCP context (auto-injected by decorator) 30 | 31 | Returns: 32 | Dictionary with discovered subreddits and their metadata 33 | """ 34 | # Phase 1: Accept context but don't use it yet 35 | 36 | # Initialize ChromaDB client 37 | try: 38 | client = get_chroma_client() 39 | collection = get_collection("reddit_subreddits", client) 40 | 41 | except Exception as e: 42 | return { 43 | "error": f"Failed to connect to vector database: {str(e)}", 44 | "results": [], 45 | "summary": { 46 | "total_found": 0, 47 | "returned": 0, 48 | "coverage": "error" 49 | } 50 | } 51 | 52 | # Handle batch queries - convert string to list if needed 53 | if queries: 54 | # Handle case where LLM passes JSON string instead of array 55 | if isinstance(queries, str): 56 | try: 57 | # Try to parse as JSON if it looks like a JSON array 58 | if queries.strip().startswith('[') and queries.strip().endswith(']'): 59 | queries = json.loads(queries) 60 | else: 61 | # Single string query, convert to single-item list 62 | queries = [queries] 63 | except (json.JSONDecodeError, ValueError): 64 | # If JSON parsing fails, treat as single string 65 | queries = [queries] 66 | 67 | batch_results = {} 68 | total_api_calls = 0 69 | 70 | for search_query in queries: 71 | result = await _search_vector_db( 72 | search_query, collection, limit, include_nsfw, ctx 73 | ) 74 | batch_results[search_query] = result 75 | total_api_calls += 1 76 | 77 | return { 78 | "batch_mode": True, 79 | "total_queries": len(queries), 80 | "api_calls_made": total_api_calls, 81 | "results": batch_results, 82 | "tip": "Batch mode reduces API calls. Use the exact 'name' field when calling other tools." 
83 | } 84 | 85 | # Handle single query 86 | elif query: 87 | return await _search_vector_db(query, collection, limit, include_nsfw, ctx) 88 | 89 | else: 90 | return { 91 | "error": "Either 'query' or 'queries' parameter must be provided", 92 | "subreddits": [], 93 | "summary": { 94 | "total_found": 0, 95 | "returned": 0, 96 | "coverage": "error" 97 | } 98 | } 99 | 100 | 101 | async def _search_vector_db( 102 | query: str, 103 | collection, 104 | limit: int, 105 | include_nsfw: bool, 106 | ctx: Context = None 107 | ) -> Dict[str, Any]: 108 | """Internal function to perform semantic search for a single query.""" 109 | # Phase 1: Accept context but don't use it yet 110 | 111 | try: 112 | # Search with a larger limit to allow for filtering 113 | search_limit = min(limit * 3, 100) # Get extra results for filtering 114 | 115 | # Perform semantic search 116 | results = collection.query( 117 | query_texts=[query], 118 | n_results=search_limit 119 | ) 120 | 121 | if not results or not results['metadatas'] or not results['metadatas'][0]: 122 | return { 123 | "query": query, 124 | "subreddits": [], 125 | "summary": { 126 | "total_found": 0, 127 | "returned": 0, 128 | "has_more": False 129 | }, 130 | "next_actions": ["Try different search terms"] 131 | } 132 | 133 | # Process results 134 | processed_results = [] 135 | nsfw_filtered = 0 136 | total_results = len(results['metadatas'][0]) 137 | 138 | for i, (metadata, distance) in enumerate(zip( 139 | results['metadatas'][0], 140 | results['distances'][0] 141 | )): 142 | # Report progress 143 | if ctx: 144 | await ctx.report_progress( 145 | progress=i + 1, 146 | total=total_results, 147 | message=f"Analyzing r/{metadata.get('name', 'unknown')}" 148 | ) 149 | 150 | # Skip NSFW if not requested 151 | if metadata.get('nsfw', False) and not include_nsfw: 152 | nsfw_filtered += 1 153 | continue 154 | 155 | # Convert distance to confidence score (lower distance = higher confidence) 156 | # Adjust the scaling based on observed distances (typically 0.8 to 1.6) 157 | # Map distances: 0.8 -> 0.9, 1.0 -> 0.7, 1.2 -> 0.5, 1.4 -> 0.3, 1.6+ -> 0.1 158 | if distance < 0.8: 159 | confidence = 0.9 + (0.1 * (0.8 - distance) / 0.8) # 0.9 to 1.0 160 | elif distance < 1.0: 161 | confidence = 0.7 + (0.2 * (1.0 - distance) / 0.2) # 0.7 to 0.9 162 | elif distance < 1.2: 163 | confidence = 0.5 + (0.2 * (1.2 - distance) / 0.2) # 0.5 to 0.7 164 | elif distance < 1.4: 165 | confidence = 0.3 + (0.2 * (1.4 - distance) / 0.2) # 0.3 to 0.5 166 | else: 167 | confidence = max(0.1, 0.3 * (2.0 - distance) / 0.6) # 0.1 to 0.3 168 | 169 | # Apply penalties for generic subreddits 170 | subreddit_name = metadata.get('name', '').lower() 171 | generic_subs = ['funny', 'pics', 'videos', 'gifs', 'memes', 'aww'] 172 | if subreddit_name in generic_subs and query.lower() not in subreddit_name: 173 | confidence *= 0.3 # Heavy penalty for generic subs unless directly searched 174 | 175 | # Boost for high-activity subreddits (optional) 176 | subscribers = metadata.get('subscribers', 0) 177 | if subscribers > 1000000: 178 | confidence = min(1.0, confidence * 1.1) # Small boost for very large subs 179 | elif subscribers < 10000: 180 | confidence *= 0.9 # Small penalty for tiny subs 181 | 182 | # Determine match type based on distance 183 | if distance < 0.3: 184 | match_type = "exact_match" 185 | elif distance < 0.7: 186 | match_type = "strong_match" 187 | elif distance < 1.0: 188 | match_type = "partial_match" 189 | else: 190 | match_type = "weak_match" 191 | 192 | processed_results.append({ 193 | 
"name": metadata.get('name', 'unknown'), 194 | "subscribers": metadata.get('subscribers', 0), 195 | "confidence": round(confidence, 3), 196 | "url": metadata.get('url', f"https://reddit.com/r/{metadata.get('name', '')}") 197 | }) 198 | 199 | # Sort by confidence (highest first), then by subscribers 200 | processed_results.sort(key=lambda x: (-x['confidence'], -(x['subscribers'] or 0))) 201 | 202 | # Limit to requested number 203 | limited_results = processed_results[:limit] 204 | 205 | # Calculate basic stats 206 | total_found = len(processed_results) 207 | 208 | # Generate next actions (only meaningful ones) 209 | next_actions = [] 210 | if len(processed_results) > limit: 211 | next_actions.append(f"{len(processed_results)} total results found, showing {limit}") 212 | if nsfw_filtered > 0: 213 | next_actions.append(f"{nsfw_filtered} NSFW subreddits filtered") 214 | 215 | return { 216 | "query": query, 217 | "subreddits": limited_results, 218 | "summary": { 219 | "total_found": total_found, 220 | "returned": len(limited_results), 221 | "has_more": total_found > len(limited_results) 222 | }, 223 | "next_actions": next_actions 224 | } 225 | 226 | except Exception as e: 227 | # Map error patterns to specific recovery actions 228 | error_str = str(e).lower() 229 | if "not found" in error_str: 230 | guidance = "Verify subreddit name spelling" 231 | elif "rate" in error_str: 232 | guidance = "Rate limited - wait 60 seconds" 233 | elif "timeout" in error_str: 234 | guidance = "Reduce limit parameter to 10" 235 | else: 236 | guidance = "Try simpler search terms" 237 | 238 | return { 239 | "error": f"Failed to search vector database: {str(e)}", 240 | "query": query, 241 | "subreddits": [], 242 | "summary": { 243 | "total_found": 0, 244 | "returned": 0, 245 | "has_more": False 246 | }, 247 | "next_actions": [guidance] 248 | } 249 | 250 | 251 | def validate_subreddit( 252 | subreddit_name: str, 253 | ctx: Context = None 254 | ) -> Dict[str, Any]: 255 | """ 256 | Validate if a subreddit exists in the indexed database. 257 | 258 | Checks if the subreddit exists in our semantic search index 259 | and returns its metadata if found. 
260 | 261 | Args: 262 | subreddit_name: Name of the subreddit to validate 263 | ctx: FastMCP context (optional) 264 | 265 | Returns: 266 | Dictionary with validation result and subreddit info if found 267 | """ 268 | # Phase 1: Accept context but don't use it yet 269 | 270 | # Clean the subreddit name 271 | clean_name = subreddit_name.replace("r/", "").replace("/r/", "").strip() 272 | 273 | try: 274 | # Search for exact match in vector database 275 | client = get_chroma_client() 276 | collection = get_collection("reddit_subreddits", client) 277 | 278 | # Search for the exact subreddit name 279 | results = collection.query( 280 | query_texts=[clean_name], 281 | n_results=5 282 | ) 283 | 284 | if results and results['metadatas'] and results['metadatas'][0]: 285 | # Look for exact match in results 286 | for metadata in results['metadatas'][0]: 287 | if metadata.get('name', '').lower() == clean_name.lower(): 288 | return { 289 | "valid": True, 290 | "name": metadata.get('name'), 291 | "subscribers": metadata.get('subscribers', 0), 292 | "is_private": False, # We only index public subreddits 293 | "over_18": metadata.get('nsfw', False), 294 | "indexed": True 295 | } 296 | 297 | return { 298 | "valid": False, 299 | "name": clean_name, 300 | "error": f"Subreddit '{clean_name}' not found", 301 | "suggestion": "Use discover_subreddits to find similar communities" 302 | } 303 | 304 | except Exception as e: 305 | return { 306 | "valid": False, 307 | "name": clean_name, 308 | "error": f"Database error: {str(e)}", 309 | "suggestion": "Check database connection and retry" 310 | } ``` -------------------------------------------------------------------------------- /src/resources.py: -------------------------------------------------------------------------------- ```python 1 | """Reddit MCP Resources - Server information endpoint.""" 2 | 3 | from typing import Dict, Any 4 | import praw 5 | 6 | 7 | def register_resources(mcp, reddit: praw.Reddit) -> None: 8 | """Register server info resource with the MCP server.""" 9 | 10 | @mcp.resource("reddit://server-info") 11 | def get_server_info() -> Dict[str, Any]: 12 | """ 13 | Get comprehensive information about the Reddit MCP server's capabilities. 14 | 15 | Returns server version, available tools, prompts, and usage examples. 
16 | """ 17 | # Try to get rate limit info from Reddit 18 | rate_limit_info = {} 19 | try: 20 | # Access auth to check rate limit status 21 | rate_limit_info = { 22 | "requests_remaining": reddit.auth.limits.get('remaining', 'unknown'), 23 | "reset_timestamp": reddit.auth.limits.get('reset_timestamp', 'unknown'), 24 | "used": reddit.auth.limits.get('used', 'unknown') 25 | } 26 | except: 27 | rate_limit_info = { 28 | "status": "Rate limits tracked automatically by PRAW", 29 | "strategy": "Automatic retry with exponential backoff" 30 | } 31 | 32 | return { 33 | "name": "Reddit Research MCP Server", 34 | "version": "0.4.0", 35 | "description": "MCP server for comprehensive Reddit research with semantic search across 20,000+ indexed subreddits", 36 | "changelog": { 37 | "0.4.0": [ 38 | "Added reddit_research prompt for automated comprehensive research", 39 | "Streamlined resources to focus on server-info only", 40 | "Enhanced documentation for prompt-based workflows" 41 | ], 42 | "0.3.0": [ 43 | "Implemented three-layer architecture for clearer operation flow", 44 | "Added semantic subreddit discovery with vector search", 45 | "Enhanced workflow guidance with confidence-based recommendations", 46 | "Improved error recovery suggestions" 47 | ], 48 | "0.2.0": [ 49 | "Added discover_subreddits with confidence scoring", 50 | "Added fetch_multiple_subreddits for batch operations", 51 | "Enhanced server-info with comprehensive documentation", 52 | "Improved error handling and rate limit management" 53 | ], 54 | "0.1.0": [ 55 | "Initial release with search, fetch, and comment tools", 56 | "Basic resources for popular subreddits and server info" 57 | ] 58 | }, 59 | "capabilities": { 60 | "key_features": [ 61 | "Semantic search across 20,000+ indexed subreddits", 62 | "Batch operations reducing API calls by 70%", 63 | "Automated research workflow via prompt", 64 | "Three-layer architecture for guided operations", 65 | "Comprehensive citation tracking with Reddit URLs" 66 | ], 67 | "architecture": { 68 | "type": "Three-Layer Architecture", 69 | "workflow": [ 70 | "Layer 1: discover_operations() - See available operations", 71 | "Layer 2: get_operation_schema(operation_id) - Get requirements", 72 | "Layer 3: execute_operation(operation_id, parameters) - Execute" 73 | ], 74 | "description": "ALWAYS start with Layer 1, then Layer 2, then Layer 3" 75 | }, 76 | "tools": [ 77 | { 78 | "name": "discover_operations", 79 | "layer": 1, 80 | "description": "Discover available Reddit operations", 81 | "parameters": "NONE - Call without any parameters: discover_operations() NOT discover_operations({})", 82 | "purpose": "Shows all available operations and recommended workflows" 83 | }, 84 | { 85 | "name": "get_operation_schema", 86 | "layer": 2, 87 | "description": "Get parameter requirements for an operation", 88 | "parameters": { 89 | "operation_id": "The operation to get schema for (from Layer 1)", 90 | "include_examples": "Whether to include examples (optional, default: true)" 91 | }, 92 | "purpose": "Provides parameter schemas, validation rules, and examples" 93 | }, 94 | { 95 | "name": "execute_operation", 96 | "layer": 3, 97 | "description": "Execute a Reddit operation", 98 | "parameters": { 99 | "operation_id": "The operation to execute", 100 | "parameters": "Parameters matching the schema from Layer 2" 101 | }, 102 | "purpose": "Actually performs the Reddit API calls" 103 | } 104 | ], 105 | "prompts": [ 106 | { 107 | "name": "reddit_research", 108 | "description": "Conduct comprehensive Reddit research on 
any topic or question", 109 | "parameters": { 110 | "research_request": "Natural language description of what to research (e.g., 'How do people feel about remote work?')" 111 | }, 112 | "returns": "Structured workflow guiding complete research process", 113 | "output": "Comprehensive markdown report with citations and metrics", 114 | "usage": "Select prompt, provide research question, receive guided workflow" 115 | } 116 | ], 117 | "available_operations": { 118 | "discover_subreddits": "Find communities using semantic vector search (20,000+ indexed)", 119 | "search_subreddit": "Search within a specific community", 120 | "fetch_posts": "Get posts from one subreddit", 121 | "fetch_multiple": "Batch fetch from multiple subreddits (70% more efficient)", 122 | "fetch_comments": "Get complete comment tree for deep analysis" 123 | }, 124 | "resources": [ 125 | { 126 | "uri": "reddit://server-info", 127 | "description": "Comprehensive server capabilities, version, and usage information", 128 | "cacheable": False, 129 | "always_current": True 130 | } 131 | ], 132 | "statistics": { 133 | "total_tools": 3, 134 | "total_prompts": 1, 135 | "total_operations": 5, 136 | "total_resources": 1, 137 | "indexed_subreddits": "20,000+" 138 | } 139 | }, 140 | "usage_examples": { 141 | "automated_research": { 142 | "description": "Use the reddit_research prompt for complete automated workflow", 143 | "steps": [ 144 | "1. Select the 'reddit_research' prompt in your MCP client", 145 | "2. Provide your research question: 'What are the best practices for React development?'", 146 | "3. The prompt guides the LLM through discovery, gathering, analysis, and reporting", 147 | "4. Receive comprehensive markdown report with citations" 148 | ] 149 | }, 150 | "manual_workflow": { 151 | "description": "Step-by-step manual research using the three-layer architecture", 152 | "steps": [ 153 | "1. discover_operations() - See what's available", 154 | "2. get_operation_schema('discover_subreddits') - Get requirements", 155 | "3. execute_operation('discover_subreddits', {'query': 'machine learning', 'limit': 15})", 156 | "4. get_operation_schema('fetch_multiple') - Get batch fetch requirements", 157 | "5. execute_operation('fetch_multiple', {'subreddit_names': [...], 'limit_per_subreddit': 10})", 158 | "6. get_operation_schema('fetch_comments') - Get comment requirements", 159 | "7. execute_operation('fetch_comments', {'submission_id': 'abc123', 'comment_limit': 100})" 160 | ] 161 | }, 162 | "targeted_search": { 163 | "description": "Find specific content in known communities", 164 | "steps": [ 165 | "1. discover_operations()", 166 | "2. get_operation_schema('search_subreddit')", 167 | "3. 
execute_operation('search_subreddit', {'subreddit_name': 'Python', 'query': 'async', 'limit': 20})" 168 | ] 169 | } 170 | }, 171 | "performance_tips": [ 172 | "Use the reddit_research prompt for automated comprehensive research", 173 | "Always follow the three-layer workflow for manual operations", 174 | "Use fetch_multiple for 2+ subreddits (70% fewer API calls)", 175 | "Single semantic search finds all relevant communities", 176 | "Use confidence scores to guide strategy (>0.7 = high confidence)", 177 | "Expect ~15-20K tokens for comprehensive research" 178 | ], 179 | "workflow_guidance": { 180 | "confidence_based_strategy": { 181 | "high_confidence": "Scores > 0.7: Focus on top 5-8 subreddits", 182 | "medium_confidence": "Scores 0.4-0.7: Cast wider net with 10-12 subreddits", 183 | "low_confidence": "Scores < 0.4: Refine search terms and retry" 184 | }, 185 | "research_depth": { 186 | "minimum_coverage": "10+ threads, 100+ comments, 3+ subreddits", 187 | "quality_thresholds": "Posts: 5+ upvotes, Comments: 2+ upvotes", 188 | "author_credibility": "Prioritize 100+ karma for key insights" 189 | }, 190 | "token_optimization": { 191 | "discover_subreddits": "~1-2K tokens for semantic search", 192 | "fetch_multiple": "~500-1000 tokens per subreddit", 193 | "fetch_comments": "~2-5K tokens per post with comments", 194 | "full_research": "~15-20K tokens for comprehensive analysis" 195 | } 196 | }, 197 | "rate_limiting": { 198 | "handler": "PRAW automatic rate limit handling", 199 | "strategy": "Exponential backoff with retry", 200 | "current_status": rate_limit_info 201 | }, 202 | "authentication": { 203 | "type": "Application-only OAuth", 204 | "scope": "Read-only access", 205 | "capabilities": "Search, browse, and read public content" 206 | }, 207 | "support": { 208 | "repository": "https://github.com/king-of-the-grackles/reddit-research-mcp", 209 | "issues": "https://github.com/king-of-the-grackles/reddit-research-mcp/issues", 210 | "documentation": "See README.md and specs/ directory for architecture details" 211 | } 212 | } ```
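
The piecewise distance-to-confidence mapping in `src/tools/discover.py` (`_search_vector_db`) is easiest to sanity-check in isolation. The sketch below is a minimal, self-contained reproduction of that mapping plus the generic-subreddit and subscriber-count adjustments; the helper names (`distance_to_confidence`, `adjust_confidence`, `GENERIC_SUBS`) are illustrative and not part of the repository.

```python
"""Standalone sketch of the confidence scoring used in src/tools/discover.py.

Only the numeric mapping mirrors the repo; the helper names are illustrative.
"""

GENERIC_SUBS = {'funny', 'pics', 'videos', 'gifs', 'memes', 'aww'}


def distance_to_confidence(distance: float) -> float:
    """Map a vector-search distance to a 0.1-1.0 confidence score."""
    if distance < 0.8:
        return 0.9 + 0.1 * (0.8 - distance) / 0.8   # 0.9 .. 1.0
    if distance < 1.0:
        return 0.7 + 0.2 * (1.0 - distance) / 0.2   # 0.7 .. 0.9
    if distance < 1.2:
        return 0.5 + 0.2 * (1.2 - distance) / 0.2   # 0.5 .. 0.7
    if distance < 1.4:
        return 0.3 + 0.2 * (1.4 - distance) / 0.2   # 0.3 .. 0.5
    return max(0.1, 0.3 * (2.0 - distance) / 0.6)   # 0.1 .. 0.3


def adjust_confidence(confidence: float, name: str, subscribers: int, query: str) -> float:
    """Apply the same post-hoc penalties and boosts as _search_vector_db."""
    if name.lower() in GENERIC_SUBS and query.lower() not in name.lower():
        confidence *= 0.3                            # heavy penalty for generic subs unless searched directly
    if subscribers > 1_000_000:
        confidence = min(1.0, confidence * 1.1)      # small boost for very large subs
    elif subscribers < 10_000:
        confidence *= 0.9                            # small penalty for tiny subs
    return round(confidence, 3)


if __name__ == "__main__":
    for d in (0.5, 0.9, 1.1, 1.3, 1.5, 1.9):
        print(d, round(distance_to_confidence(d), 3))
    # Example: a 191K-subscriber niche sub at distance 0.85 scores ~0.85
    print(adjust_confidence(distance_to_confidence(0.85), "AI_Agents", 191_203, "ai agents"))
```

Distances in the typical 0.8 to 1.6 range map to confidences between roughly 0.2 and 0.9 before the size and genericity adjustments, which is why the confidence columns in the reports above mostly fall in that band.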
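
The `reddit://server-info` resource above walks through the three-layer workflow (discover_operations, then get_operation_schema, then execute_operation). A rough end-to-end driver might look like the sketch below. It assumes the FastMCP 2.x client API (`fastmcp.Client` with `call_tool` and `read_resource`) and that `src/server.py` can be launched over stdio with the credentials from `.env.sample` configured; the query values are placeholders, not documented defaults.

```python
"""Sketch: driving the three-layer workflow described in reddit://server-info.

Assumes the FastMCP 2.x Client API and a local src/server.py started over
stdio with the environment variables from .env.sample set.
"""
import asyncio

from fastmcp import Client


async def main() -> None:
    async with Client("src/server.py") as client:
        # Optional: read server capabilities and version info
        info = await client.read_resource("reddit://server-info")
        print(info)

        # Layer 1: list available operations (called with no parameters)
        await client.call_tool("discover_operations")

        # Layer 2: get the parameter schema for the operation we want
        await client.call_tool(
            "get_operation_schema",
            {"operation_id": "discover_subreddits"},
        )

        # Layer 3: execute with parameters that match the schema
        result = await client.call_tool(
            "execute_operation",
            {
                "operation_id": "discover_subreddits",
                "parameters": {"query": "machine learning", "limit": 15},
            },
        )
        print(result)


if __name__ == "__main__":
    asyncio.run(main())
```

The nested `parameters` object mirrors the `execute_operation` tool description in server-info, which is why the Layer 2 schema call comes before execution.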