This is page 4 of 11. Use http://codebase.md/saidsurucu/yargi-mcp?lines=true&page={x} to view the full context.
# Directory Structure
```
├── __main__.py
├── .dockerignore
├── .env.example
├── .gitattributes
├── .github
│ └── workflows
│ └── publish.yml
├── .gitignore
├── .serena
│ ├── .gitignore
│ └── project.yml
├── 5ire-settings.png
├── analyze_kik_hash_generation.py
├── anayasa_mcp_module
│ ├── __init__.py
│ ├── bireysel_client.py
│ ├── client.py
│ ├── models.py
│ └── unified_client.py
├── asgi_app.py
├── bddk_mcp_module
│ ├── __init__.py
│ ├── client.py
│ └── models.py
├── bedesten_mcp_module
│ ├── __init__.py
│ ├── client.py
│ ├── enums.py
│ └── models.py
├── check_response_format.py
├── CLAUDE.md
├── danistay_mcp_module
│ ├── __init__.py
│ ├── client.py
│ └── models.py
├── docker-compose.yml
├── Dockerfile
├── docs
│ └── DEPLOYMENT.md
├── emsal_mcp_module
│ ├── __init__.py
│ ├── client.py
│ └── models.py
├── example_fastapi_app.py
├── fly-no-auth.toml
├── fly.toml
├── kik_mcp_module
│ ├── __init__.py
│ ├── client_v2.py
│ ├── client.py
│ ├── models_v2.py
│ └── models.py
├── kvkk_mcp_module
│ ├── __init__.py
│ ├── client.py
│ └── models.py
├── LICENSE
├── mcp_auth
│ ├── __init__.py
│ ├── clerk_config.py
│ ├── middleware.py
│ ├── oauth.py
│ ├── policy.py
│ └── storage.py
├── mcp_auth_factory.py
├── mcp_auth_http_adapter.py
├── mcp_auth_http_simple.py
├── mcp_server_main.py
├── nginx.conf
├── ornek.png
├── Procfile
├── pyproject.toml
├── railway.json
├── README.md
├── redis_session_store.py
├── rekabet_mcp_module
│ ├── __init__.py
│ ├── client.py
│ └── models.py
├── requirements.txt
├── run_asgi.py
├── saidsurucu-yargi-mcp-f5fa007
│ ├── __main__.py
│ ├── .dockerignore
│ ├── .env.example
│ ├── .gitattributes
│ ├── .github
│ │ └── workflows
│ │ └── publish.yml
│ ├── .gitignore
│ ├── 5ire-settings.png
│ ├── anayasa_mcp_module
│ │ ├── __init__.py
│ │ ├── bireysel_client.py
│ │ ├── client.py
│ │ ├── models.py
│ │ └── unified_client.py
│ ├── asgi_app.py
│ ├── bddk_mcp_module
│ │ ├── __init__.py
│ │ ├── client.py
│ │ └── models.py
│ ├── bedesten_mcp_module
│ │ ├── __init__.py
│ │ ├── client.py
│ │ ├── enums.py
│ │ └── models.py
│ ├── check_response_format.py
│ ├── danistay_mcp_module
│ │ ├── __init__.py
│ │ ├── client.py
│ │ └── models.py
│ ├── docker-compose.yml
│ ├── Dockerfile
│ ├── docs
│ │ └── DEPLOYMENT.md
│ ├── emsal_mcp_module
│ │ ├── __init__.py
│ │ ├── client.py
│ │ └── models.py
│ ├── example_fastapi_app.py
│ ├── kik_mcp_module
│ │ ├── __init__.py
│ │ ├── client.py
│ │ └── models.py
│ ├── kvkk_mcp_module
│ │ ├── __init__.py
│ │ ├── client.py
│ │ └── models.py
│ ├── LICENSE
│ ├── mcp_auth
│ │ ├── __init__.py
│ │ ├── clerk_config.py
│ │ ├── middleware.py
│ │ ├── oauth.py
│ │ ├── policy.py
│ │ └── storage.py
│ ├── mcp_auth_factory.py
│ ├── mcp_auth_http_adapter.py
│ ├── mcp_auth_http_simple.py
│ ├── mcp_server_main.py
│ ├── nginx.conf
│ ├── ornek.png
│ ├── Procfile
│ ├── pyproject.toml
│ ├── railway.json
│ ├── README.md
│ ├── redis_session_store.py
│ ├── rekabet_mcp_module
│ │ ├── __init__.py
│ │ ├── client.py
│ │ └── models.py
│ ├── run_asgi.py
│ ├── sayistay_mcp_module
│ │ ├── __init__.py
│ │ ├── client.py
│ │ ├── enums.py
│ │ ├── models.py
│ │ └── unified_client.py
│ ├── starlette_app.py
│ ├── stripe_webhook.py
│ ├── uyusmazlik_mcp_module
│ │ ├── __init__.py
│ │ ├── client.py
│ │ └── models.py
│ └── yargitay_mcp_module
│ ├── __init__.py
│ ├── client.py
│ └── models.py
├── sayistay_mcp_module
│ ├── __init__.py
│ ├── client.py
│ ├── enums.py
│ ├── models.py
│ └── unified_client.py
├── starlette_app.py
├── stripe_webhook.py
├── uv.lock
├── uyusmazlik_mcp_module
│ ├── __init__.py
│ ├── client.py
│ └── models.py
└── yargitay_mcp_module
├── __init__.py
├── client.py
└── models.py
```
# Files
--------------------------------------------------------------------------------
/mcp_auth/policy.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Authorization policy engine for MCP tools
3 | """
4 |
5 | import re
6 | import logging
7 | from dataclasses import dataclass
8 | from enum import Enum
9 | from typing import Any
10 |
11 | logger = logging.getLogger(__name__)
12 |
13 |
class PolicyAction(Enum):
    """Effect of a matching policy rule: grant ("allow") or refuse ("deny")."""

    ALLOW = "allow"
    DENY = "deny"
17 |
18 |
@dataclass
class ToolPolicy:
    """A single authorization rule mapping a tool-name pattern to scopes.

    Attributes:
        tool_pattern: Regex matched against tool names with ``re.match``
            (anchored at the start of the name only).
        required_scopes: Scopes the caller must all hold for the rule to fire.
        action: Whether a satisfied rule grants (ALLOW) or revokes (DENY) access.
        conditions: Optional extra claim constraints, evaluated by the engine.
    """

    tool_pattern: str  # regex pattern for tool names
    required_scopes: list[str]
    action: PolicyAction = PolicyAction.ALLOW
    conditions: dict[str, Any] | None = None

    def matches_tool(self, tool_name: str) -> bool:
        """Return True when this rule applies to *tool_name*."""
        return re.match(self.tool_pattern, tool_name) is not None

    def evaluate_scopes(self, user_scopes: list[str]) -> bool:
        """Return True when the user holds every required scope."""
        return set(self.required_scopes).issubset(user_scopes)
35 |
36 |
class PolicyEngine:
    """Scope-based authorization engine for Turkish legal database tools.

    Evaluation is deny-first: any matching DENY rule whose scopes are
    satisfied blocks the call; otherwise the first satisfied ALLOW rule
    (including its optional claim conditions) grants it. Tools with no
    matching rule fall back to ``default_action`` (DENY unless changed).
    """

    def __init__(self):
        self.policies: list[ToolPolicy] = []
        self.default_action = PolicyAction.DENY

    def add_policy(self, policy: ToolPolicy):
        """Register one policy rule."""
        self.policies.append(policy)
        logger.debug(f"Added policy: {policy.tool_pattern} -> {policy.required_scopes}")

    def add_tool_scope_policy(
        self,
        tool_pattern: str,
        required_scopes: str | list[str],
        action: PolicyAction = PolicyAction.ALLOW,
    ):
        """Shorthand: build a ToolPolicy and register it.

        A bare string scope is wrapped into a one-element list.
        """
        scopes = [required_scopes] if isinstance(required_scopes, str) else required_scopes
        self.add_policy(
            ToolPolicy(tool_pattern=tool_pattern, required_scopes=scopes, action=action)
        )

    def authorize_tool_call(
        self,
        tool_name: str,
        user_scopes: list[str],
        user_claims: dict[str, Any] | None = None,
    ) -> tuple[bool, str | None]:
        """Decide whether a tool call is permitted.

        Returns:
            (authorized, reason) -- ``reason`` is None when authorized.
        """
        logger.debug(f"Authorizing tool '{tool_name}' for user with scopes: {user_scopes}")

        applicable = [p for p in self.policies if p.matches_tool(tool_name)]

        if not applicable:
            # No rule mentions this tool -- fall back to the engine default.
            if self.default_action == PolicyAction.ALLOW:
                logger.debug(f"No policies found for '{tool_name}', allowing by default")
                return True, None
            logger.warning(f"No policies found for '{tool_name}', denying by default")
            return False, f"No policy found for tool '{tool_name}', default deny"

        # Explicit deny rules take precedence over any allow rule.
        for rule in applicable:
            if rule.action == PolicyAction.DENY and rule.evaluate_scopes(user_scopes):
                logger.warning(f"Explicit deny policy matched for '{tool_name}'")
                return False, f"Explicit deny policy for tool '{tool_name}'"

        allow_policies = [p for p in applicable if p.action == PolicyAction.ALLOW]

        if not allow_policies:
            logger.warning(f"No allow policies found for '{tool_name}'")
            return False, f"No allow policies found for tool '{tool_name}'"

        # First allow rule whose scopes AND conditions are satisfied wins.
        for rule in allow_policies:
            if rule.evaluate_scopes(user_scopes) and self._evaluate_conditions(
                rule.conditions, user_claims
            ):
                logger.debug(f"Authorization granted for '{tool_name}'")
                return True, None

        logger.warning(f"Insufficient scopes for '{tool_name}'. Required: {[p.required_scopes for p in allow_policies]}, User has: {user_scopes}")
        return False, f"Insufficient scopes for tool '{tool_name}'"

    def _evaluate_conditions(
        self,
        conditions: dict[str, Any] | None,
        user_claims: dict[str, Any] | None,
    ) -> bool:
        """Check a rule's extra claim constraints against the user's claims.

        A list-valued condition is an "any of these values" membership test;
        any other value must compare equal to the corresponding claim.
        """
        if not conditions:
            return True

        if not user_claims:
            logger.debug("No user claims provided, conditions evaluation failed")
            return False

        for key, expected_value in conditions.items():
            user_value = user_claims.get(key)

            if isinstance(expected_value, list):
                if user_value not in expected_value:
                    logger.debug(f"Condition failed: {key} = {user_value} not in {expected_value}")
                    return False
            elif user_value != expected_value:
                logger.debug(f"Condition failed: {key} = {user_value} != {expected_value}")
                return False

        return True

    def get_allowed_tools(self, user_scopes: list[str]) -> list[str]:
        """Return tool patterns from ALLOW rules whose scopes the user meets."""
        return [
            p.tool_pattern
            for p in self.policies
            if p.action == PolicyAction.ALLOW and p.evaluate_scopes(user_scopes)
        ]
155 |
156 |
def create_turkish_legal_policies() -> PolicyEngine:
    """Build the policy set used by the Turkish legal database MCP server."""

    engine = PolicyEngine()

    # Administrators may call every tool.
    engine.add_tool_scope_policy(".*", ["mcp:tools:admin"])

    # Read-only access: generic search/fetch tools, the per-database search
    # tools, and the document retrieval helpers. Registration order matters
    # only for readability; evaluation considers all matching rules.
    read_only_patterns = [
        "search.*",
        "get_.*",
        "fetch.*",
        "search_yargitay.*",
        "search_danistay.*",
        "search_anayasa.*",
        "search_rekabet.*",
        "search_kik.*",
        "search_emsal.*",
        "search_uyusmazlik.*",
        "search_sayistay.*",
        "search_.*_bedesten",
        "search_yerel_hukuk.*",
        "search_istinaf_hukuk.*",
        "search_kyb.*",
        "get_.*_document.*",
        "get_.*_markdown",
    ]
    for pattern in read_only_patterns:
        engine.add_tool_scope_policy(pattern, ["mcp:tools:read"])

    # Write scope reserved for possible future mutating tools.
    for pattern in ("create_.*", "update_.*", "delete_.*"):
        engine.add_tool_scope_policy(pattern, ["mcp:tools:write"])

    logger.info("Created Turkish legal database policy engine")
    return engine
197 |
198 |
def create_default_policies() -> PolicyEngine:
    """Create a default policy set for MCP servers (backwards compatibility)"""
    # Kept as an alias so older callers of the previous API keep working.
    return create_turkish_legal_policies()
```
--------------------------------------------------------------------------------
/saidsurucu-yargi-mcp-f5fa007/mcp_auth/policy.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | Authorization policy engine for MCP tools
3 | """
4 |
5 | import re
6 | import logging
7 | from dataclasses import dataclass
8 | from enum import Enum
9 | from typing import Any
10 |
11 | logger = logging.getLogger(__name__)
12 |
13 |
class PolicyAction(Enum):
    """Effect of a matching policy rule: grant ("allow") or refuse ("deny")."""

    ALLOW = "allow"
    DENY = "deny"
17 |
18 |
@dataclass
class ToolPolicy:
    """A single authorization rule mapping a tool-name pattern to scopes.

    Attributes:
        tool_pattern: Regex matched against tool names with ``re.match``
            (anchored at the start of the name only).
        required_scopes: Scopes the caller must all hold for the rule to fire.
        action: Whether a satisfied rule grants (ALLOW) or revokes (DENY) access.
        conditions: Optional extra claim constraints, evaluated by the engine.
    """

    tool_pattern: str  # regex pattern for tool names
    required_scopes: list[str]
    action: PolicyAction = PolicyAction.ALLOW
    conditions: dict[str, Any] | None = None

    def matches_tool(self, tool_name: str) -> bool:
        """Return True when this rule applies to *tool_name*."""
        return re.match(self.tool_pattern, tool_name) is not None

    def evaluate_scopes(self, user_scopes: list[str]) -> bool:
        """Return True when the user holds every required scope."""
        return set(self.required_scopes).issubset(user_scopes)
35 |
36 |
class PolicyEngine:
    """Scope-based authorization engine for Turkish legal database tools.

    Evaluation is deny-first: any matching DENY rule whose scopes are
    satisfied blocks the call; otherwise the first satisfied ALLOW rule
    (including its optional claim conditions) grants it. Tools with no
    matching rule fall back to ``default_action`` (DENY unless changed).
    """

    def __init__(self):
        self.policies: list[ToolPolicy] = []
        self.default_action = PolicyAction.DENY

    def add_policy(self, policy: ToolPolicy):
        """Register one policy rule."""
        self.policies.append(policy)
        logger.debug(f"Added policy: {policy.tool_pattern} -> {policy.required_scopes}")

    def add_tool_scope_policy(
        self,
        tool_pattern: str,
        required_scopes: str | list[str],
        action: PolicyAction = PolicyAction.ALLOW,
    ):
        """Shorthand: build a ToolPolicy and register it.

        A bare string scope is wrapped into a one-element list.
        """
        scopes = [required_scopes] if isinstance(required_scopes, str) else required_scopes
        self.add_policy(
            ToolPolicy(tool_pattern=tool_pattern, required_scopes=scopes, action=action)
        )

    def authorize_tool_call(
        self,
        tool_name: str,
        user_scopes: list[str],
        user_claims: dict[str, Any] | None = None,
    ) -> tuple[bool, str | None]:
        """Decide whether a tool call is permitted.

        Returns:
            (authorized, reason) -- ``reason`` is None when authorized.
        """
        logger.debug(f"Authorizing tool '{tool_name}' for user with scopes: {user_scopes}")

        applicable = [p for p in self.policies if p.matches_tool(tool_name)]

        if not applicable:
            # No rule mentions this tool -- fall back to the engine default.
            if self.default_action == PolicyAction.ALLOW:
                logger.debug(f"No policies found for '{tool_name}', allowing by default")
                return True, None
            logger.warning(f"No policies found for '{tool_name}', denying by default")
            return False, f"No policy found for tool '{tool_name}', default deny"

        # Explicit deny rules take precedence over any allow rule.
        for rule in applicable:
            if rule.action == PolicyAction.DENY and rule.evaluate_scopes(user_scopes):
                logger.warning(f"Explicit deny policy matched for '{tool_name}'")
                return False, f"Explicit deny policy for tool '{tool_name}'"

        allow_policies = [p for p in applicable if p.action == PolicyAction.ALLOW]

        if not allow_policies:
            logger.warning(f"No allow policies found for '{tool_name}'")
            return False, f"No allow policies found for tool '{tool_name}'"

        # First allow rule whose scopes AND conditions are satisfied wins.
        for rule in allow_policies:
            if rule.evaluate_scopes(user_scopes) and self._evaluate_conditions(
                rule.conditions, user_claims
            ):
                logger.debug(f"Authorization granted for '{tool_name}'")
                return True, None

        logger.warning(f"Insufficient scopes for '{tool_name}'. Required: {[p.required_scopes for p in allow_policies]}, User has: {user_scopes}")
        return False, f"Insufficient scopes for tool '{tool_name}'"

    def _evaluate_conditions(
        self,
        conditions: dict[str, Any] | None,
        user_claims: dict[str, Any] | None,
    ) -> bool:
        """Check a rule's extra claim constraints against the user's claims.

        A list-valued condition is an "any of these values" membership test;
        any other value must compare equal to the corresponding claim.
        """
        if not conditions:
            return True

        if not user_claims:
            logger.debug("No user claims provided, conditions evaluation failed")
            return False

        for key, expected_value in conditions.items():
            user_value = user_claims.get(key)

            if isinstance(expected_value, list):
                if user_value not in expected_value:
                    logger.debug(f"Condition failed: {key} = {user_value} not in {expected_value}")
                    return False
            elif user_value != expected_value:
                logger.debug(f"Condition failed: {key} = {user_value} != {expected_value}")
                return False

        return True

    def get_allowed_tools(self, user_scopes: list[str]) -> list[str]:
        """Return tool patterns from ALLOW rules whose scopes the user meets."""
        return [
            p.tool_pattern
            for p in self.policies
            if p.action == PolicyAction.ALLOW and p.evaluate_scopes(user_scopes)
        ]
155 |
156 |
def create_turkish_legal_policies() -> PolicyEngine:
    """Build the policy set used by the Turkish legal database MCP server."""

    engine = PolicyEngine()

    # Administrators may call every tool.
    engine.add_tool_scope_policy(".*", ["mcp:tools:admin"])

    # Read-only access: generic search/fetch tools, the per-database search
    # tools, and the document retrieval helpers. Registration order matters
    # only for readability; evaluation considers all matching rules.
    read_only_patterns = [
        "search.*",
        "get_.*",
        "fetch.*",
        "search_yargitay.*",
        "search_danistay.*",
        "search_anayasa.*",
        "search_rekabet.*",
        "search_kik.*",
        "search_emsal.*",
        "search_uyusmazlik.*",
        "search_sayistay.*",
        "search_.*_bedesten",
        "search_yerel_hukuk.*",
        "search_istinaf_hukuk.*",
        "search_kyb.*",
        "get_.*_document.*",
        "get_.*_markdown",
    ]
    for pattern in read_only_patterns:
        engine.add_tool_scope_policy(pattern, ["mcp:tools:read"])

    # Write scope reserved for possible future mutating tools.
    for pattern in ("create_.*", "update_.*", "delete_.*"):
        engine.add_tool_scope_policy(pattern, ["mcp:tools:write"])

    logger.info("Created Turkish legal database policy engine")
    return engine
197 |
198 |
def create_default_policies() -> PolicyEngine:
    """Create a default policy set for MCP servers (backwards compatibility)"""
    # Kept as an alias so older callers of the previous API keep working.
    return create_turkish_legal_policies()
```
--------------------------------------------------------------------------------
/saidsurucu-yargi-mcp-f5fa007/bedesten_mcp_module/client.py:
--------------------------------------------------------------------------------
```python
1 | # bedesten_mcp_module/client.py
2 |
3 | import httpx
4 | import base64
5 | from typing import Optional
6 | import logging
7 | from markitdown import MarkItDown
8 | import io
9 |
10 | from .models import (
11 | BedestenSearchRequest, BedestenSearchResponse,
12 | BedestenDocumentRequest, BedestenDocumentResponse,
13 | BedestenDocumentMarkdown, BedestenDocumentRequestData
14 | )
15 | from .enums import get_full_birim_adi
16 |
17 | logger = logging.getLogger(__name__)
18 |
class BedestenApiClient:
    """
    API Client for Bedesten (bedesten.adalet.gov.tr) - Alternative legal decision search system.
    Currently used for Yargıtay decisions, but can be extended for other court types.
    """
    BASE_URL = "https://bedesten.adalet.gov.tr"
    SEARCH_ENDPOINT = "/emsal-karar/searchDocuments"
    DOCUMENT_ENDPOINT = "/emsal-karar/getDocumentContent"

    def __init__(self, request_timeout: float = 60.0):
        # Browser-like headers mirror the public mevzuat.adalet.gov.tr
        # frontend; "AdaletApplicationName" identifies the caller to the API.
        self.http_client = httpx.AsyncClient(
            base_url=self.BASE_URL,
            headers={
                "Accept": "*/*",
                "Accept-Language": "tr-TR,tr;q=0.9,en-US;q=0.8,en;q=0.7",
                "AdaletApplicationName": "UyapMevzuat",
                "Content-Type": "application/json; charset=utf-8",
                "Origin": "https://mevzuat.adalet.gov.tr",
                "Referer": "https://mevzuat.adalet.gov.tr/",
                "Sec-Fetch-Dest": "empty",
                "Sec-Fetch-Mode": "cors",
                "Sec-Fetch-Site": "same-site",
                "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36"
            },
            timeout=request_timeout
        )

    async def search_documents(self, search_request: BedestenSearchRequest) -> BedestenSearchResponse:
        """
        Search for documents using Bedesten API.
        Currently supports: YARGITAYKARARI, DANISTAYKARARI, YERELHUKMAHKARARI, etc.

        Raises:
            httpx.RequestError: on network-level failures (logged and re-raised).
            Exception: when the response cannot be parsed into BedestenSearchResponse.
        """
        logger.info(f"BedestenApiClient: Searching documents with phrase: {search_request.data.phrase}")

        # Map abbreviated birimAdi to full Turkish name before sending to API
        original_birim_adi = search_request.data.birimAdi
        mapped_birim_adi = get_full_birim_adi(original_birim_adi)
        if original_birim_adi != "ALL":
            logger.info(f"BedestenApiClient: Mapped birimAdi '{original_birim_adi}' to '{mapped_birim_adi}'")

        try:
            # Apply the mapping to the serialized payload instead of writing
            # it back into search_request (the previous implementation mutated
            # the caller's request object as a side effect).
            request_dict = search_request.model_dump()
            request_dict["data"]["birimAdi"] = mapped_birim_adi
            if not request_dict["data"]["birimAdi"]:  # Remove if empty string
                del request_dict["data"]["birimAdi"]

            response = await self.http_client.post(
                self.SEARCH_ENDPOINT,
                json=request_dict
            )
            response.raise_for_status()
            response_json = response.json()

            # Parse and return the response
            return BedestenSearchResponse(**response_json)

        except httpx.RequestError as e:
            logger.error(f"BedestenApiClient: HTTP request error during search: {e}")
            raise
        except Exception as e:
            logger.error(f"BedestenApiClient: Error processing search response: {e}")
            raise

    async def get_document_as_markdown(self, document_id: str) -> BedestenDocumentMarkdown:
        """
        Get document content and convert to markdown.
        Handles both HTML (text/html) and PDF (application/pdf) content types;
        any other mime type yields an explanatory placeholder string.
        """
        logger.info(f"BedestenApiClient: Fetching document for markdown conversion (ID: {document_id})")

        try:
            # Prepare request
            doc_request = BedestenDocumentRequest(
                data=BedestenDocumentRequestData(documentId=document_id)
            )

            # Get document
            response = await self.http_client.post(
                self.DOCUMENT_ENDPOINT,
                json=doc_request.model_dump()
            )
            response.raise_for_status()
            response_json = response.json()
            doc_response = BedestenDocumentResponse(**response_json)

            # The API returns the document body base64-encoded.
            content_bytes = base64.b64decode(doc_response.data.content)
            mime_type = doc_response.data.mimeType

            logger.info(f"BedestenApiClient: Document mime type: {mime_type}")

            # Convert to markdown based on mime type
            if mime_type == "text/html":
                html_content = content_bytes.decode('utf-8')
                markdown_content = self._convert_html_to_markdown(html_content)
            elif mime_type == "application/pdf":
                markdown_content = self._convert_pdf_to_markdown(content_bytes)
            else:
                logger.warning(f"Unsupported mime type: {mime_type}")
                markdown_content = f"Unsupported content type: {mime_type}. Unable to convert to markdown."

            return BedestenDocumentMarkdown(
                documentId=document_id,
                markdown_content=markdown_content,
                source_url=f"{self.BASE_URL}/document/{document_id}",
                mime_type=mime_type
            )

        except httpx.RequestError as e:
            logger.error(f"BedestenApiClient: HTTP error fetching document {document_id}: {e}")
            raise
        except Exception as e:
            logger.error(f"BedestenApiClient: Error processing document {document_id}: {e}")
            raise

    def _convert_html_to_markdown(self, html_content: str) -> Optional[str]:
        """Convert HTML to Markdown using MarkItDown.

        Returns None for empty input; on conversion failure returns an error
        string rather than raising, so callers always get displayable text.
        """
        if not html_content:
            return None

        try:
            # Feed MarkItDown a BytesIO stream to avoid temp file creation.
            html_stream = io.BytesIO(html_content.encode('utf-8'))

            md_converter = MarkItDown()
            result = md_converter.convert(html_stream)
            markdown_content = result.text_content

            logger.info("Successfully converted HTML to Markdown")
            return markdown_content

        except Exception as e:
            logger.error(f"Error converting HTML to Markdown: {e}")
            return f"Error converting HTML content: {str(e)}"

    def _convert_pdf_to_markdown(self, pdf_bytes: bytes) -> Optional[str]:
        """Convert PDF to Markdown using MarkItDown.

        Returns None for empty input; on conversion failure returns an error
        string rather than raising, so callers always get displayable text.
        """
        if not pdf_bytes:
            return None

        try:
            # Feed MarkItDown a BytesIO stream to avoid temp file creation.
            pdf_stream = io.BytesIO(pdf_bytes)

            md_converter = MarkItDown()
            result = md_converter.convert(pdf_stream)
            markdown_content = result.text_content

            logger.info("Successfully converted PDF to Markdown")
            return markdown_content

        except Exception as e:
            logger.error(f"Error converting PDF to Markdown: {e}")
            return f"Error converting PDF content: {str(e)}. The document may be corrupted or in an unsupported format."

    async def close_client_session(self):
        """Close HTTP client session"""
        await self.http_client.aclose()
        logger.info("BedestenApiClient: HTTP client session closed.")
```
--------------------------------------------------------------------------------
/docs/DEPLOYMENT.md:
--------------------------------------------------------------------------------
```markdown
1 | # Yargı MCP Server Dağıtım Rehberi
2 |
3 | Bu rehber, Yargı MCP Server'ın ASGI web servisi olarak çeşitli dağıtım seçeneklerini kapsar.
4 |
5 | ## İçindekiler
6 |
7 | - [Hızlı Başlangıç](#hızlı-başlangıç)
8 | - [Yerel Geliştirme](#yerel-geliştirme)
9 | - [Production Dağıtımı](#production-dağıtımı)
10 | - [Cloud Dağıtımı](#cloud-dağıtımı)
11 | - [Docker Dağıtımı](#docker-dağıtımı)
12 | - [Güvenlik Hususları](#güvenlik-hususları)
13 | - [İzleme](#izleme)
14 |
15 | ## Hızlı Başlangıç
16 |
17 | ### 1. Bağımlılıkları Yükleyin
18 |
19 | ```bash
20 | # ASGI sunucusu için uvicorn yükleyin
21 | pip install uvicorn
22 |
23 | # Veya tüm bağımlılıklarla birlikte yükleyin
24 | pip install -e .
25 | pip install uvicorn
26 | ```
27 |
28 | ### 2. Sunucuyu Çalıştırın
29 |
30 | ```bash
31 | # Temel başlatma
32 | python run_asgi.py
33 |
34 | # Veya doğrudan uvicorn ile
35 | uvicorn asgi_app:app --host 0.0.0.0 --port 8000
36 | ```
37 |
38 | Sunucu şu adreslerde kullanılabilir olacak:
39 | - MCP Endpoint: `http://localhost:8000/mcp/`
40 | - Sağlık Kontrolü: `http://localhost:8000/health`
41 | - API Durumu: `http://localhost:8000/status`
42 |
43 | ## Yerel Geliştirme
44 |
45 | ### Otomatik Yeniden Yükleme ile Geliştirme Sunucusu
46 |
47 | ```bash
48 | python run_asgi.py --reload --log-level debug
49 | ```
50 |
51 | ### FastAPI Entegrasyonunu Kullanma
52 |
53 | Ek REST API endpoint'leri için:
54 |
55 | ```bash
 56 | uvicorn example_fastapi_app:app --reload
57 | ```
58 |
59 | Bu şunları sağlar:
60 | - `/docs` adresinde interaktif API dokümantasyonu
61 | - `/api/tools` adresinde araç listesi
62 | - `/api/databases` adresinde veritabanı bilgileri
63 |
64 | ### Ortam Değişkenleri
65 |
66 | `.env.example` dosyasını temel alarak bir `.env` dosyası oluşturun:
67 |
68 | ```bash
69 | cp .env.example .env
70 | ```
71 |
72 | Temel değişkenler:
73 | - `HOST`: Sunucu host adresi (varsayılan: 127.0.0.1)
74 | - `PORT`: Sunucu portu (varsayılan: 8000)
75 | - `ALLOWED_ORIGINS`: CORS kökenleri (virgülle ayrılmış)
76 | - `LOG_LEVEL`: Log seviyesi (debug, info, warning, error)
77 |
78 | ## Production Dağıtımı
79 |
80 | ### 1. Uvicorn ile Çoklu Worker Kullanımı
81 |
82 | ```bash
83 | python run_asgi.py --host 0.0.0.0 --port 8000 --workers 4
84 | ```
85 |
86 | ### 2. Gunicorn Kullanımı
87 |
88 | ```bash
89 | pip install gunicorn
90 | gunicorn asgi_app:app -w 4 -k uvicorn.workers.UvicornWorker --bind 0.0.0.0:8000
91 | ```
92 |
93 | ### 3. Nginx Reverse Proxy ile
94 |
95 | 1. Nginx'i yükleyin
96 | 2. Sağlanan `nginx.conf` dosyasını kullanın:
97 |
98 | ```bash
99 | sudo cp nginx.conf /etc/nginx/sites-available/yargi-mcp
100 | sudo ln -s /etc/nginx/sites-available/yargi-mcp /etc/nginx/sites-enabled/
101 | sudo nginx -t
102 | sudo systemctl reload nginx
103 | ```
104 |
105 | ### 4. Systemd Servisi
106 |
107 | `/etc/systemd/system/yargi-mcp.service` dosyasını oluşturun:
108 |
109 | ```ini
110 | [Unit]
111 | Description=Yargı MCP Server
112 | After=network.target
113 |
114 | [Service]
115 | Type=exec
116 | User=www-data
117 | WorkingDirectory=/opt/yargi-mcp
118 | Environment="PATH=/opt/yargi-mcp/venv/bin"
119 | ExecStart=/opt/yargi-mcp/venv/bin/uvicorn asgi_app:app --host 0.0.0.0 --port 8000 --workers 4
120 | Restart=on-failure
121 | RestartSec=5
122 |
123 | [Install]
124 | WantedBy=multi-user.target
125 | ```
126 |
127 | Etkinleştirin ve başlatın:
128 |
129 | ```bash
130 | sudo systemctl enable yargi-mcp
131 | sudo systemctl start yargi-mcp
132 | ```
133 |
134 | ## Cloud Dağıtımı
135 |
136 | ### Heroku
137 |
138 | 1. `Procfile` oluşturun:
139 | ```
140 | web: uvicorn asgi_app:app --host 0.0.0.0 --port $PORT
141 | ```
142 |
143 | 2. Dağıtın:
144 | ```bash
145 | heroku create uygulama-isminiz
146 | git push heroku main
147 | ```
148 |
149 | ### Railway
150 |
151 | 1. `railway.json` ekleyin:
152 | ```json
153 | {
154 | "build": {
155 | "builder": "NIXPACKS"
156 | },
157 | "deploy": {
158 | "startCommand": "uvicorn asgi_app:app --host 0.0.0.0 --port $PORT"
159 | }
160 | }
161 | ```
162 |
163 | 2. Railway CLI veya GitHub entegrasyonu ile dağıtın
164 |
165 | ### Google Cloud Run
166 |
167 | 1. Container oluşturun:
168 | ```bash
169 | docker build -t yargi-mcp .
170 | docker tag yargi-mcp gcr.io/PROJE_ADINIZ/yargi-mcp
171 | docker push gcr.io/PROJE_ADINIZ/yargi-mcp
172 | ```
173 |
174 | 2. Dağıtın:
175 | ```bash
176 | gcloud run deploy yargi-mcp \
177 | --image gcr.io/PROJE_ADINIZ/yargi-mcp \
178 | --platform managed \
179 | --region us-central1 \
180 | --allow-unauthenticated
181 | ```
182 |
183 | ### AWS Lambda (Mangum kullanarak)
184 |
185 | 1. Mangum'u yükleyin:
186 | ```bash
187 | pip install mangum
188 | ```
189 |
190 | 2. `lambda_handler.py` oluşturun:
191 | ```python
192 | from mangum import Mangum
193 | from asgi_app import app
194 |
195 | handler = Mangum(app, lifespan="off")
196 | ```
197 |
198 | 3. AWS SAM veya Serverless Framework kullanarak dağıtın
199 |
200 | ## Docker Dağıtımı
201 |
202 | ### Tek Container
203 |
204 | ```bash
205 | # Oluşturun
206 | docker build -t yargi-mcp .
207 |
208 | # Çalıştırın
209 | docker run -p 8000:8000 --env-file .env yargi-mcp
210 | ```
211 |
212 | ### Docker Compose
213 |
214 | ```bash
215 | # Geliştirme
216 | docker-compose up
217 |
218 | # Nginx ile Production
219 | docker-compose --profile production up
220 |
221 | # Redis önbellekleme ile
222 | docker-compose --profile with-cache up
223 | ```
224 |
225 | ### Kubernetes
226 |
227 | Deployment YAML oluşturun:
228 |
229 | ```yaml
230 | apiVersion: apps/v1
231 | kind: Deployment
232 | metadata:
233 | name: yargi-mcp
234 | spec:
235 | replicas: 3
236 | selector:
237 | matchLabels:
238 | app: yargi-mcp
239 | template:
240 | metadata:
241 | labels:
242 | app: yargi-mcp
243 | spec:
244 | containers:
245 | - name: yargi-mcp
246 | image: yargi-mcp:latest
247 | ports:
248 | - containerPort: 8000
249 | env:
250 | - name: HOST
251 | value: "0.0.0.0"
252 | - name: PORT
253 | value: "8000"
254 | livenessProbe:
255 | httpGet:
256 | path: /health
257 | port: 8000
258 | initialDelaySeconds: 10
259 | periodSeconds: 30
260 | ---
261 | apiVersion: v1
262 | kind: Service
263 | metadata:
264 | name: yargi-mcp-service
265 | spec:
266 | selector:
267 | app: yargi-mcp
268 | ports:
269 | - port: 80
270 | targetPort: 8000
271 | type: LoadBalancer
272 | ```
273 |
274 | ## Güvenlik Hususları
275 |
276 | ### 1. Kimlik Doğrulama
277 |
278 | `API_TOKEN` ortam değişkenini ayarlayarak token kimlik doğrulamasını etkinleştirin:
279 |
280 | ```bash
281 | export API_TOKEN=gizli-token-degeri
282 | ```
283 |
284 | Ardından isteklere ekleyin:
285 | ```bash
286 | curl -H "Authorization: Bearer gizli-token-degeri" http://localhost:8000/api/tools
287 | ```
288 |
289 | ### 2. HTTPS/SSL
290 |
291 | Production için her zaman HTTPS kullanın:
292 |
293 | 1. SSL sertifikası edinin (Let's Encrypt vb.)
294 | 2. Nginx veya cloud sağlayıcıda yapılandırın
295 | 3. `ALLOWED_ORIGINS` değerini https:// kullanacak şekilde güncelleyin
296 |
297 | ### 3. Rate Limiting (Hız Sınırlama)
298 |
299 | Sağlanan Nginx yapılandırması rate limiting içerir:
300 | - API endpoint'leri: 10 istek/saniye
301 | - MCP endpoint: 100 istek/saniye
302 |
303 | ### 4. CORS Yapılandırması
304 |
305 | Production için belirli kaynaklara izin verin:
306 |
307 | ```bash
308 | ALLOWED_ORIGINS=https://app.sizindomain.com,https://www.sizindomain.com
309 | ```
310 |
311 | ## İzleme
312 |
313 | ### Sağlık Kontrolleri
314 |
315 | `/health` endpoint'ini izleyin:
316 |
317 | ```bash
318 | curl http://localhost:8000/health
319 | ```
320 |
321 | Yanıt:
322 | ```json
323 | {
324 | "status": "healthy",
325 | "timestamp": "2024-12-26T10:00:00",
326 | "uptime_seconds": 3600,
327 | "tools_operational": true
328 | }
329 | ```
330 |
331 | ### Loglama
332 |
333 | Ortam değişkeni ile log seviyesini yapılandırın:
334 |
335 | ```bash
336 | LOG_LEVEL=info # veya debug, warning, error
337 | ```
338 |
339 | Loglar şuraya yazılır:
340 | - Konsol (stdout)
341 | - `logs/mcp_server.log` dosyası
342 |
343 | ### Metrikler (Opsiyonel)
344 |
345 | OpenTelemetry desteği için:
346 |
347 | ```bash
348 | pip install opentelemetry-instrumentation-fastapi
349 | ```
350 |
351 | Ortam değişkenlerini ayarlayın:
352 | ```bash
353 | OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317
354 | OTEL_SERVICE_NAME=yargi-mcp-server
355 | ```
356 |
357 | ## Sorun Giderme
358 |
359 | ### Port Zaten Kullanımda
360 |
361 | ```bash
362 | # 8000 portunu kullanan işlemi bulun
363 | lsof -i :8000
364 |
365 | # İşlemi sonlandırın
366 | kill -9 <PID>
367 | ```
368 |
369 | ### İzin Hataları
370 |
371 | Dosya izinlerinin doğru olduğundan emin olun:
372 |
373 | ```bash
374 | chmod +x run_asgi.py
375 | chown -R www-data:www-data /opt/yargi-mcp
376 | ```
377 |
378 | ### Bellek Sorunları
379 |
380 | Büyük belge işleme için worker belleğini artırın:
381 |
382 | ```bash
383 | # systemd servisinde
384 | Environment="PYTHONMALLOC=malloc"
385 | LimitNOFILE=65536
386 | ```
387 |
388 | ### Zaman Aşımı Sorunları
389 |
390 | Zaman aşımlarını ayarlayın:
391 | 1. Uvicorn: `--timeout-keep-alive 75`
392 | 2. Nginx: `proxy_read_timeout 300s;`
393 | 3. Cloud sağlayıcılar: Platform özel zaman aşımı ayarlarını kontrol edin
394 |
395 | ## Performans Ayarlama
396 |
397 | ### 1. Worker İşlemleri
398 |
399 | - Geliştirme: 1 worker
400 | - Production: CPU çekirdeği başına 2-4 worker
401 |
402 | ### 2. Bağlantı Havuzlama
403 |
404 | Sunucu varsayılan olarak httpx ile bağlantı havuzlama kullanır.
405 |
406 | ### 3. Önbellekleme (Gelecek Geliştirme)
407 |
408 | Redis önbellekleme docker-compose ile etkinleştirilebilir:
409 |
410 | ```bash
411 | docker-compose --profile with-cache up
412 | ```
413 |
414 | ### 4. Veritabanı Zaman Aşımları
415 |
416 | `.env` dosyasında veritabanı başına zaman aşımlarını ayarlayın:
417 |
418 | ```bash
419 | YARGITAY_TIMEOUT=60
420 | DANISTAY_TIMEOUT=60
421 | ANAYASA_TIMEOUT=90
422 | ```
423 |
424 | ## Destek
425 |
426 | Sorunlar ve sorular için:
427 | - GitHub Issues: https://github.com/saidsurucu/yargi-mcp/issues
428 | - Dokümantasyon: README.md dosyasına bakın
```
--------------------------------------------------------------------------------
/saidsurucu-yargi-mcp-f5fa007/docs/DEPLOYMENT.md:
--------------------------------------------------------------------------------
```markdown
1 | # Yargı MCP Server Dağıtım Rehberi
2 |
3 | Bu rehber, Yargı MCP Server'ın ASGI web servisi olarak çeşitli dağıtım seçeneklerini kapsar.
4 |
5 | ## İçindekiler
6 |
7 | - [Hızlı Başlangıç](#hızlı-başlangıç)
8 | - [Yerel Geliştirme](#yerel-geliştirme)
9 | - [Production Dağıtımı](#production-dağıtımı)
10 | - [Cloud Dağıtımı](#cloud-dağıtımı)
11 | - [Docker Dağıtımı](#docker-dağıtımı)
12 | - [Güvenlik Hususları](#güvenlik-hususları)
13 | - [İzleme](#izleme)
14 |
15 | ## Hızlı Başlangıç
16 |
17 | ### 1. Bağımlılıkları Yükleyin
18 |
19 | ```bash
20 | # ASGI sunucusu için uvicorn yükleyin
21 | pip install uvicorn
22 |
23 | # Veya tüm bağımlılıklarla birlikte yükleyin
24 | pip install -e .
25 | pip install uvicorn
26 | ```
27 |
28 | ### 2. Sunucuyu Çalıştırın
29 |
30 | ```bash
31 | # Temel başlatma
32 | python run_asgi.py
33 |
34 | # Veya doğrudan uvicorn ile
35 | uvicorn asgi_app:app --host 0.0.0.0 --port 8000
36 | ```
37 |
38 | Sunucu şu adreslerde kullanılabilir olacak:
39 | - MCP Endpoint: `http://localhost:8000/mcp/`
40 | - Sağlık Kontrolü: `http://localhost:8000/health`
41 | - API Durumu: `http://localhost:8000/status`
42 |
43 | ## Yerel Geliştirme
44 |
45 | ### Otomatik Yeniden Yükleme ile Geliştirme Sunucusu
46 |
47 | ```bash
48 | python run_asgi.py --reload --log-level debug
49 | ```
50 |
51 | ### FastAPI Entegrasyonunu Kullanma
52 |
53 | Ek REST API endpoint'leri için:
54 |
55 | ```bash
56 | uvicorn fastapi_app:app --reload
57 | ```
58 |
59 | Bu şunları sağlar:
60 | - `/docs` adresinde interaktif API dokümantasyonu
61 | - `/api/tools` adresinde araç listesi
62 | - `/api/databases` adresinde veritabanı bilgileri
63 |
64 | ### Ortam Değişkenleri
65 |
66 | `.env.example` dosyasını temel alarak bir `.env` dosyası oluşturun:
67 |
68 | ```bash
69 | cp .env.example .env
70 | ```
71 |
72 | Temel değişkenler:
73 | - `HOST`: Sunucu host adresi (varsayılan: 127.0.0.1)
74 | - `PORT`: Sunucu portu (varsayılan: 8000)
75 | - `ALLOWED_ORIGINS`: CORS kökenleri (virgülle ayrılmış)
76 | - `LOG_LEVEL`: Log seviyesi (debug, info, warning, error)
77 |
78 | ## Production Dağıtımı
79 |
80 | ### 1. Uvicorn ile Çoklu Worker Kullanımı
81 |
82 | ```bash
83 | python run_asgi.py --host 0.0.0.0 --port 8000 --workers 4
84 | ```
85 |
86 | ### 2. Gunicorn Kullanımı
87 |
88 | ```bash
89 | pip install gunicorn
90 | gunicorn asgi_app:app -w 4 -k uvicorn.workers.UvicornWorker --bind 0.0.0.0:8000
91 | ```
92 |
93 | ### 3. Nginx Reverse Proxy ile
94 |
95 | 1. Nginx'i yükleyin
96 | 2. Sağlanan `nginx.conf` dosyasını kullanın:
97 |
98 | ```bash
99 | sudo cp nginx.conf /etc/nginx/sites-available/yargi-mcp
100 | sudo ln -s /etc/nginx/sites-available/yargi-mcp /etc/nginx/sites-enabled/
101 | sudo nginx -t
102 | sudo systemctl reload nginx
103 | ```
104 |
105 | ### 4. Systemd Servisi
106 |
107 | `/etc/systemd/system/yargi-mcp.service` dosyasını oluşturun:
108 |
109 | ```ini
110 | [Unit]
111 | Description=Yargı MCP Server
112 | After=network.target
113 |
114 | [Service]
115 | Type=exec
116 | User=www-data
117 | WorkingDirectory=/opt/yargi-mcp
118 | Environment="PATH=/opt/yargi-mcp/venv/bin"
119 | ExecStart=/opt/yargi-mcp/venv/bin/uvicorn asgi_app:app --host 0.0.0.0 --port 8000 --workers 4
120 | Restart=on-failure
121 | RestartSec=5
122 |
123 | [Install]
124 | WantedBy=multi-user.target
125 | ```
126 |
127 | Etkinleştirin ve başlatın:
128 |
129 | ```bash
130 | sudo systemctl enable yargi-mcp
131 | sudo systemctl start yargi-mcp
132 | ```
133 |
134 | ## Cloud Dağıtımı
135 |
136 | ### Heroku
137 |
138 | 1. `Procfile` oluşturun:
139 | ```
140 | web: uvicorn asgi_app:app --host 0.0.0.0 --port $PORT
141 | ```
142 |
143 | 2. Dağıtın:
144 | ```bash
145 | heroku create uygulama-isminiz
146 | git push heroku main
147 | ```
148 |
149 | ### Railway
150 |
151 | 1. `railway.json` ekleyin:
152 | ```json
153 | {
154 | "build": {
155 | "builder": "NIXPACKS"
156 | },
157 | "deploy": {
158 | "startCommand": "uvicorn asgi_app:app --host 0.0.0.0 --port $PORT"
159 | }
160 | }
161 | ```
162 |
163 | 2. Railway CLI veya GitHub entegrasyonu ile dağıtın
164 |
165 | ### Google Cloud Run
166 |
167 | 1. Container oluşturun:
168 | ```bash
169 | docker build -t yargi-mcp .
170 | docker tag yargi-mcp gcr.io/PROJE_ADINIZ/yargi-mcp
171 | docker push gcr.io/PROJE_ADINIZ/yargi-mcp
172 | ```
173 |
174 | 2. Dağıtın:
175 | ```bash
176 | gcloud run deploy yargi-mcp \
177 | --image gcr.io/PROJE_ADINIZ/yargi-mcp \
178 | --platform managed \
179 | --region us-central1 \
180 | --allow-unauthenticated
181 | ```
182 |
183 | ### AWS Lambda (Mangum kullanarak)
184 |
185 | 1. Mangum'u yükleyin:
186 | ```bash
187 | pip install mangum
188 | ```
189 |
190 | 2. `lambda_handler.py` oluşturun:
191 | ```python
192 | from mangum import Mangum
193 | from asgi_app import app
194 |
195 | handler = Mangum(app, lifespan="off")
196 | ```
197 |
198 | 3. AWS SAM veya Serverless Framework kullanarak dağıtın
199 |
200 | ## Docker Dağıtımı
201 |
202 | ### Tek Container
203 |
204 | ```bash
205 | # Oluşturun
206 | docker build -t yargi-mcp .
207 |
208 | # Çalıştırın
209 | docker run -p 8000:8000 --env-file .env yargi-mcp
210 | ```
211 |
212 | ### Docker Compose
213 |
214 | ```bash
215 | # Geliştirme
216 | docker-compose up
217 |
218 | # Nginx ile Production
219 | docker-compose --profile production up
220 |
221 | # Redis önbellekleme ile
222 | docker-compose --profile with-cache up
223 | ```
224 |
225 | ### Kubernetes
226 |
227 | Deployment YAML oluşturun:
228 |
229 | ```yaml
230 | apiVersion: apps/v1
231 | kind: Deployment
232 | metadata:
233 | name: yargi-mcp
234 | spec:
235 | replicas: 3
236 | selector:
237 | matchLabels:
238 | app: yargi-mcp
239 | template:
240 | metadata:
241 | labels:
242 | app: yargi-mcp
243 | spec:
244 | containers:
245 | - name: yargi-mcp
246 | image: yargi-mcp:latest
247 | ports:
248 | - containerPort: 8000
249 | env:
250 | - name: HOST
251 | value: "0.0.0.0"
252 | - name: PORT
253 | value: "8000"
254 | livenessProbe:
255 | httpGet:
256 | path: /health
257 | port: 8000
258 | initialDelaySeconds: 10
259 | periodSeconds: 30
260 | ---
261 | apiVersion: v1
262 | kind: Service
263 | metadata:
264 | name: yargi-mcp-service
265 | spec:
266 | selector:
267 | app: yargi-mcp
268 | ports:
269 | - port: 80
270 | targetPort: 8000
271 | type: LoadBalancer
272 | ```
273 |
274 | ## Güvenlik Hususları
275 |
276 | ### 1. Kimlik Doğrulama
277 |
278 | `API_TOKEN` ortam değişkenini ayarlayarak token kimlik doğrulamasını etkinleştirin:
279 |
280 | ```bash
281 | export API_TOKEN=gizli-token-degeri
282 | ```
283 |
284 | Ardından isteklere ekleyin:
285 | ```bash
286 | curl -H "Authorization: Bearer gizli-token-degeri" http://localhost:8000/api/tools
287 | ```
288 |
289 | ### 2. HTTPS/SSL
290 |
291 | Production için her zaman HTTPS kullanın:
292 |
293 | 1. SSL sertifikası edinin (Let's Encrypt vb.)
294 | 2. Nginx veya cloud sağlayıcıda yapılandırın
295 | 3. `ALLOWED_ORIGINS` değerini https:// kullanacak şekilde güncelleyin
296 |
297 | ### 3. Rate Limiting (Hız Sınırlama)
298 |
299 | Sağlanan Nginx yapılandırması rate limiting içerir:
300 | - API endpoint'leri: 10 istek/saniye
301 | - MCP endpoint: 100 istek/saniye
302 |
303 | ### 4. CORS Yapılandırması
304 |
305 | Production için belirli kaynaklara izin verin:
306 |
307 | ```bash
308 | ALLOWED_ORIGINS=https://app.sizindomain.com,https://www.sizindomain.com
309 | ```
310 |
311 | ## İzleme
312 |
313 | ### Sağlık Kontrolleri
314 |
315 | `/health` endpoint'ini izleyin:
316 |
317 | ```bash
318 | curl http://localhost:8000/health
319 | ```
320 |
321 | Yanıt:
322 | ```json
323 | {
324 | "status": "healthy",
325 | "timestamp": "2024-12-26T10:00:00",
326 | "uptime_seconds": 3600,
327 | "tools_operational": true
328 | }
329 | ```
330 |
331 | ### Loglama
332 |
333 | Ortam değişkeni ile log seviyesini yapılandırın:
334 |
335 | ```bash
336 | LOG_LEVEL=info # veya debug, warning, error
337 | ```
338 |
339 | Loglar şuraya yazılır:
340 | - Konsol (stdout)
341 | - `logs/mcp_server.log` dosyası
342 |
343 | ### Metrikler (Opsiyonel)
344 |
345 | OpenTelemetry desteği için:
346 |
347 | ```bash
348 | pip install opentelemetry-instrumentation-fastapi
349 | ```
350 |
351 | Ortam değişkenlerini ayarlayın:
352 | ```bash
353 | OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317
354 | OTEL_SERVICE_NAME=yargi-mcp-server
355 | ```
356 |
357 | ## Sorun Giderme
358 |
359 | ### Port Zaten Kullanımda
360 |
361 | ```bash
362 | # 8000 portunu kullanan işlemi bulun
363 | lsof -i :8000
364 |
365 | # İşlemi sonlandırın
366 | kill -9 <PID>
367 | ```
368 |
369 | ### İzin Hataları
370 |
371 | Dosya izinlerinin doğru olduğundan emin olun:
372 |
373 | ```bash
374 | chmod +x run_asgi.py
375 | chown -R www-data:www-data /opt/yargi-mcp
376 | ```
377 |
378 | ### Bellek Sorunları
379 |
380 | Büyük belge işleme için worker belleğini artırın:
381 |
382 | ```bash
383 | # systemd servisinde
384 | Environment="PYTHONMALLOC=malloc"
385 | LimitNOFILE=65536
386 | ```
387 |
388 | ### Zaman Aşımı Sorunları
389 |
390 | Zaman aşımlarını ayarlayın:
391 | 1. Uvicorn: `--timeout-keep-alive 75`
392 | 2. Nginx: `proxy_read_timeout 300s;`
393 | 3. Cloud sağlayıcılar: Platform özel zaman aşımı ayarlarını kontrol edin
394 |
395 | ## Performans Ayarlama
396 |
397 | ### 1. Worker İşlemleri
398 |
399 | - Geliştirme: 1 worker
400 | - Production: CPU çekirdeği başına 2-4 worker
401 |
402 | ### 2. Bağlantı Havuzlama
403 |
404 | Sunucu varsayılan olarak httpx ile bağlantı havuzlama kullanır.
405 |
406 | ### 3. Önbellekleme (Gelecek Geliştirme)
407 |
408 | Redis önbellekleme docker-compose ile etkinleştirilebilir:
409 |
410 | ```bash
411 | docker-compose --profile with-cache up
412 | ```
413 |
414 | ### 4. Veritabanı Zaman Aşımları
415 |
416 | `.env` dosyasında veritabanı başına zaman aşımlarını ayarlayın:
417 |
418 | ```bash
419 | YARGITAY_TIMEOUT=60
420 | DANISTAY_TIMEOUT=60
421 | ANAYASA_TIMEOUT=90
422 | ```
423 |
424 | ## Destek
425 |
426 | Sorunlar ve sorular için:
427 | - GitHub Issues: https://github.com/saidsurucu/yargi-mcp/issues
428 | - Dokümantasyon: README.md dosyasına bakın
```
--------------------------------------------------------------------------------
/emsal_mcp_module/client.py:
--------------------------------------------------------------------------------
```python
1 | # emsal_mcp_module/client.py
2 |
3 | import httpx
4 | # from bs4 import BeautifulSoup # Uncomment if needed for advanced HTML pre-processing
5 | from typing import Dict, Any, List, Optional
6 | import logging
7 | import html
8 | import re
9 | import io
10 | from markitdown import MarkItDown
11 |
12 | from .models import (
13 | EmsalSearchRequest,
14 | EmsalDetailedSearchRequestData,
15 | EmsalApiResponse,
16 | EmsalDocumentMarkdown
17 | )
18 |
19 | logger = logging.getLogger(__name__)
20 | if not logger.hasHandlers():
21 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
22 |
class EmsalApiClient:
    """API client for the Emsal (UYAP precedent decision) search system.

    Wraps two endpoints of https://emsal.uyap.gov.tr:
      * POST /aramadetaylist  - detailed decision search
      * GET  /getDokuman?id=... - fetch one decision (JSON with HTML in 'data')

    Fetched decision HTML is converted to Markdown via MarkItDown.
    """
    BASE_URL = "https://emsal.uyap.gov.tr"
    DETAILED_SEARCH_ENDPOINT = "/aramadetaylist"
    DOCUMENT_ENDPOINT = "/getDokuman"

    def __init__(self, request_timeout: float = 30.0):
        """Create the async HTTP client.

        Args:
            request_timeout: Per-request timeout in seconds (default 30).
        """
        self.http_client = httpx.AsyncClient(
            base_url=self.BASE_URL,
            headers={
                "Content-Type": "application/json; charset=UTF-8",
                "Accept": "application/json, text/plain, */*",
                "X-Requested-With": "XMLHttpRequest",
            },
            timeout=request_timeout,
            # SECURITY NOTE: TLS certificate verification is disabled, carried
            # over from the original FastAPI code this client was ported from.
            # Re-enable once the upstream certificate issue is confirmed fixed.
            verify=False
        )
        # One converter reused for every document instead of constructing a
        # new MarkItDown per conversion call.
        self._md_converter = MarkItDown()

    async def search_detailed_decisions(
        self,
        params: EmsalSearchRequest
    ) -> EmsalApiResponse:
        """Performs a detailed search for Emsal decisions.

        Args:
            params: High-level search criteria; translated into the API's
                payload model (which serializes via aliased Turkish names).

        Returns:
            Parsed API response; each decision item gets document_url set.

        Raises:
            httpx.RequestError: On network-level failures.
            Exception: On response validation/processing errors.
        """
        data_for_api_payload = EmsalDetailedSearchRequestData(
            arananKelime=params.keyword or "",
            Bam_Hukuk_Mahkemeleri=params.selected_bam_civil_court,  # Uses alias "Bam Hukuk Mahkemeleri"
            Hukuk_Mahkemeleri=params.selected_civil_court,  # Uses alias "Hukuk Mahkemeleri"
            birimHukukMah="+".join(params.selected_regional_civil_chambers) if params.selected_regional_civil_chambers else "",
            esasYil=params.case_year_esas or "",
            esasIlkSiraNo=params.case_start_seq_esas or "",
            esasSonSiraNo=params.case_end_seq_esas or "",
            kararYil=params.decision_year_karar or "",
            kararIlkSiraNo=params.decision_start_seq_karar or "",
            kararSonSiraNo=params.decision_end_seq_karar or "",
            baslangicTarihi=params.start_date or "",
            bitisTarihi=params.end_date or "",
            siralama=params.sort_criteria,
            siralamaDirection=params.sort_direction,
            pageSize=params.page_size,
            pageNumber=params.page_number
        )

        # Serialize with aliases, then drop empty-string fields: empty values
        # for optional filters are known to cause API issues.
        payload_dict = data_for_api_payload.model_dump(by_alias=True, exclude_none=True)
        cleaned_payload = {k: v for k, v in payload_dict.items() if v != ""}
        final_payload = {"data": cleaned_payload}

        logger.info(f"EmsalApiClient: Performing DETAILED search with payload: {final_payload}")
        return await self._execute_api_search(self.DETAILED_SEARCH_ENDPOINT, final_payload)

    async def _execute_api_search(self, endpoint: str, payload: Dict) -> EmsalApiResponse:
        """POST a search payload, validate the response, and attach document URLs."""
        try:
            response = await self.http_client.post(endpoint, json=payload)
            response.raise_for_status()
            response_json_data = response.json()
            logger.debug(f"EmsalApiClient: Raw API response from {endpoint}: {response_json_data}")

            api_response_parsed = EmsalApiResponse(**response_json_data)

            # Give every hit a direct link to its /getDokuman document.
            if api_response_parsed.data and api_response_parsed.data.data:
                for decision_item in api_response_parsed.data.data:
                    if decision_item.id:
                        decision_item.document_url = f"{self.BASE_URL}{self.DOCUMENT_ENDPOINT}?id={decision_item.id}"

            return api_response_parsed
        except httpx.RequestError as e:
            logger.error(f"EmsalApiClient: HTTP request error during Emsal search to {endpoint}: {e}")
            raise
        except Exception as e:
            logger.error(f"EmsalApiClient: Error processing or validating Emsal search response from {endpoint}: {e}")
            raise

    def _clean_html_and_convert_to_markdown_emsal(self, html_content_from_api_data_field: str) -> Optional[str]:
        """Clean the HTML string from the API 'data' field and convert it to Markdown.

        The /getDokuman response is JSON whose 'data' field holds an escaped
        HTML document (same shape as Yargitay's endpoint).

        Returns:
            Markdown text, or None when the input is empty or the conversion
            fails (conversion errors are logged, not raised - best effort).
        """
        if not html_content_from_api_data_field:
            return None

        # Undo HTML entity escaping plus the JSON-style backslash escapes the
        # API embeds in the HTML string.
        content = html.unescape(html_content_from_api_data_field)
        content = content.replace('\\"', '"')
        content = content.replace('\\r\\n', '\n')
        content = content.replace('\\n', '\n')
        content = content.replace('\\t', '\t')

        markdown_text = None
        try:
            # Feed MarkItDown a BytesIO stream to avoid temp-file creation.
            html_stream = io.BytesIO(content.encode('utf-8'))
            conversion_result = self._md_converter.convert(html_stream)
            markdown_text = conversion_result.text_content
            logger.info("EmsalApiClient: HTML to Markdown conversion successful.")
        except Exception as e:
            logger.error(f"EmsalApiClient: Error during MarkItDown HTML to Markdown conversion for Emsal: {e}")

        return markdown_text

    async def get_decision_document_as_markdown(self, id: str) -> EmsalDocumentMarkdown:
        """Retrieve one Emsal decision by ID and return its content as Markdown.

        Args:
            id: Decision document identifier as returned by the search API.

        Returns:
            EmsalDocumentMarkdown; markdown_content is None when the API
            returned no usable HTML.

        Raises:
            httpx.RequestError: On network-level failures.
            ValueError: When the response body is not valid JSON.
        """
        document_api_url = f"{self.DOCUMENT_ENDPOINT}?id={id}"
        source_url = f"{self.BASE_URL}{document_api_url}"
        logger.info(f"EmsalApiClient: Fetching Emsal document for Markdown (ID: {id}) from {source_url}")

        try:
            response = await self.http_client.get(document_api_url)
            response.raise_for_status()

            # /getDokuman returns JSON with the HTML document in 'data'.
            response_json = response.json()
            html_content_from_api = response_json.get("data")

            if not isinstance(html_content_from_api, str) or not html_content_from_api.strip():
                logger.warning(f"EmsalApiClient: Received empty or non-string HTML in 'data' field for Emsal ID {id}.")
                return EmsalDocumentMarkdown(id=id, markdown_content=None, source_url=source_url)

            markdown_content = self._clean_html_and_convert_to_markdown_emsal(html_content_from_api)

            return EmsalDocumentMarkdown(
                id=id,
                markdown_content=markdown_content,
                source_url=source_url
            )
        except httpx.RequestError as e:
            logger.error(f"EmsalApiClient: HTTP error fetching Emsal document (ID: {id}): {e}")
            raise
        except ValueError as e:
            logger.error(f"EmsalApiClient: ValueError processing Emsal document response (ID: {id}): {e}")
            raise
        except Exception as e:
            logger.error(f"EmsalApiClient: General error processing Emsal document (ID: {id}): {e}")
            raise

    async def close_client_session(self):
        """Close the underlying HTTPX client session (safe to call repeatedly)."""
        if self.http_client and not self.http_client.is_closed:
            await self.http_client.aclose()
            logger.info("EmsalApiClient: HTTP client session closed.")
```
--------------------------------------------------------------------------------
/saidsurucu-yargi-mcp-f5fa007/emsal_mcp_module/client.py:
--------------------------------------------------------------------------------
```python
1 | # emsal_mcp_module/client.py
2 |
3 | import httpx
4 | # from bs4 import BeautifulSoup # Uncomment if needed for advanced HTML pre-processing
5 | from typing import Dict, Any, List, Optional
6 | import logging
7 | import html
8 | import re
9 | import io
10 | from markitdown import MarkItDown
11 |
12 | from .models import (
13 | EmsalSearchRequest,
14 | EmsalDetailedSearchRequestData,
15 | EmsalApiResponse,
16 | EmsalDocumentMarkdown
17 | )
18 |
19 | logger = logging.getLogger(__name__)
20 | if not logger.hasHandlers():
21 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
22 |
class EmsalApiClient:
    """Async client for the Emsal (UYAP precedent decision) search service.

    Runs detailed searches against emsal.uyap.gov.tr and fetches individual
    decisions, converting their HTML payloads into Markdown.
    """
    BASE_URL = "https://emsal.uyap.gov.tr"
    DETAILED_SEARCH_ENDPOINT = "/aramadetaylist"
    DOCUMENT_ENDPOINT = "/getDokuman"

    def __init__(self, request_timeout: float = 30.0):
        """Initialise the shared httpx.AsyncClient used for every request."""
        default_headers = {
            "Content-Type": "application/json; charset=UTF-8",
            "Accept": "application/json, text/plain, */*",
            "X-Requested-With": "XMLHttpRequest",
        }
        self.http_client = httpx.AsyncClient(
            base_url=self.BASE_URL,
            headers=default_headers,
            timeout=request_timeout,
            verify=False,  # As per user's original FastAPI code
        )

    async def search_detailed_decisions(
        self,
        params: EmsalSearchRequest
    ) -> EmsalApiResponse:
        """Translate *params* into the API payload model and execute the search."""
        joined_chambers = "+".join(params.selected_regional_civil_chambers) if params.selected_regional_civil_chambers else ""

        request_model = EmsalDetailedSearchRequestData(
            arananKelime=params.keyword or "",
            Bam_Hukuk_Mahkemeleri=params.selected_bam_civil_court,  # serialized under alias "Bam Hukuk Mahkemeleri"
            Hukuk_Mahkemeleri=params.selected_civil_court,  # serialized under alias "Hukuk Mahkemeleri"
            birimHukukMah=joined_chambers,
            esasYil=params.case_year_esas or "",
            esasIlkSiraNo=params.case_start_seq_esas or "",
            esasSonSiraNo=params.case_end_seq_esas or "",
            kararYil=params.decision_year_karar or "",
            kararIlkSiraNo=params.decision_start_seq_karar or "",
            kararSonSiraNo=params.decision_end_seq_karar or "",
            baslangicTarihi=params.start_date or "",
            bitisTarihi=params.end_date or "",
            siralama=params.sort_criteria,
            siralamaDirection=params.sort_direction,
            pageSize=params.page_size,
            pageNumber=params.page_number
        )

        # Empty-string fields are known to upset the upstream API, so only
        # populated fields make it into the outgoing payload.
        dumped = request_model.model_dump(by_alias=True, exclude_none=True)
        final_payload = {"data": {key: value for key, value in dumped.items() if value != ""}}

        logger.info(f"EmsalApiClient: Performing DETAILED search with payload: {final_payload}")
        return await self._execute_api_search(self.DETAILED_SEARCH_ENDPOINT, final_payload)

    async def _execute_api_search(self, endpoint: str, payload: Dict) -> EmsalApiResponse:
        """POST *payload* to *endpoint*, validate, and attach per-hit document URLs."""
        try:
            http_response = await self.http_client.post(endpoint, json=payload)
            http_response.raise_for_status()
            body = http_response.json()
            logger.debug(f"EmsalApiClient: Raw API response from {endpoint}: {body}")

            parsed = EmsalApiResponse(**body)

            # Each result gets a direct /getDokuman link derived from its id.
            hits = parsed.data.data if (parsed.data and parsed.data.data) else []
            for hit in hits:
                if hit.id:
                    hit.document_url = f"{self.BASE_URL}{self.DOCUMENT_ENDPOINT}?id={hit.id}"

            return parsed
        except httpx.RequestError as e:
            logger.error(f"EmsalApiClient: HTTP request error during Emsal search to {endpoint}: {e}")
            raise
        except Exception as e:
            logger.error(f"EmsalApiClient: Error processing or validating Emsal search response from {endpoint}: {e}")
            raise

    def _clean_html_and_convert_to_markdown_emsal(self, html_content_from_api_data_field: str) -> Optional[str]:
        """Unescape the HTML held in the API 'data' field and render it as Markdown.

        Returns None for empty input or when conversion fails (failures are
        logged rather than raised - conversion is best effort).
        """
        if not html_content_from_api_data_field:
            return None

        # Undo entity escaping plus the JSON-style backslash escapes embedded
        # in the HTML string.
        cleaned = html.unescape(html_content_from_api_data_field)
        for escaped, plain in (('\\"', '"'), ('\\r\\n', '\n'), ('\\n', '\n'), ('\\t', '\t')):
            cleaned = cleaned.replace(escaped, plain)

        try:
            # A BytesIO stream lets MarkItDown work without a temp file.
            stream = io.BytesIO(cleaned.encode('utf-8'))
            markdown = MarkItDown().convert(stream).text_content
            logger.info("EmsalApiClient: HTML to Markdown conversion successful.")
            return markdown
        except Exception as e:
            logger.error(f"EmsalApiClient: Error during MarkItDown HTML to Markdown conversion for Emsal: {e}")
            return None

    async def get_decision_document_as_markdown(self, id: str) -> EmsalDocumentMarkdown:
        """Fetch one decision by *id* and return it wrapped with its Markdown.

        Assumes the /getDokuman endpoint replies with JSON carrying HTML in
        its 'data' field.
        """
        document_api_url = f"{self.DOCUMENT_ENDPOINT}?id={id}"
        source_url = f"{self.BASE_URL}{document_api_url}"
        logger.info(f"EmsalApiClient: Fetching Emsal document for Markdown (ID: {id}) from {source_url}")

        try:
            response = await self.http_client.get(document_api_url)
            response.raise_for_status()

            raw_html = response.json().get("data")

            # Bail out early when the payload carries no usable HTML.
            if not isinstance(raw_html, str) or not raw_html.strip():
                logger.warning(f"EmsalApiClient: Received empty or non-string HTML in 'data' field for Emsal ID {id}.")
                return EmsalDocumentMarkdown(id=id, markdown_content=None, source_url=source_url)

            return EmsalDocumentMarkdown(
                id=id,
                markdown_content=self._clean_html_and_convert_to_markdown_emsal(raw_html),
                source_url=source_url
            )
        except httpx.RequestError as e:
            logger.error(f"EmsalApiClient: HTTP error fetching Emsal document (ID: {id}): {e}")
            raise
        except ValueError as e:
            logger.error(f"EmsalApiClient: ValueError processing Emsal document response (ID: {id}): {e}")
            raise
        except Exception as e:
            logger.error(f"EmsalApiClient: General error processing Emsal document (ID: {id}): {e}")
            raise

    async def close_client_session(self):
        """Close the underlying HTTPX session if it is still open."""
        if self.http_client is None or self.http_client.is_closed:
            return
        await self.http_client.aclose()
        logger.info("EmsalApiClient: HTTP client session closed.")
```
--------------------------------------------------------------------------------
/bedesten_mcp_module/client.py:
--------------------------------------------------------------------------------
```python
1 | # bedesten_mcp_module/client.py
2 |
3 | import httpx
4 | import base64
5 | from typing import Optional
6 | import logging
7 | from markitdown import MarkItDown
8 | import io
9 |
10 | from .models import (
11 | BedestenSearchRequest, BedestenSearchResponse,
12 | BedestenDocumentRequest, BedestenDocumentResponse,
13 | BedestenDocumentMarkdown, BedestenDocumentRequestData
14 | )
15 | from .enums import get_full_birim_adi
16 |
17 | logger = logging.getLogger(__name__)
18 |
class BedestenApiClient:
    """
    API Client for Bedesten (bedesten.adalet.gov.tr) - Alternative legal decision search system.
    Currently used for Yargıtay decisions, but can be extended for other court types.
    """
    BASE_URL = "https://bedesten.adalet.gov.tr"
    SEARCH_ENDPOINT = "/emsal-karar/searchDocuments"
    DOCUMENT_ENDPOINT = "/emsal-karar/getDocumentContent"

    def __init__(self, request_timeout: float = 60.0):
        """Create the shared async HTTP client with the browser-like headers the API expects.

        Args:
            request_timeout: Per-request timeout in seconds.
        """
        self.http_client = httpx.AsyncClient(
            base_url=self.BASE_URL,
            headers={
                "Accept": "*/*",
                "Accept-Language": "tr-TR,tr;q=0.9,en-US;q=0.8,en;q=0.7",
                "AdaletApplicationName": "UyapMevzuat",
                "Content-Type": "application/json; charset=utf-8",
                "Origin": "https://mevzuat.adalet.gov.tr",
                "Referer": "https://mevzuat.adalet.gov.tr/",
                "Sec-Fetch-Dest": "empty",
                "Sec-Fetch-Mode": "cors",
                "Sec-Fetch-Site": "same-site",
                "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36"
            },
            timeout=request_timeout
        )

    async def search_documents(self, search_request: BedestenSearchRequest) -> BedestenSearchResponse:
        """
        Search for documents using Bedesten API.
        Currently supports: YARGITAYKARARI, DANISTAYKARARI, YERELHUKMAHKARARI, etc.

        Raises:
            httpx.RequestError: On network-level failures.
            Exception: On unexpected/invalid response payloads (incl. validation errors).
        """
        logger.info(f"BedestenApiClient: Searching documents with phrase: {search_request.data.phrase}")

        # Map abbreviated birimAdi to full Turkish name before sending to API.
        # FIX: the mapping is applied to the serialized payload only, so the
        # caller's search_request object is no longer mutated as a side effect.
        original_birim_adi = search_request.data.birimAdi
        mapped_birim_adi = get_full_birim_adi(original_birim_adi)
        if original_birim_adi != "ALL":
            logger.info(f"BedestenApiClient: Mapped birimAdi '{original_birim_adi}' to '{mapped_birim_adi}'")

        try:
            # Create request dict, inject the mapped value, and drop birimAdi if empty
            request_dict = search_request.model_dump()
            request_dict["data"]["birimAdi"] = mapped_birim_adi
            if not request_dict["data"]["birimAdi"]:  # Remove if empty string
                del request_dict["data"]["birimAdi"]

            response = await self.http_client.post(
                self.SEARCH_ENDPOINT,
                json=request_dict
            )
            response.raise_for_status()
            response_json = response.json()

            # Parse and return the response
            return BedestenSearchResponse(**response_json)

        except httpx.RequestError as e:
            logger.error(f"BedestenApiClient: HTTP request error during search: {e}")
            raise
        except Exception as e:
            logger.error(f"BedestenApiClient: Error processing search response: {e}")
            raise

    async def get_document_as_markdown(self, document_id: str) -> BedestenDocumentMarkdown:
        """
        Get document content and convert to markdown.
        Handles both HTML (text/html) and PDF (application/pdf) content types.

        Raises:
            ValueError: When the response lacks data/content/mimeType, or the
                content is not valid base64.
            httpx.RequestError: On network-level failures.
        """
        logger.info(f"BedestenApiClient: Fetching document for markdown conversion (ID: {document_id})")

        try:
            # Prepare request envelope
            doc_request = BedestenDocumentRequest(
                data=BedestenDocumentRequestData(documentId=document_id)
            )

            # Get document
            response = await self.http_client.post(
                self.DOCUMENT_ENDPOINT,
                json=doc_request.model_dump()
            )
            response.raise_for_status()
            response_json = response.json()
            doc_response = BedestenDocumentResponse(**response_json)

            # Null safety: the API may return envelopes with missing fields
            if not hasattr(doc_response, 'data') or doc_response.data is None:
                raise ValueError("Document response does not contain data")

            if not hasattr(doc_response.data, 'content') or doc_response.data.content is None:
                raise ValueError("Document data does not contain content")

            if not hasattr(doc_response.data, 'mimeType') or doc_response.data.mimeType is None:
                raise ValueError("Document data does not contain mimeType")

            # Decode base64 content with error handling
            try:
                content_bytes = base64.b64decode(doc_response.data.content)
            except Exception as e:
                raise ValueError(f"Failed to decode base64 content: {str(e)}")

            mime_type = doc_response.data.mimeType

            logger.info(f"BedestenApiClient: Document mime type: {mime_type}")

            # Convert to markdown based on mime type
            if mime_type == "text/html":
                html_content = content_bytes.decode('utf-8')
                markdown_content = self._convert_html_to_markdown(html_content)
            elif mime_type == "application/pdf":
                markdown_content = self._convert_pdf_to_markdown(content_bytes)
            else:
                logger.warning(f"Unsupported mime type: {mime_type}")
                markdown_content = f"Unsupported content type: {mime_type}. Unable to convert to markdown."

            return BedestenDocumentMarkdown(
                documentId=document_id,
                markdown_content=markdown_content,
                source_url=f"{self.BASE_URL}/document/{document_id}",
                mime_type=mime_type
            )

        except httpx.RequestError as e:
            logger.error(f"BedestenApiClient: HTTP error fetching document {document_id}: {e}")
            raise
        except Exception as e:
            logger.error(f"BedestenApiClient: Error processing document {document_id}: {e}")
            raise

    def _convert_html_to_markdown(self, html_content: str) -> Optional[str]:
        """Convert HTML to Markdown using MarkItDown.

        Returns None for empty input, or an error string when conversion fails.
        """
        if not html_content:
            return None

        try:
            # Convert HTML string to bytes and create BytesIO stream
            html_bytes = html_content.encode('utf-8')
            html_stream = io.BytesIO(html_bytes)

            # Pass BytesIO stream to MarkItDown to avoid temp file creation
            md_converter = MarkItDown()
            result = md_converter.convert(html_stream)
            markdown_content = result.text_content

            logger.info("Successfully converted HTML to Markdown")
            return markdown_content

        except Exception as e:
            logger.error(f"Error converting HTML to Markdown: {e}")
            return f"Error converting HTML content: {str(e)}"

    def _convert_pdf_to_markdown(self, pdf_bytes: bytes) -> Optional[str]:
        """Convert PDF to Markdown using MarkItDown.

        Returns None for empty input, or an error string when conversion fails.
        """
        if not pdf_bytes:
            return None

        try:
            # Create BytesIO stream from PDF bytes
            pdf_stream = io.BytesIO(pdf_bytes)

            # Pass BytesIO stream to MarkItDown to avoid temp file creation
            md_converter = MarkItDown()
            result = md_converter.convert(pdf_stream)
            markdown_content = result.text_content

            logger.info("Successfully converted PDF to Markdown")
            return markdown_content

        except Exception as e:
            logger.error(f"Error converting PDF to Markdown: {e}")
            return f"Error converting PDF content: {str(e)}. The document may be corrupted or in an unsupported format."

    async def close_client_session(self):
        """Close HTTP client session"""
        await self.http_client.aclose()
        logger.info("BedestenApiClient: HTTP client session closed.")
```
--------------------------------------------------------------------------------
/saidsurucu-yargi-mcp-f5fa007/yargitay_mcp_module/client.py:
--------------------------------------------------------------------------------
```python
1 | # yargitay_mcp_module/client.py
2 |
3 | import httpx
4 | from bs4 import BeautifulSoup # Still needed for pre-processing HTML before markitdown
5 | from typing import Dict, Any, List, Optional
6 | import logging
7 | import html
8 | import re
9 | import io
10 | from markitdown import MarkItDown
11 |
12 | from .models import (
13 | YargitayDetailedSearchRequest,
14 | YargitayApiSearchResponse,
15 | YargitayApiDecisionEntry,
16 | YargitayDocumentMarkdown,
17 | CompactYargitaySearchResult
18 | )
19 |
logger = logging.getLogger(__name__)
# Basic logging configuration if no handlers are configured
# (avoids installing duplicate handlers when the host app already set up logging)
if not logger.hasHandlers():
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
24 |
class YargitayOfficialApiClient:
    """
    API Client for Yargitay's official decision search system.
    Targets the detailed search endpoint (e.g., /aramadetaylist) based on user-provided payload.

    All methods share a single httpx.AsyncClient; call close_client_session()
    when finished to release the connection pool.
    """
    BASE_URL = "https://karararama.yargitay.gov.tr"
    # The form action was "/detayliArama". This often maps to an API endpoint like "/aramadetaylist".
    # This should be confirmed with the actual API.
    DETAILED_SEARCH_ENDPOINT = "/aramadetaylist"
    DOCUMENT_ENDPOINT = "/getDokuman"

    def __init__(self, request_timeout: float = 60.0):
        """Create the shared async HTTP client.

        Args:
            request_timeout: Per-request timeout in seconds.
        """
        self.http_client = httpx.AsyncClient(
            base_url=self.BASE_URL,
            headers={
                "Content-Type": "application/json; charset=UTF-8",
                "Accept": "application/json, text/plain, */*",
                "X-Requested-With": "XMLHttpRequest",
                "X-KL-KIS-Ajax-Request": "Ajax_Request", # Seen in a Yargitay client example
                "Referer": f"{self.BASE_URL}/" # Some APIs might check referer
            },
            timeout=request_timeout,
            # NOTE(review): disabling certificate verification is a security risk;
            # kept because the original integration relied on it — confirm whether
            # the server's TLS chain validates before removing.
            verify=False # SSL verification disabled as per original user code - use with caution
        )

    async def search_detailed_decisions(
        self,
        search_params: YargitayDetailedSearchRequest
    ) -> YargitayApiSearchResponse:
        """
        Performs a detailed search for decisions in Yargitay
        using the structured search_params.

        Degenerate API responses (None, non-dict, or a None 'data' field) are
        normalized to an empty result set instead of failing validation.

        Raises:
            httpx.RequestError: On network-level failures.
            Exception: On response validation errors (incl. Pydantic errors).
        """
        # Create the main payload structure with the 'data' key
        request_payload = {"data": search_params.model_dump(exclude_none=True, by_alias=True)}

        logger.info(f"YargitayOfficialApiClient: Performing detailed search with payload: {request_payload}")

        try:
            response = await self.http_client.post(self.DETAILED_SEARCH_ENDPOINT, json=request_payload)
            response.raise_for_status() # Raise an exception for HTTP 4xx or 5xx status codes
            response_json_data = response.json()

            logger.debug(f"YargitayOfficialApiClient: Raw API response: {response_json_data}")

            # Handle None or empty data response from API
            if response_json_data is None:
                logger.warning("YargitayOfficialApiClient: API returned None response")
                response_json_data = {"data": {"data": [], "recordsTotal": 0, "recordsFiltered": 0}}
            elif not isinstance(response_json_data, dict):
                logger.warning(f"YargitayOfficialApiClient: API returned unexpected response type: {type(response_json_data)}")
                response_json_data = {"data": {"data": [], "recordsTotal": 0, "recordsFiltered": 0}}
            elif response_json_data.get("data") is None:
                logger.warning("YargitayOfficialApiClient: API response data field is None")
                response_json_data["data"] = {"data": [], "recordsTotal": 0, "recordsFiltered": 0}

            # Validate and parse the response using Pydantic models
            api_response = YargitayApiSearchResponse(**response_json_data)

            # Populate the document_url for each decision entry
            if api_response.data and api_response.data.data:
                for decision_item in api_response.data.data:
                    decision_item.document_url = f"{self.BASE_URL}{self.DOCUMENT_ENDPOINT}?id={decision_item.id}"

            return api_response

        except httpx.RequestError as e:
            logger.error(f"YargitayOfficialApiClient: HTTP request error during detailed search: {e}")
            raise # Re-raise to be handled by the calling MCP tool
        except Exception as e: # Catches Pydantic ValidationErrors as well
            logger.error(f"YargitayOfficialApiClient: Error processing or validating detailed search response: {e}")
            raise

    def _convert_html_to_markdown(self, html_from_api_data_field: str) -> Optional[str]:
        """
        Takes raw HTML string (from Yargitay API 'data' field for a document),
        pre-processes it, and converts it to Markdown using MarkItDown.
        Returns only the Markdown string or None if conversion fails.
        """
        if not html_from_api_data_field:
            return None

        # Pre-process HTML: unescape entities and fix common escaped sequences
        # Based on user's original fix_html_content
        processed_html = html.unescape(html_from_api_data_field)
        processed_html = processed_html.replace('\\"', '"')
        processed_html = processed_html.replace('\\r\\n', '\n')
        processed_html = processed_html.replace('\\n', '\n')
        processed_html = processed_html.replace('\\t', '\t')

        # MarkItDown often works best with a full HTML document structure.
        # The Yargitay /getDokuman response already provides a full <html>...</html> string.
        # If it were just a fragment, we might wrap it like:
        # html_to_convert = f"<html><head><meta charset=\"UTF-8\"></head><body>{processed_html}</body></html>"
        # But since it's already a full HTML string in "data":
        html_to_convert = processed_html

        markdown_output = None
        try:
            # Convert HTML string to bytes and create BytesIO stream
            html_bytes = html_to_convert.encode('utf-8')
            html_stream = io.BytesIO(html_bytes)

            # Pass BytesIO stream to MarkItDown to avoid temp file creation
            md_converter = MarkItDown()
            conversion_result = md_converter.convert(html_stream)
            markdown_output = conversion_result.text_content

            logger.info("Successfully converted HTML to Markdown.")

        except Exception as e:
            # Conversion failure is non-fatal here: caller receives None.
            logger.error(f"Error during MarkItDown HTML to Markdown conversion: {e}")

        return markdown_output

    async def get_decision_document_as_markdown(self, id: str) -> YargitayDocumentMarkdown:
        """
        Retrieves a specific Yargitay decision by its ID and returns its content
        as Markdown.
        Based on user-provided /getDokuman response structure.

        Raises:
            httpx.RequestError: On network-level failures.
            ValueError: When the JSON envelope lacks a string 'data' field.
        """
        document_api_url = f"{self.DOCUMENT_ENDPOINT}?id={id}"
        source_url = f"{self.BASE_URL}{document_api_url}" # The original URL of the document
        logger.info(f"YargitayOfficialApiClient: Fetching document for Markdown conversion (ID: {id})")

        try:
            response = await self.http_client.get(document_api_url)
            response.raise_for_status()

            # Expecting JSON response with HTML content in the 'data' field
            response_json = response.json()
            html_content_from_api = response_json.get("data")

            if not isinstance(html_content_from_api, str):
                logger.error(f"YargitayOfficialApiClient: 'data' field in API response is not a string or not found (ID: {id}).")
                raise ValueError("Expected HTML content not found in API response's 'data' field.")

            markdown_content = self._convert_html_to_markdown(html_content_from_api)

            return YargitayDocumentMarkdown(
                id=id,
                markdown_content=markdown_content,
                source_url=source_url
            )
        except httpx.RequestError as e:
            logger.error(f"YargitayOfficialApiClient: HTTP error fetching document for Markdown (ID: {id}): {e}")
            raise
        except ValueError as e: # For JSON parsing errors or missing 'data' field
            logger.error(f"YargitayOfficialApiClient: Error processing document response for Markdown (ID: {id}): {e}")
            raise
        except Exception as e: # For other unexpected errors
            logger.error(f"YargitayOfficialApiClient: General error fetching/processing document for Markdown (ID: {id}): {e}")
            raise

    async def close_client_session(self):
        """Closes the HTTPX client session."""
        await self.http_client.aclose()
        logger.info("YargitayOfficialApiClient: HTTP client session closed.")
```
--------------------------------------------------------------------------------
/yargitay_mcp_module/client.py:
--------------------------------------------------------------------------------
```python
1 | # yargitay_mcp_module/client.py
2 |
3 | import httpx
4 | from bs4 import BeautifulSoup # Still needed for pre-processing HTML before markitdown
5 | from typing import Dict, Any, List, Optional
6 | import logging
7 | import html
8 | import re
9 | import io
10 | from markitdown import MarkItDown
11 |
12 | from .models import (
13 | YargitayDetailedSearchRequest,
14 | YargitayApiSearchResponse,
15 | YargitayApiDecisionEntry,
16 | YargitayDocumentMarkdown,
17 | CompactYargitaySearchResult
18 | )
19 |
logger = logging.getLogger(__name__)
# Basic logging configuration if no handlers are configured
# (skipped when the embedding application has already configured logging)
if not logger.hasHandlers():
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
24 |
class YargitayOfficialApiClient:
    """
    Async client for Yargitay's official decision-search service.

    Talks to the detailed-search endpoint (/aramadetaylist) and the
    /getDokuman document endpoint, returning decision content as Markdown.
    A single httpx.AsyncClient is shared by all calls; use
    close_client_session() to release it.
    """
    BASE_URL = "https://karararama.yargitay.gov.tr"
    # The web form posts to "/detayliArama"; the JSON API behind it appears to
    # be "/aramadetaylist". Confirm against the live API if behaviour changes.
    DETAILED_SEARCH_ENDPOINT = "/aramadetaylist"
    DOCUMENT_ENDPOINT = "/getDokuman"

    def __init__(self, request_timeout: float = 60.0):
        """Build the shared AsyncClient with the headers the API expects."""
        default_headers = {
            "Content-Type": "application/json; charset=UTF-8",
            "Accept": "application/json, text/plain, */*",
            "X-Requested-With": "XMLHttpRequest",
            "X-KL-KIS-Ajax-Request": "Ajax_Request",  # observed in a Yargitay client example
            "Referer": f"{self.BASE_URL}/",  # some endpoints check the referer
        }
        self.http_client = httpx.AsyncClient(
            base_url=self.BASE_URL,
            headers=default_headers,
            timeout=request_timeout,
            verify=False,  # SSL verification intentionally disabled (legacy behaviour) - use with caution
        )

    async def search_detailed_decisions(
        self,
        search_params: YargitayDetailedSearchRequest
    ) -> YargitayApiSearchResponse:
        """Run a detailed decision search and return the parsed API response.

        Degenerate payloads (None, non-dict, None 'data') are replaced with an
        empty result set before validation.
        """
        request_payload = {"data": search_params.model_dump(exclude_none=True, by_alias=True)}
        logger.info(f"YargitayOfficialApiClient: Performing detailed search with payload: {request_payload}")

        try:
            http_response = await self.http_client.post(self.DETAILED_SEARCH_ENDPOINT, json=request_payload)
            http_response.raise_for_status()
            body = http_response.json()

            logger.debug(f"YargitayOfficialApiClient: Raw API response: {body}")

            # Normalise degenerate API responses to an empty result set so
            # Pydantic validation below cannot fail on missing structure.
            empty_result = {"data": [], "recordsTotal": 0, "recordsFiltered": 0}
            if body is None:
                logger.warning("YargitayOfficialApiClient: API returned None response")
                body = {"data": empty_result}
            elif not isinstance(body, dict):
                logger.warning(f"YargitayOfficialApiClient: API returned unexpected response type: {type(body)}")
                body = {"data": empty_result}
            elif body.get("data") is None:
                logger.warning("YargitayOfficialApiClient: API response data field is None")
                body["data"] = empty_result

            parsed = YargitayApiSearchResponse(**body)

            # Attach a direct document URL to every decision entry.
            if parsed.data and parsed.data.data:
                for entry in parsed.data.data:
                    entry.document_url = f"{self.BASE_URL}{self.DOCUMENT_ENDPOINT}?id={entry.id}"

            return parsed

        except httpx.RequestError as e:
            logger.error(f"YargitayOfficialApiClient: HTTP request error during detailed search: {e}")
            raise  # let the calling MCP tool handle it
        except Exception as e:  # includes Pydantic validation errors
            logger.error(f"YargitayOfficialApiClient: Error processing or validating detailed search response: {e}")
            raise

    def _convert_html_to_markdown(self, html_from_api_data_field: str) -> Optional[str]:
        """Pre-clean the API's HTML payload and convert it to Markdown.

        Returns the Markdown text, or None when the input is empty or the
        conversion fails.
        """
        if not html_from_api_data_field:
            return None

        # Undo HTML entities, then the JS-style escape sequences seen in the payload.
        cleaned = html.unescape(html_from_api_data_field)
        for escaped, plain in (('\\"', '"'), ('\\r\\n', '\n'), ('\\n', '\n'), ('\\t', '\t')):
            cleaned = cleaned.replace(escaped, plain)

        # /getDokuman already returns a complete <html> document, so the
        # cleaned string can be handed to MarkItDown as-is (no wrapping needed).
        markdown_output = None
        try:
            stream = io.BytesIO(cleaned.encode('utf-8'))  # BytesIO avoids temp-file creation
            conversion = MarkItDown().convert(stream)
            markdown_output = conversion.text_content
            logger.info("Successfully converted HTML to Markdown.")
        except Exception as e:
            logger.error(f"Error during MarkItDown HTML to Markdown conversion: {e}")

        return markdown_output

    async def get_decision_document_as_markdown(self, id: str) -> YargitayDocumentMarkdown:
        """Fetch one decision by ID and return its content converted to Markdown.

        Raises httpx.RequestError on network failure and ValueError when the
        JSON envelope lacks a string 'data' field.
        """
        document_api_url = f"{self.DOCUMENT_ENDPOINT}?id={id}"
        source_url = f"{self.BASE_URL}{document_api_url}"  # original location of the document
        logger.info(f"YargitayOfficialApiClient: Fetching document for Markdown conversion (ID: {id})")

        try:
            response = await self.http_client.get(document_api_url)
            response.raise_for_status()

            # The endpoint wraps the HTML document in a JSON envelope: {"data": "<html>..."}
            envelope = response.json()
            raw_html = envelope.get("data")

            if not isinstance(raw_html, str):
                logger.error(f"YargitayOfficialApiClient: 'data' field in API response is not a string or not found (ID: {id}).")
                raise ValueError("Expected HTML content not found in API response's 'data' field.")

            return YargitayDocumentMarkdown(
                id=id,
                markdown_content=self._convert_html_to_markdown(raw_html),
                source_url=source_url
            )
        except httpx.RequestError as e:
            logger.error(f"YargitayOfficialApiClient: HTTP error fetching document for Markdown (ID: {id}): {e}")
            raise
        except ValueError as e:  # JSON parsing errors or missing 'data' field
            logger.error(f"YargitayOfficialApiClient: Error processing document response for Markdown (ID: {id}): {e}")
            raise
        except Exception as e:  # anything else unexpected
            logger.error(f"YargitayOfficialApiClient: General error fetching/processing document for Markdown (ID: {id}): {e}")
            raise

    async def close_client_session(self):
        """Close the shared HTTPX client session."""
        await self.http_client.aclose()
        logger.info("YargitayOfficialApiClient: HTTP client session closed.")
```
--------------------------------------------------------------------------------
/danistay_mcp_module/client.py:
--------------------------------------------------------------------------------
```python
1 | # danistay_mcp_module/client.py
2 |
3 | import httpx
4 | from bs4 import BeautifulSoup
5 | from typing import Dict, Any, List, Optional
6 | import logging
7 | import html
8 | import re
9 | import io
10 | from markitdown import MarkItDown
11 |
12 | from .models import (
13 | DanistayKeywordSearchRequest,
14 | DanistayDetailedSearchRequest,
15 | DanistayApiResponse,
16 | DanistayDocumentMarkdown,
17 | DanistayKeywordSearchRequestData,
18 | DanistayDetailedSearchRequestData
19 | )
20 |
logger = logging.getLogger(__name__)
# Fall back to a basic stderr configuration when the host application
# has not configured any handlers for this logger hierarchy.
if not logger.hasHandlers():
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
24 |
class DanistayApiClient:
    """
    Async client for the Danistay (Council of State) decision-search service.

    Exposes keyword search, detailed search, and Markdown retrieval of a
    single decision document. One httpx.AsyncClient is shared by all calls.
    """
    BASE_URL = "https://karararama.danistay.gov.tr"
    KEYWORD_SEARCH_ENDPOINT = "/aramalist"
    DETAILED_SEARCH_ENDPOINT = "/aramadetaylist"
    DOCUMENT_ENDPOINT = "/getDokuman"

    def __init__(self, request_timeout: float = 30.0):
        """Set up the shared HTTP client used by the search endpoints."""
        search_headers = {
            "Content-Type": "application/json; charset=UTF-8",  # for the search endpoints
            "Accept": "application/json, text/plain, */*",  # for the search endpoints
            "X-Requested-With": "XMLHttpRequest",
        }
        self.http_client = httpx.AsyncClient(
            base_url=self.BASE_URL,
            headers=search_headers,
            timeout=request_timeout,
            verify=False,
        )

    def _prepare_keywords_for_api(self, keywords: List[str]) -> List[str]:
        """Wrap each non-blank keyword in double quotes, as the API expects."""
        quoted = []
        for keyword in keywords:
            if keyword and keyword.strip():
                quoted.append('"' + keyword.strip('"') + '"')
        return quoted

    async def search_keyword_decisions(
        self,
        params: DanistayKeywordSearchRequest
    ) -> DanistayApiResponse:
        """Run a keyword-based search and return the parsed response."""
        request_data = DanistayKeywordSearchRequestData(
            andKelimeler=self._prepare_keywords_for_api(params.andKelimeler),
            orKelimeler=self._prepare_keywords_for_api(params.orKelimeler),
            notAndKelimeler=self._prepare_keywords_for_api(params.notAndKelimeler),
            notOrKelimeler=self._prepare_keywords_for_api(params.notOrKelimeler),
            pageSize=params.pageSize,
            pageNumber=params.pageNumber
        )
        payload = {"data": request_data.model_dump(exclude_none=True)}
        logger.info(f"DanistayApiClient: Performing KEYWORD search via {self.KEYWORD_SEARCH_ENDPOINT} with payload: {payload}")
        return await self._execute_api_search(self.KEYWORD_SEARCH_ENDPOINT, payload)

    async def search_detailed_decisions(
        self,
        params: DanistayDetailedSearchRequest
    ) -> DanistayApiResponse:
        """Run a detailed (structured) search and return the parsed response."""
        request_data = DanistayDetailedSearchRequestData(
            daire=params.daire or "",
            esasYil=params.esasYil or "",
            esasIlkSiraNo=params.esasIlkSiraNo or "",
            esasSonSiraNo=params.esasSonSiraNo or "",
            kararYil=params.kararYil or "",
            kararIlkSiraNo=params.kararIlkSiraNo or "",
            kararSonSiraNo=params.kararSonSiraNo or "",
            baslangicTarihi=params.baslangicTarihi or "",
            bitisTarihi=params.bitisTarihi or "",
            mevzuatNumarasi=params.mevzuatNumarasi or "",
            mevzuatAdi=params.mevzuatAdi or "",
            madde=params.madde or "",
            siralama="1",
            siralamaDirection="desc",
            pageSize=params.pageSize,
            pageNumber=params.pageNumber
        )
        # Serialize everything, then drop empty-string fields the API rejects.
        serialized = request_data.model_dump(exclude_defaults=False, exclude_none=False)
        non_empty = {field: value for field, value in serialized.items() if value != ""}
        payload = {"data": non_empty}
        logger.info(f"DanistayApiClient: Performing DETAILED search via {self.DETAILED_SEARCH_ENDPOINT} with payload: {payload}")
        return await self._execute_api_search(self.DETAILED_SEARCH_ENDPOINT, payload)

    async def _execute_api_search(self, endpoint: str, payload: Dict) -> DanistayApiResponse:
        """POST the payload, validate the response, and attach document URLs."""
        try:
            http_response = await self.http_client.post(endpoint, json=payload)
            http_response.raise_for_status()
            body = http_response.json()
            logger.debug(f"DanistayApiClient: Raw API response from {endpoint}: {body}")
            parsed = DanistayApiResponse(**body)
            if parsed.data and parsed.data.data:
                for entry in parsed.data.data:
                    if entry.id:
                        entry.document_url = f"{self.BASE_URL}{self.DOCUMENT_ENDPOINT}?id={entry.id}"
            return parsed
        except httpx.RequestError as e:
            logger.error(f"DanistayApiClient: HTTP request error during search to {endpoint}: {e}")
            raise
        except Exception as e:
            logger.error(f"DanistayApiClient: Error processing or validating search response from {endpoint}: {e}")
            raise

    def _convert_html_to_markdown_danistay(self, direct_html_content: str) -> Optional[str]:
        """Convert the raw HTML returned by /getDokuman into Markdown text.

        Returns None when the input is empty or the conversion fails.
        """
        if not direct_html_content:
            return None

        # Pre-clean: unescape entities and normalise JS-style escape sequences.
        cleaned = html.unescape(direct_html_content)
        for escaped, plain in (('\\"', '"'), ('\\r\\n', '\n'), ('\\n', '\n'), ('\\t', '\t')):
            cleaned = cleaned.replace(escaped, plain)

        markdown_text = None
        try:
            # Feed MarkItDown a BytesIO stream so no temp file is created.
            stream = io.BytesIO(cleaned.encode('utf-8'))
            conversion = MarkItDown().convert(stream)
            markdown_text = conversion.text_content
            logger.info("DanistayApiClient: HTML to Markdown conversion successful.")
        except Exception as e:
            logger.error(f"DanistayApiClient: Error during MarkItDown HTML to Markdown conversion: {e}")

        return markdown_text

    async def get_decision_document_as_markdown(self, id: str) -> DanistayDocumentMarkdown:
        """
        Retrieve one Danistay decision by ID and return its content as Markdown.

        The /getDokuman endpoint requires an arananKelime query parameter; an
        empty value satisfies that requirement.
        """
        document_api_url = f"{self.DOCUMENT_ENDPOINT}?id={id}&arananKelime="
        source_url = f"{self.BASE_URL}{document_api_url}"
        logger.info(f"DanistayApiClient: Fetching Danistay document for Markdown (ID: {id}) from {source_url}")

        try:
            response = await self.http_client.get(document_api_url)
            response.raise_for_status()

            # The endpoint responds with the HTML document directly (no JSON envelope).
            raw_html = response.text

            if not isinstance(raw_html, str) or not raw_html.strip():
                logger.warning(f"DanistayApiClient: Received empty or non-string HTML content for ID {id}.")
                # Effectively empty HTML -> markdown_content stays None.
                return DanistayDocumentMarkdown(
                    id=id,
                    markdown_content=None,
                    source_url=source_url
                )

            return DanistayDocumentMarkdown(
                id=id,
                markdown_content=self._convert_html_to_markdown_danistay(raw_html),
                source_url=source_url
            )
        except httpx.RequestError as e:
            logger.error(f"DanistayApiClient: HTTP error fetching Danistay document (ID: {id}): {e}")
            raise
        except Exception as e:  # e.g. MarkItDown failures that propagate
            logger.error(f"DanistayApiClient: General error processing Danistay document (ID: {id}): {e}")
            raise

    async def close_client_session(self):
        """Close the shared HTTPX session if it is still open."""
        if self.http_client and not self.http_client.is_closed:
            await self.http_client.aclose()
            logger.info("DanistayApiClient: HTTP client session closed.")
```
--------------------------------------------------------------------------------
/saidsurucu-yargi-mcp-f5fa007/danistay_mcp_module/client.py:
--------------------------------------------------------------------------------
```python
1 | # danistay_mcp_module/client.py
2 |
3 | import httpx
4 | from bs4 import BeautifulSoup
5 | from typing import Dict, Any, List, Optional
6 | import logging
7 | import html
8 | import re
9 | import io
10 | from markitdown import MarkItDown
11 |
12 | from .models import (
13 | DanistayKeywordSearchRequest,
14 | DanistayDetailedSearchRequest,
15 | DanistayApiResponse,
16 | DanistayDocumentMarkdown,
17 | DanistayKeywordSearchRequestData,
18 | DanistayDetailedSearchRequestData
19 | )
20 |
21 | logger = logging.getLogger(__name__)
22 | if not logger.hasHandlers():
23 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
24 |
class DanistayApiClient:
    """
    Async client for the Danıştay (Council of State) decision-search API.

    Exposes keyword search, detailed (field-based) search, and retrieval of a
    single decision rendered as Markdown. All requests go through one shared
    httpx.AsyncClient bound to BASE_URL.
    """

    BASE_URL = "https://karararama.danistay.gov.tr"
    KEYWORD_SEARCH_ENDPOINT = "/aramalist"
    DETAILED_SEARCH_ENDPOINT = "/aramadetaylist"
    DOCUMENT_ENDPOINT = "/getDokuman"

    def __init__(self, request_timeout: float = 30.0):
        """
        Args:
            request_timeout: Per-request timeout in seconds for all HTTP calls.
        """
        self.http_client = httpx.AsyncClient(
            base_url=self.BASE_URL,
            headers={
                "Content-Type": "application/json; charset=UTF-8",  # for the search endpoints
                "Accept": "application/json, text/plain, */*",  # for the search endpoints
                "X-Requested-With": "XMLHttpRequest",
            },
            timeout=request_timeout,
            # SECURITY NOTE: TLS certificate verification is disabled, presumably
            # because of the court server's certificate chain. This permits
            # man-in-the-middle interception; re-enable once the upstream
            # certificate issue is resolved.
            verify=False
        )
        # One shared converter instead of constructing MarkItDown per document
        # (consistent with BddkApiClient, which also keeps a single instance).
        self._md_converter = MarkItDown()

    def _prepare_keywords_for_api(self, keywords: List[str]) -> List[str]:
        """Wrap each non-blank keyword in double quotes, as the API expects."""
        return ['"' + k.strip('"') + '"' for k in keywords if k and k.strip()]

    async def search_keyword_decisions(
        self,
        params: "DanistayKeywordSearchRequest"
    ) -> "DanistayApiResponse":
        """
        Search decisions by AND/OR/NOT keyword lists.

        Args:
            params: Keyword lists plus pagination settings.

        Returns:
            Parsed API response with document URLs attached to each decision.
        """
        data_for_payload = DanistayKeywordSearchRequestData(
            andKelimeler=self._prepare_keywords_for_api(params.andKelimeler),
            orKelimeler=self._prepare_keywords_for_api(params.orKelimeler),
            notAndKelimeler=self._prepare_keywords_for_api(params.notAndKelimeler),
            notOrKelimeler=self._prepare_keywords_for_api(params.notOrKelimeler),
            pageSize=params.pageSize,
            pageNumber=params.pageNumber
        )
        final_payload = {"data": data_for_payload.model_dump(exclude_none=True)}
        logger.info(f"DanistayApiClient: Performing KEYWORD search via {self.KEYWORD_SEARCH_ENDPOINT} with payload: {final_payload}")
        return await self._execute_api_search(self.KEYWORD_SEARCH_ENDPOINT, final_payload)

    async def search_detailed_decisions(
        self,
        params: "DanistayDetailedSearchRequest"
    ) -> "DanistayApiResponse":
        """
        Search decisions by chamber, case/decision numbers, date range and
        legislation references.

        Args:
            params: Detailed search criteria plus pagination settings.

        Returns:
            Parsed API response with document URLs attached to each decision.
        """
        data_for_payload = DanistayDetailedSearchRequestData(
            daire=params.daire or "",
            esasYil=params.esasYil or "",
            esasIlkSiraNo=params.esasIlkSiraNo or "",
            esasSonSiraNo=params.esasSonSiraNo or "",
            kararYil=params.kararYil or "",
            kararIlkSiraNo=params.kararIlkSiraNo or "",
            kararSonSiraNo=params.kararSonSiraNo or "",
            baslangicTarihi=params.baslangicTarihi or "",
            bitisTarihi=params.bitisTarihi or "",
            mevzuatNumarasi=params.mevzuatNumarasi or "",
            mevzuatAdi=params.mevzuatAdi or "",
            madde=params.madde or "",
            siralama="1",
            siralamaDirection="desc",
            pageSize=params.pageSize,
            pageNumber=params.pageNumber
        )
        # Drop empty-string fields before sending: the API misbehaves when
        # blank criteria are included in the payload.
        payload_dict = data_for_payload.model_dump(exclude_defaults=False, exclude_none=False)
        cleaned_payload = {k: v for k, v in payload_dict.items() if v != ""}
        final_payload = {"data": cleaned_payload}
        logger.info(f"DanistayApiClient: Performing DETAILED search via {self.DETAILED_SEARCH_ENDPOINT} with payload: {final_payload}")
        return await self._execute_api_search(self.DETAILED_SEARCH_ENDPOINT, final_payload)

    async def _execute_api_search(self, endpoint: str, payload: Dict) -> "DanistayApiResponse":
        """
        POST *payload* to *endpoint*, validate the JSON response, and attach a
        document_url to every returned decision.

        Raises:
            httpx.RequestError: on network-level failures.
            Exception: on HTTP error status or response validation problems.
        """
        try:
            response = await self.http_client.post(endpoint, json=payload)
            response.raise_for_status()
            response_json_data = response.json()
            logger.debug(f"DanistayApiClient: Raw API response from {endpoint}: {response_json_data}")
            api_response_parsed = DanistayApiResponse(**response_json_data)
            if api_response_parsed.data and api_response_parsed.data.data:
                for decision_item in api_response_parsed.data.data:
                    if decision_item.id:
                        decision_item.document_url = f"{self.BASE_URL}{self.DOCUMENT_ENDPOINT}?id={decision_item.id}"
            return api_response_parsed
        except httpx.RequestError as e:
            logger.error(f"DanistayApiClient: HTTP request error during search to {endpoint}: {e}")
            raise
        except Exception as e:
            logger.error(f"DanistayApiClient: Error processing or validating search response from {endpoint}: {e}")
            raise

    def _convert_html_to_markdown_danistay(self, direct_html_content: str) -> Optional[str]:
        """
        Convert direct HTML content (from Danıştay /getDokuman) to Markdown.

        Returns None when the input is empty or conversion fails; conversion
        failures are logged rather than raised so callers still receive a
        document envelope.
        """
        if not direct_html_content:
            return None

        # Pre-clean: unescape HTML entities and normalize JS-escaped sequences
        # that occasionally leak into the payload.
        processed_html = html.unescape(direct_html_content)
        processed_html = processed_html.replace('\\"', '"')
        processed_html = processed_html.replace('\\r\\n', '\n').replace('\\n', '\n').replace('\\t', '\t')

        markdown_text = None
        try:
            # Feed MarkItDown a BytesIO stream to avoid temp-file creation.
            html_stream = io.BytesIO(processed_html.encode('utf-8'))
            conversion_result = self._md_converter.convert(html_stream)
            markdown_text = conversion_result.text_content
            logger.info("DanistayApiClient: HTML to Markdown conversion successful.")
        except Exception as e:
            logger.error(f"DanistayApiClient: Error during MarkItDown HTML to Markdown conversion: {e}")

        return markdown_text

    async def get_decision_document_as_markdown(self, id: str) -> "DanistayDocumentMarkdown":
        """
        Retrieves a specific Danıştay decision by ID and returns its content as Markdown.
        The /getDokuman endpoint for Danıştay requires the arananKelime parameter.
        """
        from urllib.parse import quote
        # arananKelime is mandatory for this endpoint; an empty value suffices.
        # The id is URL-encoded so unusual characters cannot break the query.
        document_api_url = f"{self.DOCUMENT_ENDPOINT}?id={quote(str(id), safe='')}&arananKelime="
        source_url = f"{self.BASE_URL}{document_api_url}"
        logger.info(f"DanistayApiClient: Fetching Danistay document for Markdown (ID: {id}) from {source_url}")

        try:
            response = await self.http_client.get(document_api_url)
            response.raise_for_status()

            # Danıştay /getDokuman directly returns HTML text.
            html_content_from_api = response.text

            if not isinstance(html_content_from_api, str) or not html_content_from_api.strip():
                logger.warning(f"DanistayApiClient: Received empty or non-string HTML content for ID {id}.")
                # Return with None markdown_content if HTML is effectively empty.
                return DanistayDocumentMarkdown(
                    id=id,
                    markdown_content=None,
                    source_url=source_url
                )

            markdown_content = self._convert_html_to_markdown_danistay(html_content_from_api)

            return DanistayDocumentMarkdown(
                id=id,
                markdown_content=markdown_content,
                source_url=source_url
            )
        except httpx.RequestError as e:
            logger.error(f"DanistayApiClient: HTTP error fetching Danistay document (ID: {id}): {e}")
            raise
        except Exception as e:  # e.g. MarkItDown failures that propagate
            logger.error(f"DanistayApiClient: General error processing Danistay document (ID: {id}): {e}")
            raise

    async def close_client_session(self):
        """Closes the HTTPX client session."""
        if self.http_client and not self.http_client.is_closed:
            await self.http_client.aclose()
            logger.info("DanistayApiClient: HTTP client session closed.")
```
--------------------------------------------------------------------------------
/bddk_mcp_module/client.py:
--------------------------------------------------------------------------------
```python
1 | # bddk_mcp_module/client.py
2 |
3 | import httpx
4 | from typing import List, Optional, Dict, Any
5 | import logging
6 | import os
7 | import re
8 | import io
9 | import math
10 | from urllib.parse import urlparse
11 | from markitdown import MarkItDown
12 |
13 | from .models import (
14 | BddkSearchRequest,
15 | BddkDecisionSummary,
16 | BddkSearchResult,
17 | BddkDocumentMarkdown
18 | )
19 |
20 | logger = logging.getLogger(__name__)
21 | if not logger.hasHandlers():
22 | logging.basicConfig(
23 | level=logging.INFO,
24 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
25 | )
26 |
class BddkApiClient:
    """
    API client for searching and retrieving BDDK (Banking Regulation Authority)
    decisions, using the Tavily Search API for discovery and direct HTTP
    requests for content retrieval.
    """

    TAVILY_API_URL = "https://api.tavily.com/search"
    BDDK_BASE_URL = "https://www.bddk.org.tr"
    DOCUMENT_URL_TEMPLATE = "https://www.bddk.org.tr/Mevzuat/DokumanGetir/{document_id}"
    DOCUMENT_MARKDOWN_CHUNK_SIZE = 5000  # characters of Markdown served per page

    def __init__(self, request_timeout: float = 60.0):
        """
        Args:
            request_timeout: Timeout in seconds applied to all HTTP requests.
        """
        self.tavily_api_key = os.getenv("TAVILY_API_KEY")
        if not self.tavily_api_key:
            # SECURITY NOTE: a development token is hard-coded as a fallback.
            # It is exposed in source control and should be rotated/removed;
            # prefer setting TAVILY_API_KEY in the environment.
            self.tavily_api_key = "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"
            logger.info("Using fallback Tavily API token (development token)")
        else:
            logger.info("Using Tavily API key from environment variable")

        self.http_client = httpx.AsyncClient(
            headers={
                "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"
            },
            timeout=httpx.Timeout(request_timeout)
        )
        self.markitdown = MarkItDown()

    async def close_client_session(self):
        """Close the HTTP client session."""
        await self.http_client.aclose()
        logger.info("BddkApiClient: HTTP client session closed.")

    def _extract_document_id(self, url: str) -> Optional[str]:
        """
        Extract a numeric document ID from any known BDDK URL format.

        Recognized shapes, checked in priority order:
        /Mevzuat/DokumanGetir/<id>, /Liste/<id>, and ...?ekId=<id>.
        Returns None when no pattern matches.
        """
        for pattern in (r'/DokumanGetir/(\d+)', r'/Liste/(\d+)', r'ekId=(\d+)'):
            match = re.search(pattern, url)
            if match:
                return match.group(1)
        return None

    async def search_decisions(
        self,
        request: "BddkSearchRequest"
    ) -> "BddkSearchResult":
        """
        Search for BDDK decisions using the Tavily API.

        Args:
            request: Search request parameters.

        Returns:
            BddkSearchResult with matching decisions.

        Raises:
            Exception: on authentication failure or any other search error.
        """
        try:
            headers = {
                "Content-Type": "application/json",
                "Authorization": f"Bearer {self.tavily_api_key}"
            }

            # Quoting "Karar Sayısı" biases results toward decision documents.
            query = f"{request.keywords} \"Karar Sayısı\""
            payload = {
                "query": query,
                "country": "turkey",
                "include_domains": ["https://www.bddk.org.tr/Mevzuat/DokumanGetir"],
                "max_results": request.pageSize,
                "search_depth": "advanced"
            }

            # Tavily has no offset-based pagination; pages beyond the first
            # cannot actually be fetched and will repeat the same results.
            if request.page > 1:
                logger.warning(f"Tavily API doesn't support pagination. Page {request.page} requested.")

            response = await self.http_client.post(
                self.TAVILY_API_URL,
                json=payload,
                headers=headers
            )
            response.raise_for_status()

            data = response.json()

            logger.info(f"Tavily returned {len(data.get('results', []))} results")

            # Keep only results whose URL yields a parseable document ID.
            decisions = []
            for result in data.get("results", []):
                url = result.get("url", "")
                logger.debug(f"Processing URL: {url}")
                doc_id = self._extract_document_id(url)
                if doc_id:
                    decision = BddkDecisionSummary(
                        title=result.get("title", "").replace("[PDF] ", "").strip(),
                        document_id=doc_id,
                        content=result.get("content", "")[:500]  # cap summary length
                    )
                    decisions.append(decision)
                    logger.debug(f"Added decision: {decision.title} (ID: {doc_id})")
                else:
                    logger.warning(f"Could not extract document ID from URL: {url}")

            return BddkSearchResult(
                decisions=decisions,
                total_results=len(data.get("results", [])),
                page=request.page,
                pageSize=request.pageSize
            )

        except httpx.HTTPStatusError as e:
            logger.error(f"HTTP error searching BDDK decisions: {e}")
            if e.response.status_code == 401:
                raise Exception("Tavily API authentication failed. Check API key.")
            raise Exception(f"Failed to search BDDK decisions: {str(e)}")
        except Exception as e:
            logger.error(f"Error searching BDDK decisions: {e}")
            raise Exception(f"Failed to search BDDK decisions: {str(e)}")

    async def get_document_markdown(
        self,
        document_id: str,
        page_number: int = 1
    ) -> "BddkDocumentMarkdown":
        """
        Retrieve a BDDK document and convert it to Markdown format.

        Args:
            document_id: BDDK document ID (e.g., '310').
            page_number: Page number for paginated content (1-indexed; values
                below 1 are clamped to 1).

        Returns:
            BddkDocumentMarkdown with paginated content.

        Raises:
            Exception: when the document cannot be found, fetched, or converted.
        """
        try:
            # Documents live under several different BDDK routes; try each.
            potential_urls = [
                f"https://www.bddk.org.tr/Mevzuat/DokumanGetir/{document_id}",
                f"https://www.bddk.org.tr/Mevzuat/Liste/{document_id}",
                f"https://www.bddk.org.tr/KurumHakkinda/EkGetir/13?ekId={document_id}",
                f"https://www.bddk.org.tr/KurumHakkinda/EkGetir/5?ekId={document_id}"
            ]

            document_url = None
            response = None

            # Try each URL pattern until one works. httpx.HTTPError also covers
            # connection/timeout errors, so a transient failure on one candidate
            # URL no longer aborts the whole lookup.
            for url in potential_urls:
                try:
                    logger.info(f"Trying BDDK document URL: {url}")
                    response = await self.http_client.get(
                        url,
                        follow_redirects=True
                    )
                    response.raise_for_status()
                    document_url = url
                    break
                except httpx.HTTPError:
                    continue

            if not response or not document_url:
                raise Exception(f"Could not find document with ID {document_id}")

            logger.info(f"Successfully fetched BDDK document from: {document_url}")

            # Choose the MarkItDown input hint from the response content type;
            # MarkItDown handles both PDF and HTML streams without temp files.
            content_type = response.headers.get("content-type", "").lower()
            file_extension = ".pdf" if "pdf" in content_type else ".html"
            stream = io.BytesIO(response.content)
            result = self.markitdown.convert_stream(stream, file_extension=file_extension)
            markdown_content = result.text_content.strip()

            # Paginate; an empty document still reports a single (empty) page.
            total_pages = max(1, math.ceil(len(markdown_content) / self.DOCUMENT_MARKDOWN_CHUNK_SIZE))
            page_number = max(1, page_number)  # guard against 0/negative pages
            start_idx = (page_number - 1) * self.DOCUMENT_MARKDOWN_CHUNK_SIZE
            end_idx = start_idx + self.DOCUMENT_MARKDOWN_CHUNK_SIZE
            page_content = markdown_content[start_idx:end_idx]

            return BddkDocumentMarkdown(
                document_id=document_id,
                markdown_content=page_content,
                page_number=page_number,
                total_pages=total_pages
            )

        except httpx.HTTPStatusError as e:
            logger.error(f"HTTP error fetching BDDK document {document_id}: {e}")
            raise Exception(f"Failed to fetch BDDK document: {str(e)}")
        except Exception as e:
            logger.error(f"Error processing BDDK document {document_id}: {e}")
            raise Exception(f"Failed to process BDDK document: {str(e)}")
```
--------------------------------------------------------------------------------
/saidsurucu-yargi-mcp-f5fa007/bddk_mcp_module/client.py:
--------------------------------------------------------------------------------
```python
1 | # bddk_mcp_module/client.py
2 |
3 | import httpx
4 | from typing import List, Optional, Dict, Any
5 | import logging
6 | import os
7 | import re
8 | import io
9 | import math
10 | from urllib.parse import urlparse
11 | from markitdown import MarkItDown
12 |
13 | from .models import (
14 | BddkSearchRequest,
15 | BddkDecisionSummary,
16 | BddkSearchResult,
17 | BddkDocumentMarkdown
18 | )
19 |
20 | logger = logging.getLogger(__name__)
21 | if not logger.hasHandlers():
22 | logging.basicConfig(
23 | level=logging.INFO,
24 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
25 | )
26 |
class BddkApiClient:
    """
    Client for BDDK (Banking Regulation Authority) decisions.

    Discovery is delegated to the Tavily Search API; document bodies are then
    fetched directly from bddk.org.tr and converted to Markdown.
    """

    TAVILY_API_URL = "https://api.tavily.com/search"
    BDDK_BASE_URL = "https://www.bddk.org.tr"
    DOCUMENT_URL_TEMPLATE = "https://www.bddk.org.tr/Mevzuat/DokumanGetir/{document_id}"
    DOCUMENT_MARKDOWN_CHUNK_SIZE = 5000  # characters of Markdown served per page

    def __init__(self, request_timeout: float = 60.0):
        """Initialize the BDDK API client."""
        self.tavily_api_key = os.getenv("TAVILY_API_KEY")
        if self.tavily_api_key:
            logger.info("Using Tavily API key from environment variable")
        else:
            # No key in the environment: fall back to the bundled dev token.
            self.tavily_api_key = "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"
            logger.info("Using fallback Tavily API token (development token)")

        self.http_client = httpx.AsyncClient(
            headers={
                "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"
            },
            timeout=httpx.Timeout(request_timeout)
        )
        self.markitdown = MarkItDown()

    async def close_client_session(self):
        """Close the HTTP client session."""
        await self.http_client.aclose()
        logger.info("BddkApiClient: HTTP client session closed.")

    def _extract_document_id(self, url: str) -> Optional[str]:
        """Extract document ID from BDDK URL."""
        # Known URL shapes, tried in priority order:
        #   /Mevzuat/DokumanGetir/310  -> "310"
        #   /Liste/55                  -> "55"
        #   /EkGetir/13?ekId=381       -> "381"
        for id_pattern in (r'/DokumanGetir/(\d+)', r'/Liste/(\d+)', r'ekId=(\d+)'):
            hit = re.search(id_pattern, url)
            if hit:
                return hit.group(1)
        return None

    async def search_decisions(
        self,
        request: BddkSearchRequest
    ) -> BddkSearchResult:
        """
        Run a Tavily search for BDDK decision documents.

        Args:
            request: Search request parameters

        Returns:
            BddkSearchResult with matching decisions
        """
        try:
            # Quoting "Karar Sayısı" biases results toward decision documents.
            tavily_query = f"{request.keywords} \"Karar Sayısı\""
            request_body = {
                "query": tavily_query,
                "country": "turkey",
                "include_domains": ["https://www.bddk.org.tr/Mevzuat/DokumanGetir"],
                "max_results": request.pageSize,
                "search_depth": "advanced"
            }
            auth_headers = {
                "Content-Type": "application/json",
                "Authorization": f"Bearer {self.tavily_api_key}"
            }

            # Tavily offers no offset parameter, so pages beyond the first
            # cannot actually be fetched.
            if request.page > 1:
                logger.warning(f"Tavily API doesn't support pagination. Page {request.page} requested.")

            response = await self.http_client.post(
                self.TAVILY_API_URL,
                json=request_body,
                headers=auth_headers
            )
            response.raise_for_status()
            tavily_data = response.json()

            raw_results = tavily_data.get("results", [])
            logger.info(f"Tavily returned {len(raw_results)} results")

            # Keep only results whose URL yields a parseable document ID.
            summaries = []
            for raw in raw_results:
                result_url = raw.get("url", "")
                logger.debug(f"Processing URL: {result_url}")
                doc_id = self._extract_document_id(result_url)
                if not doc_id:
                    logger.warning(f"Could not extract document ID from URL: {result_url}")
                    continue
                summary = BddkDecisionSummary(
                    title=raw.get("title", "").replace("[PDF] ", "").strip(),
                    document_id=doc_id,
                    content=raw.get("content", "")[:500]  # cap summary length
                )
                summaries.append(summary)
                logger.debug(f"Added decision: {summary.title} (ID: {doc_id})")

            return BddkSearchResult(
                decisions=summaries,
                total_results=len(raw_results),
                page=request.page,
                pageSize=request.pageSize
            )

        except httpx.HTTPStatusError as e:
            logger.error(f"HTTP error searching BDDK decisions: {e}")
            if e.response.status_code == 401:
                raise Exception("Tavily API authentication failed. Check API key.")
            raise Exception(f"Failed to search BDDK decisions: {str(e)}")
        except Exception as e:
            logger.error(f"Error searching BDDK decisions: {e}")
            raise Exception(f"Failed to search BDDK decisions: {str(e)}")

    async def get_document_markdown(
        self,
        document_id: str,
        page_number: int = 1
    ) -> BddkDocumentMarkdown:
        """
        Fetch one BDDK document and return a single Markdown page of it.

        Args:
            document_id: BDDK document ID (e.g., '310')
            page_number: Page number for paginated content (1-indexed)

        Returns:
            BddkDocumentMarkdown with paginated content
        """
        try:
            # Candidate endpoints hosting documents under different routes.
            candidate_urls = (
                f"https://www.bddk.org.tr/Mevzuat/DokumanGetir/{document_id}",
                f"https://www.bddk.org.tr/Mevzuat/Liste/{document_id}",
                f"https://www.bddk.org.tr/KurumHakkinda/EkGetir/13?ekId={document_id}",
                f"https://www.bddk.org.tr/KurumHakkinda/EkGetir/5?ekId={document_id}",
            )

            response = None
            document_url = None
            for candidate in candidate_urls:
                logger.info(f"Trying BDDK document URL: {candidate}")
                try:
                    candidate_response = await self.http_client.get(
                        candidate,
                        follow_redirects=True
                    )
                    candidate_response.raise_for_status()
                except httpx.HTTPStatusError:
                    continue
                response = candidate_response
                document_url = candidate
                break

            if not response or not document_url:
                raise Exception(f"Could not find document with ID {document_id}")

            logger.info(f"Successfully fetched BDDK document from: {document_url}")

            # Choose the MarkItDown input hint from the response content type.
            content_type = response.headers.get("content-type", "").lower()
            extension = ".pdf" if "pdf" in content_type else ".html"
            converted = self.markitdown.convert_stream(
                io.BytesIO(response.content), file_extension=extension
            )
            markdown_content = converted.text_content.strip()

            # Slice out the requested fixed-size page.
            chunk = self.DOCUMENT_MARKDOWN_CHUNK_SIZE
            total_pages = math.ceil(len(markdown_content) / chunk)
            start = (page_number - 1) * chunk
            page_content = markdown_content[start:start + chunk]

            return BddkDocumentMarkdown(
                document_id=document_id,
                markdown_content=page_content,
                page_number=page_number,
                total_pages=total_pages
            )

        except httpx.HTTPStatusError as e:
            logger.error(f"HTTP error fetching BDDK document {document_id}: {e}")
            raise Exception(f"Failed to fetch BDDK document: {str(e)}")
        except Exception as e:
            logger.error(f"Error processing BDDK document {document_id}: {e}")
            raise Exception(f"Failed to process BDDK document: {str(e)}")
```
--------------------------------------------------------------------------------
/analyze_kik_hash_generation.py:
--------------------------------------------------------------------------------
```python
1 | #!/usr/bin/env python3
2 |
3 | """
4 | Analyze KİK v2 hash generation by examining JavaScript code patterns
5 | and trying to reverse engineer the hash generation logic.
6 | """
7 |
8 | import asyncio
9 | import json
10 | import hashlib
11 | import hmac
12 | import base64
13 | from fastmcp import Client
14 | from mcp_server_main import app
15 |
def analyze_webpack_hash_patterns():
    """
    Catalogue the hash/ID generation patterns visible in the KİK webpack bundle
    and print a short report about the target document hash.

    Returns:
        Dict with three pattern groups: "webpack_chunks" (chunk id -> hash),
        "zone_symbols" and "module_federation" (lists of identifier patterns).
    """
    print("🔍 Analyzing webpack hash generation patterns...")

    # Chunk hashes lifted from the bundle's chunk-loading table.
    chunk_hashes = {
        315: "d9a9486a4f5ba326",
        531: "cd8fb385c88033ae",
        671: "04c48b287646627a",
        856: "682c9a7b87351f90",
        1017: "9de022378fc275f6",
        # ... many more from the __webpack_require__.u function
    }

    hash_patterns = {
        # Webpack chunk system hashes (from the JS code)
        "webpack_chunks": chunk_hashes,
        # Symbol generation from Zone.js
        "zone_symbols": [
            "__zone_symbol__",
            "__Zone_symbol_prefix",
            "Zone.__symbol__",
        ],
        # Angular module federation patterns
        "module_federation": [
            "__webpack_modules__",
            "__webpack_module_cache__",
            "__webpack_require__",
        ],
    }

    # The document hash we are trying to reproduce.
    target_hash = "42f9bcd59e0dfbca36dec9accf5686c7a92aa97724cd8fc3550beb84b80409da"
    looks_like_sha256 = len(target_hash) == 64
    print(f"🎯 Target hash: {target_hash}")
    print(f"   Length: {len(target_hash)} characters")
    print(f"   Format: {'SHA256' if looks_like_sha256 else 'Other'} (64 chars = SHA256)")

    return hash_patterns
56 |
57 | def test_webpack_style_hashing(data_dict):
58 | """Test webpack-style hash generation methods"""
59 | hashes = {}
60 |
61 | for key, value in data_dict.items():
62 | test_string = str(value)
63 |
64 | # Try various webpack-style hash methods
65 | hashes[f"webpack_md5_{key}"] = hashlib.md5(test_string.encode()).hexdigest()
66 | hashes[f"webpack_sha1_{key}"] = hashlib.sha1(test_string.encode()).hexdigest()
67 | hashes[f"webpack_sha256_{key}"] = hashlib.sha256(test_string.encode()).hexdigest()
68 |
69 | # Try with various prefixes/suffixes (common in webpack)
70 | prefixed = f"__webpack__{test_string}"
71 | hashes[f"webpack_prefixed_sha256_{key}"] = hashlib.sha256(prefixed.encode()).hexdigest()
72 |
73 | # Try with module federation style
74 | module_style = f"shell:{test_string}"
75 | hashes[f"module_fed_sha256_{key}"] = hashlib.sha256(module_style.encode()).hexdigest()
76 |
77 | # Try JSON stringified
78 | json_style = json.dumps({"id": value, "type": "decision"}, separators=(',', ':'))
79 | hashes[f"json_sha256_{key}"] = hashlib.sha256(json_style.encode()).hexdigest()
80 |
81 | # Try with timestamp or sequence
82 | with_seq = f"{test_string}_0"
83 | hashes[f"seq_sha256_{key}"] = hashlib.sha256(with_seq.encode()).hexdigest()
84 |
85 | return hashes
86 |
87 | def test_angular_routing_hashes(data_dict):
88 | """Test Angular routing/state management hash generation"""
89 | hashes = {}
90 |
91 | for key, value in data_dict.items():
92 | # Angular often uses route parameters for hash generation
93 | route_style = f"/kurul-kararlari/{value}"
94 | hashes[f"route_sha256_{key}"] = hashlib.sha256(route_style.encode()).hexdigest()
95 |
96 | # Component state style
97 | state_style = f"KurulKararGoster_{value}"
98 | hashes[f"state_sha256_{key}"] = hashlib.sha256(state_style.encode()).hexdigest()
99 |
100 | # Angular module style
101 | module_style = f"kik.kurul.karar.{value}"
102 | hashes[f"module_sha256_{key}"] = hashlib.sha256(module_style.encode()).hexdigest()
103 |
104 | return hashes
105 |
106 | def test_base64_encoding_variants(data_dict):
107 | """Test various base64 and encoding variants"""
108 | hashes = {}
109 |
110 | for key, value in data_dict.items():
111 | test_string = str(value)
112 |
113 | # Try base64 encoding then hashing
114 | b64_encoded = base64.b64encode(test_string.encode()).decode()
115 | hashes[f"b64_sha256_{key}"] = hashlib.sha256(b64_encoded.encode()).hexdigest()
116 |
117 | # Try URL-safe base64
118 | b64_url = base64.urlsafe_b64encode(test_string.encode()).decode()
119 | hashes[f"b64url_sha256_{key}"] = hashlib.sha256(b64_url.encode()).hexdigest()
120 |
121 | # Try hex encoding
122 | hex_encoded = test_string.encode().hex()
123 | hashes[f"hex_sha256_{key}"] = hashlib.sha256(hex_encoded.encode()).hexdigest()
124 |
125 | return hashes
126 |
async def test_hash_generation_comprehensive():
    """Exploratory analysis: try many candidate hash-generation schemes on
    KİK decision metadata and compare each result against a hard-coded
    target hash, to discover how the KİK frontend derives document hashes.

    Relies on names defined earlier in this module: ``Client``, ``app``,
    ``analyze_webpack_hash_patterns``, ``test_webpack_style_hashing``,
    ``test_angular_routing_hashes`` and ``test_base64_encoding_variants``.
    Performs network I/O via the MCP tool ``search_kik_v2_decisions``.
    """
    print("🔐 Comprehensive KİK document hash generation analysis...")
    print("=" * 70)

    # First analyze the webpack patterns
    webpack_patterns = analyze_webpack_hash_patterns()

    client = Client(app)

    async with client:
        print("✅ MCP client connected")

        # Get sample decisions
        print("\n📊 Getting sample decisions for hash analysis...")
        search_result = await client.call_tool("search_kik_v2_decisions", {
            "decision_type": "uyusmazlik",
            "karar_metni": "2024"
        })

        # The tool result wraps JSON text in a content list.
        if hasattr(search_result, 'content') and search_result.content:
            search_data = json.loads(search_result.content[0].text)
            decisions = search_data.get('decisions', [])

            if decisions:
                print(f"✅ Found {len(decisions)} decisions")

                # Test with first decision
                sample_decision = decisions[0]
                print(f"\n📋 Sample decision for hash analysis:")
                for key, value in sample_decision.items():
                    print(f" {key}: {value}")

                # Known hash to reproduce (hard-coded reference value).
                target_hash = "42f9bcd59e0dfbca36dec9accf5686c7a92aa97724cd8fc3550beb84b80409da"
                print(f"\n🎯 Target hash to match: {target_hash}")

                all_hashes = {}

                # Test different hash generation methods
                print(f"\n🔨 Testing webpack-style hashing...")
                webpack_hashes = test_webpack_style_hashing(sample_decision)
                all_hashes.update(webpack_hashes)

                print(f"🔨 Testing Angular routing hashes...")
                angular_hashes = test_angular_routing_hashes(sample_decision)
                all_hashes.update(angular_hashes)

                print(f"🔨 Testing base64 encoding variants...")
                b64_hashes = test_base64_encoding_variants(sample_decision)
                all_hashes.update(b64_hashes)

                # Check for matches: exact first, then prefix/suffix overlap
                # (8 hex chars) as a weak hint that a scheme is close.
                print(f"\n🎯 Checking for hash matches...")
                matches_found = []
                partial_matches = []

                for hash_name, hash_value in all_hashes.items():
                    if hash_value == target_hash:
                        matches_found.append((hash_name, hash_value))
                        print(f" 🎉 EXACT MATCH FOUND: {hash_name}")
                    elif hash_value[:8] == target_hash[:8]:  # First 8 chars match
                        partial_matches.append((hash_name, hash_value))
                        print(f" 🔍 Partial match (first 8): {hash_name} -> {hash_value[:16]}...")
                    elif hash_value[-8:] == target_hash[-8:]:  # Last 8 chars match
                        partial_matches.append((hash_name, hash_value))
                        print(f" 🔍 Partial match (last 8): {hash_name} -> ...{hash_value[-16:]}")

                if not matches_found and not partial_matches:
                    print(f" ❌ No matches found")
                    print(f"\n📝 Sample generated hashes (first 10):")
                    for i, (hash_name, hash_value) in enumerate(list(all_hashes.items())[:10]):
                        print(f" {hash_name}: {hash_value}")

                # Try combinations with other decisions
                print(f"\n🔄 Testing hash combinations with multiple decisions...")
                if len(decisions) > 1:
                    for i, decision in enumerate(decisions[1:3]):  # Test 2 more
                        print(f"\n Testing decision {i+2}: {decision.get('kararNo')}")
                        decision_hashes = test_webpack_style_hashing(decision)

                        for hash_name, hash_value in decision_hashes.items():
                            if hash_value == target_hash:
                                print(f" 🎉 MATCH FOUND in decision {i+2}: {hash_name}")
                                matches_found.append((f"decision_{i+2}_{hash_name}", hash_value))

                # Try composite hashes (combining multiple fields)
                print(f"\n🔗 Testing composite hash generation...")
                composite_tests = [
                    f"{sample_decision.get('gundemMaddesiId')}_{sample_decision.get('kararNo')}",
                    f"{sample_decision.get('kararNo')}_{sample_decision.get('kararTarihi')}",
                    f"uyusmazlik_{sample_decision.get('gundemMaddesiId')}_{sample_decision.get('kararTarihi')}",
                    json.dumps(sample_decision, separators=(',', ':'), sort_keys=True),
                    f"{sample_decision.get('basvuran')}_{sample_decision.get('gundemMaddesiId')}",
                ]

                for i, composite_str in enumerate(composite_tests):
                    composite_hash = hashlib.sha256(composite_str.encode()).hexdigest()
                    if composite_hash == target_hash:
                        print(f" 🎉 COMPOSITE MATCH FOUND: test_{i} -> {composite_str[:50]}...")
                        matches_found.append((f"composite_{i}", composite_hash))

                print(f"\n🎯 Hash analysis completed!")
                print(f" Total matches found: {len(matches_found)}")
                print(f" Partial matches: {len(partial_matches)}")

            else:
                print("❌ No decisions found")
        else:
            print("❌ Search failed")

    print("=" * 70)

if __name__ == "__main__":
    # Script entry point: run the async analysis to completion.
    asyncio.run(test_hash_generation_comprehensive())
```
--------------------------------------------------------------------------------
/mcp_auth/oauth.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | OAuth 2.1 + PKCE implementation for MCP servers with Clerk integration
3 | """
4 |
5 | import base64
6 | import hashlib
7 | import secrets
8 | import time
9 | import logging
10 | from dataclasses import dataclass
11 | from datetime import datetime, timedelta
12 | from typing import Any, Optional
13 | from urllib.parse import urlencode
14 |
15 | import httpx
16 | import jwt
17 | from jwt.exceptions import PyJWTError, InvalidTokenError
18 |
19 | from .storage import PersistentStorage
20 |
21 | # Try to import Clerk SDK
22 | try:
23 | from clerk_backend_api import Clerk
24 | CLERK_AVAILABLE = True
25 | except ImportError:
26 | CLERK_AVAILABLE = False
27 | Clerk = None
28 |
29 | logger = logging.getLogger(__name__)
30 |
31 |
32 | @dataclass
33 | class OAuthConfig:
34 | """OAuth provider configuration for Clerk"""
35 |
36 | client_id: str
37 | client_secret: str
38 | authorization_endpoint: str
39 | token_endpoint: str
40 | jwks_uri: str | None = None
41 | issuer: str = "mcp-auth"
42 | scopes: list[str] = None
43 |
44 | def __post_init__(self):
45 | if self.scopes is None:
46 | self.scopes = ["mcp:tools:read", "mcp:tools:write"]
47 |
48 |
class PKCEChallenge:
    """PKCE verifier/challenge pair for OAuth 2.1 (RFC 7636, S256)."""

    def __init__(self):
        # 32 random bytes -> 43-character URL-safe verifier, '=' padding
        # stripped as RFC 7636 requires.
        raw = secrets.token_bytes(32)
        self.verifier = base64.urlsafe_b64encode(raw).decode("utf-8").rstrip("=")

        # S256: challenge = BASE64URL(SHA-256(ASCII(verifier))), unpadded.
        digest = hashlib.sha256(self.verifier.encode("utf-8")).digest()
        self.challenge = base64.urlsafe_b64encode(digest).decode("utf-8").rstrip("=")
63 |
64 |
65 | class OAuthProvider:
66 | """OAuth 2.1 provider with PKCE support and Clerk integration"""
67 |
68 | def __init__(self, config: OAuthConfig, jwt_secret: str):
69 | self.config = config
70 | self.jwt_secret = jwt_secret
71 | # Use persistent storage instead of memory
72 | self.storage = PersistentStorage()
73 |
74 | # Initialize Clerk SDK if available
75 | self.clerk = None
76 | if CLERK_AVAILABLE and config.client_secret:
77 | try:
78 | self.clerk = Clerk(bearer_auth=config.client_secret)
79 | logger.info("Clerk SDK initialized successfully")
80 | except Exception as e:
81 | logger.warning(f"Failed to initialize Clerk SDK: {e}")
82 |
83 | logger.info("OAuth provider initialized with persistent storage")
84 |
85 | def generate_authorization_url(
86 | self,
87 | redirect_uri: str,
88 | state: str | None = None,
89 | scopes: list[str] | None = None,
90 | ) -> tuple[str, PKCEChallenge]:
91 | """Generate OAuth authorization URL with PKCE for Clerk"""
92 |
93 | pkce = PKCEChallenge()
94 | session_id = secrets.token_urlsafe(32)
95 |
96 | if state is None:
97 | state = secrets.token_urlsafe(16)
98 |
99 | if scopes is None:
100 | scopes = self.config.scopes
101 |
102 | # Store session data with expiration
103 | session_data = {
104 | "pkce_verifier": pkce.verifier,
105 | "state": state,
106 | "redirect_uri": redirect_uri,
107 | "scopes": scopes,
108 | "created_at": time.time(),
109 | "expires_at": (datetime.utcnow() + timedelta(minutes=10)).timestamp(),
110 | }
111 | self.storage.set_session(session_id, session_data)
112 |
113 | # Build Clerk OAuth URL
114 | # Check if this is a custom domain (sign-in endpoint)
115 | if self.config.authorization_endpoint.endswith('/sign-in'):
116 | # For custom domains, Clerk expects redirect_url parameter
117 | params = {
118 | "redirect_url": redirect_uri,
119 | "state": f"{state}:{session_id}",
120 | }
121 | auth_url = f"{self.config.authorization_endpoint}?{urlencode(params)}"
122 | else:
123 | # Standard OAuth flow with PKCE
124 | params = {
125 | "response_type": "code",
126 | "client_id": self.config.client_id,
127 | "redirect_uri": redirect_uri,
128 | "scope": " ".join(scopes),
129 | "state": f"{state}:{session_id}", # Combine state with session ID
130 | "code_challenge": pkce.challenge,
131 | "code_challenge_method": "S256",
132 | }
133 | auth_url = f"{self.config.authorization_endpoint}?{urlencode(params)}"
134 |
135 | logger.info(f"Generated OAuth URL with session {session_id[:8]}...")
136 | logger.debug(f"Auth URL: {auth_url}")
137 | return auth_url, pkce
138 |
139 | async def exchange_code_for_token(
140 | self, code: str, state: str, redirect_uri: str
141 | ) -> dict[str, Any]:
142 | """Exchange authorization code for access token with Clerk"""
143 |
144 | try:
145 | original_state, session_id = state.split(":", 1)
146 | except ValueError as e:
147 | logger.error(f"Invalid state format: {state}")
148 | raise ValueError("Invalid state format") from e
149 |
150 | session = self.storage.get_session(session_id)
151 | if not session:
152 | logger.error(f"Session {session_id} not found")
153 | raise ValueError("Invalid session")
154 |
155 | # Check session expiration
156 | if datetime.utcnow().timestamp() > session.get("expires_at", 0):
157 | self.storage.delete_session(session_id)
158 | logger.error(f"Session {session_id} expired")
159 | raise ValueError("Session expired")
160 |
161 | if session["state"] != original_state:
162 | logger.error(f"State mismatch: expected {session['state']}, got {original_state}")
163 | raise ValueError("State mismatch")
164 |
165 | if session["redirect_uri"] != redirect_uri:
166 | logger.error(f"Redirect URI mismatch: expected {session['redirect_uri']}, got {redirect_uri}")
167 | raise ValueError("Redirect URI mismatch")
168 |
169 | # Prepare token exchange request for Clerk
170 | token_data = {
171 | "grant_type": "authorization_code",
172 | "client_id": self.config.client_id,
173 | "client_secret": self.config.client_secret,
174 | "code": code,
175 | "redirect_uri": redirect_uri,
176 | "code_verifier": session["pkce_verifier"],
177 | }
178 |
179 | logger.info(f"Exchanging code with Clerk for session {session_id[:8]}...")
180 |
181 | async with httpx.AsyncClient() as client:
182 | response = await client.post(
183 | self.config.token_endpoint,
184 | data=token_data,
185 | headers={"Content-Type": "application/x-www-form-urlencoded"},
186 | timeout=30.0,
187 | )
188 |
189 | if response.status_code != 200:
190 | logger.error(f"Clerk token exchange failed: {response.status_code} - {response.text}")
191 | raise ValueError(f"Token exchange failed: {response.text}")
192 |
193 | token_response = response.json()
194 | logger.info("Successfully exchanged code for Clerk token")
195 |
196 | # Create MCP-scoped JWT token
197 | access_token = self._create_mcp_token(
198 | session["scopes"], token_response.get("access_token"), session_id
199 | )
200 |
201 | # Store token for introspection
202 | token_id = secrets.token_urlsafe(16)
203 | token_data = {
204 | "access_token": access_token,
205 | "scopes": session["scopes"],
206 | "created_at": time.time(),
207 | "expires_at": (datetime.utcnow() + timedelta(hours=1)).timestamp(),
208 | "session_id": session_id,
209 | "clerk_token": token_response.get("access_token"),
210 | }
211 | self.storage.set_token(token_id, token_data)
212 |
213 | # Clean up session
214 | self.storage.delete_session(session_id)
215 |
216 | return {
217 | "access_token": access_token,
218 | "token_type": "bearer",
219 | "expires_in": 3600,
220 | "scope": " ".join(session["scopes"]),
221 | }
222 |
223 | def validate_pkce(self, code_verifier: str, code_challenge: str) -> bool:
224 | """Validate PKCE code challenge (RFC 7636)"""
225 | # S256 method
226 | verifier_hash = hashlib.sha256(code_verifier.encode()).digest()
227 | expected_challenge = base64.urlsafe_b64encode(verifier_hash).decode().rstrip('=')
228 | return expected_challenge == code_challenge
229 |
230 | def _create_mcp_token(
231 | self, scopes: list[str], upstream_token: str, session_id: str
232 | ) -> str:
233 | """Create MCP-scoped JWT token with Clerk token embedded"""
234 |
235 | now = int(time.time())
236 | payload = {
237 | "iss": self.config.issuer,
238 | "sub": session_id,
239 | "aud": "mcp-server",
240 | "iat": now,
241 | "exp": now + 3600, # 1 hour expiration
242 | "mcp_tool_scopes": scopes,
243 | "upstream_token": upstream_token,
244 | "clerk_integration": True,
245 | }
246 |
247 | return jwt.encode(payload, self.jwt_secret, algorithm="HS256")
248 |
249 | def introspect_token(self, token: str) -> dict[str, Any]:
250 | """Introspect and validate MCP token"""
251 |
252 | try:
253 | payload = jwt.decode(token, self.jwt_secret, algorithms=["HS256"])
254 |
255 | # Check if token is expired
256 | if payload.get("exp", 0) < time.time():
257 | return {"active": False, "error": "token_expired"}
258 |
259 | return {
260 | "active": True,
261 | "sub": payload.get("sub"),
262 | "aud": payload.get("aud"),
263 | "iss": payload.get("iss"),
264 | "exp": payload.get("exp"),
265 | "iat": payload.get("iat"),
266 | "mcp_tool_scopes": payload.get("mcp_tool_scopes", []),
267 | "upstream_token": payload.get("upstream_token"),
268 | "clerk_integration": payload.get("clerk_integration", False),
269 | }
270 |
271 | except PyJWTError as e:
272 | logger.warning(f"Token validation failed: {e}")
273 | return {"active": False, "error": "invalid_token"}
274 |
275 | def revoke_token(self, token: str) -> bool:
276 | """Revoke a token"""
277 |
278 | try:
279 | payload = jwt.decode(token, self.jwt_secret, algorithms=["HS256"])
280 | session_id = payload.get("sub")
281 |
282 | # Remove all tokens associated with this session
283 | all_tokens = self.storage.get_tokens()
284 | tokens_to_remove = [
285 | token_id
286 | for token_id, token_data in all_tokens.items()
287 | if token_data.get("session_id") == session_id
288 | ]
289 |
290 | for token_id in tokens_to_remove:
291 | self.storage.delete_token(token_id)
292 |
293 | logger.info(f"Revoked {len(tokens_to_remove)} tokens for session {session_id}")
294 | return True
295 |
296 | except InvalidTokenError as e:
297 | logger.warning(f"Token revocation failed: {e}")
298 | return False
299 |
300 | def cleanup_expired_sessions(self):
301 | """Clean up expired sessions and tokens"""
302 | # This is now handled automatically by persistent storage
303 | self.storage.cleanup_expired_sessions()
304 | logger.debug("Cleanup completed via persistent storage")
```
--------------------------------------------------------------------------------
/saidsurucu-yargi-mcp-f5fa007/mcp_auth/oauth.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | OAuth 2.1 + PKCE implementation for MCP servers with Clerk integration
3 | """
4 |
5 | import base64
6 | import hashlib
7 | import secrets
8 | import time
9 | import logging
10 | from dataclasses import dataclass
11 | from datetime import datetime, timedelta
12 | from typing import Any, Optional
13 | from urllib.parse import urlencode
14 |
15 | import httpx
16 | import jwt
17 | from jwt.exceptions import PyJWTError, InvalidTokenError
18 |
19 | from .storage import PersistentStorage
20 |
21 | # Try to import Clerk SDK
22 | try:
23 | from clerk_backend_api import Clerk
24 | CLERK_AVAILABLE = True
25 | except ImportError:
26 | CLERK_AVAILABLE = False
27 | Clerk = None
28 |
29 | logger = logging.getLogger(__name__)
30 |
31 |
32 | @dataclass
33 | class OAuthConfig:
34 | """OAuth provider configuration for Clerk"""
35 |
36 | client_id: str
37 | client_secret: str
38 | authorization_endpoint: str
39 | token_endpoint: str
40 | jwks_uri: str | None = None
41 | issuer: str = "mcp-auth"
42 | scopes: list[str] = None
43 |
44 | def __post_init__(self):
45 | if self.scopes is None:
46 | self.scopes = ["mcp:tools:read", "mcp:tools:write"]
47 |
48 |
class PKCEChallenge:
    """PKCE verifier/challenge pair for OAuth 2.1 (RFC 7636, S256)."""

    def __init__(self):
        # 32 random bytes -> 43-character URL-safe verifier, '=' padding
        # stripped as RFC 7636 requires.
        raw = secrets.token_bytes(32)
        self.verifier = base64.urlsafe_b64encode(raw).decode("utf-8").rstrip("=")

        # S256: challenge = BASE64URL(SHA-256(ASCII(verifier))), unpadded.
        digest = hashlib.sha256(self.verifier.encode("utf-8")).digest()
        self.challenge = base64.urlsafe_b64encode(digest).decode("utf-8").rstrip("=")
63 |
64 |
65 | class OAuthProvider:
66 | """OAuth 2.1 provider with PKCE support and Clerk integration"""
67 |
68 | def __init__(self, config: OAuthConfig, jwt_secret: str):
69 | self.config = config
70 | self.jwt_secret = jwt_secret
71 | # Use persistent storage instead of memory
72 | self.storage = PersistentStorage()
73 |
74 | # Initialize Clerk SDK if available
75 | self.clerk = None
76 | if CLERK_AVAILABLE and config.client_secret:
77 | try:
78 | self.clerk = Clerk(bearer_auth=config.client_secret)
79 | logger.info("Clerk SDK initialized successfully")
80 | except Exception as e:
81 | logger.warning(f"Failed to initialize Clerk SDK: {e}")
82 |
83 | logger.info("OAuth provider initialized with persistent storage")
84 |
85 | def generate_authorization_url(
86 | self,
87 | redirect_uri: str,
88 | state: str | None = None,
89 | scopes: list[str] | None = None,
90 | ) -> tuple[str, PKCEChallenge]:
91 | """Generate OAuth authorization URL with PKCE for Clerk"""
92 |
93 | pkce = PKCEChallenge()
94 | session_id = secrets.token_urlsafe(32)
95 |
96 | if state is None:
97 | state = secrets.token_urlsafe(16)
98 |
99 | if scopes is None:
100 | scopes = self.config.scopes
101 |
102 | # Store session data with expiration
103 | session_data = {
104 | "pkce_verifier": pkce.verifier,
105 | "state": state,
106 | "redirect_uri": redirect_uri,
107 | "scopes": scopes,
108 | "created_at": time.time(),
109 | "expires_at": (datetime.utcnow() + timedelta(minutes=10)).timestamp(),
110 | }
111 | self.storage.set_session(session_id, session_data)
112 |
113 | # Build Clerk OAuth URL
114 | # Check if this is a custom domain (sign-in endpoint)
115 | if self.config.authorization_endpoint.endswith('/sign-in'):
116 | # For custom domains, Clerk expects redirect_url parameter
117 | params = {
118 | "redirect_url": redirect_uri,
119 | "state": f"{state}:{session_id}",
120 | }
121 | auth_url = f"{self.config.authorization_endpoint}?{urlencode(params)}"
122 | else:
123 | # Standard OAuth flow with PKCE
124 | params = {
125 | "response_type": "code",
126 | "client_id": self.config.client_id,
127 | "redirect_uri": redirect_uri,
128 | "scope": " ".join(scopes),
129 | "state": f"{state}:{session_id}", # Combine state with session ID
130 | "code_challenge": pkce.challenge,
131 | "code_challenge_method": "S256",
132 | }
133 | auth_url = f"{self.config.authorization_endpoint}?{urlencode(params)}"
134 |
135 | logger.info(f"Generated OAuth URL with session {session_id[:8]}...")
136 | logger.debug(f"Auth URL: {auth_url}")
137 | return auth_url, pkce
138 |
139 | async def exchange_code_for_token(
140 | self, code: str, state: str, redirect_uri: str
141 | ) -> dict[str, Any]:
142 | """Exchange authorization code for access token with Clerk"""
143 |
144 | try:
145 | original_state, session_id = state.split(":", 1)
146 | except ValueError as e:
147 | logger.error(f"Invalid state format: {state}")
148 | raise ValueError("Invalid state format") from e
149 |
150 | session = self.storage.get_session(session_id)
151 | if not session:
152 | logger.error(f"Session {session_id} not found")
153 | raise ValueError("Invalid session")
154 |
155 | # Check session expiration
156 | if datetime.utcnow().timestamp() > session.get("expires_at", 0):
157 | self.storage.delete_session(session_id)
158 | logger.error(f"Session {session_id} expired")
159 | raise ValueError("Session expired")
160 |
161 | if session["state"] != original_state:
162 | logger.error(f"State mismatch: expected {session['state']}, got {original_state}")
163 | raise ValueError("State mismatch")
164 |
165 | if session["redirect_uri"] != redirect_uri:
166 | logger.error(f"Redirect URI mismatch: expected {session['redirect_uri']}, got {redirect_uri}")
167 | raise ValueError("Redirect URI mismatch")
168 |
169 | # Prepare token exchange request for Clerk
170 | token_data = {
171 | "grant_type": "authorization_code",
172 | "client_id": self.config.client_id,
173 | "client_secret": self.config.client_secret,
174 | "code": code,
175 | "redirect_uri": redirect_uri,
176 | "code_verifier": session["pkce_verifier"],
177 | }
178 |
179 | logger.info(f"Exchanging code with Clerk for session {session_id[:8]}...")
180 |
181 | async with httpx.AsyncClient() as client:
182 | response = await client.post(
183 | self.config.token_endpoint,
184 | data=token_data,
185 | headers={"Content-Type": "application/x-www-form-urlencoded"},
186 | timeout=30.0,
187 | )
188 |
189 | if response.status_code != 200:
190 | logger.error(f"Clerk token exchange failed: {response.status_code} - {response.text}")
191 | raise ValueError(f"Token exchange failed: {response.text}")
192 |
193 | token_response = response.json()
194 | logger.info("Successfully exchanged code for Clerk token")
195 |
196 | # Create MCP-scoped JWT token
197 | access_token = self._create_mcp_token(
198 | session["scopes"], token_response.get("access_token"), session_id
199 | )
200 |
201 | # Store token for introspection
202 | token_id = secrets.token_urlsafe(16)
203 | token_data = {
204 | "access_token": access_token,
205 | "scopes": session["scopes"],
206 | "created_at": time.time(),
207 | "expires_at": (datetime.utcnow() + timedelta(hours=1)).timestamp(),
208 | "session_id": session_id,
209 | "clerk_token": token_response.get("access_token"),
210 | }
211 | self.storage.set_token(token_id, token_data)
212 |
213 | # Clean up session
214 | self.storage.delete_session(session_id)
215 |
216 | return {
217 | "access_token": access_token,
218 | "token_type": "bearer",
219 | "expires_in": 3600,
220 | "scope": " ".join(session["scopes"]),
221 | }
222 |
223 | def validate_pkce(self, code_verifier: str, code_challenge: str) -> bool:
224 | """Validate PKCE code challenge (RFC 7636)"""
225 | # S256 method
226 | verifier_hash = hashlib.sha256(code_verifier.encode()).digest()
227 | expected_challenge = base64.urlsafe_b64encode(verifier_hash).decode().rstrip('=')
228 | return expected_challenge == code_challenge
229 |
230 | def _create_mcp_token(
231 | self, scopes: list[str], upstream_token: str, session_id: str
232 | ) -> str:
233 | """Create MCP-scoped JWT token with Clerk token embedded"""
234 |
235 | now = int(time.time())
236 | payload = {
237 | "iss": self.config.issuer,
238 | "sub": session_id,
239 | "aud": "mcp-server",
240 | "iat": now,
241 | "exp": now + 3600, # 1 hour expiration
242 | "mcp_tool_scopes": scopes,
243 | "upstream_token": upstream_token,
244 | "clerk_integration": True,
245 | }
246 |
247 | return jwt.encode(payload, self.jwt_secret, algorithm="HS256")
248 |
249 | def introspect_token(self, token: str) -> dict[str, Any]:
250 | """Introspect and validate MCP token"""
251 |
252 | try:
253 | payload = jwt.decode(token, self.jwt_secret, algorithms=["HS256"])
254 |
255 | # Check if token is expired
256 | if payload.get("exp", 0) < time.time():
257 | return {"active": False, "error": "token_expired"}
258 |
259 | return {
260 | "active": True,
261 | "sub": payload.get("sub"),
262 | "aud": payload.get("aud"),
263 | "iss": payload.get("iss"),
264 | "exp": payload.get("exp"),
265 | "iat": payload.get("iat"),
266 | "mcp_tool_scopes": payload.get("mcp_tool_scopes", []),
267 | "upstream_token": payload.get("upstream_token"),
268 | "clerk_integration": payload.get("clerk_integration", False),
269 | }
270 |
271 | except PyJWTError as e:
272 | logger.warning(f"Token validation failed: {e}")
273 | return {"active": False, "error": "invalid_token"}
274 |
275 | def revoke_token(self, token: str) -> bool:
276 | """Revoke a token"""
277 |
278 | try:
279 | payload = jwt.decode(token, self.jwt_secret, algorithms=["HS256"])
280 | session_id = payload.get("sub")
281 |
282 | # Remove all tokens associated with this session
283 | all_tokens = self.storage.get_tokens()
284 | tokens_to_remove = [
285 | token_id
286 | for token_id, token_data in all_tokens.items()
287 | if token_data.get("session_id") == session_id
288 | ]
289 |
290 | for token_id in tokens_to_remove:
291 | self.storage.delete_token(token_id)
292 |
293 | logger.info(f"Revoked {len(tokens_to_remove)} tokens for session {session_id}")
294 | return True
295 |
296 | except InvalidTokenError as e:
297 | logger.warning(f"Token revocation failed: {e}")
298 | return False
299 |
300 | def cleanup_expired_sessions(self):
301 | """Clean up expired sessions and tokens"""
302 | # This is now handled automatically by persistent storage
303 | self.storage.cleanup_expired_sessions()
304 | logger.debug("Cleanup completed via persistent storage")
```