This is page 7 of 11. Use http://codebase.md/saidsurucu/yargi-mcp?lines=true&page={x} to view the full context.
# Directory Structure
```
├── __main__.py
├── .dockerignore
├── .env.example
├── .gitattributes
├── .github
│ └── workflows
│ └── publish.yml
├── .gitignore
├── .serena
│ ├── .gitignore
│ └── project.yml
├── 5ire-settings.png
├── analyze_kik_hash_generation.py
├── anayasa_mcp_module
│ ├── __init__.py
│ ├── bireysel_client.py
│ ├── client.py
│ ├── models.py
│ └── unified_client.py
├── asgi_app.py
├── bddk_mcp_module
│ ├── __init__.py
│ ├── client.py
│ └── models.py
├── bedesten_mcp_module
│ ├── __init__.py
│ ├── client.py
│ ├── enums.py
│ └── models.py
├── check_response_format.py
├── CLAUDE.md
├── danistay_mcp_module
│ ├── __init__.py
│ ├── client.py
│ └── models.py
├── docker-compose.yml
├── Dockerfile
├── docs
│ └── DEPLOYMENT.md
├── emsal_mcp_module
│ ├── __init__.py
│ ├── client.py
│ └── models.py
├── example_fastapi_app.py
├── fly-no-auth.toml
├── fly.toml
├── kik_mcp_module
│ ├── __init__.py
│ ├── client_v2.py
│ ├── client.py
│ ├── models_v2.py
│ └── models.py
├── kvkk_mcp_module
│ ├── __init__.py
│ ├── client.py
│ └── models.py
├── LICENSE
├── mcp_auth
│ ├── __init__.py
│ ├── clerk_config.py
│ ├── middleware.py
│ ├── oauth.py
│ ├── policy.py
│ └── storage.py
├── mcp_auth_factory.py
├── mcp_auth_http_adapter.py
├── mcp_auth_http_simple.py
├── mcp_server_main.py
├── nginx.conf
├── ornek.png
├── Procfile
├── pyproject.toml
├── railway.json
├── README.md
├── redis_session_store.py
├── rekabet_mcp_module
│ ├── __init__.py
│ ├── client.py
│ └── models.py
├── requirements.txt
├── run_asgi.py
├── saidsurucu-yargi-mcp-f5fa007
│ ├── __main__.py
│ ├── .dockerignore
│ ├── .env.example
│ ├── .gitattributes
│ ├── .github
│ │ └── workflows
│ │ └── publish.yml
│ ├── .gitignore
│ ├── 5ire-settings.png
│ ├── anayasa_mcp_module
│ │ ├── __init__.py
│ │ ├── bireysel_client.py
│ │ ├── client.py
│ │ ├── models.py
│ │ └── unified_client.py
│ ├── asgi_app.py
│ ├── bddk_mcp_module
│ │ ├── __init__.py
│ │ ├── client.py
│ │ └── models.py
│ ├── bedesten_mcp_module
│ │ ├── __init__.py
│ │ ├── client.py
│ │ ├── enums.py
│ │ └── models.py
│ ├── check_response_format.py
│ ├── danistay_mcp_module
│ │ ├── __init__.py
│ │ ├── client.py
│ │ └── models.py
│ ├── docker-compose.yml
│ ├── Dockerfile
│ ├── docs
│ │ └── DEPLOYMENT.md
│ ├── emsal_mcp_module
│ │ ├── __init__.py
│ │ ├── client.py
│ │ └── models.py
│ ├── example_fastapi_app.py
│ ├── kik_mcp_module
│ │ ├── __init__.py
│ │ ├── client.py
│ │ └── models.py
│ ├── kvkk_mcp_module
│ │ ├── __init__.py
│ │ ├── client.py
│ │ └── models.py
│ ├── LICENSE
│ ├── mcp_auth
│ │ ├── __init__.py
│ │ ├── clerk_config.py
│ │ ├── middleware.py
│ │ ├── oauth.py
│ │ ├── policy.py
│ │ └── storage.py
│ ├── mcp_auth_factory.py
│ ├── mcp_auth_http_adapter.py
│ ├── mcp_auth_http_simple.py
│ ├── mcp_server_main.py
│ ├── nginx.conf
│ ├── ornek.png
│ ├── Procfile
│ ├── pyproject.toml
│ ├── railway.json
│ ├── README.md
│ ├── redis_session_store.py
│ ├── rekabet_mcp_module
│ │ ├── __init__.py
│ │ ├── client.py
│ │ └── models.py
│ ├── run_asgi.py
│ ├── sayistay_mcp_module
│ │ ├── __init__.py
│ │ ├── client.py
│ │ ├── enums.py
│ │ ├── models.py
│ │ └── unified_client.py
│ ├── starlette_app.py
│ ├── stripe_webhook.py
│ ├── uyusmazlik_mcp_module
│ │ ├── __init__.py
│ │ ├── client.py
│ │ └── models.py
│ └── yargitay_mcp_module
│ ├── __init__.py
│ ├── client.py
│ └── models.py
├── sayistay_mcp_module
│ ├── __init__.py
│ ├── client.py
│ ├── enums.py
│ ├── models.py
│ └── unified_client.py
├── starlette_app.py
├── stripe_webhook.py
├── uv.lock
├── uyusmazlik_mcp_module
│ ├── __init__.py
│ ├── client.py
│ └── models.py
└── yargitay_mcp_module
├── __init__.py
├── client.py
└── models.py
```
# Files
--------------------------------------------------------------------------------
/asgi_app.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | ASGI application for Yargı MCP Server
3 |
4 | This module provides ASGI/HTTP access to the Yargı MCP server,
5 | allowing it to be deployed as a web service with FastAPI wrapper
6 | for OAuth integration and proper middleware support.
7 |
8 | Usage:
9 | uvicorn asgi_app:app --host 0.0.0.0 --port 8000
10 | """
11 |
12 | import os
13 | import time
14 | import logging
15 | import json
16 | from datetime import datetime, timedelta
17 | from fastapi import FastAPI, Request, HTTPException, Query
18 | from fastapi.responses import JSONResponse, HTMLResponse, Response
19 | from fastapi.exception_handlers import http_exception_handler
20 | from starlette.middleware import Middleware
21 | from starlette.middleware.cors import CORSMiddleware
22 | from starlette.middleware.base import BaseHTTPMiddleware
23 |
24 | # Import the proper create_app function that includes all middleware
25 | from mcp_server_main import create_app
26 |
27 | # Conditional auth-related imports (only if auth enabled)
28 | _auth_check = os.getenv("ENABLE_AUTH", "false").lower() == "true"
29 |
30 | if _auth_check:
31 | # Import MCP Auth HTTP adapter (OAuth endpoints)
32 | try:
33 | from mcp_auth_http_simple import router as mcp_auth_router
34 | except ImportError:
35 | mcp_auth_router = None
36 |
37 | # Import Stripe webhook router
38 | try:
39 | from stripe_webhook import router as stripe_router
40 | except ImportError:
41 | stripe_router = None
42 | else:
43 | mcp_auth_router = None
44 | stripe_router = None
45 |
46 | # OAuth configuration from environment variables
47 | CLERK_ISSUER = os.getenv("CLERK_ISSUER", "https://clerk.yargimcp.com")
48 | BASE_URL = os.getenv("BASE_URL", "https://api.yargimcp.com")
49 | CLERK_SECRET_KEY = os.getenv("CLERK_SECRET_KEY")
50 | CLERK_PUBLISHABLE_KEY = os.getenv("CLERK_PUBLISHABLE_KEY")
51 |
52 | # Setup logging
53 | logger = logging.getLogger(__name__)
54 |
55 | # Configure CORS and Auth middleware
56 | cors_origins = os.getenv("ALLOWED_ORIGINS", "*").split(",")
57 |
58 | # Import FastMCP Bearer Auth Provider
59 | from fastmcp.server.auth import BearerAuthProvider
60 | from fastmcp.server.auth.providers.bearer import RSAKeyPair
61 |
62 | # Import Clerk SDK at module level for performance
63 | try:
64 | from clerk_backend_api import Clerk
65 | CLERK_SDK_AVAILABLE = True
66 | except ImportError:
67 | CLERK_SDK_AVAILABLE = False
68 | logger.warning("Clerk SDK not available - falling back to development mode")
69 |
70 | # Configure Bearer token authentication based on ENABLE_AUTH
71 | auth_enabled = os.getenv("ENABLE_AUTH", "false").lower() == "true"
72 | bearer_auth = None
73 |
74 | if CLERK_SECRET_KEY and CLERK_ISSUER:
75 | # Production: Use Clerk JWKS endpoint for token validation
76 | bearer_auth = BearerAuthProvider(
77 | jwks_uri=f"{CLERK_ISSUER}/.well-known/jwks.json",
78 | issuer=None,
79 | algorithm="RS256",
80 | audience=None,
81 | required_scopes=[]
82 | )
83 | else:
84 | # Development: Generate RSA key pair for testing
85 | dev_key_pair = RSAKeyPair.generate()
86 | bearer_auth = BearerAuthProvider(
87 | public_key=dev_key_pair.public_key,
88 | issuer="https://dev.yargimcp.com",
89 | audience="dev-mcp-server",
90 | required_scopes=["yargi.read"]
91 | )
92 |
93 | # Create MCP app with Bearer authentication
94 | mcp_server = create_app(auth=bearer_auth if auth_enabled else None)
95 |
96 | # Create MCP Starlette sub-application with root path - mount will add /mcp prefix
97 | mcp_app = mcp_server.http_app(path="/")
98 |
99 |
100 | # Configure JSON encoder for proper Turkish character support
101 | class UTF8JSONResponse(JSONResponse):
102 | def __init__(self, content=None, status_code=200, headers=None, **kwargs):
103 | if headers is None:
104 | headers = {}
105 | headers["Content-Type"] = "application/json; charset=utf-8"
106 | super().__init__(content, status_code, headers, **kwargs)
107 |
108 | def render(self, content) -> bytes:
109 | return json.dumps(
110 | content,
111 | ensure_ascii=False,
112 | allow_nan=False,
113 | indent=None,
114 | separators=(",", ":"),
115 | ).encode("utf-8")
116 |
117 | custom_middleware = [
118 | Middleware(
119 | CORSMiddleware,
120 | allow_origins=cors_origins,
121 | allow_credentials=True,
122 | allow_methods=["GET", "POST", "OPTIONS", "DELETE"],
123 | allow_headers=["Content-Type", "Authorization", "X-Request-ID", "X-Session-ID"],
124 | ),
125 | ]
126 |
127 | # Create FastAPI wrapper application
128 | app = FastAPI(
129 | title="Yargı MCP Server",
130 | description="MCP server for Turkish legal databases with OAuth authentication",
131 | version="0.1.0",
132 | middleware=custom_middleware,
133 | default_response_class=UTF8JSONResponse, # Use UTF-8 JSON encoder
134 | redirect_slashes=False # Disable to prevent 307 redirects on /mcp endpoint
135 | )
136 |
137 | # Add auth-related routers to FastAPI (only if available)
138 | if stripe_router:
139 | app.include_router(stripe_router, prefix="/api/stripe")
140 |
141 | if mcp_auth_router:
142 | app.include_router(mcp_auth_router)
143 |
144 | # Custom 401 exception handler for MCP spec compliance
145 | @app.exception_handler(401)
146 | async def custom_401_handler(request: Request, exc: HTTPException):
147 | """Custom 401 handler that adds WWW-Authenticate header as required by MCP spec"""
148 | response = await http_exception_handler(request, exc)
149 |
150 | # Add WWW-Authenticate header pointing to protected resource metadata
151 | # as required by RFC 9728 Section 5.1 and MCP Authorization spec
152 | response.headers["WWW-Authenticate"] = (
153 | 'Bearer '
154 | 'error="invalid_token", '
155 | 'error_description="The access token is missing or invalid", '
156 | f'resource="{BASE_URL}/.well-known/oauth-protected-resource"'
157 | )
158 |
159 | return response
160 |
161 | # FastAPI health check endpoint - BEFORE mounting MCP app
162 | @app.get("/health")
163 | async def health_check():
164 | """Health check endpoint for monitoring"""
165 | return {
166 | "status": "healthy",
167 | "service": "Yargı MCP Server",
168 | "version": "0.1.0",
169 | "tools_count": len(mcp_server._tool_manager._tools),
170 | "auth_enabled": os.getenv("ENABLE_AUTH", "false").lower() == "true"
171 | }
172 |
173 | # Add explicit redirect for /mcp to /mcp/ with method preservation
174 | @app.api_route("/mcp", methods=["GET", "POST", "HEAD", "OPTIONS"])
175 | async def redirect_to_slash(request: Request):
176 | """Redirect /mcp to /mcp/ preserving HTTP method with 308"""
177 | from fastapi.responses import RedirectResponse
178 | return RedirectResponse(url="/mcp/", status_code=308)
179 |
180 | # MCP mount at /mcp handles path routing correctly
181 |
182 | # IMPORTANT: Add FastAPI endpoints BEFORE mounting MCP app
183 | # Otherwise mount at root will catch all requests
184 |
185 | # Debug endpoint to test routing
186 | @app.get("/debug/test")
187 | async def debug_test():
188 | """Debug endpoint to test if FastAPI routes work"""
189 | return {"message": "FastAPI routes working", "debug": True}
190 |
191 | # Clerk CORS proxy endpoints
192 | @app.api_route("/clerk-proxy/{path:path}", methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"])
193 | async def clerk_cors_proxy(request: Request, path: str):
194 | """
195 | Proxy requests to Clerk to bypass CORS restrictions.
196 | Forwards requests from Claude AI to clerk.yargimcp.com with proper CORS headers.
197 | """
198 | import httpx
199 |
200 | # Build target URL
201 | clerk_url = f"https://clerk.yargimcp.com/{path}"
202 |
203 | # Forward query parameters
204 | if request.url.query:
205 | clerk_url += f"?{request.url.query}"
206 |
207 | # Copy headers (exclude host/origin)
208 | headers = dict(request.headers)
209 | headers.pop('host', None)
210 | headers.pop('origin', None)
211 | headers['origin'] = 'https://yargimcp.com' # Use our frontend domain
212 |
213 | try:
214 | async with httpx.AsyncClient() as client:
215 | # Forward the request to Clerk
216 | if request.method == "OPTIONS":
217 | # Handle preflight
218 | response = await client.request(
219 | method=request.method,
220 | url=clerk_url,
221 | headers=headers
222 | )
223 | else:
224 | # Forward body for POST/PUT requests
225 | body = None
226 | if request.method in ["POST", "PUT", "PATCH"]:
227 | body = await request.body()
228 |
229 | response = await client.request(
230 | method=request.method,
231 | url=clerk_url,
232 | headers=headers,
233 | content=body
234 | )
235 |
236 | # Create response with CORS headers
237 | response_headers = dict(response.headers)
238 | response_headers.update({
239 | "Access-Control-Allow-Origin": "*",
240 | "Access-Control-Allow-Methods": "GET, POST, PUT, DELETE, OPTIONS",
241 | "Access-Control-Allow-Headers": "Content-Type, Authorization, Accept, Origin, X-Requested-With",
242 | "Access-Control-Allow-Credentials": "true",
243 | "Access-Control-Max-Age": "86400"
244 | })
245 |
246 | return Response(
247 | content=response.content,
248 | status_code=response.status_code,
249 | headers=response_headers,
250 | media_type=response.headers.get("content-type")
251 | )
252 |
253 | except Exception as e:
254 | return JSONResponse(
255 | {"error": "proxy_error", "message": str(e)},
256 | status_code=500,
257 | headers={"Access-Control-Allow-Origin": "*"}
258 | )
259 |
260 | # FastAPI root endpoint
261 | @app.get("/")
262 | async def root():
263 | """Root endpoint with service information"""
264 | return {
265 | "service": "Yargı MCP Server",
266 | "description": "MCP server for Turkish legal databases with OAuth authentication",
267 | "endpoints": {
268 | "mcp": "/mcp",
269 | "health": "/health",
270 | "status": "/status",
271 | "stripe_webhook": "/api/stripe/webhook",
272 | "oauth_login": "/auth/login",
273 | "oauth_callback": "/auth/callback",
274 | "oauth_google": "/auth/google/login",
275 | "user_info": "/auth/user"
276 | },
277 | "transports": {
278 | "http": "/mcp"
279 | },
280 | "supported_databases": [
281 | "Yargıtay (Court of Cassation)",
282 | "Danıştay (Council of State)",
283 | "Emsal (Precedent)",
284 | "Uyuşmazlık Mahkemesi (Court of Jurisdictional Disputes)",
285 | "Anayasa Mahkemesi (Constitutional Court)",
286 | "Kamu İhale Kurulu (Public Procurement Authority)",
287 | "Rekabet Kurumu (Competition Authority)",
288 | "Sayıştay (Court of Accounts)",
289 | "KVKK (Personal Data Protection Authority)",
290 | "BDDK (Banking Regulation and Supervision Agency)",
291 | "Bedesten API (Multiple courts)"
292 | ],
293 | "authentication": {
294 | "enabled": os.getenv("ENABLE_AUTH", "false").lower() == "true",
295 | "type": "OAuth 2.0 via Clerk",
296 | "issuer": CLERK_ISSUER,
297 | "providers": ["google"],
298 | "flow": "authorization_code"
299 | }
300 | }
301 |
302 | # OAuth 2.0 Authorization Server Metadata - MCP standard location
303 | @app.get("/.well-known/oauth-authorization-server")
304 | async def oauth_authorization_server_root():
305 | """OAuth 2.0 Authorization Server Metadata - root level for compatibility"""
306 | return {
307 | "issuer": BASE_URL, # Use BASE_URL as issuer for MCP integration
308 | "authorization_endpoint": f"{BASE_URL}/auth/login",
309 | "token_endpoint": f"{BASE_URL}/token",
310 | "jwks_uri": f"{CLERK_ISSUER}/.well-known/jwks.json",
311 | "response_types_supported": ["code"],
312 | "grant_types_supported": ["authorization_code", "refresh_token"],
313 | "token_endpoint_auth_methods_supported": ["client_secret_basic", "none"],
314 | "scopes_supported": ["read", "search", "openid", "profile", "email"],
315 | "subject_types_supported": ["public"],
316 | "id_token_signing_alg_values_supported": ["RS256"],
317 | "claims_supported": ["sub", "iss", "aud", "exp", "iat", "email", "name"],
318 | "code_challenge_methods_supported": ["S256"],
319 | "service_documentation": f"{BASE_URL}/mcp",
320 | "registration_endpoint": f"{BASE_URL}/register",
321 | "resource_documentation": f"{BASE_URL}/mcp"
322 | }
323 |
324 | # Claude AI MCP specific endpoint format - suffix versions
325 | @app.get("/.well-known/oauth-authorization-server/mcp")
326 | async def oauth_authorization_server_mcp_suffix():
327 | """OAuth 2.0 Authorization Server Metadata - Claude AI MCP specific format"""
328 | return {
329 | "issuer": BASE_URL, # Use BASE_URL as issuer for MCP integration
330 | "authorization_endpoint": f"{BASE_URL}/auth/login",
331 | "token_endpoint": f"{BASE_URL}/token",
332 | "jwks_uri": f"{CLERK_ISSUER}/.well-known/jwks.json",
333 | "response_types_supported": ["code"],
334 | "grant_types_supported": ["authorization_code", "refresh_token"],
335 | "token_endpoint_auth_methods_supported": ["client_secret_basic", "none"],
336 | "scopes_supported": ["read", "search", "openid", "profile", "email"],
337 | "subject_types_supported": ["public"],
338 | "id_token_signing_alg_values_supported": ["RS256"],
339 | "claims_supported": ["sub", "iss", "aud", "exp", "iat", "email", "name"],
340 | "code_challenge_methods_supported": ["S256"],
341 | "service_documentation": f"{BASE_URL}/mcp",
342 | "registration_endpoint": f"{BASE_URL}/register",
343 | "resource_documentation": f"{BASE_URL}/mcp"
344 | }
345 |
346 | @app.get("/.well-known/oauth-protected-resource/mcp")
347 | async def oauth_protected_resource_mcp_suffix():
348 | """OAuth 2.0 Protected Resource Metadata - Claude AI MCP specific format"""
349 | return {
350 | "resource": BASE_URL,
351 | "authorization_servers": [
352 | BASE_URL
353 | ],
354 | "scopes_supported": ["read", "search"],
355 | "bearer_methods_supported": ["header"],
356 | "resource_documentation": f"{BASE_URL}/mcp",
357 | "resource_policy_uri": f"{BASE_URL}/privacy"
358 | }
359 |
360 | # OAuth 2.0 Protected Resource Metadata (RFC 9728) - MCP Spec Required
361 | @app.get("/.well-known/oauth-protected-resource")
362 | async def oauth_protected_resource():
363 | """OAuth 2.0 Protected Resource Metadata as required by MCP spec"""
364 | return {
365 | "resource": BASE_URL,
366 | "authorization_servers": [
367 | BASE_URL
368 | ],
369 | "scopes_supported": ["read", "search"],
370 | "bearer_methods_supported": ["header"],
371 | "resource_documentation": f"{BASE_URL}/mcp",
372 | "resource_policy_uri": f"{BASE_URL}/privacy"
373 | }
374 |
375 | # Standard well-known discovery endpoint
376 | @app.get("/.well-known/mcp")
377 | async def well_known_mcp():
378 | """Standard MCP discovery endpoint"""
379 | return {
380 | "mcp_server": {
381 | "name": "Yargı MCP Server",
382 | "version": "0.1.0",
383 | "endpoint": f"{BASE_URL}/mcp",
384 | "authentication": {
385 | "type": "oauth2",
386 | "authorization_url": f"{BASE_URL}/auth/login",
387 | "scopes": ["read", "search"]
388 | },
389 | "capabilities": ["tools", "resources"],
390 | "tools_count": len(mcp_server._tool_manager._tools)
391 | }
392 | }
393 |
394 | # MCP Discovery endpoint for ChatGPT integration
395 | @app.get("/mcp/discovery")
396 | async def mcp_discovery():
397 | """MCP Discovery endpoint for ChatGPT and other MCP clients"""
398 | return {
399 | "name": "Yargı MCP Server",
400 | "description": "MCP server for Turkish legal databases",
401 | "version": "0.1.0",
402 | "protocol": "mcp",
403 | "transport": "http",
404 | "endpoint": "/mcp",
405 | "authentication": {
406 | "type": "oauth2",
407 | "authorization_url": "/auth/login",
408 | "token_url": "/token",
409 | "scopes": ["read", "search"],
410 | "provider": "clerk"
411 | },
412 | "capabilities": {
413 | "tools": True,
414 | "resources": True,
415 | "prompts": False
416 | },
417 | "tools_count": len(mcp_server._tool_manager._tools),
418 | "contact": {
419 | "url": BASE_URL,
420 | "email": "[email protected]"
421 | }
422 | }
423 |
424 | # FastAPI status endpoint
425 | @app.get("/status")
426 | async def status():
427 | """Status endpoint with detailed information"""
428 | tools = []
429 | for tool in mcp_server._tool_manager._tools.values():
430 | tools.append({
431 | "name": tool.name,
432 | "description": tool.description[:100] + "..." if len(tool.description) > 100 else tool.description
433 | })
434 |
435 | return {
436 | "status": "operational",
437 | "tools": tools,
438 | "total_tools": len(tools),
439 | "transport": "streamable_http",
440 | "architecture": "FastAPI wrapper + MCP Starlette sub-app",
441 | "auth_status": "enabled" if os.getenv("ENABLE_AUTH", "false").lower() == "true" else "disabled"
442 | }
443 |
444 | # Simplified OAuth session validation for callback endpoints only
445 | async def validate_clerk_session_for_oauth(request: Request, clerk_token: str = None) -> str:
446 | """Validate Clerk session for OAuth callback endpoints only (not for MCP endpoints)"""
447 |
448 | try:
449 | # Use Clerk SDK if available
450 | if not CLERK_SDK_AVAILABLE:
451 | raise ImportError("Clerk SDK not available")
452 | clerk = Clerk(bearer_auth=CLERK_SECRET_KEY)
453 |
454 | # Try JWT token first (from URL parameter)
455 | if clerk_token:
456 | try:
457 | return "oauth_user_from_token"
458 | except Exception as e:
459 | pass
460 |
461 | # Fallback to cookie validation
462 | clerk_session = request.cookies.get("__session")
463 | if not clerk_session:
464 | raise HTTPException(status_code=401, detail="No Clerk session found")
465 |
466 | # Validate session with Clerk
467 | session = clerk.sessions.verify_session(clerk_session)
468 | return session.user_id
469 |
470 | except ImportError:
471 | return "dev_user_123"
472 | except Exception as e:
473 | raise HTTPException(status_code=401, detail=f"OAuth session validation failed: {str(e)}")
474 |
475 | # MCP OAuth Callback Endpoint
476 | @app.get("/auth/mcp-callback")
477 | async def mcp_oauth_callback(request: Request, clerk_token: str = Query(None)):
478 | """Handle OAuth callback for MCP token generation"""
479 |
480 | try:
481 | # Validate Clerk session with JWT token support
482 | user_id = await validate_clerk_session_for_oauth(request, clerk_token)
483 |
484 | # Return success response
485 | return HTMLResponse(f"""
486 | <html>
487 | <head>
488 | <title>MCP Connection Successful</title>
489 | <style>
490 | body {{ font-family: Arial, sans-serif; text-align: center; padding: 50px; }}
491 | .success {{ color: #28a745; }}
492 | .token {{ background: #f8f9fa; padding: 15px; border-radius: 5px; margin: 20px 0; word-break: break-all; }}
493 | </style>
494 | </head>
495 | <body>
496 | <h1 class="success">✅ MCP Connection Successful!</h1>
497 | <p>Your Yargı MCP integration is now active.</p>
498 | <div class="token">
499 | <strong>Authentication:</strong><br>
500 | <code>Use your Clerk JWT token directly with Bearer authentication</code>
501 | </div>
502 | <p>You can now close this window and return to your MCP client.</p>
503 | <script>
504 | // Try to close the popup if opened as such
505 | if (window.opener) {{
506 | window.opener.postMessage({{
507 | type: 'MCP_AUTH_SUCCESS',
508 | token: 'use_clerk_jwt_token'
509 | }}, '*');
510 | setTimeout(() => window.close(), 3000);
511 | }}
512 | </script>
513 | </body>
514 | </html>
515 | """)
516 |
517 | except HTTPException as e:
518 | return HTMLResponse(f"""
519 | <html>
520 | <head>
521 | <title>MCP Connection Failed</title>
522 | <style>
523 | body {{ font-family: Arial, sans-serif; text-align: center; padding: 50px; }}
524 | .error {{ color: #dc3545; }}
525 | .debug {{ background: #f8f9fa; padding: 10px; margin: 20px 0; border-radius: 5px; font-family: monospace; }}
526 | </style>
527 | </head>
528 | <body>
529 | <h1 class="error">❌ MCP Connection Failed</h1>
530 | <p>{e.detail}</p>
531 | <div class="debug">
532 | <strong>Debug Info:</strong><br>
533 | Clerk Token: {'✅ Provided' if clerk_token else '❌ Missing'}<br>
534 | Error: {e.detail}<br>
535 | Status: {e.status_code}
536 | </div>
537 | <p>Please try again or contact support.</p>
538 | <a href="https://yargimcp.com/sign-in">Return to Sign In</a>
539 | </body>
540 | </html>
541 | """, status_code=e.status_code)
542 | except Exception as e:
543 | return HTMLResponse(f"""
544 | <html>
545 | <head>
546 | <title>MCP Connection Error</title>
547 | <style>
548 | body {{ font-family: Arial, sans-serif; text-align: center; padding: 50px; }}
549 | .error {{ color: #dc3545; }}
550 | </style>
551 | </head>
552 | <body>
553 | <h1 class="error">❌ Unexpected Error</h1>
554 | <p>An unexpected error occurred during authentication.</p>
555 | <p>Error: {str(e)}</p>
556 | <a href="https://yargimcp.com/sign-in">Return to Sign In</a>
557 | </body>
558 | </html>
559 | """, status_code=500)
560 |
561 | # OAuth2 Token Endpoint - Now uses Clerk JWT tokens directly
562 | @app.post("/auth/mcp-token")
563 | async def mcp_token_endpoint(request: Request):
564 | """OAuth2 token endpoint for MCP clients - returns Clerk JWT token info"""
565 | try:
566 | # Validate Clerk session
567 | user_id = await validate_clerk_session_for_oauth(request)
568 |
569 | return {
570 | "message": "Use your Clerk JWT token directly with Bearer authentication",
571 | "token_type": "Bearer",
572 | "scope": "yargi.read",
573 | "user_id": user_id,
574 | "instructions": "Include 'Authorization: Bearer YOUR_CLERK_JWT_TOKEN' in your requests"
575 | }
576 | except HTTPException as e:
577 | return JSONResponse(
578 | status_code=e.status_code,
579 | content={"error": "invalid_request", "error_description": e.detail}
580 | )
581 |
582 | # Mount MCP app at /mcp/ with trailing slash
583 | app.mount("/mcp/", mcp_app)
584 |
585 | # Set the lifespan context after mounting
586 | app.router.lifespan_context = mcp_app.lifespan
587 |
588 | # Export for uvicorn
589 | __all__ = ["app"]
```
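The wrapper above exposes plain FastAPI endpoints (`/health`, `/status`, `/mcp/discovery`) alongside the MCP app mounted at `/mcp/`, and, when `ENABLE_AUTH=true`, expects a Clerk-issued JWT as a Bearer token. Below is a minimal smoke-test sketch against a local run (`uvicorn asgi_app:app --port 8000`); the base URL and the placeholder token are assumptions, not values from this repository.

```python
# Hedged sketch: exercise the service endpoints defined in asgi_app.py (assumed local run).
import asyncio
import httpx

BASE_URL = "http://localhost:8000"   # assumed local deployment
CLERK_JWT = "YOUR_CLERK_JWT_TOKEN"   # placeholder; only relevant when ENABLE_AUTH=true

async def main() -> None:
    async with httpx.AsyncClient(base_url=BASE_URL) as client:
        print((await client.get("/health")).json())                      # {"status": "healthy", ...}
        print((await client.get("/status")).json()["total_tools"])
        print((await client.get("/mcp/discovery")).json()["endpoint"])   # "/mcp"

        # The MCP endpoint itself is mounted at /mcp/ (a 308 redirect covers /mcp) and,
        # with auth enabled, requires "Authorization: Bearer <Clerk JWT>".
        resp = await client.get("/mcp/", headers={"Authorization": f"Bearer {CLERK_JWT}"})
        print(resp.status_code)

asyncio.run(main())
```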
--------------------------------------------------------------------------------
/saidsurucu-yargi-mcp-f5fa007/asgi_app.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | ASGI application for Yargı MCP Server
3 |
4 | This module provides ASGI/HTTP access to the Yargı MCP server,
5 | allowing it to be deployed as a web service with FastAPI wrapper
6 | for Stripe webhook integration.
7 |
8 | Usage:
9 | uvicorn asgi_app:app --host 0.0.0.0 --port 8000
10 | """
11 |
12 | import os
13 | import time
14 | import logging
15 | from datetime import datetime, timedelta
16 | from fastapi import FastAPI, Request, HTTPException, Query
17 | from fastapi.responses import JSONResponse, HTMLResponse
18 | from fastapi.exception_handlers import http_exception_handler
19 | from starlette.middleware import Middleware
20 | from starlette.middleware.cors import CORSMiddleware
21 | from starlette.responses import Response
22 | from starlette.requests import Request as StarletteRequest
23 |
24 | # Import the MCP app creator function
25 | from mcp_server_main import create_app
26 |
27 | # Import Stripe webhook router
28 | from stripe_webhook import router as stripe_router
29 |
30 | # Import simplified MCP Auth HTTP adapter
31 | from mcp_auth_http_simple import router as mcp_auth_router
32 |
33 | # OAuth configuration from environment variables
34 | CLERK_ISSUER = os.getenv("CLERK_ISSUER", "https://accounts.yargimcp.com")
35 | BASE_URL = os.getenv("BASE_URL", "https://yargimcp.com")
36 |
37 | # Setup logging
38 | logger = logging.getLogger(__name__)
39 |
40 | # Configure CORS and Auth middleware
41 | cors_origins = os.getenv("ALLOWED_ORIGINS", "*").split(",")
42 |
43 | # Import FastMCP Bearer Auth Provider
44 | from fastmcp.server.auth import BearerAuthProvider
45 | from fastmcp.server.auth.providers.bearer import RSAKeyPair
46 |
47 | # Clerk JWT configuration for Bearer token validation
48 | CLERK_SECRET_KEY = os.getenv("CLERK_SECRET_KEY")
49 | CLERK_ISSUER = os.getenv("CLERK_ISSUER", "https://accounts.yargimcp.com")
50 | CLERK_PUBLISHABLE_KEY = os.getenv("CLERK_PUBLISHABLE_KEY")
51 |
52 | # Configure Bearer token authentication
53 | bearer_auth = None
54 | if CLERK_SECRET_KEY and CLERK_ISSUER:
55 | # Production: Use Clerk JWKS endpoint for token validation
56 | bearer_auth = BearerAuthProvider(
57 | jwks_uri=f"{CLERK_ISSUER}/.well-known/jwks.json",
58 | issuer=CLERK_ISSUER,
59 | algorithm="RS256",
60 | audience=None, # Disable audience validation - Clerk uses different audience format
61 | required_scopes=[] # Disable scope validation - Clerk JWT has ['read', 'search']
62 | )
63 | logger.info(f"Bearer auth configured with Clerk JWKS: {CLERK_ISSUER}/.well-known/jwks.json")
64 | else:
65 | # Development: Generate RSA key pair for testing
66 | logger.warning("No Clerk credentials found - using development RSA key pair")
67 | dev_key_pair = RSAKeyPair.generate()
68 | bearer_auth = BearerAuthProvider(
69 | public_key=dev_key_pair.public_key,
70 | issuer="https://dev.yargimcp.com",
71 | audience="dev-mcp-server",
72 | required_scopes=["yargi.read"]
73 | )
74 |
75 | # Generate a test token for development
76 | dev_token = dev_key_pair.create_token(
77 | subject="dev-user",
78 | issuer="https://dev.yargimcp.com",
79 | audience="dev-mcp-server",
80 | scopes=["yargi.read", "yargi.search"],
81 | expires_in_seconds=3600 * 24 # 24 hours for development
82 | )
83 | logger.info(f"Development Bearer token: {dev_token}")
84 |
85 | custom_middleware = [
86 | Middleware(
87 | CORSMiddleware,
88 | allow_origins=cors_origins,
89 | allow_credentials=True,
90 | allow_methods=["GET", "POST", "OPTIONS", "DELETE"],
91 | allow_headers=["Content-Type", "Authorization", "X-Request-ID", "X-Session-ID"],
92 | ),
93 | ]
94 |
95 | # Create MCP app with Bearer authentication
96 | mcp_server = create_app(auth=bearer_auth)
97 |
98 | # Add Starlette middleware to FastAPI (not MCP)
99 | # MCP already has Bearer auth, no need for additional middleware on MCP level
100 |
101 | # Create MCP Starlette sub-application with root path - mount will add /mcp prefix
102 | mcp_app = mcp_server.http_app(path="/")
103 |
104 | # Configure JSON encoder for proper Turkish character support
105 | import json
106 | from fastapi.responses import JSONResponse
107 |
108 | class UTF8JSONResponse(JSONResponse):
109 | def __init__(self, content=None, status_code=200, headers=None, **kwargs):
110 | if headers is None:
111 | headers = {}
112 | headers["Content-Type"] = "application/json; charset=utf-8"
113 | super().__init__(content, status_code, headers, **kwargs)
114 |
115 | def render(self, content) -> bytes:
116 | return json.dumps(
117 | content,
118 | ensure_ascii=False,
119 | allow_nan=False,
120 | indent=None,
121 | separators=(",", ":"),
122 | ).encode("utf-8")
123 |
124 | # Create FastAPI wrapper application
125 | app = FastAPI(
126 | title="Yargı MCP Server",
127 | description="MCP server for Turkish legal databases with OAuth authentication",
128 | version="0.1.0",
129 | middleware=custom_middleware,
130 | default_response_class=UTF8JSONResponse # Use UTF-8 JSON encoder
131 | )
132 |
133 | # Add Stripe webhook router to FastAPI
134 | app.include_router(stripe_router, prefix="/api")
135 |
136 | # Add MCP Auth HTTP adapter to FastAPI (handles OAuth endpoints)
137 | app.include_router(mcp_auth_router)
138 |
139 | # Custom 401 exception handler for MCP spec compliance
140 | @app.exception_handler(401)
141 | async def custom_401_handler(request: Request, exc: HTTPException):
142 | """Custom 401 handler that adds WWW-Authenticate header as required by MCP spec"""
143 | response = await http_exception_handler(request, exc)
144 |
145 | # Add WWW-Authenticate header pointing to protected resource metadata
146 | # as required by RFC 9728 Section 5.1 and MCP Authorization spec
147 | response.headers["WWW-Authenticate"] = (
148 | 'Bearer '
149 | 'error="invalid_token", '
150 | 'error_description="The access token is missing or invalid", '
151 | f'resource="{BASE_URL}/.well-known/oauth-protected-resource"'
152 | )
153 |
154 | return response
155 |
156 | # FastAPI health check endpoint - BEFORE mounting MCP app
157 | @app.get("/health")
158 | async def health_check():
159 | """Health check endpoint for monitoring"""
160 | return JSONResponse({
161 | "status": "healthy",
162 | "service": "Yargı MCP Server",
163 | "version": "0.1.0",
164 | "tools_count": len(mcp_server._tool_manager._tools),
165 | "auth_enabled": os.getenv("ENABLE_AUTH", "false").lower() == "true"
166 | })
167 |
168 | # Add explicit redirect for /mcp to /mcp/ with method preservation
169 | @app.api_route("/mcp", methods=["GET", "POST", "HEAD", "OPTIONS"])
170 | async def redirect_to_slash(request: Request):
171 | """Redirect /mcp to /mcp/ preserving HTTP method with 308"""
172 | from fastapi.responses import RedirectResponse
173 | return RedirectResponse(url="/mcp/", status_code=308)
174 |
175 | # Mount MCP app at /mcp/ with trailing slash
176 | app.mount("/mcp/", mcp_app)
177 |
178 | # Set the lifespan context after mounting
179 | app.router.lifespan_context = mcp_app.lifespan
180 |
181 |
182 | # SSE transport deprecated - removed
183 |
184 | # FastAPI root endpoint
185 | @app.get("/")
186 | async def root():
187 | """Root endpoint with service information"""
188 | return JSONResponse({
189 | "service": "Yargı MCP Server",
190 | "description": "MCP server for Turkish legal databases with OAuth authentication",
191 | "endpoints": {
192 | "mcp": "/mcp",
193 | "health": "/health",
194 | "status": "/status",
195 | "stripe_webhook": "/api/stripe/webhook",
196 | "oauth_login": "/auth/login",
197 | "oauth_callback": "/auth/callback",
198 | "oauth_google": "/auth/google/login",
199 | "user_info": "/auth/user"
200 | },
201 | "transports": {
202 | "http": "/mcp"
203 | },
204 | "supported_databases": [
205 | "Yargıtay (Court of Cassation)",
206 | "Danıştay (Council of State)",
207 | "Emsal (Precedent)",
208 | "Uyuşmazlık Mahkemesi (Court of Jurisdictional Disputes)",
209 | "Anayasa Mahkemesi (Constitutional Court)",
210 | "Kamu İhale Kurulu (Public Procurement Authority)",
211 | "Rekabet Kurumu (Competition Authority)",
212 | "Sayıştay (Court of Accounts)",
213 | "Bedesten API (Multiple courts)"
214 | ],
215 | "authentication": {
216 | "enabled": os.getenv("ENABLE_AUTH", "false").lower() == "true",
217 | "type": "OAuth 2.0 via Clerk",
218 | "issuer": os.getenv("CLERK_ISSUER", "https://clerk.accounts.dev"),
219 | "providers": ["google"],
220 | "flow": "authorization_code"
221 | }
222 | })
223 |
224 | # OAuth 2.0 Authorization Server Metadata proxy (for MCP clients that can't reach Clerk directly)
225 | # MCP Auth Toolkit expects this to be under /mcp/.well-known/oauth-authorization-server
226 | @app.get("/mcp/.well-known/oauth-authorization-server")
227 | async def oauth_authorization_server():
228 | """OAuth 2.0 Authorization Server Metadata proxy to Clerk - MCP Auth Toolkit standard location"""
229 | return JSONResponse({
230 | "issuer": BASE_URL,
231 | "authorization_endpoint": "https://yargimcp.com/mcp-callback",
232 | "token_endpoint": f"{BASE_URL}/token",
233 | "jwks_uri": f"{CLERK_ISSUER}/.well-known/jwks.json",
234 | "response_types_supported": ["code"],
235 | "grant_types_supported": ["authorization_code", "refresh_token"],
236 | "token_endpoint_auth_methods_supported": ["client_secret_basic", "none"],
237 | "scopes_supported": ["read", "search", "openid", "profile", "email"],
238 | "subject_types_supported": ["public"],
239 | "id_token_signing_alg_values_supported": ["RS256"],
240 | "claims_supported": ["sub", "iss", "aud", "exp", "iat", "email", "name"],
241 | "code_challenge_methods_supported": ["S256"],
242 | "service_documentation": f"{BASE_URL}/mcp",
243 | "registration_endpoint": f"{BASE_URL}/register",
244 | "resource_documentation": f"{BASE_URL}/mcp"
245 | })
246 |
247 | # Claude AI MCP specific endpoint format
248 | @app.get("/.well-known/oauth-authorization-server/mcp")
249 | async def oauth_authorization_server_mcp_suffix():
250 | """OAuth 2.0 Authorization Server Metadata - Claude AI MCP specific format"""
251 | return JSONResponse({
252 | "issuer": BASE_URL,
253 | "authorization_endpoint": "https://yargimcp.com/mcp-callback",
254 | "token_endpoint": f"{BASE_URL}/token",
255 | "jwks_uri": f"{CLERK_ISSUER}/.well-known/jwks.json",
256 | "response_types_supported": ["code"],
257 | "grant_types_supported": ["authorization_code", "refresh_token"],
258 | "token_endpoint_auth_methods_supported": ["client_secret_basic", "none"],
259 | "scopes_supported": ["read", "search", "openid", "profile", "email"],
260 | "subject_types_supported": ["public"],
261 | "id_token_signing_alg_values_supported": ["RS256"],
262 | "claims_supported": ["sub", "iss", "aud", "exp", "iat", "email", "name"],
263 | "code_challenge_methods_supported": ["S256"],
264 | "service_documentation": f"{BASE_URL}/mcp",
265 | "registration_endpoint": f"{BASE_URL}/register",
266 | "resource_documentation": f"{BASE_URL}/mcp"
267 | })
268 |
269 | @app.get("/.well-known/oauth-protected-resource/mcp")
270 | async def oauth_protected_resource_mcp_suffix():
271 | """OAuth 2.0 Protected Resource Metadata - Claude AI MCP specific format"""
272 | return JSONResponse({
273 | "resource": BASE_URL,
274 | "authorization_servers": [
275 | BASE_URL
276 | ],
277 | "scopes_supported": ["read", "search"],
278 | "bearer_methods_supported": ["header"],
279 | "resource_documentation": f"{BASE_URL}/mcp",
280 | "resource_policy_uri": f"{BASE_URL}/privacy"
281 | })
282 |
283 | # Keep root level for compatibility with some MCP clients
284 | @app.get("/.well-known/oauth-authorization-server")
285 | async def oauth_authorization_server_root():
286 | """OAuth 2.0 Authorization Server Metadata proxy to Clerk - root level for compatibility"""
287 | return JSONResponse({
288 | "issuer": BASE_URL,
289 | "authorization_endpoint": "https://yargimcp.com/mcp-callback",
290 | "token_endpoint": f"{BASE_URL}/token",
291 | "jwks_uri": f"{CLERK_ISSUER}/.well-known/jwks.json",
292 | "response_types_supported": ["code"],
293 | "grant_types_supported": ["authorization_code", "refresh_token"],
294 | "token_endpoint_auth_methods_supported": ["client_secret_basic", "none"],
295 | "scopes_supported": ["read", "search", "openid", "profile", "email"],
296 | "subject_types_supported": ["public"],
297 | "id_token_signing_alg_values_supported": ["RS256"],
298 | "claims_supported": ["sub", "iss", "aud", "exp", "iat", "email", "name"],
299 | "code_challenge_methods_supported": ["S256"],
300 | "service_documentation": f"{BASE_URL}/mcp",
301 | "registration_endpoint": f"{BASE_URL}/register",
302 | "resource_documentation": f"{BASE_URL}/mcp"
303 | })
304 |
305 | # Note: GET /mcp is handled by the mounted MCP app itself
306 | # This prevents 405 Method Not Allowed errors on POST requests
307 |
308 | # OAuth 2.0 Protected Resource Metadata (RFC 9728) - MCP Spec Required
309 | @app.get("/.well-known/oauth-protected-resource")
310 | async def oauth_protected_resource():
311 | """OAuth 2.0 Protected Resource Metadata as required by MCP spec"""
312 | return JSONResponse({
313 | "resource": BASE_URL,
314 | "authorization_servers": [
315 | BASE_URL
316 | ],
317 | "scopes_supported": ["read", "search"],
318 | "bearer_methods_supported": ["header"],
319 | "resource_documentation": f"{BASE_URL}/mcp",
320 | "resource_policy_uri": f"{BASE_URL}/privacy"
321 | })
322 |
323 | # Standard well-known discovery endpoint
324 | @app.get("/.well-known/mcp")
325 | async def well_known_mcp():
326 | """Standard MCP discovery endpoint"""
327 | return JSONResponse({
328 | "mcp_server": {
329 | "name": "Yargı MCP Server",
330 | "version": "0.1.0",
331 | "endpoint": f"{BASE_URL}/mcp",
332 | "authentication": {
333 | "type": "oauth2",
334 | "authorization_url": f"{BASE_URL}/auth/login",
335 | "scopes": ["read", "search"]
336 | },
337 | "capabilities": ["tools", "resources"],
338 | "tools_count": len(mcp_server._tool_manager._tools)
339 | }
340 | })
341 |
342 | # MCP Discovery endpoint for ChatGPT integration
343 | @app.get("/mcp/discovery")
344 | async def mcp_discovery():
345 | """MCP Discovery endpoint for ChatGPT and other MCP clients"""
346 | return JSONResponse({
347 | "name": "Yargı MCP Server",
348 | "description": "MCP server for Turkish legal databases",
349 | "version": "0.1.0",
350 | "protocol": "mcp",
351 | "transport": "http",
352 | "endpoint": "/mcp",
353 | "authentication": {
354 | "type": "oauth2",
355 | "authorization_url": "/auth/login",
356 | "token_url": "/auth/callback",
357 | "scopes": ["read", "search"],
358 | "provider": "clerk"
359 | },
360 | "capabilities": {
361 | "tools": True,
362 | "resources": True,
363 | "prompts": False
364 | },
365 | "tools_count": len(mcp_server._tool_manager._tools),
366 | "contact": {
367 | "url": BASE_URL,
368 | "email": "[email protected]"
369 | }
370 | })
371 |
372 | # FastAPI status endpoint
373 | @app.get("/status")
374 | async def status():
375 | """Status endpoint with detailed information"""
376 | tools = []
377 | for tool in mcp_server._tool_manager._tools.values():
378 | tools.append({
379 | "name": tool.name,
380 | "description": tool.description[:100] + "..." if len(tool.description) > 100 else tool.description
381 | })
382 |
383 | return JSONResponse({
384 | "status": "operational",
385 | "tools": tools,
386 | "total_tools": len(tools),
387 | "transport": "streamable_http",
388 | "architecture": "FastAPI wrapper + MCP Starlette sub-app",
389 | "auth_status": "enabled" if os.getenv("ENABLE_AUTH", "false").lower() == "true" else "disabled"
390 | })
391 |
392 | # Note: JWT token validation is now handled entirely by Clerk
393 | # All authentication flows use Clerk JWT tokens directly
394 |
395 | async def validate_clerk_session(request: Request, clerk_token: str = None) -> str:
396 | """Validate Clerk session from cookies or JWT token and return user_id"""
397 | logger.info(f"Validating Clerk session - token provided: {bool(clerk_token)}")
398 |
399 | try:
400 | # Try to import Clerk SDK
401 | from clerk_backend_api import Clerk
402 | clerk = Clerk(bearer_auth=os.getenv("CLERK_SECRET_KEY"))
403 |
404 | # Try JWT token first (from URL parameter)
405 | if clerk_token:
406 | logger.info("Validating Clerk JWT token from URL parameter")
407 | try:
408 | # Extract session_id from JWT token and verify with Clerk
409 | import jwt
410 | decoded_token = jwt.decode(clerk_token, options={"verify_signature": False})
411 | session_id = decoded_token.get("sid") # Use standard JWT 'sid' claim
412 |
413 | if session_id:
414 | # Verify with Clerk using session_id
415 | session = clerk.sessions.verify(session_id=session_id, token=clerk_token)
416 | user_id = session.user_id if session else None
417 |
418 | if user_id:
419 | logger.info(f"JWT token validation successful - user_id: {user_id}")
420 | return user_id
421 | else:
422 | logger.error("JWT token validation failed - no user_id in session")
423 | else:
424 | logger.error("No session_id found in JWT token")
425 | except Exception as e:
426 | logger.error(f"JWT token validation failed: {str(e)}")
427 | # Fall through to cookie validation
428 |
429 | # Fallback to cookie validation
430 | logger.info("Attempting cookie-based session validation")
431 | clerk_session = request.cookies.get("__session")
432 | if not clerk_session:
433 | logger.error("No Clerk session cookie found")
434 | raise HTTPException(status_code=401, detail="No Clerk session found")
435 |
436 | # Validate session with Clerk
437 | session = clerk.sessions.verify_session(clerk_session)
438 | logger.info(f"Cookie session validation successful - user_id: {session.user_id}")
439 | return session.user_id
440 |
441 | except ImportError:
442 | # Fallback for development without Clerk SDK
443 | logger.warning("Clerk SDK not available - using development fallback")
444 | return "dev_user_123"
445 | except Exception as e:
446 | logger.error(f"Session validation failed: {str(e)}")
447 | raise HTTPException(status_code=401, detail=f"Session validation failed: {str(e)}")
448 |
449 | # MCP OAuth Callback Endpoint
450 | @app.get("/auth/mcp-callback")
451 | async def mcp_oauth_callback(request: Request, clerk_token: str = Query(None)):
452 | """Handle OAuth callback for MCP token generation"""
453 | logger.info(f"MCP OAuth callback - clerk_token provided: {bool(clerk_token)}")
454 |
455 | try:
456 | # Validate Clerk session with JWT token support
457 | user_id = await validate_clerk_session(request, clerk_token)
458 | logger.info(f"User authenticated successfully - user_id: {user_id}")
459 |
460 | # Use the Clerk JWT token directly (no need to generate custom token)
461 | logger.info("User authenticated successfully via Clerk")
462 |
463 | # Return success response
464 | return HTMLResponse(f"""
465 | <html>
466 | <head>
467 | <title>MCP Connection Successful</title>
468 | <style>
469 | body {{ font-family: Arial, sans-serif; text-align: center; padding: 50px; }}
470 | .success {{ color: #28a745; }}
471 | .token {{ background: #f8f9fa; padding: 15px; border-radius: 5px; margin: 20px 0; word-break: break-all; }}
472 | </style>
473 | </head>
474 | <body>
475 | <h1 class="success">✅ MCP Connection Successful!</h1>
476 | <p>Your Yargı MCP integration is now active.</p>
477 | <div class="token">
478 | <strong>Authentication:</strong><br>
479 | <code>Use your Clerk JWT token directly with Bearer authentication</code>
480 | </div>
481 | <p>You can now close this window and return to your MCP client.</p>
482 | <script>
483 | // Try to close the popup if opened as such
484 | if (window.opener) {{
485 | window.opener.postMessage({{
486 | type: 'MCP_AUTH_SUCCESS',
487 | token: 'use_clerk_jwt_token'
488 | }}, '*');
489 | setTimeout(() => window.close(), 3000);
490 | }}
491 | </script>
492 | </body>
493 | </html>
494 | """)
495 |
496 | except HTTPException as e:
497 | logger.error(f"MCP OAuth callback failed: {e.detail}")
498 | return HTMLResponse(f"""
499 | <html>
500 | <head>
501 | <title>MCP Connection Failed</title>
502 | <style>
503 | body {{ font-family: Arial, sans-serif; text-align: center; padding: 50px; }}
504 | .error {{ color: #dc3545; }}
505 | .debug {{ background: #f8f9fa; padding: 10px; margin: 20px 0; border-radius: 5px; font-family: monospace; }}
506 | </style>
507 | </head>
508 | <body>
509 | <h1 class="error">❌ MCP Connection Failed</h1>
510 | <p>{e.detail}</p>
511 | <div class="debug">
512 | <strong>Debug Info:</strong><br>
513 | Clerk Token: {'✅ Provided' if clerk_token else '❌ Missing'}<br>
514 | Error: {e.detail}<br>
515 | Status: {e.status_code}
516 | </div>
517 | <p>Please try again or contact support.</p>
518 | <a href="https://yargimcp.com/sign-in">Return to Sign In</a>
519 | </body>
520 | </html>
521 | """, status_code=e.status_code)
522 | except Exception as e:
523 | logger.error(f"Unexpected error in MCP OAuth callback: {str(e)}")
524 | return HTMLResponse(f"""
525 | <html>
526 | <head>
527 | <title>MCP Connection Error</title>
528 | <style>
529 | body {{ font-family: Arial, sans-serif; text-align: center; padding: 50px; }}
530 | .error {{ color: #dc3545; }}
531 | </style>
532 | </head>
533 | <body>
534 | <h1 class="error">❌ Unexpected Error</h1>
535 | <p>An unexpected error occurred during authentication.</p>
536 | <p>Error: {str(e)}</p>
537 | <a href="https://yargimcp.com/sign-in">Return to Sign In</a>
538 | </body>
539 | </html>
540 | """, status_code=500)
541 |
542 | # OAuth2 Token Endpoint - Now uses Clerk JWT tokens directly
543 | @app.post("/auth/mcp-token")
544 | async def mcp_token_endpoint(request: Request):
545 | """OAuth2 token endpoint for MCP clients - returns Clerk JWT token info"""
546 | try:
547 | # Validate Clerk session
548 | user_id = await validate_clerk_session(request)
549 |
550 | return JSONResponse({
551 | "message": "Use your Clerk JWT token directly with Bearer authentication",
552 | "token_type": "Bearer",
553 | "scope": "yargi.read",
554 | "user_id": user_id,
555 | "instructions": "Include 'Authorization: Bearer YOUR_CLERK_JWT_TOKEN' in your requests"
556 | })
557 | except HTTPException as e:
558 | return JSONResponse(
559 | status_code=e.status_code,
560 | content={"error": "invalid_request", "error_description": e.detail}
561 | )
562 |
563 | # Note: Only HTTP transport supported - SSE transport deprecated
564 |
565 | # Export for uvicorn
566 | __all__ = ["app"]
```
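This earlier variant of `asgi_app.py` serves the same OAuth discovery documents under `/.well-known/oauth-protected-resource` and `/.well-known/oauth-authorization-server`. The sketch below shows the discovery walk an OAuth-aware MCP client could perform against either variant, using only field names present in the JSON above; the base URL is an assumption.

```python
# Hedged sketch: follow the OAuth discovery metadata exposed by asgi_app.py.
import httpx

BASE_URL = "http://localhost:8000"  # assumed local deployment

with httpx.Client(base_url=BASE_URL) as client:
    # RFC 9728 protected-resource metadata names the authorization server(s)
    resource_meta = client.get("/.well-known/oauth-protected-resource").json()
    auth_server = resource_meta["authorization_servers"][0]

    # Authorization-server metadata advertises where to authorize and exchange tokens
    auth_meta = client.get("/.well-known/oauth-authorization-server").json()
    print("authorization server:", auth_server)
    print("authorize at:", auth_meta["authorization_endpoint"])
    print("exchange code at:", auth_meta["token_endpoint"])
    print("resource scopes:", resource_meta["scopes_supported"])  # ["read", "search"]
```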
--------------------------------------------------------------------------------
/rekabet_mcp_module/client.py:
--------------------------------------------------------------------------------
```python
1 | # rekabet_mcp_module/client.py
2 |
3 | import httpx
4 | from bs4 import BeautifulSoup
5 | from typing import List, Optional, Tuple, Dict, Any
6 | import logging
7 | import html
8 | import re
9 | import io # For io.BytesIO
10 | from urllib.parse import urlencode, urljoin, quote, parse_qs, urlparse
11 | from markitdown import MarkItDown
12 | import math
13 |
14 | # pypdf for PDF processing (lighter alternative to PyMuPDF)
15 | from pypdf import PdfReader, PdfWriter # pypdf, the successor to PyPDF2
16 |
17 | from .models import (
18 | RekabetKurumuSearchRequest,
19 | RekabetDecisionSummary,
20 | RekabetSearchResult,
21 | RekabetDocument,
22 | RekabetKararTuruGuidEnum
23 | )
24 | from pydantic import HttpUrl # Ensure HttpUrl is imported from pydantic
25 |
26 | logger = logging.getLogger(__name__)
27 | if not logger.hasHandlers(): # pragma: no cover
28 | logging.basicConfig(
29 |         level=logging.INFO, # Default log level
30 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
31 | )
32 |     # The level can be raised separately for more detailed logging in the debug script.
33 |
34 | class RekabetKurumuApiClient:
35 | BASE_URL = "https://www.rekabet.gov.tr"
36 | SEARCH_PATH = "/tr/Kararlar"
37 | DECISION_LANDING_PATH_TEMPLATE = "/Karar"
38 |     # Since the PDF is returned as page-based Markdown, this constant is no longer used directly.
39 | # DOCUMENT_MARKDOWN_CHUNK_SIZE = 5000
40 |
41 | def __init__(self, request_timeout: float = 60.0):
42 | self.http_client = httpx.AsyncClient(
43 | base_url=self.BASE_URL,
44 | headers={
45 | "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
46 | "Accept-Language": "tr-TR,tr;q=0.9,en-US;q=0.8,en;q=0.7",
47 | "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
48 | },
49 | timeout=request_timeout,
50 | verify=True,
51 | follow_redirects=True
52 | )
53 |
54 | def _build_search_query_params(self, params: RekabetKurumuSearchRequest) -> List[Tuple[str, str]]:
55 | query_params: List[Tuple[str, str]] = []
56 | query_params.append(("sayfaAdi", params.sayfaAdi if params.sayfaAdi is not None else ""))
57 | query_params.append(("YayinlanmaTarihi", params.YayinlanmaTarihi if params.YayinlanmaTarihi is not None else ""))
58 | query_params.append(("PdfText", params.PdfText if params.PdfText is not None else ""))
59 |
60 | karar_turu_id_value = ""
61 | if params.KararTuruID is not None:
62 | karar_turu_id_value = params.KararTuruID.value if params.KararTuruID.value != "ALL" else ""
63 | query_params.append(("KararTuruID", karar_turu_id_value))
64 |
65 | query_params.append(("KararSayisi", params.KararSayisi if params.KararSayisi is not None else ""))
66 | query_params.append(("KararTarihi", params.KararTarihi if params.KararTarihi is not None else ""))
67 |
68 | if params.page and params.page > 1:
69 | query_params.append(("page", str(params.page)))
70 |
71 | return query_params
72 |
73 | async def search_decisions(self, params: RekabetKurumuSearchRequest) -> RekabetSearchResult:
74 | request_path = self.SEARCH_PATH
75 | final_query_params = self._build_search_query_params(params)
76 | logger.info(f"RekabetKurumuApiClient: Performing search. Path: {request_path}, Parameters: {final_query_params}")
77 |
78 | try:
79 | response = await self.http_client.get(request_path, params=final_query_params)
80 | response.raise_for_status()
81 | html_content = response.text
82 | except httpx.RequestError as e:
83 | logger.error(f"RekabetKurumuApiClient: HTTP request error during search: {e}")
84 | raise
85 |
86 | soup = BeautifulSoup(html_content, 'html.parser')
87 | processed_decisions: List[RekabetDecisionSummary] = []
88 | total_records: Optional[int] = None
89 | total_pages: Optional[int] = None
90 |
91 | pagination_div = soup.find("div", class_="yazi01")
92 | if pagination_div:
93 | text_content = pagination_div.get_text(separator=" ", strip=True)
94 | total_match = re.search(r"Toplam\s*:\s*(\d+)", text_content)
95 | if total_match:
96 | try:
97 | total_records = int(total_match.group(1))
98 | logger.debug(f"Total records found from pagination: {total_records}")
99 | except ValueError:
100 | logger.warning(f"Could not convert 'Toplam' value to int: {total_match.group(1)}")
101 | else:
102 | logger.warning("'Toplam :' string not found in pagination section.")
103 |
104 | results_per_page_assumed = 10
105 | if total_records is not None:
106 | calculated_total_pages = math.ceil(total_records / results_per_page_assumed)
107 | total_pages = calculated_total_pages if calculated_total_pages > 0 else (1 if total_records > 0 else 0)
108 | logger.debug(f"Calculated total pages: {total_pages}")
109 |
110 | if total_pages is None: # Fallback if total_records couldn't be parsed
111 | last_page_link = pagination_div.select_one("li.PagedList-skipToLast a")
112 | if last_page_link and last_page_link.has_attr('href'):
113 | qs = parse_qs(urlparse(last_page_link['href']).query)
114 | if 'page' in qs and qs['page']:
115 | try:
116 | total_pages = int(qs['page'][0])
117 | logger.debug(f"Total pages found from 'Last >>' link: {total_pages}")
118 | except ValueError:
119 | logger.warning(f"Could not convert page value from 'Last >>' link to int: {qs['page'][0]}")
120 | elif total_records == 0 : total_pages = 0 # If no records, 0 pages
121 | elif total_records is not None and total_records > 0 : total_pages = 1 # If records exist but no last page link (e.g. single page)
122 | else: logger.warning("'Last >>' link not found in pagination section.")
123 |
124 | decision_tables_container = soup.find("div", id="kararList")
125 | if not decision_tables_container:
126 | logger.warning("`div#kararList` (decision list container) not found. HTML structure might have changed or no decisions on this page.")
127 | else:
128 | decision_tables = decision_tables_container.find_all("table", class_="equalDivide")
129 | logger.info(f"Found {len(decision_tables)} 'table' elements with class='equalDivide' for parsing.")
130 |
131 | if not decision_tables and total_records is not None and total_records > 0 :
132 | logger.warning(f"Page indicates {total_records} records but no decision tables found with class='equalDivide'.")
133 |
134 | for idx, table in enumerate(decision_tables):
135 | logger.debug(f"Processing table {idx + 1}...")
136 | try:
137 | rows = table.find_all("tr")
138 | if len(rows) != 3:
139 | logger.warning(f"Table {idx + 1} has an unexpected number of rows ({len(rows)} instead of 3). Skipping. HTML snippet:\n{table.prettify()[:500]}")
140 | continue
141 |
142 | # Row 1: Publication Date, Decision Number, Related Cases Link
143 | td_elements_r1 = rows[0].find_all("td")
144 | pub_date = td_elements_r1[0].get_text(strip=True) if len(td_elements_r1) > 0 else None
145 | dec_num = td_elements_r1[1].get_text(strip=True) if len(td_elements_r1) > 1 else None
146 |
147 | related_cases_link_tag = td_elements_r1[2].find("a", href=True) if len(td_elements_r1) > 2 else None
148 | related_cases_url_str: Optional[str] = None
149 | karar_id_from_related: Optional[str] = None
150 | if related_cases_link_tag and related_cases_link_tag.has_attr('href'):
151 | related_cases_url_str = urljoin(self.BASE_URL, related_cases_link_tag['href'])
152 | qs_related = parse_qs(urlparse(related_cases_link_tag['href']).query)
153 | if 'kararId' in qs_related and qs_related['kararId']:
154 | karar_id_from_related = qs_related['kararId'][0]
155 |
156 | # Row 2: Decision Date, Decision Type
157 | td_elements_r2 = rows[1].find_all("td")
158 | dec_date = td_elements_r2[0].get_text(strip=True) if len(td_elements_r2) > 0 else None
159 | dec_type_text = td_elements_r2[1].get_text(strip=True) if len(td_elements_r2) > 1 else None
160 |
161 | # Row 3: Title and Main Decision Link
162 | title_cell = rows[2].find("td", colspan="5")
163 | decision_link_tag = title_cell.find("a", href=True) if title_cell else None
164 |
165 | title_text: Optional[str] = None
166 | decision_landing_url_str: Optional[str] = None
167 | karar_id_from_main_link: Optional[str] = None
168 |
169 | if decision_link_tag and decision_link_tag.has_attr('href'):
170 | title_text = decision_link_tag.get_text(strip=True)
171 | href_val = decision_link_tag['href']
172 | if href_val.startswith(self.DECISION_LANDING_PATH_TEMPLATE + "?kararId="): # Ensure it's a decision link
173 | decision_landing_url_str = urljoin(self.BASE_URL, href_val)
174 | qs_main = parse_qs(urlparse(href_val).query)
175 | if 'kararId' in qs_main and qs_main['kararId']:
176 | karar_id_from_main_link = qs_main['kararId'][0]
177 | else:
178 | logger.warning(f"Table {idx+1} decision link has unexpected format: {href_val}")
179 | else:
180 | logger.warning(f"Table {idx+1} could not find title/decision link tag.")
181 |
182 | current_karar_id = karar_id_from_main_link or karar_id_from_related
183 |
184 | if not current_karar_id:
185 | logger.warning(f"Table {idx+1} Karar ID not found. Skipping. Title (if any): {title_text}")
186 | continue
187 |
188 | # Convert string URLs to HttpUrl for the model
189 | final_decision_url = HttpUrl(decision_landing_url_str) if decision_landing_url_str else None
190 | final_related_cases_url = HttpUrl(related_cases_url_str) if related_cases_url_str else None
191 |
192 | processed_decisions.append(RekabetDecisionSummary(
193 | publication_date=pub_date, decision_number=dec_num, decision_date=dec_date,
194 | decision_type_text=dec_type_text, title=title_text,
195 | decision_url=final_decision_url,
196 | karar_id=current_karar_id,
197 | related_cases_url=final_related_cases_url
198 | ))
199 | logger.debug(f"Table {idx+1} parsed successfully: Karar ID '{current_karar_id}', Title '{title_text[:50] if title_text else 'N/A'}...'")
200 |
201 | except Exception as e:
202 | logger.warning(f"RekabetKurumuApiClient: Error parsing decision summary {idx+1}: {e}. Problematic Table HTML:\n{table.prettify()}", exc_info=True)
203 | continue
204 |
205 | return RekabetSearchResult(
206 | decisions=processed_decisions, total_records_found=total_records,
207 | retrieved_page_number=params.page, total_pages=total_pages if total_pages is not None else 0
208 | )
209 |
210 | async def _extract_pdf_url_and_landing_page_metadata(self, karar_id: str, landing_page_html: str, landing_page_url: str) -> Dict[str, Any]:
211 | soup = BeautifulSoup(landing_page_html, 'html.parser')
212 | data: Dict[str, Any] = {
213 | "pdf_url": None,
214 | "title_on_landing_page": soup.title.string.strip() if soup.title and soup.title.string else f"Rekabet Kurumu Kararı {karar_id}",
215 | }
216 | # This part needs to be robust and specific to Rekabet Kurumu's landing page structure.
217 | # Look for common patterns: direct links, download buttons, embedded viewers.
218 | pdf_anchor = soup.find("a", href=re.compile(r"\.pdf(\?|$)", re.IGNORECASE)) # Basic PDF link
219 | if not pdf_anchor: # Try other common patterns if the basic one fails
220 | # Example: Look for links with specific text or class
221 | pdf_anchor = soup.find("a", string=re.compile(r"karar metni|pdf indir", re.IGNORECASE))
222 |
223 | if pdf_anchor and pdf_anchor.has_attr('href'):
224 | pdf_path = pdf_anchor['href']
225 | data["pdf_url"] = urljoin(landing_page_url, pdf_path)
226 | logger.info(f"PDF link found on landing page (<a>): {data['pdf_url']}")
227 | else:
228 | iframe_pdf = soup.find("iframe", src=re.compile(r"\.pdf(\?|$)", re.IGNORECASE))
229 | if iframe_pdf and iframe_pdf.has_attr('src'):
230 | pdf_path = iframe_pdf['src']
231 | data["pdf_url"] = urljoin(landing_page_url, pdf_path)
232 | logger.info(f"PDF link found on landing page (<iframe>): {data['pdf_url']}")
233 | else:
234 | embed_pdf = soup.find("embed", src=re.compile(r"\.pdf(\?|$)", re.IGNORECASE), type="application/pdf")
235 | if embed_pdf and embed_pdf.has_attr('src'):
236 | pdf_path = embed_pdf['src']
237 | data["pdf_url"] = urljoin(landing_page_url, pdf_path)
238 | logger.info(f"PDF link found on landing page (<embed>): {data['pdf_url']}")
239 | else:
240 | logger.warning(f"No PDF link found on landing page {landing_page_url} for kararId {karar_id} using common selectors.")
241 | return data
242 |
243 | async def _download_pdf_bytes(self, pdf_url: str) -> Optional[bytes]:
244 | try:
245 | url_to_fetch = pdf_url if pdf_url.startswith(('http://', 'https://')) else urljoin(self.BASE_URL, pdf_url)
246 | logger.info(f"Downloading PDF from: {url_to_fetch}")
247 | response = await self.http_client.get(url_to_fetch)
248 | response.raise_for_status()
249 | pdf_bytes = await response.aread()
250 | logger.info(f"PDF content downloaded ({len(pdf_bytes)} bytes) from: {url_to_fetch}")
251 | return pdf_bytes
252 | except httpx.RequestError as e:
253 | logger.error(f"HTTP error downloading PDF from {pdf_url}: {e}")
254 | except Exception as e:
255 | logger.error(f"General error downloading PDF from {pdf_url}: {e}")
256 | return None
257 |
258 | def _extract_single_pdf_page_as_pdf_bytes(self, original_pdf_bytes: bytes, page_number_to_extract: int) -> Tuple[Optional[bytes], int]:
259 | total_pages_in_original_pdf = 0
260 | single_page_pdf_bytes: Optional[bytes] = None
261 |
262 | if not original_pdf_bytes:
263 | logger.warning("No original PDF bytes provided for page extraction.")
264 | return None, 0
265 |
266 | try:
267 | pdf_stream = io.BytesIO(original_pdf_bytes)
268 | reader = PdfReader(pdf_stream)
269 | total_pages_in_original_pdf = len(reader.pages)
270 |
271 | if not (0 < page_number_to_extract <= total_pages_in_original_pdf):
272 | logger.warning(f"Requested page number ({page_number_to_extract}) is out of PDF page range (1-{total_pages_in_original_pdf}).")
273 | return None, total_pages_in_original_pdf
274 |
275 | writer = PdfWriter()
276 | writer.add_page(reader.pages[page_number_to_extract - 1]) # pypdf is 0-indexed
277 |
278 | output_pdf_stream = io.BytesIO()
279 | writer.write(output_pdf_stream)
280 | single_page_pdf_bytes = output_pdf_stream.getvalue()
281 |
282 | logger.debug(f"Page {page_number_to_extract} of original PDF (total {total_pages_in_original_pdf} pages) extracted as new PDF using pypdf.")
283 |
284 | except Exception as e:
285 | logger.error(f"Error extracting PDF page using pypdf: {e}", exc_info=True)
286 | return None, total_pages_in_original_pdf
287 | return single_page_pdf_bytes, total_pages_in_original_pdf
288 |
289 | def _convert_pdf_bytes_to_markdown(self, pdf_bytes: bytes, source_url_for_logging: str) -> Optional[str]:
290 | if not pdf_bytes:
291 | logger.warning(f"No PDF bytes provided for Markdown conversion (source: {source_url_for_logging}).")
292 | return None
293 |
294 | pdf_stream = io.BytesIO(pdf_bytes)
295 | try:
296 | md_converter = MarkItDown(enable_plugins=False)
297 | conversion_result = md_converter.convert(pdf_stream)
298 | markdown_text = conversion_result.text_content
299 |
300 | if not markdown_text:
301 | logger.warning(f"MarkItDown returned empty content from PDF byte stream (source: {source_url_for_logging}). PDF page might be image-based or MarkItDown could not process the PDF stream.")
302 | return markdown_text
303 | except Exception as e:
304 | logger.error(f"MarkItDown conversion error for PDF byte stream (source: {source_url_for_logging}): {e}", exc_info=True)
305 | return None
306 |
307 | async def get_decision_document(self, karar_id: str, page_number: int = 1) -> RekabetDocument:
308 | if not karar_id:
309 | return RekabetDocument(
310 | source_landing_page_url=HttpUrl(f"{self.BASE_URL}"),
311 | karar_id=karar_id or "UNKNOWN_KARAR_ID",
312 | error_message="karar_id is required.",
313 | current_page=1, total_pages=0, is_paginated=False )
314 |
315 | decision_url_path = f"{self.DECISION_LANDING_PATH_TEMPLATE}?kararId={karar_id}"
316 | full_landing_page_url = urljoin(self.BASE_URL, decision_url_path)
317 |
318 | logger.info(f"RekabetKurumuApiClient: Getting decision document: {full_landing_page_url}, Requested PDF Page: {page_number}")
319 |
320 | pdf_url_to_report: Optional[HttpUrl] = None
321 | title_to_report: Optional[str] = f"Rekabet Kurumu Kararı {karar_id}" # Default
322 | error_message: Optional[str] = None
323 | markdown_for_requested_page: Optional[str] = None
324 | total_pdf_pages: int = 0
325 |
326 | try:
327 | async with self.http_client.stream("GET", full_landing_page_url) as response:
328 | response.raise_for_status()
329 | content_type = response.headers.get("content-type", "").lower()
330 | final_url_of_response = HttpUrl(str(response.url))
331 | original_pdf_bytes: Optional[bytes] = None
332 |
333 | if "application/pdf" in content_type:
334 | logger.info(f"URL {final_url_of_response} is a direct PDF. Processing content.")
335 | pdf_url_to_report = final_url_of_response
336 | original_pdf_bytes = await response.aread()
337 | elif "text/html" in content_type:
338 | logger.info(f"URL {final_url_of_response} is an HTML landing page. Looking for PDF link.")
339 | landing_page_html_bytes = await response.aread()
340 | detected_charset = response.charset_encoding or 'utf-8'
341 | try: landing_page_html = landing_page_html_bytes.decode(detected_charset)
342 | except UnicodeDecodeError: landing_page_html = landing_page_html_bytes.decode('utf-8', errors='replace')
343 |
344 | if landing_page_html.strip():
345 | landing_page_data = await self._extract_pdf_url_and_landing_page_metadata(karar_id, landing_page_html, str(final_url_of_response))
346 | pdf_url_str_from_html = landing_page_data.get("pdf_url")
347 | if landing_page_data.get("title_on_landing_page"): title_to_report = landing_page_data.get("title_on_landing_page")
348 | if pdf_url_str_from_html:
349 | pdf_url_to_report = HttpUrl(pdf_url_str_from_html)
350 | original_pdf_bytes = await self._download_pdf_bytes(str(pdf_url_to_report))
351 | else: error_message = (error_message or "") + " PDF URL not found on HTML landing page."
352 | else: error_message = "Decision landing page content is empty."
353 | else: error_message = f"Unexpected content type ({content_type}) for URL: {final_url_of_response}"
354 |
355 | if original_pdf_bytes:
356 | single_page_pdf_bytes, total_pdf_pages_from_extraction = self._extract_single_pdf_page_as_pdf_bytes(original_pdf_bytes, page_number)
357 | total_pdf_pages = total_pdf_pages_from_extraction
358 |
359 | if single_page_pdf_bytes:
360 | markdown_for_requested_page = self._convert_pdf_bytes_to_markdown(single_page_pdf_bytes, str(pdf_url_to_report or full_landing_page_url))
361 | if not markdown_for_requested_page:
362 | error_message = (error_message or "") + f"; Could not convert page {page_number} of PDF to Markdown."
363 | elif total_pdf_pages > 0 :
364 | error_message = (error_message or "") + f"; Could not extract page {page_number} from PDF (page may be out of range or extraction failed)."
365 | else:
366 | error_message = (error_message or "") + "; PDF could not be processed or page count was zero (original PDF might be invalid)."
367 | elif not error_message:
368 | error_message = "PDF content could not be downloaded or identified."
369 |
370 | is_paginated = total_pdf_pages > 1
371 | current_page_final = page_number
372 | if total_pdf_pages > 0:
373 | current_page_final = max(1, min(page_number, total_pdf_pages))
374 | elif markdown_for_requested_page is None:
375 | current_page_final = 1
376 |
377 | # If markdown is None but there was no specific error for markdown conversion (e.g. PDF not found first)
378 | # make sure error_message reflects that.
379 | if markdown_for_requested_page is None and pdf_url_to_report and not error_message:
380 | error_message = (error_message or "") + "; Failed to produce Markdown from PDF page."
381 |
382 |
383 | return RekabetDocument(
384 | source_landing_page_url=full_landing_page_url, karar_id=karar_id,
385 | title_on_landing_page=title_to_report, pdf_url=pdf_url_to_report,
386 | markdown_chunk=markdown_for_requested_page, current_page=current_page_final,
387 | total_pages=total_pdf_pages, is_paginated=is_paginated,
388 | error_message=error_message.strip("; ") if error_message else None )
389 |
390 | except httpx.HTTPStatusError as e: error_msg_detail, exc_info_flag = f"HTTP Status error {e.response.status_code} while processing decision page.", False
391 | except httpx.RequestError as e: error_msg_detail, exc_info_flag = f"HTTP Request error while processing decision page: {str(e)}", False
392 | except Exception as e: error_msg_detail, exc_info_flag = f"General error while processing decision: {str(e)}", True
393 | 
394 | # Note: 'e' is unbound once an except block exits in Python 3, so exc_info_flag is set inside the handlers above.
395 | logger.error(f"RekabetKurumuApiClient: Error processing decision {karar_id} from {full_landing_page_url}: {error_msg_detail}", exc_info=exc_info_flag)
396 | error_message = (error_message + "; " if error_message else "") + error_msg_detail
397 |
398 | return RekabetDocument(
399 | source_landing_page_url=full_landing_page_url, karar_id=karar_id,
400 | title_on_landing_page=title_to_report, pdf_url=pdf_url_to_report,
401 | markdown_chunk=None, current_page=page_number, total_pages=0, is_paginated=False,
402 | error_message=error_message.strip("; ") if error_message else "An unexpected error occurred." )
403 |
404 | async def close_client_session(self): # pragma: no cover
405 | if hasattr(self, 'http_client') and self.http_client and not self.http_client.is_closed:
406 | await self.http_client.aclose()
407 | logger.info("RekabetKurumuApiClient: HTTP client session closed.")
```
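
A minimal usage sketch for `RekabetKurumuApiClient` (not part of the repository): it assumes the package is importable as `rekabet_mcp_module` and that the unspecified fields of `RekabetKurumuSearchRequest` have optional defaults; the search text and printed fields below are illustrative only.

```python
import asyncio

from rekabet_mcp_module.client import RekabetKurumuApiClient
from rekabet_mcp_module.models import RekabetKurumuSearchRequest


async def main() -> None:
    client = RekabetKurumuApiClient()
    try:
        # Search decisions whose PDF text contains a phrase (illustrative value).
        results = await client.search_decisions(
            RekabetKurumuSearchRequest(PdfText="birleşme", page=1)
        )
        print(f"Records: {results.total_records_found}, pages: {results.total_pages}")

        if results.decisions:
            first = results.decisions[0]
            # Fetch page 1 of the decision PDF as Markdown.
            document = await client.get_decision_document(first.karar_id, page_number=1)
            print(document.title_on_landing_page, document.total_pages)
    finally:
        await client.close_client_session()


if __name__ == "__main__":
    asyncio.run(main())
```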
--------------------------------------------------------------------------------
/saidsurucu-yargi-mcp-f5fa007/rekabet_mcp_module/client.py:
--------------------------------------------------------------------------------
```python
1 | # rekabet_mcp_module/client.py
2 |
3 | import httpx
4 | from bs4 import BeautifulSoup
5 | from typing import List, Optional, Tuple, Dict, Any
6 | import logging
7 | import html
8 | import re
9 | import io # For io.BytesIO
10 | from urllib.parse import urlencode, urljoin, quote, parse_qs, urlparse
11 | from markitdown import MarkItDown
12 | import math
13 |
14 | # pypdf for PDF processing (lighter alternative to PyMuPDF)
15 | from pypdf import PdfReader, PdfWriter # pypdf, the successor to PyPDF2
16 |
17 | from .models import (
18 | RekabetKurumuSearchRequest,
19 | RekabetDecisionSummary,
20 | RekabetSearchResult,
21 | RekabetDocument,
22 | RekabetKararTuruGuidEnum
23 | )
24 | from pydantic import HttpUrl # Ensure HttpUrl is imported from pydantic
25 |
26 | logger = logging.getLogger(__name__)
27 | if not logger.hasHandlers(): # pragma: no cover
28 | logging.basicConfig(
29 | level=logging.INFO, # Default log level
30 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
31 | )
32 | # For the debug script, the level can be adjusted separately to get more detailed logging.
33 |
34 | class RekabetKurumuApiClient:
35 | BASE_URL = "https://www.rekabet.gov.tr"
36 | SEARCH_PATH = "/tr/Kararlar"
37 | DECISION_LANDING_PATH_TEMPLATE = "/Karar"
38 | # Since Markdown is returned per PDF page, this constant is no longer used directly.
39 | # DOCUMENT_MARKDOWN_CHUNK_SIZE = 5000
40 |
41 | def __init__(self, request_timeout: float = 60.0):
42 | self.http_client = httpx.AsyncClient(
43 | base_url=self.BASE_URL,
44 | headers={
45 | "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
46 | "Accept-Language": "tr-TR,tr;q=0.9,en-US;q=0.8,en;q=0.7",
47 | "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
48 | },
49 | timeout=request_timeout,
50 | verify=True,
51 | follow_redirects=True
52 | )
53 |
54 | def _build_search_query_params(self, params: RekabetKurumuSearchRequest) -> List[Tuple[str, str]]:
55 | query_params: List[Tuple[str, str]] = []
56 | query_params.append(("sayfaAdi", params.sayfaAdi if params.sayfaAdi is not None else ""))
57 | query_params.append(("YayinlanmaTarihi", params.YayinlanmaTarihi if params.YayinlanmaTarihi is not None else ""))
58 | query_params.append(("PdfText", params.PdfText if params.PdfText is not None else ""))
59 |
60 | karar_turu_id_value = ""
61 | if params.KararTuruID is not None:
62 | karar_turu_id_value = params.KararTuruID.value if params.KararTuruID.value != "ALL" else ""
63 | query_params.append(("KararTuruID", karar_turu_id_value))
64 |
65 | query_params.append(("KararSayisi", params.KararSayisi if params.KararSayisi is not None else ""))
66 | query_params.append(("KararTarihi", params.KararTarihi if params.KararTarihi is not None else ""))
67 |
68 | if params.page and params.page > 1:
69 | query_params.append(("page", str(params.page)))
70 |
71 | return query_params
72 |
73 | async def search_decisions(self, params: RekabetKurumuSearchRequest) -> RekabetSearchResult:
74 | request_path = self.SEARCH_PATH
75 | final_query_params = self._build_search_query_params(params)
76 | logger.info(f"RekabetKurumuApiClient: Performing search. Path: {request_path}, Parameters: {final_query_params}")
77 |
78 | try:
79 | response = await self.http_client.get(request_path, params=final_query_params)
80 | response.raise_for_status()
81 | html_content = response.text
82 | except httpx.RequestError as e:
83 | logger.error(f"RekabetKurumuApiClient: HTTP request error during search: {e}")
84 | raise
85 |
86 | soup = BeautifulSoup(html_content, 'html.parser')
87 | processed_decisions: List[RekabetDecisionSummary] = []
88 | total_records: Optional[int] = None
89 | total_pages: Optional[int] = None
90 |
91 | pagination_div = soup.find("div", class_="yazi01")
92 | if pagination_div:
93 | text_content = pagination_div.get_text(separator=" ", strip=True)
94 | total_match = re.search(r"Toplam\s*:\s*(\d+)", text_content)
95 | if total_match:
96 | try:
97 | total_records = int(total_match.group(1))
98 | logger.debug(f"Total records found from pagination: {total_records}")
99 | except ValueError:
100 | logger.warning(f"Could not convert 'Toplam' value to int: {total_match.group(1)}")
101 | else:
102 | logger.warning("'Toplam :' string not found in pagination section.")
103 |
104 | results_per_page_assumed = 10
105 | if total_records is not None:
106 | calculated_total_pages = math.ceil(total_records / results_per_page_assumed)
107 | total_pages = calculated_total_pages if calculated_total_pages > 0 else (1 if total_records > 0 else 0)
108 | logger.debug(f"Calculated total pages: {total_pages}")
109 |
110 | if total_pages is None: # Fallback if total_records couldn't be parsed
111 | last_page_link = pagination_div.select_one("li.PagedList-skipToLast a")
112 | if last_page_link and last_page_link.has_attr('href'):
113 | qs = parse_qs(urlparse(last_page_link['href']).query)
114 | if 'page' in qs and qs['page']:
115 | try:
116 | total_pages = int(qs['page'][0])
117 | logger.debug(f"Total pages found from 'Last >>' link: {total_pages}")
118 | except ValueError:
119 | logger.warning(f"Could not convert page value from 'Last >>' link to int: {qs['page'][0]}")
120 | elif total_records == 0 : total_pages = 0 # If no records, 0 pages
121 | elif total_records is not None and total_records > 0 : total_pages = 1 # If records exist but no last page link (e.g. single page)
122 | else: logger.warning("'Last >>' link not found in pagination section.")
123 |
124 | decision_tables_container = soup.find("div", id="kararList")
125 | if not decision_tables_container:
126 | logger.warning("`div#kararList` (decision list container) not found. HTML structure might have changed or no decisions on this page.")
127 | else:
128 | decision_tables = decision_tables_container.find_all("table", class_="equalDivide")
129 | logger.info(f"Found {len(decision_tables)} 'table' elements with class='equalDivide' for parsing.")
130 |
131 | if not decision_tables and total_records is not None and total_records > 0 :
132 | logger.warning(f"Page indicates {total_records} records but no decision tables found with class='equalDivide'.")
133 |
134 | for idx, table in enumerate(decision_tables):
135 | logger.debug(f"Processing table {idx + 1}...")
136 | try:
137 | rows = table.find_all("tr")
138 | if len(rows) != 3:
139 | logger.warning(f"Table {idx + 1} has an unexpected number of rows ({len(rows)} instead of 3). Skipping. HTML snippet:\n{table.prettify()[:500]}")
140 | continue
141 |
142 | # Row 1: Publication Date, Decision Number, Related Cases Link
143 | td_elements_r1 = rows[0].find_all("td")
144 | pub_date = td_elements_r1[0].get_text(strip=True) if len(td_elements_r1) > 0 else None
145 | dec_num = td_elements_r1[1].get_text(strip=True) if len(td_elements_r1) > 1 else None
146 |
147 | related_cases_link_tag = td_elements_r1[2].find("a", href=True) if len(td_elements_r1) > 2 else None
148 | related_cases_url_str: Optional[str] = None
149 | karar_id_from_related: Optional[str] = None
150 | if related_cases_link_tag and related_cases_link_tag.has_attr('href'):
151 | related_cases_url_str = urljoin(self.BASE_URL, related_cases_link_tag['href'])
152 | qs_related = parse_qs(urlparse(related_cases_link_tag['href']).query)
153 | if 'kararId' in qs_related and qs_related['kararId']:
154 | karar_id_from_related = qs_related['kararId'][0]
155 |
156 | # Row 2: Decision Date, Decision Type
157 | td_elements_r2 = rows[1].find_all("td")
158 | dec_date = td_elements_r2[0].get_text(strip=True) if len(td_elements_r2) > 0 else None
159 | dec_type_text = td_elements_r2[1].get_text(strip=True) if len(td_elements_r2) > 1 else None
160 |
161 | # Row 3: Title and Main Decision Link
162 | title_cell = rows[2].find("td", colspan="5")
163 | decision_link_tag = title_cell.find("a", href=True) if title_cell else None
164 |
165 | title_text: Optional[str] = None
166 | decision_landing_url_str: Optional[str] = None
167 | karar_id_from_main_link: Optional[str] = None
168 |
169 | if decision_link_tag and decision_link_tag.has_attr('href'):
170 | title_text = decision_link_tag.get_text(strip=True)
171 | href_val = decision_link_tag['href']
172 | if href_val.startswith(self.DECISION_LANDING_PATH_TEMPLATE + "?kararId="): # Ensure it's a decision link
173 | decision_landing_url_str = urljoin(self.BASE_URL, href_val)
174 | qs_main = parse_qs(urlparse(href_val).query)
175 | if 'kararId' in qs_main and qs_main['kararId']:
176 | karar_id_from_main_link = qs_main['kararId'][0]
177 | else:
178 | logger.warning(f"Table {idx+1} decision link has unexpected format: {href_val}")
179 | else:
180 | logger.warning(f"Table {idx+1} could not find title/decision link tag.")
181 |
182 | current_karar_id = karar_id_from_main_link or karar_id_from_related
183 |
184 | if not current_karar_id:
185 | logger.warning(f"Table {idx+1} Karar ID not found. Skipping. Title (if any): {title_text}")
186 | continue
187 |
188 | # Convert string URLs to HttpUrl for the model
189 | final_decision_url = HttpUrl(decision_landing_url_str) if decision_landing_url_str else None
190 | final_related_cases_url = HttpUrl(related_cases_url_str) if related_cases_url_str else None
191 |
192 | processed_decisions.append(RekabetDecisionSummary(
193 | publication_date=pub_date, decision_number=dec_num, decision_date=dec_date,
194 | decision_type_text=dec_type_text, title=title_text,
195 | decision_url=final_decision_url,
196 | karar_id=current_karar_id,
197 | related_cases_url=final_related_cases_url
198 | ))
199 | logger.debug(f"Table {idx+1} parsed successfully: Karar ID '{current_karar_id}', Title '{title_text[:50] if title_text else 'N/A'}...'")
200 |
201 | except Exception as e:
202 | logger.warning(f"RekabetKurumuApiClient: Error parsing decision summary {idx+1}: {e}. Problematic Table HTML:\n{table.prettify()}", exc_info=True)
203 | continue
204 |
205 | return RekabetSearchResult(
206 | decisions=processed_decisions, total_records_found=total_records,
207 | retrieved_page_number=params.page, total_pages=total_pages if total_pages is not None else 0
208 | )
209 |
210 | async def _extract_pdf_url_and_landing_page_metadata(self, karar_id: str, landing_page_html: str, landing_page_url: str) -> Dict[str, Any]:
211 | soup = BeautifulSoup(landing_page_html, 'html.parser')
212 | data: Dict[str, Any] = {
213 | "pdf_url": None,
214 | "title_on_landing_page": soup.title.string.strip() if soup.title and soup.title.string else f"Rekabet Kurumu Kararı {karar_id}",
215 | }
216 | # This part needs to be robust and specific to Rekabet Kurumu's landing page structure.
217 | # Look for common patterns: direct links, download buttons, embedded viewers.
218 | pdf_anchor = soup.find("a", href=re.compile(r"\.pdf(\?|$)", re.IGNORECASE)) # Basic PDF link
219 | if not pdf_anchor: # Try other common patterns if the basic one fails
220 | # Example: Look for links with specific text or class
221 | pdf_anchor = soup.find("a", string=re.compile(r"karar metni|pdf indir", re.IGNORECASE))
222 |
223 | if pdf_anchor and pdf_anchor.has_attr('href'):
224 | pdf_path = pdf_anchor['href']
225 | data["pdf_url"] = urljoin(landing_page_url, pdf_path)
226 | logger.info(f"PDF link found on landing page (<a>): {data['pdf_url']}")
227 | else:
228 | iframe_pdf = soup.find("iframe", src=re.compile(r"\.pdf(\?|$)", re.IGNORECASE))
229 | if iframe_pdf and iframe_pdf.has_attr('src'):
230 | pdf_path = iframe_pdf['src']
231 | data["pdf_url"] = urljoin(landing_page_url, pdf_path)
232 | logger.info(f"PDF link found on landing page (<iframe>): {data['pdf_url']}")
233 | else:
234 | embed_pdf = soup.find("embed", src=re.compile(r"\.pdf(\?|$)", re.IGNORECASE), type="application/pdf")
235 | if embed_pdf and embed_pdf.has_attr('src'):
236 | pdf_path = embed_pdf['src']
237 | data["pdf_url"] = urljoin(landing_page_url, pdf_path)
238 | logger.info(f"PDF link found on landing page (<embed>): {data['pdf_url']}")
239 | else:
240 | logger.warning(f"No PDF link found on landing page {landing_page_url} for kararId {karar_id} using common selectors.")
241 | return data
242 |
243 | async def _download_pdf_bytes(self, pdf_url: str) -> Optional[bytes]:
244 | try:
245 | url_to_fetch = pdf_url if pdf_url.startswith(('http://', 'https://')) else urljoin(self.BASE_URL, pdf_url)
246 | logger.info(f"Downloading PDF from: {url_to_fetch}")
247 | response = await self.http_client.get(url_to_fetch)
248 | response.raise_for_status()
249 | pdf_bytes = await response.aread()
250 | logger.info(f"PDF content downloaded ({len(pdf_bytes)} bytes) from: {url_to_fetch}")
251 | return pdf_bytes
252 | except httpx.RequestError as e:
253 | logger.error(f"HTTP error downloading PDF from {pdf_url}: {e}")
254 | except Exception as e:
255 | logger.error(f"General error downloading PDF from {pdf_url}: {e}")
256 | return None
257 |
258 | def _extract_single_pdf_page_as_pdf_bytes(self, original_pdf_bytes: bytes, page_number_to_extract: int) -> Tuple[Optional[bytes], int]:
259 | total_pages_in_original_pdf = 0
260 | single_page_pdf_bytes: Optional[bytes] = None
261 |
262 | if not original_pdf_bytes:
263 | logger.warning("No original PDF bytes provided for page extraction.")
264 | return None, 0
265 |
266 | try:
267 | pdf_stream = io.BytesIO(original_pdf_bytes)
268 | reader = PdfReader(pdf_stream)
269 | total_pages_in_original_pdf = len(reader.pages)
270 |
271 | if not (0 < page_number_to_extract <= total_pages_in_original_pdf):
272 | logger.warning(f"Requested page number ({page_number_to_extract}) is out of PDF page range (1-{total_pages_in_original_pdf}).")
273 | return None, total_pages_in_original_pdf
274 |
275 | writer = PdfWriter()
276 | writer.add_page(reader.pages[page_number_to_extract - 1]) # pypdf is 0-indexed
277 |
278 | output_pdf_stream = io.BytesIO()
279 | writer.write(output_pdf_stream)
280 | single_page_pdf_bytes = output_pdf_stream.getvalue()
281 |
282 | logger.debug(f"Page {page_number_to_extract} of original PDF (total {total_pages_in_original_pdf} pages) extracted as new PDF using pypdf.")
283 |
284 | except Exception as e:
285 | logger.error(f"Error extracting PDF page using pypdf: {e}", exc_info=True)
286 | return None, total_pages_in_original_pdf
287 | return single_page_pdf_bytes, total_pages_in_original_pdf
288 |
289 | def _convert_pdf_bytes_to_markdown(self, pdf_bytes: bytes, source_url_for_logging: str) -> Optional[str]:
290 | if not pdf_bytes:
291 | logger.warning(f"No PDF bytes provided for Markdown conversion (source: {source_url_for_logging}).")
292 | return None
293 |
294 | pdf_stream = io.BytesIO(pdf_bytes)
295 | try:
296 | md_converter = MarkItDown(enable_plugins=False)
297 | conversion_result = md_converter.convert(pdf_stream)
298 | markdown_text = conversion_result.text_content
299 |
300 | if not markdown_text:
301 | logger.warning(f"MarkItDown returned empty content from PDF byte stream (source: {source_url_for_logging}). PDF page might be image-based or MarkItDown could not process the PDF stream.")
302 | return markdown_text
303 | except Exception as e:
304 | logger.error(f"MarkItDown conversion error for PDF byte stream (source: {source_url_for_logging}): {e}", exc_info=True)
305 | return None
306 |
307 | async def get_decision_document(self, karar_id: str, page_number: int = 1) -> RekabetDocument:
308 | if not karar_id:
309 | return RekabetDocument(
310 | source_landing_page_url=HttpUrl(f"{self.BASE_URL}"),
311 | karar_id=karar_id or "UNKNOWN_KARAR_ID",
312 | error_message="karar_id is required.",
313 | current_page=1, total_pages=0, is_paginated=False )
314 |
315 | decision_url_path = f"{self.DECISION_LANDING_PATH_TEMPLATE}?kararId={karar_id}"
316 | full_landing_page_url = urljoin(self.BASE_URL, decision_url_path)
317 |
318 | logger.info(f"RekabetKurumuApiClient: Getting decision document: {full_landing_page_url}, Requested PDF Page: {page_number}")
319 |
320 | pdf_url_to_report: Optional[HttpUrl] = None
321 | title_to_report: Optional[str] = f"Rekabet Kurumu Kararı {karar_id}" # Default
322 | error_message: Optional[str] = None
323 | markdown_for_requested_page: Optional[str] = None
324 | total_pdf_pages: int = 0
325 |
326 | try:
327 | async with self.http_client.stream("GET", full_landing_page_url) as response:
328 | response.raise_for_status()
329 | content_type = response.headers.get("content-type", "").lower()
330 | final_url_of_response = HttpUrl(str(response.url))
331 | original_pdf_bytes: Optional[bytes] = None
332 |
333 | if "application/pdf" in content_type:
334 | logger.info(f"URL {final_url_of_response} is a direct PDF. Processing content.")
335 | pdf_url_to_report = final_url_of_response
336 | original_pdf_bytes = await response.aread()
337 | elif "text/html" in content_type:
338 | logger.info(f"URL {final_url_of_response} is an HTML landing page. Looking for PDF link.")
339 | landing_page_html_bytes = await response.aread()
340 | detected_charset = response.charset_encoding or 'utf-8'
341 | try: landing_page_html = landing_page_html_bytes.decode(detected_charset)
342 | except UnicodeDecodeError: landing_page_html = landing_page_html_bytes.decode('utf-8', errors='replace')
343 |
344 | if landing_page_html.strip():
345 | landing_page_data = await self._extract_pdf_url_and_landing_page_metadata(karar_id, landing_page_html, str(final_url_of_response))
346 | pdf_url_str_from_html = landing_page_data.get("pdf_url")
347 | if landing_page_data.get("title_on_landing_page"): title_to_report = landing_page_data.get("title_on_landing_page")
348 | if pdf_url_str_from_html:
349 | pdf_url_to_report = HttpUrl(pdf_url_str_from_html)
350 | original_pdf_bytes = await self._download_pdf_bytes(str(pdf_url_to_report))
351 | else: error_message = (error_message or "") + " PDF URL not found on HTML landing page."
352 | else: error_message = "Decision landing page content is empty."
353 | else: error_message = f"Unexpected content type ({content_type}) for URL: {final_url_of_response}"
354 |
355 | if original_pdf_bytes:
356 | single_page_pdf_bytes, total_pdf_pages_from_extraction = self._extract_single_pdf_page_as_pdf_bytes(original_pdf_bytes, page_number)
357 | total_pdf_pages = total_pdf_pages_from_extraction
358 |
359 | if single_page_pdf_bytes:
360 | markdown_for_requested_page = self._convert_pdf_bytes_to_markdown(single_page_pdf_bytes, str(pdf_url_to_report or full_landing_page_url))
361 | if not markdown_for_requested_page:
362 | error_message = (error_message or "") + f"; Could not convert page {page_number} of PDF to Markdown."
363 | elif total_pdf_pages > 0 :
364 | error_message = (error_message or "") + f"; Could not extract page {page_number} from PDF (page may be out of range or extraction failed)."
365 | else:
366 | error_message = (error_message or "") + "; PDF could not be processed or page count was zero (original PDF might be invalid)."
367 | elif not error_message:
368 | error_message = "PDF content could not be downloaded or identified."
369 |
370 | is_paginated = total_pdf_pages > 1
371 | current_page_final = page_number
372 | if total_pdf_pages > 0:
373 | current_page_final = max(1, min(page_number, total_pdf_pages))
374 | elif markdown_for_requested_page is None:
375 | current_page_final = 1
376 |
377 | # If markdown is None but there was no specific error for markdown conversion (e.g. PDF not found first)
378 | # make sure error_message reflects that.
379 | if markdown_for_requested_page is None and pdf_url_to_report and not error_message:
380 | error_message = (error_message or "") + "; Failed to produce Markdown from PDF page."
381 |
382 |
383 | return RekabetDocument(
384 | source_landing_page_url=full_landing_page_url, karar_id=karar_id,
385 | title_on_landing_page=title_to_report, pdf_url=pdf_url_to_report,
386 | markdown_chunk=markdown_for_requested_page, current_page=current_page_final,
387 | total_pages=total_pdf_pages, is_paginated=is_paginated,
388 | error_message=error_message.strip("; ") if error_message else None )
389 |
390 | except httpx.HTTPStatusError as e: error_msg_detail, exc_info_flag = f"HTTP Status error {e.response.status_code} while processing decision page.", False
391 | except httpx.RequestError as e: error_msg_detail, exc_info_flag = f"HTTP Request error while processing decision page: {str(e)}", False
392 | except Exception as e: error_msg_detail, exc_info_flag = f"General error while processing decision: {str(e)}", True
393 | 
394 | # Note: 'e' is unbound once an except block exits in Python 3, so exc_info_flag is set inside the handlers above.
395 | logger.error(f"RekabetKurumuApiClient: Error processing decision {karar_id} from {full_landing_page_url}: {error_msg_detail}", exc_info=exc_info_flag)
396 | error_message = (error_message + "; " if error_message else "") + error_msg_detail
397 |
398 | return RekabetDocument(
399 | source_landing_page_url=full_landing_page_url, karar_id=karar_id,
400 | title_on_landing_page=title_to_report, pdf_url=pdf_url_to_report,
401 | markdown_chunk=None, current_page=page_number, total_pages=0, is_paginated=False,
402 | error_message=error_message.strip("; ") if error_message else "An unexpected error occurred." )
403 |
404 | async def close_client_session(self): # pragma: no cover
405 | if hasattr(self, 'http_client') and self.http_client and not self.http_client.is_closed:
406 | await self.http_client.aclose()
407 | logger.info("RekabetKurumuApiClient: HTTP client session closed.")
```
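
The document retrieval path above converts one PDF page at a time: pypdf extracts the requested page into a fresh in-memory PDF, and MarkItDown converts only that page to Markdown. A standalone sketch of the same technique (assuming only `pypdf` and `markitdown` are installed; the file path in the usage comment is a placeholder):

```python
import io
from typing import Optional

from markitdown import MarkItDown
from pypdf import PdfReader, PdfWriter


def pdf_page_to_markdown(pdf_bytes: bytes, page_number: int) -> Optional[str]:
    """Extract one page from a PDF and convert only that page to Markdown."""
    reader = PdfReader(io.BytesIO(pdf_bytes))
    if not (0 < page_number <= len(reader.pages)):
        return None  # requested page is out of range

    # Write the single requested page into a new in-memory PDF.
    writer = PdfWriter()
    writer.add_page(reader.pages[page_number - 1])  # pypdf pages are 0-indexed
    single_page_stream = io.BytesIO()
    writer.write(single_page_stream)
    single_page_stream.seek(0)

    # MarkItDown accepts a binary stream, so no temporary file is needed.
    result = MarkItDown(enable_plugins=False).convert(single_page_stream)
    return result.text_content


# Usage (placeholder path):
# with open("decision.pdf", "rb") as f:
#     print(pdf_page_to_markdown(f.read(), page_number=1))
```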
--------------------------------------------------------------------------------
/saidsurucu-yargi-mcp-f5fa007/sayistay_mcp_module/client.py:
--------------------------------------------------------------------------------
```python
1 | # sayistay_mcp_module/client.py
2 |
3 | import httpx
4 | import re
5 | from bs4 import BeautifulSoup
6 | from typing import Dict, Any, List, Optional, Tuple
7 | import logging
8 | import html
9 | import io
10 | from urllib.parse import urlencode, urljoin
11 | from markitdown import MarkItDown
12 |
13 | from .models import (
14 | GenelKurulSearchRequest, GenelKurulSearchResponse, GenelKurulDecision,
15 | TemyizKuruluSearchRequest, TemyizKuruluSearchResponse, TemyizKuruluDecision,
16 | DaireSearchRequest, DaireSearchResponse, DaireDecision,
17 | SayistayDocumentMarkdown
18 | )
19 | from .enums import DaireEnum, KamuIdaresiTuruEnum, WebKararKonusuEnum, WEB_KARAR_KONUSU_MAPPING
20 |
21 | logger = logging.getLogger(__name__)
22 | if not logger.hasHandlers():
23 | logging.basicConfig(
24 | level=logging.INFO,
25 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
26 | )
27 |
28 | class SayistayApiClient:
29 | """
30 | API Client for Sayıştay (Turkish Court of Accounts) decision search system.
31 |
32 | Handles three types of decisions:
33 | - Genel Kurul (General Assembly): Precedent-setting interpretive decisions
34 | - Temyiz Kurulu (Appeals Board): Appeals against chamber decisions
35 | - Daire (Chamber): First-instance audit findings and sanctions
36 |
37 | Features:
38 | - ASP.NET WebForms session management with CSRF tokens
39 | - DataTables-based pagination and filtering
40 | - Automatic session refresh on expiration
41 | - Document retrieval with Markdown conversion
42 | """
43 |
44 | BASE_URL = "https://www.sayistay.gov.tr"
45 |
46 | # Search endpoints for each decision type
47 | GENEL_KURUL_ENDPOINT = "/KararlarGenelKurul/DataTablesList"
48 | TEMYIZ_KURULU_ENDPOINT = "/KararlarTemyiz/DataTablesList"
49 | DAIRE_ENDPOINT = "/KararlarDaire/DataTablesList"
50 |
51 | # Page endpoints for session initialization and document access
52 | GENEL_KURUL_PAGE = "/KararlarGenelKurul"
53 | TEMYIZ_KURULU_PAGE = "/KararlarTemyiz"
54 | DAIRE_PAGE = "/KararlarDaire"
55 |
56 | def __init__(self, request_timeout: float = 60.0):
57 | self.request_timeout = request_timeout
58 | self.session_cookies: Dict[str, str] = {}
59 | self.csrf_tokens: Dict[str, str] = {} # Store tokens for each endpoint
60 |
61 | self.http_client = httpx.AsyncClient(
62 | base_url=self.BASE_URL,
63 | headers={
64 | "Accept": "application/json, text/javascript, */*; q=0.01",
65 | "Accept-Language": "tr-TR,tr;q=0.9,en-US;q=0.8,en;q=0.7",
66 | "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
67 | "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36",
68 | "X-Requested-With": "XMLHttpRequest",
69 | "Sec-Fetch-Dest": "empty",
70 | "Sec-Fetch-Mode": "cors",
71 | "Sec-Fetch-Site": "same-origin"
72 | },
73 | timeout=request_timeout,
74 | follow_redirects=True
75 | )
76 |
77 | async def _initialize_session_for_endpoint(self, endpoint_type: str) -> bool:
78 | """
79 | Initialize session and obtain CSRF token for specific endpoint.
80 |
81 | Args:
82 | endpoint_type: One of 'genel_kurul', 'temyiz_kurulu', 'daire'
83 |
84 | Returns:
85 | True if session initialized successfully, False otherwise
86 | """
87 | page_mapping = {
88 | 'genel_kurul': self.GENEL_KURUL_PAGE,
89 | 'temyiz_kurulu': self.TEMYIZ_KURULU_PAGE,
90 | 'daire': self.DAIRE_PAGE
91 | }
92 |
93 | if endpoint_type not in page_mapping:
94 | logger.error(f"Invalid endpoint type: {endpoint_type}")
95 | return False
96 |
97 | page_url = page_mapping[endpoint_type]
98 | logger.info(f"Initializing session for {endpoint_type} endpoint: {page_url}")
99 |
100 | try:
101 | response = await self.http_client.get(page_url)
102 | response.raise_for_status()
103 |
104 | # Extract session cookies
105 | for cookie_name, cookie_value in response.cookies.items():
106 | self.session_cookies[cookie_name] = cookie_value
107 | logger.debug(f"Stored session cookie: {cookie_name}")
108 |
109 | # Extract CSRF token from form
110 | soup = BeautifulSoup(response.text, 'html.parser')
111 | csrf_input = soup.find('input', {'name': '__RequestVerificationToken'})
112 |
113 | if csrf_input and csrf_input.get('value'):
114 | self.csrf_tokens[endpoint_type] = csrf_input['value']
115 | logger.info(f"Extracted CSRF token for {endpoint_type}")
116 | return True
117 | else:
118 | logger.warning(f"CSRF token not found in {endpoint_type} page")
119 | return False
120 |
121 | except httpx.RequestError as e:
122 | logger.error(f"HTTP error during session initialization for {endpoint_type}: {e}")
123 | return False
124 | except Exception as e:
125 | logger.error(f"Error initializing session for {endpoint_type}: {e}")
126 | return False
127 |
128 | def _enum_to_form_value(self, enum_value: str, enum_type: str) -> str:
129 | """Convert enum values to form values expected by the API."""
130 | if enum_value == "ALL":
131 | if enum_type == "daire":
132 | return "Tüm Daireler"
133 | elif enum_type == "kamu_idaresi":
134 | return "Tüm Kurumlar"
135 | elif enum_type == "web_karar_konusu":
136 | return "Tüm Konular"
137 |
138 | # Apply web_karar_konusu mapping
139 | if enum_type == "web_karar_konusu":
140 | return WEB_KARAR_KONUSU_MAPPING.get(enum_value, enum_value)
141 |
142 | return enum_value
143 |
144 | def _build_datatables_params(self, start: int, length: int, draw: int = 1) -> List[Tuple[str, str]]:
145 | """Build standard DataTables parameters for all endpoints."""
146 | params = [
147 | ("draw", str(draw)),
148 | ("start", str(start)),
149 | ("length", str(length)),
150 | ("search[value]", ""),
151 | ("search[regex]", "false")
152 | ]
153 | return params
154 |
155 | def _build_genel_kurul_form_data(self, params: GenelKurulSearchRequest, draw: int = 1) -> List[Tuple[str, str]]:
156 | """Build form data for Genel Kurul search request."""
157 | form_data = self._build_datatables_params(params.start, params.length, draw)
158 |
159 | # Add DataTables column definitions (from actual request)
160 | column_defs = [
161 | ("columns[0][data]", "KARARNO"),
162 | ("columns[0][name]", ""),
163 | ("columns[0][searchable]", "true"),
164 | ("columns[0][orderable]", "false"),
165 | ("columns[0][search][value]", ""),
166 | ("columns[0][search][regex]", "false"),
167 |
168 | ("columns[1][data]", "KARARNO"),
169 | ("columns[1][name]", ""),
170 | ("columns[1][searchable]", "true"),
171 | ("columns[1][orderable]", "true"),
172 | ("columns[1][search][value]", ""),
173 | ("columns[1][search][regex]", "false"),
174 |
175 | ("columns[2][data]", "KARARTARIH"),
176 | ("columns[2][name]", ""),
177 | ("columns[2][searchable]", "true"),
178 | ("columns[2][orderable]", "true"),
179 | ("columns[2][search][value]", ""),
180 | ("columns[2][search][regex]", "false"),
181 |
182 | ("columns[3][data]", "KARAROZETI"),
183 | ("columns[3][name]", ""),
184 | ("columns[3][searchable]", "true"),
185 | ("columns[3][orderable]", "false"),
186 | ("columns[3][search][value]", ""),
187 | ("columns[3][search][regex]", "false"),
188 |
189 | ("columns[4][data]", ""),
190 | ("columns[4][name]", ""),
191 | ("columns[4][searchable]", "true"),
192 | ("columns[4][orderable]", "false"),
193 | ("columns[4][search][value]", ""),
194 | ("columns[4][search][regex]", "false"),
195 |
196 | ("order[0][column]", "2"),
197 | ("order[0][dir]", "desc")
198 | ]
199 | form_data.extend(column_defs)
200 |
201 | # Add search parameters
202 | form_data.extend([
203 | ("KararlarGenelKurulAra.KARARNO", params.karar_no or ""),
204 | ("__Invariant[]", "KararlarGenelKurulAra.KARARNO"),
205 | ("__Invariant[]", "KararlarGenelKurulAra.KARAREK"),
206 | ("KararlarGenelKurulAra.KARAREK", params.karar_ek or ""),
207 | ("KararlarGenelKurulAra.KARARTARIHBaslangic", params.karar_tarih_baslangic or "Başlangıç Tarihi"),
208 | ("KararlarGenelKurulAra.KARARTARIHBitis", params.karar_tarih_bitis or "Bitiş Tarihi"),
209 | ("KararlarGenelKurulAra.KARARTAMAMI", params.karar_tamami or ""),
210 | ("__RequestVerificationToken", self.csrf_tokens.get('genel_kurul', ''))
211 | ])
212 |
213 | return form_data
214 |
215 | def _build_temyiz_kurulu_form_data(self, params: TemyizKuruluSearchRequest, draw: int = 1) -> List[Tuple[str, str]]:
216 | """Build form data for Temyiz Kurulu search request."""
217 | form_data = self._build_datatables_params(params.start, params.length, draw)
218 |
219 | # Add DataTables column definitions (from actual request)
220 | column_defs = [
221 | ("columns[0][data]", "TEMYIZTUTANAKTARIHI"),
222 | ("columns[0][name]", ""),
223 | ("columns[0][searchable]", "true"),
224 | ("columns[0][orderable]", "false"),
225 | ("columns[0][search][value]", ""),
226 | ("columns[0][search][regex]", "false"),
227 |
228 | ("columns[1][data]", "TEMYIZTUTANAKTARIHI"),
229 | ("columns[1][name]", ""),
230 | ("columns[1][searchable]", "true"),
231 | ("columns[1][orderable]", "true"),
232 | ("columns[1][search][value]", ""),
233 | ("columns[1][search][regex]", "false"),
234 |
235 | ("columns[2][data]", "ILAMDAIRESI"),
236 | ("columns[2][name]", ""),
237 | ("columns[2][searchable]", "true"),
238 | ("columns[2][orderable]", "true"),
239 | ("columns[2][search][value]", ""),
240 | ("columns[2][search][regex]", "false"),
241 |
242 | ("columns[3][data]", "TEMYIZKARAR"),
243 | ("columns[3][name]", ""),
244 | ("columns[3][searchable]", "true"),
245 | ("columns[3][orderable]", "false"),
246 | ("columns[3][search][value]", ""),
247 | ("columns[3][search][regex]", "false"),
248 |
249 | ("columns[4][data]", ""),
250 | ("columns[4][name]", ""),
251 | ("columns[4][searchable]", "true"),
252 | ("columns[4][orderable]", "false"),
253 | ("columns[4][search][value]", ""),
254 | ("columns[4][search][regex]", "false"),
255 |
256 | ("order[0][column]", "1"),
257 | ("order[0][dir]", "desc")
258 | ]
259 | form_data.extend(column_defs)
260 |
261 | # Add search parameters
262 | daire_value = self._enum_to_form_value(params.ilam_dairesi, "daire")
263 | kamu_idaresi_value = self._enum_to_form_value(params.kamu_idaresi_turu, "kamu_idaresi")
264 | web_karar_konusu_value = self._enum_to_form_value(params.web_karar_konusu, "web_karar_konusu")
265 |
266 | form_data.extend([
267 | ("KararlarTemyizAra.ILAMDAIRESI", daire_value),
268 | ("KararlarTemyizAra.YILI", params.yili or ""),
269 | ("KararlarTemyizAra.KARARTRHBaslangic", params.karar_tarih_baslangic or ""),
270 | ("KararlarTemyizAra.KARARTRHBitis", params.karar_tarih_bitis or ""),
271 | ("KararlarTemyizAra.KAMUIDARESITURU", kamu_idaresi_value if kamu_idaresi_value != "Tüm Kurumlar" else ""),
272 | ("KararlarTemyizAra.ILAMNO", params.ilam_no or ""),
273 | ("KararlarTemyizAra.DOSYANO", params.dosya_no or ""),
274 | ("KararlarTemyizAra.TEMYIZTUTANAKNO", params.temyiz_tutanak_no or ""),
275 | ("__Invariant", "KararlarTemyizAra.TEMYIZTUTANAKNO"),
276 | ("KararlarTemyizAra.TEMYIZKARAR", params.temyiz_karar or ""),
277 | ("KararlarTemyizAra.WEBKARARKONUSU", web_karar_konusu_value if web_karar_konusu_value != "Tüm Konular" else ""),
278 | ("__RequestVerificationToken", self.csrf_tokens.get('temyiz_kurulu', ''))
279 | ])
280 |
281 | return form_data
282 |
283 | def _build_daire_form_data(self, params: DaireSearchRequest, draw: int = 1) -> List[Tuple[str, str]]:
284 | """Build form data for Daire search request."""
285 | form_data = self._build_datatables_params(params.start, params.length, draw)
286 |
287 | # Add DataTables column definitions (from actual request)
288 | column_defs = [
289 | ("columns[0][data]", "YARGILAMADAIRESI"),
290 | ("columns[0][name]", ""),
291 | ("columns[0][searchable]", "true"),
292 | ("columns[0][orderable]", "false"),
293 | ("columns[0][search][value]", ""),
294 | ("columns[0][search][regex]", "false"),
295 |
296 | ("columns[1][data]", "KARARTRH"),
297 | ("columns[1][name]", ""),
298 | ("columns[1][searchable]", "true"),
299 | ("columns[1][orderable]", "true"),
300 | ("columns[1][search][value]", ""),
301 | ("columns[1][search][regex]", "false"),
302 |
303 | ("columns[2][data]", "KARARNO"),
304 | ("columns[2][name]", ""),
305 | ("columns[2][searchable]", "true"),
306 | ("columns[2][orderable]", "true"),
307 | ("columns[2][search][value]", ""),
308 | ("columns[2][search][regex]", "false"),
309 |
310 | ("columns[3][data]", "YARGILAMADAIRESI"),
311 | ("columns[3][name]", ""),
312 | ("columns[3][searchable]", "true"),
313 | ("columns[3][orderable]", "true"),
314 | ("columns[3][search][value]", ""),
315 | ("columns[3][search][regex]", "false"),
316 |
317 | ("columns[4][data]", "WEBKARARMETNI"),
318 | ("columns[4][name]", ""),
319 | ("columns[4][searchable]", "true"),
320 | ("columns[4][orderable]", "false"),
321 | ("columns[4][search][value]", ""),
322 | ("columns[4][search][regex]", "false"),
323 |
324 | ("columns[5][data]", ""),
325 | ("columns[5][name]", ""),
326 | ("columns[5][searchable]", "true"),
327 | ("columns[5][orderable]", "false"),
328 | ("columns[5][search][value]", ""),
329 | ("columns[5][search][regex]", "false"),
330 |
331 | ("order[0][column]", "2"),
332 | ("order[0][dir]", "desc")
333 | ]
334 | form_data.extend(column_defs)
335 |
336 | # Add search parameters
337 | daire_value = self._enum_to_form_value(params.yargilama_dairesi, "daire")
338 | kamu_idaresi_value = self._enum_to_form_value(params.kamu_idaresi_turu, "kamu_idaresi")
339 | web_karar_konusu_value = self._enum_to_form_value(params.web_karar_konusu, "web_karar_konusu")
340 |
341 | form_data.extend([
342 | ("KararlarDaireAra.YARGILAMADAIRESI", daire_value),
343 | ("KararlarDaireAra.KARARTRHBaslangic", params.karar_tarih_baslangic or ""),
344 | ("KararlarDaireAra.KARARTRHBitis", params.karar_tarih_bitis or ""),
345 | ("KararlarDaireAra.ILAMNO", params.ilam_no or ""),
346 | ("KararlarDaireAra.KAMUIDARESITURU", kamu_idaresi_value if kamu_idaresi_value != "Tüm Kurumlar" else ""),
347 | ("KararlarDaireAra.HESAPYILI", params.hesap_yili or ""),
348 | ("KararlarDaireAra.WEBKARARKONUSU", web_karar_konusu_value if web_karar_konusu_value != "Tüm Konular" else ""),
349 | ("KararlarDaireAra.WEBKARARMETNI", params.web_karar_metni or ""),
350 | ("__RequestVerificationToken", self.csrf_tokens.get('daire', ''))
351 | ])
352 |
353 | return form_data
354 |
355 | async def search_genel_kurul_decisions(self, params: GenelKurulSearchRequest) -> GenelKurulSearchResponse:
356 | """
357 | Search Sayıştay Genel Kurul (General Assembly) decisions.
358 |
359 | Args:
360 | params: Search parameters for Genel Kurul decisions
361 |
362 | Returns:
363 | GenelKurulSearchResponse with matching decisions
364 | """
365 | # Initialize session if needed
366 | if 'genel_kurul' not in self.csrf_tokens:
367 | if not await self._initialize_session_for_endpoint('genel_kurul'):
368 | raise Exception("Failed to initialize session for Genel Kurul endpoint")
369 |
370 | form_data = self._build_genel_kurul_form_data(params)
371 | encoded_data = urlencode(form_data, encoding='utf-8')
372 |
373 | logger.info(f"Searching Genel Kurul decisions with parameters: {params.model_dump(exclude_none=True)}")
374 |
375 | try:
376 | # Update headers with cookies
377 | headers = self.http_client.headers.copy()
378 | if self.session_cookies:
379 | cookie_header = "; ".join([f"{k}={v}" for k, v in self.session_cookies.items()])
380 | headers["Cookie"] = cookie_header
381 |
382 | response = await self.http_client.post(
383 | self.GENEL_KURUL_ENDPOINT,
384 | data=encoded_data,
385 | headers=headers
386 | )
387 | response.raise_for_status()
388 | response_json = response.json()
389 |
390 | # Parse response
391 | decisions = []
392 | for item in response_json.get('data', []):
393 | decisions.append(GenelKurulDecision(
394 | id=item['Id'],
395 | karar_no=item['KARARNO'],
396 | karar_tarih=item['KARARTARIH'],
397 | karar_ozeti=item['KARAROZETI']
398 | ))
399 |
400 | return GenelKurulSearchResponse(
401 | decisions=decisions,
402 | total_records=response_json.get('recordsTotal', 0),
403 | total_filtered=response_json.get('recordsFiltered', 0),
404 | draw=response_json.get('draw', 1)
405 | )
406 |
407 | except httpx.RequestError as e:
408 | logger.error(f"HTTP error during Genel Kurul search: {e}")
409 | raise
410 | except Exception as e:
411 | logger.error(f"Error processing Genel Kurul search: {e}")
412 | raise
413 |
414 | async def search_temyiz_kurulu_decisions(self, params: TemyizKuruluSearchRequest) -> TemyizKuruluSearchResponse:
415 | """
416 | Search Sayıştay Temyiz Kurulu (Appeals Board) decisions.
417 |
418 | Args:
419 | params: Search parameters for Temyiz Kurulu decisions
420 |
421 | Returns:
422 | TemyizKuruluSearchResponse with matching decisions
423 | """
424 | # Initialize session if needed
425 | if 'temyiz_kurulu' not in self.csrf_tokens:
426 | if not await self._initialize_session_for_endpoint('temyiz_kurulu'):
427 | raise Exception("Failed to initialize session for Temyiz Kurulu endpoint")
428 |
429 | form_data = self._build_temyiz_kurulu_form_data(params)
430 | encoded_data = urlencode(form_data, encoding='utf-8')
431 |
432 | logger.info(f"Searching Temyiz Kurulu decisions with parameters: {params.model_dump(exclude_none=True)}")
433 |
434 | try:
435 | # Update headers with cookies
436 | headers = self.http_client.headers.copy()
437 | if self.session_cookies:
438 | cookie_header = "; ".join([f"{k}={v}" for k, v in self.session_cookies.items()])
439 | headers["Cookie"] = cookie_header
440 |
441 | response = await self.http_client.post(
442 | self.TEMYIZ_KURULU_ENDPOINT,
443 | data=encoded_data,
444 | headers=headers
445 | )
446 | response.raise_for_status()
447 | response_json = response.json()
448 |
449 | # Parse response
450 | decisions = []
451 | for item in response_json.get('data', []):
452 | decisions.append(TemyizKuruluDecision(
453 | id=item['Id'],
454 | temyiz_tutanak_tarihi=item['TEMYIZTUTANAKTARIHI'],
455 | ilam_dairesi=item['ILAMDAIRESI'],
456 | temyiz_karar=item['TEMYIZKARAR']
457 | ))
458 |
459 | return TemyizKuruluSearchResponse(
460 | decisions=decisions,
461 | total_records=response_json.get('recordsTotal', 0),
462 | total_filtered=response_json.get('recordsFiltered', 0),
463 | draw=response_json.get('draw', 1)
464 | )
465 |
466 | except httpx.RequestError as e:
467 | logger.error(f"HTTP error during Temyiz Kurulu search: {e}")
468 | raise
469 | except Exception as e:
470 | logger.error(f"Error processing Temyiz Kurulu search: {e}")
471 | raise
472 |
473 | async def search_daire_decisions(self, params: DaireSearchRequest) -> DaireSearchResponse:
474 | """
475 | Search Sayıştay Daire (Chamber) decisions.
476 |
477 | Args:
478 | params: Search parameters for Daire decisions
479 |
480 | Returns:
481 | DaireSearchResponse with matching decisions
482 | """
483 | # Initialize session if needed
484 | if 'daire' not in self.csrf_tokens:
485 | if not await self._initialize_session_for_endpoint('daire'):
486 | raise Exception("Failed to initialize session for Daire endpoint")
487 |
488 | form_data = self._build_daire_form_data(params)
489 | encoded_data = urlencode(form_data, encoding='utf-8')
490 |
491 | logger.info(f"Searching Daire decisions with parameters: {params.model_dump(exclude_none=True)}")
492 |
493 | try:
494 | # Update headers with cookies
495 | headers = self.http_client.headers.copy()
496 | if self.session_cookies:
497 | cookie_header = "; ".join([f"{k}={v}" for k, v in self.session_cookies.items()])
498 | headers["Cookie"] = cookie_header
499 |
500 | response = await self.http_client.post(
501 | self.DAIRE_ENDPOINT,
502 | data=encoded_data,
503 | headers=headers
504 | )
505 | response.raise_for_status()
506 | response_json = response.json()
507 |
508 | # Parse response
509 | decisions = []
510 | for item in response_json.get('data', []):
511 | decisions.append(DaireDecision(
512 | id=item['Id'],
513 | yargilama_dairesi=item['YARGILAMADAIRESI'],
514 | karar_tarih=item['KARARTRH'],
515 | karar_no=item['KARARNO'],
516 | ilam_no=item.get('ILAMNO'), # Use get() to handle None values
517 | madde_no=item['MADDENO'],
518 | kamu_idaresi_turu=item['KAMUIDARESITURU'],
519 | hesap_yili=item['HESAPYILI'],
520 | web_karar_konusu=item['WEBKARARKONUSU'],
521 | web_karar_metni=item['WEBKARARMETNI']
522 | ))
523 |
524 | return DaireSearchResponse(
525 | decisions=decisions,
526 | total_records=response_json.get('recordsTotal', 0),
527 | total_filtered=response_json.get('recordsFiltered', 0),
528 | draw=response_json.get('draw', 1)
529 | )
530 |
531 | except httpx.RequestError as e:
532 | logger.error(f"HTTP error during Daire search: {e}")
533 | raise
534 | except Exception as e:
535 | logger.error(f"Error processing Daire search: {e}")
536 | raise
537 |
538 | def _convert_html_to_markdown(self, html_content: str) -> Optional[str]:
539 | """Convert HTML content to Markdown using MarkItDown with BytesIO to avoid filename length issues."""
540 | if not html_content:
541 | return None
542 |
543 | try:
544 | # Convert HTML string to bytes and create BytesIO stream
545 | html_bytes = html_content.encode('utf-8')
546 | html_stream = io.BytesIO(html_bytes)
547 |
548 | # Pass BytesIO stream to MarkItDown to avoid temp file creation
549 | md_converter = MarkItDown()
550 | result = md_converter.convert(html_stream)
551 | markdown_content = result.text_content
552 |
553 | logger.info("Successfully converted HTML to Markdown")
554 | return markdown_content
555 |
556 | except Exception as e:
557 | logger.error(f"Error converting HTML to Markdown: {e}")
558 | return f"Error converting HTML content: {str(e)}"
559 |
560 | async def get_document_as_markdown(self, decision_id: str, decision_type: str) -> SayistayDocumentMarkdown:
561 | """
562 | Retrieve full text of a Sayıştay decision and convert to Markdown.
563 |
564 | Args:
565 | decision_id: Unique decision identifier
566 | decision_type: Type of decision ('genel_kurul', 'temyiz_kurulu', 'daire')
567 |
568 | Returns:
569 | SayistayDocumentMarkdown with converted content
570 | """
571 | logger.info(f"Retrieving document for {decision_type} decision ID: {decision_id}")
572 |
573 | # Validate decision_id
574 | if not decision_id or not decision_id.strip():
575 | return SayistayDocumentMarkdown(
576 | decision_id=decision_id,
577 | decision_type=decision_type,
578 | source_url="",
579 | markdown_content=None,
580 | error_message="Decision ID cannot be empty"
581 | )
582 |
583 | # Map decision type to URL path
584 | url_path_mapping = {
585 | 'genel_kurul': 'KararlarGenelKurul',
586 | 'temyiz_kurulu': 'KararlarTemyiz',
587 | 'daire': 'KararlarDaire'
588 | }
589 |
590 | if decision_type not in url_path_mapping:
591 | return SayistayDocumentMarkdown(
592 | decision_id=decision_id,
593 | decision_type=decision_type,
594 | source_url="",
595 | markdown_content=None,
596 | error_message=f"Invalid decision type: {decision_type}. Must be one of: {list(url_path_mapping.keys())}"
597 | )
598 |
599 | # Build document URL
600 | url_path = url_path_mapping[decision_type]
601 | document_url = f"{self.BASE_URL}/{url_path}/Detay/{decision_id}/"
602 |
603 | try:
604 | # Make HTTP GET request to document URL
605 | headers = {
606 | "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
607 | "Accept-Language": "tr-TR,tr;q=0.9,en-US;q=0.8,en;q=0.7",
608 | "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36",
609 | "Sec-Fetch-Dest": "document",
610 | "Sec-Fetch-Mode": "navigate",
611 | "Sec-Fetch-Site": "same-origin"
612 | }
613 |
614 | # Include session cookies if available
615 | if self.session_cookies:
616 | cookie_header = "; ".join([f"{k}={v}" for k, v in self.session_cookies.items()])
617 | headers["Cookie"] = cookie_header
618 |
619 | response = await self.http_client.get(document_url, headers=headers)
620 | response.raise_for_status()
621 | html_content = response.text
622 |
623 | if not html_content or not html_content.strip():
624 | logger.warning(f"Received empty HTML content from {document_url}")
625 | return SayistayDocumentMarkdown(
626 | decision_id=decision_id,
627 | decision_type=decision_type,
628 | source_url=document_url,
629 | markdown_content=None,
630 | error_message="Document content is empty"
631 | )
632 |
633 | # Convert HTML to Markdown using existing method
634 | markdown_content = self._convert_html_to_markdown(html_content)
635 |
636 | if markdown_content and "Error converting HTML content" not in markdown_content:
637 | logger.info(f"Successfully retrieved and converted document {decision_id} to Markdown")
638 | return SayistayDocumentMarkdown(
639 | decision_id=decision_id,
640 | decision_type=decision_type,
641 | source_url=document_url,
642 | markdown_content=markdown_content,
643 | retrieval_date=None # Could add datetime.now().isoformat() if needed
644 | )
645 | else:
646 | return SayistayDocumentMarkdown(
647 | decision_id=decision_id,
648 | decision_type=decision_type,
649 | source_url=document_url,
650 | markdown_content=None,
651 | error_message=f"Failed to convert HTML to Markdown: {markdown_content}"
652 | )
653 |
654 | except httpx.HTTPStatusError as e:
655 | error_msg = f"HTTP error {e.response.status_code} when fetching document: {e}"
656 | logger.error(f"HTTP error fetching document {decision_id}: {error_msg}")
657 | return SayistayDocumentMarkdown(
658 | decision_id=decision_id,
659 | decision_type=decision_type,
660 | source_url=document_url,
661 | markdown_content=None,
662 | error_message=error_msg
663 | )
664 | except httpx.RequestError as e:
665 | error_msg = f"Network error when fetching document: {e}"
666 | logger.error(f"Network error fetching document {decision_id}: {error_msg}")
667 | return SayistayDocumentMarkdown(
668 | decision_id=decision_id,
669 | decision_type=decision_type,
670 | source_url=document_url,
671 | markdown_content=None,
672 | error_message=error_msg
673 | )
674 | except Exception as e:
675 | error_msg = f"Unexpected error when fetching document: {e}"
676 | logger.error(f"Unexpected error fetching document {decision_id}: {error_msg}")
677 | return SayistayDocumentMarkdown(
678 | decision_id=decision_id,
679 | decision_type=decision_type,
680 | source_url=document_url,
681 | markdown_content=None,
682 | error_message=error_msg
683 | )
684 |
685 | async def close_client_session(self):
686 | """Close HTTP client session."""
687 | if hasattr(self, 'http_client') and self.http_client and not self.http_client.is_closed:
688 | await self.http_client.aclose()
689 | logger.info("SayistayApiClient: HTTP client session closed.")
```
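The `_convert_html_to_markdown` helper above streams the decision HTML into MarkItDown through an in-memory `BytesIO` object instead of writing a temporary file. A minimal standalone sketch of the same pattern, with purely illustrative sample HTML:

```python
# Sketch of the BytesIO + MarkItDown pattern used by _convert_html_to_markdown above.
# The sample HTML is illustrative only, not real Sayıştay content.
import io
from markitdown import MarkItDown

sample_html = "<html><body><h1>Karar</h1><p>Örnek karar metni.</p></body></html>"

converter = MarkItDown()
# An in-memory stream avoids temp files and overly long auto-generated filenames.
result = converter.convert(io.BytesIO(sample_html.encode("utf-8")))
print(result.text_content)
```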
--------------------------------------------------------------------------------
/sayistay_mcp_module/client.py:
--------------------------------------------------------------------------------
```python
1 | # sayistay_mcp_module/client.py
2 |
3 | import httpx
4 | import re
5 | from bs4 import BeautifulSoup
6 | from typing import Dict, Any, List, Optional, Tuple
7 | import logging
8 | import html
9 | import io
10 | from urllib.parse import urlencode, urljoin
11 | from markitdown import MarkItDown
12 |
13 | from .models import (
14 | GenelKurulSearchRequest, GenelKurulSearchResponse, GenelKurulDecision,
15 | TemyizKuruluSearchRequest, TemyizKuruluSearchResponse, TemyizKuruluDecision,
16 | DaireSearchRequest, DaireSearchResponse, DaireDecision,
17 | SayistayDocumentMarkdown
18 | )
19 | from .enums import DaireEnum, KamuIdaresiTuruEnum, WebKararKonusuEnum, WEB_KARAR_KONUSU_MAPPING
20 |
21 | logger = logging.getLogger(__name__)
22 | if not logger.hasHandlers():
23 | logging.basicConfig(
24 | level=logging.INFO,
25 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
26 | )
27 |
28 | class SayistayApiClient:
29 | """
30 | API Client for Sayıştay (Turkish Court of Accounts) decision search system.
31 |
32 | Handles three types of decisions:
33 | - Genel Kurul (General Assembly): Precedent-setting interpretive decisions
34 | - Temyiz Kurulu (Appeals Board): Appeals against chamber decisions
35 | - Daire (Chamber): First-instance audit findings and sanctions
36 |
37 | Features:
38 | - ASP.NET WebForms session management with CSRF tokens
39 | - DataTables-based pagination and filtering
40 | - Automatic session refresh on expiration
41 | - Document retrieval with Markdown conversion
42 | """
43 |
44 | BASE_URL = "https://www.sayistay.gov.tr"
45 |
46 | # Search endpoints for each decision type
47 | GENEL_KURUL_ENDPOINT = "/KararlarGenelKurul/DataTablesList"
48 | TEMYIZ_KURULU_ENDPOINT = "/KararlarTemyiz/DataTablesList"
49 | DAIRE_ENDPOINT = "/KararlarDaire/DataTablesList"
50 |
51 | # Page endpoints for session initialization and document access
52 | GENEL_KURUL_PAGE = "/KararlarGenelKurul"
53 | TEMYIZ_KURULU_PAGE = "/KararlarTemyiz"
54 | DAIRE_PAGE = "/KararlarDaire"
55 |
56 | def __init__(self, request_timeout: float = 60.0):
57 | self.request_timeout = request_timeout
58 | self.session_cookies: Dict[str, str] = {}
59 | self.csrf_tokens: Dict[str, str] = {} # Store tokens for each endpoint
60 |
61 | self.http_client = httpx.AsyncClient(
62 | base_url=self.BASE_URL,
63 | headers={
64 | "Accept": "application/json, text/javascript, */*; q=0.01",
65 | "Accept-Language": "tr-TR,tr;q=0.9,en-US;q=0.8,en;q=0.7",
66 | "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
67 | "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36",
68 | "X-Requested-With": "XMLHttpRequest",
69 | "Sec-Fetch-Dest": "empty",
70 | "Sec-Fetch-Mode": "cors",
71 | "Sec-Fetch-Site": "same-origin"
72 | },
73 | timeout=request_timeout,
74 | follow_redirects=True
75 | )
76 |
77 | async def _initialize_session_for_endpoint(self, endpoint_type: str) -> bool:
78 | """
79 | Initialize session and obtain CSRF token for specific endpoint.
80 |
81 | Args:
82 | endpoint_type: One of 'genel_kurul', 'temyiz_kurulu', 'daire'
83 |
84 | Returns:
85 | True if session initialized successfully, False otherwise
86 | """
87 | page_mapping = {
88 | 'genel_kurul': self.GENEL_KURUL_PAGE,
89 | 'temyiz_kurulu': self.TEMYIZ_KURULU_PAGE,
90 | 'daire': self.DAIRE_PAGE
91 | }
92 |
93 | if endpoint_type not in page_mapping:
94 | logger.error(f"Invalid endpoint type: {endpoint_type}")
95 | return False
96 |
97 | page_url = page_mapping[endpoint_type]
98 | logger.info(f"Initializing session for {endpoint_type} endpoint: {page_url}")
99 |
100 | try:
101 | response = await self.http_client.get(page_url)
102 | response.raise_for_status()
103 |
104 | # Extract session cookies
105 | for cookie_name, cookie_value in response.cookies.items():
106 | self.session_cookies[cookie_name] = cookie_value
107 | logger.debug(f"Stored session cookie: {cookie_name}")
108 |
109 | # Extract CSRF token from form
110 | soup = BeautifulSoup(response.text, 'html.parser')
111 | csrf_input = soup.find('input', {'name': '__RequestVerificationToken'})
112 |
113 | if csrf_input and csrf_input.get('value'):
114 | self.csrf_tokens[endpoint_type] = csrf_input['value']
115 | logger.info(f"Extracted CSRF token for {endpoint_type}")
116 | return True
117 | else:
118 | logger.warning(f"CSRF token not found in {endpoint_type} page")
119 | return False
120 |
121 | except httpx.RequestError as e:
122 | logger.error(f"HTTP error during session initialization for {endpoint_type}: {e}")
123 | return False
124 | except Exception as e:
125 | logger.error(f"Error initializing session for {endpoint_type}: {e}")
126 | return False
127 |
128 | def _enum_to_form_value(self, enum_value: str, enum_type: str) -> str:
129 | """Convert enum values to form values expected by the API."""
130 | if enum_value == "ALL":
131 | if enum_type == "daire":
132 | return "Tüm Daireler"
133 | elif enum_type == "kamu_idaresi":
134 | return "Tüm Kurumlar"
135 | elif enum_type == "web_karar_konusu":
136 | return "Tüm Konular"
137 |
138 | # Apply web_karar_konusu mapping
139 | if enum_type == "web_karar_konusu":
140 | return WEB_KARAR_KONUSU_MAPPING.get(enum_value, enum_value)
141 |
142 | return enum_value
143 |
144 | def _build_datatables_params(self, start: int, length: int, draw: int = 1) -> List[Tuple[str, str]]:
145 | """Build standard DataTables parameters for all endpoints."""
146 | params = [
147 | ("draw", str(draw)),
148 | ("start", str(start)),
149 | ("length", str(length)),
150 | ("search[value]", ""),
151 | ("search[regex]", "false")
152 | ]
153 | return params
154 |
155 | def _build_genel_kurul_form_data(self, params: GenelKurulSearchRequest, draw: int = 1) -> List[Tuple[str, str]]:
156 | """Build form data for Genel Kurul search request."""
157 | form_data = self._build_datatables_params(params.start, params.length, draw)
158 |
159 | # Add DataTables column definitions (from actual request)
160 | column_defs = [
161 | ("columns[0][data]", "KARARNO"),
162 | ("columns[0][name]", ""),
163 | ("columns[0][searchable]", "true"),
164 | ("columns[0][orderable]", "false"),
165 | ("columns[0][search][value]", ""),
166 | ("columns[0][search][regex]", "false"),
167 |
168 | ("columns[1][data]", "KARARNO"),
169 | ("columns[1][name]", ""),
170 | ("columns[1][searchable]", "true"),
171 | ("columns[1][orderable]", "true"),
172 | ("columns[1][search][value]", ""),
173 | ("columns[1][search][regex]", "false"),
174 |
175 | ("columns[2][data]", "KARARTARIH"),
176 | ("columns[2][name]", ""),
177 | ("columns[2][searchable]", "true"),
178 | ("columns[2][orderable]", "true"),
179 | ("columns[2][search][value]", ""),
180 | ("columns[2][search][regex]", "false"),
181 |
182 | ("columns[3][data]", "KARAROZETI"),
183 | ("columns[3][name]", ""),
184 | ("columns[3][searchable]", "true"),
185 | ("columns[3][orderable]", "false"),
186 | ("columns[3][search][value]", ""),
187 | ("columns[3][search][regex]", "false"),
188 |
189 | ("columns[4][data]", ""),
190 | ("columns[4][name]", ""),
191 | ("columns[4][searchable]", "true"),
192 | ("columns[4][orderable]", "false"),
193 | ("columns[4][search][value]", ""),
194 | ("columns[4][search][regex]", "false"),
195 |
196 | ("order[0][column]", "2"),
197 | ("order[0][dir]", "desc")
198 | ]
199 | form_data.extend(column_defs)
200 |
201 | # Add search parameters
202 | form_data.extend([
203 | ("KararlarGenelKurulAra.KARARNO", params.karar_no or ""),
204 | ("__Invariant[]", "KararlarGenelKurulAra.KARARNO"),
205 | ("__Invariant[]", "KararlarGenelKurulAra.KARAREK"),
206 | ("KararlarGenelKurulAra.KARAREK", params.karar_ek or ""),
207 | ("KararlarGenelKurulAra.KARARTARIHBaslangic", params.karar_tarih_baslangic or "Başlangıç Tarihi"),
208 | ("KararlarGenelKurulAra.KARARTARIHBitis", params.karar_tarih_bitis or "Bitiş Tarihi"),
209 | ("KararlarGenelKurulAra.KARARTAMAMI", params.karar_tamami or ""),
210 | ("__RequestVerificationToken", self.csrf_tokens.get('genel_kurul', ''))
211 | ])
212 |
213 | return form_data
214 |
215 | def _build_temyiz_kurulu_form_data(self, params: TemyizKuruluSearchRequest, draw: int = 1) -> List[Tuple[str, str]]:
216 | """Build form data for Temyiz Kurulu search request."""
217 | form_data = self._build_datatables_params(params.start, params.length, draw)
218 |
219 | # Add DataTables column definitions (from actual request)
220 | column_defs = [
221 | ("columns[0][data]", "TEMYIZTUTANAKTARIHI"),
222 | ("columns[0][name]", ""),
223 | ("columns[0][searchable]", "true"),
224 | ("columns[0][orderable]", "false"),
225 | ("columns[0][search][value]", ""),
226 | ("columns[0][search][regex]", "false"),
227 |
228 | ("columns[1][data]", "TEMYIZTUTANAKTARIHI"),
229 | ("columns[1][name]", ""),
230 | ("columns[1][searchable]", "true"),
231 | ("columns[1][orderable]", "true"),
232 | ("columns[1][search][value]", ""),
233 | ("columns[1][search][regex]", "false"),
234 |
235 | ("columns[2][data]", "ILAMDAIRESI"),
236 | ("columns[2][name]", ""),
237 | ("columns[2][searchable]", "true"),
238 | ("columns[2][orderable]", "true"),
239 | ("columns[2][search][value]", ""),
240 | ("columns[2][search][regex]", "false"),
241 |
242 | ("columns[3][data]", "TEMYIZKARAR"),
243 | ("columns[3][name]", ""),
244 | ("columns[3][searchable]", "true"),
245 | ("columns[3][orderable]", "false"),
246 | ("columns[3][search][value]", ""),
247 | ("columns[3][search][regex]", "false"),
248 |
249 | ("columns[4][data]", ""),
250 | ("columns[4][name]", ""),
251 | ("columns[4][searchable]", "true"),
252 | ("columns[4][orderable]", "false"),
253 | ("columns[4][search][value]", ""),
254 | ("columns[4][search][regex]", "false"),
255 |
256 | ("order[0][column]", "1"),
257 | ("order[0][dir]", "desc")
258 | ]
259 | form_data.extend(column_defs)
260 |
261 | # Add search parameters
262 | daire_value = self._enum_to_form_value(params.ilam_dairesi, "daire")
263 | kamu_idaresi_value = self._enum_to_form_value(params.kamu_idaresi_turu, "kamu_idaresi")
264 | web_karar_konusu_value = self._enum_to_form_value(params.web_karar_konusu, "web_karar_konusu")
265 |
266 | form_data.extend([
267 | ("KararlarTemyizAra.ILAMDAIRESI", daire_value),
268 | ("KararlarTemyizAra.YILI", params.yili or ""),
269 | ("KararlarTemyizAra.KARARTRHBaslangic", params.karar_tarih_baslangic or ""),
270 | ("KararlarTemyizAra.KARARTRHBitis", params.karar_tarih_bitis or ""),
271 | ("KararlarTemyizAra.KAMUIDARESITURU", kamu_idaresi_value if kamu_idaresi_value != "Tüm Kurumlar" else ""),
272 | ("KararlarTemyizAra.ILAMNO", params.ilam_no or ""),
273 | ("KararlarTemyizAra.DOSYANO", params.dosya_no or ""),
274 | ("KararlarTemyizAra.TEMYIZTUTANAKNO", params.temyiz_tutanak_no or ""),
275 | ("__Invariant", "KararlarTemyizAra.TEMYIZTUTANAKNO"),
276 | ("KararlarTemyizAra.TEMYIZKARAR", params.temyiz_karar or ""),
277 | ("KararlarTemyizAra.WEBKARARKONUSU", web_karar_konusu_value if web_karar_konusu_value != "Tüm Konular" else ""),
278 | ("__RequestVerificationToken", self.csrf_tokens.get('temyiz_kurulu', ''))
279 | ])
280 |
281 | return form_data
282 |
283 | def _build_daire_form_data(self, params: DaireSearchRequest, draw: int = 1) -> List[Tuple[str, str]]:
284 | """Build form data for Daire search request."""
285 | form_data = self._build_datatables_params(params.start, params.length, draw)
286 |
287 | # Add DataTables column definitions (from actual request)
288 | column_defs = [
289 | ("columns[0][data]", "YARGILAMADAIRESI"),
290 | ("columns[0][name]", ""),
291 | ("columns[0][searchable]", "true"),
292 | ("columns[0][orderable]", "false"),
293 | ("columns[0][search][value]", ""),
294 | ("columns[0][search][regex]", "false"),
295 |
296 | ("columns[1][data]", "KARARTRH"),
297 | ("columns[1][name]", ""),
298 | ("columns[1][searchable]", "true"),
299 | ("columns[1][orderable]", "true"),
300 | ("columns[1][search][value]", ""),
301 | ("columns[1][search][regex]", "false"),
302 |
303 | ("columns[2][data]", "KARARNO"),
304 | ("columns[2][name]", ""),
305 | ("columns[2][searchable]", "true"),
306 | ("columns[2][orderable]", "true"),
307 | ("columns[2][search][value]", ""),
308 | ("columns[2][search][regex]", "false"),
309 |
310 | ("columns[3][data]", "YARGILAMADAIRESI"),
311 | ("columns[3][name]", ""),
312 | ("columns[3][searchable]", "true"),
313 | ("columns[3][orderable]", "true"),
314 | ("columns[3][search][value]", ""),
315 | ("columns[3][search][regex]", "false"),
316 |
317 | ("columns[4][data]", "WEBKARARMETNI"),
318 | ("columns[4][name]", ""),
319 | ("columns[4][searchable]", "true"),
320 | ("columns[4][orderable]", "false"),
321 | ("columns[4][search][value]", ""),
322 | ("columns[4][search][regex]", "false"),
323 |
324 | ("columns[5][data]", ""),
325 | ("columns[5][name]", ""),
326 | ("columns[5][searchable]", "true"),
327 | ("columns[5][orderable]", "false"),
328 | ("columns[5][search][value]", ""),
329 | ("columns[5][search][regex]", "false"),
330 |
331 | ("order[0][column]", "2"),
332 | ("order[0][dir]", "desc")
333 | ]
334 | form_data.extend(column_defs)
335 |
336 | # Add search parameters
337 | daire_value = self._enum_to_form_value(params.yargilama_dairesi, "daire")
338 | kamu_idaresi_value = self._enum_to_form_value(params.kamu_idaresi_turu, "kamu_idaresi")
339 | web_karar_konusu_value = self._enum_to_form_value(params.web_karar_konusu, "web_karar_konusu")
340 |
341 | form_data.extend([
342 | ("KararlarDaireAra.YARGILAMADAIRESI", daire_value),
343 | ("KararlarDaireAra.KARARTRHBaslangic", params.karar_tarih_baslangic or ""),
344 | ("KararlarDaireAra.KARARTRHBitis", params.karar_tarih_bitis or ""),
345 | ("KararlarDaireAra.ILAMNO", params.ilam_no or ""),
346 | ("KararlarDaireAra.KAMUIDARESITURU", kamu_idaresi_value if kamu_idaresi_value != "Tüm Kurumlar" else ""),
347 | ("KararlarDaireAra.HESAPYILI", params.hesap_yili or ""),
348 | ("KararlarDaireAra.WEBKARARKONUSU", web_karar_konusu_value if web_karar_konusu_value != "Tüm Konular" else ""),
349 | ("KararlarDaireAra.WEBKARARMETNI", params.web_karar_metni or ""),
350 | ("__RequestVerificationToken", self.csrf_tokens.get('daire', ''))
351 | ])
352 |
353 | return form_data
354 |
355 | async def search_genel_kurul_decisions(self, params: GenelKurulSearchRequest) -> GenelKurulSearchResponse:
356 | """
357 | Search Sayıştay Genel Kurul (General Assembly) decisions.
358 |
359 | Args:
360 | params: Search parameters for Genel Kurul decisions
361 |
362 | Returns:
363 | GenelKurulSearchResponse with matching decisions
364 | """
365 | # Initialize session if needed
366 | if 'genel_kurul' not in self.csrf_tokens:
367 | if not await self._initialize_session_for_endpoint('genel_kurul'):
368 | raise Exception("Failed to initialize session for Genel Kurul endpoint")
369 |
370 | form_data = self._build_genel_kurul_form_data(params)
371 | encoded_data = urlencode(form_data, encoding='utf-8')
372 |
373 | logger.info(f"Searching Genel Kurul decisions with parameters: {params.model_dump(exclude_none=True)}")
374 |
375 | try:
376 | # Update headers with cookies
377 | headers = self.http_client.headers.copy()
378 | if self.session_cookies:
379 | cookie_header = "; ".join([f"{k}={v}" for k, v in self.session_cookies.items()])
380 | headers["Cookie"] = cookie_header
381 |
382 | response = await self.http_client.post(
383 | self.GENEL_KURUL_ENDPOINT,
384 | data=encoded_data,
385 | headers=headers
386 | )
387 | response.raise_for_status()
388 | response_json = response.json()
389 |
390 | # Parse response
391 | decisions = []
392 | for item in response_json.get('data', []):
393 | decisions.append(GenelKurulDecision(
394 | id=item['Id'],
395 | karar_no=item['KARARNO'],
396 | karar_tarih=item['KARARTARIH'],
397 | karar_ozeti=item['KARAROZETI']
398 | ))
399 |
400 | return GenelKurulSearchResponse(
401 | decisions=decisions,
402 | total_records=response_json.get('recordsTotal', 0),
403 | total_filtered=response_json.get('recordsFiltered', 0),
404 | draw=response_json.get('draw', 1)
405 | )
406 |
407 | except httpx.RequestError as e:
408 | logger.error(f"HTTP error during Genel Kurul search: {e}")
409 | raise
410 | except Exception as e:
411 | logger.error(f"Error processing Genel Kurul search: {e}")
412 | raise
413 |
414 | async def search_temyiz_kurulu_decisions(self, params: TemyizKuruluSearchRequest) -> TemyizKuruluSearchResponse:
415 | """
416 | Search Sayıştay Temyiz Kurulu (Appeals Board) decisions.
417 |
418 | Args:
419 | params: Search parameters for Temyiz Kurulu decisions
420 |
421 | Returns:
422 | TemyizKuruluSearchResponse with matching decisions
423 | """
424 | # Initialize session if needed
425 | if 'temyiz_kurulu' not in self.csrf_tokens:
426 | if not await self._initialize_session_for_endpoint('temyiz_kurulu'):
427 | raise Exception("Failed to initialize session for Temyiz Kurulu endpoint")
428 |
429 | form_data = self._build_temyiz_kurulu_form_data(params)
430 | encoded_data = urlencode(form_data, encoding='utf-8')
431 |
432 | logger.info(f"Searching Temyiz Kurulu decisions with parameters: {params.model_dump(exclude_none=True)}")
433 |
434 | try:
435 | # Update headers with cookies
436 | headers = self.http_client.headers.copy()
437 | if self.session_cookies:
438 | cookie_header = "; ".join([f"{k}={v}" for k, v in self.session_cookies.items()])
439 | headers["Cookie"] = cookie_header
440 |
441 | response = await self.http_client.post(
442 | self.TEMYIZ_KURULU_ENDPOINT,
443 | data=encoded_data,
444 | headers=headers
445 | )
446 | response.raise_for_status()
447 | response_json = response.json()
448 |
449 | # Parse response
450 | decisions = []
451 | for item in response_json.get('data', []):
452 | decisions.append(TemyizKuruluDecision(
453 | id=item['Id'],
454 | temyiz_tutanak_tarihi=item['TEMYIZTUTANAKTARIHI'],
455 | ilam_dairesi=item['ILAMDAIRESI'],
456 | temyiz_karar=item['TEMYIZKARAR']
457 | ))
458 |
459 | return TemyizKuruluSearchResponse(
460 | decisions=decisions,
461 | total_records=response_json.get('recordsTotal', 0),
462 | total_filtered=response_json.get('recordsFiltered', 0),
463 | draw=response_json.get('draw', 1)
464 | )
465 |
466 | except httpx.RequestError as e:
467 | logger.error(f"HTTP error during Temyiz Kurulu search: {e}")
468 | raise
469 | except Exception as e:
470 | logger.error(f"Error processing Temyiz Kurulu search: {e}")
471 | raise
472 |
473 | async def search_daire_decisions(self, params: DaireSearchRequest) -> DaireSearchResponse:
474 | """
475 | Search Sayıştay Daire (Chamber) decisions.
476 |
477 | Args:
478 | params: Search parameters for Daire decisions
479 |
480 | Returns:
481 | DaireSearchResponse with matching decisions
482 | """
483 | # Initialize session if needed
484 | if 'daire' not in self.csrf_tokens:
485 | if not await self._initialize_session_for_endpoint('daire'):
486 | raise Exception("Failed to initialize session for Daire endpoint")
487 |
488 | form_data = self._build_daire_form_data(params)
489 | encoded_data = urlencode(form_data, encoding='utf-8')
490 |
491 | logger.info(f"Searching Daire decisions with parameters: {params.model_dump(exclude_none=True)}")
492 |
493 | try:
494 | # Update headers with cookies
495 | headers = self.http_client.headers.copy()
496 | if self.session_cookies:
497 | cookie_header = "; ".join([f"{k}={v}" for k, v in self.session_cookies.items()])
498 | headers["Cookie"] = cookie_header
499 |
500 | response = await self.http_client.post(
501 | self.DAIRE_ENDPOINT,
502 | data=encoded_data,
503 | headers=headers
504 | )
505 | response.raise_for_status()
506 | response_json = response.json()
507 |
508 | # Parse response
509 | decisions = []
510 | for item in response_json.get('data', []):
511 | decisions.append(DaireDecision(
512 | id=item['Id'],
513 | yargilama_dairesi=item['YARGILAMADAIRESI'],
514 | karar_tarih=item['KARARTRH'],
515 | karar_no=item['KARARNO'],
516 | ilam_no=item.get('ILAMNO'), # Use get() since ILAMNO may be missing or null
517 | madde_no=item['MADDENO'],
518 | kamu_idaresi_turu=item['KAMUIDARESITURU'],
519 | hesap_yili=item['HESAPYILI'],
520 | web_karar_konusu=item['WEBKARARKONUSU'],
521 | web_karar_metni=item['WEBKARARMETNI']
522 | ))
523 |
524 | return DaireSearchResponse(
525 | decisions=decisions,
526 | total_records=response_json.get('recordsTotal', 0),
527 | total_filtered=response_json.get('recordsFiltered', 0),
528 | draw=response_json.get('draw', 1)
529 | )
530 |
531 | except httpx.RequestError as e:
532 | logger.error(f"HTTP error during Daire search: {e}")
533 | raise
534 | except Exception as e:
535 | logger.error(f"Error processing Daire search: {e}")
536 | raise
537 |
538 | def _convert_html_to_markdown(self, html_content: str) -> Optional[str]:
539 | """Convert HTML content to Markdown using MarkItDown with BytesIO to avoid filename length issues."""
540 | if not html_content:
541 | return None
542 |
543 | try:
544 | # Convert HTML string to bytes and create BytesIO stream
545 | html_bytes = html_content.encode('utf-8')
546 | html_stream = io.BytesIO(html_bytes)
547 |
548 | # Pass BytesIO stream to MarkItDown to avoid temp file creation
549 | md_converter = MarkItDown()
550 | result = md_converter.convert(html_stream)
551 | markdown_content = result.text_content
552 |
553 | logger.info("Successfully converted HTML to Markdown")
554 | return markdown_content
555 |
556 | except Exception as e:
557 | logger.error(f"Error converting HTML to Markdown: {e}")
558 | return f"Error converting HTML content: {str(e)}"
559 |
560 | async def get_document_as_markdown(self, decision_id: str, decision_type: str) -> SayistayDocumentMarkdown:
561 | """
562 | Retrieve full text of a Sayıştay decision and convert to Markdown.
563 |
564 | Args:
565 | decision_id: Unique decision identifier
566 | decision_type: Type of decision ('genel_kurul', 'temyiz_kurulu', 'daire')
567 |
568 | Returns:
569 | SayistayDocumentMarkdown with converted content
570 | """
571 | logger.info(f"Retrieving document for {decision_type} decision ID: {decision_id}")
572 |
573 | # Validate decision_id
574 | if not decision_id or not decision_id.strip():
575 | return SayistayDocumentMarkdown(
576 | decision_id=decision_id,
577 | decision_type=decision_type,
578 | source_url="",
579 | markdown_content=None,
580 | error_message="Decision ID cannot be empty"
581 | )
582 |
583 | # Map decision type to URL path
584 | url_path_mapping = {
585 | 'genel_kurul': 'KararlarGenelKurul',
586 | 'temyiz_kurulu': 'KararlarTemyiz',
587 | 'daire': 'KararlarDaire'
588 | }
589 |
590 | if decision_type not in url_path_mapping:
591 | return SayistayDocumentMarkdown(
592 | decision_id=decision_id,
593 | decision_type=decision_type,
594 | source_url="",
595 | markdown_content=None,
596 | error_message=f"Invalid decision type: {decision_type}. Must be one of: {list(url_path_mapping.keys())}"
597 | )
598 |
599 | # Build document URL
600 | url_path = url_path_mapping[decision_type]
601 | document_url = f"{self.BASE_URL}/{url_path}/Detay/{decision_id}/"
602 |
603 | try:
604 | # Make HTTP GET request to document URL
605 | headers = {
606 | "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
607 | "Accept-Language": "tr-TR,tr;q=0.9,en-US;q=0.8,en;q=0.7",
608 | "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36",
609 | "Sec-Fetch-Dest": "document",
610 | "Sec-Fetch-Mode": "navigate",
611 | "Sec-Fetch-Site": "same-origin"
612 | }
613 |
614 | # Include session cookies if available
615 | if self.session_cookies:
616 | cookie_header = "; ".join([f"{k}={v}" for k, v in self.session_cookies.items()])
617 | headers["Cookie"] = cookie_header
618 |
619 | response = await self.http_client.get(document_url, headers=headers)
620 | response.raise_for_status()
621 | html_content = response.text
622 |
623 | if not html_content or not html_content.strip():
624 | logger.warning(f"Received empty HTML content from {document_url}")
625 | return SayistayDocumentMarkdown(
626 | decision_id=decision_id,
627 | decision_type=decision_type,
628 | source_url=document_url,
629 | markdown_content=None,
630 | error_message="Document content is empty"
631 | )
632 |
633 | # Convert HTML to Markdown using existing method
634 | markdown_content = self._convert_html_to_markdown(html_content)
635 |
636 | if markdown_content and "Error converting HTML content" not in markdown_content:
637 | logger.info(f"Successfully retrieved and converted document {decision_id} to Markdown")
638 | return SayistayDocumentMarkdown(
639 | decision_id=decision_id,
640 | decision_type=decision_type,
641 | source_url=document_url,
642 | markdown_content=markdown_content,
643 | retrieval_date=None # Could add datetime.now().isoformat() if needed
644 | )
645 | else:
646 | return SayistayDocumentMarkdown(
647 | decision_id=decision_id,
648 | decision_type=decision_type,
649 | source_url=document_url,
650 | markdown_content=None,
651 | error_message=f"Failed to convert HTML to Markdown: {markdown_content}"
652 | )
653 |
654 | except httpx.HTTPStatusError as e:
655 | error_msg = f"HTTP error {e.response.status_code} when fetching document: {e}"
656 | logger.error(f"HTTP error fetching document {decision_id}: {error_msg}")
657 | return SayistayDocumentMarkdown(
658 | decision_id=decision_id,
659 | decision_type=decision_type,
660 | source_url=document_url,
661 | markdown_content=None,
662 | error_message=error_msg
663 | )
664 | except httpx.RequestError as e:
665 | error_msg = f"Network error when fetching document: {e}"
666 | logger.error(f"Network error fetching document {decision_id}: {error_msg}")
667 | return SayistayDocumentMarkdown(
668 | decision_id=decision_id,
669 | decision_type=decision_type,
670 | source_url=document_url,
671 | markdown_content=None,
672 | error_message=error_msg
673 | )
674 | except Exception as e:
675 | error_msg = f"Unexpected error when fetching document: {e}"
676 | logger.error(f"Unexpected error fetching document {decision_id}: {error_msg}")
677 | return SayistayDocumentMarkdown(
678 | decision_id=decision_id,
679 | decision_type=decision_type,
680 | source_url=document_url,
681 | markdown_content=None,
682 | error_message=error_msg
683 | )
684 |
685 | async def close_client_session(self):
686 | """Close HTTP client session."""
687 | if hasattr(self, 'http_client') and self.http_client and not self.http_client.is_closed:
688 | await self.http_client.aclose()
689 | logger.info("SayistayApiClient: HTTP client session closed.")
```
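A hypothetical end-to-end usage sketch for `SayistayApiClient` (not part of the repository): the `GenelKurulSearchRequest` field names are inferred from `_build_genel_kurul_form_data` above, their defaults and validation live in `models.py` and are assumed here, and the search term is illustrative.

```python
# Hypothetical usage sketch for SayistayApiClient; the field names and defaults of
# GenelKurulSearchRequest are assumptions inferred from the form builders above.
import asyncio

from sayistay_mcp_module.client import SayistayApiClient
from sayistay_mcp_module.models import GenelKurulSearchRequest

async def main():
    client = SayistayApiClient()
    try:
        # The first search triggers session initialization and CSRF token extraction.
        response = await client.search_genel_kurul_decisions(
            GenelKurulSearchRequest(karar_tamami="harcırah", start=0, length=10)
        )
        print(f"{response.total_records} total records, {len(response.decisions)} returned")

        if response.decisions:
            # Retrieve the full text of the first hit as Markdown.
            doc = await client.get_document_as_markdown(
                decision_id=str(response.decisions[0].id),
                decision_type="genel_kurul",
            )
            print(doc.markdown_content or doc.error_message)
    finally:
        await client.close_client_session()

asyncio.run(main())
```

The `finally` block calls `close_client_session` so the shared `httpx.AsyncClient` is closed even when a search raises.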