This is page 7 of 11. Use http://codebase.md/saidsurucu/yargi-mcp?lines=true&page={x} to view the full context.
# Directory Structure
```
├── __main__.py
├── .dockerignore
├── .env.example
├── .gitattributes
├── .github
│ └── workflows
│ └── publish.yml
├── .gitignore
├── .serena
│ ├── .gitignore
│ └── project.yml
├── 5ire-settings.png
├── analyze_kik_hash_generation.py
├── anayasa_mcp_module
│ ├── __init__.py
│ ├── bireysel_client.py
│ ├── client.py
│ ├── models.py
│ └── unified_client.py
├── asgi_app.py
├── bddk_mcp_module
│ ├── __init__.py
│ ├── client.py
│ └── models.py
├── bedesten_mcp_module
│ ├── __init__.py
│ ├── client.py
│ ├── enums.py
│ └── models.py
├── check_response_format.py
├── CLAUDE.md
├── danistay_mcp_module
│ ├── __init__.py
│ ├── client.py
│ └── models.py
├── docker-compose.yml
├── Dockerfile
├── docs
│ └── DEPLOYMENT.md
├── emsal_mcp_module
│ ├── __init__.py
│ ├── client.py
│ └── models.py
├── example_fastapi_app.py
├── fly-no-auth.toml
├── fly.toml
├── kik_mcp_module
│ ├── __init__.py
│ ├── client_v2.py
│ ├── client.py
│ ├── models_v2.py
│ └── models.py
├── kvkk_mcp_module
│ ├── __init__.py
│ ├── client.py
│ └── models.py
├── LICENSE
├── mcp_auth
│ ├── __init__.py
│ ├── clerk_config.py
│ ├── middleware.py
│ ├── oauth.py
│ ├── policy.py
│ └── storage.py
├── mcp_auth_factory.py
├── mcp_auth_http_adapter.py
├── mcp_auth_http_simple.py
├── mcp_server_main.py
├── nginx.conf
├── ornek.png
├── Procfile
├── pyproject.toml
├── railway.json
├── README.md
├── redis_session_store.py
├── rekabet_mcp_module
│ ├── __init__.py
│ ├── client.py
│ └── models.py
├── requirements.txt
├── run_asgi.py
├── saidsurucu-yargi-mcp-f5fa007
│ ├── __main__.py
│ ├── .dockerignore
│ ├── .env.example
│ ├── .gitattributes
│ ├── .github
│ │ └── workflows
│ │ └── publish.yml
│ ├── .gitignore
│ ├── 5ire-settings.png
│ ├── anayasa_mcp_module
│ │ ├── __init__.py
│ │ ├── bireysel_client.py
│ │ ├── client.py
│ │ ├── models.py
│ │ └── unified_client.py
│ ├── asgi_app.py
│ ├── bddk_mcp_module
│ │ ├── __init__.py
│ │ ├── client.py
│ │ └── models.py
│ ├── bedesten_mcp_module
│ │ ├── __init__.py
│ │ ├── client.py
│ │ ├── enums.py
│ │ └── models.py
│ ├── check_response_format.py
│ ├── danistay_mcp_module
│ │ ├── __init__.py
│ │ ├── client.py
│ │ └── models.py
│ ├── docker-compose.yml
│ ├── Dockerfile
│ ├── docs
│ │ └── DEPLOYMENT.md
│ ├── emsal_mcp_module
│ │ ├── __init__.py
│ │ ├── client.py
│ │ └── models.py
│ ├── example_fastapi_app.py
│ ├── kik_mcp_module
│ │ ├── __init__.py
│ │ ├── client.py
│ │ └── models.py
│ ├── kvkk_mcp_module
│ │ ├── __init__.py
│ │ ├── client.py
│ │ └── models.py
│ ├── LICENSE
│ ├── mcp_auth
│ │ ├── __init__.py
│ │ ├── clerk_config.py
│ │ ├── middleware.py
│ │ ├── oauth.py
│ │ ├── policy.py
│ │ └── storage.py
│ ├── mcp_auth_factory.py
│ ├── mcp_auth_http_adapter.py
│ ├── mcp_auth_http_simple.py
│ ├── mcp_server_main.py
│ ├── nginx.conf
│ ├── ornek.png
│ ├── Procfile
│ ├── pyproject.toml
│ ├── railway.json
│ ├── README.md
│ ├── redis_session_store.py
│ ├── rekabet_mcp_module
│ │ ├── __init__.py
│ │ ├── client.py
│ │ └── models.py
│ ├── run_asgi.py
│ ├── sayistay_mcp_module
│ │ ├── __init__.py
│ │ ├── client.py
│ │ ├── enums.py
│ │ ├── models.py
│ │ └── unified_client.py
│ ├── starlette_app.py
│ ├── stripe_webhook.py
│ ├── uyusmazlik_mcp_module
│ │ ├── __init__.py
│ │ ├── client.py
│ │ └── models.py
│ └── yargitay_mcp_module
│ ├── __init__.py
│ ├── client.py
│ └── models.py
├── sayistay_mcp_module
│ ├── __init__.py
│ ├── client.py
│ ├── enums.py
│ ├── models.py
│ └── unified_client.py
├── starlette_app.py
├── stripe_webhook.py
├── uv.lock
├── uyusmazlik_mcp_module
│ ├── __init__.py
│ ├── client.py
│ └── models.py
└── yargitay_mcp_module
├── __init__.py
├── client.py
└── models.py
```
# Files
--------------------------------------------------------------------------------
/asgi_app.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | ASGI application for Yargı MCP Server
3 |
4 | This module provides ASGI/HTTP access to the Yargı MCP server,
5 | allowing it to be deployed as a web service with FastAPI wrapper
6 | for OAuth integration and proper middleware support.
7 |
8 | Usage:
9 | uvicorn asgi_app:app --host 0.0.0.0 --port 8000
10 | """
11 |
12 | import os
13 | import time
14 | import logging
15 | import json
16 | from datetime import datetime, timedelta
17 | from fastapi import FastAPI, Request, HTTPException, Query
18 | from fastapi.responses import JSONResponse, HTMLResponse, Response
19 | from fastapi.exception_handlers import http_exception_handler
20 | from starlette.middleware import Middleware
21 | from starlette.middleware.cors import CORSMiddleware
22 | from starlette.middleware.base import BaseHTTPMiddleware
23 |
24 | # Import the proper create_app function that includes all middleware
25 | from mcp_server_main import create_app
26 |
27 | # Conditional auth-related imports (only if auth enabled)
28 | _auth_check = os.getenv("ENABLE_AUTH", "false").lower() == "true"
29 |
30 | if _auth_check:
31 | # Import MCP Auth HTTP adapter (OAuth endpoints)
32 | try:
33 | from mcp_auth_http_simple import router as mcp_auth_router
34 | except ImportError:
35 | mcp_auth_router = None
36 |
37 | # Import Stripe webhook router
38 | try:
39 | from stripe_webhook import router as stripe_router
40 | except ImportError:
41 | stripe_router = None
42 | else:
43 | mcp_auth_router = None
44 | stripe_router = None
45 |
46 | # OAuth configuration from environment variables
47 | CLERK_ISSUER = os.getenv("CLERK_ISSUER", "https://clerk.yargimcp.com")
48 | BASE_URL = os.getenv("BASE_URL", "https://api.yargimcp.com")
49 | CLERK_SECRET_KEY = os.getenv("CLERK_SECRET_KEY")
50 | CLERK_PUBLISHABLE_KEY = os.getenv("CLERK_PUBLISHABLE_KEY")
51 |
52 | # Setup logging
53 | logger = logging.getLogger(__name__)
54 |
55 | # Configure CORS and Auth middleware
56 | cors_origins = os.getenv("ALLOWED_ORIGINS", "*").split(",")
57 |
58 | # Import FastMCP Bearer Auth Provider
59 | from fastmcp.server.auth import BearerAuthProvider
60 | from fastmcp.server.auth.providers.bearer import RSAKeyPair
61 |
62 | # Import Clerk SDK at module level for performance
63 | try:
64 | from clerk_backend_api import Clerk
65 | CLERK_SDK_AVAILABLE = True
66 | except ImportError:
67 | CLERK_SDK_AVAILABLE = False
68 | logger.warning("Clerk SDK not available - falling back to development mode")
69 |
70 | # Configure Bearer token authentication based on ENABLE_AUTH
71 | auth_enabled = os.getenv("ENABLE_AUTH", "false").lower() == "true"
72 | bearer_auth = None
73 |
74 | if CLERK_SECRET_KEY and CLERK_ISSUER:
75 | # Production: Use Clerk JWKS endpoint for token validation
76 | bearer_auth = BearerAuthProvider(
77 | jwks_uri=f"{CLERK_ISSUER}/.well-known/jwks.json",
78 | issuer=None,
79 | algorithm="RS256",
80 | audience=None,
81 | required_scopes=[]
82 | )
83 | else:
84 | # Development: Generate RSA key pair for testing
85 | dev_key_pair = RSAKeyPair.generate()
86 | bearer_auth = BearerAuthProvider(
87 | public_key=dev_key_pair.public_key,
88 | issuer="https://dev.yargimcp.com",
89 | audience="dev-mcp-server",
90 | required_scopes=["yargi.read"]
91 | )
92 |
93 | # Create MCP app with Bearer authentication
94 | mcp_server = create_app(auth=bearer_auth if auth_enabled else None)
95 |
96 | # Create MCP Starlette sub-application with root path - mount will add /mcp prefix
97 | mcp_app = mcp_server.http_app(path="/")
98 |
99 |
100 | # Configure JSON encoder for proper Turkish character support
101 | class UTF8JSONResponse(JSONResponse):
102 | def __init__(self, content=None, status_code=200, headers=None, **kwargs):
103 | if headers is None:
104 | headers = {}
105 | headers["Content-Type"] = "application/json; charset=utf-8"
106 | super().__init__(content, status_code, headers, **kwargs)
107 |
108 | def render(self, content) -> bytes:
109 | return json.dumps(
110 | content,
111 | ensure_ascii=False,
112 | allow_nan=False,
113 | indent=None,
114 | separators=(",", ":"),
115 | ).encode("utf-8")
116 |
117 | custom_middleware = [
118 | Middleware(
119 | CORSMiddleware,
120 | allow_origins=cors_origins,
121 | allow_credentials=True,
122 | allow_methods=["GET", "POST", "OPTIONS", "DELETE"],
123 | allow_headers=["Content-Type", "Authorization", "X-Request-ID", "X-Session-ID"],
124 | ),
125 | ]
126 |
127 | # Create FastAPI wrapper application
128 | app = FastAPI(
129 | title="Yargı MCP Server",
130 | description="MCP server for Turkish legal databases with OAuth authentication",
131 | version="0.1.0",
132 | middleware=custom_middleware,
133 | default_response_class=UTF8JSONResponse, # Use UTF-8 JSON encoder
134 | redirect_slashes=False # Disable to prevent 307 redirects on /mcp endpoint
135 | )
136 |
137 | # Add auth-related routers to FastAPI (only if available)
138 | if stripe_router:
139 | app.include_router(stripe_router, prefix="/api/stripe")
140 |
141 | if mcp_auth_router:
142 | app.include_router(mcp_auth_router)
143 |
144 | # Custom 401 exception handler for MCP spec compliance
145 | @app.exception_handler(401)
146 | async def custom_401_handler(request: Request, exc: HTTPException):
147 | """Custom 401 handler that adds WWW-Authenticate header as required by MCP spec"""
148 | response = await http_exception_handler(request, exc)
149 |
150 | # Add WWW-Authenticate header pointing to protected resource metadata
151 | # as required by RFC 9728 Section 5.1 and MCP Authorization spec
152 | response.headers["WWW-Authenticate"] = (
153 | 'Bearer '
154 | 'error="invalid_token", '
155 | 'error_description="The access token is missing or invalid", '
156 | f'resource="{BASE_URL}/.well-known/oauth-protected-resource"'
157 | )
158 |
159 | return response
160 |
161 | # FastAPI health check endpoint - BEFORE mounting MCP app
162 | @app.get("/health")
163 | async def health_check():
164 | """Health check endpoint for monitoring"""
165 | return {
166 | "status": "healthy",
167 | "service": "Yargı MCP Server",
168 | "version": "0.1.0",
169 | "tools_count": len(mcp_server._tool_manager._tools),
170 | "auth_enabled": os.getenv("ENABLE_AUTH", "false").lower() == "true"
171 | }
172 |
173 | # Add explicit redirect for /mcp to /mcp/ with method preservation
174 | @app.api_route("/mcp", methods=["GET", "POST", "HEAD", "OPTIONS"])
175 | async def redirect_to_slash(request: Request):
176 | """Redirect /mcp to /mcp/ preserving HTTP method with 308"""
177 | from fastapi.responses import RedirectResponse
178 | return RedirectResponse(url="/mcp/", status_code=308)
179 |
180 | # MCP mount at /mcp handles path routing correctly
181 |
182 | # IMPORTANT: Add FastAPI endpoints BEFORE mounting MCP app
183 | # Otherwise mount at root will catch all requests
184 |
185 | # Debug endpoint to test routing
186 | @app.get("/debug/test")
187 | async def debug_test():
188 | """Debug endpoint to test if FastAPI routes work"""
189 | return {"message": "FastAPI routes working", "debug": True}
190 |
191 | # Clerk CORS proxy endpoints
192 | @app.api_route("/clerk-proxy/{path:path}", methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"])
193 | async def clerk_cors_proxy(request: Request, path: str):
194 | """
195 | Proxy requests to Clerk to bypass CORS restrictions.
196 | Forwards requests from Claude AI to clerk.yargimcp.com with proper CORS headers.
197 | """
198 | import httpx
199 |
200 | # Build target URL
201 | clerk_url = f"https://clerk.yargimcp.com/{path}"
202 |
203 | # Forward query parameters
204 | if request.url.query:
205 | clerk_url += f"?{request.url.query}"
206 |
207 | # Copy headers (exclude host/origin)
208 | headers = dict(request.headers)
209 | headers.pop('host', None)
210 | headers.pop('origin', None)
211 | headers['origin'] = 'https://yargimcp.com' # Use our frontend domain
212 |
213 | try:
214 | async with httpx.AsyncClient() as client:
215 | # Forward the request to Clerk
216 | if request.method == "OPTIONS":
217 | # Handle preflight
218 | response = await client.request(
219 | method=request.method,
220 | url=clerk_url,
221 | headers=headers
222 | )
223 | else:
224 | # Forward body for POST/PUT requests
225 | body = None
226 | if request.method in ["POST", "PUT", "PATCH"]:
227 | body = await request.body()
228 |
229 | response = await client.request(
230 | method=request.method,
231 | url=clerk_url,
232 | headers=headers,
233 | content=body
234 | )
235 |
236 | # Create response with CORS headers
237 | response_headers = dict(response.headers)
238 | response_headers.update({
239 | "Access-Control-Allow-Origin": "*",
240 | "Access-Control-Allow-Methods": "GET, POST, PUT, DELETE, OPTIONS",
241 | "Access-Control-Allow-Headers": "Content-Type, Authorization, Accept, Origin, X-Requested-With",
242 | "Access-Control-Allow-Credentials": "true",
243 | "Access-Control-Max-Age": "86400"
244 | })
245 |
246 | return Response(
247 | content=response.content,
248 | status_code=response.status_code,
249 | headers=response_headers,
250 | media_type=response.headers.get("content-type")
251 | )
252 |
253 | except Exception as e:
254 | return JSONResponse(
255 | {"error": "proxy_error", "message": str(e)},
256 | status_code=500,
257 | headers={"Access-Control-Allow-Origin": "*"}
258 | )
259 |
260 | # FastAPI root endpoint
261 | @app.get("/")
262 | async def root():
263 | """Root endpoint with service information"""
264 | return {
265 | "service": "Yargı MCP Server",
266 | "description": "MCP server for Turkish legal databases with OAuth authentication",
267 | "endpoints": {
268 | "mcp": "/mcp",
269 | "health": "/health",
270 | "status": "/status",
271 | "stripe_webhook": "/api/stripe/webhook",
272 | "oauth_login": "/auth/login",
273 | "oauth_callback": "/auth/callback",
274 | "oauth_google": "/auth/google/login",
275 | "user_info": "/auth/user"
276 | },
277 | "transports": {
278 | "http": "/mcp"
279 | },
280 | "supported_databases": [
281 | "Yargıtay (Court of Cassation)",
282 | "Danıştay (Council of State)",
283 | "Emsal (Precedent)",
284 | "Uyuşmazlık Mahkemesi (Court of Jurisdictional Disputes)",
285 | "Anayasa Mahkemesi (Constitutional Court)",
286 | "Kamu İhale Kurulu (Public Procurement Authority)",
287 | "Rekabet Kurumu (Competition Authority)",
288 | "Sayıştay (Court of Accounts)",
289 | "KVKK (Personal Data Protection Authority)",
290 | "BDDK (Banking Regulation and Supervision Agency)",
291 | "Bedesten API (Multiple courts)"
292 | ],
293 | "authentication": {
294 | "enabled": os.getenv("ENABLE_AUTH", "false").lower() == "true",
295 | "type": "OAuth 2.0 via Clerk",
296 | "issuer": CLERK_ISSUER,
297 | "providers": ["google"],
298 | "flow": "authorization_code"
299 | }
300 | }
301 |
302 | # OAuth 2.0 Authorization Server Metadata - MCP standard location
303 | @app.get("/.well-known/oauth-authorization-server")
304 | async def oauth_authorization_server_root():
305 | """OAuth 2.0 Authorization Server Metadata - root level for compatibility"""
306 | return {
307 | "issuer": BASE_URL, # Use BASE_URL as issuer for MCP integration
308 | "authorization_endpoint": f"{BASE_URL}/auth/login",
309 | "token_endpoint": f"{BASE_URL}/token",
310 | "jwks_uri": f"{CLERK_ISSUER}/.well-known/jwks.json",
311 | "response_types_supported": ["code"],
312 | "grant_types_supported": ["authorization_code", "refresh_token"],
313 | "token_endpoint_auth_methods_supported": ["client_secret_basic", "none"],
314 | "scopes_supported": ["read", "search", "openid", "profile", "email"],
315 | "subject_types_supported": ["public"],
316 | "id_token_signing_alg_values_supported": ["RS256"],
317 | "claims_supported": ["sub", "iss", "aud", "exp", "iat", "email", "name"],
318 | "code_challenge_methods_supported": ["S256"],
319 | "service_documentation": f"{BASE_URL}/mcp",
320 | "registration_endpoint": f"{BASE_URL}/register",
321 | "resource_documentation": f"{BASE_URL}/mcp"
322 | }
323 |
324 | # Claude AI MCP specific endpoint format - suffix versions
325 | @app.get("/.well-known/oauth-authorization-server/mcp")
326 | async def oauth_authorization_server_mcp_suffix():
327 | """OAuth 2.0 Authorization Server Metadata - Claude AI MCP specific format"""
328 | return {
329 | "issuer": BASE_URL, # Use BASE_URL as issuer for MCP integration
330 | "authorization_endpoint": f"{BASE_URL}/auth/login",
331 | "token_endpoint": f"{BASE_URL}/token",
332 | "jwks_uri": f"{CLERK_ISSUER}/.well-known/jwks.json",
333 | "response_types_supported": ["code"],
334 | "grant_types_supported": ["authorization_code", "refresh_token"],
335 | "token_endpoint_auth_methods_supported": ["client_secret_basic", "none"],
336 | "scopes_supported": ["read", "search", "openid", "profile", "email"],
337 | "subject_types_supported": ["public"],
338 | "id_token_signing_alg_values_supported": ["RS256"],
339 | "claims_supported": ["sub", "iss", "aud", "exp", "iat", "email", "name"],
340 | "code_challenge_methods_supported": ["S256"],
341 | "service_documentation": f"{BASE_URL}/mcp",
342 | "registration_endpoint": f"{BASE_URL}/register",
343 | "resource_documentation": f"{BASE_URL}/mcp"
344 | }
345 |
346 | @app.get("/.well-known/oauth-protected-resource/mcp")
347 | async def oauth_protected_resource_mcp_suffix():
348 | """OAuth 2.0 Protected Resource Metadata - Claude AI MCP specific format"""
349 | return {
350 | "resource": BASE_URL,
351 | "authorization_servers": [
352 | BASE_URL
353 | ],
354 | "scopes_supported": ["read", "search"],
355 | "bearer_methods_supported": ["header"],
356 | "resource_documentation": f"{BASE_URL}/mcp",
357 | "resource_policy_uri": f"{BASE_URL}/privacy"
358 | }
359 |
360 | # OAuth 2.0 Protected Resource Metadata (RFC 9728) - MCP Spec Required
361 | @app.get("/.well-known/oauth-protected-resource")
362 | async def oauth_protected_resource():
363 | """OAuth 2.0 Protected Resource Metadata as required by MCP spec"""
364 | return {
365 | "resource": BASE_URL,
366 | "authorization_servers": [
367 | BASE_URL
368 | ],
369 | "scopes_supported": ["read", "search"],
370 | "bearer_methods_supported": ["header"],
371 | "resource_documentation": f"{BASE_URL}/mcp",
372 | "resource_policy_uri": f"{BASE_URL}/privacy"
373 | }
374 |
375 | # Standard well-known discovery endpoint
376 | @app.get("/.well-known/mcp")
377 | async def well_known_mcp():
378 | """Standard MCP discovery endpoint"""
379 | return {
380 | "mcp_server": {
381 | "name": "Yargı MCP Server",
382 | "version": "0.1.0",
383 | "endpoint": f"{BASE_URL}/mcp",
384 | "authentication": {
385 | "type": "oauth2",
386 | "authorization_url": f"{BASE_URL}/auth/login",
387 | "scopes": ["read", "search"]
388 | },
389 | "capabilities": ["tools", "resources"],
390 | "tools_count": len(mcp_server._tool_manager._tools)
391 | }
392 | }
393 |
394 | # MCP Discovery endpoint for ChatGPT integration
395 | @app.get("/mcp/discovery")
396 | async def mcp_discovery():
397 | """MCP Discovery endpoint for ChatGPT and other MCP clients"""
398 | return {
399 | "name": "Yargı MCP Server",
400 | "description": "MCP server for Turkish legal databases",
401 | "version": "0.1.0",
402 | "protocol": "mcp",
403 | "transport": "http",
404 | "endpoint": "/mcp",
405 | "authentication": {
406 | "type": "oauth2",
407 | "authorization_url": "/auth/login",
408 | "token_url": "/token",
409 | "scopes": ["read", "search"],
410 | "provider": "clerk"
411 | },
412 | "capabilities": {
413 | "tools": True,
414 | "resources": True,
415 | "prompts": False
416 | },
417 | "tools_count": len(mcp_server._tool_manager._tools),
418 | "contact": {
419 | "url": BASE_URL,
420 | "email": "[email protected]"
421 | }
422 | }
423 |
424 | # FastAPI status endpoint
425 | @app.get("/status")
426 | async def status():
427 | """Status endpoint with detailed information"""
428 | tools = []
429 | for tool in mcp_server._tool_manager._tools.values():
430 | tools.append({
431 | "name": tool.name,
432 | "description": tool.description[:100] + "..." if len(tool.description) > 100 else tool.description
433 | })
434 |
435 | return {
436 | "status": "operational",
437 | "tools": tools,
438 | "total_tools": len(tools),
439 | "transport": "streamable_http",
440 | "architecture": "FastAPI wrapper + MCP Starlette sub-app",
441 | "auth_status": "enabled" if os.getenv("ENABLE_AUTH", "false").lower() == "true" else "disabled"
442 | }
443 |
444 | # Simplified OAuth session validation for callback endpoints only
445 | async def validate_clerk_session_for_oauth(request: Request, clerk_token: str = None) -> str:
446 | """Validate Clerk session for OAuth callback endpoints only (not for MCP endpoints)"""
447 |
448 | try:
449 | # Use Clerk SDK if available
450 | if not CLERK_SDK_AVAILABLE:
451 | raise ImportError("Clerk SDK not available")
452 | clerk = Clerk(bearer_auth=CLERK_SECRET_KEY)
453 |
454 | # Try JWT token first (from URL parameter)
455 | if clerk_token:
456 | try:
457 | return "oauth_user_from_token"
458 | except Exception as e:
459 | pass
460 |
461 | # Fallback to cookie validation
462 | clerk_session = request.cookies.get("__session")
463 | if not clerk_session:
464 | raise HTTPException(status_code=401, detail="No Clerk session found")
465 |
466 | # Validate session with Clerk
467 | session = clerk.sessions.verify_session(clerk_session)
468 | return session.user_id
469 |
470 | except ImportError:
471 | return "dev_user_123"
472 | except Exception as e:
473 | raise HTTPException(status_code=401, detail=f"OAuth session validation failed: {str(e)}")
474 |
475 | # MCP OAuth Callback Endpoint
476 | @app.get("/auth/mcp-callback")
477 | async def mcp_oauth_callback(request: Request, clerk_token: str = Query(None)):
478 | """Handle OAuth callback for MCP token generation"""
479 |
480 | try:
481 | # Validate Clerk session with JWT token support
482 | user_id = await validate_clerk_session_for_oauth(request, clerk_token)
483 |
484 | # Return success response
485 | return HTMLResponse(f"""
486 | <html>
487 | <head>
488 | <title>MCP Connection Successful</title>
489 | <style>
490 | body {{ font-family: Arial, sans-serif; text-align: center; padding: 50px; }}
491 | .success {{ color: #28a745; }}
492 | .token {{ background: #f8f9fa; padding: 15px; border-radius: 5px; margin: 20px 0; word-break: break-all; }}
493 | </style>
494 | </head>
495 | <body>
496 | <h1 class="success">✅ MCP Connection Successful!</h1>
497 | <p>Your Yargı MCP integration is now active.</p>
498 | <div class="token">
499 | <strong>Authentication:</strong><br>
500 | <code>Use your Clerk JWT token directly with Bearer authentication</code>
501 | </div>
502 | <p>You can now close this window and return to your MCP client.</p>
503 | <script>
504 | // Try to close the popup if opened as such
505 | if (window.opener) {{
506 | window.opener.postMessage({{
507 | type: 'MCP_AUTH_SUCCESS',
508 | token: 'use_clerk_jwt_token'
509 | }}, '*');
510 | setTimeout(() => window.close(), 3000);
511 | }}
512 | </script>
513 | </body>
514 | </html>
515 | """)
516 |
517 | except HTTPException as e:
518 | return HTMLResponse(f"""
519 | <html>
520 | <head>
521 | <title>MCP Connection Failed</title>
522 | <style>
523 | body {{ font-family: Arial, sans-serif; text-align: center; padding: 50px; }}
524 | .error {{ color: #dc3545; }}
525 | .debug {{ background: #f8f9fa; padding: 10px; margin: 20px 0; border-radius: 5px; font-family: monospace; }}
526 | </style>
527 | </head>
528 | <body>
529 | <h1 class="error">❌ MCP Connection Failed</h1>
530 | <p>{e.detail}</p>
531 | <div class="debug">
532 | <strong>Debug Info:</strong><br>
533 | Clerk Token: {'✅ Provided' if clerk_token else '❌ Missing'}<br>
534 | Error: {e.detail}<br>
535 | Status: {e.status_code}
536 | </div>
537 | <p>Please try again or contact support.</p>
538 | <a href="https://yargimcp.com/sign-in">Return to Sign In</a>
539 | </body>
540 | </html>
541 | """, status_code=e.status_code)
542 | except Exception as e:
543 | return HTMLResponse(f"""
544 | <html>
545 | <head>
546 | <title>MCP Connection Error</title>
547 | <style>
548 | body {{ font-family: Arial, sans-serif; text-align: center; padding: 50px; }}
549 | .error {{ color: #dc3545; }}
550 | </style>
551 | </head>
552 | <body>
553 | <h1 class="error">❌ Unexpected Error</h1>
554 | <p>An unexpected error occurred during authentication.</p>
555 | <p>Error: {str(e)}</p>
556 | <a href="https://yargimcp.com/sign-in">Return to Sign In</a>
557 | </body>
558 | </html>
559 | """, status_code=500)
560 |
561 | # OAuth2 Token Endpoint - Now uses Clerk JWT tokens directly
562 | @app.post("/auth/mcp-token")
563 | async def mcp_token_endpoint(request: Request):
564 | """OAuth2 token endpoint for MCP clients - returns Clerk JWT token info"""
565 | try:
566 | # Validate Clerk session
567 | user_id = await validate_clerk_session_for_oauth(request)
568 |
569 | return {
570 | "message": "Use your Clerk JWT token directly with Bearer authentication",
571 | "token_type": "Bearer",
572 | "scope": "yargi.read",
573 | "user_id": user_id,
574 | "instructions": "Include 'Authorization: Bearer YOUR_CLERK_JWT_TOKEN' in your requests"
575 | }
576 | except HTTPException as e:
577 | return JSONResponse(
578 | status_code=e.status_code,
579 | content={"error": "invalid_request", "error_description": e.detail}
580 | )
581 |
582 | # Mount MCP app at /mcp/ with trailing slash
583 | app.mount("/mcp/", mcp_app)
584 |
585 | # Set the lifespan context after mounting
586 | app.router.lifespan_context = mcp_app.lifespan
587 |
588 | # Export for uvicorn
589 | __all__ = ["app"]
```
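The wrapper above exposes plain FastAPI endpoints (`/health`, `/status`, `/mcp/discovery`) alongside the MCP app mounted at `/mcp/`, and, when `ENABLE_AUTH=true`, expects a Clerk-issued JWT as a Bearer token. Below is a minimal smoke-test sketch against a local run (`uvicorn asgi_app:app --port 8000`); the base URL and the placeholder token are assumptions, not values from this repository.

```python
# Hedged sketch: exercise the service endpoints defined in asgi_app.py (assumed local run).
import asyncio
import httpx

BASE_URL = "http://localhost:8000"   # assumed local deployment
CLERK_JWT = "YOUR_CLERK_JWT_TOKEN"   # placeholder; only relevant when ENABLE_AUTH=true

async def main() -> None:
    async with httpx.AsyncClient(base_url=BASE_URL) as client:
        print((await client.get("/health")).json())                      # {"status": "healthy", ...}
        print((await client.get("/status")).json()["total_tools"])
        print((await client.get("/mcp/discovery")).json()["endpoint"])   # "/mcp"

        # The MCP endpoint itself is mounted at /mcp/ (a 308 redirect covers /mcp) and,
        # with auth enabled, requires "Authorization: Bearer <Clerk JWT>".
        resp = await client.get("/mcp/", headers={"Authorization": f"Bearer {CLERK_JWT}"})
        print(resp.status_code)

asyncio.run(main())
```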
--------------------------------------------------------------------------------
/saidsurucu-yargi-mcp-f5fa007/asgi_app.py:
--------------------------------------------------------------------------------
```python
1 | """
2 | ASGI application for Yargı MCP Server
3 |
4 | This module provides ASGI/HTTP access to the Yargı MCP server,
5 | allowing it to be deployed as a web service with FastAPI wrapper
6 | for Stripe webhook integration.
7 |
8 | Usage:
9 | uvicorn asgi_app:app --host 0.0.0.0 --port 8000
10 | """
11 |
12 | import os
13 | import time
14 | import logging
15 | from datetime import datetime, timedelta
16 | from fastapi import FastAPI, Request, HTTPException, Query
17 | from fastapi.responses import JSONResponse, HTMLResponse
18 | from fastapi.exception_handlers import http_exception_handler
19 | from starlette.middleware import Middleware
20 | from starlette.middleware.cors import CORSMiddleware
21 | from starlette.responses import Response
22 | from starlette.requests import Request as StarletteRequest
23 |
24 | # Import the MCP app creator function
25 | from mcp_server_main import create_app
26 |
27 | # Import Stripe webhook router
28 | from stripe_webhook import router as stripe_router
29 |
30 | # Import simplified MCP Auth HTTP adapter
31 | from mcp_auth_http_simple import router as mcp_auth_router
32 |
33 | # OAuth configuration from environment variables
34 | CLERK_ISSUER = os.getenv("CLERK_ISSUER", "https://accounts.yargimcp.com")
35 | BASE_URL = os.getenv("BASE_URL", "https://yargimcp.com")
36 |
37 | # Setup logging
38 | logger = logging.getLogger(__name__)
39 |
40 | # Configure CORS and Auth middleware
41 | cors_origins = os.getenv("ALLOWED_ORIGINS", "*").split(",")
42 |
43 | # Import FastMCP Bearer Auth Provider
44 | from fastmcp.server.auth import BearerAuthProvider
45 | from fastmcp.server.auth.providers.bearer import RSAKeyPair
46 |
47 | # Clerk JWT configuration for Bearer token validation
48 | CLERK_SECRET_KEY = os.getenv("CLERK_SECRET_KEY")
49 | CLERK_ISSUER = os.getenv("CLERK_ISSUER", "https://accounts.yargimcp.com")
50 | CLERK_PUBLISHABLE_KEY = os.getenv("CLERK_PUBLISHABLE_KEY")
51 |
52 | # Configure Bearer token authentication
53 | bearer_auth = None
54 | if CLERK_SECRET_KEY and CLERK_ISSUER:
55 | # Production: Use Clerk JWKS endpoint for token validation
56 | bearer_auth = BearerAuthProvider(
57 | jwks_uri=f"{CLERK_ISSUER}/.well-known/jwks.json",
58 | issuer=CLERK_ISSUER,
59 | algorithm="RS256",
60 | audience=None, # Disable audience validation - Clerk uses different audience format
61 | required_scopes=[] # Disable scope validation - Clerk JWT has ['read', 'search']
62 | )
63 | logger.info(f"Bearer auth configured with Clerk JWKS: {CLERK_ISSUER}/.well-known/jwks.json")
64 | else:
65 | # Development: Generate RSA key pair for testing
66 | logger.warning("No Clerk credentials found - using development RSA key pair")
67 | dev_key_pair = RSAKeyPair.generate()
68 | bearer_auth = BearerAuthProvider(
69 | public_key=dev_key_pair.public_key,
70 | issuer="https://dev.yargimcp.com",
71 | audience="dev-mcp-server",
72 | required_scopes=["yargi.read"]
73 | )
74 |
75 | # Generate a test token for development
76 | dev_token = dev_key_pair.create_token(
77 | subject="dev-user",
78 | issuer="https://dev.yargimcp.com",
79 | audience="dev-mcp-server",
80 | scopes=["yargi.read", "yargi.search"],
81 | expires_in_seconds=3600 * 24 # 24 hours for development
82 | )
83 | logger.info(f"Development Bearer token: {dev_token}")
84 |
85 | custom_middleware = [
86 | Middleware(
87 | CORSMiddleware,
88 | allow_origins=cors_origins,
89 | allow_credentials=True,
90 | allow_methods=["GET", "POST", "OPTIONS", "DELETE"],
91 | allow_headers=["Content-Type", "Authorization", "X-Request-ID", "X-Session-ID"],
92 | ),
93 | ]
94 |
95 | # Create MCP app with Bearer authentication
96 | mcp_server = create_app(auth=bearer_auth)
97 |
98 | # Add Starlette middleware to FastAPI (not MCP)
99 | # MCP already has Bearer auth, no need for additional middleware on MCP level
100 |
101 | # Create MCP Starlette sub-application with root path - mount will add /mcp prefix
102 | mcp_app = mcp_server.http_app(path="/")
103 |
104 | # Configure JSON encoder for proper Turkish character support
105 | import json
106 | from fastapi.responses import JSONResponse
107 |
108 | class UTF8JSONResponse(JSONResponse):
109 | def __init__(self, content=None, status_code=200, headers=None, **kwargs):
110 | if headers is None:
111 | headers = {}
112 | headers["Content-Type"] = "application/json; charset=utf-8"
113 | super().__init__(content, status_code, headers, **kwargs)
114 |
115 | def render(self, content) -> bytes:
116 | return json.dumps(
117 | content,
118 | ensure_ascii=False,
119 | allow_nan=False,
120 | indent=None,
121 | separators=(",", ":"),
122 | ).encode("utf-8")
123 |
124 | # Create FastAPI wrapper application
125 | app = FastAPI(
126 | title="Yargı MCP Server",
127 | description="MCP server for Turkish legal databases with OAuth authentication",
128 | version="0.1.0",
129 | middleware=custom_middleware,
130 | default_response_class=UTF8JSONResponse # Use UTF-8 JSON encoder
131 | )
132 |
133 | # Add Stripe webhook router to FastAPI
134 | app.include_router(stripe_router, prefix="/api")
135 |
136 | # Add MCP Auth HTTP adapter to FastAPI (handles OAuth endpoints)
137 | app.include_router(mcp_auth_router)
138 |
139 | # Custom 401 exception handler for MCP spec compliance
140 | @app.exception_handler(401)
141 | async def custom_401_handler(request: Request, exc: HTTPException):
142 | """Custom 401 handler that adds WWW-Authenticate header as required by MCP spec"""
143 | response = await http_exception_handler(request, exc)
144 |
145 | # Add WWW-Authenticate header pointing to protected resource metadata
146 | # as required by RFC 9728 Section 5.1 and MCP Authorization spec
147 | response.headers["WWW-Authenticate"] = (
148 | 'Bearer '
149 | 'error="invalid_token", '
150 | 'error_description="The access token is missing or invalid", '
151 | f'resource="{BASE_URL}/.well-known/oauth-protected-resource"'
152 | )
153 |
154 | return response
155 |
156 | # FastAPI health check endpoint - BEFORE mounting MCP app
157 | @app.get("/health")
158 | async def health_check():
159 | """Health check endpoint for monitoring"""
160 | return JSONResponse({
161 | "status": "healthy",
162 | "service": "Yargı MCP Server",
163 | "version": "0.1.0",
164 | "tools_count": len(mcp_server._tool_manager._tools),
165 | "auth_enabled": os.getenv("ENABLE_AUTH", "false").lower() == "true"
166 | })
167 |
168 | # Add explicit redirect for /mcp to /mcp/ with method preservation
169 | @app.api_route("/mcp", methods=["GET", "POST", "HEAD", "OPTIONS"])
170 | async def redirect_to_slash(request: Request):
171 | """Redirect /mcp to /mcp/ preserving HTTP method with 308"""
172 | from fastapi.responses import RedirectResponse
173 | return RedirectResponse(url="/mcp/", status_code=308)
174 |
175 | # Mount MCP app at /mcp/ with trailing slash
176 | app.mount("/mcp/", mcp_app)
177 |
178 | # Set the lifespan context after mounting
179 | app.router.lifespan_context = mcp_app.lifespan
180 |
181 |
182 | # SSE transport deprecated - removed
183 |
184 | # FastAPI root endpoint
185 | @app.get("/")
186 | async def root():
187 | """Root endpoint with service information"""
188 | return JSONResponse({
189 | "service": "Yargı MCP Server",
190 | "description": "MCP server for Turkish legal databases with OAuth authentication",
191 | "endpoints": {
192 | "mcp": "/mcp",
193 | "health": "/health",
194 | "status": "/status",
195 | "stripe_webhook": "/api/stripe/webhook",
196 | "oauth_login": "/auth/login",
197 | "oauth_callback": "/auth/callback",
198 | "oauth_google": "/auth/google/login",
199 | "user_info": "/auth/user"
200 | },
201 | "transports": {
202 | "http": "/mcp"
203 | },
204 | "supported_databases": [
205 | "Yargıtay (Court of Cassation)",
206 | "Danıştay (Council of State)",
207 | "Emsal (Precedent)",
208 | "Uyuşmazlık Mahkemesi (Court of Jurisdictional Disputes)",
209 | "Anayasa Mahkemesi (Constitutional Court)",
210 | "Kamu İhale Kurulu (Public Procurement Authority)",
211 | "Rekabet Kurumu (Competition Authority)",
212 | "Sayıştay (Court of Accounts)",
213 | "Bedesten API (Multiple courts)"
214 | ],
215 | "authentication": {
216 | "enabled": os.getenv("ENABLE_AUTH", "false").lower() == "true",
217 | "type": "OAuth 2.0 via Clerk",
218 | "issuer": os.getenv("CLERK_ISSUER", "https://clerk.accounts.dev"),
219 | "providers": ["google"],
220 | "flow": "authorization_code"
221 | }
222 | })
223 |
224 | # OAuth 2.0 Authorization Server Metadata proxy (for MCP clients that can't reach Clerk directly)
225 | # MCP Auth Toolkit expects this to be under /mcp/.well-known/oauth-authorization-server
226 | @app.get("/mcp/.well-known/oauth-authorization-server")
227 | async def oauth_authorization_server():
228 | """OAuth 2.0 Authorization Server Metadata proxy to Clerk - MCP Auth Toolkit standard location"""
229 | return JSONResponse({
230 | "issuer": BASE_URL,
231 | "authorization_endpoint": "https://yargimcp.com/mcp-callback",
232 | "token_endpoint": f"{BASE_URL}/token",
233 | "jwks_uri": f"{CLERK_ISSUER}/.well-known/jwks.json",
234 | "response_types_supported": ["code"],
235 | "grant_types_supported": ["authorization_code", "refresh_token"],
236 | "token_endpoint_auth_methods_supported": ["client_secret_basic", "none"],
237 | "scopes_supported": ["read", "search", "openid", "profile", "email"],
238 | "subject_types_supported": ["public"],
239 | "id_token_signing_alg_values_supported": ["RS256"],
240 | "claims_supported": ["sub", "iss", "aud", "exp", "iat", "email", "name"],
241 | "code_challenge_methods_supported": ["S256"],
242 | "service_documentation": f"{BASE_URL}/mcp",
243 | "registration_endpoint": f"{BASE_URL}/register",
244 | "resource_documentation": f"{BASE_URL}/mcp"
245 | })
246 |
247 | # Claude AI MCP specific endpoint format
248 | @app.get("/.well-known/oauth-authorization-server/mcp")
249 | async def oauth_authorization_server_mcp_suffix():
250 | """OAuth 2.0 Authorization Server Metadata - Claude AI MCP specific format"""
251 | return JSONResponse({
252 | "issuer": BASE_URL,
253 | "authorization_endpoint": "https://yargimcp.com/mcp-callback",
254 | "token_endpoint": f"{BASE_URL}/token",
255 | "jwks_uri": f"{CLERK_ISSUER}/.well-known/jwks.json",
256 | "response_types_supported": ["code"],
257 | "grant_types_supported": ["authorization_code", "refresh_token"],
258 | "token_endpoint_auth_methods_supported": ["client_secret_basic", "none"],
259 | "scopes_supported": ["read", "search", "openid", "profile", "email"],
260 | "subject_types_supported": ["public"],
261 | "id_token_signing_alg_values_supported": ["RS256"],
262 | "claims_supported": ["sub", "iss", "aud", "exp", "iat", "email", "name"],
263 | "code_challenge_methods_supported": ["S256"],
264 | "service_documentation": f"{BASE_URL}/mcp",
265 | "registration_endpoint": f"{BASE_URL}/register",
266 | "resource_documentation": f"{BASE_URL}/mcp"
267 | })
268 |
269 | @app.get("/.well-known/oauth-protected-resource/mcp")
270 | async def oauth_protected_resource_mcp_suffix():
271 | """OAuth 2.0 Protected Resource Metadata - Claude AI MCP specific format"""
272 | return JSONResponse({
273 | "resource": BASE_URL,
274 | "authorization_servers": [
275 | BASE_URL
276 | ],
277 | "scopes_supported": ["read", "search"],
278 | "bearer_methods_supported": ["header"],
279 | "resource_documentation": f"{BASE_URL}/mcp",
280 | "resource_policy_uri": f"{BASE_URL}/privacy"
281 | })
282 |
283 | # Keep root level for compatibility with some MCP clients
284 | @app.get("/.well-known/oauth-authorization-server")
285 | async def oauth_authorization_server_root():
286 | """OAuth 2.0 Authorization Server Metadata proxy to Clerk - root level for compatibility"""
287 | return JSONResponse({
288 | "issuer": BASE_URL,
289 | "authorization_endpoint": "https://yargimcp.com/mcp-callback",
290 | "token_endpoint": f"{BASE_URL}/token",
291 | "jwks_uri": f"{CLERK_ISSUER}/.well-known/jwks.json",
292 | "response_types_supported": ["code"],
293 | "grant_types_supported": ["authorization_code", "refresh_token"],
294 | "token_endpoint_auth_methods_supported": ["client_secret_basic", "none"],
295 | "scopes_supported": ["read", "search", "openid", "profile", "email"],
296 | "subject_types_supported": ["public"],
297 | "id_token_signing_alg_values_supported": ["RS256"],
298 | "claims_supported": ["sub", "iss", "aud", "exp", "iat", "email", "name"],
299 | "code_challenge_methods_supported": ["S256"],
300 | "service_documentation": f"{BASE_URL}/mcp",
301 | "registration_endpoint": f"{BASE_URL}/register",
302 | "resource_documentation": f"{BASE_URL}/mcp"
303 | })
304 |
305 | # Note: GET /mcp is handled by the mounted MCP app itself
306 | # This prevents 405 Method Not Allowed errors on POST requests
307 |
308 | # OAuth 2.0 Protected Resource Metadata (RFC 9728) - MCP Spec Required
309 | @app.get("/.well-known/oauth-protected-resource")
310 | async def oauth_protected_resource():
311 | """OAuth 2.0 Protected Resource Metadata as required by MCP spec"""
312 | return JSONResponse({
313 | "resource": BASE_URL,
314 | "authorization_servers": [
315 | BASE_URL
316 | ],
317 | "scopes_supported": ["read", "search"],
318 | "bearer_methods_supported": ["header"],
319 | "resource_documentation": f"{BASE_URL}/mcp",
320 | "resource_policy_uri": f"{BASE_URL}/privacy"
321 | })
322 |
323 | # Standard well-known discovery endpoint
324 | @app.get("/.well-known/mcp")
325 | async def well_known_mcp():
326 | """Standard MCP discovery endpoint"""
327 | return JSONResponse({
328 | "mcp_server": {
329 | "name": "Yargı MCP Server",
330 | "version": "0.1.0",
331 | "endpoint": f"{BASE_URL}/mcp",
332 | "authentication": {
333 | "type": "oauth2",
334 | "authorization_url": f"{BASE_URL}/auth/login",
335 | "scopes": ["read", "search"]
336 | },
337 | "capabilities": ["tools", "resources"],
338 | "tools_count": len(mcp_server._tool_manager._tools)
339 | }
340 | })
341 |
342 | # MCP Discovery endpoint for ChatGPT integration
343 | @app.get("/mcp/discovery")
344 | async def mcp_discovery():
345 | """MCP Discovery endpoint for ChatGPT and other MCP clients"""
346 | return JSONResponse({
347 | "name": "Yargı MCP Server",
348 | "description": "MCP server for Turkish legal databases",
349 | "version": "0.1.0",
350 | "protocol": "mcp",
351 | "transport": "http",
352 | "endpoint": "/mcp",
353 | "authentication": {
354 | "type": "oauth2",
355 | "authorization_url": "/auth/login",
356 | "token_url": "/auth/callback",
357 | "scopes": ["read", "search"],
358 | "provider": "clerk"
359 | },
360 | "capabilities": {
361 | "tools": True,
362 | "resources": True,
363 | "prompts": False
364 | },
365 | "tools_count": len(mcp_server._tool_manager._tools),
366 | "contact": {
367 | "url": BASE_URL,
368 | "email": "[email protected]"
369 | }
370 | })
371 |
372 | # FastAPI status endpoint
373 | @app.get("/status")
374 | async def status():
375 | """Status endpoint with detailed information"""
376 | tools = []
377 | for tool in mcp_server._tool_manager._tools.values():
378 | tools.append({
379 | "name": tool.name,
380 | "description": tool.description[:100] + "..." if len(tool.description) > 100 else tool.description
381 | })
382 |
383 | return JSONResponse({
384 | "status": "operational",
385 | "tools": tools,
386 | "total_tools": len(tools),
387 | "transport": "streamable_http",
388 | "architecture": "FastAPI wrapper + MCP Starlette sub-app",
389 | "auth_status": "enabled" if os.getenv("ENABLE_AUTH", "false").lower() == "true" else "disabled"
390 | })
391 |
392 | # Note: JWT token validation is now handled entirely by Clerk
393 | # All authentication flows use Clerk JWT tokens directly
394 |
395 | async def validate_clerk_session(request: Request, clerk_token: str = None) -> str:
396 | """Validate Clerk session from cookies or JWT token and return user_id"""
397 | logger.info(f"Validating Clerk session - token provided: {bool(clerk_token)}")
398 |
399 | try:
400 | # Try to import Clerk SDK
401 | from clerk_backend_api import Clerk
402 | clerk = Clerk(bearer_auth=os.getenv("CLERK_SECRET_KEY"))
403 |
404 | # Try JWT token first (from URL parameter)
405 | if clerk_token:
406 | logger.info("Validating Clerk JWT token from URL parameter")
407 | try:
408 | # Extract session_id from JWT token and verify with Clerk
409 | import jwt
410 | decoded_token = jwt.decode(clerk_token, options={"verify_signature": False})
411 | session_id = decoded_token.get("sid") # Use standard JWT 'sid' claim
412 |
413 | if session_id:
414 | # Verify with Clerk using session_id
415 | session = clerk.sessions.verify(session_id=session_id, token=clerk_token)
416 | user_id = session.user_id if session else None
417 |
418 | if user_id:
419 | logger.info(f"JWT token validation successful - user_id: {user_id}")
420 | return user_id
421 | else:
422 | logger.error("JWT token validation failed - no user_id in session")
423 | else:
424 | logger.error("No session_id found in JWT token")
425 | except Exception as e:
426 | logger.error(f"JWT token validation failed: {str(e)}")
427 | # Fall through to cookie validation
428 |
429 | # Fallback to cookie validation
430 | logger.info("Attempting cookie-based session validation")
431 | clerk_session = request.cookies.get("__session")
432 | if not clerk_session:
433 | logger.error("No Clerk session cookie found")
434 | raise HTTPException(status_code=401, detail="No Clerk session found")
435 |
436 | # Validate session with Clerk
437 | session = clerk.sessions.verify_session(clerk_session)
438 | logger.info(f"Cookie session validation successful - user_id: {session.user_id}")
439 | return session.user_id
440 |
441 | except ImportError:
442 | # Fallback for development without Clerk SDK
443 | logger.warning("Clerk SDK not available - using development fallback")
444 | return "dev_user_123"
445 | except Exception as e:
446 | logger.error(f"Session validation failed: {str(e)}")
447 | raise HTTPException(status_code=401, detail=f"Session validation failed: {str(e)}")
448 |
449 | # MCP OAuth Callback Endpoint
450 | @app.get("/auth/mcp-callback")
451 | async def mcp_oauth_callback(request: Request, clerk_token: str = Query(None)):
452 | """Handle OAuth callback for MCP token generation"""
453 | logger.info(f"MCP OAuth callback - clerk_token provided: {bool(clerk_token)}")
454 |
455 | try:
456 | # Validate Clerk session with JWT token support
457 | user_id = await validate_clerk_session(request, clerk_token)
458 | logger.info(f"User authenticated successfully - user_id: {user_id}")
459 |
460 | # Use the Clerk JWT token directly (no need to generate custom token)
461 | logger.info("User authenticated successfully via Clerk")
462 |
463 | # Return success response
464 | return HTMLResponse(f"""
465 | <html>
466 | <head>
467 | <title>MCP Connection Successful</title>
468 | <style>
469 | body {{ font-family: Arial, sans-serif; text-align: center; padding: 50px; }}
470 | .success {{ color: #28a745; }}
471 | .token {{ background: #f8f9fa; padding: 15px; border-radius: 5px; margin: 20px 0; word-break: break-all; }}
472 | </style>
473 | </head>
474 | <body>
475 | <h1 class="success">✅ MCP Connection Successful!</h1>
476 | <p>Your Yargı MCP integration is now active.</p>
477 | <div class="token">
478 | <strong>Authentication:</strong><br>
479 | <code>Use your Clerk JWT token directly with Bearer authentication</code>
480 | </div>
481 | <p>You can now close this window and return to your MCP client.</p>
482 | <script>
483 | // Try to close the popup if opened as such
484 | if (window.opener) {{
485 | window.opener.postMessage({{
486 | type: 'MCP_AUTH_SUCCESS',
487 | token: 'use_clerk_jwt_token'
488 | }}, '*');
489 | setTimeout(() => window.close(), 3000);
490 | }}
491 | </script>
492 | </body>
493 | </html>
494 | """)
495 |
496 | except HTTPException as e:
497 | logger.error(f"MCP OAuth callback failed: {e.detail}")
498 | return HTMLResponse(f"""
499 | <html>
500 | <head>
501 | <title>MCP Connection Failed</title>
502 | <style>
503 | body {{ font-family: Arial, sans-serif; text-align: center; padding: 50px; }}
504 | .error {{ color: #dc3545; }}
505 | .debug {{ background: #f8f9fa; padding: 10px; margin: 20px 0; border-radius: 5px; font-family: monospace; }}
506 | </style>
507 | </head>
508 | <body>
509 | <h1 class="error">❌ MCP Connection Failed</h1>
510 | <p>{e.detail}</p>
511 | <div class="debug">
512 | <strong>Debug Info:</strong><br>
513 | Clerk Token: {'✅ Provided' if clerk_token else '❌ Missing'}<br>
514 | Error: {e.detail}<br>
515 | Status: {e.status_code}
516 | </div>
517 | <p>Please try again or contact support.</p>
518 | <a href="https://yargimcp.com/sign-in">Return to Sign In</a>
519 | </body>
520 | </html>
521 | """, status_code=e.status_code)
522 | except Exception as e:
523 | logger.error(f"Unexpected error in MCP OAuth callback: {str(e)}")
524 | return HTMLResponse(f"""
525 | <html>
526 | <head>
527 | <title>MCP Connection Error</title>
528 | <style>
529 | body {{ font-family: Arial, sans-serif; text-align: center; padding: 50px; }}
530 | .error {{ color: #dc3545; }}
531 | </style>
532 | </head>
533 | <body>
534 | <h1 class="error">❌ Unexpected Error</h1>
535 | <p>An unexpected error occurred during authentication.</p>
536 | <p>Error: {str(e)}</p>
537 | <a href="https://yargimcp.com/sign-in">Return to Sign In</a>
538 | </body>
539 | </html>
540 | """, status_code=500)
541 |
542 | # OAuth2 Token Endpoint - Now uses Clerk JWT tokens directly
543 | @app.post("/auth/mcp-token")
544 | async def mcp_token_endpoint(request: Request):
545 | """OAuth2 token endpoint for MCP clients - returns Clerk JWT token info"""
546 | try:
547 | # Validate Clerk session
548 | user_id = await validate_clerk_session(request)
549 |
550 | return JSONResponse({
551 | "message": "Use your Clerk JWT token directly with Bearer authentication",
552 | "token_type": "Bearer",
553 | "scope": "yargi.read",
554 | "user_id": user_id,
555 | "instructions": "Include 'Authorization: Bearer YOUR_CLERK_JWT_TOKEN' in your requests"
556 | })
557 | except HTTPException as e:
558 | return JSONResponse(
559 | status_code=e.status_code,
560 | content={"error": "invalid_request", "error_description": e.detail}
561 | )
562 |
563 | # Note: Only HTTP transport supported - SSE transport deprecated
564 |
565 | # Export for uvicorn
566 | __all__ = ["app"]
```
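This earlier variant of `asgi_app.py` serves the same OAuth discovery documents under `/.well-known/oauth-protected-resource` and `/.well-known/oauth-authorization-server`. The sketch below shows the discovery walk an OAuth-aware MCP client could perform against either variant, using only field names present in the JSON above; the base URL is an assumption.

```python
# Hedged sketch: follow the OAuth discovery metadata exposed by asgi_app.py.
import httpx

BASE_URL = "http://localhost:8000"  # assumed local deployment

with httpx.Client(base_url=BASE_URL) as client:
    # RFC 9728 protected-resource metadata names the authorization server(s)
    resource_meta = client.get("/.well-known/oauth-protected-resource").json()
    auth_server = resource_meta["authorization_servers"][0]

    # Authorization-server metadata advertises where to authorize and exchange tokens
    auth_meta = client.get("/.well-known/oauth-authorization-server").json()
    print("authorization server:", auth_server)
    print("authorize at:", auth_meta["authorization_endpoint"])
    print("exchange code at:", auth_meta["token_endpoint"])
    print("resource scopes:", resource_meta["scopes_supported"])  # ["read", "search"]
```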
--------------------------------------------------------------------------------
/rekabet_mcp_module/client.py:
--------------------------------------------------------------------------------
```python
1 | # rekabet_mcp_module/client.py
2 |
3 | import httpx
4 | from bs4 import BeautifulSoup
5 | from typing import List, Optional, Tuple, Dict, Any
6 | import logging
7 | import html
8 | import re
9 | import io # For io.BytesIO
10 | from urllib.parse import urlencode, urljoin, quote, parse_qs, urlparse
11 | from markitdown import MarkItDown
12 | import math
13 |
14 | # pypdf for PDF processing (lighter alternative to PyMuPDF)
15 | from pypdf import PdfReader, PdfWriter # pypdf, the successor to PyPDF2
16 |
17 | from .models import (
18 | RekabetKurumuSearchRequest,
19 | RekabetDecisionSummary,
20 | RekabetSearchResult,
21 | RekabetDocument,
22 | RekabetKararTuruGuidEnum
23 | )
24 | from pydantic import HttpUrl # Ensure HttpUrl is imported from pydantic
25 |
26 | logger = logging.getLogger(__name__)
27 | if not logger.hasHandlers(): # pragma: no cover
28 | logging.basicConfig(
29 |         level=logging.INFO, # Default log level
30 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
31 | )
32 |     # The level can be raised separately for more detailed logging in the debug script.
33 |
34 | class RekabetKurumuApiClient:
35 | BASE_URL = "https://www.rekabet.gov.tr"
36 | SEARCH_PATH = "/tr/Kararlar"
37 | DECISION_LANDING_PATH_TEMPLATE = "/Karar"
38 |     # Since the PDF is returned as page-based Markdown, this constant is no longer used directly.
39 | # DOCUMENT_MARKDOWN_CHUNK_SIZE = 5000
40 |
41 | def __init__(self, request_timeout: float = 60.0):
42 | self.http_client = httpx.AsyncClient(
43 | base_url=self.BASE_URL,
44 | headers={
45 | "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
46 | "Accept-Language": "tr-TR,tr;q=0.9,en-US;q=0.8,en;q=0.7",
47 | "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
48 | },
49 | timeout=request_timeout,
50 | verify=True,
51 | follow_redirects=True
52 | )
53 |
54 | def _build_search_query_params(self, params: RekabetKurumuSearchRequest) -> List[Tuple[str, str]]:
55 | query_params: List[Tuple[str, str]] = []
56 | query_params.append(("sayfaAdi", params.sayfaAdi if params.sayfaAdi is not None else ""))
57 | query_params.append(("YayinlanmaTarihi", params.YayinlanmaTarihi if params.YayinlanmaTarihi is not None else ""))
58 | query_params.append(("PdfText", params.PdfText if params.PdfText is not None else ""))
59 |
60 | karar_turu_id_value = ""
61 | if params.KararTuruID is not None:
62 | karar_turu_id_value = params.KararTuruID.value if params.KararTuruID.value != "ALL" else ""
63 | query_params.append(("KararTuruID", karar_turu_id_value))
64 |
65 | query_params.append(("KararSayisi", params.KararSayisi if params.KararSayisi is not None else ""))
66 | query_params.append(("KararTarihi", params.KararTarihi if params.KararTarihi is not None else ""))
67 |
68 | if params.page and params.page > 1:
69 | query_params.append(("page", str(params.page)))
70 |
71 | return query_params
72 |
73 | async def search_decisions(self, params: RekabetKurumuSearchRequest) -> RekabetSearchResult:
74 | request_path = self.SEARCH_PATH
75 | final_query_params = self._build_search_query_params(params)
76 | logger.info(f"RekabetKurumuApiClient: Performing search. Path: {request_path}, Parameters: {final_query_params}")
77 |
78 | try:
79 | response = await self.http_client.get(request_path, params=final_query_params)
80 | response.raise_for_status()
81 | html_content = response.text
82 | except httpx.RequestError as e:
83 | logger.error(f"RekabetKurumuApiClient: HTTP request error during search: {e}")
84 | raise
85 |
86 | soup = BeautifulSoup(html_content, 'html.parser')
87 | processed_decisions: List[RekabetDecisionSummary] = []
88 | total_records: Optional[int] = None
89 | total_pages: Optional[int] = None
90 |
91 | pagination_div = soup.find("div", class_="yazi01")
92 | if pagination_div:
93 | text_content = pagination_div.get_text(separator=" ", strip=True)
94 | total_match = re.search(r"Toplam\s*:\s*(\d+)", text_content)
95 | if total_match:
96 | try:
97 | total_records = int(total_match.group(1))
98 | logger.debug(f"Total records found from pagination: {total_records}")
99 | except ValueError:
100 | logger.warning(f"Could not convert 'Toplam' value to int: {total_match.group(1)}")
101 | else:
102 | logger.warning("'Toplam :' string not found in pagination section.")
103 |
104 | results_per_page_assumed = 10
105 | if total_records is not None:
106 | calculated_total_pages = math.ceil(total_records / results_per_page_assumed)
107 | total_pages = calculated_total_pages if calculated_total_pages > 0 else (1 if total_records > 0 else 0)
108 | logger.debug(f"Calculated total pages: {total_pages}")
109 |
110 | if total_pages is None: # Fallback if total_records couldn't be parsed
111 | last_page_link = pagination_div.select_one("li.PagedList-skipToLast a")
112 | if last_page_link and last_page_link.has_attr('href'):
113 | qs = parse_qs(urlparse(last_page_link['href']).query)
114 | if 'page' in qs and qs['page']:
115 | try:
116 | total_pages = int(qs['page'][0])
117 | logger.debug(f"Total pages found from 'Last >>' link: {total_pages}")
118 | except ValueError:
119 | logger.warning(f"Could not convert page value from 'Last >>' link to int: {qs['page'][0]}")
120 | elif total_records == 0 : total_pages = 0 # If no records, 0 pages
121 | elif total_records is not None and total_records > 0 : total_pages = 1 # If records exist but no last page link (e.g. single page)
122 | else: logger.warning("'Last >>' link not found in pagination section.")
123 |
124 | decision_tables_container = soup.find("div", id="kararList")
125 | if not decision_tables_container:
126 | logger.warning("`div#kararList` (decision list container) not found. HTML structure might have changed or no decisions on this page.")
127 | else:
128 | decision_tables = decision_tables_container.find_all("table", class_="equalDivide")
129 | logger.info(f"Found {len(decision_tables)} 'table' elements with class='equalDivide' for parsing.")
130 |
131 | if not decision_tables and total_records is not None and total_records > 0 :
132 | logger.warning(f"Page indicates {total_records} records but no decision tables found with class='equalDivide'.")
133 |
134 | for idx, table in enumerate(decision_tables):
135 | logger.debug(f"Processing table {idx + 1}...")
136 | try:
137 | rows = table.find_all("tr")
138 | if len(rows) != 3:
139 | logger.warning(f"Table {idx + 1} has an unexpected number of rows ({len(rows)} instead of 3). Skipping. HTML snippet:\n{table.prettify()[:500]}")
140 | continue
141 |
142 | # Row 1: Publication Date, Decision Number, Related Cases Link
143 | td_elements_r1 = rows[0].find_all("td")
144 | pub_date = td_elements_r1[0].get_text(strip=True) if len(td_elements_r1) > 0 else None
145 | dec_num = td_elements_r1[1].get_text(strip=True) if len(td_elements_r1) > 1 else None
146 |
147 | related_cases_link_tag = td_elements_r1[2].find("a", href=True) if len(td_elements_r1) > 2 else None
148 | related_cases_url_str: Optional[str] = None
149 | karar_id_from_related: Optional[str] = None
150 | if related_cases_link_tag and related_cases_link_tag.has_attr('href'):
151 | related_cases_url_str = urljoin(self.BASE_URL, related_cases_link_tag['href'])
152 | qs_related = parse_qs(urlparse(related_cases_link_tag['href']).query)
153 | if 'kararId' in qs_related and qs_related['kararId']:
154 | karar_id_from_related = qs_related['kararId'][0]
155 |
156 | # Row 2: Decision Date, Decision Type
157 | td_elements_r2 = rows[1].find_all("td")
158 | dec_date = td_elements_r2[0].get_text(strip=True) if len(td_elements_r2) > 0 else None
159 | dec_type_text = td_elements_r2[1].get_text(strip=True) if len(td_elements_r2) > 1 else None
160 |
161 | # Row 3: Title and Main Decision Link
162 | title_cell = rows[2].find("td", colspan="5")
163 | decision_link_tag = title_cell.find("a", href=True) if title_cell else None
164 |
165 | title_text: Optional[str] = None
166 | decision_landing_url_str: Optional[str] = None
167 | karar_id_from_main_link: Optional[str] = None
168 |
169 | if decision_link_tag and decision_link_tag.has_attr('href'):
170 | title_text = decision_link_tag.get_text(strip=True)
171 | href_val = decision_link_tag['href']
172 | if href_val.startswith(self.DECISION_LANDING_PATH_TEMPLATE + "?kararId="): # Ensure it's a decision link
173 | decision_landing_url_str = urljoin(self.BASE_URL, href_val)
174 | qs_main = parse_qs(urlparse(href_val).query)
175 | if 'kararId' in qs_main and qs_main['kararId']:
176 | karar_id_from_main_link = qs_main['kararId'][0]
177 | else:
178 | logger.warning(f"Table {idx+1} decision link has unexpected format: {href_val}")
179 | else:
180 | logger.warning(f"Table {idx+1} could not find title/decision link tag.")
181 |
182 | current_karar_id = karar_id_from_main_link or karar_id_from_related
183 |
184 | if not current_karar_id:
185 | logger.warning(f"Table {idx+1} Karar ID not found. Skipping. Title (if any): {title_text}")
186 | continue
187 |
188 | # Convert string URLs to HttpUrl for the model
189 | final_decision_url = HttpUrl(decision_landing_url_str) if decision_landing_url_str else None
190 | final_related_cases_url = HttpUrl(related_cases_url_str) if related_cases_url_str else None
191 |
192 | processed_decisions.append(RekabetDecisionSummary(
193 | publication_date=pub_date, decision_number=dec_num, decision_date=dec_date,
194 | decision_type_text=dec_type_text, title=title_text,
195 | decision_url=final_decision_url,
196 | karar_id=current_karar_id,
197 | related_cases_url=final_related_cases_url
198 | ))
199 | logger.debug(f"Table {idx+1} parsed successfully: Karar ID '{current_karar_id}', Title '{title_text[:50] if title_text else 'N/A'}...'")
200 |
201 | except Exception as e:
202 | logger.warning(f"RekabetKurumuApiClient: Error parsing decision summary {idx+1}: {e}. Problematic Table HTML:\n{table.prettify()}", exc_info=True)
203 | continue
204 |
205 | return RekabetSearchResult(
206 | decisions=processed_decisions, total_records_found=total_records,
207 | retrieved_page_number=params.page, total_pages=total_pages if total_pages is not None else 0
208 | )
209 |
210 | async def _extract_pdf_url_and_landing_page_metadata(self, karar_id: str, landing_page_html: str, landing_page_url: str) -> Dict[str, Any]:
211 | soup = BeautifulSoup(landing_page_html, 'html.parser')
212 | data: Dict[str, Any] = {
213 | "pdf_url": None,
214 | "title_on_landing_page": soup.title.string.strip() if soup.title and soup.title.string else f"Rekabet Kurumu Kararı {karar_id}",
215 | }
216 | # This part needs to be robust and specific to Rekabet Kurumu's landing page structure.
217 | # Look for common patterns: direct links, download buttons, embedded viewers.
218 | pdf_anchor = soup.find("a", href=re.compile(r"\.pdf(\?|$)", re.IGNORECASE)) # Basic PDF link
219 | if not pdf_anchor: # Try other common patterns if the basic one fails
220 | # Example: Look for links with specific text or class
221 | pdf_anchor = soup.find("a", string=re.compile(r"karar metni|pdf indir", re.IGNORECASE))
222 |
223 | if pdf_anchor and pdf_anchor.has_attr('href'):
224 | pdf_path = pdf_anchor['href']
225 | data["pdf_url"] = urljoin(landing_page_url, pdf_path)
226 | logger.info(f"PDF link found on landing page (<a>): {data['pdf_url']}")
227 | else:
228 | iframe_pdf = soup.find("iframe", src=re.compile(r"\.pdf(\?|$)", re.IGNORECASE))
229 | if iframe_pdf and iframe_pdf.has_attr('src'):
230 | pdf_path = iframe_pdf['src']
231 | data["pdf_url"] = urljoin(landing_page_url, pdf_path)
232 | logger.info(f"PDF link found on landing page (<iframe>): {data['pdf_url']}")
233 | else:
234 | embed_pdf = soup.find("embed", src=re.compile(r"\.pdf(\?|$)", re.IGNORECASE), type="application/pdf")
235 | if embed_pdf and embed_pdf.has_attr('src'):
236 | pdf_path = embed_pdf['src']
237 | data["pdf_url"] = urljoin(landing_page_url, pdf_path)
238 | logger.info(f"PDF link found on landing page (<embed>): {data['pdf_url']}")
239 | else:
240 | logger.warning(f"No PDF link found on landing page {landing_page_url} for kararId {karar_id} using common selectors.")
241 | return data
242 |
243 | async def _download_pdf_bytes(self, pdf_url: str) -> Optional[bytes]:
244 | try:
245 | url_to_fetch = pdf_url if pdf_url.startswith(('http://', 'https://')) else urljoin(self.BASE_URL, pdf_url)
246 | logger.info(f"Downloading PDF from: {url_to_fetch}")
247 | response = await self.http_client.get(url_to_fetch)
248 | response.raise_for_status()
249 | pdf_bytes = await response.aread()
250 | logger.info(f"PDF content downloaded ({len(pdf_bytes)} bytes) from: {url_to_fetch}")
251 | return pdf_bytes
252 | except httpx.RequestError as e:
253 | logger.error(f"HTTP error downloading PDF from {pdf_url}: {e}")
254 | except Exception as e:
255 | logger.error(f"General error downloading PDF from {pdf_url}: {e}")
256 | return None
257 |
258 | def _extract_single_pdf_page_as_pdf_bytes(self, original_pdf_bytes: bytes, page_number_to_extract: int) -> Tuple[Optional[bytes], int]:
259 | total_pages_in_original_pdf = 0
260 | single_page_pdf_bytes: Optional[bytes] = None
261 |
262 | if not original_pdf_bytes:
263 | logger.warning("No original PDF bytes provided for page extraction.")
264 | return None, 0
265 |
266 | try:
267 | pdf_stream = io.BytesIO(original_pdf_bytes)
268 | reader = PdfReader(pdf_stream)
269 | total_pages_in_original_pdf = len(reader.pages)
270 |
271 | if not (0 < page_number_to_extract <= total_pages_in_original_pdf):
272 | logger.warning(f"Requested page number ({page_number_to_extract}) is out of PDF page range (1-{total_pages_in_original_pdf}).")
273 | return None, total_pages_in_original_pdf
274 |
275 | writer = PdfWriter()
276 | writer.add_page(reader.pages[page_number_to_extract - 1]) # pypdf is 0-indexed
277 |
278 | output_pdf_stream = io.BytesIO()
279 | writer.write(output_pdf_stream)
280 | single_page_pdf_bytes = output_pdf_stream.getvalue()
281 |
282 | logger.debug(f"Page {page_number_to_extract} of original PDF (total {total_pages_in_original_pdf} pages) extracted as new PDF using pypdf.")
283 |
284 | except Exception as e:
285 | logger.error(f"Error extracting PDF page using pypdf: {e}", exc_info=True)
286 | return None, total_pages_in_original_pdf
287 | return single_page_pdf_bytes, total_pages_in_original_pdf
288 |
289 | def _convert_pdf_bytes_to_markdown(self, pdf_bytes: bytes, source_url_for_logging: str) -> Optional[str]:
290 | if not pdf_bytes:
291 | logger.warning(f"No PDF bytes provided for Markdown conversion (source: {source_url_for_logging}).")
292 | return None
293 |
294 | pdf_stream = io.BytesIO(pdf_bytes)
295 | try:
296 | md_converter = MarkItDown(enable_plugins=False)
297 | conversion_result = md_converter.convert(pdf_stream)
298 | markdown_text = conversion_result.text_content
299 |
300 | if not markdown_text:
301 | logger.warning(f"MarkItDown returned empty content from PDF byte stream (source: {source_url_for_logging}). PDF page might be image-based or MarkItDown could not process the PDF stream.")
302 | return markdown_text
303 | except Exception as e:
304 | logger.error(f"MarkItDown conversion error for PDF byte stream (source: {source_url_for_logging}): {e}", exc_info=True)
305 | return None
306 |
307 | async def get_decision_document(self, karar_id: str, page_number: int = 1) -> RekabetDocument:
308 | if not karar_id:
309 | return RekabetDocument(
310 | source_landing_page_url=HttpUrl(f"{self.BASE_URL}"),
311 | karar_id=karar_id or "UNKNOWN_KARAR_ID",
312 | error_message="karar_id is required.",
313 | current_page=1, total_pages=0, is_paginated=False )
314 |
315 | decision_url_path = f"{self.DECISION_LANDING_PATH_TEMPLATE}?kararId={karar_id}"
316 | full_landing_page_url = urljoin(self.BASE_URL, decision_url_path)
317 |
318 | logger.info(f"RekabetKurumuApiClient: Getting decision document: {full_landing_page_url}, Requested PDF Page: {page_number}")
319 |
320 | pdf_url_to_report: Optional[HttpUrl] = None
321 | title_to_report: Optional[str] = f"Rekabet Kurumu Kararı {karar_id}" # Default
322 | error_message: Optional[str] = None
323 | markdown_for_requested_page: Optional[str] = None
324 | total_pdf_pages: int = 0
325 |
326 | try:
327 | async with self.http_client.stream("GET", full_landing_page_url) as response:
328 | response.raise_for_status()
329 | content_type = response.headers.get("content-type", "").lower()
330 | final_url_of_response = HttpUrl(str(response.url))
331 | original_pdf_bytes: Optional[bytes] = None
332 |
333 | if "application/pdf" in content_type:
334 | logger.info(f"URL {final_url_of_response} is a direct PDF. Processing content.")
335 | pdf_url_to_report = final_url_of_response
336 | original_pdf_bytes = await response.aread()
337 | elif "text/html" in content_type:
338 | logger.info(f"URL {final_url_of_response} is an HTML landing page. Looking for PDF link.")
339 | landing_page_html_bytes = await response.aread()
340 | detected_charset = response.charset_encoding or 'utf-8'
341 | try: landing_page_html = landing_page_html_bytes.decode(detected_charset)
342 | except UnicodeDecodeError: landing_page_html = landing_page_html_bytes.decode('utf-8', errors='replace')
343 |
344 | if landing_page_html.strip():
345 | landing_page_data = await self._extract_pdf_url_and_landing_page_metadata(karar_id, landing_page_html, str(final_url_of_response))
346 | pdf_url_str_from_html = landing_page_data.get("pdf_url")
347 | if landing_page_data.get("title_on_landing_page"): title_to_report = landing_page_data.get("title_on_landing_page")
348 | if pdf_url_str_from_html:
349 | pdf_url_to_report = HttpUrl(pdf_url_str_from_html)
350 | original_pdf_bytes = await self._download_pdf_bytes(str(pdf_url_to_report))
351 | else: error_message = (error_message or "") + " PDF URL not found on HTML landing page."
352 | else: error_message = "Decision landing page content is empty."
353 | else: error_message = f"Unexpected content type ({content_type}) for URL: {final_url_of_response}"
354 |
355 | if original_pdf_bytes:
356 | single_page_pdf_bytes, total_pdf_pages_from_extraction = self._extract_single_pdf_page_as_pdf_bytes(original_pdf_bytes, page_number)
357 | total_pdf_pages = total_pdf_pages_from_extraction
358 |
359 | if single_page_pdf_bytes:
360 | markdown_for_requested_page = self._convert_pdf_bytes_to_markdown(single_page_pdf_bytes, str(pdf_url_to_report or full_landing_page_url))
361 | if not markdown_for_requested_page:
362 | error_message = (error_message or "") + f"; Could not convert page {page_number} of PDF to Markdown."
363 | elif total_pdf_pages > 0 :
364 | error_message = (error_message or "") + f"; Could not extract page {page_number} from PDF (page may be out of range or extraction failed)."
365 | else:
366 | error_message = (error_message or "") + "; PDF could not be processed or page count was zero (original PDF might be invalid)."
367 | elif not error_message:
368 | error_message = "PDF content could not be downloaded or identified."
369 |
370 | is_paginated = total_pdf_pages > 1
371 | current_page_final = page_number
372 | if total_pdf_pages > 0:
373 | current_page_final = max(1, min(page_number, total_pdf_pages))
374 | elif markdown_for_requested_page is None:
375 | current_page_final = 1
376 |
377 | # If markdown is None but there was no specific error for markdown conversion (e.g. PDF not found first)
378 | # make sure error_message reflects that.
379 | if markdown_for_requested_page is None and pdf_url_to_report and not error_message:
380 | error_message = (error_message or "") + "; Failed to produce Markdown from PDF page."
381 |
382 |
383 | return RekabetDocument(
384 | source_landing_page_url=full_landing_page_url, karar_id=karar_id,
385 | title_on_landing_page=title_to_report, pdf_url=pdf_url_to_report,
386 | markdown_chunk=markdown_for_requested_page, current_page=current_page_final,
387 | total_pages=total_pdf_pages, is_paginated=is_paginated,
388 | error_message=error_message.strip("; ") if error_message else None )
389 |
390 | except httpx.HTTPStatusError as e: error_msg_detail, exc_info_flag = f"HTTP Status error {e.response.status_code} while processing decision page.", False
391 | except httpx.RequestError as e: error_msg_detail, exc_info_flag = f"HTTP Request error while processing decision page: {str(e)}", False
392 | except Exception as e: error_msg_detail, exc_info_flag = f"General error while processing decision: {str(e)}", True
393 | 
394 | # Note: 'e' is unbound once an except block exits in Python 3, so exc_info_flag is set inside the handlers above.
395 | logger.error(f"RekabetKurumuApiClient: Error processing decision {karar_id} from {full_landing_page_url}: {error_msg_detail}", exc_info=exc_info_flag)
396 | error_message = (error_message + "; " if error_message else "") + error_msg_detail
397 |
398 | return RekabetDocument(
399 | source_landing_page_url=full_landing_page_url, karar_id=karar_id,
400 | title_on_landing_page=title_to_report, pdf_url=pdf_url_to_report,
401 | markdown_chunk=None, current_page=page_number, total_pages=0, is_paginated=False,
402 | error_message=error_message.strip("; ") if error_message else "An unexpected error occurred." )
403 |
404 | async def close_client_session(self): # pragma: no cover
405 | if hasattr(self, 'http_client') and self.http_client and not self.http_client.is_closed:
406 | await self.http_client.aclose()
407 | logger.info("RekabetKurumuApiClient: HTTP client session closed.")
```
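
A minimal usage sketch for `RekabetKurumuApiClient` (not part of the repository): it assumes the package is importable as `rekabet_mcp_module` and that the unspecified fields of `RekabetKurumuSearchRequest` have optional defaults; the search text and printed fields below are illustrative only.

```python
import asyncio

from rekabet_mcp_module.client import RekabetKurumuApiClient
from rekabet_mcp_module.models import RekabetKurumuSearchRequest


async def main() -> None:
    client = RekabetKurumuApiClient()
    try:
        # Search decisions whose PDF text contains a phrase (illustrative value).
        results = await client.search_decisions(
            RekabetKurumuSearchRequest(PdfText="birleşme", page=1)
        )
        print(f"Records: {results.total_records_found}, pages: {results.total_pages}")

        if results.decisions:
            first = results.decisions[0]
            # Fetch page 1 of the decision PDF as Markdown.
            document = await client.get_decision_document(first.karar_id, page_number=1)
            print(document.title_on_landing_page, document.total_pages)
    finally:
        await client.close_client_session()


if __name__ == "__main__":
    asyncio.run(main())
```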
--------------------------------------------------------------------------------
/saidsurucu-yargi-mcp-f5fa007/rekabet_mcp_module/client.py:
--------------------------------------------------------------------------------
```python
1 | # rekabet_mcp_module/client.py
2 |
3 | import httpx
4 | from bs4 import BeautifulSoup
5 | from typing import List, Optional, Tuple, Dict, Any
6 | import logging
7 | import html
8 | import re
9 | import io # For io.BytesIO
10 | from urllib.parse import urlencode, urljoin, quote, parse_qs, urlparse
11 | from markitdown import MarkItDown
12 | import math
13 |
14 | # pypdf for PDF processing (lighter alternative to PyMuPDF)
15 | from pypdf import PdfReader, PdfWriter # pypdf, the successor to PyPDF2
16 |
17 | from .models import (
18 | RekabetKurumuSearchRequest,
19 | RekabetDecisionSummary,
20 | RekabetSearchResult,
21 | RekabetDocument,
22 | RekabetKararTuruGuidEnum
23 | )
24 | from pydantic import HttpUrl # Ensure HttpUrl is imported from pydantic
25 |
26 | logger = logging.getLogger(__name__)
27 | if not logger.hasHandlers(): # pragma: no cover
28 | logging.basicConfig(
29 | level=logging.INFO, # Default log level
30 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
31 | )
32 | # For the debug script, the level can be adjusted separately to get more detailed logging.
33 |
34 | class RekabetKurumuApiClient:
35 | BASE_URL = "https://www.rekabet.gov.tr"
36 | SEARCH_PATH = "/tr/Kararlar"
37 | DECISION_LANDING_PATH_TEMPLATE = "/Karar"
38 | # Since Markdown is returned per PDF page, this constant is no longer used directly.
39 | # DOCUMENT_MARKDOWN_CHUNK_SIZE = 5000
40 |
41 | def __init__(self, request_timeout: float = 60.0):
42 | self.http_client = httpx.AsyncClient(
43 | base_url=self.BASE_URL,
44 | headers={
45 | "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
46 | "Accept-Language": "tr-TR,tr;q=0.9,en-US;q=0.8,en;q=0.7",
47 | "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
48 | },
49 | timeout=request_timeout,
50 | verify=True,
51 | follow_redirects=True
52 | )
53 |
54 | def _build_search_query_params(self, params: RekabetKurumuSearchRequest) -> List[Tuple[str, str]]:
55 | query_params: List[Tuple[str, str]] = []
56 | query_params.append(("sayfaAdi", params.sayfaAdi if params.sayfaAdi is not None else ""))
57 | query_params.append(("YayinlanmaTarihi", params.YayinlanmaTarihi if params.YayinlanmaTarihi is not None else ""))
58 | query_params.append(("PdfText", params.PdfText if params.PdfText is not None else ""))
59 |
60 | karar_turu_id_value = ""
61 | if params.KararTuruID is not None:
62 | karar_turu_id_value = params.KararTuruID.value if params.KararTuruID.value != "ALL" else ""
63 | query_params.append(("KararTuruID", karar_turu_id_value))
64 |
65 | query_params.append(("KararSayisi", params.KararSayisi if params.KararSayisi is not None else ""))
66 | query_params.append(("KararTarihi", params.KararTarihi if params.KararTarihi is not None else ""))
67 |
68 | if params.page and params.page > 1:
69 | query_params.append(("page", str(params.page)))
70 |
71 | return query_params
72 |
73 | async def search_decisions(self, params: RekabetKurumuSearchRequest) -> RekabetSearchResult:
74 | request_path = self.SEARCH_PATH
75 | final_query_params = self._build_search_query_params(params)
76 | logger.info(f"RekabetKurumuApiClient: Performing search. Path: {request_path}, Parameters: {final_query_params}")
77 |
78 | try:
79 | response = await self.http_client.get(request_path, params=final_query_params)
80 | response.raise_for_status()
81 | html_content = response.text
82 | except httpx.RequestError as e:
83 | logger.error(f"RekabetKurumuApiClient: HTTP request error during search: {e}")
84 | raise
85 |
86 | soup = BeautifulSoup(html_content, 'html.parser')
87 | processed_decisions: List[RekabetDecisionSummary] = []
88 | total_records: Optional[int] = None
89 | total_pages: Optional[int] = None
90 |
91 | pagination_div = soup.find("div", class_="yazi01")
92 | if pagination_div:
93 | text_content = pagination_div.get_text(separator=" ", strip=True)
94 | total_match = re.search(r"Toplam\s*:\s*(\d+)", text_content)
95 | if total_match:
96 | try:
97 | total_records = int(total_match.group(1))
98 | logger.debug(f"Total records found from pagination: {total_records}")
99 | except ValueError:
100 | logger.warning(f"Could not convert 'Toplam' value to int: {total_match.group(1)}")
101 | else:
102 | logger.warning("'Toplam :' string not found in pagination section.")
103 |
104 | results_per_page_assumed = 10
105 | if total_records is not None:
106 | calculated_total_pages = math.ceil(total_records / results_per_page_assumed)
107 | total_pages = calculated_total_pages if calculated_total_pages > 0 else (1 if total_records > 0 else 0)
108 | logger.debug(f"Calculated total pages: {total_pages}")
109 |
110 | if total_pages is None: # Fallback if total_records couldn't be parsed
111 | last_page_link = pagination_div.select_one("li.PagedList-skipToLast a")
112 | if last_page_link and last_page_link.has_attr('href'):
113 | qs = parse_qs(urlparse(last_page_link['href']).query)
114 | if 'page' in qs and qs['page']:
115 | try:
116 | total_pages = int(qs['page'][0])
117 | logger.debug(f"Total pages found from 'Last >>' link: {total_pages}")
118 | except ValueError:
119 | logger.warning(f"Could not convert page value from 'Last >>' link to int: {qs['page'][0]}")
120 | elif total_records == 0 : total_pages = 0 # If no records, 0 pages
121 | elif total_records is not None and total_records > 0 : total_pages = 1 # If records exist but no last page link (e.g. single page)
122 | else: logger.warning("'Last >>' link not found in pagination section.")
123 |
124 | decision_tables_container = soup.find("div", id="kararList")
125 | if not decision_tables_container:
126 | logger.warning("`div#kararList` (decision list container) not found. HTML structure might have changed or no decisions on this page.")
127 | else:
128 | decision_tables = decision_tables_container.find_all("table", class_="equalDivide")
129 | logger.info(f"Found {len(decision_tables)} 'table' elements with class='equalDivide' for parsing.")
130 |
131 | if not decision_tables and total_records is not None and total_records > 0 :
132 | logger.warning(f"Page indicates {total_records} records but no decision tables found with class='equalDivide'.")
133 |
134 | for idx, table in enumerate(decision_tables):
135 | logger.debug(f"Processing table {idx + 1}...")
136 | try:
137 | rows = table.find_all("tr")
138 | if len(rows) != 3:
139 | logger.warning(f"Table {idx + 1} has an unexpected number of rows ({len(rows)} instead of 3). Skipping. HTML snippet:\n{table.prettify()[:500]}")
140 | continue
141 |
142 | # Row 1: Publication Date, Decision Number, Related Cases Link
143 | td_elements_r1 = rows[0].find_all("td")
144 | pub_date = td_elements_r1[0].get_text(strip=True) if len(td_elements_r1) > 0 else None
145 | dec_num = td_elements_r1[1].get_text(strip=True) if len(td_elements_r1) > 1 else None
146 |
147 | related_cases_link_tag = td_elements_r1[2].find("a", href=True) if len(td_elements_r1) > 2 else None
148 | related_cases_url_str: Optional[str] = None
149 | karar_id_from_related: Optional[str] = None
150 | if related_cases_link_tag and related_cases_link_tag.has_attr('href'):
151 | related_cases_url_str = urljoin(self.BASE_URL, related_cases_link_tag['href'])
152 | qs_related = parse_qs(urlparse(related_cases_link_tag['href']).query)
153 | if 'kararId' in qs_related and qs_related['kararId']:
154 | karar_id_from_related = qs_related['kararId'][0]
155 |
156 | # Row 2: Decision Date, Decision Type
157 | td_elements_r2 = rows[1].find_all("td")
158 | dec_date = td_elements_r2[0].get_text(strip=True) if len(td_elements_r2) > 0 else None
159 | dec_type_text = td_elements_r2[1].get_text(strip=True) if len(td_elements_r2) > 1 else None
160 |
161 | # Row 3: Title and Main Decision Link
162 | title_cell = rows[2].find("td", colspan="5")
163 | decision_link_tag = title_cell.find("a", href=True) if title_cell else None
164 |
165 | title_text: Optional[str] = None
166 | decision_landing_url_str: Optional[str] = None
167 | karar_id_from_main_link: Optional[str] = None
168 |
169 | if decision_link_tag and decision_link_tag.has_attr('href'):
170 | title_text = decision_link_tag.get_text(strip=True)
171 | href_val = decision_link_tag['href']
172 | if href_val.startswith(self.DECISION_LANDING_PATH_TEMPLATE + "?kararId="): # Ensure it's a decision link
173 | decision_landing_url_str = urljoin(self.BASE_URL, href_val)
174 | qs_main = parse_qs(urlparse(href_val).query)
175 | if 'kararId' in qs_main and qs_main['kararId']:
176 | karar_id_from_main_link = qs_main['kararId'][0]
177 | else:
178 | logger.warning(f"Table {idx+1} decision link has unexpected format: {href_val}")
179 | else:
180 | logger.warning(f"Table {idx+1} could not find title/decision link tag.")
181 |
182 | current_karar_id = karar_id_from_main_link or karar_id_from_related
183 |
184 | if not current_karar_id:
185 | logger.warning(f"Table {idx+1} Karar ID not found. Skipping. Title (if any): {title_text}")
186 | continue
187 |
188 | # Convert string URLs to HttpUrl for the model
189 | final_decision_url = HttpUrl(decision_landing_url_str) if decision_landing_url_str else None
190 | final_related_cases_url = HttpUrl(related_cases_url_str) if related_cases_url_str else None
191 |
192 | processed_decisions.append(RekabetDecisionSummary(
193 | publication_date=pub_date, decision_number=dec_num, decision_date=dec_date,
194 | decision_type_text=dec_type_text, title=title_text,
195 | decision_url=final_decision_url,
196 | karar_id=current_karar_id,
197 | related_cases_url=final_related_cases_url
198 | ))
199 | logger.debug(f"Table {idx+1} parsed successfully: Karar ID '{current_karar_id}', Title '{title_text[:50] if title_text else 'N/A'}...'")
200 |
201 | except Exception as e:
202 | logger.warning(f"RekabetKurumuApiClient: Error parsing decision summary {idx+1}: {e}. Problematic Table HTML:\n{table.prettify()}", exc_info=True)
203 | continue
204 |
205 | return RekabetSearchResult(
206 | decisions=processed_decisions, total_records_found=total_records,
207 | retrieved_page_number=params.page, total_pages=total_pages if total_pages is not None else 0
208 | )
209 |
210 | async def _extract_pdf_url_and_landing_page_metadata(self, karar_id: str, landing_page_html: str, landing_page_url: str) -> Dict[str, Any]:
211 | soup = BeautifulSoup(landing_page_html, 'html.parser')
212 | data: Dict[str, Any] = {
213 | "pdf_url": None,
214 | "title_on_landing_page": soup.title.string.strip() if soup.title and soup.title.string else f"Rekabet Kurumu Kararı {karar_id}",
215 | }
216 | # This part needs to be robust and specific to Rekabet Kurumu's landing page structure.
217 | # Look for common patterns: direct links, download buttons, embedded viewers.
218 | pdf_anchor = soup.find("a", href=re.compile(r"\.pdf(\?|$)", re.IGNORECASE)) # Basic PDF link
219 | if not pdf_anchor: # Try other common patterns if the basic one fails
220 | # Example: Look for links with specific text or class
221 | pdf_anchor = soup.find("a", string=re.compile(r"karar metni|pdf indir", re.IGNORECASE))
222 |
223 | if pdf_anchor and pdf_anchor.has_attr('href'):
224 | pdf_path = pdf_anchor['href']
225 | data["pdf_url"] = urljoin(landing_page_url, pdf_path)
226 | logger.info(f"PDF link found on landing page (<a>): {data['pdf_url']}")
227 | else:
228 | iframe_pdf = soup.find("iframe", src=re.compile(r"\.pdf(\?|$)", re.IGNORECASE))
229 | if iframe_pdf and iframe_pdf.has_attr('src'):
230 | pdf_path = iframe_pdf['src']
231 | data["pdf_url"] = urljoin(landing_page_url, pdf_path)
232 | logger.info(f"PDF link found on landing page (<iframe>): {data['pdf_url']}")
233 | else:
234 | embed_pdf = soup.find("embed", src=re.compile(r"\.pdf(\?|$)", re.IGNORECASE), type="application/pdf")
235 | if embed_pdf and embed_pdf.has_attr('src'):
236 | pdf_path = embed_pdf['src']
237 | data["pdf_url"] = urljoin(landing_page_url, pdf_path)
238 | logger.info(f"PDF link found on landing page (<embed>): {data['pdf_url']}")
239 | else:
240 | logger.warning(f"No PDF link found on landing page {landing_page_url} for kararId {karar_id} using common selectors.")
241 | return data
242 |
243 | async def _download_pdf_bytes(self, pdf_url: str) -> Optional[bytes]:
244 | try:
245 | url_to_fetch = pdf_url if pdf_url.startswith(('http://', 'https://')) else urljoin(self.BASE_URL, pdf_url)
246 | logger.info(f"Downloading PDF from: {url_to_fetch}")
247 | response = await self.http_client.get(url_to_fetch)
248 | response.raise_for_status()
249 | pdf_bytes = await response.aread()
250 | logger.info(f"PDF content downloaded ({len(pdf_bytes)} bytes) from: {url_to_fetch}")
251 | return pdf_bytes
252 | except httpx.RequestError as e:
253 | logger.error(f"HTTP error downloading PDF from {pdf_url}: {e}")
254 | except Exception as e:
255 | logger.error(f"General error downloading PDF from {pdf_url}: {e}")
256 | return None
257 |
258 | def _extract_single_pdf_page_as_pdf_bytes(self, original_pdf_bytes: bytes, page_number_to_extract: int) -> Tuple[Optional[bytes], int]:
259 | total_pages_in_original_pdf = 0
260 | single_page_pdf_bytes: Optional[bytes] = None
261 |
262 | if not original_pdf_bytes:
263 | logger.warning("No original PDF bytes provided for page extraction.")
264 | return None, 0
265 |
266 | try:
267 | pdf_stream = io.BytesIO(original_pdf_bytes)
268 | reader = PdfReader(pdf_stream)
269 | total_pages_in_original_pdf = len(reader.pages)
270 |
271 | if not (0 < page_number_to_extract <= total_pages_in_original_pdf):
272 | logger.warning(f"Requested page number ({page_number_to_extract}) is out of PDF page range (1-{total_pages_in_original_pdf}).")
273 | return None, total_pages_in_original_pdf
274 |
275 | writer = PdfWriter()
276 | writer.add_page(reader.pages[page_number_to_extract - 1]) # pypdf is 0-indexed
277 |
278 | output_pdf_stream = io.BytesIO()
279 | writer.write(output_pdf_stream)
280 | single_page_pdf_bytes = output_pdf_stream.getvalue()
281 |
282 | logger.debug(f"Page {page_number_to_extract} of original PDF (total {total_pages_in_original_pdf} pages) extracted as new PDF using pypdf.")
283 |
284 | except Exception as e:
285 | logger.error(f"Error extracting PDF page using pypdf: {e}", exc_info=True)
286 | return None, total_pages_in_original_pdf
287 | return single_page_pdf_bytes, total_pages_in_original_pdf
288 |
289 | def _convert_pdf_bytes_to_markdown(self, pdf_bytes: bytes, source_url_for_logging: str) -> Optional[str]:
290 | if not pdf_bytes:
291 | logger.warning(f"No PDF bytes provided for Markdown conversion (source: {source_url_for_logging}).")
292 | return None
293 |
294 | pdf_stream = io.BytesIO(pdf_bytes)
295 | try:
296 | md_converter = MarkItDown(enable_plugins=False)
297 | conversion_result = md_converter.convert(pdf_stream)
298 | markdown_text = conversion_result.text_content
299 |
300 | if not markdown_text:
301 | logger.warning(f"MarkItDown returned empty content from PDF byte stream (source: {source_url_for_logging}). PDF page might be image-based or MarkItDown could not process the PDF stream.")
302 | return markdown_text
303 | except Exception as e:
304 | logger.error(f"MarkItDown conversion error for PDF byte stream (source: {source_url_for_logging}): {e}", exc_info=True)
305 | return None
306 |
307 | async def get_decision_document(self, karar_id: str, page_number: int = 1) -> RekabetDocument:
308 | if not karar_id:
309 | return RekabetDocument(
310 | source_landing_page_url=HttpUrl(f"{self.BASE_URL}"),
311 | karar_id=karar_id or "UNKNOWN_KARAR_ID",
312 | error_message="karar_id is required.",
313 | current_page=1, total_pages=0, is_paginated=False )
314 |
315 | decision_url_path = f"{self.DECISION_LANDING_PATH_TEMPLATE}?kararId={karar_id}"
316 | full_landing_page_url = urljoin(self.BASE_URL, decision_url_path)
317 |
318 | logger.info(f"RekabetKurumuApiClient: Getting decision document: {full_landing_page_url}, Requested PDF Page: {page_number}")
319 |
320 | pdf_url_to_report: Optional[HttpUrl] = None
321 | title_to_report: Optional[str] = f"Rekabet Kurumu Kararı {karar_id}" # Default
322 | error_message: Optional[str] = None
323 | markdown_for_requested_page: Optional[str] = None
324 | total_pdf_pages: int = 0
325 |
326 | try:
327 | async with self.http_client.stream("GET", full_landing_page_url) as response:
328 | response.raise_for_status()
329 | content_type = response.headers.get("content-type", "").lower()
330 | final_url_of_response = HttpUrl(str(response.url))
331 | original_pdf_bytes: Optional[bytes] = None
332 |
333 | if "application/pdf" in content_type:
334 | logger.info(f"URL {final_url_of_response} is a direct PDF. Processing content.")
335 | pdf_url_to_report = final_url_of_response
336 | original_pdf_bytes = await response.aread()
337 | elif "text/html" in content_type:
338 | logger.info(f"URL {final_url_of_response} is an HTML landing page. Looking for PDF link.")
339 | landing_page_html_bytes = await response.aread()
340 | detected_charset = response.charset_encoding or 'utf-8'
341 | try: landing_page_html = landing_page_html_bytes.decode(detected_charset)
342 | except UnicodeDecodeError: landing_page_html = landing_page_html_bytes.decode('utf-8', errors='replace')
343 |
344 | if landing_page_html.strip():
345 | landing_page_data = await self._extract_pdf_url_and_landing_page_metadata(karar_id, landing_page_html, str(final_url_of_response))
346 | pdf_url_str_from_html = landing_page_data.get("pdf_url")
347 | if landing_page_data.get("title_on_landing_page"): title_to_report = landing_page_data.get("title_on_landing_page")
348 | if pdf_url_str_from_html:
349 | pdf_url_to_report = HttpUrl(pdf_url_str_from_html)
350 | original_pdf_bytes = await self._download_pdf_bytes(str(pdf_url_to_report))
351 | else: error_message = (error_message or "") + " PDF URL not found on HTML landing page."
352 | else: error_message = "Decision landing page content is empty."
353 | else: error_message = f"Unexpected content type ({content_type}) for URL: {final_url_of_response}"
354 |
355 | if original_pdf_bytes:
356 | single_page_pdf_bytes, total_pdf_pages_from_extraction = self._extract_single_pdf_page_as_pdf_bytes(original_pdf_bytes, page_number)
357 | total_pdf_pages = total_pdf_pages_from_extraction
358 |
359 | if single_page_pdf_bytes:
360 | markdown_for_requested_page = self._convert_pdf_bytes_to_markdown(single_page_pdf_bytes, str(pdf_url_to_report or full_landing_page_url))
361 | if not markdown_for_requested_page:
362 | error_message = (error_message or "") + f"; Could not convert page {page_number} of PDF to Markdown."
363 | elif total_pdf_pages > 0 :
364 | error_message = (error_message or "") + f"; Could not extract page {page_number} from PDF (page may be out of range or extraction failed)."
365 | else:
366 | error_message = (error_message or "") + "; PDF could not be processed or page count was zero (original PDF might be invalid)."
367 | elif not error_message:
368 | error_message = "PDF content could not be downloaded or identified."
369 |
370 | is_paginated = total_pdf_pages > 1
371 | current_page_final = page_number
372 | if total_pdf_pages > 0:
373 | current_page_final = max(1, min(page_number, total_pdf_pages))
374 | elif markdown_for_requested_page is None:
375 | current_page_final = 1
376 |
377 | # If markdown is None but there was no specific error for markdown conversion (e.g. PDF not found first)
378 | # make sure error_message reflects that.
379 | if markdown_for_requested_page is None and pdf_url_to_report and not error_message:
380 | error_message = (error_message or "") + "; Failed to produce Markdown from PDF page."
381 |
382 |
383 | return RekabetDocument(
384 | source_landing_page_url=full_landing_page_url, karar_id=karar_id,
385 | title_on_landing_page=title_to_report, pdf_url=pdf_url_to_report,
386 | markdown_chunk=markdown_for_requested_page, current_page=current_page_final,
387 | total_pages=total_pdf_pages, is_paginated=is_paginated,
388 | error_message=error_message.strip("; ") if error_message else None )
389 |
390 | except httpx.HTTPStatusError as e: error_msg_detail, exc_info_flag = f"HTTP Status error {e.response.status_code} while processing decision page.", False
391 | except httpx.RequestError as e: error_msg_detail, exc_info_flag = f"HTTP Request error while processing decision page: {str(e)}", False
392 | except Exception as e: error_msg_detail, exc_info_flag = f"General error while processing decision: {str(e)}", True
393 | 
394 | # Note: 'e' is unbound once an except block exits in Python 3, so exc_info_flag is set inside the handlers above.
395 | logger.error(f"RekabetKurumuApiClient: Error processing decision {karar_id} from {full_landing_page_url}: {error_msg_detail}", exc_info=exc_info_flag)
396 | error_message = (error_message + "; " if error_message else "") + error_msg_detail
397 |
398 | return RekabetDocument(
399 | source_landing_page_url=full_landing_page_url, karar_id=karar_id,
400 | title_on_landing_page=title_to_report, pdf_url=pdf_url_to_report,
401 | markdown_chunk=None, current_page=page_number, total_pages=0, is_paginated=False,
402 | error_message=error_message.strip("; ") if error_message else "An unexpected error occurred." )
403 |
404 | async def close_client_session(self): # pragma: no cover
405 | if hasattr(self, 'http_client') and self.http_client and not self.http_client.is_closed:
406 | await self.http_client.aclose()
407 | logger.info("RekabetKurumuApiClient: HTTP client session closed.")
```
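
The document retrieval path above converts one PDF page at a time: pypdf extracts the requested page into a fresh in-memory PDF, and MarkItDown converts only that page to Markdown. A standalone sketch of the same technique (assuming only `pypdf` and `markitdown` are installed; the file path in the usage comment is a placeholder):

```python
import io
from typing import Optional

from markitdown import MarkItDown
from pypdf import PdfReader, PdfWriter


def pdf_page_to_markdown(pdf_bytes: bytes, page_number: int) -> Optional[str]:
    """Extract one page from a PDF and convert only that page to Markdown."""
    reader = PdfReader(io.BytesIO(pdf_bytes))
    if not (0 < page_number <= len(reader.pages)):
        return None  # requested page is out of range

    # Write the single requested page into a new in-memory PDF.
    writer = PdfWriter()
    writer.add_page(reader.pages[page_number - 1])  # pypdf pages are 0-indexed
    single_page_stream = io.BytesIO()
    writer.write(single_page_stream)
    single_page_stream.seek(0)

    # MarkItDown accepts a binary stream, so no temporary file is needed.
    result = MarkItDown(enable_plugins=False).convert(single_page_stream)
    return result.text_content


# Usage (placeholder path):
# with open("decision.pdf", "rb") as f:
#     print(pdf_page_to_markdown(f.read(), page_number=1))
```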
--------------------------------------------------------------------------------
/saidsurucu-yargi-mcp-f5fa007/sayistay_mcp_module/client.py:
--------------------------------------------------------------------------------
```python
1 | # sayistay_mcp_module/client.py
2 |
3 | import httpx
4 | import re
5 | from bs4 import BeautifulSoup
6 | from typing import Dict, Any, List, Optional, Tuple
7 | import logging
8 | import html
9 | import io
10 | from urllib.parse import urlencode, urljoin
11 | from markitdown import MarkItDown
12 |
13 | from .models import (
14 | GenelKurulSearchRequest, GenelKurulSearchResponse, GenelKurulDecision,
15 | TemyizKuruluSearchRequest, TemyizKuruluSearchResponse, TemyizKuruluDecision,
16 | DaireSearchRequest, DaireSearchResponse, DaireDecision,
17 | SayistayDocumentMarkdown
18 | )
19 | from .enums import DaireEnum, KamuIdaresiTuruEnum, WebKararKonusuEnum, WEB_KARAR_KONUSU_MAPPING
20 |
21 | logger = logging.getLogger(__name__)
22 | if not logger.hasHandlers():
23 | logging.basicConfig(
24 | level=logging.INFO,
25 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
26 | )
27 |
28 | class SayistayApiClient:
29 | """
30 | API Client for Sayıştay (Turkish Court of Accounts) decision search system.
31 |
32 | Handles three types of decisions:
33 | - Genel Kurul (General Assembly): Precedent-setting interpretive decisions
34 | - Temyiz Kurulu (Appeals Board): Appeals against chamber decisions
35 | - Daire (Chamber): First-instance audit findings and sanctions
36 |
37 | Features:
38 | - ASP.NET WebForms session management with CSRF tokens
39 | - DataTables-based pagination and filtering
40 | - Automatic session refresh on expiration
41 | - Document retrieval with Markdown conversion
42 | """
43 |
44 | BASE_URL = "https://www.sayistay.gov.tr"
45 |
46 | # Search endpoints for each decision type
47 | GENEL_KURUL_ENDPOINT = "/KararlarGenelKurul/DataTablesList"
48 | TEMYIZ_KURULU_ENDPOINT = "/KararlarTemyiz/DataTablesList"
49 | DAIRE_ENDPOINT = "/KararlarDaire/DataTablesList"
50 |
51 | # Page endpoints for session initialization and document access
52 | GENEL_KURUL_PAGE = "/KararlarGenelKurul"
53 | TEMYIZ_KURULU_PAGE = "/KararlarTemyiz"
54 | DAIRE_PAGE = "/KararlarDaire"
55 |
56 | def __init__(self, request_timeout: float = 60.0):
57 | self.request_timeout = request_timeout
58 | self.session_cookies: Dict[str, str] = {}
59 | self.csrf_tokens: Dict[str, str] = {} # Store tokens for each endpoint
60 |
61 | self.http_client = httpx.AsyncClient(
62 | base_url=self.BASE_URL,
63 | headers={
64 | "Accept": "application/json, text/javascript, */*; q=0.01",
65 | "Accept-Language": "tr-TR,tr;q=0.9,en-US;q=0.8,en;q=0.7",
66 | "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
67 | "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36",
68 | "X-Requested-With": "XMLHttpRequest",
69 | "Sec-Fetch-Dest": "empty",
70 | "Sec-Fetch-Mode": "cors",
71 | "Sec-Fetch-Site": "same-origin"
72 | },
73 | timeout=request_timeout,
74 | follow_redirects=True
75 | )
76 |
77 | async def _initialize_session_for_endpoint(self, endpoint_type: str) -> bool:
78 | """
79 | Initialize session and obtain CSRF token for specific endpoint.
80 |
81 | Args:
82 | endpoint_type: One of 'genel_kurul', 'temyiz_kurulu', 'daire'
83 |
84 | Returns:
85 | True if session initialized successfully, False otherwise
86 | """
87 | page_mapping = {
88 | 'genel_kurul': self.GENEL_KURUL_PAGE,
89 | 'temyiz_kurulu': self.TEMYIZ_KURULU_PAGE,
90 | 'daire': self.DAIRE_PAGE
91 | }
92 |
93 | if endpoint_type not in page_mapping:
94 | logger.error(f"Invalid endpoint type: {endpoint_type}")
95 | return False
96 |
97 | page_url = page_mapping[endpoint_type]
98 | logger.info(f"Initializing session for {endpoint_type} endpoint: {page_url}")
99 |
100 | try:
101 | response = await self.http_client.get(page_url)
102 | response.raise_for_status()
103 |
104 | # Extract session cookies
105 | for cookie_name, cookie_value in response.cookies.items():
106 | self.session_cookies[cookie_name] = cookie_value
107 | logger.debug(f"Stored session cookie: {cookie_name}")
108 |
109 | # Extract CSRF token from form
110 | soup = BeautifulSoup(response.text, 'html.parser')
111 | csrf_input = soup.find('input', {'name': '__RequestVerificationToken'})
112 |
113 | if csrf_input and csrf_input.get('value'):
114 | self.csrf_tokens[endpoint_type] = csrf_input['value']
115 | logger.info(f"Extracted CSRF token for {endpoint_type}")
116 | return True
117 | else:
118 | logger.warning(f"CSRF token not found in {endpoint_type} page")
119 | return False
120 |
121 | except httpx.RequestError as e:
122 | logger.error(f"HTTP error during session initialization for {endpoint_type}: {e}")
123 | return False
124 | except Exception as e:
125 | logger.error(f"Error initializing session for {endpoint_type}: {e}")
126 | return False
127 |
128 | def _enum_to_form_value(self, enum_value: str, enum_type: str) -> str:
129 | """Convert enum values to form values expected by the API."""
130 | if enum_value == "ALL":
131 | if enum_type == "daire":
132 | return "Tüm Daireler"
133 | elif enum_type == "kamu_idaresi":
134 | return "Tüm Kurumlar"
135 | elif enum_type == "web_karar_konusu":
136 | return "Tüm Konular"
137 |
138 | # Apply web_karar_konusu mapping
139 | if enum_type == "web_karar_konusu":
140 | return WEB_KARAR_KONUSU_MAPPING.get(enum_value, enum_value)
141 |
142 | return enum_value
143 |
144 | def _build_datatables_params(self, start: int, length: int, draw: int = 1) -> List[Tuple[str, str]]:
145 | """Build standard DataTables parameters for all endpoints."""
146 | params = [
147 | ("draw", str(draw)),
148 | ("start", str(start)),
149 | ("length", str(length)),
150 | ("search[value]", ""),
151 | ("search[regex]", "false")
152 | ]
153 | return params
154 |
155 | def _build_genel_kurul_form_data(self, params: GenelKurulSearchRequest, draw: int = 1) -> List[Tuple[str, str]]:
156 | """Build form data for Genel Kurul search request."""
157 | form_data = self._build_datatables_params(params.start, params.length, draw)
158 |
159 | # Add DataTables column definitions (from actual request)
160 | column_defs = [
161 | ("columns[0][data]", "KARARNO"),
162 | ("columns[0][name]", ""),
163 | ("columns[0][searchable]", "true"),
164 | ("columns[0][orderable]", "false"),
165 | ("columns[0][search][value]", ""),
166 | ("columns[0][search][regex]", "false"),
167 |
168 | ("columns[1][data]", "KARARNO"),
169 | ("columns[1][name]", ""),
170 | ("columns[1][searchable]", "true"),
171 | ("columns[1][orderable]", "true"),
172 | ("columns[1][search][value]", ""),
173 | ("columns[1][search][regex]", "false"),
174 |
175 | ("columns[2][data]", "KARARTARIH"),
176 | ("columns[2][name]", ""),
177 | ("columns[2][searchable]", "true"),
178 | ("columns[2][orderable]", "true"),
179 | ("columns[2][search][value]", ""),
180 | ("columns[2][search][regex]", "false"),
181 |
182 | ("columns[3][data]", "KARAROZETI"),
183 | ("columns[3][name]", ""),
184 | ("columns[3][searchable]", "true"),
185 | ("columns[3][orderable]", "false"),
186 | ("columns[3][search][value]", ""),
187 | ("columns[3][search][regex]", "false"),
188 |
189 | ("columns[4][data]", ""),
190 | ("columns[4][name]", ""),
191 | ("columns[4][searchable]", "true"),
192 | ("columns[4][orderable]", "false"),
193 | ("columns[4][search][value]", ""),
194 | ("columns[4][search][regex]", "false"),
195 |
196 | ("order[0][column]", "2"),
197 | ("order[0][dir]", "desc")
198 | ]
199 | form_data.extend(column_defs)
200 |
201 | # Add search parameters
202 | form_data.extend([
203 | ("KararlarGenelKurulAra.KARARNO", params.karar_no or ""),
204 | ("__Invariant[]", "KararlarGenelKurulAra.KARARNO"),
205 | ("__Invariant[]", "KararlarGenelKurulAra.KARAREK"),
206 | ("KararlarGenelKurulAra.KARAREK", params.karar_ek or ""),
207 | ("KararlarGenelKurulAra.KARARTARIHBaslangic", params.karar_tarih_baslangic or "Başlangıç Tarihi"),
208 | ("KararlarGenelKurulAra.KARARTARIHBitis", params.karar_tarih_bitis or "Bitiş Tarihi"),
209 | ("KararlarGenelKurulAra.KARARTAMAMI", params.karar_tamami or ""),
210 | ("__RequestVerificationToken", self.csrf_tokens.get('genel_kurul', ''))
211 | ])
212 |
213 | return form_data
214 |
215 | def _build_temyiz_kurulu_form_data(self, params: TemyizKuruluSearchRequest, draw: int = 1) -> List[Tuple[str, str]]:
216 | """Build form data for Temyiz Kurulu search request."""
217 | form_data = self._build_datatables_params(params.start, params.length, draw)
218 |
219 | # Add DataTables column definitions (from actual request)
220 | column_defs = [
221 | ("columns[0][data]", "TEMYIZTUTANAKTARIHI"),
222 | ("columns[0][name]", ""),
223 | ("columns[0][searchable]", "true"),
224 | ("columns[0][orderable]", "false"),
225 | ("columns[0][search][value]", ""),
226 | ("columns[0][search][regex]", "false"),
227 |
228 | ("columns[1][data]", "TEMYIZTUTANAKTARIHI"),
229 | ("columns[1][name]", ""),
230 | ("columns[1][searchable]", "true"),
231 | ("columns[1][orderable]", "true"),
232 | ("columns[1][search][value]", ""),
233 | ("columns[1][search][regex]", "false"),
234 |
235 | ("columns[2][data]", "ILAMDAIRESI"),
236 | ("columns[2][name]", ""),
237 | ("columns[2][searchable]", "true"),
238 | ("columns[2][orderable]", "true"),
239 | ("columns[2][search][value]", ""),
240 | ("columns[2][search][regex]", "false"),
241 |
242 | ("columns[3][data]", "TEMYIZKARAR"),
243 | ("columns[3][name]", ""),
244 | ("columns[3][searchable]", "true"),
245 | ("columns[3][orderable]", "false"),
246 | ("columns[3][search][value]", ""),
247 | ("columns[3][search][regex]", "false"),
248 |
249 | ("columns[4][data]", ""),
250 | ("columns[4][name]", ""),
251 | ("columns[4][searchable]", "true"),
252 | ("columns[4][orderable]", "false"),
253 | ("columns[4][search][value]", ""),
254 | ("columns[4][search][regex]", "false"),
255 |
256 | ("order[0][column]", "1"),
257 | ("order[0][dir]", "desc")
258 | ]
259 | form_data.extend(column_defs)
260 |
261 | # Add search parameters
262 | daire_value = self._enum_to_form_value(params.ilam_dairesi, "daire")
263 | kamu_idaresi_value = self._enum_to_form_value(params.kamu_idaresi_turu, "kamu_idaresi")
264 | web_karar_konusu_value = self._enum_to_form_value(params.web_karar_konusu, "web_karar_konusu")
265 |
266 | form_data.extend([
267 | ("KararlarTemyizAra.ILAMDAIRESI", daire_value),
268 | ("KararlarTemyizAra.YILI", params.yili or ""),
269 | ("KararlarTemyizAra.KARARTRHBaslangic", params.karar_tarih_baslangic or ""),
270 | ("KararlarTemyizAra.KARARTRHBitis", params.karar_tarih_bitis or ""),
271 | ("KararlarTemyizAra.KAMUIDARESITURU", kamu_idaresi_value if kamu_idaresi_value != "Tüm Kurumlar" else ""),
272 | ("KararlarTemyizAra.ILAMNO", params.ilam_no or ""),
273 | ("KararlarTemyizAra.DOSYANO", params.dosya_no or ""),
274 | ("KararlarTemyizAra.TEMYIZTUTANAKNO", params.temyiz_tutanak_no or ""),
275 | ("__Invariant", "KararlarTemyizAra.TEMYIZTUTANAKNO"),
276 | ("KararlarTemyizAra.TEMYIZKARAR", params.temyiz_karar or ""),
277 | ("KararlarTemyizAra.WEBKARARKONUSU", web_karar_konusu_value if web_karar_konusu_value != "Tüm Konular" else ""),
278 | ("__RequestVerificationToken", self.csrf_tokens.get('temyiz_kurulu', ''))
279 | ])
280 |
281 | return form_data
282 |
283 | def _build_daire_form_data(self, params: DaireSearchRequest, draw: int = 1) -> List[Tuple[str, str]]:
284 | """Build form data for Daire search request."""
285 | form_data = self._build_datatables_params(params.start, params.length, draw)
286 |
287 | # Add DataTables column definitions (from actual request)
288 | column_defs = [
289 | ("columns[0][data]", "YARGILAMADAIRESI"),
290 | ("columns[0][name]", ""),
291 | ("columns[0][searchable]", "true"),
292 | ("columns[0][orderable]", "false"),
293 | ("columns[0][search][value]", ""),
294 | ("columns[0][search][regex]", "false"),
295 |
296 | ("columns[1][data]", "KARARTRH"),
297 | ("columns[1][name]", ""),
298 | ("columns[1][searchable]", "true"),
299 | ("columns[1][orderable]", "true"),
300 | ("columns[1][search][value]", ""),
301 | ("columns[1][search][regex]", "false"),
302 |
303 | ("columns[2][data]", "KARARNO"),
304 | ("columns[2][name]", ""),
305 | ("columns[2][searchable]", "true"),
306 | ("columns[2][orderable]", "true"),
307 | ("columns[2][search][value]", ""),
308 | ("columns[2][search][regex]", "false"),
309 |
310 | ("columns[3][data]", "YARGILAMADAIRESI"),
311 | ("columns[3][name]", ""),
312 | ("columns[3][searchable]", "true"),
313 | ("columns[3][orderable]", "true"),
314 | ("columns[3][search][value]", ""),
315 | ("columns[3][search][regex]", "false"),
316 |
317 | ("columns[4][data]", "WEBKARARMETNI"),
318 | ("columns[4][name]", ""),
319 | ("columns[4][searchable]", "true"),
320 | ("columns[4][orderable]", "false"),
321 | ("columns[4][search][value]", ""),
322 | ("columns[4][search][regex]", "false"),
323 |
324 | ("columns[5][data]", ""),
325 | ("columns[5][name]", ""),
326 | ("columns[5][searchable]", "true"),
327 | ("columns[5][orderable]", "false"),
328 | ("columns[5][search][value]", ""),
329 | ("columns[5][search][regex]", "false"),
330 |
331 | ("order[0][column]", "2"),
332 | ("order[0][dir]", "desc")
333 | ]
334 | form_data.extend(column_defs)
335 |
336 | # Add search parameters
337 | daire_value = self._enum_to_form_value(params.yargilama_dairesi, "daire")
338 | kamu_idaresi_value = self._enum_to_form_value(params.kamu_idaresi_turu, "kamu_idaresi")
339 | web_karar_konusu_value = self._enum_to_form_value(params.web_karar_konusu, "web_karar_konusu")
340 |
341 | form_data.extend([
342 | ("KararlarDaireAra.YARGILAMADAIRESI", daire_value),
343 | ("KararlarDaireAra.KARARTRHBaslangic", params.karar_tarih_baslangic or ""),
344 | ("KararlarDaireAra.KARARTRHBitis", params.karar_tarih_bitis or ""),
345 | ("KararlarDaireAra.ILAMNO", params.ilam_no or ""),
346 | ("KararlarDaireAra.KAMUIDARESITURU", kamu_idaresi_value if kamu_idaresi_value != "Tüm Kurumlar" else ""),
347 | ("KararlarDaireAra.HESAPYILI", params.hesap_yili or ""),
348 | ("KararlarDaireAra.WEBKARARKONUSU", web_karar_konusu_value if web_karar_konusu_value != "Tüm Konular" else ""),
349 | ("KararlarDaireAra.WEBKARARMETNI", params.web_karar_metni or ""),
350 | ("__RequestVerificationToken", self.csrf_tokens.get('daire', ''))
351 | ])
352 |
353 | return form_data
354 |
355 | async def search_genel_kurul_decisions(self, params: GenelKurulSearchRequest) -> GenelKurulSearchResponse:
356 | """
357 | Search Sayıştay Genel Kurul (General Assembly) decisions.
358 |
359 | Args:
360 | params: Search parameters for Genel Kurul decisions
361 |
362 | Returns:
363 | GenelKurulSearchResponse with matching decisions
364 | """
365 | # Initialize session if needed
366 | if 'genel_kurul' not in self.csrf_tokens:
367 | if not await self._initialize_session_for_endpoint('genel_kurul'):
368 | raise Exception("Failed to initialize session for Genel Kurul endpoint")
369 |
370 | form_data = self._build_genel_kurul_form_data(params)
371 | encoded_data = urlencode(form_data, encoding='utf-8')
372 |
373 | logger.info(f"Searching Genel Kurul decisions with parameters: {params.model_dump(exclude_none=True)}")
374 |
375 | try:
376 | # Update headers with cookies
377 | headers = self.http_client.headers.copy()
378 | if self.session_cookies:
379 | cookie_header = "; ".join([f"{k}={v}" for k, v in self.session_cookies.items()])
380 | headers["Cookie"] = cookie_header
381 |
382 | response = await self.http_client.post(
383 | self.GENEL_KURUL_ENDPOINT,
384 | data=encoded_data,
385 | headers=headers
386 | )
387 | response.raise_for_status()
388 | response_json = response.json()
389 |
390 | # Parse response
391 | decisions = []
392 | for item in response_json.get('data', []):
393 | decisions.append(GenelKurulDecision(
394 | id=item['Id'],
395 | karar_no=item['KARARNO'],
396 | karar_tarih=item['KARARTARIH'],
397 | karar_ozeti=item['KARAROZETI']
398 | ))
399 |
400 | return GenelKurulSearchResponse(
401 | decisions=decisions,
402 | total_records=response_json.get('recordsTotal', 0),
403 | total_filtered=response_json.get('recordsFiltered', 0),
404 | draw=response_json.get('draw', 1)
405 | )
406 |
407 | except httpx.RequestError as e:
408 | logger.error(f"HTTP error during Genel Kurul search: {e}")
409 | raise
410 | except Exception as e:
411 | logger.error(f"Error processing Genel Kurul search: {e}")
412 | raise
413 |
414 | async def search_temyiz_kurulu_decisions(self, params: TemyizKuruluSearchRequest) -> TemyizKuruluSearchResponse:
415 | """
416 | Search Sayıştay Temyiz Kurulu (Appeals Board) decisions.
417 |
418 | Args:
419 | params: Search parameters for Temyiz Kurulu decisions
420 |
421 | Returns:
422 | TemyizKuruluSearchResponse with matching decisions
423 | """
424 | # Initialize session if needed
425 | if 'temyiz_kurulu' not in self.csrf_tokens:
426 | if not await self._initialize_session_for_endpoint('temyiz_kurulu'):
427 | raise Exception("Failed to initialize session for Temyiz Kurulu endpoint")
428 |
429 | form_data = self._build_temyiz_kurulu_form_data(params)
430 | encoded_data = urlencode(form_data, encoding='utf-8')
431 |
432 | logger.info(f"Searching Temyiz Kurulu decisions with parameters: {params.model_dump(exclude_none=True)}")
433 |
434 | try:
435 | # Update headers with cookies
436 | headers = self.http_client.headers.copy()
437 | if self.session_cookies:
438 | cookie_header = "; ".join([f"{k}={v}" for k, v in self.session_cookies.items()])
439 | headers["Cookie"] = cookie_header
440 |
441 | response = await self.http_client.post(
442 | self.TEMYIZ_KURULU_ENDPOINT,
443 | data=encoded_data,
444 | headers=headers
445 | )
446 | response.raise_for_status()
447 | response_json = response.json()
448 |
449 | # Parse response
450 | decisions = []
451 | for item in response_json.get('data', []):
452 | decisions.append(TemyizKuruluDecision(
453 | id=item['Id'],
454 | temyiz_tutanak_tarihi=item['TEMYIZTUTANAKTARIHI'],
455 | ilam_dairesi=item['ILAMDAIRESI'],
456 | temyiz_karar=item['TEMYIZKARAR']
457 | ))
458 |
459 | return TemyizKuruluSearchResponse(
460 | decisions=decisions,
461 | total_records=response_json.get('recordsTotal', 0),
462 | total_filtered=response_json.get('recordsFiltered', 0),
463 | draw=response_json.get('draw', 1)
464 | )
465 |
466 | except httpx.RequestError as e:
467 | logger.error(f"HTTP error during Temyiz Kurulu search: {e}")
468 | raise
469 | except Exception as e:
470 | logger.error(f"Error processing Temyiz Kurulu search: {e}")
471 | raise
472 |
473 | async def search_daire_decisions(self, params: DaireSearchRequest) -> DaireSearchResponse:
474 | """
475 | Search Sayıştay Daire (Chamber) decisions.
476 |
477 | Args:
478 | params: Search parameters for Daire decisions
479 |
480 | Returns:
481 | DaireSearchResponse with matching decisions
482 | """
483 | # Initialize session if needed
484 | if 'daire' not in self.csrf_tokens:
485 | if not await self._initialize_session_for_endpoint('daire'):
486 | raise Exception("Failed to initialize session for Daire endpoint")
487 |
488 | form_data = self._build_daire_form_data(params)
489 | encoded_data = urlencode(form_data, encoding='utf-8')
490 |
491 | logger.info(f"Searching Daire decisions with parameters: {params.model_dump(exclude_none=True)}")
492 |
493 | try:
494 | # Update headers with cookies
495 | headers = self.http_client.headers.copy()
496 | if self.session_cookies:
497 | cookie_header = "; ".join([f"{k}={v}" for k, v in self.session_cookies.items()])
498 | headers["Cookie"] = cookie_header
499 |
500 | response = await self.http_client.post(
501 | self.DAIRE_ENDPOINT,
502 | data=encoded_data,
503 | headers=headers
504 | )
505 | response.raise_for_status()
506 | response_json = response.json()
507 |
508 | # Parse response
509 | decisions = []
510 | for item in response_json.get('data', []):
511 | decisions.append(DaireDecision(
512 | id=item['Id'],
513 | yargilama_dairesi=item['YARGILAMADAIRESI'],
514 | karar_tarih=item['KARARTRH'],
515 | karar_no=item['KARARNO'],
516 | ilam_no=item.get('ILAMNO'), # Use get() to handle None values
517 | madde_no=item['MADDENO'],
518 | kamu_idaresi_turu=item['KAMUIDARESITURU'],
519 | hesap_yili=item['HESAPYILI'],
520 | web_karar_konusu=item['WEBKARARKONUSU'],
521 | web_karar_metni=item['WEBKARARMETNI']
522 | ))
523 |
524 | return DaireSearchResponse(
525 | decisions=decisions,
526 | total_records=response_json.get('recordsTotal', 0),
527 | total_filtered=response_json.get('recordsFiltered', 0),
528 | draw=response_json.get('draw', 1)
529 | )
530 |
531 | except httpx.RequestError as e:
532 | logger.error(f"HTTP error during Daire search: {e}")
533 | raise
534 | except Exception as e:
535 | logger.error(f"Error processing Daire search: {e}")
536 | raise
537 |
538 | def _convert_html_to_markdown(self, html_content: str) -> Optional[str]:
539 | """Convert HTML content to Markdown using MarkItDown with BytesIO to avoid filename length issues."""
540 | if not html_content:
541 | return None
542 |
543 | try:
544 | # Convert HTML string to bytes and create BytesIO stream
545 | html_bytes = html_content.encode('utf-8')
546 | html_stream = io.BytesIO(html_bytes)
547 |
548 | # Pass BytesIO stream to MarkItDown to avoid temp file creation
549 | md_converter = MarkItDown()
550 | result = md_converter.convert(html_stream)
551 | markdown_content = result.text_content
552 |
553 | logger.info("Successfully converted HTML to Markdown")
554 | return markdown_content
555 |
556 | except Exception as e:
557 | logger.error(f"Error converting HTML to Markdown: {e}")
558 | return f"Error converting HTML content: {str(e)}"
559 |
560 | async def get_document_as_markdown(self, decision_id: str, decision_type: str) -> SayistayDocumentMarkdown:
561 | """
562 | Retrieve full text of a Sayıştay decision and convert to Markdown.
563 |
564 | Args:
565 | decision_id: Unique decision identifier
566 | decision_type: Type of decision ('genel_kurul', 'temyiz_kurulu', 'daire')
567 |
568 | Returns:
569 | SayistayDocumentMarkdown with converted content
570 | """
571 | logger.info(f"Retrieving document for {decision_type} decision ID: {decision_id}")
572 |
573 | # Validate decision_id
574 | if not decision_id or not decision_id.strip():
575 | return SayistayDocumentMarkdown(
576 | decision_id=decision_id,
577 | decision_type=decision_type,
578 | source_url="",
579 | markdown_content=None,
580 | error_message="Decision ID cannot be empty"
581 | )
582 |
583 | # Map decision type to URL path
584 | url_path_mapping = {
585 | 'genel_kurul': 'KararlarGenelKurul',
586 | 'temyiz_kurulu': 'KararlarTemyiz',
587 | 'daire': 'KararlarDaire'
588 | }
589 |
590 | if decision_type not in url_path_mapping:
591 | return SayistayDocumentMarkdown(
592 | decision_id=decision_id,
593 | decision_type=decision_type,
594 | source_url="",
595 | markdown_content=None,
596 | error_message=f"Invalid decision type: {decision_type}. Must be one of: {list(url_path_mapping.keys())}"
597 | )
598 |
599 | # Build document URL
600 | url_path = url_path_mapping[decision_type]
601 | document_url = f"{self.BASE_URL}/{url_path}/Detay/{decision_id}/"
602 |
603 | try:
604 | # Make HTTP GET request to document URL
605 | headers = {
606 | "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
607 | "Accept-Language": "tr-TR,tr;q=0.9,en-US;q=0.8,en;q=0.7",
608 | "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36",
609 | "Sec-Fetch-Dest": "document",
610 | "Sec-Fetch-Mode": "navigate",
611 | "Sec-Fetch-Site": "same-origin"
612 | }
613 |
614 | # Include session cookies if available
615 | if self.session_cookies:
616 | cookie_header = "; ".join([f"{k}={v}" for k, v in self.session_cookies.items()])
617 | headers["Cookie"] = cookie_header
618 |
619 | response = await self.http_client.get(document_url, headers=headers)
620 | response.raise_for_status()
621 | html_content = response.text
622 |
623 | if not html_content or not html_content.strip():
624 | logger.warning(f"Received empty HTML content from {document_url}")
625 | return SayistayDocumentMarkdown(
626 | decision_id=decision_id,
627 | decision_type=decision_type,
628 | source_url=document_url,
629 | markdown_content=None,
630 | error_message="Document content is empty"
631 | )
632 |
633 | # Convert HTML to Markdown using existing method
634 | markdown_content = self._convert_html_to_markdown(html_content)
635 |
636 | if markdown_content and "Error converting HTML content" not in markdown_content:
637 | logger.info(f"Successfully retrieved and converted document {decision_id} to Markdown")
638 | return SayistayDocumentMarkdown(
639 | decision_id=decision_id,
640 | decision_type=decision_type,
641 | source_url=document_url,
642 | markdown_content=markdown_content,
643 | retrieval_date=None # Could add datetime.now().isoformat() if needed
644 | )
645 | else:
646 | return SayistayDocumentMarkdown(
647 | decision_id=decision_id,
648 | decision_type=decision_type,
649 | source_url=document_url,
650 | markdown_content=None,
651 | error_message=f"Failed to convert HTML to Markdown: {markdown_content}"
652 | )
653 |
654 | except httpx.HTTPStatusError as e:
655 | error_msg = f"HTTP error {e.response.status_code} when fetching document: {e}"
656 | logger.error(f"HTTP error fetching document {decision_id}: {error_msg}")
657 | return SayistayDocumentMarkdown(
658 | decision_id=decision_id,
659 | decision_type=decision_type,
660 | source_url=document_url,
661 | markdown_content=None,
662 | error_message=error_msg
663 | )
664 | except httpx.RequestError as e:
665 | error_msg = f"Network error when fetching document: {e}"
666 | logger.error(f"Network error fetching document {decision_id}: {error_msg}")
667 | return SayistayDocumentMarkdown(
668 | decision_id=decision_id,
669 | decision_type=decision_type,
670 | source_url=document_url,
671 | markdown_content=None,
672 | error_message=error_msg
673 | )
674 | except Exception as e:
675 | error_msg = f"Unexpected error when fetching document: {e}"
676 | logger.error(f"Unexpected error fetching document {decision_id}: {error_msg}")
677 | return SayistayDocumentMarkdown(
678 | decision_id=decision_id,
679 | decision_type=decision_type,
680 | source_url=document_url,
681 | markdown_content=None,
682 | error_message=error_msg
683 | )
684 |
685 | async def close_client_session(self):
686 | """Close HTTP client session."""
687 | if hasattr(self, 'http_client') and self.http_client and not self.http_client.is_closed:
688 | await self.http_client.aclose()
689 | logger.info("SayistayApiClient: HTTP client session closed.")
```
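The `_convert_html_to_markdown` helper above streams the decision HTML into MarkItDown through an in-memory `BytesIO` object instead of writing a temporary file. A minimal standalone sketch of the same pattern, with purely illustrative sample HTML:

```python
# Sketch of the BytesIO + MarkItDown pattern used by _convert_html_to_markdown above.
# The sample HTML is illustrative only, not real Sayıştay content.
import io
from markitdown import MarkItDown

sample_html = "<html><body><h1>Karar</h1><p>Örnek karar metni.</p></body></html>"

converter = MarkItDown()
# An in-memory stream avoids temp files and overly long auto-generated filenames.
result = converter.convert(io.BytesIO(sample_html.encode("utf-8")))
print(result.text_content)
```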
--------------------------------------------------------------------------------
/sayistay_mcp_module/client.py:
--------------------------------------------------------------------------------
```python
1 | # sayistay_mcp_module/client.py
2 |
3 | import httpx
4 | import re
5 | from bs4 import BeautifulSoup
6 | from typing import Dict, Any, List, Optional, Tuple
7 | import logging
8 | import html
9 | import io
10 | from urllib.parse import urlencode, urljoin
11 | from markitdown import MarkItDown
12 |
13 | from .models import (
14 | GenelKurulSearchRequest, GenelKurulSearchResponse, GenelKurulDecision,
15 | TemyizKuruluSearchRequest, TemyizKuruluSearchResponse, TemyizKuruluDecision,
16 | DaireSearchRequest, DaireSearchResponse, DaireDecision,
17 | SayistayDocumentMarkdown
18 | )
19 | from .enums import DaireEnum, KamuIdaresiTuruEnum, WebKararKonusuEnum, WEB_KARAR_KONUSU_MAPPING
20 |
21 | logger = logging.getLogger(__name__)
22 | if not logger.hasHandlers():
23 | logging.basicConfig(
24 | level=logging.INFO,
25 | format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
26 | )
27 |
28 | class SayistayApiClient:
29 | """
30 | API Client for Sayıştay (Turkish Court of Accounts) decision search system.
31 |
32 | Handles three types of decisions:
33 | - Genel Kurul (General Assembly): Precedent-setting interpretive decisions
34 | - Temyiz Kurulu (Appeals Board): Appeals against chamber decisions
35 | - Daire (Chamber): First-instance audit findings and sanctions
36 |
37 | Features:
38 | - ASP.NET WebForms session management with CSRF tokens
39 | - DataTables-based pagination and filtering
40 | - Automatic session refresh on expiration
41 | - Document retrieval with Markdown conversion
42 | """
43 |
44 | BASE_URL = "https://www.sayistay.gov.tr"
45 |
46 | # Search endpoints for each decision type
47 | GENEL_KURUL_ENDPOINT = "/KararlarGenelKurul/DataTablesList"
48 | TEMYIZ_KURULU_ENDPOINT = "/KararlarTemyiz/DataTablesList"
49 | DAIRE_ENDPOINT = "/KararlarDaire/DataTablesList"
50 |
51 | # Page endpoints for session initialization and document access
52 | GENEL_KURUL_PAGE = "/KararlarGenelKurul"
53 | TEMYIZ_KURULU_PAGE = "/KararlarTemyiz"
54 | DAIRE_PAGE = "/KararlarDaire"
55 |
56 | def __init__(self, request_timeout: float = 60.0):
57 | self.request_timeout = request_timeout
58 | self.session_cookies: Dict[str, str] = {}
59 | self.csrf_tokens: Dict[str, str] = {} # Store tokens for each endpoint
60 |
61 | self.http_client = httpx.AsyncClient(
62 | base_url=self.BASE_URL,
63 | headers={
64 | "Accept": "application/json, text/javascript, */*; q=0.01",
65 | "Accept-Language": "tr-TR,tr;q=0.9,en-US;q=0.8,en;q=0.7",
66 | "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
67 | "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36",
68 | "X-Requested-With": "XMLHttpRequest",
69 | "Sec-Fetch-Dest": "empty",
70 | "Sec-Fetch-Mode": "cors",
71 | "Sec-Fetch-Site": "same-origin"
72 | },
73 | timeout=request_timeout,
74 | follow_redirects=True
75 | )
76 |
77 | async def _initialize_session_for_endpoint(self, endpoint_type: str) -> bool:
78 | """
79 | Initialize session and obtain CSRF token for specific endpoint.
80 |
81 | Args:
82 | endpoint_type: One of 'genel_kurul', 'temyiz_kurulu', 'daire'
83 |
84 | Returns:
85 | True if session initialized successfully, False otherwise
86 | """
87 | page_mapping = {
88 | 'genel_kurul': self.GENEL_KURUL_PAGE,
89 | 'temyiz_kurulu': self.TEMYIZ_KURULU_PAGE,
90 | 'daire': self.DAIRE_PAGE
91 | }
92 |
93 | if endpoint_type not in page_mapping:
94 | logger.error(f"Invalid endpoint type: {endpoint_type}")
95 | return False
96 |
97 | page_url = page_mapping[endpoint_type]
98 | logger.info(f"Initializing session for {endpoint_type} endpoint: {page_url}")
99 |
100 | try:
101 | response = await self.http_client.get(page_url)
102 | response.raise_for_status()
103 |
104 | # Extract session cookies
105 | for cookie_name, cookie_value in response.cookies.items():
106 | self.session_cookies[cookie_name] = cookie_value
107 | logger.debug(f"Stored session cookie: {cookie_name}")
108 |
109 | # Extract CSRF token from form
110 | soup = BeautifulSoup(response.text, 'html.parser')
111 | csrf_input = soup.find('input', {'name': '__RequestVerificationToken'})
112 |
113 | if csrf_input and csrf_input.get('value'):
114 | self.csrf_tokens[endpoint_type] = csrf_input['value']
115 | logger.info(f"Extracted CSRF token for {endpoint_type}")
116 | return True
117 | else:
118 | logger.warning(f"CSRF token not found in {endpoint_type} page")
119 | return False
120 |
121 | except httpx.RequestError as e:
122 | logger.error(f"HTTP error during session initialization for {endpoint_type}: {e}")
123 | return False
124 | except Exception as e:
125 | logger.error(f"Error initializing session for {endpoint_type}: {e}")
126 | return False
127 |
128 | def _enum_to_form_value(self, enum_value: str, enum_type: str) -> str:
129 | """Convert enum values to form values expected by the API."""
130 | if enum_value == "ALL":
131 | if enum_type == "daire":
132 | return "Tüm Daireler"
133 | elif enum_type == "kamu_idaresi":
134 | return "Tüm Kurumlar"
135 | elif enum_type == "web_karar_konusu":
136 | return "Tüm Konular"
137 |
138 | # Apply web_karar_konusu mapping
139 | if enum_type == "web_karar_konusu":
140 | return WEB_KARAR_KONUSU_MAPPING.get(enum_value, enum_value)
141 |
142 | return enum_value
143 |
144 | def _build_datatables_params(self, start: int, length: int, draw: int = 1) -> List[Tuple[str, str]]:
145 | """Build standard DataTables parameters for all endpoints."""
146 | params = [
147 | ("draw", str(draw)),
148 | ("start", str(start)),
149 | ("length", str(length)),
150 | ("search[value]", ""),
151 | ("search[regex]", "false")
152 | ]
153 | return params
154 |
155 | def _build_genel_kurul_form_data(self, params: GenelKurulSearchRequest, draw: int = 1) -> List[Tuple[str, str]]:
156 | """Build form data for Genel Kurul search request."""
157 | form_data = self._build_datatables_params(params.start, params.length, draw)
158 |
159 | # Add DataTables column definitions (from actual request)
160 | column_defs = [
161 | ("columns[0][data]", "KARARNO"),
162 | ("columns[0][name]", ""),
163 | ("columns[0][searchable]", "true"),
164 | ("columns[0][orderable]", "false"),
165 | ("columns[0][search][value]", ""),
166 | ("columns[0][search][regex]", "false"),
167 |
168 | ("columns[1][data]", "KARARNO"),
169 | ("columns[1][name]", ""),
170 | ("columns[1][searchable]", "true"),
171 | ("columns[1][orderable]", "true"),
172 | ("columns[1][search][value]", ""),
173 | ("columns[1][search][regex]", "false"),
174 |
175 | ("columns[2][data]", "KARARTARIH"),
176 | ("columns[2][name]", ""),
177 | ("columns[2][searchable]", "true"),
178 | ("columns[2][orderable]", "true"),
179 | ("columns[2][search][value]", ""),
180 | ("columns[2][search][regex]", "false"),
181 |
182 | ("columns[3][data]", "KARAROZETI"),
183 | ("columns[3][name]", ""),
184 | ("columns[3][searchable]", "true"),
185 | ("columns[3][orderable]", "false"),
186 | ("columns[3][search][value]", ""),
187 | ("columns[3][search][regex]", "false"),
188 |
189 | ("columns[4][data]", ""),
190 | ("columns[4][name]", ""),
191 | ("columns[4][searchable]", "true"),
192 | ("columns[4][orderable]", "false"),
193 | ("columns[4][search][value]", ""),
194 | ("columns[4][search][regex]", "false"),
195 |
196 | ("order[0][column]", "2"),
197 | ("order[0][dir]", "desc")
198 | ]
199 | form_data.extend(column_defs)
200 |
201 | # Add search parameters
202 | form_data.extend([
203 | ("KararlarGenelKurulAra.KARARNO", params.karar_no or ""),
204 | ("__Invariant[]", "KararlarGenelKurulAra.KARARNO"),
205 | ("__Invariant[]", "KararlarGenelKurulAra.KARAREK"),
206 | ("KararlarGenelKurulAra.KARAREK", params.karar_ek or ""),
207 | ("KararlarGenelKurulAra.KARARTARIHBaslangic", params.karar_tarih_baslangic or "Başlangıç Tarihi"),
208 | ("KararlarGenelKurulAra.KARARTARIHBitis", params.karar_tarih_bitis or "Bitiş Tarihi"),
209 | ("KararlarGenelKurulAra.KARARTAMAMI", params.karar_tamami or ""),
210 | ("__RequestVerificationToken", self.csrf_tokens.get('genel_kurul', ''))
211 | ])
212 |
213 | return form_data
214 |
215 | def _build_temyiz_kurulu_form_data(self, params: TemyizKuruluSearchRequest, draw: int = 1) -> List[Tuple[str, str]]:
216 | """Build form data for Temyiz Kurulu search request."""
217 | form_data = self._build_datatables_params(params.start, params.length, draw)
218 |
219 | # Add DataTables column definitions (from actual request)
220 | column_defs = [
221 | ("columns[0][data]", "TEMYIZTUTANAKTARIHI"),
222 | ("columns[0][name]", ""),
223 | ("columns[0][searchable]", "true"),
224 | ("columns[0][orderable]", "false"),
225 | ("columns[0][search][value]", ""),
226 | ("columns[0][search][regex]", "false"),
227 |
228 | ("columns[1][data]", "TEMYIZTUTANAKTARIHI"),
229 | ("columns[1][name]", ""),
230 | ("columns[1][searchable]", "true"),
231 | ("columns[1][orderable]", "true"),
232 | ("columns[1][search][value]", ""),
233 | ("columns[1][search][regex]", "false"),
234 |
235 | ("columns[2][data]", "ILAMDAIRESI"),
236 | ("columns[2][name]", ""),
237 | ("columns[2][searchable]", "true"),
238 | ("columns[2][orderable]", "true"),
239 | ("columns[2][search][value]", ""),
240 | ("columns[2][search][regex]", "false"),
241 |
242 | ("columns[3][data]", "TEMYIZKARAR"),
243 | ("columns[3][name]", ""),
244 | ("columns[3][searchable]", "true"),
245 | ("columns[3][orderable]", "false"),
246 | ("columns[3][search][value]", ""),
247 | ("columns[3][search][regex]", "false"),
248 |
249 | ("columns[4][data]", ""),
250 | ("columns[4][name]", ""),
251 | ("columns[4][searchable]", "true"),
252 | ("columns[4][orderable]", "false"),
253 | ("columns[4][search][value]", ""),
254 | ("columns[4][search][regex]", "false"),
255 |
256 | ("order[0][column]", "1"),
257 | ("order[0][dir]", "desc")
258 | ]
259 | form_data.extend(column_defs)
260 |
261 | # Add search parameters
262 | daire_value = self._enum_to_form_value(params.ilam_dairesi, "daire")
263 | kamu_idaresi_value = self._enum_to_form_value(params.kamu_idaresi_turu, "kamu_idaresi")
264 | web_karar_konusu_value = self._enum_to_form_value(params.web_karar_konusu, "web_karar_konusu")
265 |
266 | form_data.extend([
267 | ("KararlarTemyizAra.ILAMDAIRESI", daire_value),
268 | ("KararlarTemyizAra.YILI", params.yili or ""),
269 | ("KararlarTemyizAra.KARARTRHBaslangic", params.karar_tarih_baslangic or ""),
270 | ("KararlarTemyizAra.KARARTRHBitis", params.karar_tarih_bitis or ""),
271 | ("KararlarTemyizAra.KAMUIDARESITURU", kamu_idaresi_value if kamu_idaresi_value != "Tüm Kurumlar" else ""),
272 | ("KararlarTemyizAra.ILAMNO", params.ilam_no or ""),
273 | ("KararlarTemyizAra.DOSYANO", params.dosya_no or ""),
274 | ("KararlarTemyizAra.TEMYIZTUTANAKNO", params.temyiz_tutanak_no or ""),
275 | ("__Invariant", "KararlarTemyizAra.TEMYIZTUTANAKNO"),
276 | ("KararlarTemyizAra.TEMYIZKARAR", params.temyiz_karar or ""),
277 | ("KararlarTemyizAra.WEBKARARKONUSU", web_karar_konusu_value if web_karar_konusu_value != "Tüm Konular" else ""),
278 | ("__RequestVerificationToken", self.csrf_tokens.get('temyiz_kurulu', ''))
279 | ])
280 |
281 | return form_data
282 |
283 | def _build_daire_form_data(self, params: DaireSearchRequest, draw: int = 1) -> List[Tuple[str, str]]:
284 | """Build form data for Daire search request."""
285 | form_data = self._build_datatables_params(params.start, params.length, draw)
286 |
287 | # Add DataTables column definitions (from actual request)
288 | column_defs = [
289 | ("columns[0][data]", "YARGILAMADAIRESI"),
290 | ("columns[0][name]", ""),
291 | ("columns[0][searchable]", "true"),
292 | ("columns[0][orderable]", "false"),
293 | ("columns[0][search][value]", ""),
294 | ("columns[0][search][regex]", "false"),
295 |
296 | ("columns[1][data]", "KARARTRH"),
297 | ("columns[1][name]", ""),
298 | ("columns[1][searchable]", "true"),
299 | ("columns[1][orderable]", "true"),
300 | ("columns[1][search][value]", ""),
301 | ("columns[1][search][regex]", "false"),
302 |
303 | ("columns[2][data]", "KARARNO"),
304 | ("columns[2][name]", ""),
305 | ("columns[2][searchable]", "true"),
306 | ("columns[2][orderable]", "true"),
307 | ("columns[2][search][value]", ""),
308 | ("columns[2][search][regex]", "false"),
309 |
310 | ("columns[3][data]", "YARGILAMADAIRESI"),
311 | ("columns[3][name]", ""),
312 | ("columns[3][searchable]", "true"),
313 | ("columns[3][orderable]", "true"),
314 | ("columns[3][search][value]", ""),
315 | ("columns[3][search][regex]", "false"),
316 |
317 | ("columns[4][data]", "WEBKARARMETNI"),
318 | ("columns[4][name]", ""),
319 | ("columns[4][searchable]", "true"),
320 | ("columns[4][orderable]", "false"),
321 | ("columns[4][search][value]", ""),
322 | ("columns[4][search][regex]", "false"),
323 |
324 | ("columns[5][data]", ""),
325 | ("columns[5][name]", ""),
326 | ("columns[5][searchable]", "true"),
327 | ("columns[5][orderable]", "false"),
328 | ("columns[5][search][value]", ""),
329 | ("columns[5][search][regex]", "false"),
330 |
331 | ("order[0][column]", "2"),
332 | ("order[0][dir]", "desc")
333 | ]
334 | form_data.extend(column_defs)
335 |
336 | # Add search parameters
337 | daire_value = self._enum_to_form_value(params.yargilama_dairesi, "daire")
338 | kamu_idaresi_value = self._enum_to_form_value(params.kamu_idaresi_turu, "kamu_idaresi")
339 | web_karar_konusu_value = self._enum_to_form_value(params.web_karar_konusu, "web_karar_konusu")
340 |
341 | form_data.extend([
342 | ("KararlarDaireAra.YARGILAMADAIRESI", daire_value),
343 | ("KararlarDaireAra.KARARTRHBaslangic", params.karar_tarih_baslangic or ""),
344 | ("KararlarDaireAra.KARARTRHBitis", params.karar_tarih_bitis or ""),
345 | ("KararlarDaireAra.ILAMNO", params.ilam_no or ""),
346 | ("KararlarDaireAra.KAMUIDARESITURU", kamu_idaresi_value if kamu_idaresi_value != "Tüm Kurumlar" else ""),
347 | ("KararlarDaireAra.HESAPYILI", params.hesap_yili or ""),
348 | ("KararlarDaireAra.WEBKARARKONUSU", web_karar_konusu_value if web_karar_konusu_value != "Tüm Konular" else ""),
349 | ("KararlarDaireAra.WEBKARARMETNI", params.web_karar_metni or ""),
350 | ("__RequestVerificationToken", self.csrf_tokens.get('daire', ''))
351 | ])
352 |
353 | return form_data
354 |
355 | async def search_genel_kurul_decisions(self, params: GenelKurulSearchRequest) -> GenelKurulSearchResponse:
356 | """
357 | Search Sayıştay Genel Kurul (General Assembly) decisions.
358 |
359 | Args:
360 | params: Search parameters for Genel Kurul decisions
361 |
362 | Returns:
363 | GenelKurulSearchResponse with matching decisions
364 | """
365 | # Initialize session if needed
366 | if 'genel_kurul' not in self.csrf_tokens:
367 | if not await self._initialize_session_for_endpoint('genel_kurul'):
368 | raise Exception("Failed to initialize session for Genel Kurul endpoint")
369 |
370 | form_data = self._build_genel_kurul_form_data(params)
371 | encoded_data = urlencode(form_data, encoding='utf-8')
372 |
373 | logger.info(f"Searching Genel Kurul decisions with parameters: {params.model_dump(exclude_none=True)}")
374 |
375 | try:
376 | # Update headers with cookies
377 | headers = self.http_client.headers.copy()
378 | if self.session_cookies:
379 | cookie_header = "; ".join([f"{k}={v}" for k, v in self.session_cookies.items()])
380 | headers["Cookie"] = cookie_header
381 |
382 | response = await self.http_client.post(
383 | self.GENEL_KURUL_ENDPOINT,
384 | data=encoded_data,
385 | headers=headers
386 | )
387 | response.raise_for_status()
388 | response_json = response.json()
389 |
390 | # Parse response
391 | decisions = []
392 | for item in response_json.get('data', []):
393 | decisions.append(GenelKurulDecision(
394 | id=item['Id'],
395 | karar_no=item['KARARNO'],
396 | karar_tarih=item['KARARTARIH'],
397 | karar_ozeti=item['KARAROZETI']
398 | ))
399 |
400 | return GenelKurulSearchResponse(
401 | decisions=decisions,
402 | total_records=response_json.get('recordsTotal', 0),
403 | total_filtered=response_json.get('recordsFiltered', 0),
404 | draw=response_json.get('draw', 1)
405 | )
406 |
407 | except httpx.RequestError as e:
408 | logger.error(f"HTTP error during Genel Kurul search: {e}")
409 | raise
410 | except Exception as e:
411 | logger.error(f"Error processing Genel Kurul search: {e}")
412 | raise
413 |
414 | async def search_temyiz_kurulu_decisions(self, params: TemyizKuruluSearchRequest) -> TemyizKuruluSearchResponse:
415 | """
416 | Search Sayıştay Temyiz Kurulu (Appeals Board) decisions.
417 |
418 | Args:
419 | params: Search parameters for Temyiz Kurulu decisions
420 |
421 | Returns:
422 | TemyizKuruluSearchResponse with matching decisions
423 | """
424 | # Initialize session if needed
425 | if 'temyiz_kurulu' not in self.csrf_tokens:
426 | if not await self._initialize_session_for_endpoint('temyiz_kurulu'):
427 | raise Exception("Failed to initialize session for Temyiz Kurulu endpoint")
428 |
429 | form_data = self._build_temyiz_kurulu_form_data(params)
430 | encoded_data = urlencode(form_data, encoding='utf-8')
431 |
432 | logger.info(f"Searching Temyiz Kurulu decisions with parameters: {params.model_dump(exclude_none=True)}")
433 |
434 | try:
435 | # Update headers with cookies
436 | headers = self.http_client.headers.copy()
437 | if self.session_cookies:
438 | cookie_header = "; ".join([f"{k}={v}" for k, v in self.session_cookies.items()])
439 | headers["Cookie"] = cookie_header
440 |
441 | response = await self.http_client.post(
442 | self.TEMYIZ_KURULU_ENDPOINT,
443 | data=encoded_data,
444 | headers=headers
445 | )
446 | response.raise_for_status()
447 | response_json = response.json()
448 |
449 | # Parse response
450 | decisions = []
451 | for item in response_json.get('data', []):
452 | decisions.append(TemyizKuruluDecision(
453 | id=item['Id'],
454 | temyiz_tutanak_tarihi=item['TEMYIZTUTANAKTARIHI'],
455 | ilam_dairesi=item['ILAMDAIRESI'],
456 | temyiz_karar=item['TEMYIZKARAR']
457 | ))
458 |
459 | return TemyizKuruluSearchResponse(
460 | decisions=decisions,
461 | total_records=response_json.get('recordsTotal', 0),
462 | total_filtered=response_json.get('recordsFiltered', 0),
463 | draw=response_json.get('draw', 1)
464 | )
465 |
466 | except httpx.RequestError as e:
467 | logger.error(f"HTTP error during Temyiz Kurulu search: {e}")
468 | raise
469 | except Exception as e:
470 | logger.error(f"Error processing Temyiz Kurulu search: {e}")
471 | raise
472 |
473 | async def search_daire_decisions(self, params: DaireSearchRequest) -> DaireSearchResponse:
474 | """
475 | Search Sayıştay Daire (Chamber) decisions.
476 |
477 | Args:
478 | params: Search parameters for Daire decisions
479 |
480 | Returns:
481 | DaireSearchResponse with matching decisions
482 | """
483 | # Initialize session if needed
484 | if 'daire' not in self.csrf_tokens:
485 | if not await self._initialize_session_for_endpoint('daire'):
486 | raise Exception("Failed to initialize session for Daire endpoint")
487 |
488 | form_data = self._build_daire_form_data(params)
489 | encoded_data = urlencode(form_data, encoding='utf-8')
490 |
491 | logger.info(f"Searching Daire decisions with parameters: {params.model_dump(exclude_none=True)}")
492 |
493 | try:
494 | # Update headers with cookies
495 | headers = self.http_client.headers.copy()
496 | if self.session_cookies:
497 | cookie_header = "; ".join([f"{k}={v}" for k, v in self.session_cookies.items()])
498 | headers["Cookie"] = cookie_header
499 |
500 | response = await self.http_client.post(
501 | self.DAIRE_ENDPOINT,
502 | data=encoded_data,
503 | headers=headers
504 | )
505 | response.raise_for_status()
506 | response_json = response.json()
507 |
508 | # Parse response
509 | decisions = []
510 | for item in response_json.get('data', []):
511 | decisions.append(DaireDecision(
512 | id=item['Id'],
513 | yargilama_dairesi=item['YARGILAMADAIRESI'],
514 | karar_tarih=item['KARARTRH'],
515 | karar_no=item['KARARNO'],
516 | ilam_no=item.get('ILAMNO'), # Use get() since ILAMNO may be missing or null
517 | madde_no=item['MADDENO'],
518 | kamu_idaresi_turu=item['KAMUIDARESITURU'],
519 | hesap_yili=item['HESAPYILI'],
520 | web_karar_konusu=item['WEBKARARKONUSU'],
521 | web_karar_metni=item['WEBKARARMETNI']
522 | ))
523 |
524 | return DaireSearchResponse(
525 | decisions=decisions,
526 | total_records=response_json.get('recordsTotal', 0),
527 | total_filtered=response_json.get('recordsFiltered', 0),
528 | draw=response_json.get('draw', 1)
529 | )
530 |
531 | except httpx.RequestError as e:
532 | logger.error(f"HTTP error during Daire search: {e}")
533 | raise
534 | except Exception as e:
535 | logger.error(f"Error processing Daire search: {e}")
536 | raise
537 |
538 | def _convert_html_to_markdown(self, html_content: str) -> Optional[str]:
539 | """Convert HTML content to Markdown using MarkItDown with BytesIO to avoid filename length issues."""
540 | if not html_content:
541 | return None
542 |
543 | try:
544 | # Convert HTML string to bytes and create BytesIO stream
545 | html_bytes = html_content.encode('utf-8')
546 | html_stream = io.BytesIO(html_bytes)
547 |
548 | # Pass BytesIO stream to MarkItDown to avoid temp file creation
549 | md_converter = MarkItDown()
550 | result = md_converter.convert(html_stream)
551 | markdown_content = result.text_content
552 |
553 | logger.info("Successfully converted HTML to Markdown")
554 | return markdown_content
555 |
556 | except Exception as e:
557 | logger.error(f"Error converting HTML to Markdown: {e}")
558 | return f"Error converting HTML content: {str(e)}"
559 |
560 | async def get_document_as_markdown(self, decision_id: str, decision_type: str) -> SayistayDocumentMarkdown:
561 | """
562 | Retrieve full text of a Sayıştay decision and convert to Markdown.
563 |
564 | Args:
565 | decision_id: Unique decision identifier
566 | decision_type: Type of decision ('genel_kurul', 'temyiz_kurulu', 'daire')
567 |
568 | Returns:
569 | SayistayDocumentMarkdown with converted content
570 | """
571 | logger.info(f"Retrieving document for {decision_type} decision ID: {decision_id}")
572 |
573 | # Validate decision_id
574 | if not decision_id or not decision_id.strip():
575 | return SayistayDocumentMarkdown(
576 | decision_id=decision_id,
577 | decision_type=decision_type,
578 | source_url="",
579 | markdown_content=None,
580 | error_message="Decision ID cannot be empty"
581 | )
582 |
583 | # Map decision type to URL path
584 | url_path_mapping = {
585 | 'genel_kurul': 'KararlarGenelKurul',
586 | 'temyiz_kurulu': 'KararlarTemyiz',
587 | 'daire': 'KararlarDaire'
588 | }
589 |
590 | if decision_type not in url_path_mapping:
591 | return SayistayDocumentMarkdown(
592 | decision_id=decision_id,
593 | decision_type=decision_type,
594 | source_url="",
595 | markdown_content=None,
596 | error_message=f"Invalid decision type: {decision_type}. Must be one of: {list(url_path_mapping.keys())}"
597 | )
598 |
599 | # Build document URL
600 | url_path = url_path_mapping[decision_type]
601 | document_url = f"{self.BASE_URL}/{url_path}/Detay/{decision_id}/"
602 |
603 | try:
604 | # Make HTTP GET request to document URL
605 | headers = {
606 | "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
607 | "Accept-Language": "tr-TR,tr;q=0.9,en-US;q=0.8,en;q=0.7",
608 | "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36",
609 | "Sec-Fetch-Dest": "document",
610 | "Sec-Fetch-Mode": "navigate",
611 | "Sec-Fetch-Site": "same-origin"
612 | }
613 |
614 | # Include session cookies if available
615 | if self.session_cookies:
616 | cookie_header = "; ".join([f"{k}={v}" for k, v in self.session_cookies.items()])
617 | headers["Cookie"] = cookie_header
618 |
619 | response = await self.http_client.get(document_url, headers=headers)
620 | response.raise_for_status()
621 | html_content = response.text
622 |
623 | if not html_content or not html_content.strip():
624 | logger.warning(f"Received empty HTML content from {document_url}")
625 | return SayistayDocumentMarkdown(
626 | decision_id=decision_id,
627 | decision_type=decision_type,
628 | source_url=document_url,
629 | markdown_content=None,
630 | error_message="Document content is empty"
631 | )
632 |
633 | # Convert HTML to Markdown using existing method
634 | markdown_content = self._convert_html_to_markdown(html_content)
635 |
636 | if markdown_content and "Error converting HTML content" not in markdown_content:
637 | logger.info(f"Successfully retrieved and converted document {decision_id} to Markdown")
638 | return SayistayDocumentMarkdown(
639 | decision_id=decision_id,
640 | decision_type=decision_type,
641 | source_url=document_url,
642 | markdown_content=markdown_content,
643 | retrieval_date=None # Could add datetime.now().isoformat() if needed
644 | )
645 | else:
646 | return SayistayDocumentMarkdown(
647 | decision_id=decision_id,
648 | decision_type=decision_type,
649 | source_url=document_url,
650 | markdown_content=None,
651 | error_message=f"Failed to convert HTML to Markdown: {markdown_content}"
652 | )
653 |
654 | except httpx.HTTPStatusError as e:
655 | error_msg = f"HTTP error {e.response.status_code} when fetching document: {e}"
656 | logger.error(f"HTTP error fetching document {decision_id}: {error_msg}")
657 | return SayistayDocumentMarkdown(
658 | decision_id=decision_id,
659 | decision_type=decision_type,
660 | source_url=document_url,
661 | markdown_content=None,
662 | error_message=error_msg
663 | )
664 | except httpx.RequestError as e:
665 | error_msg = f"Network error when fetching document: {e}"
666 | logger.error(f"Network error fetching document {decision_id}: {error_msg}")
667 | return SayistayDocumentMarkdown(
668 | decision_id=decision_id,
669 | decision_type=decision_type,
670 | source_url=document_url,
671 | markdown_content=None,
672 | error_message=error_msg
673 | )
674 | except Exception as e:
675 | error_msg = f"Unexpected error when fetching document: {e}"
676 | logger.error(f"Unexpected error fetching document {decision_id}: {error_msg}")
677 | return SayistayDocumentMarkdown(
678 | decision_id=decision_id,
679 | decision_type=decision_type,
680 | source_url=document_url,
681 | markdown_content=None,
682 | error_message=error_msg
683 | )
684 |
685 | async def close_client_session(self):
686 | """Close HTTP client session."""
687 | if hasattr(self, 'http_client') and self.http_client and not self.http_client.is_closed:
688 | await self.http_client.aclose()
689 | logger.info("SayistayApiClient: HTTP client session closed.")
```
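A hypothetical end-to-end usage sketch for `SayistayApiClient` (not part of the repository): the `GenelKurulSearchRequest` field names are inferred from `_build_genel_kurul_form_data` above, their defaults and validation live in `models.py` and are assumed here, and the search term is illustrative.

```python
# Hypothetical usage sketch for SayistayApiClient; the field names and defaults of
# GenelKurulSearchRequest are assumptions inferred from the form builders above.
import asyncio

from sayistay_mcp_module.client import SayistayApiClient
from sayistay_mcp_module.models import GenelKurulSearchRequest

async def main():
    client = SayistayApiClient()
    try:
        # The first search triggers session initialization and CSRF token extraction.
        response = await client.search_genel_kurul_decisions(
            GenelKurulSearchRequest(karar_tamami="harcırah", start=0, length=10)
        )
        print(f"{response.total_records} total records, {len(response.decisions)} returned")

        if response.decisions:
            # Retrieve the full text of the first hit as Markdown.
            doc = await client.get_document_as_markdown(
                decision_id=str(response.decisions[0].id),
                decision_type="genel_kurul",
            )
            print(doc.markdown_content or doc.error_message)
    finally:
        await client.close_client_session()

asyncio.run(main())
```

The `finally` block calls `close_client_session` so the shared `httpx.AsyncClient` is closed even when a search raises.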