#
tokens: 41622/50000 1/207 files (page 39/45)
lines: on (toggle) GitHub
raw markdown copy reset
This is page 39 of 45. Use http://codebase.md/dicklesworthstone/llm_gateway_mcp_server?lines=true&page={x} to view the full context.

# Directory Structure

```
├── .cursorignore
├── .env.example
├── .envrc
├── .gitignore
├── additional_features.md
├── check_api_keys.py
├── completion_support.py
├── comprehensive_test.py
├── docker-compose.yml
├── Dockerfile
├── empirically_measured_model_speeds.json
├── error_handling.py
├── example_structured_tool.py
├── examples
│   ├── __init__.py
│   ├── advanced_agent_flows_using_unified_memory_system_demo.py
│   ├── advanced_extraction_demo.py
│   ├── advanced_unified_memory_system_demo.py
│   ├── advanced_vector_search_demo.py
│   ├── analytics_reporting_demo.py
│   ├── audio_transcription_demo.py
│   ├── basic_completion_demo.py
│   ├── cache_demo.py
│   ├── claude_integration_demo.py
│   ├── compare_synthesize_demo.py
│   ├── cost_optimization.py
│   ├── data
│   │   ├── sample_event.txt
│   │   ├── Steve_Jobs_Introducing_The_iPhone_compressed.md
│   │   └── Steve_Jobs_Introducing_The_iPhone_compressed.mp3
│   ├── docstring_refiner_demo.py
│   ├── document_conversion_and_processing_demo.py
│   ├── entity_relation_graph_demo.py
│   ├── filesystem_operations_demo.py
│   ├── grok_integration_demo.py
│   ├── local_text_tools_demo.py
│   ├── marqo_fused_search_demo.py
│   ├── measure_model_speeds.py
│   ├── meta_api_demo.py
│   ├── multi_provider_demo.py
│   ├── ollama_integration_demo.py
│   ├── prompt_templates_demo.py
│   ├── python_sandbox_demo.py
│   ├── rag_example.py
│   ├── research_workflow_demo.py
│   ├── sample
│   │   ├── article.txt
│   │   ├── backprop_paper.pdf
│   │   ├── buffett.pdf
│   │   ├── contract_link.txt
│   │   ├── legal_contract.txt
│   │   ├── medical_case.txt
│   │   ├── northwind.db
│   │   ├── research_paper.txt
│   │   ├── sample_data.json
│   │   └── text_classification_samples
│   │       ├── email_classification.txt
│   │       ├── news_samples.txt
│   │       ├── product_reviews.txt
│   │       └── support_tickets.txt
│   ├── sample_docs
│   │   └── downloaded
│   │       └── attention_is_all_you_need.pdf
│   ├── sentiment_analysis_demo.py
│   ├── simple_completion_demo.py
│   ├── single_shot_synthesis_demo.py
│   ├── smart_browser_demo.py
│   ├── sql_database_demo.py
│   ├── sse_client_demo.py
│   ├── test_code_extraction.py
│   ├── test_content_detection.py
│   ├── test_ollama.py
│   ├── text_classification_demo.py
│   ├── text_redline_demo.py
│   ├── tool_composition_examples.py
│   ├── tournament_code_demo.py
│   ├── tournament_text_demo.py
│   ├── unified_memory_system_demo.py
│   ├── vector_search_demo.py
│   ├── web_automation_instruction_packs.py
│   └── workflow_delegation_demo.py
├── LICENSE
├── list_models.py
├── marqo_index_config.json.example
├── mcp_protocol_schema_2025-03-25_version.json
├── mcp_python_lib_docs.md
├── mcp_tool_context_estimator.py
├── model_preferences.py
├── pyproject.toml
├── quick_test.py
├── README.md
├── resource_annotations.py
├── run_all_demo_scripts_and_check_for_errors.py
├── storage
│   └── smart_browser_internal
│       ├── locator_cache.db
│       ├── readability.js
│       └── storage_state.enc
├── test_client.py
├── test_connection.py
├── TEST_README.md
├── test_sse_client.py
├── test_stdio_client.py
├── tests
│   ├── __init__.py
│   ├── conftest.py
│   ├── integration
│   │   ├── __init__.py
│   │   └── test_server.py
│   ├── manual
│   │   ├── test_extraction_advanced.py
│   │   └── test_extraction.py
│   └── unit
│       ├── __init__.py
│       ├── test_cache.py
│       ├── test_providers.py
│       └── test_tools.py
├── TODO.md
├── tool_annotations.py
├── tools_list.json
├── ultimate_mcp_banner.webp
├── ultimate_mcp_logo.webp
├── ultimate_mcp_server
│   ├── __init__.py
│   ├── __main__.py
│   ├── cli
│   │   ├── __init__.py
│   │   ├── __main__.py
│   │   ├── commands.py
│   │   ├── helpers.py
│   │   └── typer_cli.py
│   ├── clients
│   │   ├── __init__.py
│   │   ├── completion_client.py
│   │   └── rag_client.py
│   ├── config
│   │   └── examples
│   │       └── filesystem_config.yaml
│   ├── config.py
│   ├── constants.py
│   ├── core
│   │   ├── __init__.py
│   │   ├── evaluation
│   │   │   ├── base.py
│   │   │   └── evaluators.py
│   │   ├── providers
│   │   │   ├── __init__.py
│   │   │   ├── anthropic.py
│   │   │   ├── base.py
│   │   │   ├── deepseek.py
│   │   │   ├── gemini.py
│   │   │   ├── grok.py
│   │   │   ├── ollama.py
│   │   │   ├── openai.py
│   │   │   └── openrouter.py
│   │   ├── server.py
│   │   ├── state_store.py
│   │   ├── tournaments
│   │   │   ├── manager.py
│   │   │   ├── tasks.py
│   │   │   └── utils.py
│   │   └── ums_api
│   │       ├── __init__.py
│   │       ├── ums_database.py
│   │       ├── ums_endpoints.py
│   │       ├── ums_models.py
│   │       └── ums_services.py
│   ├── exceptions.py
│   ├── graceful_shutdown.py
│   ├── services
│   │   ├── __init__.py
│   │   ├── analytics
│   │   │   ├── __init__.py
│   │   │   ├── metrics.py
│   │   │   └── reporting.py
│   │   ├── cache
│   │   │   ├── __init__.py
│   │   │   ├── cache_service.py
│   │   │   ├── persistence.py
│   │   │   ├── strategies.py
│   │   │   └── utils.py
│   │   ├── cache.py
│   │   ├── document.py
│   │   ├── knowledge_base
│   │   │   ├── __init__.py
│   │   │   ├── feedback.py
│   │   │   ├── manager.py
│   │   │   ├── rag_engine.py
│   │   │   ├── retriever.py
│   │   │   └── utils.py
│   │   ├── prompts
│   │   │   ├── __init__.py
│   │   │   ├── repository.py
│   │   │   └── templates.py
│   │   ├── prompts.py
│   │   └── vector
│   │       ├── __init__.py
│   │       ├── embeddings.py
│   │       └── vector_service.py
│   ├── tool_token_counter.py
│   ├── tools
│   │   ├── __init__.py
│   │   ├── audio_transcription.py
│   │   ├── base.py
│   │   ├── completion.py
│   │   ├── docstring_refiner.py
│   │   ├── document_conversion_and_processing.py
│   │   ├── enhanced-ums-lookbook.html
│   │   ├── entity_relation_graph.py
│   │   ├── excel_spreadsheet_automation.py
│   │   ├── extraction.py
│   │   ├── filesystem.py
│   │   ├── html_to_markdown.py
│   │   ├── local_text_tools.py
│   │   ├── marqo_fused_search.py
│   │   ├── meta_api_tool.py
│   │   ├── ocr_tools.py
│   │   ├── optimization.py
│   │   ├── provider.py
│   │   ├── pyodide_boot_template.html
│   │   ├── python_sandbox.py
│   │   ├── rag.py
│   │   ├── redline-compiled.css
│   │   ├── sentiment_analysis.py
│   │   ├── single_shot_synthesis.py
│   │   ├── smart_browser.py
│   │   ├── sql_databases.py
│   │   ├── text_classification.py
│   │   ├── text_redline_tools.py
│   │   ├── tournament.py
│   │   ├── ums_explorer.html
│   │   └── unified_memory_system.py
│   ├── utils
│   │   ├── __init__.py
│   │   ├── async_utils.py
│   │   ├── display.py
│   │   ├── logging
│   │   │   ├── __init__.py
│   │   │   ├── console.py
│   │   │   ├── emojis.py
│   │   │   ├── formatter.py
│   │   │   ├── logger.py
│   │   │   ├── panels.py
│   │   │   ├── progress.py
│   │   │   └── themes.py
│   │   ├── parse_yaml.py
│   │   ├── parsing.py
│   │   ├── security.py
│   │   └── text.py
│   └── working_memory_api.py
├── unified_memory_system_technical_analysis.md
└── uv.lock
```

# Files

--------------------------------------------------------------------------------
/ultimate_mcp_server/tools/text_redline_tools.py:
--------------------------------------------------------------------------------

```python
   1 | # -*- coding: utf-8 -*-
   2 | from __future__ import annotations
   3 | 
   4 | import base64
   5 | import datetime as _dt
   6 | import difflib
   7 | import hashlib
   8 | import html as html_stdlib
   9 | import itertools
  10 | import json
  11 | import re
  12 | import subprocess
  13 | import tempfile
  14 | import textwrap
  15 | import time
  16 | from dataclasses import dataclass
  17 | from copy import deepcopy
  18 | from datetime import timezone
  19 | from pathlib import Path
  20 | from typing import Any, Dict, List, Optional, Set, Tuple, Union
  21 | 
  22 | import markdown
  23 | from bs4 import BeautifulSoup
  24 | from bs4.element import NavigableString, Tag
  25 | from lxml import etree
  26 | from lxml import html as lxml_html
  27 | from lxml.etree import _Element, _ElementTree
  28 | from xmldiff import formatting, main
  29 | from xmldiff.actions import (
  30 |     DeleteAttrib,
  31 |     DeleteNode,
  32 |     InsertAttrib,
  33 |     InsertNode,
  34 |     MoveNode,
  35 |     RenameAttrib,
  36 |     UpdateAttrib,
  37 |     UpdateTextIn,
  38 | )
  39 | 
  40 | try:
  41 |     from xmldiff.actions import InsertComment
  42 | except ImportError:
  43 |     InsertComment = None
  44 | try:
  45 |     from xmldiff.actions import RenameNode
  46 | except ImportError:
  47 |     RenameNode = None
  48 | try:
  49 |     from xmldiff.actions import UpdateTextAfter
  50 | except ImportError:
  51 |     UpdateTextAfter = None
  52 | try:
  53 |     from xmldiff.actions import UpdateTextBefore
  54 | except ImportError:
  55 |     UpdateTextBefore = None
  56 | try:
  57 |     from xmldiff.actions import (
  58 |         DeleteTextAfter,
  59 |         DeleteTextBefore,
  60 |         DeleteTextIn,
  61 |         InsertTextAfter,
  62 |         InsertTextBefore,
  63 |         InsertTextIn,
  64 |     )
  65 | 
  66 |     _fine_grained_text_actions = True
  67 | except ImportError:
  68 |     InsertTextIn = InsertTextBefore = InsertTextAfter = None
  69 |     DeleteTextIn = DeleteTextBefore = DeleteTextAfter = None
  70 |     _fine_grained_text_actions = False
  71 |     _INSERT_TEXT_CLS = tuple()
  72 |     _DELETE_TEXT_CLS = tuple()
  73 |     _UPDATE_TEXT_CLS = tuple(c for c in (UpdateTextIn, UpdateTextBefore, UpdateTextAfter) if c)
  74 | 
  75 | if _fine_grained_text_actions:
  76 |     _INSERT_TEXT_CLS = tuple(c for c in (InsertTextIn, InsertTextBefore, InsertTextAfter) if c)
  77 |     _DELETE_TEXT_CLS = tuple(c for c in (DeleteTextIn, DeleteTextBefore, DeleteTextAfter) if c)
  78 |     _UPDATE_TEXT_CLS = tuple(c for c in (UpdateTextIn, UpdateTextBefore, UpdateTextAfter) if c)
  79 | else:
  80 |     _INSERT_TEXT_CLS = _INSERT_TEXT_CLS if "_INSERT_TEXT_CLS" in locals() else tuple()
  81 |     _DELETE_TEXT_CLS = _DELETE_TEXT_CLS if "_DELETE_TEXT_CLS" in locals() else tuple()
  82 |     _UPDATE_TEXT_CLS = (
  83 |         _UPDATE_TEXT_CLS
  84 |         if "_UPDATE_TEXT_CLS" in locals()
  85 |         else tuple(c for c in (UpdateTextIn, UpdateTextBefore, UpdateTextAfter) if c)
  86 |     )
  87 | 
  88 | _ATTR_UPDATE_CLS = (UpdateAttrib,)
  89 | _ATTR_INSERT_CLS = (InsertAttrib,)
  90 | _ATTR_DELETE_CLS = (DeleteAttrib,)
  91 | _ATTR_RENAME_CLS = (RenameAttrib,)
  92 | # ------------------------------------------------------------------
  93 | 
  94 | from ultimate_mcp_server.exceptions import ToolError, ToolInputError  # noqa: E402
  95 | from ultimate_mcp_server.tools.base import with_error_handling, with_tool_metrics  # noqa: E402
  96 | from ultimate_mcp_server.utils import get_logger  # noqa: E402
  97 | 
  98 | # --- Document Conversion Import ---
  99 | try:
 100 |     from ultimate_mcp_server.tools.document_conversion_and_processing import convert_document
 101 | 
 102 |     _DOC_CONVERSION_AVAILABLE = True
 103 | except ImportError:
 104 |     convert_document = None
 105 |     _DOC_CONVERSION_AVAILABLE = False
 106 |     print("Document conversion tool not available. Non-HTML input comparison will fail.")
 107 | # ---------------------------------
 108 | 
logger = get_logger("ultimate_mcp_server.tools.redline")

# --- Add logger.exception if missing ---
# Defensive shim: if the object returned by get_logger does not expose an
# `exception` method (stdlib loggers do, custom wrappers may not), install one
# that logs at ERROR level with traceback information attached.
if not hasattr(logger, "exception"):

    def _logger_exception(msg, *args, **kw):
        # exc_info=True appends the active exception's traceback to the record.
        logger.error(msg, *args, exc_info=True, **kw)

    logger.exception = _logger_exception  # type: ignore[attr-defined]
# -----------------------------------------
 119 | 
# Namespace and configuration constants
# XML namespace/prefix used for all diff:* annotation attributes written onto
# the annotated output tree (e.g. diff:op, diff:move-id, diff:attributes).
_DIFF_NS = "http://namespaces.shoobx.com/diff"
_DIFF_PREFIX = "diff"

# --- Synthetic ID Generation ---
# Module-wide monotonically increasing counter; guarantees synthetic IDs are
# unique within a process even when element signatures collide.
_id_counter = itertools.count(1)
 126 | 
 127 | def _normalize_text(text: Optional[str]) -> str:
 128 |     """Collapses whitespace and strips leading/trailing space."""
 129 |     if text is None:
 130 |         return ""
 131 |     # Replace various whitespace chars with a single space, then strip
 132 |     normalized = re.sub(r"\s+", " ", text).strip()
 133 |     return normalized
 134 | 
 135 | def _get_normalized_full_text(el: _Element) -> str:
 136 |     """Gets normalized text content of an element and its descendants,
 137 |        excluding script and style tags."""
 138 |     if el is None:
 139 |         return ""
 140 |     # Get text from all descendant text nodes, excluding those within script/style
 141 |     # We join with space to handle cases like <p>Text<b>bold</b> more</p>
 142 |     try:
 143 |         texts = el.xpath(".//text()[not(ancestor::script) and not(ancestor::style)]")
 144 |         full_text = " ".join(t.strip() for t in texts if t.strip())
 145 |         # Normalize the combined text
 146 |         return _normalize_text(full_text)
 147 |     except Exception as e:
 148 |         # Fallback for safety, though xpath should be robust
 149 |         logger.warning(f"XPath text extraction failed for <{el.tag}>: {e}. Falling back.")
 150 |         texts = [t for t in el.itertext() if t.strip()] # Less precise about script/style
 151 |         full_text = " ".join(t.strip() for t in texts)
 152 |         return _normalize_text(full_text)
 153 |     
# Define significant attributes (adjust as needed)
# These are attributes likely to uniquely identify an element or its purpose.
# Avoid volatile attributes like style, or overly common ones like class
# (unless very specific); they would make element signatures unstable.
_SIGNIFICANT_ATTRIBUTES = {"id", "href", "src", "name", "value", "title", "alt", "rel", "type"}
# Consider adding data-* attributes if they are known to be stable identifiers in your source HTML

# --- Fuzzy move detection configuration ---
# Thresholds below gate which text fragments are even considered for pairing.
_MOVE_MIN_TOKENS: int = 5         # Ignore tiny fragments; raise to 8 for stricter pairing
_MOVE_MIN_CHARS: int = 24         # Shorter text tends to be noise
_MOVE_SIM_THRESHOLD: float = 0.82 # 0..1; lower to catch more, higher to be stricter
_MOVE_MAX_CANDIDATES: int = 2000  # Safety bound for O(N*M) matching
# Tags considered "blocky enough" to attempt fuzzy move pairing even if tags differ
_BLOCK_TAGS: Set[str] = {
    "p","li","ul","ol","h1","h2","h3","h4","h5","h6","blockquote","pre","code",
    "section","article","aside","figure","figcaption","table","thead","tbody","tr","td","th","dl","dt","dd"
}
# Attributes that help anchor identity/context when matching moved content
_ANCHOR_ATTRS: Set[str] = {"id","name","href","src","data-id","data-key","data-uid","aria-label","title"}
 172 | 
 173 | def _inject_synthetic_ids(root: _Element, *, attr: str = "data-diff-id") -> None:
 174 |     """Inject synthetic IDs into elements based on tag, normalized full text,
 175 |        and significant attributes."""
 176 |     global _id_counter
 177 |     if root is None:
 178 |         return
 179 | 
 180 |     processed_elements = 0
 181 |     elements_with_ids = 0
 182 | 
 183 |     # Iterate through all elements in the tree
 184 |     for el in root.iter():
 185 |         if not isinstance(el, _Element):
 186 |             continue
 187 | 
 188 |         processed_elements += 1
 189 | 
 190 |         # Skip if ID already exists (e.g., from previous run or source)
 191 |         if el.get(attr):
 192 |             elements_with_ids +=1
 193 |             continue
 194 | 
 195 |         # 1. Get Tag
 196 |         tag = el.tag
 197 | 
 198 |         # 2. Get Normalized Full Text Content
 199 |         norm_text = _get_normalized_full_text(el)
 200 | 
 201 |         # 3. Get Normalized Significant Attributes
 202 |         sig_attrs = {}
 203 |         for k, v in el.attrib.items():
 204 |             # Check if attribute is considered significant OR if it's a data-* attribute
 205 |             # (often used for stable identifiers)
 206 |             # Exclude the synthetic ID attribute itself if looping
 207 |             if (k in _SIGNIFICANT_ATTRIBUTES or k.startswith("data-")) and k != attr:
 208 |                  # Normalize attribute value's whitespace
 209 |                  sig_attrs[k] = _normalize_text(v)
 210 | 
 211 |         # Sort significant attributes by key for consistent signature
 212 |         sorted_sig_attrs = tuple(sorted(sig_attrs.items()))
 213 | 
 214 |         # 4. Create Signature Tuple
 215 |         # Using a hash of the potentially long text to keep the signature manageable
 216 |         text_hash = hashlib.blake2b(norm_text.encode('utf-8', 'replace'), digest_size=8).hexdigest()
 217 |         sig_tuple = (tag, text_hash, sorted_sig_attrs)
 218 | 
 219 |         # 5. Generate Hash and Synthetic ID
 220 |         try:
 221 |             # Hash the representation of the signature tuple
 222 |             sig_repr = repr(sig_tuple).encode("utf-8", "replace")
 223 |             h = hashlib.blake2b(sig_repr, digest_size=8).hexdigest()
 224 |             # Combine counter and hash for uniqueness
 225 |             synthetic_id = f"synid_{next(_id_counter):06d}_{h}"
 226 |             el.set(attr, synthetic_id)
 227 |             elements_with_ids += 1
 228 |         except Exception as e:
 229 |             logger.warning(
 230 |                 f"Failed to generate/set synthetic ID for element <{el.tag}> "
 231 |                 f"(Text hash: {text_hash}, Attrs: {sorted_sig_attrs}): {e}"
 232 |             )
 233 | 
 234 |     logger.debug(f"ID Injection: Processed {processed_elements} elements, {elements_with_ids} have IDs.")
 235 | 
 236 | # Helper to safely get attributes from actions
 237 | def _safe_get_attr(action: Any, *attr_names: str, default: Any = None) -> Any:
 238 |     if action is None:
 239 |         return default
 240 |     for name in attr_names:
 241 |         if "." in name:
 242 |             parts = name.split(".")
 243 |             obj = action
 244 |             try:
 245 |                 for part in parts:
 246 |                     if obj is None or not hasattr(obj, part):
 247 |                         obj = None
 248 |                         break
 249 |                     obj = getattr(obj, part)
 250 |                 if obj is not None:
 251 |                     return obj
 252 |             except (AttributeError, TypeError):
 253 |                 continue
 254 |         elif hasattr(action, name):
 255 |             val = getattr(action, name)
 256 |             if val is not None:
 257 |                 return val
 258 |     return default
 259 | 
 260 | 
 261 | # ‑‑‑ Redline XML Formatter ‑‑‑
 262 | class RedlineXMLFormatter:
 263 |     """Applies xmldiff actions using standardized diff:* attributes."""
 264 | 
 265 |     def __init__(self, **kwargs):
 266 |         self.detect_moves = kwargs.get("detect_moves", True)
 267 |         self.normalize = kwargs.get("normalize", formatting.WS_BOTH)
 268 |         self._orig_root: Optional[_Element] = None
 269 |         self._mod_root: Optional[_Element] = None
 270 |         self._annotated_copy_root: Optional[_Element] = None
 271 |         self._annotated_copy_tree: Optional[_ElementTree] = None
 272 |         self._actions: List[Any] = []
 273 |         self._node_map_orig_to_copy: Dict[_Element, _Element] = {}
 274 |         self._xpath_cache_orig: Dict[str, List[_Element]] = {}
 275 |         self._xpath_cache_mod: Dict[str, List[_Element]] = {}
 276 |         self.processed_actions: Dict[str, int] = {
 277 |             "insertions": 0,
 278 |             "deletions": 0,
 279 |             "moves": 0,
 280 |             "text_updates": 0,
 281 |             "attr_updates": 0,
 282 |             "renames": 0,
 283 |             "other_changes": 0,
 284 |             "errors": 0,
 285 |             "inline_insertions": 0,
 286 |             "inline_deletions": 0,
 287 |         }
 288 |         self._attr_changes: Dict[_Element, List[Dict[str, str]]] = {}
 289 | 
 290 |     def _reset_state(self):
 291 |         self._orig_root = None
 292 |         self._mod_root = None
 293 |         self._annotated_copy_root = None
 294 |         self._annotated_copy_tree = None
 295 |         self._actions = []
 296 |         self._node_map_orig_to_copy = {}
 297 |         self._xpath_cache_orig.clear()
 298 |         self._xpath_cache_mod.clear()
 299 |         self.processed_actions = {k: 0 for k in self.processed_actions}
 300 |         self._attr_changes.clear()
 301 | 
 302 |     @staticmethod
 303 |     def _add_diff_attribute(elem: _Element, name: str, value: Optional[str] = "true"):
 304 |         """Adds a diff:* attribute."""
 305 |         if elem is None:
 306 |             return
 307 |         if not isinstance(elem, _Element):
 308 |             return
 309 |         qname = f"{{{_DIFF_NS}}}{name}"
 310 |         val_str = str(value) if value is not None else ""
 311 |         try:
 312 |             elem.set(qname, val_str)
 313 |         except ValueError as e:
 314 |             logger.error(f"Failed to set attr '{qname}'='{val_str}' on <{elem.tag}>: {e}")
 315 | 
 316 |     def _add_attribute_change_detail(self, node: _Element, change_info: Dict[str, str]):
 317 |         if node not in self._attr_changes:
 318 |             self._attr_changes[node] = []
 319 |         self._attr_changes[node].append(change_info)
 320 | 
 321 |     def _aggregate_attribute_changes(self):
 322 |         for node, changes in self._attr_changes.items():
 323 |             if node is None or not changes:
 324 |                 continue
 325 |             try:
 326 |                 change_summary = json.dumps(changes)
 327 |                 self._add_diff_attribute(node, "attributes", change_summary)
 328 |             except (TypeError, ValueError) as e:
 329 |                 logger.error(f"Could not serialize attr changes for {node.tag}: {e}")
 330 |                 self._add_diff_attribute(node, "attributes", "[Serialization Error]")
 331 | 
 332 |     def _get_node_from_xpath(self, xpath: str, tree_type: str) -> Optional[_Element]:
 333 |         if not xpath:
 334 |             return None
 335 |         root = (
 336 |             self._orig_root
 337 |             if tree_type == "original"
 338 |             else self._mod_root
 339 |             if tree_type == "modified"
 340 |             else None
 341 |         )
 342 |         cache = (
 343 |             self._xpath_cache_orig
 344 |             if tree_type == "original"
 345 |             else self._xpath_cache_mod
 346 |             if tree_type == "modified"
 347 |             else None
 348 |         )
 349 |         if root is None or cache is None:
 350 |             return None
 351 |         if xpath in cache:
 352 |             nodes = cache[xpath]
 353 |             return nodes[0] if nodes else None
 354 |         try:
 355 |             adjusted_xpath = xpath[2:] if xpath.startswith("/0/") else xpath
 356 |             nodes = root.xpath(adjusted_xpath)
 357 |             element_nodes = [n for n in nodes if isinstance(n, _Element)]
 358 |             cache[xpath] = element_nodes
 359 |             return element_nodes[0] if element_nodes else None
 360 |         except Exception:
 361 |             cache[xpath] = []
 362 |             return None
 363 | 
 364 |     def _get_corresponding_node_in_copy(self, orig_node: _Element) -> Optional[_Element]:
 365 |         if orig_node is None:
 366 |             return None
 367 |         if orig_node in self._node_map_orig_to_copy:
 368 |             return self._node_map_orig_to_copy[orig_node]
 369 |         if self._orig_root is not None and self._annotated_copy_root is not None:
 370 |             try:
 371 |                 orig_xpath = self._orig_root.getroottree().getpath(orig_node)
 372 |                 if orig_xpath:
 373 |                     copy_nodes = self._annotated_copy_root.xpath(orig_xpath)
 374 |                     if copy_nodes and isinstance(copy_nodes[0], _Element):
 375 |                         self._node_map_orig_to_copy[orig_node] = copy_nodes[0]
 376 |                         return copy_nodes[0]
 377 |             except Exception:
 378 |                 pass
 379 |         return None
 380 | 
 381 |     def _build_initial_node_map(self):
 382 |         if self._orig_root is None or self._annotated_copy_root is None:
 383 |             return
 384 |         self._node_map_orig_to_copy.clear()
 385 |         orig_iter = self._orig_root.iter()
 386 |         copy_iter = self._annotated_copy_root.iter()
 387 |         try:
 388 |             while True:
 389 |                 orig_node = next(orig_iter)
 390 |                 copy_node = next(copy_iter)
 391 |                 if isinstance(orig_node, _Element) and isinstance(copy_node, _Element):
 392 |                     if (
 393 |                         hasattr(orig_node, "tag")
 394 |                         and hasattr(copy_node, "tag")
 395 |                         and orig_node.tag == copy_node.tag
 396 |                     ):
 397 |                         self._node_map_orig_to_copy[orig_node] = copy_node
 398 |         except StopIteration:
 399 |             pass
 400 |         except Exception as e:
 401 |             logger.error(f"Error during initial node mapping: {e}")
 402 |         logger.debug(f"Built initial node map with {len(self._node_map_orig_to_copy)} entries.")
 403 | 
 404 |     def _find_node_in_copy_by_xpath(self, xpath: str) -> Optional[_Element]:
 405 |         if not xpath or self._annotated_copy_root is None:
 406 |             return None
 407 |         try:
 408 |             adjusted_xpath = xpath[2:] if xpath.startswith("/0/") else xpath
 409 |             nodes = self._annotated_copy_root.xpath(adjusted_xpath)
 410 |             if nodes and isinstance(nodes[0], _Element):
 411 |                 return nodes[0]
 412 |             elif nodes:
 413 |                 try:
 414 |                     parent = nodes[0].getparent()
 415 |                     if isinstance(parent, _Element):
 416 |                         return parent
 417 |                 except AttributeError:
 418 |                     pass
 419 |                 return None
 420 |             else:
 421 |                 return None
 422 |         except Exception:
 423 |             return None
 424 | 
 425 |     # --- Action Handlers ---
 426 | 
 427 |     def _handle_delete_node(self, action: DeleteNode):
 428 |         node_xpath = _safe_get_attr(action, "node", "node_xpath", "target")
 429 |         if not node_xpath:
 430 |             logger.error(f"DeleteNode missing XPath: {action}")
 431 |             self.processed_actions["errors"] += 1
 432 |             return
 433 |         orig_node = self._get_node_from_xpath(node_xpath, "original")
 434 |         if orig_node is None:
 435 |             logger.warning(f"DeleteNode: Original node {node_xpath} not found.")
 436 |             return
 437 |         copy_node = self._get_corresponding_node_in_copy(orig_node)
 438 |         if copy_node is None:
 439 |             logger.warning(f"DeleteNode: Copy node for {node_xpath} not found.")
 440 |             return
 441 | 
 442 |         move_id = _safe_get_attr(action, "move_id")
 443 |         if not move_id:
 444 |             move_node = next(
 445 |                 (
 446 |                     a
 447 |                     for a in self._actions
 448 |                     if isinstance(a, MoveNode) and _safe_get_attr(a, "node", "source") == node_xpath
 449 |                 ),
 450 |                 None,
 451 |             )
 452 |             move_id = _safe_get_attr(move_node, "move_id")
 453 | 
 454 |         if move_id:
 455 |             self._add_diff_attribute(copy_node, "op", "move-source")
 456 |             self._add_diff_attribute(copy_node, "move-id", move_id)
 457 |         else:
 458 |             self._add_diff_attribute(copy_node, "op", "delete")
 459 |             self.processed_actions["deletions"] += 1
 460 | 
 461 |     def _handle_insert_node(self, action: InsertNode):
 462 |         parent_xpath = _safe_get_attr(action, "parent_xpath", "target")
 463 |         node_structure = _safe_get_attr(action, "node")
 464 |         tag = _safe_get_attr(action, "tag")
 465 |         pos = _safe_get_attr(action, "pos", "position")
 466 |         sibling_xpath = _safe_get_attr(action, "sibling_xpath")
 467 |         if not parent_xpath:
 468 |             logger.error(f"InsertNode missing parent: {action}")
 469 |             self.processed_actions["errors"] += 1
 470 |             return
 471 | 
 472 |         node_to_insert = None
 473 |         if node_structure is not None and isinstance(node_structure, _Element):
 474 |             try:
 475 |                 # Attempt to clone from the action object first
 476 |                 node_to_insert = deepcopy(node_structure) # Use deepcopy
 477 |                 # node_to_insert = etree.fromstring(etree.tostring(node_structure))
 478 |             except Exception as e:
 479 |                 logger.error(f"InsertNode clone failed: {e}")
 480 | 
 481 |         # # ➜ REMOVE THIS BLOCK ------------------------------------------
 482 |         # if node_to_insert is None:
 483 |         #     # ➜ NEW: find the inserted node in the MODIFIED tree ----------------
 484 |         #     if parent_xpath:
 485 |         #         mod_parent = self._get_node_from_xpath(parent_xpath, "modified")
 486 |         #         if mod_parent is not None:
 487 |         #             try:
 488 |         #                 idx = int(pos) if str(pos).isdigit() else len(mod_parent) - 1
 489 |         #                 # Make sure to clone the node from the modified tree
 490 |         #                 potential_node = mod_parent[idx]
 491 |         #                 if potential_node is not None:
 492 |         #                      node_to_insert = deepcopy(potential_node)
 493 |         #                      logger.debug(f"InsertNode: Fetched node <{node_to_insert.tag}> from modified tree.")
 494 |         #                 else:
 495 |         #                      node_to_insert = None
 496 |         #             except Exception as e:
 497 |         #                 logger.warning(f"InsertNode: Failed to fetch node from modified tree at {parent_xpath}[{pos}]: {e}")
 498 |         #                 node_to_insert = None
 499 |         # # ---------------------------------------------------------------
 500 | 
 501 |         # If cloning/fetching failed, create a placeholder
 502 |         if node_to_insert is None:
 503 |             if tag:
 504 |                 attrs = _safe_get_attr(action, "attrib", "attributes", default={}) or {}
 505 |                 node_to_insert = etree.Element(tag, attrs)
 506 |                 # Make placeholder text more distinct
 507 |                 node_to_insert.text = f"[Placeholder: Inserted <{tag}> content missing]"
 508 |                 logger.warning(f"InsertNode created placeholder <{tag}> because node structure was missing in action and couldn't be fetched.")
 509 |             else:
 510 |                 logger.error("InsertNode failed: No structure/tag provided in action.")
 511 |                 self.processed_actions["errors"] += 1
 512 |                 return
 513 | 
 514 |         move_id = _safe_get_attr(action, "move_id")
 515 |         is_move_target = bool(move_id)
 516 |         if not move_id:
 517 |             move_node = next(
 518 |                 (
 519 |                     a
 520 |                     for a in self._actions
 521 |                     if isinstance(a, MoveNode)
 522 |                     and _safe_get_attr(a, "target") == parent_xpath
 523 |                     and str(_safe_get_attr(a, "pos", "position")) == str(pos)
 524 |                 ),
 525 |                 None,
 526 |             )
 527 |             if move_node:
 528 |                 move_id = _safe_get_attr(move_node, "move_id")
 529 |                 is_move_target = bool(move_id)
 530 |                 if not move_id:
 531 |                     logger.warning(f"Insert seems move target but MoveNode lacks ID: {action}")
 532 | 
 533 |         if is_move_target and move_id:
 534 |             if not any(isinstance(a, MoveNode) for a in self._actions):
 535 |                 self.processed_actions["moves"] += 1
 536 |             pass
 537 |         else:
 538 |             self._add_diff_attribute(node_to_insert, "op", "insert")
 539 |             if is_move_target:
 540 |                 self.processed_actions["errors"] += 1
 541 |                 logger.warning(
 542 |                     f"Marking node <{node_to_insert.tag}> as insert (was move target w/o ID)."
 543 |                 )
 544 |             self.processed_actions["insertions"] += 1
 545 | 
 546 |         if not (is_move_target and any(isinstance(a, MoveNode) for a in self._actions)):
 547 |             target_node_in_copy = self._find_node_in_copy_by_xpath(
 548 |                 parent_xpath if pos == "into" or isinstance(pos, int) else sibling_xpath
 549 |             )
 550 |             if target_node_in_copy is None:
 551 |                 logger.error(
 552 |                     f"InsertNode: Target node not found in COPY. XPath: '{parent_xpath if pos == 'into' or isinstance(pos, int) else sibling_xpath}'."
 553 |                 )
 554 |                 self.processed_actions["errors"] += 1
 555 |                 return
 556 |             try:
 557 |                 if pos == "into" or isinstance(pos, int):
 558 |                     parent = target_node_in_copy
 559 |                     idx = (
 560 |                         int(pos)
 561 |                         if isinstance(pos, int) or (isinstance(pos, str) and pos.isdigit())
 562 |                         else len(parent)
 563 |                     )
 564 |                     idx = max(0, min(idx, len(parent)))
 565 |                     parent.insert(idx, node_to_insert)
 566 |                 elif pos == "before":
 567 |                     sibling = target_node_in_copy
 568 |                     parent = sibling.getparent()
 569 |                     parent.insert(parent.index(sibling), node_to_insert)
 570 |                 elif pos == "after":
 571 |                     sibling = target_node_in_copy
 572 |                     parent = sibling.getparent()
 573 |                     parent.insert(parent.index(sibling) + 1, node_to_insert)
 574 |                 else:
 575 |                     raise ValueError(f"Unknown pos '{pos}'")
 576 |             except Exception as e:
 577 |                 logger.exception(f"InsertNode insert error: {e}")
 578 |                 self.processed_actions["errors"] += 1
 579 | 
    def _handle_move_node(self, action: MoveNode):
        """Handle move: Ensure source marked, insert clone at target.

        Steps:
          1. Validate the action carries source XPath, target XPath and a
             move_id; raise ToolError otherwise (diff-engine malfunction).
          2. Mark the corresponding node in the annotated copy with
             op="move-source" and the shared move-id (unless already marked).
          3. Locate the moved node in the MODIFIED tree (preferred, so any
             edits made inside the moved subtree are preserved), falling back
             to the original source node if it cannot be found.
          4. Deep-copy that node, strip stale diff attributes, tag the clone
             with op="move-target", and insert it into the annotated copy at
             the requested position.

        Errors are counted in self.processed_actions["errors"] rather than
        raised, except for the structural validation in step 1.
        """
        src_xpath = _safe_get_attr(action, "node", "source")
        tgt_xpath = _safe_get_attr(action, "target")
        pos = _safe_get_attr(action, "pos", "position", default="into") # Keep default 'into' if missing
        move_id = _safe_get_attr(action, "move_id")

        if not src_xpath or not tgt_xpath or not move_id:
            # Use ToolError for critical diff engine issues
            raise ToolError(
                f"xmldiff produced a MoveNode without complete data "
                f"(src={src_xpath!r}, tgt={tgt_xpath!r}, id={move_id!r}). "
                "This indicates malfunction in the diff stage.",
                code="DIFF_ENGINE_ERROR",
            )

        orig_src_node = self._get_node_from_xpath(src_xpath, "original")
        if orig_src_node is None:
            logger.error(f"MoveNode {move_id}: Original source node {src_xpath} not found.")
            self.processed_actions["errors"] += 1
            return
        copy_src_node = self._get_corresponding_node_in_copy(orig_src_node)
        if copy_src_node is None:
            # Log error but attempt to continue if possible - maybe source was deleted then moved? Unlikely but cover edge case.
            logger.error(f"MoveNode {move_id}: Corresponding copy source node for {src_xpath} not found.")
            # If the source isn't in the copy, we can't mark it, but we still need to insert the target.
            # No need to return here, proceed to insert the target.
            # self.processed_actions["errors"] += 1 # Maybe not an error if source was already removed by another action?
        else:
            # Ensure the source node in the copy is marked correctly
            # It might have been marked by _handle_delete_node already if xmldiff emits Delete then Move
            # Check if marking is already correct to avoid redundant logging/work
            if (
                copy_src_node.get(f"{{{_DIFF_NS}}}op") != "move-source"
                or copy_src_node.get(f"{{{_DIFF_NS}}}move-id") != move_id
            ):
                logger.debug(f"MoveNode {move_id}: Marking source node {src_xpath} in copy.")
                self._add_diff_attribute(copy_src_node, "op", "move-source")
                self._add_diff_attribute(copy_src_node, "move-id", move_id)

        # --- Determine the node to clone ---
        # The goal is to clone the node *as it exists in the modified document*
        # The `action` tells us where it ended up (tgt_xpath, pos).
        node_to_clone = None
        mod_target_parent = self._get_node_from_xpath(tgt_xpath, "modified")

        if mod_target_parent is not None and isinstance(pos, int) and pos >= 0:
            try:
                # Get the actual node from the modified tree at the target position
                node_to_clone = mod_target_parent[pos]
                logger.debug(f"MoveNode {move_id}: Found node to clone in MODIFIED tree at {tgt_xpath}[{pos}].")
            except IndexError:
                logger.warning(f"MoveNode {move_id}: Index {pos} out of bounds for target parent {tgt_xpath} in MODIFIED tree. Parent has {len(mod_target_parent)} children.")
            except Exception as e:
                logger.warning(f"MoveNode {move_id}: Error accessing node at {tgt_xpath}[{pos}] in MODIFIED tree: {e}")
        elif mod_target_parent is not None and pos == "into": # Handle insertion 'into' as append
             try:
                # If pos is 'into', it usually implies appending. The moved node would be the last child.
                # However, xmldiff usually gives an integer position for moves.
                # Let's try finding based on the source node's ID if possible, as a fallback.
                # NOTE(review): orig_src_node.get('data-diff-id') may be None, which would
                # search for the literal string 'None' here - confirm IDs are always assigned.
                mod_node_with_same_id = mod_target_parent.xpath(f".//*[@data-diff-id='{orig_src_node.get('data-diff-id')}']")
                if mod_node_with_same_id:
                    node_to_clone = mod_node_with_same_id[0]
                    logger.debug(f"MoveNode {move_id}: Found node to clone in MODIFIED tree based on ID matching source ID within {tgt_xpath}.")
                else:
                    logger.warning(f"MoveNode {move_id}: Position is '{pos}', couldn't find node to clone in MODIFIED target parent {tgt_xpath} by index or ID.")

             except Exception as e:
                 logger.warning(f"MoveNode {move_id}: Error finding node in MODIFIED target parent {tgt_xpath} for pos='{pos}': {e}")


        if node_to_clone is None:
            # Fallback: Clone the original source node. This might lose internal changes.
            node_to_clone = orig_src_node
            logger.warning(
                f"MoveNode {move_id}: Could not find moved node in MODIFIED tree at {tgt_xpath}:{pos}. "
                f"Falling back to cloning ORIGINAL source node {src_xpath}. Internal changes might be lost."
            )

        # --- Clone and prepare the node for insertion ---
        try:
            # Use deepcopy which might be more robust for lxml elements than fromstring(tostring)
            cloned_node_for_insert = deepcopy(node_to_clone)
            if cloned_node_for_insert is None: raise ValueError("Deepcopy resulted in None") # noqa: E701
        except Exception as e:
            logger.error(f"MoveNode {move_id}: Cloning node failed: {e}")
            self.processed_actions["errors"] += 1
            return

        # --- Clean and mark the cloned node ---
        # Remove any pre-existing diff attributes from the clone and its descendants
        for el in cloned_node_for_insert.xpath(".//* | ."): # Iterate over self and descendants
             if isinstance(el, _Element):
                 for name in list(el.attrib):
                     if name.startswith(f"{{{_DIFF_NS}}}"):
                         del el.attrib[name]
                     # Also remove the synthetic ID from the clone to avoid collisions if diff runs again
                     if name == "data-diff-id":
                         del el.attrib[name]

        # Mark the root of the clone as the move target
        self._add_diff_attribute(cloned_node_for_insert, "op", "move-target")
        self._add_diff_attribute(cloned_node_for_insert, "move-id", move_id)

        # --- Insert the cloned node into the copy tree ---
        target_node_in_copy = self._find_node_in_copy_by_xpath(tgt_xpath)
        if target_node_in_copy is None:
            logger.error(f"MoveNode {move_id}: Target parent node {tgt_xpath} not found in COPY tree for insertion.")
            self.processed_actions["errors"] += 1
            # Attempt to insert into the root as a last resort? Or just fail? Let's fail.
            return

        try:
            if isinstance(pos, int) and pos >= 0:
                # Insert at the specific index within the target parent found in the copy tree
                parent = target_node_in_copy
                # Clamp index to valid range for insertion
                idx = max(0, min(int(pos), len(parent)))
                parent.insert(idx, cloned_node_for_insert)
                logger.debug(f"MoveNode {move_id}: Inserted move-target clone into copy tree at {tgt_xpath}[{idx}].")
                self.processed_actions["moves"] += 1
            elif pos == "into": # Handle 'into' - append to the target node
                parent = target_node_in_copy
                parent.append(cloned_node_for_insert)
                logger.debug(f"MoveNode {move_id}: Appended move-target clone into copy tree node {tgt_xpath}.")
                self.processed_actions["moves"] += 1
            else:
                # This case (e.g., pos='before'/'after') shouldn't happen with MoveNode from xmldiff typically,
                # as it uses parent path + index. Log an error if it does.
                logger.error(f"MoveNode {move_id}: Unsupported position '{pos}' for insertion. Expected integer or 'into'.")
                self.processed_actions["errors"] += 1
        except Exception as e:
            logger.exception(f"MoveNode {move_id}: Insertion of cloned node into copy tree failed: {e}")
            self.processed_actions["errors"] += 1
    def _handle_update_text(self, action: Union[UpdateTextIn, UpdateTextBefore, UpdateTextAfter]):
        """Apply a text-content update to the matching node in the annotated copy.

        UpdateTextIn replaces the node's .text, UpdateTextAfter replaces its
        .tail, and UpdateTextBefore is only recorded as a context annotation
        (the copy's text is not rewritten for it). Updates whose old and new
        values differ only in whitespace are skipped entirely. The previous
        value is stored in an "old-value" diff attribute for later rendering.
        """
        xpath = _safe_get_attr(action, "node", "node_xpath")
        new_text = _safe_get_attr(action, "text", "new", "new_text", default="")
        if not xpath:
            logger.error(f"{type(action).__name__} missing XPath: {action}")
            self.processed_actions["errors"] += 1
            return
        # When fine-grained insert/delete text actions are available they are
        # handled by dedicated handlers, so bail out to avoid double work.
        if _fine_grained_text_actions and type(action) in (_INSERT_TEXT_CLS + _DELETE_TEXT_CLS):
            return

        copy_node = self._find_node_in_copy_by_xpath(xpath)
        if copy_node is None:
            logger.warning(f"{type(action).__name__}: Node {xpath} not found in COPY.")
            return

        # Determine the old value and which slot (.text/.tail/context) to update,
        # preferring the actual original node over values carried on the action.
        orig_node = self._get_node_from_xpath(xpath, "original")
        actual_old_text = "[Unknown Old Text]"
        update_type = "text"
        if orig_node is not None:
            if isinstance(action, UpdateTextIn):
                actual_old_text = orig_node.text or ""
                update_type = "text"
            elif UpdateTextAfter is not None and isinstance(action, UpdateTextAfter):
                actual_old_text = orig_node.tail or ""
                update_type = "tail"
            elif UpdateTextBefore is not None and isinstance(action, UpdateTextBefore):
                actual_old_text = _safe_get_attr(action, "old", "old_text", default="[?]")
                update_type = "before"
            else:
                actual_old_text = orig_node.text or ""
        else:
            actual_old_text = _safe_get_attr(
                action, "old", "old_text", default="[Missing Orig Node]"
            )

        # Skip whitespace-only differences.
        norm_old = " ".join(str(actual_old_text).split())
        norm_new = " ".join(str(new_text).split())
        if norm_old == norm_new:
            return

        if update_type == "text":
            copy_node.text = new_text
            # Replacing .text treats the new content as plain text; drop any
            # child elements so the old markup is not shown alongside it.
            if len(copy_node) > 0:
                for child in list(copy_node):
                    copy_node.remove(child)
            self._add_diff_attribute(copy_node, "op", "update-text")
            self._add_diff_attribute(copy_node, "old-value", actual_old_text)
            self.processed_actions["text_updates"] += 1
        elif update_type == "tail":
            copy_node.tail = new_text
            self._add_diff_attribute(copy_node, "op", "update-tail")
            self._add_diff_attribute(copy_node, "old-value", actual_old_text)
            self.processed_actions["text_updates"] += 1
        elif update_type == "before":
            # Annotate only: the affected text physically lives outside this node.
            self._add_diff_attribute(copy_node, "op", "update-text-context")
            self._add_diff_attribute(copy_node, "detail", "before")
            self._add_diff_attribute(copy_node, "new-value", new_text)
            self._add_diff_attribute(copy_node, "old-value", actual_old_text)
            self.processed_actions["text_updates"] += 1
            logger.warning(f"UpdateTextBefore marked on node {xpath}.")
 775 | 
 776 |     def _handle_attr_change(
 777 |         self, action: Union[UpdateAttrib, InsertAttrib, DeleteAttrib, RenameAttrib]
 778 |     ):
 779 |         xpath = _safe_get_attr(action, "node", "node_xpath")
 780 |         if not xpath:
 781 |             logger.error(f"{type(action).__name__} missing XPath: {action}")
 782 |             self.processed_actions["errors"] += 1
 783 |             return
 784 |         copy_node = self._find_node_in_copy_by_xpath(xpath)
 785 |         if copy_node is None:
 786 |             logger.warning(f"{type(action).__name__}: Node {xpath} not found in COPY.")
 787 |             return
 788 |         orig_node = self._get_node_from_xpath(xpath, "original")
 789 | 
 790 |         change_info = {}
 791 |         processed = False
 792 |         try:
 793 |             if isinstance(action, UpdateAttrib):
 794 |                 name = _safe_get_attr(action, "name")
 795 |                 new_val = _safe_get_attr(action, "value", "new", default="")
 796 |                 if name is None:
 797 |                     logger.error(f"UpdateAttrib missing name: {action}")
 798 |                     return
 799 |                 old_val = orig_node.get(name) if orig_node is not None else "[?]"
 800 |                 if old_val != new_val:
 801 |                     copy_node.set(name, new_val)
 802 |                     change_info = {"op": "update", "name": name, "old": old_val, "new": new_val}
 803 |                     self._add_attribute_change_detail(copy_node, change_info)
 804 |                     self.processed_actions["attr_updates"] += 1
 805 |                     processed = True
 806 |             elif isinstance(action, InsertAttrib):
 807 |                 name = _safe_get_attr(action, "name")
 808 |                 value = _safe_get_attr(action, "value", default="")
 809 |                 if name is None:
 810 |                     logger.error(f"InsertAttrib missing name: {action}")
 811 |                     return
 812 |                 copy_node.set(name, value)
 813 |                 change_info = {"op": "insert", "name": name, "new": value}
 814 |                 self._add_attribute_change_detail(copy_node, change_info)
 815 |                 self.processed_actions["attr_updates"] += 1
 816 |                 processed = True
 817 |             elif isinstance(action, DeleteAttrib):
 818 |                 name = _safe_get_attr(action, "name")
 819 |                 if name is None:
 820 |                     logger.error(f"DeleteAttrib missing name: {action}")
 821 |                     return
 822 |                 old_val = orig_node.get(name) if orig_node is not None else "[?]"
 823 |                 if name in copy_node.attrib:
 824 |                     del copy_node.attrib[name]
 825 |                 change_info = {"op": "delete", "name": name, "old": old_val}
 826 |                 self._add_attribute_change_detail(copy_node, change_info)
 827 |                 self.processed_actions["attr_updates"] += 1
 828 |                 processed = True
 829 |             elif isinstance(action, RenameAttrib):
 830 |                 old_n = _safe_get_attr(action, "old_name")
 831 |                 new_n = _safe_get_attr(action, "new_name")
 832 |                 if not old_n or not new_n:
 833 |                     logger.error(f"RenameAttrib missing names: {action}")
 834 |                     return
 835 |                 value = orig_node.get(old_n) if orig_node is not None else "[?]"
 836 |                 if old_n in copy_node.attrib:
 837 |                     del copy_node.attrib[old_n]
 838 |                 copy_node.set(new_n, value)
 839 |                 change_info = {"op": "rename", "old_name": old_n, "new_name": new_n, "value": value}
 840 |                 self._add_attribute_change_detail(copy_node, change_info)
 841 |                 self.processed_actions["attr_updates"] += 1
 842 |                 self.processed_actions["renames"] += 1
 843 |                 processed = True
 844 |             if processed and copy_node.get(f"{{{_DIFF_NS}}}op") is None:
 845 |                 self._add_diff_attribute(copy_node, "op", "update-attrib")
 846 |         except Exception as e:
 847 |             logger.exception(f"Attr change error for {xpath}: {e}")
 848 |             self.processed_actions["errors"] += 1
 849 | 
 850 |     def _handle_rename_node(self, action: RenameNode):
 851 |         xpath = _safe_get_attr(action, "node", "node_xpath")
 852 |         new_tag = _safe_get_attr(action, "new_tag", "new_name")
 853 |         if not xpath or not new_tag:
 854 |             logger.error(f"RenameNode missing xpath/new_tag: {action}")
 855 |             self.processed_actions["errors"] += 1
 856 |             return
 857 |         copy_node = self._find_node_in_copy_by_xpath(xpath)
 858 |         if copy_node is None:
 859 |             logger.warning(f"RenameNode: Node {xpath} not found in COPY.")
 860 |             return
 861 |         orig_node = self._get_node_from_xpath(xpath, "original")
 862 |         old_tag = orig_node.tag if orig_node is not None else copy_node.tag
 863 |         if old_tag != new_tag:
 864 |             copy_node.tag = new_tag
 865 |             self._add_diff_attribute(copy_node, "op", "rename-node")
 866 |             self._add_diff_attribute(copy_node, "old-value", old_tag)
 867 |             self._add_diff_attribute(copy_node, "new-value", new_tag)
 868 |             self.processed_actions["renames"] += 1
 869 | 
 870 |     def _handle_insert_text_node(
 871 |         self, action: Union[InsertTextIn, InsertTextBefore, InsertTextAfter]
 872 |     ):
 873 |         xpath = _safe_get_attr(action, "node", "node_xpath")
 874 |         text = _safe_get_attr(action, "text", "value", default="")
 875 |         if not xpath:
 876 |             logger.error(f"{type(action).__name__} missing xpath: {action}")
 877 |             self.processed_actions["errors"] += 1
 878 |             return
 879 |         copy_node = self._find_node_in_copy_by_xpath(xpath)
 880 |         if copy_node is None:
 881 |             logger.warning(f"{type(action).__name__}: Ref node {xpath} missing in copy.")
 882 |             return
 883 |         ins_el = etree.Element("ins", attrib={"class": "diff-insert-text"})
 884 |         ins_el.text = text
 885 |         try:
 886 |             if InsertTextBefore is not None and isinstance(action, InsertTextBefore):
 887 |                 parent = copy_node.getparent()
 888 |                 parent.insert(parent.index(copy_node), ins_el)
 889 |                 self.processed_actions["inline_insertions"] += 1
 890 |             elif InsertTextAfter is not None and isinstance(action, InsertTextAfter):
 891 |                 ins_el.tail = copy_node.tail
 892 |                 copy_node.tail = None
 893 |                 copy_node.addnext(ins_el)
 894 |                 self.processed_actions["inline_insertions"] += 1
 895 |             elif InsertTextIn is not None and isinstance(action, InsertTextIn):
 896 |                 pos = _safe_get_attr(action, "pos", default=len(copy_node))
 897 |                 idx = int(pos) if isinstance(pos, int) or str(pos).isdigit() else len(copy_node)
 898 |                 idx = max(0, min(idx, len(copy_node)))
 899 |                 copy_node.insert(idx, ins_el)
 900 |                 self.processed_actions["inline_insertions"] += 1
 901 |             else:
 902 |                 logger.warning(f"Unhandled InsertText: {type(action).__name__}.")
 903 |                 copy_node.append(ins_el)
 904 |                 self.processed_actions["inline_insertions"] += 1
 905 |         except Exception as e:
 906 |             logger.exception(f"InsertText error: {e}")
 907 |             self.processed_actions["errors"] += 1
 908 | 
 909 |     def _handle_delete_text_node(
 910 |         self, action: Union[DeleteTextIn, DeleteTextBefore, DeleteTextAfter]
 911 |     ):
 912 |         xpath = _safe_get_attr(action, "node", "node_xpath")
 913 |         text = _safe_get_attr(action, "text", "value")
 914 |         if not xpath or text is None:
 915 |             logger.error(f"{type(action).__name__} missing xpath/text: {action}")
 916 |             self.processed_actions["errors"] += 1
 917 |             return
 918 |         copy_node = self._find_node_in_copy_by_xpath(xpath)
 919 |         if copy_node is None:
 920 |             logger.warning(f"{type(action).__name__}: Ref node {xpath} missing in copy.")
 921 |             return
 922 |         del_el = etree.Element("del", attrib={"class": "diff-delete-text"})
 923 |         del_el.text = text
 924 |         try:
 925 |             if DeleteTextBefore is not None and isinstance(action, DeleteTextBefore):
 926 |                 parent = copy_node.getparent()
 927 |                 parent.insert(parent.index(copy_node), del_el)
 928 |                 self.processed_actions["inline_deletions"] += 1
 929 |             elif DeleteTextAfter is not None and isinstance(action, DeleteTextAfter):
 930 |                 orig_tail = copy_node.tail
 931 |                 copy_node.tail = None
 932 |                 del_el.tail = orig_tail
 933 |                 copy_node.addnext(del_el)
 934 |                 self.processed_actions["inline_deletions"] += 1
 935 |             elif DeleteTextIn is not None and isinstance(action, DeleteTextIn):
 936 |                 pos = _safe_get_attr(action, "pos", default=0)
 937 |                 idx = int(pos) if isinstance(pos, int) or str(pos).isdigit() else 0
 938 |                 idx = max(0, min(idx, len(copy_node)))
 939 |                 if copy_node.text and text in copy_node.text:
 940 |                     copy_node.text = copy_node.text.replace(text, "", 1)
 941 |                 copy_node.insert(idx, del_el)
 942 |                 self.processed_actions["inline_deletions"] += 1
 943 |             else:
 944 |                 logger.warning(f"Unhandled DeleteText: {type(action).__name__}.")
 945 |                 copy_node.insert(0, del_el)
 946 |                 self.processed_actions["inline_deletions"] += 1
 947 |         except Exception as e:
 948 |             logger.exception(f"DeleteText error: {e}")
 949 |             self.processed_actions["errors"] += 1
 950 | 
 951 |     # --- Main Formatting Method ---
 952 | 
    def format(
        self, actions: List[Any], orig_doc: _ElementTree, mod_doc: _ElementTree
    ) -> _ElementTree:
        """Applies diff actions to a copy of orig_doc.

        Args:
            actions: xmldiff action objects describing orig -> mod changes.
            orig_doc: Parsed original document; never mutated - a deep copy
                is annotated instead.
            mod_doc: Parsed modified document, consulted by handlers that
                need content from the "after" state (e.g. moved nodes).

        Returns:
            The annotated copy of ``orig_doc`` with diff markers in the
            ``_DIFF_NS`` namespace. Per-category counts are accumulated in
            ``self.processed_actions``.

        Raises:
            ValueError: If either document lacks a root element.
            RuntimeError: If the original cannot be deep-copied, or the
                annotated tree is unexpectedly missing at the end.
        """
        self._reset_state()
        self._actions = actions
        logger.debug(f"Formatter init with {len(actions)} actions.")
        self._orig_root = orig_doc.getroot()
        self._mod_root = mod_doc.getroot()
        if self._orig_root is None or self._mod_root is None:
            raise ValueError("Docs missing root.")
        try:
            self._annotated_copy_tree = deepcopy(orig_doc)
            self._annotated_copy_root = self._annotated_copy_tree.getroot()
            assert self._annotated_copy_root is not None
        except Exception as e:
            logger.exception("Deepcopy failed.")
            raise RuntimeError("Copy failed.") from e
        self._build_initial_node_map()

        # Ensure the diff namespace is declared on the root. lxml nsmaps are
        # immutable, so when the prefix is absent a fresh root element is
        # built with the extended nsmap and the old root's content moved over.
        etree.register_namespace(_DIFF_PREFIX, _DIFF_NS)
        if (
            self._annotated_copy_root is not None
            and _DIFF_PREFIX not in self._annotated_copy_root.nsmap
        ):
            new_nsmap = self._annotated_copy_root.nsmap.copy()
            new_nsmap[_DIFF_PREFIX] = _DIFF_NS
            new_root = etree.Element(
                self._annotated_copy_root.tag,
                nsmap=new_nsmap,
                attrib=self._annotated_copy_root.attrib,
            )
            new_root.text = self._annotated_copy_root.text
            new_root.tail = self._annotated_copy_root.tail
            for child in self._annotated_copy_root:
                new_root.append(child)
            self._annotated_copy_tree._setroot(new_root)
            self._annotated_copy_root = new_root
            logger.debug(f"Registered '{_DIFF_PREFIX}' ns.")

        # Dispatch table: action class -> bound handler. Classes that are
        # version-dependent in xmldiff are registered only when available.
        action_handlers = {
            DeleteNode: self._handle_delete_node,
            InsertNode: self._handle_insert_node,
            MoveNode: self._handle_move_node,
            UpdateTextIn: self._handle_update_text,
            UpdateAttrib: self._handle_attr_change,
            InsertAttrib: self._handle_attr_change,
            DeleteAttrib: self._handle_attr_change,
            RenameAttrib: self._handle_attr_change,
        }
        if RenameNode:
            action_handlers[RenameNode] = self._handle_rename_node
        if UpdateTextBefore:
            action_handlers[UpdateTextBefore] = self._handle_update_text
        if UpdateTextAfter:
            action_handlers[UpdateTextAfter] = self._handle_update_text
        if _fine_grained_text_actions:
            for cls in _INSERT_TEXT_CLS:
                action_handlers[cls] = self._handle_insert_text_node
            for cls in _DELETE_TEXT_CLS:
                action_handlers[cls] = self._handle_delete_text_node

        logger.info(f"Applying {len(actions)} actions to the document copy...")
        for i, action in enumerate(actions):
            atype = type(action)
            handler = action_handlers.get(atype)
            if handler:
                try:
                    # A failing handler never aborts the run; it is logged
                    # and counted so remaining actions still apply.
                    handler(action)
                except Exception:
                    logger.exception(f"Handler error #{i + 1} ({atype.__name__}): {action}")
                    self.processed_actions["errors"] += 1
            elif atype == InsertComment:
                # Comment insertions are deliberately ignored.
                pass
            else:
                logger.warning(f"Unhandled action: {atype.__name__}")
                self.processed_actions["other_changes"] += 1

        self._aggregate_attribute_changes()

        # "total_changes" sums every counter except itself and "errors".
        total = sum(
            v for k, v in self.processed_actions.items() if k not in ["total_changes", "errors"]
        )
        self.processed_actions["total_changes"] = total
        logger.info(f"Action processing complete. Stats: {self.processed_actions}")
        if self._annotated_copy_tree is None:
            raise RuntimeError("Formatting failed, tree is None.")
        return self._annotated_copy_tree
1041 | 
1042 | 
1043 | # ─────────────────────────────────────────────────────────────────────────────
1044 | #                     Markdown summary-generation helpers
1045 | # ─────────────────────────────────────────────────────────────────────────────
1046 | def _node_plain_text(node: Optional[_Element], *, max_len: int = 120) -> str:
1047 |     if node is None:
1048 |         return "[Node is None]"
1049 |     try:
1050 |         texts = [t for t in node.xpath(".//text()[not(parent::script) and not(parent::style)]")]
1051 |         txt = " ".join(t.strip() for t in texts if t.strip())
1052 |         txt = re.sub(r"\s+", " ", txt).strip()
1053 |     except Exception as e:
1054 |         logger.warning(f"Text extract error: {e}")
1055 |         txt = " ".join(node.itertext()).strip()
1056 |         txt = re.sub(r"\s+", " ", txt).strip()
1057 |     return textwrap.shorten(txt, max_len, placeholder="…") if max_len else txt
1058 | 
1059 | 
1060 | def _get_element_by_xpath_from_tree(xpath: str, tree: _ElementTree) -> Optional[_Element]:
1061 |     if not xpath or tree is None:
1062 |         return None
1063 |     root = tree.getroot()
1064 |     if root is None:
1065 |         return None
1066 |     try:
1067 |         nodes = root.xpath(xpath[2:] if xpath.startswith("/0/") else xpath)
1068 |         return next((n for n in nodes if isinstance(n, _Element)), None)
1069 |     except Exception:
1070 |         return None
1071 | 
1072 | 
def _generate_markdown_summary(
    *,
    orig_doc: _ElementTree,
    mod_doc: _ElementTree,
    actions: List[Any],
    context_chars: int = 120,
) -> str:
    """Render a human-readable Markdown report of xmldiff *actions*.

    Moves are summarized first (collected from ``MoveNode`` actions, or from
    ``DeleteNode``/``InsertNode`` pairs sharing a ``move_id``), then the
    remaining actions are grouped under per-type headings.  *orig_doc* and
    *mod_doc* are used to resolve xpaths back to elements so old/new content
    can be quoted, truncated to *context_chars* characters.

    Returns the Markdown text; per-action rendering failures are logged and
    replaced by an error marker rather than aborting the whole summary.
    """
    # Aware isoformat() already emits "+00:00"; replace it with "Z" instead of
    # appending (the old "+ 'Z'" produced the malformed "...+00:00Z").
    ts = _dt.datetime.now(timezone.utc).isoformat(timespec="seconds").replace("+00:00", "Z")
    lines: List[str] = [f"# Detected Redline Differences ({ts})\n"]

    if orig_doc is None or mod_doc is None:
        return "# Error: Missing docs."

    processed_move_ids: set[str] = set()
    move_actions: Dict[str, Dict[str, Any]] = {}

    # --------------------------------------------------------------------- #
    # Pass 1 – collect information about moves
    # --------------------------------------------------------------------- #
    for a in actions:
        if isinstance(a, MoveNode):
            mid = _safe_get_attr(a, "move_id")
            src = _safe_get_attr(a, "node", "source")
            tgt = _safe_get_attr(a, "target")
            pos = _safe_get_attr(a, "pos", "position")
            if mid and src and tgt:
                move_actions.setdefault(mid, {})["src"] = src
                move_actions[mid].update({"tgt": tgt, "pos": pos, "found": True})
                processed_move_ids.add(mid)

        elif isinstance(a, DeleteNode):
            mid = _safe_get_attr(a, "move_id")
            src = _safe_get_attr(a, "node", "node_xpath", "target")
            if mid and src and mid not in processed_move_ids:
                # Delete side of a move seen before its insert counterpart.
                move_actions.setdefault(mid, {})["src"] = src
                move_actions[mid].update({"tgt": "?", "pos": "?", "found": False})
                processed_move_ids.add(mid)

        elif isinstance(a, InsertNode):
            mid = _safe_get_attr(a, "move_id")
            tgt = _safe_get_attr(a, "parent_xpath", "target")
            pos = _safe_get_attr(a, "pos", "position")
            # Fill in the target only for moves not yet completed by a MoveNode.
            if mid and tgt and mid in move_actions and not move_actions[mid]["found"]:
                move_actions[mid].update({"tgt": tgt, "pos": pos})

    # --------------------------------------------------------------------- #
    # Moves section
    # --------------------------------------------------------------------- #
    if move_actions:
        lines.append("## Moves\n")
        for mid, info in move_actions.items():
            src_elem = _get_element_by_xpath_from_tree(info["src"], orig_doc)
            tgt_elem = _get_element_by_xpath_from_tree(info["tgt"], mod_doc)
            loc = f"into <{tgt_elem.tag}>" if tgt_elem is not None else f"near {info['tgt']}"
            # Compare against None explicitly: lxml elements without children
            # are falsy, so `if src_elem` would wrongly hide empty elements.
            content_txt = _node_plain_text(src_elem, max_len=context_chars) if src_elem is not None else "[?]"

            lines.extend(
                [
                    f"### Move ID: `{mid}`",
                    f"- **From:** `{info['src']}`",
                    f"- **To:** `{loc}` (Pos: {info['pos']})",
                    "- **Content:**",
                    "  ```text",
                    f"  {content_txt}",
                    "  ```\n",
                ]
            )
        lines.append("---\n")

    # --------------------------------------------------------------------- #
    # Headings map
    # --------------------------------------------------------------------- #
    hdrs = {
        InsertNode: "## Insertions\n",
        DeleteNode: "## Deletions\n",
        UpdateTextIn: "## Text Updates\n",
        UpdateAttrib: "## Attr Updates\n",
        InsertAttrib: "## Attr Updates\n",
        DeleteAttrib: "## Attr Updates\n",
        RenameAttrib: "## Attr Updates\n",
        RenameNode: "## Node Renames\n",
    }

    cur_sec = None

    # --------------------------------------------------------------------- #
    # Main pass – every non-move action
    # --------------------------------------------------------------------- #
    for a in actions:
        atype = type(a)
        mid = _safe_get_attr(a, "move_id")
        if isinstance(a, MoveNode) or (mid and mid in processed_move_ids):
            continue

        # heading management: emit a section header when the action type
        # changes, separating sections with a horizontal rule.
        if atype in hdrs:
            if hdrs[atype] != cur_sec:
                if cur_sec:
                    lines.append("---\n")
                lines.append(hdrs[atype])
                cur_sec = hdrs[atype]
        else:
            if cur_sec != "## Other Changes\n":
                if cur_sec:
                    lines.append("---\n")
                lines.append("## Other Changes\n")
                cur_sec = "## Other Changes\n"

        try:
            summary: List[str] = []

            # ------------------------------------------------------------- #
            # INSERT NODE
            # ------------------------------------------------------------- #
            if isinstance(a, InsertNode):
                pxp = _safe_get_attr(a, "parent_xpath", "target")
                pos = _safe_get_attr(a, "pos", "position", default="N/A")
                node_s = _safe_get_attr(a, "node")
                tag = node_s.tag if node_s is not None else _safe_get_attr(a, "tag") or "[?]"
                if node_s is not None:
                    raw = etree.tostring(
                        node_s, pretty_print=False, encoding="unicode", method="html"
                    ).strip()
                    content = textwrap.shorten(raw, context_chars * 2, placeholder="…")
                else:
                    content = "[No structure]"
                summary = [
                    f"### Inserted `<{tag}>`",
                    f"- **Location:** Into `{pxp}` (Pos: {pos})",
                    "- **Content:**",
                    "  ```html",
                    f"  {content}",
                    "  ```\n",
                ]

            # ------------------------------------------------------------- #
            # DELETE NODE
            # ------------------------------------------------------------- #
            elif isinstance(a, DeleteNode):
                xp = _safe_get_attr(a, "node", "node_xpath", "target")
                onode = _get_element_by_xpath_from_tree(xp, orig_doc)
                # `is not None` (not truthiness): childless lxml elements are falsy.
                tag = onode.tag if onode is not None else "[?]"
                content = _node_plain_text(onode, max_len=context_chars) if onode is not None else "[?]"
                summary = [
                    f"### Deleted `<{tag}>`",
                    f"- **Location:** `{xp}`",
                    "- **Content:**",
                    "  ```text",
                    f"  {content}",
                    "  ```\n",
                ]

            # ------------------------------------------------------------- #
            # TEXT UPDATE
            # ------------------------------------------------------------- #
            elif isinstance(a, UpdateTextIn):
                xp = _safe_get_attr(a, "node", "node_xpath")
                onode = _get_element_by_xpath_from_tree(xp, orig_doc)
                mnode = _get_element_by_xpath_from_tree(xp, mod_doc)
                old = _node_plain_text(onode, max_len=context_chars) if onode is not None else "[?]"
                new = _node_plain_text(mnode, max_len=context_chars) if mnode is not None else "[?]"
                tag = onode.tag if onode is not None else (mnode.tag if mnode is not None else "[?]")
                # Skip no-op updates (e.g. whitespace-only differences already
                # normalized away by _node_plain_text).
                if old != new:
                    summary = [
                        f"### Text Change in `<{tag}>`",
                        f"- **Location:** `{xp}`",
                        f"- **Old:** `{old}`",
                        f"- **New:** `{new}`\n",
                    ]

            # ------------------------------------------------------------- #
            # ATTRIBUTE-LEVEL CHANGES
            # ------------------------------------------------------------- #
            elif isinstance(a, (UpdateAttrib, InsertAttrib, DeleteAttrib, RenameAttrib)):
                xp = _safe_get_attr(a, "node", "node_xpath")
                onode = _get_element_by_xpath_from_tree(xp, orig_doc)
                mnode = _get_element_by_xpath_from_tree(xp, mod_doc)
                tag = onode.tag if onode is not None else (mnode.tag if mnode is not None else "[?]")

                details = ""
                if isinstance(a, UpdateAttrib):
                    name = _safe_get_attr(a, "name")
                    old_v = onode.get(name) if onode is not None and name else "[?]"
                    new_v = _safe_get_attr(a, "value", "new")
                    details = f"- **Update:** `{name}`\n- **Old:** `{old_v}`\n- **New:** `{new_v}`"
                elif isinstance(a, InsertAttrib):
                    name = _safe_get_attr(a, "name")
                    val = _safe_get_attr(a, "value")
                    details = f"- **Insert:** `{name}` = `{val}`"
                elif isinstance(a, DeleteAttrib):
                    name = _safe_get_attr(a, "name")
                    old_v = onode.get(name) if onode is not None and name else "[?]"
                    details = f"- **Delete:** `{name}` (was `{old_v}`)"
                elif isinstance(a, RenameAttrib):
                    old_n = _safe_get_attr(a, "old_name")
                    new_n = _safe_get_attr(a, "new_name")
                    val = onode.get(old_n) if onode is not None and old_n else "[?]"
                    details = f"- **Rename:** `{old_n}` → `{new_n}` (value: `{val}`)"

                if details:
                    summary = [f"### Attribute Change in `<{tag}>` (`{xp}`)", details + "\n"]

            # ------------------------------------------------------------- #
            # RENAME NODE
            # ------------------------------------------------------------- #
            elif "RenameNode" in globals() and isinstance(a, RenameNode):
                xp = _safe_get_attr(a, "node", "node_xpath")
                new_tag = _safe_get_attr(a, "new_tag", "new_name")
                onode = _get_element_by_xpath_from_tree(xp, orig_doc)
                # `is not None` (not truthiness): childless lxml elements are falsy.
                old_tag = onode.tag if onode is not None else "[?]"
                summary = [
                    "### Node Rename",
                    f"- **Location:** `{xp}`",
                    f"- **Old Tag:** `{old_tag}`",
                    f"- **New Tag:** `{new_tag}`\n",
                ]

            # append to global list
            lines.extend(summary)

        except Exception as exc:  # pragma: no-cover
            logger.error("Markdown summary error: %s | %s", a, exc, exc_info=True)
            lines.extend(["\n---\nError: " + type(a).__name__ + "\n---\n"])

    return "\n".join(lines)
1298 | 
1299 | 
1300 | # ─────────────────────────────────────────────────────────────────────────────
1301 | #                         Fuzzy move pairing helpers
1302 | # ─────────────────────────────────────────────────────────────────────────────
@dataclass
class _NodeFP:
    """Fingerprint of an element, used to pair deleted and inserted nodes
    that likely represent a move (see ``_augment_actions_with_fuzzy_moves``)."""

    xpath: str            # xpath of the element within its source tree
    tag: str              # lower-cased tag name
    text_norm: str        # normalized full text content
    tokens: List[str]     # alphanumeric tokens extracted from text_norm
    shingles: Set[str]    # k-token shingles over tokens (k=4 at build time)
    simhash: int          # 64-bit simhash computed over the shingles
    anchor_sig: str       # signature of up to 4 ancestors (tag#id.class steps)
    attrs_sig: str        # sorted signature of anchor / data-* attributes
    length: int           # token count (== len(tokens))
    node: _Element        # the underlying lxml element
    parent_xpath: Optional[str] = None  # for inserts: resolved parent xpath
    pos: Optional[int] = None           # for inserts: numeric child index, if known
    action_index: int = -1  # index in actions list
1318 | 
1319 | 
def _tokenize_for_fp(s: str) -> List[str]:
    """Lower-case *s* and split it into runs of ASCII letters and digits."""
    return [match.group(0) for match in re.finditer(r"[A-Za-z0-9]+", s.lower())]
1322 | 
1323 | 
def _make_shingles(tokens: List[str], k: int = 4) -> Set[str]:
    """Return the set of *k*-token shingles of *tokens*.

    When fewer than *k* tokens are available the individual tokens are
    returned instead (empty input yields an empty set).
    """
    if len(tokens) < k:
        return set(tokens)
    shingles: Set[str] = set()
    for start in range(len(tokens) - k + 1):
        shingles.add(" ".join(tokens[start : start + k]))
    return shingles
1328 | 
1329 | 
def _simhash64(items: Set[str]) -> int:
    """Compute a 64-bit simhash over *items* (0 for an empty set).

    Each item is hashed to 64 bits with blake2b; every hash votes +1/-1 per
    bit position, and bit *b* of the result is set when the total vote at
    position *b* is non-negative.
    """
    if not items:
        return 0
    votes = [0] * 64
    for item in items:
        digest = hashlib.blake2b(item.encode("utf-8", "replace"), digest_size=8).hexdigest()
        h = int(digest, 16)
        for bit in range(64):
            votes[bit] += 1 if (h >> bit) & 1 else -1
    result = 0
    for bit in range(64):
        if votes[bit] >= 0:
            result |= 1 << bit
    return result
1344 | 
1345 | 
def _simhash_sim(a: int, b: int) -> float:
    """Similarity in [0, 1] between two 64-bit simhashes: 1 - Hamming/64."""
    if a == b:
        return 1.0
    diff = a ^ b
    try:
        hamming = diff.bit_count()
    except AttributeError:  # pragma: no cover - int.bit_count needs Python 3.10+
        hamming = bin(diff).count("1")
    return 1.0 - hamming / 64.0
1355 | 
1356 | 
def _ancestor_anchor_signature(el: _Element, depth: int = 4) -> str:
    """Build a '/'-joined signature of up to *depth* ancestors of *el*.

    Each step is ``tag#id.class`` (with leading/trailing dots stripped),
    ordered from the outermost visited ancestor down to the immediate parent.
    """
    segments: List[str] = []
    ancestor = el.getparent()
    visited = 0
    while ancestor is not None and visited < depth:
        el_id = ancestor.get("id") or ""
        el_cls = ancestor.get("class") or ""
        tag_name = ancestor.tag.lower() if hasattr(ancestor, "tag") else ""
        segments.append(f"{tag_name}#{el_id}.{el_cls}".strip("."))
        ancestor = ancestor.getparent()
        visited += 1
    return "/".join(reversed(segments))
1370 | 
1371 | 
def _attrs_signature(el: _Element) -> str:
    """Serialize *el*'s anchor-relevant attributes as a sorted ``k=v`` string.

    Only attributes whose lower-cased name is in ``_ANCHOR_ATTRS`` or starts
    with ``data-`` are included; values are normalized via ``_normalize_text``.
    Returns "" for None or attribute-less input.
    """
    if el is None or not hasattr(el, "attrib"):
        return ""
    pairs = [
        f"{key.lower()}={_normalize_text(value)}"
        for key, value in el.attrib.items()
        if key.lower() in _ANCHOR_ATTRS or key.lower().startswith("data-")
    ]
    return "|".join(sorted(pairs))
1382 | 
1383 | 
def _build_fp_for_element(el: _Element, xpath: str) -> Optional[_NodeFP]:
    """Fingerprint *el* for fuzzy move matching.

    Returns ``None`` when the element is missing or its normalized text is
    too short (fewer than ``_MOVE_MIN_CHARS`` characters or fewer than
    ``_MOVE_MIN_TOKENS`` tokens) to be paired reliably.
    """
    if el is None:
        return None
    normalized = _get_normalized_full_text(el)
    if len(normalized) < _MOVE_MIN_CHARS:
        return None
    tokens = _tokenize_for_fp(normalized)
    if len(tokens) < _MOVE_MIN_TOKENS:
        return None
    # Fall back to raw tokens should shingling ever produce an empty set.
    shingles = _make_shingles(tokens, 4) or set(tokens)
    tag_name = el.tag.lower() if hasattr(el, "tag") else ""
    return _NodeFP(
        xpath=xpath,
        tag=tag_name,
        text_norm=normalized,
        tokens=tokens,
        shingles=shingles,
        simhash=_simhash64(shingles),
        anchor_sig=_ancestor_anchor_signature(el),
        attrs_sig=_attrs_signature(el),
        length=len(tokens),
        node=el,
    )
1407 | 
1408 | 
def _resolve_insert_element(
    ins_action: Any, mod_doc: _ElementTree
) -> Tuple[Optional[_Element], Optional[str], Optional[int]]:
    """Locate the element an InsertNode action produced within *mod_doc*.

    Returns ``(element, parent_xpath, index)``; any component may be ``None``
    when it cannot be determined.  Resolution strategy, in priority order:
    1. the action's own ``node`` element, positioned inside its parent;
    2. the parent xpath plus a numeric or before/after sibling position;
    3. the sibling xpath alone.
    All lookups are best-effort: failures fall through to the next strategy
    or degrade to a ``None`` element with whatever location info is known.
    """
    parent_xpath = _safe_get_attr(ins_action, "parent_xpath", "target")
    pos = _safe_get_attr(ins_action, "pos", "position")
    sib_xpath = _safe_get_attr(ins_action, "sibling_xpath")
    el = _safe_get_attr(ins_action, "node")
    # Strategy 1: the action carries the inserted element itself — find its
    # index among the parent's children by matching tag + leading text.
    if isinstance(el, _Element):
        try:
            if parent_xpath:
                parent = _get_element_by_xpath_from_tree(parent_xpath, mod_doc)
                if parent is not None:
                    target_idx = None
                    # Compare only the first 40 normalized chars — enough to
                    # disambiguate siblings without full-text comparison.
                    target_text = _get_normalized_full_text(el)[:40]
                    for i, ch in enumerate(list(parent)):
                        if getattr(ch, "tag", None) == getattr(el, "tag", None):
                            if _get_normalized_full_text(ch)[:40] == target_text:
                                target_idx = i
                                break
                    if isinstance(pos, int) or (isinstance(pos, str) and pos.isdigit()):
                        idx = int(pos)
                    elif target_idx is not None:
                        idx = target_idx
                    else:
                        idx = len(parent)
                    return el, parent_xpath, idx
        except Exception:
            pass

    # Strategy 2: resolve via the parent xpath and the position hint.
    if parent_xpath:
        parent = _get_element_by_xpath_from_tree(parent_xpath, mod_doc)
        if parent is not None:
            try:
                if isinstance(pos, int) or (isinstance(pos, str) and str(pos).isdigit()):
                    return parent[int(pos)], parent_xpath, int(pos)
                if pos in ("before", "after") and sib_xpath:
                    sib = _get_element_by_xpath_from_tree(sib_xpath, mod_doc)
                    if sib is not None:
                        par = sib.getparent()
                        idx = par.index(sib) + (1 if pos == "after" else 0)
                        try:
                            return par[idx], par.getroottree().getpath(par), idx
                        except Exception:
                            # idx may point past the last child — keep location.
                            return None, par.getroottree().getpath(par), idx
                # No usable position: report an append at the end.  Note that
                # parent[len(parent)] always raises, so the element is None
                # here and only (parent_xpath, idx) is returned.
                idx = len(parent)
                try:
                    return parent[idx], parent_xpath, idx
                except Exception:
                    return None, parent_xpath, idx
            except Exception:
                return None, parent_xpath, None

    # Strategy 3: only a sibling xpath is available.
    if sib_xpath:
        sib = _get_element_by_xpath_from_tree(sib_xpath, mod_doc)
        if sib is not None:
            par = sib.getparent()
            idx = par.index(sib) + (1 if pos == "after" else 0) if pos in ("before", "after") else len(par)
            try:
                return par[idx], par.getroottree().getpath(par), idx
            except Exception:
                return None, par.getroottree().getpath(par), idx

    return None, None, None
1472 | 
1473 | 
def _score_pair(a: _NodeFP, b: _NodeFP) -> float:
    """Similarity score in [0, 1] between two node fingerprints.

    Blends text similarity (the better of sequence ratio and shingle
    Jaccard), simhash similarity, ancestor-signature similarity, attribute
    agreement, and a tag bonus; pairs with strongly mismatched lengths are
    penalized.  The result is clamped to [0, 1].
    """
    ratio = difflib.SequenceMatcher(None, a.text_norm, b.text_norm, autojunk=False).ratio()
    jaccard = len(a.shingles & b.shingles) / max(1, len(a.shingles | b.shingles))
    content = max(ratio, jaccard)
    sim = _simhash_sim(a.simhash, b.simhash)
    anchor = difflib.SequenceMatcher(None, a.anchor_sig, b.anchor_sig, autojunk=False).ratio()
    if a.attrs_sig and a.attrs_sig == b.attrs_sig:
        attr = 1.0
    elif a.attrs_sig and b.attrs_sig:
        attr = 0.5
    else:
        attr = 0.0
    if a.tag == b.tag:
        tag_bonus = 0.05
    elif a.tag in _BLOCK_TAGS and b.tag in _BLOCK_TAGS:
        tag_bonus = 0.03
    else:
        tag_bonus = -0.10
    score = 0.45 * content + 0.25 * sim + 0.20 * anchor + 0.10 * attr + tag_bonus
    length_ratio = min(a.length, b.length) / max(a.length, b.length)
    if length_ratio < 0.5:
        score *= 0.85
    return max(0.0, min(1.0, score))
1487 | 
1488 | 
def _augment_actions_with_fuzzy_moves(
    actions: List[Any],
    orig_doc: _ElementTree,
    mod_doc: _ElementTree,
    *,
    threshold: float = _MOVE_SIM_THRESHOLD,
    min_tokens: int = _MOVE_MIN_TOKENS,
    min_chars: int = _MOVE_MIN_CHARS,
    max_pairs: int = _MOVE_MAX_CANDIDATES,
) -> List[Any]:
    """Pair DeleteNode + InsertNode actions that represent moves and emit MoveNode.

    Fingerprints every deleted element (resolved in *orig_doc*) and every
    inserted element (resolved in *mod_doc*), scores delete/insert pairs with
    ``_score_pair``, and greedily converts the best-scoring pairs (score >=
    *threshold*) into MoveNode actions injected in place of the inserts.
    Candidate generation stops once *max_pairs* candidates exist.  Returns
    the original list unchanged when nothing can be paired.

    NOTE(review): *min_tokens* and *min_chars* are accepted but never
    referenced in the body — the module-level ``_MOVE_MIN_*`` constants are
    applied inside ``_build_fp_for_element`` instead; confirm whether these
    parameters should be forwarded.
    """
    if not actions:
        return actions

    del_fps: List[_NodeFP] = []
    ins_fps: List[_NodeFP] = []

    # Pass 1: fingerprint deletions and insertions, remembering each
    # action's index into the original list.
    for idx, a in enumerate(actions):
        if isinstance(a, DeleteNode):
            xp = _safe_get_attr(a, "node", "node_xpath", "target")
            if not xp:
                continue
            el = _get_element_by_xpath_from_tree(xp, orig_doc)
            fp = _build_fp_for_element(el, xp)
            if fp:
                fp.action_index = idx
                del_fps.append(fp)

        elif isinstance(a, InsertNode):
            # Prefer the element resolved in the modified tree; fall back to
            # the action's own node structure when resolution fails.
            el, parent_xpath, pos = _resolve_insert_element(a, mod_doc)
            target_el = el or _get_element_by_xpath_from_tree(parent_xpath or "", mod_doc)
            if target_el is None:
                continue
            if el is None:
                node_struct = _safe_get_attr(a, "node")
                if isinstance(node_struct, _Element):
                    target_el = node_struct
            xp = None
            try:
                if el is not None:
                    xp = el.getroottree().getpath(el)
                elif parent_xpath is not None:
                    xp = parent_xpath
            except Exception:
                xp = parent_xpath or ""
            fp = _build_fp_for_element(target_el, xp or "")
            if fp:
                fp.parent_xpath = parent_xpath
                fp.pos = pos if (isinstance(pos, int) or (isinstance(pos, str) and str(pos).isdigit())) else None
                fp.action_index = idx
                ins_fps.append(fp)

    if not del_fps or not ins_fps:
        return actions

    # Pass 2: score cross pairs, applying cheap pre-filters (tag match,
    # length ratio, simhash) before the expensive _score_pair call.
    cand: List[Tuple[float, int, int]] = []
    for d in del_fps:
        for i in ins_fps:
            if len(cand) >= max_pairs:
                break
            if (d.tag != i.tag) and not (d.tag in _BLOCK_TAGS and i.tag in _BLOCK_TAGS):
                continue
            len_ratio = min(d.length, i.length) / max(d.length, i.length)
            if len_ratio < 0.40:
                continue
            if _simhash_sim(d.simhash, i.simhash) < 0.60:
                continue
            s = _score_pair(d, i)
            if s >= threshold:
                cand.append((s, d.action_index, i.action_index))
        if len(cand) >= max_pairs:
            break

    if not cand:
        return actions

    # Pass 3: greedy one-to-one matching — best score first.
    cand.sort(reverse=True)
    matched_d: Set[int] = set()
    matched_i: Set[int] = set()
    inject_moves: Dict[int, List[Any]] = {}

    def _mk_move_id(src_xp: str, tgt_px: str, pos_val: Optional[int]) -> str:
        # Deterministic short id derived from source/target/position.
        h = hashlib.blake2b(f"{src_xp}|{tgt_px}|{pos_val}".encode("utf-8", "replace"), digest_size=6).hexdigest()
        return f"mv_{h}"

    for score, d_idx, i_idx in cand:
        if d_idx in matched_d or i_idx in matched_i:
            continue
        d_act = actions[d_idx]
        i_act = actions[i_idx]
        src_xpath = _safe_get_attr(d_act, "node", "node_xpath", "target")
        tgt_parent = _safe_get_attr(i_act, "parent_xpath", "target")
        pos = _safe_get_attr(i_act, "pos", "position")
        sib_xpath = _safe_get_attr(i_act, "sibling_xpath")

        # Normalize before/after sibling positions into a numeric child index.
        if isinstance(pos, str) and pos in ("before", "after") and sib_xpath:
            try:
                sib = _get_element_by_xpath_from_tree(sib_xpath, mod_doc)
                if sib is not None:
                    par = sib.getparent()
                    tgt_parent = par.getroottree().getpath(par)
                    pos = int(par.index(sib) + (1 if pos == "after" else 0))
            except Exception:
                pass
        elif not (isinstance(pos, int) or (isinstance(pos, str) and str(pos).isdigit())):
            try:
                parent_el = _get_element_by_xpath_from_tree(tgt_parent, mod_doc) if tgt_parent else None
                if parent_el is not None:
                    pos = len(parent_el)
            except Exception:
                pos = "into"

        move_id = _mk_move_id(str(src_xpath), str(tgt_parent), int(pos) if str(pos).isdigit() else -1)
        # MoveNode's constructor signature differs across versions — try the
        # positional form first, then fall back to keyword arguments.
        try:
            if isinstance(pos, int) or (isinstance(pos, str) and str(pos).isdigit()):
                pos_int = int(pos)
                mv = MoveNode(src_xpath, tgt_parent, pos_int, move_id)  # type: ignore[call-arg]
            else:
                mv = MoveNode(src_xpath, tgt_parent, "into", move_id)  # type: ignore[call-arg]
        except TypeError:
            mv = MoveNode(node=src_xpath, target=tgt_parent, pos=pos, move_id=move_id)  # type: ignore[call-arg]

        inject_moves.setdefault(i_idx, []).append(mv)
        matched_d.add(d_idx)
        matched_i.add(i_idx)
        logger.debug(
            f"FuzzyMove paired del#{d_idx} -> ins#{i_idx} (score={score:.3f}, id={move_id})"
        )

    if not inject_moves:
        return actions

    # Pass 4: rebuild the action list — drop matched deletes, replace matched
    # inserts with their MoveNode(s), keep everything else in order.
    new_actions: List[Any] = []
    for idx, act in enumerate(actions):
        if idx in matched_d:
            continue
        if idx in matched_i:
            for mv in inject_moves.get(idx, []):
                new_actions.append(mv)
            continue
        new_actions.append(act)

    logger.info(
        f"FuzzyMove: converted {len(matched_i)} insert/delete pairs into {sum(len(v) for v in inject_moves.values())} moves."
    )
    return new_actions
1635 | 
1636 | 
1637 | # ─────────────────────────────────────────────────────────────────────────────
1638 | #                               XSLT template
1639 | # ─────────────────────────────────────────────────────────────────────────────
# XSLT 1.0 stylesheet that converts diff:op-annotated markup into ins/del/span
# wrappers.  NOTE: literal angle brackets in attribute text must be written as
# &lt; / &gt; — a raw "<" makes the stylesheet malformed XML and unparseable.
_XMLDIFF_XSLT_REVISED = """<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:diff="http://namespaces.shoobx.com/diff"
    exclude-result-prefixes="diff">
  <!-- Removed xmlns:json and the comment -->
  <xsl:output method="html" omit-xml-declaration="yes" indent="no"/>
  <xsl:param name="diff-ns-uri" select="'http://namespaces.shoobx.com/diff'"/>

  <!-- Match all nodes and attributes, copy them -->
  <xsl:template match="@*|node()">
    <xsl:copy>
      <xsl:apply-templates select="@*|node()"/>
    </xsl:copy>
  </xsl:template>

  <!-- Template for inserted elements -->
  <xsl:template match="*[@diff:op='insert']">
    <ins class="diff-insert">
      <xsl:call-template name="copy-element-content"/>
    </ins>
  </xsl:template>

  <!-- Template for move target elements -->
  <xsl:template match="*[@diff:op='move-target']">
    <ins class="diff-move-target" data-move-id="{@diff:move-id}">
      <xsl:call-template name="copy-element-content"/>
    </ins>
  </xsl:template>

  <!-- Template for deleted elements -->
  <xsl:template match="*[@diff:op='delete']">
    <del class="diff-delete">
      <xsl:call-template name="copy-element-content"/>
    </del>
  </xsl:template>

  <!-- Template for move source elements -->
  <xsl:template match="*[@diff:op='move-source']">
    <del class="diff-move-source" data-move-id="{@diff:move-id}">
      <xsl:call-template name="copy-element-content"/>
    </del>
  </xsl:template>

    <!-- Template for elements with updated text/tail -->
    <xsl:template match="*[@diff:op='update-text' or
                        @diff:op='update-tail' or
                        @diff:op='update-text-context']">
    <span class="diff-update-container">
        <xsl:attribute name="title">
        <xsl:text>Original: </xsl:text>
        <xsl:value-of select="@diff:old-value"/>
        <xsl:if test="@diff:op='update-text-context'">
            <xsl:text> (</xsl:text>
            <xsl:value-of select="@diff:detail"/>
            <xsl:text>)</xsl:text>
        </xsl:if>
        </xsl:attribute>
        <xsl:call-template name="copy-element-content"/>
    </span>
    </xsl:template>

  <!-- Template for elements with attribute changes (if not already handled by insert/delete/move/text) -->
  <!-- Increased priority to override base copy if only attribs changed -->
  <xsl:template match="*[@diff:op='update-attrib']" priority="2">
     <!-- Check if the node ALSO has a major op; if so, let that template handle the wrapper -->
     <xsl:choose>
        <xsl:when test="@diff:op='insert' or @diff:op='delete' or @diff:op='move-target' or @diff:op='move-source' or starts-with(@diff:op, 'update-text') or @diff:op='rename-node'">
            <!-- Already handled by a more specific template, just copy content -->
             <xsl:call-template name="copy-element-content"/>
        </xsl:when>
        <xsl:otherwise>
            <!-- Only attribute changes, wrap in span -->
            <span class="diff-attrib-change">
              <xsl:attribute name="title">
                <xsl:call-template name="format-attribute-changes">
                  <xsl:with-param name="changes" select="@diff:attributes"/>
                </xsl:call-template>
              </xsl:attribute>
              <xsl:call-template name="copy-element-content"/>
            </span>
        </xsl:otherwise>
     </xsl:choose>
  </xsl:template>

 <xsl:template match="*[@diff:op='rename-node']">
    <span class="diff-rename-node">
      <xsl:attribute name="title">Renamed from &lt;<xsl:value-of select="@diff:old-value"/>&gt; to &lt;<xsl:value-of select="@diff:new-value"/>&gt;</xsl:attribute>
      <xsl:call-template name="copy-element-content"/>
    </span>
  </xsl:template>

  <!-- Helper template to copy element content excluding diff attributes -->
  <xsl:template name="copy-element-content">
    <xsl:element name="{name()}" namespace="{namespace-uri()}">
      <!-- Copy non-diff attributes -->
      <xsl:apply-templates select="@*[not(namespace-uri()=$diff-ns-uri)]"/>
      <!-- Recursively apply templates to child nodes -->
      <xsl:apply-templates select="node()"/>
    </xsl:element>
  </xsl:template>

  <!-- Helper template for attribute changes (basic display for XSLT 1.0) -->
  <xsl:template name="format-attribute-changes">
    <xsl:param name="changes"/>
    <xsl:text>Attrs changed: </xsl:text>
    <!-- XSLT 1.0 cannot parse JSON. Display raw string. -->
    <xsl:value-of select="$changes"/>
  </xsl:template>

   <!-- Handle inline text changes explicitly -->
  <xsl:template match="ins[@class='diff-insert-text'] | del[@class='diff-delete-text']">
    <xsl:copy-of select="."/>
  </xsl:template>

  <!-- Prevent diff:* attributes from being copied to the output -->
  <xsl:template match="@diff:*" priority="10"/>

</xsl:stylesheet>"""
1759 | 
1760 | 
1761 | # ─────────────────────────────────────────────────────────────────────────────
1762 | #                               Public tool
1763 | # ─────────────────────────────────────────────────────────────────────────────
1764 | 
1765 | 
@with_tool_metrics
@with_error_handling
async def create_html_redline(
    original_html: str,
    modified_html: str,
    *,
    detect_moves: bool = True,
    formatting_tags: Optional[List[str]] = None,
    ignore_whitespace: bool = True,
    include_css: bool = True,
    add_navigation: bool = True,
    output_format: str = "html",
    use_tempfiles: bool = False,
    run_tidy: bool = False,
    generate_markdown: bool = False,
    markdown_path: str = "detected_redline_differences.md",
) -> Dict[str, Any]:
    """Generate a redline HTML document comparing two HTML inputs.

    Pipeline: preprocess both documents (optional Tidy, whitespace
    normalization, <body> deduplication), inject synthetic node IDs for
    stable matching, diff with xmldiff, augment delete+insert pairs into
    moves, apply the actions with ``RedlineXMLFormatter``, strip the
    synthetic IDs, render via XSLT, then post-process the HTML
    (CSS / navigation UI).

    Args:
        original_html: The "before" HTML document (required, non-empty).
        modified_html: The "after" HTML document (required, non-empty).
        detect_moves: Annotate matched delete+insert pairs as moves.
        formatting_tags: Accepted for API compatibility; not referenced in
            this function body.
        ignore_whitespace: Normalize whitespace before diffing.
        include_css: Inject Tailwind/fonts/styles into the output document.
        add_navigation: Add change-navigation UI to the output document.
        output_format: "html" for a full document, "fragment" for body-only.
        use_tempfiles: Parse very large inputs via temporary files.
        run_tidy: Pre-clean inputs with HTML Tidy when available.
        generate_markdown: Also build a Markdown summary of the changes.
        markdown_path: Path echoed back in the result for the summary
            (note: the file itself is not written by this function).

    Returns:
        Dict with "redline_html" (or "redline_html_base64" plus
        "output_is_base64" when the output exceeds ~10MB), "stats",
        "processing_time", "success", and optional markdown keys.

    Raises:
        ToolInputError: On invalid inputs or failed HTML preparation.
    """
    global _id_counter  # Reset below so synthetic IDs restart per invocation
    t0 = time.time()
    logger.info("Starting HTML redline generation...")

    # --- Input Validation ---
    if not original_html or not isinstance(original_html, str):
        raise ToolInputError("original_html required")
    if not modified_html or not isinstance(modified_html, str):
        raise ToolInputError("modified_html required")
    if output_format not in {"html", "fragment"}:
        raise ToolInputError("output_format must be 'html' | 'fragment'")

    # --- Initialization for variables used outside try ---
    orig_tree: Optional[_ElementTree] = None
    mod_tree: Optional[_ElementTree] = None
    original_tree_pristine: Optional[_ElementTree] = None
    modified_tree_pristine: Optional[_ElementTree] = None

    logger.debug("Preprocessing HTML documents...")
    try:
        # --- Preprocessing ---
        orig_root, mod_root = _preprocess_html_docs(
            original_html,
            modified_html,
            ignore_whitespace=ignore_whitespace,
            use_tempfiles=use_tempfiles,
            run_tidy=run_tidy,
        )
        if orig_root is None or mod_root is None:
            raise ToolInputError("Preprocessing failed to return root elements.")

        # --- Reset Counter & Inject IDs ---
        logger.debug("Resetting ID counter and injecting synthetic IDs...")
        _id_counter = itertools.count(1)  # Fresh counter per call
        _inject_synthetic_ids(orig_root)  # Inject into original root
        _inject_synthetic_ids(mod_root)   # Inject into modified root
        logger.debug("Synthetic ID injection complete.")

        # --- Create Trees & Pristine Copies ---
        orig_tree = etree.ElementTree(orig_root)
        mod_tree = etree.ElementTree(mod_root)

        # The formatter and markdown summary need untouched copies that still
        # carry the synthetic IDs used to resolve action XPaths.
        logger.debug("Creating pristine copies with IDs for formatter...")
        original_tree_pristine = deepcopy(orig_tree)
        modified_tree_pristine = deepcopy(mod_tree)
        logger.debug("Pristine copies created.")

    except Exception as e:
        logger.exception("Preprocessing, ID injection, or copying failed.")
        # Ensure pristine trees are None if we failed before creating them
        original_tree_pristine = None
        modified_tree_pristine = None
        raise ToolInputError("Failed HTML preparation") from e

    # --- Check if pristine copies were successfully created before proceeding ---
    if original_tree_pristine is None or modified_tree_pristine is None:
        # Normally unreachable (the except above re-raises), kept as a guard.
        logger.error("Pristine trees for diffing are missing after preparation step.")
        return {
            "redline_html": "<!-- Error: Failed to prepare documents for diffing -->",
            "stats": {"error": "Document preparation failed"},
            "processing_time": time.time() - t0,
            "success": False,
        }

    # --- Diff Actions (Using Synthetic IDs) ---
    logger.debug("Calculating differences using xmldiff with synthetic IDs...")
    # Options for the xmldiff Differ class constructor
    differ_opts: Dict[str, Any] = {
        "ratio_mode": "accurate",
        "fast_match": False,
        "F": 0.6,
        "uniqueattrs": ["data-diff-id"],
    }

    actions: List[Any] = []
    stats: Dict[str, Any] = {}
    markdown_summary = ""
    annotated_tree: Optional[_ElementTree] = None

    try:
        # Pass the trees WITH IDs to the diff engine: orig_tree/mod_tree have
        # the structure and synthetic IDs needed for reliable node matching.
        actions = main.diff_trees(
            orig_tree,
            mod_tree,
            diff_options=differ_opts,
        )
        logger.info(f"xmldiff generated {len(actions)} actions using synthetic IDs.")

        # --- Fuzzy move augmentation (Delete+Insert -> MoveNode) ---
        try:
            actions = _augment_actions_with_fuzzy_moves(
                actions,
                original_tree_pristine,
                modified_tree_pristine,
                threshold=_MOVE_SIM_THRESHOLD,
                min_tokens=_MOVE_MIN_TOKENS,
                min_chars=_MOVE_MIN_CHARS,
                max_pairs=_MOVE_MAX_CANDIDATES,
            )
            logger.info(
                f"After FuzzyMove: {len(actions)} actions (moves inflated, deletes/inserts deflated)."
            )
        except Exception as e:
            # Best-effort augmentation: fall back to the raw action list.
            logger.exception(f"Fuzzy move augmentation failed: {e}")

        # Debug: Log first few actions
        if actions:
            logger.debug(f"First 5 actions generated: {actions[:5]}")
        else:
            logger.warning("xmldiff generated NO actions.")

        # A very high insert/delete ratio usually means node matching failed
        # (e.g. synthetic IDs were not honored), so warn loudly.
        insert_delete_ratio = (sum(1 for a in actions if isinstance(a, (InsertNode, DeleteNode))) / len(actions)) if actions else 0
        if insert_delete_ratio > 0.9:
            logger.warning(f"High ratio ({insert_delete_ratio:.2f}) of Insert/Delete actions. Node matching via data-diff-id might have failed.")

        # --- Generate Markdown Summary (if requested) ---
        if generate_markdown and actions:
            logger.debug("Generating Markdown summary...")
            # Pass the pristine copies (which include IDs for XPath lookup within the summary generation)
            markdown_summary = _generate_markdown_summary(
                orig_doc=original_tree_pristine, mod_doc=modified_tree_pristine, actions=actions
            )

        # --- Apply Actions using Formatter ---
        logger.debug("Applying actions using RedlineXMLFormatter...")
        formatter = RedlineXMLFormatter(
            detect_moves=detect_moves,
            normalize=formatting.WS_BOTH if ignore_whitespace else formatting.WS_NONE,
        )
        # The formatter needs the original pristine tree (with IDs) to find
        # nodes referenced by actions (it works on a copy of it) and the
        # modified pristine tree for lookups such as move targets.
        annotated_tree = formatter.format(actions, original_tree_pristine, modified_tree_pristine)
        stats = formatter.processed_actions
        logger.debug(f"Formatting complete. Stats: {stats}")

    except ToolError as te:
        logger.error(f"Diff engine error: {te}", exc_info=True)
        return {
            "redline_html": f"<!-- Diff Engine Error: {html_stdlib.escape(str(te))} -->",
            "stats": {"error": str(te)},
            "processing_time": time.time() - t0,
            "success": False,
        }
    except Exception as e:
        logger.exception("Error during diff/formatting.")
        return {
            "redline_html": "<!-- Error during diff/formatting -->",
            "stats": {"error": str(e)},
            "processing_time": time.time() - t0,
            "success": False,
        }
    finally:
        # Clear potentially large list to free memory
        actions.clear()
        # Drop references to large trees so they can be collected promptly
        orig_tree = mod_tree = original_tree_pristine = modified_tree_pristine = None

    # --- Remove Synthetic IDs from Final Output ---
    if annotated_tree is not None:
        logger.debug("Removing synthetic IDs from the final annotated tree...")
        count_removed = 0
        for el in annotated_tree.iter():
            if isinstance(el, _Element) and el.attrib.pop("data-diff-id", None):
                count_removed += 1
        logger.debug(f"Removed {count_removed} synthetic IDs from final output.")
    else:
        logger.error("Annotated tree is None after formatting.")
        return {
            "redline_html": "<!-- Error: Formatting produced no result -->",
            "stats": stats if stats else {"error": "Formatting failed"},
            "processing_time": time.time() - t0,
            "success": False,
        }

    # --- Apply XSLT ---
    logger.debug("Applying revised XSLT transformation...")
    redline_html = "<!-- XSLT Transformation Failed -->"
    try:
        xslt_root = etree.fromstring(_XMLDIFF_XSLT_REVISED.encode())
        transform = etree.XSLT(xslt_root)
        redline_doc = transform(annotated_tree)
        if redline_doc.getroot() is not None:
            redline_html = etree.tostring(
                redline_doc, encoding="unicode", method="html", pretty_print=False
            )
            logger.debug("XSLT transformation successful.")
        else:
            logger.error("XSLT transformation resulted in an empty document.")
            redline_html = "<!-- XSLT empty result -->"
    except Exception as e:
        logger.exception("XSLT transformation failed.")
        redline_html = f"<!-- XSLT Error: {html_stdlib.escape(str(e))} -->"

    # --- Post-processing ---
    logger.debug("Post-processing HTML output...")
    final_redline_html = await _postprocess_redline(
        redline_html,
        include_css=include_css,
        add_navigation=add_navigation,
        output_format=output_format,
    )
    logger.debug("Post-processing complete.")

    # --- Final Result ---
    dt = time.time() - t0
    success_flag = (
        stats.get("errors", 0) == 0
        and "<!-- XSLT" not in redline_html  # Check for XSLT error comments
    )
    result: Dict[str, Any] = {"stats": stats, "processing_time": dt, "success": success_flag}

    # Handle large output: very big documents are Base64-encoded so they can
    # be transported safely by the tool layer.
    size_bytes = len(final_redline_html.encode("utf-8", errors="ignore"))
    logger.info(f"Generated redline HTML size: {size_bytes / 1024:.2f} KB")
    if size_bytes > 10_000_000:  # 10MB limit
        logger.warning(f"Redline HTML size ({size_bytes} bytes) exceeds limit, encoding Base64.")
        try:
            result["redline_html_base64"] = base64.b64encode(
                final_redline_html.encode("utf-8")
            ).decode("ascii")
            result["output_is_base64"] = True
        except Exception as e:
            logger.error(f"Base64 encoding failed: {e}")
            result["redline_html"] = "<!-- Error: Output too large & Base64 failed -->"
            result["success"] = False
        # Avoid keeping large string in memory if encoded
        del final_redline_html
    else:
        result["redline_html"] = final_redline_html
        result["output_is_base64"] = False

    if generate_markdown:
        result["markdown_summary"] = markdown_summary
        if markdown_path:
            # NOTE(review): only the resolved path is reported; nothing is
            # written to it here — confirm callers expect that.
            result["markdown_path"] = str(Path(markdown_path).resolve())

    logger.info(
        f"HTML redline generation finished in {dt:.3f} seconds. Success: {result['success']}"
    )
    return result
2050 | 
2051 | 
2052 | # ─────────────────────────────────────────────────────────────────────────────
2053 | #                           Pre‑processing helpers
2054 | # ─────────────────────────────────────────────────────────────────────────────
2055 | def _check_tidy_available():
2056 |     try:
2057 |         res = subprocess.run(
2058 |             ["tidy", "--version"], capture_output=True, timeout=1, check=False, text=True
2059 |         )
2060 |         return res.returncode == 0 and "HTML Tidy" in res.stdout
2061 |     except Exception:
2062 |         return False
2063 | 
2064 | 
def _run_html_tidy(html: str) -> str:
    """Run HTML Tidy on *html* and return the tidied markup.

    Tidy is invoked in-place (``-m``) on a temporary copy of the input.
    On any failure (missing binary, timeout, I/O error) the original
    string is returned unchanged — this is best-effort cleanup and
    never raises.
    """
    tidied_html = html
    with tempfile.TemporaryDirectory() as td:
        infile = Path(td, "input.html")
        infile.write_text(html, encoding="utf-8")
        # Quiet, conservative settings: keep empty elements, no line
        # wrapping, and force output even when Tidy reports errors.
        cmd = [
            "tidy",
            "-q",
            "-m",
            "--tidy-mark",
            "no",
            "--drop-empty-elements",
            "no",
            "--wrap",
            "0",
            "--show-warnings",
            "no",
            "--show-errors",
            "0",
            "--force-output",
            "yes",
            "-utf8",
            str(infile),
        ]
        try:
            # Result object is not needed: --force-output makes the file the
            # source of truth regardless of Tidy's exit status.
            subprocess.run(cmd, capture_output=True, text=True, timeout=15, check=False)
            tidied_html = infile.read_text(encoding="utf-8")
        except Exception as e:
            logger.error(f"Tidy failed: {e}")
    return tidied_html
2095 | 
2096 | 
def _normalize_tree_whitespace(root: _Element) -> None:
    """Collapse whitespace in every .text/.tail of the tree, in place.

    Values that normalize to the empty string become None so lxml treats
    them as absent text nodes rather than empty ones.
    """
    if root is None:
        return

    def _clean(value):
        # Shared by both the .text and .tail assignments below.
        return _normalize_text(value) or None

    # Visit the root plus every descendant node; comments and PIs also
    # expose .text/.tail, hence the hasattr checks.
    for item in root.xpath('. | .//node()'):
        if hasattr(item, 'text'):
            item.text = _clean(item.text)
        if hasattr(item, 'tail'):
            item.tail = _clean(item.tail)
2108 | 
def _deduplicate_body(root: _Element) -> None:
    """If an <html> element has more than one <body>, merge children
    into the first and delete the rest. Modifies the tree in-place.

    Unlike a naive children-only merge, this also preserves each duplicate
    body's leading ``.text`` (text appearing before its first child), which
    would otherwise be silently dropped.
    """
    if root is None or root.tag.lower() != 'html':
        # Only operate on the root <html> element
        return

    bodies = root.xpath('./body | ./BODY')  # Case-insensitive check
    if len(bodies) <= 1:
        return  # Nothing to do

    logger.warning(f"Found {len(bodies)} <body> elements; merging into the first.")
    main_body = bodies[0]
    parent = main_body.getparent()  # Should be the <html> tag
    if parent is None:
        return  # Should not happen

    def _append_text(text: str) -> None:
        # Attach stray text at the end of main_body: as the tail of its last
        # child, or as main_body's own .text when it has no children yet.
        last_element = main_body[-1] if len(main_body) > 0 else None
        if last_element is not None:
            last_element.tail = (last_element.tail or "") + text
        else:
            main_body.text = (main_body.text or "") + text

    for i, extra_body in enumerate(bodies[1:], start=1):
        # BUGFIX: preserve text that precedes the duplicate body's first child.
        if extra_body.text:
            _append_text(extra_body.text)

        # Move children (append relocates each child)
        for child in list(extra_body):  # Iterate over a list copy
            main_body.append(child)

        # Append tail text if any
        if extra_body.tail:
            _append_text(extra_body.tail)

        # Remove the now-empty extra body
        try:
            parent.remove(extra_body)
        except ValueError:
            logger.error(f"Could not remove extra body #{i+1}, already removed?")

    logger.debug("Finished merging duplicate <body> elements.")
2152 | 
def _preprocess_html_docs(
    original_html: str,
    modified_html: str,
    *,
    ignore_whitespace: bool = True, # Keep this param, but handle normalization separately now
    use_tempfiles: bool = False,
    run_tidy: bool = False,
) -> Tuple[_Element, _Element]:
    """Preprocesses HTML, including optional Tidy and robust whitespace normalization.

    Steps: (1) optionally clean each input with HTML Tidy, (2) parse with a
    recovering lxml parser that keeps comments, PIs and blank text, (3) when
    ``ignore_whitespace`` is set, normalize .text/.tail whitespace across
    both trees, (4) merge any duplicate <body> elements.

    Args:
        original_html: The "before" HTML source; blank input is replaced by
            a minimal placeholder document.
        modified_html: The "after" HTML source; same placeholder rule.
        ignore_whitespace: Normalize whitespace after parsing.
        use_tempfiles: Parse via temporary files when either input exceeds
            roughly 5MB.
        run_tidy: Run HTML Tidy first, if the executable is available.

    Returns:
        Tuple of (original_root, modified_root) lxml elements.

    Raises:
        ToolInputError: If parsing, whitespace normalization, or <body>
            deduplication fails.
    """

    # Substitute a minimal document for blank input so downstream steps
    # always have a parsable structure to work with.
    if not original_html.strip():
        original_html = "<html><body><p>Empty Document</p></body></html>" # Provide some structure
    if not modified_html.strip():
        modified_html = "<html><body><p>Empty Document</p></body></html>" # Provide some structure

    tidied_orig, tidied_mod = original_html, modified_html

    # 1. Optional Tidy (Run *before* parsing)
    if run_tidy:
        logger.debug("Running HTML Tidy...")
        if _check_tidy_available():
            try:
                tidied_orig = _run_html_tidy(original_html)
                tidied_mod = _run_html_tidy(modified_html)
                logger.debug("HTML Tidy completed.")
            except Exception as e:
                logger.warning(f"HTML Tidy failed: {e}. Proceeding without Tidy.")
        else:
            logger.warning("HTML Tidy requested but not available. Skipping.")

    # 2. Parse HTML (Crucially, DO NOT remove blank text here initially)
    logger.debug("Parsing HTML documents with lxml...")
    parser = lxml_html.HTMLParser(
        recover=True,
        encoding="utf-8",
        remove_comments=False, # Keep comments, they can affect structure/diff
        remove_pis=False,      # Keep processing instructions
        remove_blank_text=False, # IMPORTANT: keep blank text; normalization happens in step 3
    )
    o_root: Optional[_Element] = None
    m_root: Optional[_Element] = None
    try:
        # Use memory parsing unless very large docs require temp files
        if use_tempfiles and (len(tidied_orig) > 5e6 or len(tidied_mod) > 5e6): # 5MB limit example
            logger.debug("Using temporary files for parsing large documents.")
            with tempfile.TemporaryDirectory() as td:
                orig_p = Path(td, "orig.html")
                mod_p = Path(td, "mod.html")
                orig_p.write_text(tidied_orig, encoding="utf-8")
                mod_p.write_text(tidied_mod, encoding="utf-8")
                o_root = lxml_html.parse(str(orig_p), parser=parser).getroot()
                m_root = lxml_html.parse(str(mod_p), parser=parser).getroot()
        else:
            # Ensure bytes for fromstring (parser declares utf-8 encoding)
            o_root = lxml_html.fromstring(tidied_orig.encode("utf-8"), parser=parser)
            m_root = lxml_html.fromstring(tidied_mod.encode("utf-8"), parser=parser)

        if o_root is None or m_root is None:
            raise ToolInputError("HTML parsing yielded None root element(s).")
        logger.debug("HTML parsing successful.")

    except Exception as e:
        logger.exception(f"HTML parsing failed: {e}")
        raise ToolInputError("Failed HTML parsing.") from e

    # 3. Normalize Whitespace (Apply *after* parsing)
    if ignore_whitespace:
        logger.debug("Normalizing whitespace in parsed trees...")
        try:
            _normalize_tree_whitespace(o_root)
            _normalize_tree_whitespace(m_root)
            logger.debug("Whitespace normalization complete.")
        except Exception as e:
             logger.exception("Whitespace normalization failed.")
             raise ToolInputError("Failed whitespace normalization during preprocessing.") from e

    # 4. Deduplicate Body Tags (Apply *after* normalization) <-- NEW STEP
    logger.debug("Checking for and merging duplicate <body> tags...")
    try:
        _deduplicate_body(o_root)
        _deduplicate_body(m_root)
        logger.debug("Duplicate <body> tag check complete.")
    except Exception as e:
        logger.exception("Failed during <body> deduplication.")
        # Decide whether to raise or just warn
        raise ToolInputError("Failed <body> tag deduplication during preprocessing.") from e

    # The roots returned now have a consistent whitespace representation
    # AND guaranteed single <body> element (if originally within <html>)
    return o_root, m_root
2243 | 
2244 | 
2245 | # ─────────────────────────────────────────────────────────────────────────────
2246 | #                       Post‑processing (CSS / nav UI)
2247 | # ─────────────────────────────────────────────────────────────────────────────
async def _postprocess_redline(
    redline_html: str,
    *,
    include_css: bool = True,
    add_navigation: bool = True,
    output_format: str = "html",
) -> str:
    """Wrap the raw XSLT redline output into a polished HTML document.

    Ensures a full <html>/<head>/<body> skeleton, injects the base diff CSS,
    optional Tailwind styling/fonts, navigation/legend/minimap/theme-toggle
    UI, and a typographic content wrapper.

    Args:
        redline_html: HTML produced by the XSLT transformation step.
        include_css: Add the Tailwind CDN script, fonts, and custom styles.
        add_navigation: Add the navigation UI (full "html" output only).
        output_format: "html" for a complete document, "fragment" for the
            body's inner HTML only.

    Returns:
        The post-processed HTML string (a comment placeholder for empty input).
    """
    if not redline_html or not redline_html.strip():
        return "<!-- Empty output -->"
    soup = BeautifulSoup(redline_html, "html.parser")
    if not soup.find("html", recursive=False):
        # No document root: build a minimal skeleton and move the content in.
        new_soup = BeautifulSoup(
            "<!DOCTYPE html><html><head><title>Comparison</title></head><body></body></html>",
            "html.parser",
        )
        if new_soup.body:
            for el in soup.contents:
                if isinstance(el, Tag) or (isinstance(el, NavigableString) and el.strip()):
                    new_soup.body.append(deepcopy(el))
        soup = new_soup
    html_tag = soup.html
    head = soup.head
    body = soup.body
    if not head:
        head = soup.new_tag("head")
        head.append(soup.new_tag("title", string="Comparison"))
        html_tag.insert(0, head)
    if not body:
        body = soup.new_tag("body")
        # Insert the body right after <head> when something follows it,
        # otherwise append it at the end of <html>.
        target = head.find_next_sibling() if head else None
        if target:
            head.insert_after(body)
        else:
            html_tag.append(body)
    if not head.find("meta", attrs={"name": "viewport"}):
        head.insert(
            0,
            soup.new_tag(
                "meta",
                attrs={"name": "viewport", "content": "width=device-width, initial-scale=1.0"},
            ),
        )
    # Base diff CSS is always injected (idempotent via the data attribute).
    if not head.find("style", attrs={"data-base-diff": "1"}):
        head.append(BeautifulSoup(_get_base_diff_css(), "html.parser"))
    if include_css and not head.find("script", src=lambda s: s and "cdn.tailwindcss.com" in s):
        head.append(
            BeautifulSoup('<script src="https://cdn.tailwindcss.com"></script>', "html.parser")
        )
        if not head.find("link", href=lambda x: x and "fonts.googleapis.com" in x):
            # Preconnect hints for the font hosts, then the stylesheet itself.
            for u, a in [("fonts.googleapis.com", ""), ("fonts.gstatic.com", " crossorigin")]:
                head.append(
                    BeautifulSoup(f'<link rel="preconnect" href="https://{u}"{a}>', "html.parser")
                )
            head.append(
                BeautifulSoup(
                    '<link href="https://fonts.googleapis.com/css2?family=Newsreader:ital,opsz,wght@0,6..72,200..800;1,6..72,200..800&display=swap" rel="stylesheet">',
                    "html.parser",
                )
            )
        style_tag = soup.new_tag("style", type="text/tailwindcss")
        style_tag.string = _get_tailwind_css()
        head.append(style_tag)
    if add_navigation and output_format == "html":
        if not body.find("div", class_="redline-minimap"):
            body.append(
                BeautifulSoup(
                    """<div class="redline-minimap fixed right-1 top-10 bottom-10 w-1 bg-gray-100 dark:bg-gray-800 rounded z-40 hidden md:flex flex-col"></div>""",
                    "html.parser",
                )
            )
        if not body.find("div", class_="redline-navigation"):
            body.insert(
                0,
                BeautifulSoup(
                    """<div class="redline-navigation fixed top-2 right-2 bg-white/90 dark:bg-gray-800/90 p-2 rounded-lg shadow-lg z-50 text-xs backdrop-blur-sm"><div class="flex items-center"><button class="btn" onclick="goPrevChange()">Prev</button><button class="btn" onclick="goNextChange()">Next</button><span class="ml-2 px-2 py-1 bg-gray-100 dark:bg-gray-700 rounded" id="change-counter">-/-</span></div></div>""".replace(
                        'class="btn"',
                        'class="bg-gray-100 hover:bg-gray-200 dark:bg-gray-700 dark:hover:bg-gray-600 px-2 py-1 rounded mr-1 transition"',
                    )
                    .replace(
                        "Prev",
                        '<span class="hidden sm:inline">Previous</span><span class="sm:hidden">←</span>',
                    )
                    .replace(
                        "Next",
                        '<span class="hidden sm:inline">Next</span><span class="sm:hidden">→</span>',
                    ),
                    "html.parser",
                ),
            )
        if not body.find("div", class_="redline-legend"):
            body.append(
                BeautifulSoup(
                    """<div class="redline-legend fixed bottom-2 left-2 bg-white/90 dark:bg-gray-800/90 p-2 rounded-lg shadow-lg z-50 text-xs flex flex-wrap gap-2 backdrop-blur-sm"><span class="legend-item"><span class="legend-color bg-blue-100 ring-blue-300 dark:bg-blue-900/60 dark:ring-blue-700"></span>Insert</span><span class="legend-item"><span class="legend-color bg-rose-100 ring-rose-300 dark:bg-rose-900/60 dark:ring-rose-700"></span>Delete</span><span class="legend-item"><span class="legend-color bg-emerald-100 ring-emerald-300 dark:bg-emerald-900/60 dark:ring-emerald-700"></span>Move</span><span class="legend-item"><span class="legend-color bg-orange-100 ring-orange-300 dark:bg-orange-900/60 dark:ring-orange-700"></span>Attr</span></div>""".replace(
                        'class="legend-item"', 'class="flex items-center"'
                    ).replace(
                        'class="legend-color', 'class="inline-block w-3 h-3 rounded ring-1 mr-1'
                    ),
                    "html.parser",
                )
            )
        if not body.find("button", id="theme-toggle"):
            body.insert(
                1,
                BeautifulSoup(
                    """<button id="theme-toggle" title="Toggle theme" class="fixed top-2 left-2 z-50 p-2 bg-white dark:bg-gray-800 rounded-lg shadow-lg text-xs"><svg class="h-4 w-4 hidden dark:inline" fill="none" viewBox="0 0 24 24" stroke="currentColor"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 3v1m0 16v1m9-9h-1M4 12H3m15.364 6.364l-.707-.707M6.343 6.343l-.707-.707m12.728 0l-.707.707M6.343 17.657l-.707.707M16 12a4 4 0 11-8 0 4 4 0 018 0z"/></svg><svg class="h-4 w-4 dark:hidden" fill="none" viewBox="0 0 24 24" stroke="currentColor"><path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M20.354 15.354A9 9 0 018.646 3.646 9.003 9.003 0 0012 21a9.003 9.003 0 008.354-5.646z"/></svg></button>""",
                    "html.parser",
                ),
            )
        if not body.find("script", string=lambda s: s and "goNextChange" in s):
            script_tag = soup.new_tag("script")
            script_tag.string = _get_navigation_js()
            body.append(script_tag)
    # Ensure the body carries the required theming classes.
    body_classes = body.get("class", [])
    req_cls = [
        "font-['Newsreader']",
        "dark:text-gray-200",
        "dark:bg-gray-900",
        "transition-colors",
        "duration-200",
    ]
    for c in req_cls:
        if c not in body_classes:
            body_classes.append(c)
    body["class"] = body_classes
    if output_format == "html" and not any(
        isinstance(c, Tag) and "prose" in c.get("class", []) for c in body.contents
    ):
        # Wrap the diff content (but not the fixed UI chrome) in a prose container.
        wrapper = soup.new_tag(
            "div", **{"class": "prose lg:prose-xl dark:prose-invert mx-auto max-w-4xl px-4 py-8"}
        )
        # NOTE(review): bs4's Tag.select() does not document a `recursive`
        # kwarg — confirm this call does not raise with the installed soupsieve.
        ui_els = body.select(
            ".redline-navigation, .redline-legend, .redline-minimap, #theme-toggle, script",
            recursive=False,
        )
        content = [el for el in body.contents if el not in ui_els]
        for el in content:
            if isinstance(el, Tag) or (isinstance(el, NavigableString) and el.strip()):
                wrapper.append(el.extract())
        body.append(wrapper)
    final_html = body.decode_contents() if output_format == "fragment" else str(soup)
    logger.debug("HTML postprocessing finished.")
    return final_html
2391 | 
2392 | 
2393 | def _get_base_diff_css() -> str:
2394 |     # (Copied from previous correct version)
2395 |     return """
2396 |         <style data-base-diff="1">
2397 |           ins.diff-insert, .diff-move-target {color:#1d4ed8; background-color:#eff6ff; border:1px solid #93c5fd; padding:0 1px; margin:0 1px; border-radius:2px; text-decoration:none;}
2398 |           del.diff-delete, .diff-move-source {color:#b91c1c; background-color:#fef2f2; border:1px solid #fca5a5; padding:0 1px; margin:0 1px; border-radius:2px; text-decoration:line-through;}
2399 |           ins.diff-move-target {color:#047857; background-color:#ecfdf5; border:1px solid #6ee7b7;}
2400 |           del.diff-move-source {color:#065f46; background-color:#ecfdf599; border:1px dashed #6ee7b7; }
2401 |           span.diff-update-container > * {border-bottom: 1px dotted #f97316;}
2402 |           span.diff-attrib-change > * {box-shadow: 0px 0px 0px 1px #fb923c inset; }
2403 |           span.diff-rename-node > * {box-shadow: 0px 0px 0px 1px #a855f7 inset; }
2404 |           ins.diff-insert-text {color:#1e40af; text-decoration:underline; background:transparent; border:none; padding:0; margin:0;}
2405 |           del.diff-delete-text {color:#b91c1c; text-decoration:line-through; background:transparent; border:none; padding:0; margin:0;}
2406 |           /* Basic dark mode */
2407 |           @media (prefers-color-scheme: dark) {
2408 |             body { background-color: #1f2937; color: #d1d5db; }
2409 |             ins.diff-insert, .diff-move-target { color: #93c5fd; background-color: #1e3a8a; border-color: #3b82f6; }
2410 |             del.diff-delete, .diff-move-source { color: #fca5a5; background-color: #7f1d1d; border-color: #ef4444; }
2411 |             ins.diff-move-target { color: #6ee7b7; background-color: #065f46; border-color: #10b981; }
2412 |             del.diff-move-source { color: #a7f3d0; background-color: #064e3b; border-color: #34d399; }
2413 |             span.diff-update-container > * { border-color: #fb923c; }
2414 |             span.diff-attrib-change > * { box-shadow: 0px 0px 0px 1px #f97316 inset; }
2415 |             span.diff-rename-node > * { box-shadow: 0px 0px 0px 1px #c084fc inset; }
2416 |             ins.diff-insert-text {color:#60a5fa;}
2417 |             del.diff-delete-text {color:#f87171;}
2418 |           }
2419 |         </style>
2420 |         """
2421 | 
def _get_tailwind_css() -> str:
    """Return the Tailwind CSS source used to style redline output.

    Contains the three Tailwind directives plus an ``@layer components``
    block defining the insert/delete/move/update/attrib/rename diff classes,
    their dark-mode variants, hover states, and print overrides.  This is
    raw Tailwind source (``@apply`` rules), so it must be compiled by a
    Tailwind build step before it is usable as plain CSS — presumably that
    happens downstream; verify against the consumer of this string.
    """
    return """ @tailwind base;@tailwind components;@tailwind utilities; @layer components { .diff-insert, .diff-delete, .diff-move-target, .diff-move-source { @apply px-0.5 rounded-sm mx-[1px] transition duration-150; } ins.diff-insert, .diff-insert > ins { @apply text-blue-800 bg-blue-50 ring-1 ring-inset ring-blue-300/60 no-underline; } .dark ins.diff-insert, .dark .diff-insert > ins { @apply text-blue-200 bg-blue-900/40 ring-blue-500/30; } ins.diff-insert:hover, .diff-insert > ins:hover { @apply ring-2 ring-offset-1 ring-black/10 shadow-sm bg-blue-100 dark:bg-blue-800/60; } del.diff-delete, .diff-delete > del { @apply text-rose-800 bg-rose-50 ring-1 ring-inset ring-rose-300/60 line-through; } .dark del.diff-delete, .dark .diff-delete > del { @apply text-rose-200 bg-rose-900/40 ring-rose-500/30; } del.diff-delete:hover, .diff-delete > del:hover { @apply ring-2 ring-offset-1 ring-black/10 shadow-sm bg-rose-100 dark:bg-rose-800/60; } ins.diff-move-target, .diff-move-target > ins { @apply text-emerald-900 bg-emerald-50 ring-1 ring-emerald-400/60 no-underline border border-emerald-300; } .dark ins.diff-move-target, .dark .diff-move-target > ins { @apply text-emerald-200 bg-emerald-900/40 ring-emerald-500/30 border-emerald-700; } ins.diff-move-target:hover, .diff-move-target > ins:hover { @apply ring-2 ring-offset-1 ring-black/10 shadow-sm bg-emerald-100 dark:bg-emerald-800/60; } del.diff-move-source, .diff-move-source > del { @apply text-emerald-800/60 bg-emerald-50/50 line-through border border-dashed border-emerald-400/40; } .dark del.diff-move-source, .dark .diff-move-source > del { @apply text-emerald-300/60 bg-emerald-900/30 border-emerald-700/40; } del.diff-move-source:hover, .diff-move-source > del:hover { @apply bg-emerald-100/70 border-emerald-400 shadow-sm dark:bg-emerald-800/50; } span.diff-update-container { @apply border-b border-dotted border-orange-400 bg-orange-50/30; } .dark span.diff-update-container { @apply border-orange-500 bg-orange-900/30; } span.diff-update-container:hover { @apply bg-orange-100/50 dark:bg-orange-800/40; } span.diff-attrib-change { @apply ring-1 ring-orange-400/50 ring-inset bg-orange-50/30 backdrop-blur-sm rounded-sm; } .dark span.diff-attrib-change { @apply ring-orange-500/50 bg-orange-900/30; } span.diff-attrib-change:hover { @apply bg-orange-100/50 dark:bg-orange-800/40; } span.diff-rename-node { @apply ring-1 ring-purple-400/50 ring-inset bg-violet-50/30 backdrop-blur-sm rounded-sm; } .dark span.diff-rename-node { @apply ring-purple-500/50 bg-violet-900/30; } span.diff-rename-node:hover { @apply bg-violet-100/50 dark:bg-violet-800/40; } ins.diff-insert-text { @apply text-blue-700 dark:text-blue-300 underline decoration-dotted decoration-1 underline-offset-2 bg-transparent border-none ring-0 p-0 m-0; } del.diff-delete-text { @apply text-rose-700 dark:text-rose-300 line-through decoration-dotted decoration-1 bg-transparent border-none ring-0 p-0 m-0; } @media print { .redline-navigation, .redline-legend, .redline-minimap, #theme-toggle { @apply hidden; } ins, del, span[class*="diff-"] { @apply text-black !important; background-color: transparent !important; border: none !important; ring: none !important; box-shadow: none !important; } ins { @apply font-bold no-underline; } del { @apply italic line-through; } } } """
2424 | 
2425 | 
def _get_navigation_js() -> str:
    """Return the client-side JavaScript injected into redline HTML output.

    The script (run on ``DOMContentLoaded``) provides:
      * ``window.goPrevChange`` / ``window.goNextChange`` — cycle through all
        diff elements, with arrow-key bindings and an outline highlight;
      * a ``#change-counter`` display ("current/total");
      * a ``.redline-minimap`` with per-change colored markers, rebuilt on
        resize (debounced) and on theme change;
      * a ``#theme-toggle`` dark/light switch persisted via ``localStorage``;
      * paired hover highlighting of move source/target via ``data-move-id``.

    The element IDs/classes referenced here are expected to be produced by the
    HTML postprocessing step elsewhere in this module — confirm if that step
    changes.
    """
    return """ /* Combined JS */ document.addEventListener('DOMContentLoaded', () => { let _redlineChanges = null; let _changeIdx = -1; let _currentHi = null; const changeCounter = document.getElementById('change-counter'); const minimap = document.querySelector('.redline-minimap'); const container = document.querySelector('.prose') || document.body; function findAllChanges() { if (!_redlineChanges) { _redlineChanges = Array.from(container.querySelectorAll( 'ins.diff-insert, ins.diff-move-target, del.diff-delete, del.diff-move-source, span.diff-update-container, span.diff-attrib-change, span.diff-rename-node, ins.diff-insert-text, del.diff-delete-text' )); _redlineChanges = _redlineChanges.filter(el => { let p = el.parentElement; while (p && p !== container && p !== document.body) { if (_redlineChanges.includes(p)) return false; p = p.parentElement; } return true; }); _redlineChanges.sort((a, b) => { const c = a.compareDocumentPosition(b); if (c & Node.DOCUMENT_POSITION_FOLLOWING) return 1; if (c & Node.DOCUMENT_POSITION_PRECEDING) return -1; return 0; }); updateCounter(); if (_redlineChanges && _redlineChanges.length > 0) { _changeIdx = -1; } else { console.log("Redline Nav: No changes found."); } } return _redlineChanges; } function highlightCurrentChange(scrollTo = true) { if (!_redlineChanges || _changeIdx < 0 || _changeIdx >= _redlineChanges.length) return; const el = _redlineChanges[_changeIdx]; if (!el) return; if (_currentHi && _currentHi !== el && _redlineChanges.includes(_currentHi)) { _currentHi.style.outline = ''; _currentHi.style.boxShadow = ''; _currentHi.style.outlineOffset = ''; _currentHi.classList.remove('current-redline-change'); } el.style.outline = '2px solid orange'; el.style.outlineOffset = '2px'; el.style.boxShadow = '0 0 8px 1px rgba(255, 165, 0, 0.6)'; el.classList.add('current-redline-change'); if (scrollTo) { const rect = el.getBoundingClientRect(); const isVisible = rect.top >= 0 && rect.left >= 0 && rect.bottom <= (window.innerHeight || document.documentElement.clientHeight) && rect.right <= (window.innerWidth || document.documentElement.clientWidth); if (!isVisible) { el.scrollIntoView({ behavior: 'smooth', block: 'center', inline: 'nearest' }); } } _currentHi = el; updateCounter(); updateMinimapHighlight(); } window.goPrevChange = () => { findAllChanges(); if (!_redlineChanges || _redlineChanges.length === 0) return; _changeIdx = (_changeIdx <= 0) ? _redlineChanges.length - 1 : _changeIdx - 1; highlightCurrentChange(); }; window.goNextChange = () => { findAllChanges(); if (!_redlineChanges || _redlineChanges.length === 0) return; _changeIdx = (_changeIdx >= _redlineChanges.length - 1) ? 0 : _changeIdx + 1; highlightCurrentChange(); }; function updateCounter() { if (changeCounter && _redlineChanges) { changeCounter.textContent = `${_redlineChanges.length > 0 ? _changeIdx + 1 : 0}/${_redlineChanges.length}`; } else if (changeCounter) { changeCounter.textContent = '0/0'; } } document.addEventListener("keydown", e => { if (e.target.tagName === 'INPUT' || e.target.tagName === 'TEXTAREA' || e.target.isContentEditable) return; if (e.key === "ArrowRight" && !e.altKey && !e.ctrlKey && !e.metaKey && !e.shiftKey) { goNextChange(); e.preventDefault(); } else if (e.key === "ArrowLeft" && !e.altKey && !e.ctrlKey && !e.metaKey && !e.shiftKey) { goPrevChange(); e.preventDefault(); } }); let minimapMarkers = []; function setupMinimap() { if (!minimap || !container) { return; } const changes = findAllChanges(); if (!changes || !changes.length) { minimap.style.display = 'none'; return; } minimap.innerHTML = ''; minimapMarkers = []; requestAnimationFrame(() => { const containerHeight = container.scrollHeight; if (containerHeight <= 0) { return; } changes.forEach((change, index) => { let type = ''; const cl = change.classList; const tagName = change.tagName.toUpperCase(); if (cl.contains('diff-insert') || (tagName === 'INS' && !cl.contains('diff-move-target'))) type = 'insert'; else if (cl.contains('diff-delete') || (tagName === 'DEL' && !cl.contains('diff-move-source'))) type = 'delete'; else if (cl.contains('diff-move-target') || cl.contains('diff-move-source')) type = 'move'; else if (cl.contains('diff-attrib-change')) type = 'attrib'; else if (cl.contains('diff-update-container')) type = 'text'; else if (cl.contains('diff-rename-node')) type = 'rename'; else if (cl.contains('diff-insert-text')) type = 'insert'; else if (cl.contains('diff-delete-text')) type = 'delete'; else return; const relativePos = change.offsetTop / containerHeight; const marker = document.createElement('div'); marker.className = 'minimap-marker absolute w-full h-[3px] cursor-pointer opacity-75 hover:opacity-100 transition-opacity duration-150'; marker.style.top = `${Math.max(0, Math.min(100, relativePos * 100))}%`; if (type === 'insert') marker.classList.add('bg-blue-500'); else if (type === 'delete') marker.classList.add('bg-rose-500'); else if (type === 'move') marker.classList.add('bg-emerald-500'); else marker.classList.add('bg-orange-500'); marker.title = `${type.charAt(0).toUpperCase() + type.slice(1)} change (${index + 1}/${changes.length})`; marker.dataset.changeIndex = index; marker.addEventListener('click', () => { _changeIdx = index; highlightCurrentChange(); }); minimap.appendChild(marker); minimapMarkers.push(marker); }); minimap.style.display = 'flex'; updateMinimapHighlight(); }); } function updateMinimapHighlight() { minimapMarkers.forEach((marker, index) => { if (index === _changeIdx) { marker.style.transform = 'scaleX(1.5)'; marker.style.opacity = '1'; marker.style.zIndex = '10'; marker.classList.add('bg-yellow-400'); marker.classList.remove('bg-blue-500', 'bg-rose-500', 'bg-emerald-500', 'bg-orange-500'); } else { marker.style.transform = ''; marker.style.opacity = '0.75'; marker.style.zIndex = '1'; marker.classList.remove('bg-yellow-400'); const oClass = marker.title.includes('Insert') ? 'bg-blue-500' : marker.title.includes('Delete') ? 'bg-rose-500' : marker.title.includes('Move') ? 'bg-emerald-500' : 'bg-orange-500'; if (!marker.classList.contains(oClass)) { marker.classList.remove('bg-blue-500', 'bg-rose-500', 'bg-emerald-500', 'bg-orange-500'); marker.classList.add(oClass); } } }); } function debounce(func, wait) { let t; return function(...a) { const l = () => { clearTimeout(t); func(...a); }; clearTimeout(t); t = setTimeout(l, wait); }; } const debouncedSetupMinimap = debounce(setupMinimap, 250); window.addEventListener('resize', debouncedSetupMinimap); const themeToggle = document.getElementById('theme-toggle'); function applyTheme(isDark) { document.documentElement.classList.toggle('dark', isDark); localStorage.theme = isDark ? 'dark' : 'light'; setupMinimap(); } if (themeToggle) { const pDark = window.matchMedia('(prefers-color-scheme: dark)').matches; const cTheme = localStorage.theme === 'dark' || (!('theme' in localStorage) && pDark) ? 'dark' : 'light'; applyTheme(cTheme === 'dark'); themeToggle.addEventListener('click', () => { applyTheme(!document.documentElement.classList.contains('dark')); }); } function handleMoveHighlight(event) { const moveEl = event.target.closest("ins[data-move-id], del[data-move-id]"); if (!moveEl) return; const moveId = moveEl.dataset.moveId; if (!moveId) return; const isEnter = event.type === "mouseover"; document.querySelectorAll(`[data-move-id='${moveId}']`).forEach(el => { el.style.outline = isEnter ? "3px dashed #059669" : ""; el.style.outlineOffset = isEnter ? "2px" : ""; el.style.transition = 'outline 0.15s ease-in-out, outline-offset 0.15s ease-in-out'; }); } container.addEventListener("mouseover", handleMoveHighlight); container.addEventListener("mouseout", handleMoveHighlight); findAllChanges(); setupMinimap(); }); """
2428 | 
2429 | 
2430 | # ─────────────────────────────────────────────────────────────────────────────
2431 | #                       Plain‑text comparison (escaped)
2432 | # ─────────────────────────────────────────────────────────────────────────────
2433 | def _generate_text_redline(
2434 |     original_text: str, modified_text: str, *, diff_level: str = "word",
2435 | ) -> Tuple[str, Dict[str, int]]:
2436 |     """Return plain‑text diff with {- +} markers and [~ ~] for moves."""
2437 |     if diff_level == "char":
2438 |         orig_units, mod_units, joiner = list(original_text), list(modified_text), ""
2439 |     elif diff_level == "word":
2440 |         rx = r"(\w+[\S\w]*|\s+|[^\w\s])" # Keep whitespace as separate unit
2441 |         orig_units, mod_units, joiner = re.findall(rx, original_text), re.findall(rx, modified_text), ""
2442 |     else: # line level
2443 |         orig_units, mod_units, joiner = original_text.splitlines(True), modified_text.splitlines(True), ""
2444 | 
2445 |     sm = difflib.SequenceMatcher(None, orig_units, mod_units, autojunk=False)
2446 |     ops: List[Tuple[str, str]] = [] # Store ('tag', 'text') pairs
2447 |     for tag, i1, i2, j1, j2 in sm.get_opcodes():
2448 |         if tag == "equal":
2449 |             ops.append(("eq", joiner.join(orig_units[i1:i2])))
2450 |         elif tag == "delete":
2451 |             ops.append(("del", joiner.join(orig_units[i1:i2])))
2452 |         elif tag == "insert":
2453 |             ops.append(("ins", joiner.join(mod_units[j1:j2])))
2454 |         else: # replace
2455 |             # Treat replace as delete followed by insert for move detection
2456 |             ops.append(("del", joiner.join(orig_units[i1:i2])))
2457 |             ops.append(("ins", joiner.join(mod_units[j1:j2])))
2458 | 
2459 |     # --- Attempt Move Detection based on Content ---
2460 |     # Normalize whitespace and ignore case for matching identical blocks
2461 |     def _normalize_for_match(s: str) -> str:
2462 |         return re.sub(r'\s+', ' ', s.strip()).lower()
2463 | 
2464 |     dels: Dict[str, List[int]] = {} # Map normalized text -> list of deletion indices
2465 |     ins: Dict[str, List[int]] = {} # Map normalized text -> list of insertion indices
2466 |     paired: Dict[int, int] = {} # Map deletion index -> insertion index for identified moves
2467 | 
2468 |     for idx, (tag, txt) in enumerate(ops):
2469 |         if tag == "del":
2470 |             key = _normalize_for_match(txt)
2471 |             if key: # Only track non-empty deletions
2472 |                 dels.setdefault(key, []).append(idx)
2473 |         elif tag == "ins":
2474 |             key = _normalize_for_match(txt)
2475 |             if key: # Only track non-empty insertions
2476 |                 ins.setdefault(key, []).append(idx)
2477 | 
2478 |     # Find potential moves: identical normalized content deleted once and inserted once
2479 |     for key in set(dels) & set(ins):
2480 |         if len(dels[key]) == 1 and len(ins[key]) == 1:
2481 |             deletion_idx = dels[key][0]
2482 |             insertion_idx = ins[key][0]
2483 |             # Ensure they are not adjacent (which would be a replace)
2484 |             # This simple check might be too strict, but helps avoid marking simple replacements as moves
2485 |             if abs(deletion_idx - insertion_idx) > 1:
2486 |                 paired[deletion_idx] = insertion_idx # Mark as a move pair
2487 | 
2488 |     # --- Build Output String ---
2489 |     buf: List[str] = []
2490 |     ic = dc = mc = 0 # Insert, Delete, Move counts
2491 |     for idx, (tag, txt) in enumerate(ops):
2492 |         if idx in paired: # This is the deletion part of a move, skip it
2493 |             continue
2494 |         if idx in paired.values(): # This is the insertion part of a move
2495 |              # Escape markers within the moved text
2496 |              escaped_move = txt.replace("[~", "[ ~").replace("~]", "~ ]")
2497 |              buf.append(f"[~{escaped_move}~]")
2498 |              mc += 1
2499 |              continue
2500 | 
2501 |         # Handle regular operations
2502 |         if tag == "eq":
2503 |             buf.append(txt)
2504 |         elif tag == "del":
2505 |              # Escape markers within the deleted text
2506 |              escaped_del = txt.replace("[-", "[ -").replace("-]", "- ]")
2507 |              buf.append(f"[-{escaped_del}-]")
2508 |              dc += 1
2509 |         elif tag == "ins":
2510 |              # Escape markers within the inserted text
2511 |              escaped_ins = txt.replace("{+", "{ +").replace("+}", "+ }")
2512 |              buf.append(f"{{+{escaped_ins}+}}")
2513 |              ic += 1
2514 | 
2515 |     # --- Calculate Stats ---
2516 |     stats = {
2517 |         "total_changes": ic + dc + mc, # Total distinct changes
2518 |         "insertions": ic,
2519 |         "deletions": dc,
2520 |         "moves": mc,
2521 |         "text_updates": 0, # Not explicitly tracked with this method
2522 |         "attr_updates": 0,
2523 |         "other_changes": 0,
2524 |         "inline_insertions": ic + mc, # Count move insertions here?
2525 |         "inline_deletions": dc + mc, # Count move deletions here?
2526 |     }
2527 |     return "".join(buf), stats
2528 | 
2529 | 
2530 | # ─────────────────────────────────────────────────────────────────────────────
2531 | #                       Public wrapper for text docs
2532 | # ─────────────────────────────────────────────────────────────────────────────
@with_tool_metrics
@with_error_handling
async def compare_documents_redline(
    original_text: str,
    modified_text: str,
    *,
    file_format: str = "auto",
    detect_moves: bool = True,
    ignore_whitespace: bool = True,
    output_format: str = "html",
    diff_level: str = "word",
    include_css: bool = True,
    generate_markdown: bool = False,
    markdown_path: str = "detected_redline_differences.md",
    run_tidy: bool = False,
) -> Dict[str, Any]:
    """Compare two documents and produce a redline (tracked-changes view).

    Args:
        original_text: Baseline document as a string.
        modified_text: Revised document as a string.
        file_format: One of {"auto", "html", "text", "markdown", "latex"};
            "auto" runs heuristic detection via ``_detect_file_format``.
        detect_moves: Forwarded to ``create_html_redline`` (HTML output only).
        ignore_whitespace: Forwarded to ``create_html_redline`` (HTML output only).
        output_format: "html" for a styled HTML redline, "text" for inline
            ``{+..+}`` / ``[-..-]`` / ``[~..~]`` markers.
        diff_level: Text-diff granularity — "char", "word", or "line"
            (used only when output_format == "text").
        include_css: Forwarded to ``create_html_redline``.
        generate_markdown: Forwarded to ``create_html_redline``.
        markdown_path: Forwarded to ``create_html_redline``.
        run_tidy: Forwarded to ``create_html_redline``.

    Returns:
        Dict containing "redline_html" (HTML mode) or "redline" (text mode),
        plus "stats", "processing_time", and "success".

    Raises:
        ToolInputError: On invalid arguments or failed document conversion.
        ToolError: When a non-HTML input needs conversion but the document
            conversion tool is unavailable (code="DEPENDENCY_MISSING").
    """
    t0 = time.time()
    logger.info(f"Starting doc comparison. Input: {file_format}, Output: {output_format}")
    # --- Input validation ---
    if not isinstance(original_text, str):
        raise ToolInputError("original_text must be str")
    if not isinstance(modified_text, str):
        raise ToolInputError("modified_text must be str")
    valid_formats = {"auto", "html", "text", "markdown", "latex"}
    if file_format not in valid_formats:
        raise ToolInputError(f"Invalid file_format: {file_format}")
    if output_format not in {"html", "text"}:
        raise ToolInputError(f"Invalid output_format: {output_format}")
    if diff_level not in {"char", "word", "line"}:
        raise ToolInputError(f"Invalid diff_level: {diff_level}")

    # --- Fast path: identical inputs produce a zero-change result without diffing ---
    if original_text == modified_text:
        logger.info("Documents are identical.")
        # Zeroed stats dict keyed by the formatter's action names, so the shape
        # matches a real diff result.
        stats = {k: 0 for k in RedlineXMLFormatter().processed_actions}
        stats["total_changes"] = 0
        if output_format == "html":
            fmt = file_format if file_format != "auto" else _detect_file_format(original_text)
            html = ""
            try:
                # Render the (single) document as HTML: pass HTML through, render
                # Markdown directly, otherwise convert via the conversion tool,
                # falling back to an escaped <pre> dump.
                if fmt == "html":
                    html = original_text
                elif fmt == "markdown":
                    md_ext = [
                        "fenced_code",
                        "tables",
                        "sane_lists",
                        "nl2br",
                        "footnotes",
                        "attr_list",
                    ]
                    html = markdown.markdown(original_text, extensions=md_ext)
                elif _DOC_CONVERSION_AVAILABLE:
                    res = await convert_document(
                        document_data=original_text.encode("utf-8"),
                        input_format_hint=fmt,
                        output_format="markdown",
                    )
                    if res.get("success") and res.get("content"):
                        md_ext = [
                            "fenced_code",
                            "tables",
                            "sane_lists",
                            "nl2br",
                            "footnotes",
                            "attr_list",
                        ]
                        html = markdown.markdown(res["content"], extensions=md_ext)
                    else:
                        logger.warning(f"Conv failed: {res.get('error')}")
                        html = f"<pre>{html_stdlib.escape(original_text)}</pre>"
                else:
                    html = f"<pre>{html_stdlib.escape(original_text)}</pre>"
                # Full HTML document wrapper, but no nav UI since there are no changes.
                final_html = await _postprocess_redline(
                    html, include_css=True, add_navigation=False, output_format="html"
                )
            except Exception as e:
                # Best-effort fallback: never fail the identical-documents case.
                logger.error(f"Error prep identical: {e}")
                final_html = f"<!DOCTYPE html><html><body><pre>{html_stdlib.escape(original_text)}</pre></body></html>"
            return {
                "redline_html": final_html,
                "stats": stats,
                "processing_time": time.time() - t0,
                "success": True,
            }
        else:
            return {
                "redline": original_text,
                "stats": stats,
                "processing_time": time.time() - t0,
                "success": True,
            }

    actual_format = file_format
    if actual_format == "auto":
        actual_format = _detect_file_format(original_text)
        logger.info(f"Auto-detected format: {actual_format}")

    if output_format == "html":
        logger.info(f"Generating HTML redline for '{actual_format}' input...")
        orig_html = original_text
        mod_html = modified_text
        # Non-HTML inputs are converted to Markdown (via the conversion tool)
        # and then rendered to HTML before diffing.
        if actual_format != "html":
            if not _DOC_CONVERSION_AVAILABLE:
                raise ToolError(
                    f"Input '{actual_format}', but conversion tool unavailable.",
                    code="DEPENDENCY_MISSING",
                )
            logger.info(f"Converting '{actual_format}' input to Markdown then HTML...")
            try:
                # NOTE(review): enhance_with_llm=False presumably keeps conversion
                # deterministic so both sides diff consistently — confirm against
                # convert_document's contract.
                params = {
                    "output_format": "markdown",
                    "extraction_strategy": "hybrid_direct_ocr",
                    "enhance_with_llm": False,
                }
                res_o = await convert_document(
                    document_data=original_text.encode("utf-8"),
                    input_format_hint=actual_format,
                    **params,
                )
                if not res_o.get("success"):
                    raise ToolError(
                        f"Orig conv failed: {res_o.get('error')}", code="CONVERSION_FAILED"
                    )
                res_m = await convert_document(
                    document_data=modified_text.encode("utf-8"),
                    input_format_hint=actual_format,
                    **params,
                )
                if not res_m.get("success"):
                    raise ToolError(
                        f"Mod conv failed: {res_m.get('error')}", code="CONVERSION_FAILED"
                    )
                md_ext = ["fenced_code", "tables", "sane_lists", "nl2br", "footnotes", "attr_list"]
                orig_html = markdown.markdown(res_o["content"], extensions=md_ext)
                mod_html = markdown.markdown(res_m["content"], extensions=md_ext)
            except Exception as e:
                logger.error(f"Doc conversion failed: {e}", exc_info=True)
                raise ToolInputError("Failed doc conversion.") from e
        # Delegate the actual diff + rendering to the HTML redline pipeline.
        html_result = await create_html_redline(
            original_html=orig_html,
            modified_html=mod_html,
            detect_moves=detect_moves,
            ignore_whitespace=ignore_whitespace,
            output_format="html",
            include_css=include_css,
            add_navigation=True,
            generate_markdown=generate_markdown,
            markdown_path=markdown_path,
            run_tidy=run_tidy,
        )
        # Report end-to-end time (including any conversion), not just diff time.
        html_result["processing_time"] = time.time() - t0
        return html_result

    elif output_format == "text":
        logger.info(f"Generating plain text redline (level: {diff_level})...")
        # Text mode diffs the raw input verbatim — markup syntax is diffed too.
        o_plain, m_plain = original_text, modified_text
        if actual_format == "html":
            logger.warning("Generating text diff from HTML; tags included.")
        elif actual_format == "markdown":
            logger.warning("Generating text diff from Markdown; syntax included.")
        txt, stats = _generate_text_redline(o_plain, m_plain, diff_level=diff_level)
        return {
            "redline": txt,
            "stats": stats,
            "processing_time": time.time() - t0,
            "success": True,
        }

    # Defensive: unreachable given the validation above.
    raise ToolInputError("Invalid output format.")
2701 | 
2702 | 
2703 | # ─────────────────────────────────────────────────────────────────────────────
2704 | #                               Aux helpers
2705 | # ─────────────────────────────────────────────────────────────────────────────
2706 | def _detect_file_format(text: str) -> str:
2707 |     if not text or not text.strip():
2708 |         return "text"
2709 |     t = text.lower().strip()
2710 |     if t.startswith("<!doctype html") or t.startswith("<html"):
2711 |         return "html"
2712 |     hs = sum(f"<{tag}" in t for tag in ("body", "div", "p", "table", "h1", "br")) + sum(
2713 |         f"</{tag}>" in t for tag in ("body", "div", "p", "table", "h1")
2714 |     )
2715 |     lrx = [
2716 |         r"\\documentclass",
2717 |         r"\\begin\{document\}",
2718 |         r"\\section\{",
2719 |         r"\\usepackage\{",
2720 |         r"\$.+\$",
2721 |         r"\\begin\{",
2722 |     ]
2723 |     ls = sum(bool(re.search(p, text, re.M | re.I)) for p in lrx)
2724 |     mrx = [
2725 |         r"^[#]+\s+",
2726 |         r"^>\s+",
2727 |         r"^\s*[-*+]\s+",
2728 |         r"^\s*[0-9]+\.\s+",
2729 |         r"```|~~~",
2730 |         r"\|.*\|.*\|",
2731 |         r"\*{1,2}[^*\s]",
2732 |         r"`[^`]+`",
2733 |         r"\[.*?\]\(.*?\)",
2734 |     ]
2735 |     lines = text.splitlines()
2736 |     ms = (
2737 |         sum(bool(re.search(p, l)) for p in mrx[:5] for l in lines[:30])  # noqa: E741
2738 |         + sum( 
2739 |             bool(re.search(p, text, re.M)) for p in mrx[5:]
2740 |         )
2741 |     )  
2742 |     if ls >= 2 and hs < 2:
2743 |         return "latex"
2744 |     if hs >= 4 or (hs >= 2 and "<body" in t):
2745 |         return "html"
2746 |     if ms >= 3 and hs <= 1 and ls <= 0:
2747 |         return "markdown"
2748 |     if hs >= 2 and ms >= 2:
2749 |         return "html"
2750 |     return "text"
2751 | 
2752 | 
2753 | # ─────────────────────────────────────────────────────────────────────────────
2754 | #                               Metadata
2755 | # ─────────────────────────────────────────────────────────────────────────────
__all__ = ["create_html_redline", "compare_documents_redline", "RedlineXMLFormatter"]
__version__ = "1.5.0"
# Fix: isoformat() on an aware UTC datetime already emits a "+00:00" offset, so
# the previous code (isoformat(...) + "Z") produced a malformed stamp like
# "2024-01-01T00:00:00+00:00Z".  Replace the offset with the canonical "Z".
__updated__ = (
    _dt.datetime.now(timezone.utc).isoformat(timespec="seconds").replace("+00:00", "Z")
)
2759 | 
```
Page 39/45FirstPrevNextLast