moisnx/arc # codebase.md

This is page 4 of 10. Use http://codebase.md/moisnx/arc?lines=true&page={x} to view the full context.

# Directory Structure

```
├── .clang-format
├── .config
│   └── arceditor
│       ├── config.yaml
│       ├── keybinds.conf
│       └── themes
│           ├── catppuccin-mocha.theme
│           ├── cyberpunk-neon.theme
│           ├── default.theme
│           ├── dracula.theme
│           ├── github_dark.theme
│           ├── gruvbox_dark.theme
│           ├── gruvbox_light.theme
│           ├── high_constrast_dark.theme
│           ├── monokai.theme
│           ├── onedark.theme
│           ├── solarized_dark.theme
│           ├── solarized_light.theme
│           ├── tokyo_night.theme
│           └── vscode_light.theme
├── .github
│   └── assets
│       └── screenshot.gif
├── .gitignore
├── .gitmessage
├── .gitmodules
├── build.md
├── CMakeLists.txt
├── deps
│   └── tree-sitter-markdown
│       ├── .editorconfig
│       ├── .gitattributes
│       ├── .github
│       │   ├── screenshot.png
│       │   └── workflows
│       │       ├── ci.yml
│       │       ├── publish.yml
│       │       └── release.yml
│       ├── .gitignore
│       ├── binding.gyp
│       ├── bindings
│       │   ├── go
│       │   │   ├── binding_test.go
│       │   │   ├── markdown_inline.go
│       │   │   └── markdown.go
│       │   ├── node
│       │   │   ├── binding_test.js
│       │   │   ├── binding.cc
│       │   │   ├── index.d.ts
│       │   │   ├── index.js
│       │   │   └── inline.js
│       │   ├── python
│       │   │   ├── tests
│       │   │   │   └── test_binding.py
│       │   │   └── tree_sitter_markdown
│       │   │       ├── __init__.py
│       │   │       ├── __init__.pyi
│       │   │       ├── binding.c
│       │   │       └── py.typed
│       │   ├── rust
│       │   │   ├── benchmark.rs
│       │   │   ├── build.rs
│       │   │   ├── lib.rs
│       │   │   └── parser.rs
│       │   └── swift
│       │       ├── .gitignore
│       │       └── TreeSitterMarkdownTests
│       │           └── TreeSitterMarkdownTests.swift
│       ├── Cargo.toml
│       ├── CMakeLists.txt
│       ├── common
│       │   ├── common.js
│       │   ├── common.mak
│       │   └── html_entities.json
│       ├── CONTRIBUTING.md
│       ├── go.mod
│       ├── LICENSE
│       ├── Makefile
│       ├── package-lock.json
│       ├── package.json
│       ├── Package.resolved
│       ├── Package.swift
│       ├── pyproject.toml
│       ├── README.md
│       ├── scripts
│       │   ├── build.js
│       │   └── test.js
│       ├── setup.py
│       ├── tree-sitter-markdown
│       │   ├── bindings
│       │   │   ├── c
│       │   │   │   ├── tree-sitter-markdown.h
│       │   │   │   └── tree-sitter-markdown.pc.in
│       │   │   └── swift
│       │   │       └── TreeSitterMarkdown
│       │   │           └── markdown.h
│       │   ├── CMakeLists.txt
│       │   ├── grammar.js
│       │   ├── Makefile
│       │   ├── package.json
│       │   ├── queries
│       │   │   ├── highlights.scm
│       │   │   └── injections.scm
│       │   ├── src
│       │   │   ├── grammar.json
│       │   │   ├── node-types.json
│       │   │   ├── parser.c
│       │   │   ├── scanner.c
│       │   │   └── tree_sitter
│       │   │       ├── alloc.h
│       │   │       ├── array.h
│       │   │       └── parser.h
│       │   └── test
│       │       └── corpus
│       │           ├── extension_minus_metadata.txt
│       │           ├── extension_pipe_table.txt
│       │           ├── extension_plus_metadata.txt
│       │           ├── extension_task_list.txt
│       │           ├── failing.txt
│       │           ├── issues.txt
│       │           └── spec.txt
│       ├── tree-sitter-markdown-inline
│       │   ├── bindings
│       │   │   ├── c
│       │   │   │   ├── tree-sitter-markdown-inline.h
│       │   │   │   └── tree-sitter-markdown-inline.pc.in
│       │   │   └── swift
│       │   │       └── TreeSitterMarkdownInline
│       │   │           └── markdown_inline.h
│       │   ├── CMakeLists.txt
│       │   ├── grammar.js
│       │   ├── Makefile
│       │   ├── package.json
│       │   ├── queries
│       │   │   ├── highlights.scm
│       │   │   └── injections.scm
│       │   ├── src
│       │   │   ├── grammar.json
│       │   │   ├── node-types.json
│       │   │   ├── parser.c
│       │   │   ├── scanner.c
│       │   │   └── tree_sitter
│       │   │       ├── alloc.h
│       │   │       ├── array.h
│       │   │       └── parser.h
│       │   └── test
│       │       └── corpus
│       │           ├── extension_latex.txt
│       │           ├── extension_strikethrough.txt
│       │           ├── extension_wikilink.txt
│       │           ├── failing.txt
│       │           ├── issues.txt
│       │           ├── spec.txt
│       │           └── tags.txt
│       └── tree-sitter.json
├── LICENSE
├── Makefile
├── quickstart.md
├── README.md
├── src
│   ├── core
│   │   ├── buffer.cpp
│   │   ├── buffer.h
│   │   ├── config_manager.cpp
│   │   ├── config_manager.h
│   │   ├── editor_delta.h
│   │   ├── editor_validation.h
│   │   ├── editor.cpp
│   │   └── editor.h
│   ├── features
│   │   ├── markdown_state.h
│   │   ├── syntax_config_loader.cpp
│   │   ├── syntax_config_loader.h
│   │   ├── syntax_highlighter.cpp
│   │   └── syntax_highlighter.h
│   ├── main.cpp
│   └── ui
│       ├── input_handler.cpp
│       ├── input_handler.h
│       ├── renderer.cpp
│       ├── renderer.h
│       ├── style_manager.cpp
│       └── style_manager.h
└── treesitter
    ├── languages.yaml
    └── queries
        ├── _javascript
        │   ├── highlights.scm
        │   ├── locals.scm
        │   └── tags.scm
        ├── _jsx
        │   ├── highlights.scm
        │   ├── indents.scm
        │   └── textobjects.scm
        ├── _typescript
        │   ├── highlights.scm
        │   ├── indents.scm
        │   ├── locals.scm
        │   ├── tags.scm
        │   └── textobjects.scm
        ├── bash
        │   ├── highlights.scm
        │   ├── indents.scm
        │   ├── injections.scm
        │   ├── rainbows.scm
        │   ├── tags.scm
        │   └── textobjects.scm
        ├── c
        │   ├── highlights.scm
        │   ├── indents.scm
        │   ├── injections.scm
        │   ├── locals.scm
        │   ├── rainbows.scm
        │   ├── tags.scm
        │   └── textobjects.scm
        ├── cpp
        │   ├── highlights.scm
        │   ├── indents.scm
        │   ├── injections.scm
        │   ├── rainbows.scm
        │   ├── tags.scm
        │   └── textobjects.scm
        ├── css
        │   ├── highlights.scm
        │   ├── indents.scm
        │   ├── injections.scm
        │   └── rainbows.scm
        ├── ecma
        │   ├── highlights.scm
        │   ├── indents.scm
        │   ├── injections.scm
        │   ├── locals.scm
        │   ├── rainbows.scm
        │   ├── README.md
        │   └── textobjects.scm
        ├── go
        │   ├── highlights.scm
        │   ├── indents.scm
        │   ├── injections.scm
        │   ├── locals.scm
        │   ├── rainbows.scm
        │   ├── tags.scm
        │   └── textobjects.scm
        ├── javascript
        │   ├── highlights.scm
        │   ├── indents.scm
        │   ├── injections.scm
        │   ├── locals.scm
        │   ├── rainbows.scm
        │   ├── tags.scm
        │   └── textobjects.scm
        ├── markdown
        │   ├── highlights.scm
        │   ├── injections.scm
        │   └── tags.scm
        ├── markdown.inline
        │   ├── highlights.scm
        │   └── injections.scm
        ├── python
        │   ├── highlights.scm
        │   ├── indents.scm
        │   ├── injections.scm
        │   ├── locals.scm
        │   ├── rainbows.scm
        │   ├── tags.scm
        │   └── textobjects.scm
        ├── rust
        │   ├── highlights.scm
        │   ├── indents.scm
        │   ├── injections.scm
        │   ├── locals.scm
        │   ├── rainbows.scm
        │   ├── tags.scm
        │   └── textobjects.scm
        ├── toml
        │   ├── highlights.scm
        │   ├── injections.scm
        │   ├── rainbows.scm
        │   └── textobjects.scm
        ├── tsx
        │   ├── highlights.scm
        │   ├── indents.scm
        │   ├── injections.scm
        │   ├── locals.scm
        │   ├── rainbows.scm
        │   ├── tags.scm
        │   └── textobjects.scm
        ├── typescript
        │   ├── highlights.scm
        │   ├── indents.scm
        │   ├── injections.scm
        │   ├── locals.scm
        │   ├── rainbows.scm
        │   ├── tags.scm
        │   └── textobjects.scm
        ├── yaml
        │   ├── highlights.scm
        │   ├── indents.scm
        │   ├── injections.scm
        │   ├── rainbows.scm
        │   └── textobjects.scm
        └── zig
            ├── highlights.scm
            ├── indents.scm
            ├── injections.scm
            └── textobjects.scm
```

# Files

--------------------------------------------------------------------------------
/src/features/syntax_highlighter.cpp:
--------------------------------------------------------------------------------

```cpp
   1 | #include "syntax_highlighter.h"
   2 | #include "src/core/config_manager.h"
   3 | #include <algorithm>
   4 | #include <cstring>
   5 | #include <fstream>
   6 | #include <sstream>
   7 | #ifdef _WIN32
   8 | #include <curses.h>
   9 | #else
  10 | #include <ncurses.h>
  11 | #endif
  12 | #include <iostream>
  13 | 
  14 | #ifdef TREE_SITTER_ENABLED
  15 | #include "language_registry.h" // Auto-generated by CMake
  16 | #include "tree_sitter/api.h"
  17 | #endif
  18 | 
  19 | SyntaxHighlighter::SyntaxHighlighter() // starts as "text", no config
  20 |     : config_loader_(std::make_unique<SyntaxConfigLoader>()),
  21 |       current_language_config_(nullptr), currentLanguage("text")
  22 | #ifdef TREE_SITTER_ENABLED
  23 |       , // the Tree-sitter handles below all start out null
  24 |       parser_(nullptr), tree_(nullptr), current_ts_language_(nullptr),
  25 |       current_ts_query_(nullptr)
  26 | #endif
  27 | {
  28 | #ifdef TREE_SITTER_ENABLED
  29 |   initializeTreeSitter(); // NOTE(review): the bool result is ignored here
  30 | #endif
  31 | }
  32 | 
  33 | SyntaxHighlighter::~SyntaxHighlighter()
  34 | {
  35 | #ifdef TREE_SITTER_ENABLED
  36 |   cleanupTreeSitter(); // deletes the query, the tree and the parser
  37 | #endif
  38 | }
  39 | 
  40 | // Loads every language configuration found in `config_directory` and
  41 | // registers a ConfigManager callback that reloads them later.
  42 | // Returns false (after calling loadBasicRules()) when loading fails; in
  43 | // that case no reload callback is registered.
  44 | bool SyntaxHighlighter::initialize(const std::string &config_directory)
  45 | {
  46 |   if (!config_loader_->loadAllLanguageConfigs(config_directory))
  47 |   {
  48 |     std::cerr << "Failed to load language configurations from: "
  49 |               << config_directory << std::endl;
  50 |     // Fall back to basic highlighting rules
  51 |     loadBasicRules();
  52 |     return false;
  53 |   }
  54 |   // The callback asks ConfigManager for the directory at reload time, so
  55 |   // `config_directory` is not captured.
  56 |   ConfigManager::registerReloadCallback(
  57 |       [this]()
  58 |       {
  59 |         std::cerr << "Syntax config reload triggered." << std::endl;
  60 |         // Clear old configs and reload them
  61 |         config_loader_->language_configs_.clear();
  62 |         config_loader_->extension_to_language_.clear();
  63 |         config_loader_->loadAllLanguageConfigs(
  64 |             ConfigManager::getSyntaxRulesDir());
  65 | 
  66 |         // NOTE(review): setLanguage() takes a file extension but receives a
  67 |         // language name here - confirm the loader resolves it.
  68 |         setLanguage(currentLanguage);
  69 |         // Spans cached under the old rules are stale; drop them all.
  70 |         line_cache_.clear();
  71 |       });
  72 |   return true;
  73 | }
  74 | 
  75 | #ifdef TREE_SITTER_ENABLED
  76 | 
  77 | // Debug aid: prints the loaded grammar's ABI version and symbol count to
  78 | // stderr, then parses a tiny fixed snippet and dumps the resulting tree.
  79 | void SyntaxHighlighter::diagnoseGrammar() const
  80 | {
  81 |   if (current_ts_language_ == nullptr)
  82 |   {
  83 |     std::cerr << "ERROR: No language loaded" << std::endl;
  84 |     return;
  85 |   }
  86 | 
  87 |   const uint32_t abi = ts_language_abi_version(current_ts_language_);
  88 |   const uint32_t symbols = ts_language_symbol_count(current_ts_language_);
  89 |   std::cerr << "=== Grammar Diagnostic ===" << std::endl;
  90 |   std::cerr << "ABI Version: " << abi << std::endl;
  91 |   std::cerr << "Symbol count: " << symbols << std::endl;
  92 | 
  93 |   // The sample is parsed with whatever language the parser currently has.
  94 |   const std::string sample = "int x;";
  95 |   TSTree *sample_tree = ts_parser_parse_string(parser_, nullptr, sample.c_str(),
  96 |                                                sample.size());
  97 |   if (sample_tree == nullptr)
  98 |   {
  99 |     std::cerr << "ERROR: Failed to parse simple test code" << std::endl;
 100 |   }
 101 |   else
 102 |   {
 103 |     char *dump = ts_node_string(ts_tree_root_node(sample_tree));
 104 |     std::cerr << "Parse test result: " << dump << std::endl;
 105 |     free(dump); // the string returned by ts_node_string() is ours to free
 106 |     ts_tree_delete(sample_tree);
 107 |   }
 108 | 
 109 |   std::cerr << "=== End Diagnostic ===" << std::endl;
 110 | }
 111 | 
 112 | #endif
 113 | 
 114 | // Selects the highlighting setup for a file extension.
 115 | // Resolves `extension` to a language config; in Tree-sitter builds it also
 116 | // points the parser at that grammar and compiles the language's query files
 117 | // as one merged query. Stale state is dropped first (old query, cached
 118 | // spans and, when the grammar changes, the old tree) so that a fallback path
 119 | // cannot keep highlighting with the previous language's query.
 120 | void SyntaxHighlighter::setLanguage(const std::string &extension)
 121 | {
 122 |   std::string language_name =
 123 |       config_loader_->getLanguageFromExtension(extension);
 124 |   const LanguageConfig *config =
 125 |       config_loader_->getLanguageConfig(language_name);
 126 | 
 127 |   line_cache_.clear(); // spans computed under the old rules are stale
 128 | #ifdef TREE_SITTER_ENABLED
 129 |   const TSLanguage *previous_ts_language = current_ts_language_;
 130 |   {
 131 |     std::lock_guard<std::mutex> lock(tree_mutex_);
 132 |     if (current_ts_query_)
 133 |       ts_query_delete(current_ts_query_);
 134 |     current_ts_query_ = nullptr;
 135 |     current_ts_language_ = nullptr;
 136 |   }
 137 | #endif
 138 | 
 139 |   if (!config)
 140 |   {
 141 |     std::cerr << "ERROR: No config found for language: " << language_name
 142 |               << std::endl;
 143 |     loadBasicRules();
 144 |     currentLanguage = "text";
 145 |     current_language_config_ = nullptr;
 146 |     return;
 147 |   }
 148 |   current_language_config_ = config;
 149 |   currentLanguage = language_name;
 150 | 
 151 | #ifdef TREE_SITTER_ENABLED
 152 |   if (config->parser_name.empty() || !parser_)
 153 |   {
 154 |     std::cerr << "Tree-sitter not available or no parser specified, using "
 155 |                  "basic highlighting"
 156 |               << std::endl;
 157 |     loadBasicRules();
 158 |     return;
 159 |   }
 160 | 
 161 |   const TSLanguage *ts_language = getLanguageFunction(config->parser_name);
 162 |   if (!ts_language)
 163 |   {
 164 |     std::cerr << "ERROR: No Tree-sitter language function found for: "
 165 |               << config->parser_name << std::endl;
 166 |     loadBasicRules();
 167 |     return;
 168 |   }
 169 |   if (!ts_parser_set_language(parser_, ts_language))
 170 |   {
 171 |     std::cerr << "ERROR: Failed to set language for parser" << std::endl;
 172 |     loadBasicRules();
 173 |     return;
 174 |   }
 175 |   current_ts_language_ = ts_language;
 176 | 
 177 |   // A tree built by a different grammar must not seed the next reparse.
 178 |   if (ts_language != previous_ts_language)
 179 |   {
 180 |     std::lock_guard<std::mutex> lock(tree_mutex_);
 181 |     if (tree_)
 182 |       ts_tree_delete(tree_);
 183 |     tree_ = nullptr;
 184 |   }
 185 | 
 186 |   // Concatenate every readable, non-empty query file; others are skipped.
 187 |   std::string merged_query_source;
 188 |   for (const auto &query_path : config->queries)
 189 |   {
 190 |     std::ifstream file(query_path);
 191 |     if (!file.is_open())
 192 |     {
 193 |       std::cerr << "ERROR: Cannot open query file: " << query_path
 194 |                 << std::endl;
 195 |       continue;
 196 |     }
 197 |     std::stringstream buffer;
 198 |     buffer << file.rdbuf();
 199 |     const std::string query_content = buffer.str();
 200 |     if (query_content.empty())
 201 |       continue;
 202 |     if (!merged_query_source.empty())
 203 |       merged_query_source += "\n"; // keep adjacent files' patterns apart
 204 |     merged_query_source += query_content;
 205 |   }
 206 |   if (merged_query_source.empty())
 207 |     return; // nothing to compile; no query stays installed
 208 | 
 209 |   // ts_query_new() does not set the offset for every error kind, so both
 210 |   // out-parameters get defined values before they are printed.
 211 |   uint32_t error_offset = 0;
 212 |   TSQueryError error_type = TSQueryErrorNone;
 213 |   current_ts_query_ = ts_query_new(
 214 |       current_ts_language_, merged_query_source.c_str(),
 215 |       merged_query_source.length(), &error_offset, &error_type);
 216 |   if (current_ts_query_)
 217 |     return;
 218 | 
 219 |   std::cerr << "ERROR: Failed to parse merged query" << std::endl;
 220 |   std::cerr << "  Error offset: " << error_offset << std::endl;
 221 |   std::cerr << "  Error type: " << error_type << std::endl;
 222 | 
 223 |   // Show up to 50 bytes of context on each side of the error.
 224 |   if (error_offset < merged_query_source.length())
 225 |   {
 226 |     const size_t context_start = error_offset > 50 ? error_offset - 50 : 0;
 227 |     std::cerr << "Context around error:" << std::endl;
 228 |     std::cerr << "..."
 229 |               << merged_query_source.substr(
 230 |                      context_start, error_offset + 50 - context_start)
 231 |               << "..." << std::endl;
 232 |     std::cerr << std::string(error_offset - context_start + 3, ' ') << "^"
 233 |               << std::endl;
 234 |   }
 235 | #else
 236 |   std::cerr << "Tree-sitter disabled, using basic highlighting" << std::endl;
 237 |   loadBasicRules();
 238 | #endif
 239 | }
 240 | 
 241 | // Returns the colour spans for one line, serving from the per-line cache
 242 | // when possible and storing every freshly computed result in it.
 243 | std::vector<ColorSpan>
 244 | SyntaxHighlighter::getHighlightSpans(const std::string &line, int lineIndex,
 245 |                                      const GapBuffer &buffer) const
 246 | {
 247 |   const auto cached = line_cache_.find(lineIndex);
 248 |   if (cached != line_cache_.end())
 249 |     return cached->second;
 250 | 
 251 |   // Markdown: a line inside a fenced code block or a blockquote is painted
 252 |   // as a single span covering the whole line.
 253 |   const auto state_entry = line_states_.find(lineIndex);
 254 |   if (currentLanguage == "Markdown" && state_entry != line_states_.end())
 255 |   {
 256 |     const MarkdownState state = state_entry->second;
 257 |     const int line_width = (int)line.length();
 258 |     if (state == MarkdownState::IN_FENCED_CODE_BLOCK)
 259 |     {
 260 |       std::vector<ColorSpan> whole_line = {
 261 |           {0, line_width, getColorPairValue("MARKDOWN_CODE_BLOCK"), A_NORMAL,
 262 |            100}};
 263 |       line_cache_[lineIndex] = whole_line;
 264 |       return whole_line;
 265 |     }
 266 |     if (state == MarkdownState::IN_BLOCKQUOTE)
 267 |     {
 268 |       std::vector<ColorSpan> whole_line = {
 269 |           {0, line_width, getColorPairValue("MARKDOWN_BLOCKQUOTE"), A_NORMAL,
 270 |            90}};
 271 |       line_cache_[lineIndex] = whole_line;
 272 |       return whole_line;
 273 |     }
 274 |   }
 275 | 
 276 |   std::vector<ColorSpan> spans;
 277 | #ifdef TREE_SITTER_ENABLED
 278 |   // Lazy reparse: an earlier edit only flagged the tree as outdated. The
 279 |   // method is const, hence the cast to update the parser state.
 280 |   if (tree_needs_reparse_)
 281 |   {
 282 |     auto *self = const_cast<SyntaxHighlighter *>(this);
 283 |     self->updateTree(buffer);
 284 |     self->tree_needs_reparse_ = false;
 285 |   }
 286 | 
 287 |   if (current_ts_query_ && tree_)
 288 |   {
 289 |     try
 290 |     {
 291 |       spans = executeTreeSitterQuery(line, lineIndex);
 292 |     }
 293 |     catch (const std::exception &e)
 294 |     {
 295 |       std::cerr << "Tree-sitter query error on line " << lineIndex << ": "
 296 |                 << e.what() << std::endl;
 297 |       spans = getBasicHighlightSpans(line);
 298 |     }
 299 |   }
 300 | #endif
 301 | 
 302 |   // Nothing from Tree-sitter (or it is unavailable): use the basic rules.
 303 |   if (spans.empty())
 304 |     spans = getBasicHighlightSpans(line);
 305 | 
 306 |   line_cache_[lineIndex] = spans;
 307 |   return spans;
 308 | }
 309 | // Applies one text edit to the existing tree and flags it for a lazy
 310 | // reparse. Byte arguments describe the edited range; the row/col pairs are
 311 | // the matching start, old-end and new-end points. No-op without a tree.
 312 | void SyntaxHighlighter::updateTreeAfterEdit(
 313 |     const GapBuffer &buffer, size_t byte_pos, size_t old_byte_len,
 314 |     size_t new_byte_len, uint32_t start_row, uint32_t start_col,
 315 |     uint32_t old_end_row, uint32_t old_end_col, uint32_t new_end_row,
 316 |     uint32_t new_end_col)
 317 | {
 318 | #ifdef TREE_SITTER_ENABLED
 319 |   if (!tree_ || !parser_)
 320 |     return;
 321 |   TSInputEdit edit = {.start_byte = (uint32_t)byte_pos,
 322 |                       .old_end_byte = (uint32_t)(byte_pos + old_byte_len),
 323 |                       .new_end_byte = (uint32_t)(byte_pos + new_byte_len),
 324 |                       .start_point = {start_row, start_col},
 325 |                       .old_end_point = {old_end_row, old_end_col},
 326 |                       .new_end_point = {new_end_row, new_end_col}};
 327 |   ts_tree_edit(tree_, &edit);
 328 |   tree_version_++;
 329 |   tree_needs_reparse_ = true; // reparse happens on the next span query
 330 | 
 331 |   // Rows are unsigned: take the absolute difference so that deleting lines
 332 |   // does not wrap around and look like a huge change.
 333 |   const uint32_t row_delta = new_end_row > old_end_row
 334 |                                  ? new_end_row - old_end_row
 335 |                                  : old_end_row - new_end_row;
 336 |   if (row_delta > 10)
 337 |     scheduleBackgroundParse(buffer); // large change: reparse in background
 338 | #endif
 339 | }
 340 | 
 341 | void SyntaxHighlighter::invalidateLineCache(int lineNum)
 342 | {
 343 |   line_cache_.erase(lineNum); // forget the cached spans of this one line
 344 | }
 345 | 
 346 | // Called after the buffer changed.
 347 | // In Tree-sitter builds it runs a full parse when no usable tree exists;
 348 | // in every build it refreshes the Markdown line states while the current
 349 | // language is Markdown.
 350 | void SyntaxHighlighter::bufferChanged(const GapBuffer &buffer)
 351 | {
 352 | #ifdef TREE_SITTER_ENABLED
 353 |   // Without a parser and a grammar there is nothing to parse, but the
 354 |   // Markdown pass below must still run, so this is a guard block rather
 355 |   // than an early return.
 356 |   if (parser_ && current_ts_language_)
 357 |   {
 358 |     // A full parse is needed when the content snapshot was cleared (the
 359 |     // signal used to force one) or when no tree exists yet. Otherwise the
 360 |     // tree is kept; edits are expected to have been applied to it already.
 361 |     if (current_buffer_content_.empty() || !tree_)
 362 |     {
 363 |       updateTree(buffer);
 364 |     }
 365 |   }
 366 | #endif
 367 | 
 368 |   // Markdown block states do not depend on Tree-sitter being available.
 369 |   if (currentLanguage == "Markdown")
 370 |   {
 371 |     updateMarkdownState(buffer);
 372 |   }
 373 | }
 374 | 
 375 | // Forgets the cached spans for `startLine` and every line after it.
 376 | // Meant for structural edits (lines inserted or deleted).
 377 | // Only the span cache is touched here; the content snapshot and the tree
 378 | // are left for the incremental edit path to update.
 379 | void SyntaxHighlighter::invalidateFromLine(int startLine)
 380 | {
 381 |   // lower_bound() yields the first cached line >= startLine, or end() when
 382 |   // there is none - in which case the erased range is simply empty.
 383 |   const auto first_stale = line_cache_.lower_bound(startLine);
 384 |   const auto cache_end = line_cache_.end();
 385 | 
 386 |   // Everything in [first_stale, cache_end) is dropped in one call.
 387 |   line_cache_.erase(first_stale, cache_end);
 388 | }
 389 | 
 390 | #ifdef TREE_SITTER_ENABLED
 391 | // Creates the Tree-sitter parser and fills the language registry from the
 392 | // generated header. Returns false when no parser could be created.
 393 | bool SyntaxHighlighter::initializeTreeSitter()
 394 | {
 395 |   parser_ = ts_parser_new();
 396 |   const bool parser_ready = (parser_ != nullptr);
 397 |   if (parser_ready)
 398 |   {
 399 |     // Auto-register all languages from generated header
 400 |     registerAllLanguages(language_registry_);
 401 |   }
 402 |   else
 403 |   {
 404 |     std::cerr << "ERROR: Failed to create Tree-sitter parser" << std::endl;
 405 |   }
 406 |   return parser_ready;
 407 | }
 408 | 
 409 | // Tears down all Tree-sitter state: waits for a running background parse
 410 | // to finish, then deletes the query, the tree and the parser and nulls
 411 | // each pointer.
 412 | void SyntaxHighlighter::cleanupTreeSitter()
 413 | {
 414 |   // Poll every 10 ms until the background-parse flag drops.
 415 |   const auto poll_interval = std::chrono::milliseconds(10);
 416 |   while (is_parsing_)
 417 |   {
 418 |     std::this_thread::sleep_for(poll_interval);
 419 |   }
 420 | 
 421 |   // Hold the tree mutex for the whole teardown.
 422 |   std::lock_guard<std::mutex> guard(tree_mutex_);
 423 | 
 424 |   // Release in the order query, tree, parser.
 425 |   if (current_ts_query_ != nullptr)
 426 |     ts_query_delete(current_ts_query_);
 427 |   current_ts_query_ = nullptr;
 428 | 
 429 |   if (tree_ != nullptr)
 430 |     ts_tree_delete(tree_);
 431 |   tree_ = nullptr;
 432 | 
 433 |   if (parser_ != nullptr)
 434 |     ts_parser_delete(parser_);
 435 |   parser_ = nullptr;
 436 | }
 437 | 
 438 | // Looks up the grammar registered under `parser_name` and returns its
 439 | // TSLanguage, or nullptr (after listing the known names on stderr) when
 440 | // the name is not registered.
 441 | const TSLanguage *
 442 | SyntaxHighlighter::getLanguageFunction(const std::string &parser_name)
 443 | {
 444 |   const auto entry = language_registry_.find(parser_name);
 445 |   if (entry == language_registry_.end())
 446 |   {
 447 |     std::cerr << "WARNING: No Tree-sitter language found for: '" << parser_name
 448 |               << "'" << std::endl;
 449 |     std::cerr << "  Available languages: ";
 450 | 
 451 |     const char *separator = "";
 452 |     for (const auto &registered : language_registry_)
 453 |     {
 454 |       std::cerr << separator << registered.first;
 455 |       separator = ", ";
 456 |     }
 457 |     std::cerr << std::endl;
 458 |     return nullptr;
 459 |   }
 460 | 
 461 |   return entry->second(); // the registry maps names to callables
 462 | }
 463 | 
 464 | // Reads a Tree-sitter query file and compiles it for the current grammar.
 465 | // Returns the compiled query (it is not stored here, so ownership passes
 466 | // to the caller) or nullptr when no grammar is selected, the file cannot
 467 | // be opened, the file is empty, or the query does not compile. Compile
 468 | // errors are reported on stderr with the text around the error offset.
 469 | TSQuery *
 470 | SyntaxHighlighter::loadQueryFromFile(const std::string &query_file_path)
 471 | {
 472 |   if (!current_ts_language_)
 473 |   {
 474 |     std::cerr << "ERROR: No language set for query file: " << query_file_path
 475 |               << std::endl;
 476 |     return nullptr;
 477 |   }
 478 | 
 479 |   std::ifstream file(query_file_path);
 480 |   if (!file.is_open())
 481 |   {
 482 |     std::cerr << "ERROR: Cannot open query file: " << query_file_path
 483 |               << std::endl;
 484 |     return nullptr;
 485 |   }
 486 | 
 487 |   std::stringstream buffer;
 488 |   buffer << file.rdbuf();
 489 |   std::string query_source = buffer.str();
 490 |   if (query_source.empty())
 491 |   {
 492 |     std::cerr << "ERROR: Query file is empty: " << query_file_path << std::endl;
 493 |     return nullptr;
 494 |   }
 495 | 
 496 |   // ts_query_new() does not write the offset for every error kind, so start
 497 |   // both out-parameters at known values before they are printed.
 498 |   uint32_t error_offset = 0;
 499 |   TSQueryError error_type = TSQueryErrorNone;
 500 |   TSQuery *query =
 501 |       ts_query_new(current_ts_language_, query_source.c_str(),
 502 |                    query_source.length(), &error_offset, &error_type);
 503 |   if (query)
 504 |     return query;
 505 | 
 506 |   std::cerr << "ERROR: Failed to parse query file " << query_file_path
 507 |             << std::endl;
 508 |   std::cerr << "  Error offset: " << error_offset << std::endl;
 509 |   std::cerr << "  Error type: " << error_type;
 510 | 
 511 |   // Provide more detailed error information
 512 |   switch (error_type)
 513 |   {
 514 |   case TSQueryErrorNone:
 515 |     std::cerr << " (None)";
 516 |     break;
 517 |   case TSQueryErrorSyntax:
 518 |     std::cerr << " (Syntax Error)";
 519 |     break;
 520 |   case TSQueryErrorNodeType:
 521 |     std::cerr << " (Unknown Node Type)";
 522 |     break;
 523 |   case TSQueryErrorField:
 524 |     std::cerr << " (Unknown Field)";
 525 |     break;
 526 |   case TSQueryErrorCapture:
 527 |     std::cerr << " (Unknown Capture)";
 528 |     break;
 529 |   case TSQueryErrorStructure:
 530 |     std::cerr << " (Invalid Structure)";
 531 |     break;
 532 |   default:
 533 |     std::cerr << " (Unknown Error)";
 534 |     break;
 535 |   }
 536 |   std::cerr << std::endl;
 537 | 
 538 |   // Show up to 50 bytes of context on each side of the error.
 539 |   if (error_offset < query_source.length())
 540 |   {
 541 |     const size_t context_start = error_offset > 50 ? error_offset - 50 : 0;
 542 |     std::cerr << "Context around error:" << std::endl;
 543 |     std::cerr << "..."
 544 |               << query_source.substr(context_start,
 545 |                                      error_offset + 50 - context_start)
 546 |               << "..." << std::endl;
 547 | 
 548 |     // Point to error location
 549 |     std::cerr << std::string(error_offset - context_start + 3, ' ') << "^"
 550 |               << std::endl;
 551 |   }
 552 | 
 553 |   return nullptr;
 554 | }
 555 | 
 556 | // Records one text edit in the existing tree and flags the tree for a
 557 | // lazy reparse. No-op when no tree exists yet.
 558 | void SyntaxHighlighter::notifyEdit(size_t byte_pos, size_t old_byte_len,
 559 |                                    size_t new_byte_len, uint32_t start_row,
 560 |                                    uint32_t start_col, uint32_t old_end_row,
 561 |                                    uint32_t old_end_col, uint32_t new_end_row,
 562 |                                    uint32_t new_end_col)
 563 | {
 564 | #ifdef TREE_SITTER_ENABLED
 565 |   if (!tree_)
 566 |     return;
 567 | 
 568 |   TSInputEdit edit = {.start_byte = (uint32_t)byte_pos,
 569 |                       .old_end_byte = (uint32_t)(byte_pos + old_byte_len),
 570 |                       .new_end_byte = (uint32_t)(byte_pos + new_byte_len),
 571 |                       .start_point = {start_row, start_col},
 572 |                       .old_end_point = {old_end_row, old_end_col},
 573 |                       .new_end_point = {new_end_row, new_end_col}};
 574 |   ts_tree_edit(tree_, &edit);
 575 | 
 576 |   // ts_tree_edit() only adjusts the old tree; the text still has to be
 577 |   // parsed again. Raise the flag that getHighlightSpans() checks so the
 578 |   // reparse happens lazily, as updateTreeAfterEdit() does.
 579 |   tree_needs_reparse_ = true;
 580 | #endif
 581 | }
 582 | 
 583 | // Drops cached spans and Markdown line states affected by an edit that
 584 | // covers lines startLine..endLine.
 585 | //  - endLine - startLine <= 3: only those lines are erased.
 586 | //  - otherwise: everything from startLine to the end is erased.
 587 | //  - endLine - startLine > 10: the content snapshot is cleared as well,
 588 | //    which forces a reparse on next access.
 589 | void SyntaxHighlighter::invalidateLineRange(int startLine, int endLine)
 590 | {
 591 |   const int span = endLine - startLine;
 592 | 
 593 |   // Small edit: erase line by line and keep the rest of the caches.
 594 |   if (span <= 3)
 595 |   {
 596 |     int line = startLine;
 597 |     while (line <= endLine)
 598 |     {
 599 |       line_cache_.erase(line);
 600 |       line_states_.erase(line);
 601 |       ++line;
 602 |     }
 603 |     return;
 604 |   }
 605 | 
 606 |   // Larger edit: erase from startLine onwards. erase(first, end) is a
 607 |   // no-op when lower_bound() already returned end().
 608 |   line_cache_.erase(line_cache_.lower_bound(startLine), line_cache_.end());
 609 |   line_states_.erase(line_states_.lower_bound(startLine),
 610 |                      line_states_.end());
 611 | 
 612 |   if (span > 10)
 613 |   {
 614 |     current_buffer_content_.clear(); // Force reparse on next access
 615 |   }
 616 | }
 617 | 
 618 | // Rebuilds the buffer snapshot and (re)parses it, passing the existing
 619 | // tree, if any, to the parser for reuse. An empty buffer is not parsed and
 620 | // leaves the previous tree and snapshot in place.
 621 | void SyntaxHighlighter::updateTree(const GapBuffer &buffer)
 622 | {
 623 | #ifdef TREE_SITTER_ENABLED
 624 |   if (!parser_)
 625 |     return; // no parser was created; nothing to parse with
 626 | 
 627 |   std::string content;
 628 |   int lineCount = buffer.getLineCount();
 629 | 
 630 |   // Build line offset cache while building content
 631 |   // NOTE(review): entry i (i > 0) is the content length after line i-1,
 632 |   // i.e. the position of the '\n' before line i, not the first byte of
 633 |   // line i - confirm this is what the readers of the cache expect.
 634 |   line_byte_offsets_.clear();
 635 |   line_byte_offsets_.reserve(lineCount + 1);
 636 |   line_byte_offsets_.push_back(0); // First line starts at byte 0
 637 | 
 638 |   for (int i = 0; i < lineCount; i++)
 639 |   {
 640 |     if (i > 0)
 641 |       content += "\n";
 642 |     content += buffer.getLine(i);
 643 |     line_byte_offsets_.push_back(content.length());
 644 |   }
 645 | 
 646 |   if (content.empty())
 647 |   {
 648 |     std::cerr << "WARNING: Attempting to parse empty buffer\n";
 649 |     return;
 650 |   }
 651 | 
 652 |   std::lock_guard<std::mutex> lock(tree_mutex_);
 653 |   current_buffer_content_.swap(content); // take the new text without a copy
 654 | 
 655 |   // When an old tree exists it is handed to the parser for reuse.
 656 |   TSTree *old_tree = tree_;
 657 |   tree_ = ts_parser_parse_string(parser_, old_tree,
 658 |                                  current_buffer_content_.c_str(),
 659 |                                  current_buffer_content_.length());
 660 | 
 661 |   // The old tree must be released even when parsing failed: tree_ no
 662 |   // longer refers to it, so skipping the delete would leak it.
 663 |   if (old_tree)
 664 |     ts_tree_delete(old_tree);
 665 | 
 666 |   if (!tree_)
 667 |     std::cerr << "ERROR: Failed to parse tree\n";
 668 | #endif
 669 | }
 670 | 
 671 | // Replaces the priority set with every line index in [startLine, endLine].
 672 | void SyntaxHighlighter::markViewportLines(int startLine, int endLine) const
 673 | {
 674 |   priority_lines_.clear();
 675 |   int line = startLine;
 676 |   while (line <= endLine)
 677 |     priority_lines_.insert(line++);
 678 | }
 679 | 
 680 | bool SyntaxHighlighter::isLineHighlighted(int lineIndex) const
 681 | {
 682 |   return line_cache_.count(lineIndex) != 0; // true when line_cache_ has it
 683 | }
 684 | 
 685 | // Runs the active highlight query and returns the colour spans that fall on
 686 | // buffer line `lineNum`; `line` is that line's text and bounds the columns.
 687 | // Empty when no query/tree is loaded or the line is outside the parsed text.
 688 | std::vector<ColorSpan>
 689 | SyntaxHighlighter::executeTreeSitterQuery(const std::string &line,
 690 |                                           int lineNum) const
 691 | {
 692 |   // Lock before reading tree_: the background parse swaps it under this mutex.
 693 |   std::lock_guard<std::mutex> lock(tree_mutex_);
 694 |   if (!current_ts_query_ || !tree_ || line.empty())
 695 |     return {};
 696 | 
 697 |   // After a viewport-only parse, tree row 0 is buffer line
 698 |   // viewport_start_line_, so everything below uses the tree-relative row.
 699 |   const int adjusted_line =
 700 |       is_full_parse_ ? lineNum : (lineNum - viewport_start_line_);
 701 |   if (adjusted_line < 0)
 702 |     return {};
 703 | 
 704 |   const uint32_t row = static_cast<uint32_t>(adjusted_line);
 705 |   const int line_len = static_cast<int>(line.length());
 706 |   TSNode root_node = ts_tree_root_node(tree_);
 707 |   if (row > ts_node_end_point(root_node).row)
 708 |     return {};
 709 | 
 710 |   // Find where this row starts in the parsed text, without copying the text.
 711 |   const std::string &text = current_buffer_content_;
 712 |   size_t row_begin = 0;
 713 |   bool row_found = true;
 714 |   for (uint32_t r = 0; r < row && row_found; ++r)
 715 |   {
 716 |     const size_t newline = text.find('\n', row_begin);
 717 |     row_found = (newline != std::string::npos);
 718 |     if (row_found)
 719 |       row_begin = newline + 1;
 720 |   }
 721 | 
 722 |   TSQueryCursor *cursor = ts_query_cursor_new();
 723 |   if (row_found)
 724 |   {
 725 |     size_t row_end = text.find('\n', row_begin);
 726 |     if (row_end == std::string::npos)
 727 |       row_end = text.length();
 728 |     if (row_end > row_begin) // otherwise rely on the row filter below
 729 |       ts_query_cursor_set_byte_range(cursor, static_cast<uint32_t>(row_begin),
 730 |                                      static_cast<uint32_t>(row_end));
 731 |   }
 732 |   ts_query_cursor_exec(cursor, current_ts_query_, root_node);
 733 | 
 734 |   std::vector<ColorSpan> spans;
 735 |   TSQueryMatch match;
 736 |   while (ts_query_cursor_next_match(cursor, &match))
 737 |   {
 738 |     for (uint32_t i = 0; i < match.capture_count; i++)
 739 |     {
 740 |       const TSQueryCapture capture = match.captures[i];
 741 |       const TSPoint start_point = ts_node_start_point(capture.node);
 742 |       const TSPoint end_point = ts_node_end_point(capture.node);
 743 |       if (start_point.row > row || end_point.row < row)
 744 |         continue; // capture does not touch this row
 745 | 
 746 |       // Captures spanning several rows cover this one from column 0 and/or
 747 |       // through to the end of the line.
 748 |       int start_col =
 749 |           (start_point.row == row) ? static_cast<int>(start_point.column) : 0;
 750 |       int end_col = (end_point.row == row)
 751 |                         ? static_cast<int>(end_point.column)
 752 |                         : line_len;
 753 |       start_col = std::max(0, std::min(start_col, line_len));
 754 |       end_col = std::max(start_col, std::min(end_col, line_len));
 755 |       if (start_col >= end_col)
 756 |         continue;
 757 | 
 758 |       uint32_t name_length = 0;
 759 |       const char *capture_name_ptr = ts_query_capture_name_for_id(
 760 |           current_ts_query_, capture.index, &name_length);
 761 |       const std::string capture_name(capture_name_ptr, name_length);
 762 |       spans.push_back({start_col, end_col,
 763 |                        getColorPairForCapture(capture_name), 0, 100});
 764 |     }
 765 |   }
 766 |   ts_query_cursor_delete(cursor);
 767 |   return spans;
 768 | }
 769 | 
 770 | // Maps a Tree-sitter capture name to a colour pair: exact name first, then
 771 | // each dotted parent ("markup.heading.1" -> "markup.heading" -> "markup"),
 772 | // then a loose substring match; 0 (default) when nothing applies.
 773 | int SyntaxHighlighter::getColorPairForCapture(
 774 |     const std::string &capture_name) const
 775 | {
 776 |   static const std::unordered_map<std::string, std::string> capture_to_color = {
 777 |       // Keywords
 778 |       {"keyword", "KEYWORD"},
 779 |       {"keyword.control", "KEYWORD"},
 780 |       {"keyword.function", "KEYWORD"},
 781 |       {"keyword.operator", "KEYWORD"},
 782 |       {"keyword.return", "KEYWORD"},
 783 |       {"keyword.conditional", "KEYWORD"},
 784 |       {"keyword.repeat", "KEYWORD"},
 785 |       {"keyword.import", "KEYWORD"},
 786 |       {"keyword.exception", "KEYWORD"},
 787 | 
 788 |       // Types
 789 |       {"type", "TYPE"},
 790 |       {"type.builtin", "TYPE"},
 791 |       {"type.definition", "TYPE"},
 792 |       {"class", "TYPE"},
 793 |       {"interface", "TYPE"},
 794 | 
 795 |       // Functions
 796 |       {"function", "FUNCTION"},
 797 |       {"function.call", "FUNCTION"},
 798 |       {"function.builtin", "FUNCTION"},
 799 |       {"function.method", "FUNCTION"},
 800 |       {"method", "FUNCTION"},
 801 | 
 802 |       // Variables & constants
 803 |       {"variable", "VARIABLE"},
 804 |       {"variable.parameter", "VARIABLE"},
 805 |       {"variable.builtin", "CONSTANT"},
 806 |       {"variable.member", "VARIABLE"},
 807 |       {"constant", "CONSTANT"},
 808 |       {"constant.builtin", "CONSTANT"},
 809 |       {"parameter", "VARIABLE"},
 810 | 
 811 |       // Literals & comments
 812 |       {"string", "STRING_LITERAL"},
 813 |       {"string_literal", "STRING_LITERAL"},
 814 |       {"number", "NUMBER"},
 815 |       {"integer", "NUMBER"},
 816 |       {"float", "NUMBER"},
 817 |       {"boolean", "CONSTANT"},
 818 |       {"comment", "COMMENT"},
 819 | 
 820 |       // Operators & punctuation
 821 |       {"operator", "OPERATOR"},
 822 |       {"punctuation", "PUNCTUATION"},
 823 |       {"punctuation.bracket", "PUNCTUATION"},
 824 |       {"punctuation.delimiter", "PUNCTUATION"},
 825 | 
 826 |       // Specialized
 827 |       {"namespace", "NAMESPACE"},
 828 |       {"property", "PROPERTY"},
 829 |       {"field", "PROPERTY"},
 830 |       {"attribute", "DECORATOR"},
 831 |       {"decorator", "DECORATOR"},
 832 |       {"label", "LABEL"},
 833 |       {"tag", "LABEL"},
 834 | 
 835 |       // Preprocessor/macro
 836 |       {"preproc", "MACRO"},
 837 |       {"preproc_include", "MACRO"},
 838 |       {"preproc_def", "MACRO"},
 839 |       {"preproc_call", "MACRO"},
 840 |       {"preproc_if", "MACRO"},
 841 |       {"preproc_ifdef", "MACRO"},
 842 |       {"preproc_ifndef", "MACRO"},
 843 |       {"preproc_else", "MACRO"},
 844 |       {"preproc_elif", "MACRO"},
 845 |       {"preproc_endif", "MACRO"},
 846 |       {"macro", "MACRO"},
 847 | 
 848 |       // Markup (Markdown, etc.); each key appears exactly once
 849 |       {"markup.heading", "MARKUP_HEADING"},
 850 |       {"heading", "MARKUP_HEADING"},
 851 |       {"markup.bold", "MARKUP_BOLD"},
 852 |       {"markup.italic", "MARKUP_ITALIC"},
 853 |       {"emphasis", "MARKUP_ITALIC"},
 854 |       {"markup.code", "MARKUP_CODE"},
 855 |       {"code", "MARKUP_CODE"},
 856 |       {"markup.link", "MARKUP_LINK"},
 857 |       {"link_text", "MARKUP_LINK"},
 858 |       {"markup.url", "MARKUP_URL"},
 859 |       {"link_uri", "MARKUP_URL"},
 860 |       {"markup.quote", "MARKUP_BLOCKQUOTE"},
 861 |       {"markup.list", "MARKUP_LIST"},
 862 |       {"code_fence_content", "MARKUP_CODE_BLOCK"},
 863 |       {"code_span", "MARKUP_CODE"},
 864 |   };
 865 | 
 866 |   // Exact match, then progressively shorter dotted prefixes.
 867 |   std::string scope = capture_name;
 868 |   while (!scope.empty())
 869 |   {
 870 |     auto it = capture_to_color.find(scope);
 871 |     if (it != capture_to_color.end())
 872 |       return getColorPairValue(it->second);
 873 |     const size_t dot = scope.rfind('.');
 874 |     scope.erase(dot == std::string::npos ? 0 : dot); // no dot left: stop
 875 |   }
 876 | 
 877 |   // Last resort: substring matching, in this fixed order
 878 |   if (capture_name.find("keyword") != std::string::npos)
 879 |     return getColorPairValue("KEYWORD");
 880 |   if (capture_name.find("type") != std::string::npos)
 881 |     return getColorPairValue("TYPE");
 882 |   if (capture_name.find("function") != std::string::npos)
 883 |     return getColorPairValue("FUNCTION");
 884 |   if (capture_name.find("string") != std::string::npos)
 885 |     return getColorPairValue("STRING_LITERAL");
 886 |   if (capture_name.find("comment") != std::string::npos)
 887 |     return getColorPairValue("COMMENT");
 888 |   if (capture_name.find("number") != std::string::npos)
 889 |     return getColorPairValue("NUMBER");
 890 |   if (capture_name.find("constant") != std::string::npos)
 891 |     return getColorPairValue("CONSTANT");
 892 | 
 893 |   return 0; // Default
 894 | }
 895 | #endif
 896 | 
 897 | // Resolves a semantic colour name (for example "KEYWORD") to the colour-pair
 898 | // constant of the same name. Names that are not in the table resolve to 0.
 899 | int SyntaxHighlighter::getColorPairValue(const std::string &color_name) const
 900 | {
 901 |   static const std::unordered_map<std::string, int> pair_ids = {
 902 |       {"COMMENT", COMMENT}, {"KEYWORD", KEYWORD},
 903 |       {"STRING_LITERAL", STRING_LITERAL}, {"NUMBER", NUMBER},
 904 |       {"FUNCTION", FUNCTION}, {"VARIABLE", VARIABLE},
 905 |       {"TYPE", TYPE}, {"OPERATOR", OPERATOR},
 906 |       {"PUNCTUATION", PUNCTUATION}, {"CONSTANT", CONSTANT},
 907 |       {"NAMESPACE", NAMESPACE}, {"PROPERTY", PROPERTY},
 908 |       {"DECORATOR", DECORATOR}, {"MACRO", MACRO},
 909 |       {"LABEL", LABEL}, {"MARKUP_HEADING", MARKUP_HEADING},
 910 |       {"MARKUP_BOLD", MARKUP_BOLD}, {"MARKUP_ITALIC", MARKUP_ITALIC},
 911 |       {"MARKUP_CODE", MARKUP_CODE}, {"MARKUP_CODE_BLOCK", MARKUP_CODE_BLOCK},
 912 |       {"MARKUP_LINK", MARKUP_LINK}, {"MARKUP_URL", MARKUP_URL},
 913 |       {"MARKUP_LIST", MARKUP_LIST}, {"MARKUP_BLOCKQUOTE", MARKUP_BLOCKQUOTE},
 914 |       {"MARKUP_STRIKETHROUGH", MARKUP_STRIKETHROUGH},
 915 |       {"MARKUP_QUOTE", MARKUP_QUOTE}};
 916 | 
 917 |   const auto hit = pair_ids.find(color_name);
 918 |   if (hit == pair_ids.end())
 919 |     return 0;
 920 |   return hit->second;
 921 | }
 930 | 
 931 | // Converts an attribute name to the A_* constant of the same name; "0" and
 932 | // unrecognised names both give 0.
 933 | int SyntaxHighlighter::getAttributeValue(
 934 |     const std::string &attribute_name) const
 935 | {
 936 |   static const std::unordered_map<std::string, int> known_attributes = {
 937 |       {"0", 0}, {"A_BOLD", A_BOLD}, {"A_DIM", A_DIM},
 938 |       {"A_UNDERLINE", A_UNDERLINE}, {"A_REVERSE", A_REVERSE}};
 939 |   const auto found = known_attributes.find(attribute_name);
 940 |   if (found != known_attributes.end())
 941 |     return found->second;
 942 |   return 0;
 943 | }
 944 | 
 945 | // Fallback highlighter for when no Tree-sitter spans are available: marks
 946 | // closed quoted strings and a trailing '#' or '//' comment. Markers inside a
 947 | // closed string are ignored and the earliest marker outside one wins.
 948 | std::vector<ColorSpan>
 949 | SyntaxHighlighter::getBasicHighlightSpans(const std::string &line) const
 950 | {
 951 |   std::vector<ColorSpan> strings;
 952 |   size_t comment_pos = std::string::npos;
 953 |   size_t i = 0;
 954 |   while (i < line.length())
 955 |   {
 956 |     const char c = line[i];
 957 |     if (c == '#' || (c == '/' && i + 1 < line.length() && line[i + 1] == '/'))
 958 |     {
 959 |       comment_pos = i; // the rest of the line is comment text
 960 |       break;
 961 |     }
 962 |     if (c == '"' || c == '\'')
 963 |     {
 964 |       // Find the matching quote; a backslash escapes the next character.
 965 |       size_t close = i + 1;
 966 |       while (close < line.length() && line[close] != c)
 967 |         close += (line[close] == '\\') ? 2 : 1;
 968 |       if (close < line.length())
 969 |       {
 970 |         strings.push_back({static_cast<int>(i), static_cast<int>(close + 1),
 971 |                            getColorPairValue("STRING_LITERAL"), 0, 90});
 972 |         i = close + 1;
 973 |         continue;
 974 |       }
 975 |       // No closing quote (e.g. an apostrophe in prose): plain text.
 976 |     }
 977 |     ++i;
 978 |   }
 979 |   // Comment span first, then strings, matching the previous output order.
 980 |   std::vector<ColorSpan> spans;
 981 |   if (comment_pos != std::string::npos)
 982 |     spans.push_back({static_cast<int>(comment_pos),
 983 |                      static_cast<int>(line.length()),
 984 |                      getColorPairValue("COMMENT"), 0, 100});
 985 |   spans.insert(spans.end(), strings.begin(), strings.end());
 986 |   return spans;
 987 | }
 988 | 
 989 | // Fallback hook for when Tree-sitter is unavailable; it only logs a notice.
 990 | void SyntaxHighlighter::loadBasicRules()
 991 | {
 992 |   std::cerr << "Loading basic highlighting rules (fallback mode)" << '\n' << std::flush;
 993 | }
 994 | 
 995 | // Recomputes the per-line Markdown state (default, fenced code, blockquote)
 996 | // for the whole buffer; for any other language the table is just emptied.
 997 | void SyntaxHighlighter::updateMarkdownState(const GapBuffer &buffer)
 998 | {
 999 |   line_states_.clear();
1000 |   if (currentLanguage != "Markdown")
1001 |     return;
1002 | 
1003 |   bool insideFence = false;
1004 |   const int total = buffer.getLineCount();
1005 |   for (int row = 0; row < total; ++row)
1006 |   {
1007 |     const std::string text = buffer.getLine(row);
1008 |     const bool fenceLine = text.compare(0, 3, "```") == 0;
1009 | 
1010 |     if (insideFence)
1011 |     {
1012 |       // Every line up to and including the closing fence is code.
1013 |       line_states_[row] = MarkdownState::IN_FENCED_CODE_BLOCK;
1014 |       if (fenceLine)
1015 |         insideFence = false;
1016 |     }
1017 |     else if (fenceLine)
1018 |     {
1019 |       // The opening fence itself keeps the default state.
1020 |       line_states_[row] = MarkdownState::DEFAULT;
1021 |       insideFence = true;
1022 |     }
1023 |     else if (text.compare(0, 1, ">") == 0)
1024 |     {
1025 |       line_states_[row] = MarkdownState::IN_BLOCKQUOTE;
1026 |     }
1027 |     else
1028 |     {
1029 |       line_states_[row] = MarkdownState::DEFAULT;
1030 |     }
1031 |   }
1032 | }
1034 | 
1035 | std::vector<std::string> SyntaxHighlighter::getSupportedExtensions() const
1036 | {
1037 |   const std::vector<std::string> extensions{"cpp", "h", "hpp", "c", "py", "md", "txt"};
1038 |   return extensions; } // extensions are listed without the leading dot
1039 | 
1040 | // Dumps the Tree-sitter related members to stderr for troubleshooting.
1041 | void SyntaxHighlighter::debugTreeSitterState() const
1042 | {
1043 | #ifdef TREE_SITTER_ENABLED
1044 |   const auto presence = [](const void *ptr) { return ptr ? "EXISTS" : "NULL"; };
1045 | 
1046 |   std::cerr << "=== Tree-sitter State Debug ===\n"
1047 |             << "Current language: " << currentLanguage << "\n"
1048 |             << "Parser: " << presence(parser_) << "\n"
1049 |             << "Tree: " << presence(tree_) << "\n"
1050 |             << "TS Language: " << presence(current_ts_language_) << "\n"
1051 |             << "TS Query: " << presence(current_ts_query_) << "\n"
1052 |             << "Buffer content length: " << current_buffer_content_.length() << "\n"
1053 |             << "Line cache size: " << line_cache_.size() << "\n";
1054 | 
1055 |   if (tree_ != nullptr)
1056 |   {
1057 |     char *dump = ts_node_string(ts_tree_root_node(tree_));
1058 |     const std::string preview = std::string(dump).substr(0, 200);
1059 |     free(dump); // ts_node_string hands back a heap-allocated C string
1060 |     std::cerr << "Parse tree (truncated): " << preview << "...\n";
1061 |   }
1062 |   std::cerr << "=== End Debug ===\n";
1063 | #else
1064 |   std::cerr << "Tree-sitter not enabled\n";
1065 | #endif
1066 | }
1067 | 
1068 | // Parses only a window of up to 50 lines either side of targetLine and
1069 | // installs that tree as the current one, flagged as a partial parse.
1070 | void SyntaxHighlighter::parseViewportOnly(const GapBuffer &buffer,
1071 |                                           int targetLine)
1072 | {
1073 | #ifdef TREE_SITTER_ENABLED
1074 |   if (!(parser_ && current_ts_language_))
1075 |     return;
1076 | 
1077 |   const int firstLine = std::max(0, targetLine - 50);
1078 |   const int lastLine = std::min(buffer.getLineCount() - 1, targetLine + 50);
1079 | 
1080 |   std::string windowText; // window lines joined by '\n', no trailing newline
1081 |   for (int row = firstLine; row <= lastLine; ++row)
1082 |   {
1083 |     if (row != firstLine)
1084 |       windowText += "\n";
1085 |     windowText += buffer.getLine(row);
1086 |   }
1087 |   if (windowText.empty())
1088 |     return;
1089 | 
1090 |   TSTree *parsed = ts_parser_parse_string(parser_, nullptr, windowText.c_str(),
1091 |                                           windowText.length());
1092 |   if (parsed == nullptr)
1093 |     return;
1094 | 
1095 |   std::lock_guard<std::mutex> lock(tree_mutex_); // swap tree + bookkeeping
1096 |   if (tree_)
1097 |     ts_tree_delete(tree_);
1098 |   tree_ = parsed;
1099 |   current_buffer_content_ = windowText;
1100 |   viewport_start_line_ = firstLine;
1101 |   is_full_parse_ = false;
1102 | #endif
1103 | }
1104 | 
1105 | void SyntaxHighlighter::scheduleBackgroundParse(const GapBuffer &buffer)
1106 | { // Snapshots the buffer text and parses it on a detached worker thread.
1107 | #ifdef TREE_SITTER_ENABLED
1108 |   if (is_parsing_ || !parser_ || !current_ts_language_)
1109 |     return;
1110 |   // Throttle: do nothing if the last scheduled parse began under 500 ms ago.
1111 |   auto now = std::chrono::steady_clock::now();
1112 |   auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(
1113 |                      now - last_parse_time_)
1114 |                      .count();
1115 | 
1116 |   if (elapsed < 500)
1117 |     return;
1118 | 
1119 |   // Copy content BEFORE starting thread, so the worker never reads `buffer`
1120 |   std::string content;
1121 |   int lineCount = buffer.getLineCount();
1122 |   content.reserve(lineCount * 80);
1123 | 
1124 |   for (int i = 0; i < lineCount; i++)
1125 |   {
1126 |     if (i > 0)
1127 |       content += "\n";
1128 |     content += buffer.getLine(i);
1129 |   }
1130 | 
1131 |   if (content.empty())
1132 |     return;
1133 |   // NOTE(review): is_parsing_ is tested above and set here in separate steps.
1134 |   is_parsing_ = true;
1135 |   last_parse_time_ = now;
1136 | 
1137 |   // Capture the tree version now; the worker compares against it later
1138 |   uint64_t expected_version = tree_version_.load();
1139 | 
1140 |   // Use a separate parser on the worker so the shared parser_ is not touched
1141 |   TSParser *temp_parser = ts_parser_new();
1142 |   if (!ts_parser_set_language(temp_parser, current_ts_language_))
1143 |   {
1144 |     ts_parser_delete(temp_parser);
1145 |     is_parsing_ = false;
1146 |     return;
1147 |   }
1148 |   // NOTE(review): the lambda captures `this` and the thread is detached below.
1149 |   parse_thread_ = std::thread(
1150 |       [this, content, temp_parser, expected_version]() mutable
1151 |       {
1152 |         TSTree *new_tree = ts_parser_parse_string(
1153 |             temp_parser, nullptr, content.c_str(), content.length());
1154 | 
1155 |         if (new_tree)
1156 |         {
1157 |           std::lock_guard<std::mutex> lock(tree_mutex_);
1158 | 
1159 |           // Publish only if tree_version_ is unchanged since scheduling
1160 |           if (tree_version_.load() == expected_version)
1161 |           {
1162 |             TSTree *old_tree = tree_;
1163 |             tree_ = new_tree;
1164 |             current_buffer_content_ = std::move(content);
1165 |             is_full_parse_ = true;
1166 | 
1167 |             if (old_tree)
1168 |               ts_tree_delete(old_tree);
1169 |           }
1170 |           else
1171 |           {
1172 |             // Discard stale parse - user has made newer edits
1173 |             ts_tree_delete(new_tree);
1174 |           }
1175 |         }
1176 |         // Runs on every path, including a failed or discarded parse.
1177 |         ts_parser_delete(temp_parser);
1178 |         is_parsing_ = false;
1179 |         parse_complete_ = true;
1180 |       });
1181 | 
1182 |   parse_thread_.detach();
1183 | #endif
1184 | }
1185 | 
1186 | // Synchronously re-parses the whole buffer and then drops cached highlights.
1187 | // If the parse fails, the old tree, its text and the caches are left alone.
1188 | void SyntaxHighlighter::forceFullReparse(const GapBuffer &buffer)
1189 | {
1190 | #ifdef TREE_SITTER_ENABLED
1191 |   if (!parser_ || !current_ts_language_)
1192 |     return;
1193 | 
1194 |   std::lock_guard<std::mutex> lock(tree_mutex_);
1195 | 
1196 |   // Build fresh content
1197 |   std::string content;
1198 |   const int lineCount = buffer.getLineCount();
1199 |   content.reserve(static_cast<size_t>(lineCount) * 50); // Rough estimate
1200 |   for (int i = 0; i < lineCount; i++)
1201 |   {
1202 |     if (i > 0)
1203 |       content += "\n";
1204 |     content += buffer.getLine(i);
1205 |   }
1206 | 
1207 |   if (content.empty())
1208 |   {
1209 |     std::cerr << "WARNING: Empty buffer in forceFullReparse\n";
1210 |     return;
1211 |   }
1212 | 
1213 |   // Seed the parse with the old tree only when it is a whole-buffer tree: a
1214 |   // viewport tree's rows and bytes are relative to its window, so reusing it
1215 |   // here would corrupt the result. NOTE(review): reuse also presumes edits
1216 |   // were applied to tree_ with ts_tree_edit() elsewhere - confirm.
1217 |   TSTree *old_tree = tree_;
1218 |   TSTree *new_tree = ts_parser_parse_string(
1219 |       parser_, is_full_parse_ ? old_tree : nullptr, content.c_str(),
1220 |       content.length());
1221 |   if (!new_tree)
1222 |   {
1223 |     std::cerr << "ERROR: Reparse failed, keeping old tree\n";
1224 |     return;
1225 |   }
1226 | 
1227 |   tree_ = new_tree;
1228 |   current_buffer_content_ = std::move(content); // Move instead of copy
1229 |   is_full_parse_ = true;
1230 |   if (old_tree)
1231 |     ts_tree_delete(old_tree);
1232 | #endif
1233 | 
1234 |   // Cached spans belong to the previous tree.
1235 |   line_cache_.clear();
1236 | 
1237 |   if (currentLanguage == "Markdown")
1238 |   {
1239 |     updateMarkdownState(buffer);
1240 |   }
1241 | }
1245 | 
1246 | // Resets every highlighting cache so the next request starts from scratch.
1247 | void SyntaxHighlighter::clearAllCache()
1248 | {
1249 |   // Per-line results: cached spans, Markdown line states, viewport hints.
1250 |   line_cache_.clear();
1251 |   line_states_.clear();
1252 |   priority_lines_.clear();
1253 | 
1254 |   // Dropping the stored text marks the Tree-sitter content as stale, and
1255 |   // clearing the flag records that a full reparse is still needed.
1256 |   current_buffer_content_.clear();
1257 |   is_full_parse_ = false;
1258 | }
```

--------------------------------------------------------------------------------
/deps/tree-sitter-markdown/tree-sitter-markdown/src/scanner.c:
--------------------------------------------------------------------------------

```cpp
   1 | #include "tree_sitter/parser.h"
   2 | #include <assert.h>
   3 | #include <ctype.h>
   4 | #include <string.h>
   5 | #include <wchar.h>
   6 | #include <wctype.h>
   7 | 
   8 | // External token kinds used by this scanner; see grammar.js for what each token means.
   9 | typedef enum { // NOTE(review): order presumably mirrors the externals list in grammar.js - confirm
  10 |     LINE_ENDING,
  11 |     SOFT_LINE_ENDING,
  12 |     BLOCK_CLOSE,
  13 |     BLOCK_CONTINUATION,
  14 |     BLOCK_QUOTE_START,
  15 |     INDENTED_CHUNK_START,
  16 |     ATX_H1_MARKER,
  17 |     ATX_H2_MARKER,
  18 |     ATX_H3_MARKER,
  19 |     ATX_H4_MARKER,
  20 |     ATX_H5_MARKER,
  21 |     ATX_H6_MARKER,
  22 |     SETEXT_H1_UNDERLINE,
  23 |     SETEXT_H2_UNDERLINE,
  24 |     THEMATIC_BREAK,
  25 |     LIST_MARKER_MINUS,
  26 |     LIST_MARKER_PLUS,
  27 |     LIST_MARKER_STAR,
  28 |     LIST_MARKER_PARENTHESIS,
  29 |     LIST_MARKER_DOT,
  30 |     LIST_MARKER_MINUS_DONT_INTERRUPT,
  31 |     LIST_MARKER_PLUS_DONT_INTERRUPT,
  32 |     LIST_MARKER_STAR_DONT_INTERRUPT,
  33 |     LIST_MARKER_PARENTHESIS_DONT_INTERRUPT,
  34 |     LIST_MARKER_DOT_DONT_INTERRUPT,
  35 |     FENCED_CODE_BLOCK_START_BACKTICK,
  36 |     FENCED_CODE_BLOCK_START_TILDE,
  37 |     BLANK_LINE_START,
  38 |     FENCED_CODE_BLOCK_END_BACKTICK,
  39 |     FENCED_CODE_BLOCK_END_TILDE,
  40 |     HTML_BLOCK_1_START,
  41 |     HTML_BLOCK_1_END,
  42 |     HTML_BLOCK_2_START,
  43 |     HTML_BLOCK_3_START,
  44 |     HTML_BLOCK_4_START,
  45 |     HTML_BLOCK_5_START,
  46 |     HTML_BLOCK_6_START,
  47 |     HTML_BLOCK_7_START,
  48 |     CLOSE_BLOCK,
  49 |     NO_INDENTED_CHUNK,
  50 |     ERROR, // the symbol that error() stores in lexer->result_symbol
  51 |     TRIGGER_ERROR,
  52 |     TOKEN_EOF,
  53 |     MINUS_METADATA,
  54 |     PLUS_METADATA,
  55 |     PIPE_TABLE_START,
  56 |     PIPE_TABLE_LINE_ENDING,
  57 | } TokenType;
  58 | 
  59 | // Description of a block on the block stack.
  60 | //
  61 | // LIST_ITEM is a list item with minimal indentation (content begins at indent
  62 | // level 2) while LIST_ITEM_MAX_INDENTATION represents a list item with maximal
  63 | // indentation without being considered an indented code block.
  64 | //
  65 | // ANONYMOUS represents any block whose close is not handled by the
  66 | // external scanner.
  67 | typedef enum {
  68 |     BLOCK_QUOTE,
  69 |     INDENTED_CODE_BLOCK,
  70 |     LIST_ITEM, // list_item_indentation() yields 2 here and +1 per entry below
  71 |     LIST_ITEM_1_INDENTATION,
  72 |     LIST_ITEM_2_INDENTATION,
  73 |     LIST_ITEM_3_INDENTATION,
  74 |     LIST_ITEM_4_INDENTATION,
  75 |     LIST_ITEM_5_INDENTATION,
  76 |     LIST_ITEM_6_INDENTATION,
  77 |     LIST_ITEM_7_INDENTATION,
  78 |     LIST_ITEM_8_INDENTATION,
  79 |     LIST_ITEM_9_INDENTATION,
  80 |     LIST_ITEM_10_INDENTATION,
  81 |     LIST_ITEM_11_INDENTATION,
  82 |     LIST_ITEM_12_INDENTATION,
  83 |     LIST_ITEM_13_INDENTATION,
  84 |     LIST_ITEM_14_INDENTATION,
  85 |     LIST_ITEM_MAX_INDENTATION,
  86 |     FENCED_CODE_BLOCK,
  87 |     ANONYMOUS,
  88 | } Block;
  89 | 
  90 | // True for ASCII punctuation: '!'..'/', ':'..'@', '['..'`' and '{'..'~'.
  91 | static bool is_punctuation(char chr) {
  92 |     if (chr < '!' || chr > '~') return false;
  93 |     return chr <= '/' || (chr >= ':' && chr <= '@') || (chr >= '[' && chr <= '`') || chr >= '{';
  94 | }
  95 | 
  96 | // Minimum indentation for lines inside a list item: LIST_ITEM gives 2 and
  97 | // each later list-item variant one more. Only call it for blocks for which
  98 | // `is_list_item` returns true.
  99 | static uint8_t list_item_indentation(Block block) {
 100 |     const int steps_past_base = (int)block - (int)LIST_ITEM;
 101 |     return (uint8_t)(steps_past_base + 2); }
 102 | 
 103 | #define NUM_HTML_TAG_NAMES_RULE_1 3
 104 | // Lower-case tag names for "rule 1"; the array length is fixed by the macro above.
 105 | static const char *const HTML_TAG_NAMES_RULE_1[NUM_HTML_TAG_NAMES_RULE_1] = {
 106 |     "pre", "script", "style"};
 107 | 
 108 | #define NUM_HTML_TAG_NAMES_RULE_7 62
 109 | // NOTE(review): the rule numbers presumably refer to HTML block start conditions - confirm.
 110 | static const char *const HTML_TAG_NAMES_RULE_7[NUM_HTML_TAG_NAMES_RULE_7] = {
 111 |     "address",  "article",    "aside",  "base",     "basefont", "blockquote",
 112 |     "body",     "caption",    "center", "col",      "colgroup", "dd",
 113 |     "details",  "dialog",     "dir",    "div",      "dl",       "dt",
 114 |     "fieldset", "figcaption", "figure", "footer",   "form",     "frame",
 115 |     "frameset", "h1",         "h2",     "h3",       "h4",       "h5",
 116 |     "h6",       "head",       "header", "hr",       "html",     "iframe",
 117 |     "legend",   "li",         "link",   "main",     "menu",     "menuitem",
 118 |     "nav",      "noframes",   "ol",     "optgroup", "option",   "p",
 119 |     "param",    "section",    "source", "summary",  "table",    "tbody",
 120 |     "td",       "tfoot",      "th",     "thead",    "title",    "tr",
 121 |     "track",    "ul"};
 122 | 
 123 | // One flag per TokenType value, in enum order (each entry names its token); see grammar.js for the tokens.
 124 | static const bool paragraph_interrupt_symbols[] = { // NOTE(review): presumably "may interrupt a paragraph" - confirm at the use site
 125 |     false, // LINE_ENDING,
 126 |     false, // SOFT_LINE_ENDING,
 127 |     false, // BLOCK_CLOSE,
 128 |     false, // BLOCK_CONTINUATION,
 129 |     true,  // BLOCK_QUOTE_START,
 130 |     false, // INDENTED_CHUNK_START,
 131 |     true,  // ATX_H1_MARKER,
 132 |     true,  // ATX_H2_MARKER,
 133 |     true,  // ATX_H3_MARKER,
 134 |     true,  // ATX_H4_MARKER,
 135 |     true,  // ATX_H5_MARKER,
 136 |     true,  // ATX_H6_MARKER,
 137 |     true,  // SETEXT_H1_UNDERLINE,
 138 |     true,  // SETEXT_H2_UNDERLINE,
 139 |     true,  // THEMATIC_BREAK,
 140 |     true,  // LIST_MARKER_MINUS,
 141 |     true,  // LIST_MARKER_PLUS,
 142 |     true,  // LIST_MARKER_STAR,
 143 |     true,  // LIST_MARKER_PARENTHESIS,
 144 |     true,  // LIST_MARKER_DOT,
 145 |     false, // LIST_MARKER_MINUS_DONT_INTERRUPT,
 146 |     false, // LIST_MARKER_PLUS_DONT_INTERRUPT,
 147 |     false, // LIST_MARKER_STAR_DONT_INTERRUPT,
 148 |     false, // LIST_MARKER_PARENTHESIS_DONT_INTERRUPT,
 149 |     false, // LIST_MARKER_DOT_DONT_INTERRUPT,
 150 |     true,  // FENCED_CODE_BLOCK_START_BACKTICK,
 151 |     true,  // FENCED_CODE_BLOCK_START_TILDE,
 152 |     true,  // BLANK_LINE_START,
 153 |     false, // FENCED_CODE_BLOCK_END_BACKTICK,
 154 |     false, // FENCED_CODE_BLOCK_END_TILDE,
 155 |     true,  // HTML_BLOCK_1_START,
 156 |     false, // HTML_BLOCK_1_END,
 157 |     true,  // HTML_BLOCK_2_START,
 158 |     true,  // HTML_BLOCK_3_START,
 159 |     true,  // HTML_BLOCK_4_START,
 160 |     true,  // HTML_BLOCK_5_START,
 161 |     true,  // HTML_BLOCK_6_START,
 162 |     false, // HTML_BLOCK_7_START,
 163 |     false, // CLOSE_BLOCK,
 164 |     false, // NO_INDENTED_CHUNK,
 165 |     false, // ERROR,
 166 |     false, // TRIGGER_ERROR,
 167 |     false, // TOKEN_EOF,
 168 |     false, // MINUS_METADATA,
 169 |     false, // PLUS_METADATA,
 170 |     true,  // PIPE_TABLE_START,
 171 |     false, // PIPE_TABLE_LINE_ENDING,
 172 | };
 173 | 
 174 | // State bitflags used with `Scanner.state`
 175 | // (bits 2 and 3 are not assigned by any flag defined here)
 176 | // Currently matching (at the beginning of a line)
 177 | static const uint8_t STATE_MATCHING = 0x1 << 0;
 178 | // Last line break was inside a paragraph
 179 | static const uint8_t STATE_WAS_SOFT_LINE_BREAK = 0x1 << 1;
 180 | // Block should be closed after next line break
 181 | static const uint8_t STATE_CLOSE_BLOCK = 0x1 << 4;
 182 | 
 183 | // Rounds x up to the next power of two (x itself if it already is one) by
 184 | // smearing the highest set bit of x - 1 into every lower bit. Shifts stop
 185 | // at 16, so the result is only exact for values that fit in 32 bits.
 186 | static size_t roundup_32(size_t x) {
 187 |     size_t v = x - 1;
 188 |     for (unsigned shift = 1; shift <= 16; shift <<= 1) {
 189 |         v |= v >> shift;
 190 |     }
 191 |     return v + 1;
 192 | }
 196 | 
 197 | typedef struct {
 198 |     // A stack of open blocks in the current parse state
 199 |     struct {
 200 |         size_t size;     // number of entries in use
 201 |         size_t capacity; // number of entries `items` has room for
 202 |         Block *items;    // storage grown with realloc in push_block/deserialize
 203 |     } open_blocks;
 204 | 
 205 |     // Parser state flags (a combination of the STATE_* bitflags)
 206 |     uint8_t state;
 207 |     // Number of blocks that have been matched so far. Only changes during
 208 |     // matching and is reset after every line ending
 209 |     uint8_t matched;
 210 |     // Consumed but "unused" indentation. Sometimes a tab needs to be "split" to
 211 |     // be used in multiple tokens.
 212 |     uint8_t indentation;
 213 |     // The current column. Used to decide how many spaces a tab should equal
 214 |     uint8_t column;
 215 |     // The delimiter length of the currently open fenced code block
 216 |     uint8_t fenced_code_block_delimiter_length;
 217 |     // While set, mark_end() does not forward to the lexer. Not serialized.
 218 |     bool simulate;
 219 | } Scanner;
 220 | 
 221 | // Appends b to the open-block stack, growing storage (8, then x2) if full.
 222 | static void push_block(Scanner *s, Block b) {
 223 |     if (s->open_blocks.size == s->open_blocks.capacity) {
 224 |         const size_t grown = s->open_blocks.capacity == 0 ? 8 : s->open_blocks.capacity * 2;
 225 |         void *resized = realloc(s->open_blocks.items, sizeof(Block) * grown);
 226 |         assert(resized != NULL);
 227 |         s->open_blocks.items = resized;
 228 |         s->open_blocks.capacity = grown;
 229 |     }
 230 |     s->open_blocks.items[s->open_blocks.size] = b;
 231 |     s->open_blocks.size++;
 232 | }
 233 | 
 234 | static inline Block pop_block(Scanner *s) { // pops the top entry; no emptiness check is made
 235 |     return s->open_blocks.items[s->open_blocks.size -= 1];
 236 | }
 237 | 
 238 | // Write the Scanner state to buffer, capped at TREE_SITTER_SERIALIZATION_BUFFER_SIZE bytes.
 239 | static unsigned serialize(Scanner *s, char *buffer) {
 240 |     unsigned size = 0;
 241 |     buffer[size++] = (char)s->state;
 242 |     buffer[size++] = (char)s->matched;
 243 |     buffer[size++] = (char)s->indentation;
 244 |     buffer[size++] = (char)s->column;
 245 |     buffer[size++] = (char)s->fenced_code_block_delimiter_length;
 246 |     // Store only as many open blocks as still fit; the rest are dropped.
 247 |     size_t count = (TREE_SITTER_SERIALIZATION_BUFFER_SIZE - size) / sizeof(Block);
 248 |     if (count > s->open_blocks.size) count = s->open_blocks.size;
 249 |     if (count > 0) {
 250 |         memcpy(&buffer[size], s->open_blocks.items, count * sizeof(Block));
 251 |         size += (unsigned)(count * sizeof(Block));
 252 |     }
 253 |     return size; }
 254 | 
 255 | // Read the whole state of a Scanner from a byte buffer.
 256 | // `serialize` and `deserialize` should be fully symmetric.
 257 | static void deserialize(Scanner *s, const char *buffer, unsigned length) {
 258 |     s->open_blocks.size = 0;
 259 |     s->open_blocks.capacity = 0;
 260 |     s->state = 0;
 261 |     s->matched = 0;
 262 |     s->indentation = 0;
 263 |     s->column = 0;
 264 |     s->fenced_code_block_delimiter_length = 0;
 265 |     if (length == 0) {
 266 |         return; // an empty buffer leaves the freshly reset state
 267 |     }
 268 |     // Five single-byte fields, in the order `serialize` wrote them.
 269 |     const char *cursor = buffer;
 270 |     s->state = (uint8_t)*cursor++;
 271 |     s->matched = (uint8_t)*cursor++;
 272 |     s->indentation = (uint8_t)*cursor++;
 273 |     s->column = (uint8_t)*cursor++;
 274 |     s->fenced_code_block_delimiter_length = (uint8_t)*cursor++;
 275 |     size_t payload = length - (size_t)(cursor - buffer);
 276 |     if (payload == 0) {
 277 |         return;
 278 |     }
 279 |     // Whatever remains is the raw Block entries of the open-block stack.
 280 |     size_t count = payload / sizeof(Block);
 281 |     if (s->open_blocks.capacity < count) {
 282 |         size_t wanted = roundup_32(count);
 283 |         void *grown = realloc(s->open_blocks.items, sizeof(Block) * wanted);
 284 |         assert(grown != NULL);
 285 |         s->open_blocks.items = grown;
 286 |         s->open_blocks.capacity = wanted;
 287 |     }
 288 |     memcpy(s->open_blocks.items, cursor, payload);
 289 |     s->open_blocks.size = count; }
 290 | // Mark the current lexer position as the token end, unless only simulating.
 291 | static void mark_end(Scanner *s, TSLexer *lexer) {
 292 |     if (!s->simulate) {
 293 |         lexer->mark_end(lexer);
 294 |     }
 295 | }
 296 | 
 297 | // Convenience function to emit the error token. This is done to stop invalid
 298 | // parse branches. Specifically:
 299 | // 1. When encountering a newline after a line break that ended a paragraph,
 300 | //    and no new block has been opened.
 301 | // 2. When encountering a new block after a soft line break.
 302 | // 3. When a `$._trigger_error` token is valid, which is used to stop parse
 303 | //    branches through normal tree-sitter grammar rules.
 304 | //
 305 | // See also the `$._soft_line_break` and `$._paragraph_end_newline` tokens in
 306 | // grammar.js
 307 | //
 308 | // Sets `lexer->result_symbol` to ERROR and always returns true.
 309 | static bool error(TSLexer *lexer) {
 310 |     lexer->result_symbol = ERROR;
 311 |     return true;
 312 | }
 313 | 
 314 | // Advance the lexer one character and return the number of columns consumed.
 315 | // Also keeps track of the current column (modulo 4), counting tabs as spaces
 316 | // with tab stop 4. See https://github.github.com/gfm/#tabs
 317 | static size_t advance(Scanner *s, TSLexer *lexer) {
 318 |     size_t size = 1;
 319 |     if (lexer->lookahead == '\t') {
 320 |         size = 4 - s->column; // a tab fills up to the next multiple of 4
 321 |         s->column = 0;
 322 |     } else {
 323 |         s->column = (s->column + 1) % 4;
 324 |     }
 325 |     lexer->advance(lexer, false);
 326 |     return size;
 327 | }
 328 | 
 329 | // Try to match the given block, i.e. consume all tokens that belong to the
 330 | // block and update `s->indentation` accordingly. These tokens are
 331 | // 1. indentation for list items and indented code blocks
 332 | // 2. '>' (and one following space or tab) for block quotes
 333 | // Returns true if the block is matched and false otherwise
 334 | static bool match(Scanner *s, TSLexer *lexer, Block block) {
 335 |     switch (block) {
 336 |         case INDENTED_CODE_BLOCK:
 337 |             while (s->indentation < 4) {
 338 |                 if (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
 339 |                     s->indentation += advance(s, lexer);
 340 |                 } else {
 341 |                     break;
 342 |                 }
 343 |             }
 344 |             if (s->indentation >= 4 && lexer->lookahead != '\n' &&
 345 |                 lexer->lookahead != '\r') {
 346 |                 s->indentation -= 4; // keep only the columns beyond the block's 4
 347 |                 return true;
 348 |             }
 349 |             break;
 350 |         case LIST_ITEM:
 351 |         case LIST_ITEM_1_INDENTATION:
 352 |         case LIST_ITEM_2_INDENTATION:
 353 |         case LIST_ITEM_3_INDENTATION:
 354 |         case LIST_ITEM_4_INDENTATION:
 355 |         case LIST_ITEM_5_INDENTATION:
 356 |         case LIST_ITEM_6_INDENTATION:
 357 |         case LIST_ITEM_7_INDENTATION:
 358 |         case LIST_ITEM_8_INDENTATION:
 359 |         case LIST_ITEM_9_INDENTATION:
 360 |         case LIST_ITEM_10_INDENTATION:
 361 |         case LIST_ITEM_11_INDENTATION:
 362 |         case LIST_ITEM_12_INDENTATION:
 363 |         case LIST_ITEM_13_INDENTATION:
 364 |         case LIST_ITEM_14_INDENTATION:
 365 |         case LIST_ITEM_MAX_INDENTATION:
 366 |             while (s->indentation < list_item_indentation(block)) {
 367 |                 if (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
 368 |                     s->indentation += advance(s, lexer);
 369 |                 } else {
 370 |                     break;
 371 |                 }
 372 |             }
 373 |             if (s->indentation >= list_item_indentation(block)) {
 374 |                 s->indentation -= list_item_indentation(block);
 375 |                 return true;
 376 |             }
 377 |             if (lexer->lookahead == '\n' || lexer->lookahead == '\r') { // line end: still matches
 378 |                 s->indentation = 0;
 379 |                 return true;
 380 |             }
 381 |             break;
 382 |         case BLOCK_QUOTE:
 383 |             while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
 384 |                 s->indentation += advance(s, lexer);
 385 |             }
 386 |             if (lexer->lookahead == '>') {
 387 |                 advance(s, lexer);
 388 |                 s->indentation = 0;
 389 |                 if (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
 390 |                     s->indentation += advance(s, lexer) - 1; // first column after '>' is not indentation
 391 |                 }
 392 |                 return true;
 393 |             }
 394 |             break;
 395 |         case FENCED_CODE_BLOCK: // these two kinds always match; nothing is consumed
 396 |         case ANONYMOUS:
 397 |             return true;
 398 |     }
 399 |     return false;
 400 | }
 401 | // Scan a run of `delimiter` as a fenced code block end or start token.
 402 | static bool parse_fenced_code_block(Scanner *s, const char delimiter,
 403 |                                     TSLexer *lexer, const bool *valid_symbols) {
 404 |     // count the number of backticks (size_t, so runs of 256+ cannot wrap to 0)
 405 |     size_t level = 0;
 406 |     while (lexer->lookahead == delimiter) {
 407 |         advance(s, lexer);
 408 |         level++;
 409 |     }
 410 |     mark_end(s, lexer);
 411 |     // If this is able to close a fenced code block then that is the only valid
 412 |     // interpretation. It can only close a fenced code block if the number of
 413 |     // backticks is at least the number of backticks of the opening delimiter.
 414 |     // Also it cannot be indented more than 3 spaces.
 415 |     if ((delimiter == '`' ? valid_symbols[FENCED_CODE_BLOCK_END_BACKTICK]
 416 |                           : valid_symbols[FENCED_CODE_BLOCK_END_TILDE]) &&
 417 |         s->indentation < 4 && level >= s->fenced_code_block_delimiter_length) {
 418 |         while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
 419 |             advance(s, lexer);
 420 |         }
 421 |         if (lexer->lookahead == '\n' || lexer->lookahead == '\r') {
 422 |             s->fenced_code_block_delimiter_length = 0;
 423 |             lexer->result_symbol = delimiter == '`'
 424 |                                        ? FENCED_CODE_BLOCK_END_BACKTICK
 425 |                                        : FENCED_CODE_BLOCK_END_TILDE;
 426 |             return true;
 427 |         }
 428 |     }
 429 |     // If this could be the start of a fenced code block, check if the info
 430 |     // string contains any backticks.
 431 |     if ((delimiter == '`' ? valid_symbols[FENCED_CODE_BLOCK_START_BACKTICK]
 432 |                           : valid_symbols[FENCED_CODE_BLOCK_START_TILDE]) &&
 433 |         level >= 3) {
 434 |         bool info_string_has_backtick = false;
 435 |         if (delimiter == '`') {
 436 |             while (lexer->lookahead != '\n' && lexer->lookahead != '\r' &&
 437 |                    !lexer->eof(lexer)) {
 438 |                 if (lexer->lookahead == '`') {
 439 |                     info_string_has_backtick = true;
 440 |                     break;
 441 |                 }
 442 |                 advance(s, lexer);
 443 |             }
 444 |         }
 445 |         // If it does not then choose to interpret this as the start of a fenced
 446 |         // code block.
 447 |         if (!info_string_has_backtick) {
 448 |             lexer->result_symbol = delimiter == '`'
 449 |                                        ? FENCED_CODE_BLOCK_START_BACKTICK
 450 |                                        : FENCED_CODE_BLOCK_START_TILDE;
 451 |             if (!s->simulate)
 452 |                 push_block(s, FENCED_CODE_BLOCK);
 453 |             // Remembered (saturated to one byte) to match closing fences later.
 454 |             s->fenced_code_block_delimiter_length =
 455 |                 (uint8_t)(level < UINT8_MAX ? level : UINT8_MAX);
 456 |             s->indentation = 0;
 457 |             return true;
 458 |         }
 459 |     }
 460 |     return false;
 461 | }
 462 | // Scan a '*' thematic break or list marker; consumes the first char unchecked.
 463 | static bool parse_star(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
 464 |     advance(s, lexer);
 465 |     mark_end(s, lexer);
 466 |     // Count the number of stars, permitting whitespace between them.
 467 |     size_t star_count = 1;
 468 |     // Also remember how many columns of whitespace directly follow the first
 469 |     // star (only counted while star_count == 1).
 470 |     uint8_t extra_indentation = 0;
 471 |     for (;;) {
 472 |         if (lexer->lookahead == '*') {
 473 |             if (star_count == 1 && extra_indentation >= 1 &&
 474 |                 valid_symbols[LIST_MARKER_STAR]) {
 475 |                 // If we get to this point then the token has to be at least
 476 |                 // this long. We need to call `mark_end` here in case we decide
 477 |                 // later that this is a list item.
 478 |                 mark_end(s, lexer);
 479 |             }
 480 |             star_count++;
 481 |             advance(s, lexer);
 482 |         } else if (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
 483 |             if (star_count == 1) {
 484 |                 extra_indentation += advance(s, lexer);
 485 |             } else {
 486 |                 advance(s, lexer);
 487 |             }
 488 |         } else {
 489 |             break;
 490 |         }
 491 |     }
 492 |     bool line_end = lexer->lookahead == '\n' || lexer->lookahead == '\r';
 493 |     bool dont_interrupt = false;
 494 |     if (star_count == 1 && line_end) {
 495 |         extra_indentation = 1; // a lone '*' before a line end counts as one space
 496 |         // line is empty so don't interrupt paragraphs if this is a list marker
 497 |         dont_interrupt = s->matched == s->open_blocks.size;
 498 |     }
 499 |     // If there were at least 3 stars then this could be a thematic break
 500 |     bool thematic_break = star_count >= 3 && line_end;
 501 |     // If there was a star and at least one space after that star then this
 502 |     // could be a list marker.
 503 |     bool list_marker_star = star_count >= 1 && extra_indentation >= 1;
 504 |     if (valid_symbols[THEMATIC_BREAK] && thematic_break && s->indentation < 4) {
 505 |         // If a thematic break is valid then it takes precedence
 506 |         lexer->result_symbol = THEMATIC_BREAK;
 507 |         mark_end(s, lexer);
 508 |         s->indentation = 0;
 509 |         return true;
 510 |     }
 511 |     if ((dont_interrupt ? valid_symbols[LIST_MARKER_STAR_DONT_INTERRUPT]
 512 |                         : valid_symbols[LIST_MARKER_STAR]) &&
 513 |         list_marker_star) {
 514 |         // List markers take precedence over emphasis markers
 515 |         // If star_count > 1 then we already called mark_end at the right point.
 516 |         // Otherwise the token should go until this point.
 517 |         if (star_count == 1) {
 518 |             mark_end(s, lexer);
 519 |         }
 520 |         // Not counting one space...
 521 |         extra_indentation--;
 522 |         // ... check if the list item begins with an indented code block
 523 |         if (extra_indentation <= 3) {
 524 |             // If not then calculate the indentation level of the list item
 525 |             // content as indentation of list marker + indentation after list
 526 |             // marker - 1
 527 |             extra_indentation += s->indentation;
 528 |             s->indentation = 0;
 529 |         } else {
 530 |             // Otherwise the indentation level is just the indentation of the
 531 |             // list marker. We keep the indentation after the list marker for
 532 |             // later blocks.
 533 |             uint8_t temp = s->indentation;
 534 |             s->indentation = extra_indentation;
 535 |             extra_indentation = temp;
 536 |         }
 537 |         if (!s->simulate)
 538 |             push_block(s, (Block)(LIST_ITEM + extra_indentation));
 539 |         lexer->result_symbol =
 540 |             dont_interrupt ? LIST_MARKER_STAR_DONT_INTERRUPT : LIST_MARKER_STAR;
 541 |         return true;
 542 |     }
 543 |     return false;
 544 | }
 545 | // Scan a '_' thematic break: 3+ underscores, whitespace allowed, then line end.
 546 | static bool parse_thematic_break_underscore(Scanner *s, TSLexer *lexer,
 547 |                                             const bool *valid_symbols) {
 548 |     advance(s, lexer); // first character is consumed unchecked and counted below
 549 |     mark_end(s, lexer);
 550 |     size_t underscore_count = 1;
 551 |     for (;;) {
 552 |         if (lexer->lookahead == '_') {
 553 |             underscore_count++;
 554 |             advance(s, lexer);
 555 |         } else if (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
 556 |             advance(s, lexer);
 557 |         } else {
 558 |             break;
 559 |         }
 560 |     }
 561 |     bool line_end = lexer->lookahead == '\n' || lexer->lookahead == '\r';
 562 |     if (underscore_count >= 3 && line_end && valid_symbols[THEMATIC_BREAK]) {
 563 |         lexer->result_symbol = THEMATIC_BREAK;
 564 |         mark_end(s, lexer); // extend the token over the whole run
 565 |         s->indentation = 0;
 566 |         return true;
 567 |     }
 568 |     return false;
 569 | }
 570 | // Emit BLOCK_QUOTE_START if valid: consume one char and an optional space/tab.
 571 | static bool parse_block_quote(Scanner *s, TSLexer *lexer,
 572 |                               const bool *valid_symbols) {
 573 |     if (valid_symbols[BLOCK_QUOTE_START]) {
 574 |         advance(s, lexer); // presumably the '>' seen by the caller — confirm
 575 |         s->indentation = 0;
 576 |         if (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
 577 |             s->indentation += advance(s, lexer) - 1; // first column is not indentation
 578 |         }
 579 |         lexer->result_symbol = BLOCK_QUOTE_START;
 580 |         if (!s->simulate)
 581 |             push_block(s, BLOCK_QUOTE);
 582 |         return true;
 583 |     }
 584 |     return false;
 585 | }
 586 | // Scan at most 6 '#' followed by whitespace or a line end as an ATX marker.
 587 | static bool parse_atx_heading(Scanner *s, TSLexer *lexer,
 588 |                               const bool *valid_symbols) {
 589 |     if (valid_symbols[ATX_H1_MARKER] && s->indentation <= 3) {
 590 |         mark_end(s, lexer);
 591 |         uint16_t level = 0;
 592 |         while (lexer->lookahead == '#' && level <= 6) { // may reach 7, rejected below
 593 |             advance(s, lexer);
 594 |             level++;
 595 |         }
 596 |         if (level <= 6 &&
 597 |             (lexer->lookahead == ' ' || lexer->lookahead == '\t' ||
 598 |              lexer->lookahead == '\n' || lexer->lookahead == '\r')) {
 599 |             lexer->result_symbol = ATX_H1_MARKER + (level - 1); // assumes H1..H6 symbols are consecutive — TODO confirm
 600 |             s->indentation = 0;
 601 |             mark_end(s, lexer);
 602 |             return true;
 603 |         }
 604 |     }
 605 |     return false;
 606 | }
 607 | // Scan '=' characters, then whitespace, up to a line end as SETEXT_H1_UNDERLINE.
 608 | static bool parse_setext_underline(Scanner *s, TSLexer *lexer,
 609 |                                    const bool *valid_symbols) {
 610 |     if (valid_symbols[SETEXT_H1_UNDERLINE] &&
 611 |         s->matched == s->open_blocks.size) { // only when every open block matched
 612 |         mark_end(s, lexer);
 613 |         while (lexer->lookahead == '=') {
 614 |             advance(s, lexer);
 615 |         }
 616 |         while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
 617 |             advance(s, lexer);
 618 |         }
 619 |         if (lexer->lookahead == '\n' || lexer->lookahead == '\r') {
 620 |             lexer->result_symbol = SETEXT_H1_UNDERLINE;
 621 |             mark_end(s, lexer);
 622 |             return true;
 623 |         }
 624 |     }
 625 |     return false;
 626 | }
 627 | // Scan a '+' list marker, or a "+++"-delimited PLUS_METADATA block.
 628 | static bool parse_plus(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
 629 |     if (s->indentation <= 3 &&
 630 |         (valid_symbols[LIST_MARKER_PLUS] ||
 631 |          valid_symbols[LIST_MARKER_PLUS_DONT_INTERRUPT] ||
 632 |          valid_symbols[PLUS_METADATA])) {
 633 |         advance(s, lexer); // first character is consumed unchecked
 634 |         if (valid_symbols[PLUS_METADATA] && lexer->lookahead == '+') {
 635 |             advance(s, lexer);
 636 |             if (lexer->lookahead != '+') {
 637 |                 return false; // exactly two pluses: no token
 638 |             }
 639 |             advance(s, lexer);
 640 |             while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
 641 |                 advance(s, lexer);
 642 |             }
 643 |             if (lexer->lookahead != '\n' && lexer->lookahead != '\r') {
 644 |                 return false;
 645 |             }
 646 |             for (;;) { // one iteration per following line, until a closing "+++" line or EOF
 647 |                 // advance over newline
 648 |                 if (lexer->lookahead == '\r') {
 649 |                     advance(s, lexer);
 650 |                     if (lexer->lookahead == '\n') {
 651 |                         advance(s, lexer);
 652 |                     }
 653 |                 } else {
 654 |                     advance(s, lexer);
 655 |                 }
 656 |                 // check for pluses
 657 |                 size_t plus_count = 0;
 658 |                 while (lexer->lookahead == '+') {
 659 |                     plus_count++;
 660 |                     advance(s, lexer);
 661 |                 }
 662 |                 if (plus_count == 3) {
 663 |                     // if exactly 3 check if next symbol (after eventual
 664 |                     // whitespace) is newline
 665 |                     while (lexer->lookahead == ' ' ||
 666 |                            lexer->lookahead == '\t') {
 667 |                         advance(s, lexer);
 668 |                     }
 669 |                     if (lexer->lookahead == '\r' || lexer->lookahead == '\n') {
 670 |                         // if so also consume newline
 671 |                         if (lexer->lookahead == '\r') {
 672 |                             advance(s, lexer);
 673 |                             if (lexer->lookahead == '\n') {
 674 |                                 advance(s, lexer);
 675 |                             }
 676 |                         } else {
 677 |                             advance(s, lexer);
 678 |                         }
 679 |                         mark_end(s, lexer);
 680 |                         lexer->result_symbol = PLUS_METADATA;
 681 |                         return true;
 682 |                     }
 683 |                 }
 684 |                 // otherwise consume rest of line
 685 |                 while (lexer->lookahead != '\n' && lexer->lookahead != '\r' &&
 686 |                        !lexer->eof(lexer)) {
 687 |                     advance(s, lexer);
 688 |                 }
 689 |                 // if end of file is reached, then this is not metadata
 690 |                 if (lexer->eof(lexer)) {
 691 |                     break;
 692 |                 }
 693 |             }
 694 |         } else {
 695 |             uint8_t extra_indentation = 0; // columns of whitespace after the '+'
 696 |             while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
 697 |                 extra_indentation += advance(s, lexer);
 698 |             }
 699 |             bool dont_interrupt = false;
 700 |             if (lexer->lookahead == '\r' || lexer->lookahead == '\n') {
 701 |                 extra_indentation = 1; // a marker before a line end counts as one space
 702 |                 dont_interrupt = true;
 703 |             }
 704 |             dont_interrupt =
 705 |                 dont_interrupt && s->matched == s->open_blocks.size;
 706 |             if (extra_indentation >= 1 &&
 707 |                 (dont_interrupt ? valid_symbols[LIST_MARKER_PLUS_DONT_INTERRUPT]
 708 |                                 : valid_symbols[LIST_MARKER_PLUS])) {
 709 |                 lexer->result_symbol = dont_interrupt
 710 |                                            ? LIST_MARKER_PLUS_DONT_INTERRUPT
 711 |                                            : LIST_MARKER_PLUS;
 712 |                 extra_indentation--; // not counting one space
 713 |                 if (extra_indentation <= 3) {
 714 |                     extra_indentation += s->indentation;
 715 |                     s->indentation = 0;
 716 |                 } else {
 717 |                     uint8_t temp = s->indentation; // swap: excess whitespace becomes the indentation
 718 |                     s->indentation = extra_indentation;
 719 |                     extra_indentation = temp;
 720 |                 }
 721 |                 if (!s->simulate)
 722 |                     push_block(s, (Block)(LIST_ITEM + extra_indentation));
 723 |                 return true;
 724 |             }
 725 |         }
 726 |     }
 727 |     return false;
 728 | }
 729 | // Scan an ordered list marker: 1-9 digits, '.' or ')', then space or line end.
 730 | static bool parse_ordered_list_marker(Scanner *s, TSLexer *lexer,
 731 |                                       const bool *valid_symbols) {
 732 |     if (s->indentation <= 3 &&
 733 |         (valid_symbols[LIST_MARKER_PARENTHESIS] ||
 734 |          valid_symbols[LIST_MARKER_DOT] ||
 735 |          valid_symbols[LIST_MARKER_PARENTHESIS_DONT_INTERRUPT] ||
 736 |          valid_symbols[LIST_MARKER_DOT_DONT_INTERRUPT])) {
 737 |         size_t digits = 1; // the first character is consumed unchecked as a digit
 738 |         bool dont_interrupt = lexer->lookahead != '1'; // stays false only for a lone "1"
 739 |         advance(s, lexer);
 740 |         while (isdigit(lexer->lookahead)) {
 741 |             dont_interrupt = true;
 742 |             digits++;
 743 |             advance(s, lexer);
 744 |         }
 745 |         if (digits >= 1 && digits <= 9) {
 746 |             bool dot = false;
 747 |             bool parenthesis = false;
 748 |             if (lexer->lookahead == '.') {
 749 |                 advance(s, lexer);
 750 |                 dot = true;
 751 |             } else if (lexer->lookahead == ')') {
 752 |                 advance(s, lexer);
 753 |                 parenthesis = true;
 754 |             }
 755 |             if (dot || parenthesis) {
 756 |                 uint8_t extra_indentation = 0; // columns of whitespace after the marker
 757 |                 while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
 758 |                     extra_indentation += advance(s, lexer);
 759 |                 }
 760 |                 bool line_end =
 761 |                     lexer->lookahead == '\n' || lexer->lookahead == '\r';
 762 |                 if (line_end) {
 763 |                     extra_indentation = 1; // a marker before a line end counts as one space
 764 |                     dont_interrupt = true;
 765 |                 }
 766 |                 dont_interrupt =
 767 |                     dont_interrupt && s->matched == s->open_blocks.size;
 768 |                 if (extra_indentation >= 1 &&
 769 |                     (dot ? (dont_interrupt
 770 |                                 ? valid_symbols[LIST_MARKER_DOT_DONT_INTERRUPT]
 771 |                                 : valid_symbols[LIST_MARKER_DOT])
 772 |                          : (dont_interrupt
 773 |                                 ? valid_symbols
 774 |                                       [LIST_MARKER_PARENTHESIS_DONT_INTERRUPT]
 775 |                                 : valid_symbols[LIST_MARKER_PARENTHESIS]))) {
 776 |                     lexer->result_symbol =
 777 |                         dot ? LIST_MARKER_DOT : LIST_MARKER_PARENTHESIS; // NOTE(review): never the *_DONT_INTERRUPT variant — confirm intended
 778 |                     extra_indentation--; // not counting one space
 779 |                     if (extra_indentation <= 3) {
 780 |                         extra_indentation += s->indentation;
 781 |                         s->indentation = 0;
 782 |                     } else {
 783 |                         uint8_t temp = s->indentation; // swap: excess whitespace becomes the indentation
 784 |                         s->indentation = extra_indentation;
 785 |                         extra_indentation = temp;
 786 |                     }
 787 |                     if (!s->simulate)
 788 |                         push_block(
 789 |                             s, (Block)(LIST_ITEM + extra_indentation + digits)); // digit count is added to the offset
 790 |                     return true;
 791 |                 }
 792 |             }
 793 |         }
 794 |     }
 795 |     return false;
 796 | }
 797 | // Scan a '-' line: setext H2 underline, thematic break, list marker or metadata.
 798 | static bool parse_minus(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
 799 |     if (s->indentation <= 3 &&
 800 |         (valid_symbols[LIST_MARKER_MINUS] ||
 801 |          valid_symbols[LIST_MARKER_MINUS_DONT_INTERRUPT] ||
 802 |          valid_symbols[SETEXT_H2_UNDERLINE] || valid_symbols[THEMATIC_BREAK] ||
 803 |          valid_symbols[MINUS_METADATA])) {
 804 |         mark_end(s, lexer);
 805 |         bool whitespace_after_minus = false;
 806 |         bool minus_after_whitespace = false;
 807 |         size_t minus_count = 0;
 808 |         uint8_t extra_indentation = 0; // whitespace columns directly after the first '-'
 809 | 
 810 |         for (;;) {
 811 |             if (lexer->lookahead == '-') {
 812 |                 if (minus_count == 1 && extra_indentation >= 1) {
 813 |                     mark_end(s, lexer); // a list marker token would end here
 814 |                 }
 815 |                 minus_count++;
 816 |                 advance(s, lexer);
 817 |                 minus_after_whitespace = whitespace_after_minus;
 818 |             } else if (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
 819 |                 if (minus_count == 1) {
 820 |                     extra_indentation += advance(s, lexer);
 821 |                 } else {
 822 |                     advance(s, lexer);
 823 |                 }
 824 |                 whitespace_after_minus = true;
 825 |             } else {
 826 |                 break;
 827 |             }
 828 |         }
 829 |         bool line_end = lexer->lookahead == '\n' || lexer->lookahead == '\r';
 830 |         bool dont_interrupt = false;
 831 |         if (minus_count == 1 && line_end) {
 832 |             extra_indentation = 1; // a lone '-' before a line end counts as one space
 833 |             dont_interrupt = true;
 834 |         }
 835 |         dont_interrupt = dont_interrupt && s->matched == s->open_blocks.size;
 836 |         bool thematic_break = minus_count >= 3 && line_end;
 837 |         bool underline =
 838 |             minus_count >= 1 && !minus_after_whitespace && line_end &&
 839 |             s->matched ==
 840 |                 s->open_blocks
 841 |                     .size; // setext heading can not break lazy continuation
 842 |         bool list_marker_minus = minus_count >= 1 && extra_indentation >= 1;
 843 |         bool success = false;
 844 |         if (valid_symbols[SETEXT_H2_UNDERLINE] && underline) {
 845 |             lexer->result_symbol = SETEXT_H2_UNDERLINE;
 846 |             mark_end(s, lexer);
 847 |             s->indentation = 0;
 848 |             success = true;
 849 |         } else if (valid_symbols[THEMATIC_BREAK] &&
 850 |                    thematic_break) { // underline is false if list_marker_minus
 851 |                                      // is true
 852 |             lexer->result_symbol = THEMATIC_BREAK;
 853 |             mark_end(s, lexer);
 854 |             s->indentation = 0;
 855 |             success = true;
 856 |         } else if ((dont_interrupt
 857 |                         ? valid_symbols[LIST_MARKER_MINUS_DONT_INTERRUPT]
 858 |                         : valid_symbols[LIST_MARKER_MINUS]) &&
 859 |                    list_marker_minus) {
 860 |             if (minus_count == 1) {
 861 |                 mark_end(s, lexer);
 862 |             }
 863 |             extra_indentation--; // not counting one space
 864 |             if (extra_indentation <= 3) {
 865 |                 extra_indentation += s->indentation;
 866 |                 s->indentation = 0;
 867 |             } else {
 868 |                 uint8_t temp = s->indentation; // swap: excess whitespace becomes the indentation
 869 |                 s->indentation = extra_indentation;
 870 |                 extra_indentation = temp;
 871 |             }
 872 |             if (!s->simulate)
 873 |                 push_block(s, (Block)(LIST_ITEM + extra_indentation));
 874 |             lexer->result_symbol = dont_interrupt
 875 |                                        ? LIST_MARKER_MINUS_DONT_INTERRUPT
 876 |                                        : LIST_MARKER_MINUS;
 877 |             return true;
 878 |         }
 879 |         if (minus_count == 3 && (!minus_after_whitespace) && line_end &&
 880 |             valid_symbols[MINUS_METADATA]) { // overrides the result above if a closing "---" line is found
 881 |             for (;;) {
 882 |                 // advance over newline
 883 |                 if (lexer->lookahead == '\r') {
 884 |                     advance(s, lexer);
 885 |                     if (lexer->lookahead == '\n') {
 886 |                         advance(s, lexer);
 887 |                     }
 888 |                 } else {
 889 |                     advance(s, lexer);
 890 |                 }
 891 |                 // check for minuses
 892 |                 minus_count = 0;
 893 |                 while (lexer->lookahead == '-') {
 894 |                     minus_count++;
 895 |                     advance(s, lexer);
 896 |                 }
 897 |                 if (minus_count == 3) {
 898 |                     // if exactly 3 check if next symbol (after eventual
 899 |                     // whitespace) is newline
 900 |                     while (lexer->lookahead == ' ' ||
 901 |                            lexer->lookahead == '\t') {
 902 |                         advance(s, lexer);
 903 |                     }
 904 |                     if (lexer->lookahead == '\r' || lexer->lookahead == '\n') {
 905 |                         // if so also consume newline
 906 |                         if (lexer->lookahead == '\r') {
 907 |                             advance(s, lexer);
 908 |                             if (lexer->lookahead == '\n') {
 909 |                                 advance(s, lexer);
 910 |                             }
 911 |                         } else {
 912 |                             advance(s, lexer);
 913 |                         }
 914 |                         mark_end(s, lexer);
 915 |                         lexer->result_symbol = MINUS_METADATA;
 916 |                         return true;
 917 |                     }
 918 |                 }
 919 |                 // otherwise consume rest of line
 920 |                 while (lexer->lookahead != '\n' && lexer->lookahead != '\r' &&
 921 |                        !lexer->eof(lexer)) {
 922 |                     advance(s, lexer);
 923 |                 }
 924 |                 // if end of file is reached, then this is not metadata
 925 |                 if (lexer->eof(lexer)) {
 926 |                     break;
 927 |                 }
 928 |             }
 929 |         }
 930 |         if (success) { // no metadata found: fall back to the underline / thematic break
 931 |             return true;
 932 |         }
 933 |     }
 934 |     return false;
 935 | }
 936 | // Scans an HTML block opener (kinds 1-7) or a kind 1 closing tag; returns true once lexer->result_symbol is set.
 937 | static bool parse_html_block(Scanner *s, TSLexer *lexer,
 938 |                              const bool *valid_symbols) {
 939 |     if (!(valid_symbols[HTML_BLOCK_1_START] ||
 940 |           valid_symbols[HTML_BLOCK_1_END] ||
 941 |           valid_symbols[HTML_BLOCK_2_START] ||
 942 |           valid_symbols[HTML_BLOCK_3_START] ||
 943 |           valid_symbols[HTML_BLOCK_4_START] ||
 944 |           valid_symbols[HTML_BLOCK_5_START] ||
 945 |           valid_symbols[HTML_BLOCK_6_START] ||
 946 |           valid_symbols[HTML_BLOCK_7_START])) {
 947 |         return false;
 948 |     }
 949 |     advance(s, lexer);  // consume the '<' (scan() calls this when the lookahead is '<')
 950 |     if (lexer->lookahead == '?' && valid_symbols[HTML_BLOCK_3_START]) {  // "<?" opens block 3
 951 |         advance(s, lexer);
 952 |         lexer->result_symbol = HTML_BLOCK_3_START;
 953 |         if (!s->simulate)
 954 |             push_block(s, ANONYMOUS);
 955 |         return true;
 956 |     }
 957 |     if (lexer->lookahead == '!') {
 958 |         // "<!" could be block 2 ("<!--"), block 4 ("<!" + 'A'..'Z') or block 5 ("<![CDATA[")
 959 |         advance(s, lexer);
 960 |         if (lexer->lookahead == '-') {
 961 |             advance(s, lexer);
 962 |             if (lexer->lookahead == '-' && valid_symbols[HTML_BLOCK_2_START]) {
 963 |                 advance(s, lexer);
 964 |                 lexer->result_symbol = HTML_BLOCK_2_START;
 965 |                 if (!s->simulate)
 966 |                     push_block(s, ANONYMOUS);
 967 |                 return true;
 968 |             }
 969 |         } else if ('A' <= lexer->lookahead && lexer->lookahead <= 'Z' &&
 970 |                    valid_symbols[HTML_BLOCK_4_START]) {
 971 |             advance(s, lexer);
 972 |             lexer->result_symbol = HTML_BLOCK_4_START;
 973 |             if (!s->simulate)
 974 |                 push_block(s, ANONYMOUS);
 975 |             return true;
 976 |         } else if (lexer->lookahead == '[') {  // match the literal "[CDATA[" one character at a time
 977 |             advance(s, lexer);
 978 |             if (lexer->lookahead == 'C') {
 979 |                 advance(s, lexer);
 980 |                 if (lexer->lookahead == 'D') {
 981 |                     advance(s, lexer);
 982 |                     if (lexer->lookahead == 'A') {
 983 |                         advance(s, lexer);
 984 |                         if (lexer->lookahead == 'T') {
 985 |                             advance(s, lexer);
 986 |                             if (lexer->lookahead == 'A') {
 987 |                                 advance(s, lexer);
 988 |                                 if (lexer->lookahead == '[' &&
 989 |                                     valid_symbols[HTML_BLOCK_5_START]) {
 990 |                                     advance(s, lexer);
 991 |                                     lexer->result_symbol = HTML_BLOCK_5_START;
 992 |                                     if (!s->simulate)
 993 |                                         push_block(s, ANONYMOUS);
 994 |                                     return true;
 995 |                                 }
 996 |                             }
 997 |                         }
 998 |                     }
 999 |                 }
1000 |             }
1001 |         }
1002 |     }
1003 |     bool starting_slash = lexer->lookahead == '/';  // "</" means this is a closing tag
1004 |     if (starting_slash) {
1005 |         advance(s, lexer);
1006 |     }
1007 |     char name[11];  // room for 10 lowercased letters plus the terminating 0
1008 |     size_t name_length = 0;
1009 |     while (iswalpha((wint_t)lexer->lookahead)) {
1010 |         if (name_length < 10) {
1011 |             name[name_length++] = (char)towlower((wint_t)lexer->lookahead);
1012 |         } else {
1013 |             name_length = 12;  // sentinel: name is too long, so the name lookups below are skipped
1014 |         }
1015 |         advance(s, lexer);
1016 |     }
1017 |     if (name_length == 0) {
1018 |         return false;
1019 |     }
1020 |     bool tag_closed = false;
1021 |     if (name_length < 11) {  // the name fits in the buffer (the overflow sentinel is 12)
1022 |         name[name_length] = 0;
1023 |         bool next_symbol_valid =
1024 |             lexer->lookahead == ' ' || lexer->lookahead == '\t' ||
1025 |             lexer->lookahead == '\n' || lexer->lookahead == '\r' ||
1026 |             lexer->lookahead == '>';
1027 |         if (next_symbol_valid) {
1028 |             // try block 1 names; with a leading slash only HTML_BLOCK_1_END can be emitted
1029 |             for (size_t i = 0; i < NUM_HTML_TAG_NAMES_RULE_1; i++) {
1030 |                 if (strcmp(name, HTML_TAG_NAMES_RULE_1[i]) == 0) {
1031 |                     if (starting_slash) {
1032 |                         if (valid_symbols[HTML_BLOCK_1_END]) {
1033 |                             lexer->result_symbol = HTML_BLOCK_1_END;
1034 |                             return true;
1035 |                         }
1036 |                     } else if (valid_symbols[HTML_BLOCK_1_START]) {
1037 |                         lexer->result_symbol = HTML_BLOCK_1_START;
1038 |                         if (!s->simulate)
1039 |                             push_block(s, ANONYMOUS);
1040 |                         return true;
1041 |                     }
1042 |                 }
1043 |             }
1044 |         }
1045 |         if (!next_symbol_valid && lexer->lookahead == '/') {  // name directly followed by "/>"
1046 |             advance(s, lexer);
1047 |             if (lexer->lookahead == '>') {
1048 |                 advance(s, lexer);
1049 |                 tag_closed = true;
1050 |             }
1051 |         }
1052 |         if (next_symbol_valid || tag_closed) {
1053 |             // try the HTML_TAG_NAMES_RULE_7 names; a hit starts an HTML_BLOCK_6
1054 |             for (size_t i = 0; i < NUM_HTML_TAG_NAMES_RULE_7; i++) {
1055 |                 if (strcmp(name, HTML_TAG_NAMES_RULE_7[i]) == 0 &&
1056 |                     valid_symbols[HTML_BLOCK_6_START]) {
1057 |                     lexer->result_symbol = HTML_BLOCK_6_START;
1058 |                     if (!s->simulate)
1059 |                         push_block(s, ANONYMOUS);
1060 |                     return true;
1061 |                 }
1062 |             }
1063 |         }
1064 |     }
1065 |     // Last candidate: HTML_BLOCK_7_START, a complete open or closing tag followed only by blanks.
1066 |     if (!valid_symbols[HTML_BLOCK_7_START]) {
1067 |         return false;
1068 |     }
1069 | 
1070 |     if (!tag_closed) {
1071 |         // tag name (continued): digits and '-' are accepted after the leading letters
1072 |         while (iswalnum((wint_t)lexer->lookahead) || lexer->lookahead == '-') {
1073 |             advance(s, lexer);
1074 |         }
1075 |         if (!starting_slash) {
1076 |             // attributes
1077 |             bool had_whitespace = false;
1078 |             for (;;) {
1079 |                 // whitespace
1080 |                 while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
1081 |                     had_whitespace = true;
1082 |                     advance(s, lexer);
1083 |                 }
1084 |                 if (lexer->lookahead == '/') {  // '/' ends the attribute list; '>' is checked after the loop
1085 |                     advance(s, lexer);
1086 |                     break;
1087 |                 }
1088 |                 if (lexer->lookahead == '>') {
1089 |                     break;
1090 |                 }
1091 |                 // attribute name; it must be separated from what precedes it by whitespace
1092 |                 if (!had_whitespace) {
1093 |                     return false;
1094 |                 }
1095 |                 if (!iswalpha((wint_t)lexer->lookahead) &&
1096 |                     lexer->lookahead != '_' && lexer->lookahead != ':') {
1097 |                     return false;
1098 |                 }
1099 |                 had_whitespace = false;
1100 |                 advance(s, lexer);
1101 |                 while (iswalnum((wint_t)lexer->lookahead) ||
1102 |                        lexer->lookahead == '_' || lexer->lookahead == '.' ||
1103 |                        lexer->lookahead == ':' || lexer->lookahead == '-') {
1104 |                     advance(s, lexer);
1105 |                 }
1106 |                 // attribute value specification
1107 |                 // optional whitespace (also counts as the separator before a following attribute)
1108 |                 while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
1109 |                     had_whitespace = true;
1110 |                     advance(s, lexer);
1111 |                 }
1112 |                 // =
1113 |                 if (lexer->lookahead == '=') {
1114 |                     advance(s, lexer);
1115 |                     had_whitespace = false;
1116 |                     // optional whitespace
1117 |                     while (lexer->lookahead == ' ' ||
1118 |                            lexer->lookahead == '\t') {
1119 |                         advance(s, lexer);
1120 |                     }
1121 |                     // attribute value: quoted values must close with the same quote on this line
1122 |                     if (lexer->lookahead == '\'' || lexer->lookahead == '"') {
1123 |                         char delimiter = (char)lexer->lookahead;
1124 |                         advance(s, lexer);
1125 |                         while (lexer->lookahead != delimiter &&
1126 |                                lexer->lookahead != '\n' &&
1127 |                                lexer->lookahead != '\r' && !lexer->eof(lexer)) {
1128 |                             advance(s, lexer);
1129 |                         }
1130 |                         if (lexer->lookahead != delimiter) {
1131 |                             return false;
1132 |                         }
1133 |                         advance(s, lexer);
1134 |                     } else {
1135 |                         // unquoted attribute value; at least one character is required
1136 |                         bool had_one = false;
1137 |                         while (lexer->lookahead != ' ' &&
1138 |                                lexer->lookahead != '\t' &&
1139 |                                lexer->lookahead != '"' &&
1140 |                                lexer->lookahead != '\'' &&
1141 |                                lexer->lookahead != '=' &&
1142 |                                lexer->lookahead != '<' &&
1143 |                                lexer->lookahead != '>' &&
1144 |                                lexer->lookahead != '`' &&
1145 |                                lexer->lookahead != '\n' &&
1146 |                                lexer->lookahead != '\r' && !lexer->eof(lexer)) {
1147 |                             advance(s, lexer);
1148 |                             had_one = true;
1149 |                         }
1150 |                         if (!had_one) {
1151 |                             return false;
1152 |                         }
1153 |                     }
1154 |                 }
1155 |             }
1156 |         } else {  // closing tag: only blanks may sit between the name and '>'
1157 |             while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
1158 |                 advance(s, lexer);
1159 |             }
1160 |         }
1161 |         if (lexer->lookahead != '>') {
1162 |             return false;
1163 |         }
1164 |         advance(s, lexer);
1165 |     }
1166 |     while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
1167 |         advance(s, lexer);
1168 |     }
1169 |     if (lexer->lookahead == '\r' || lexer->lookahead == '\n') {  // the tag must be the last thing on the line
1170 |         lexer->result_symbol = HTML_BLOCK_7_START;
1171 |         if (!s->simulate)
1172 |             push_block(s, ANONYMOUS);
1173 |         return true;
1174 |     }
1175 |     return false;
1176 | }
1177 | // Zero-width check for a pipe table: a header line followed by a delimiter row with the same cell count.
1178 | static bool parse_pipe_table(Scanner *s, TSLexer *lexer,
1179 |                              const bool *valid_symbols) {
1180 | 
1181 |     // unused
1182 |     (void)(valid_symbols);
1183 | 
1184 |     // PIPE_TABLE_START is zero width
1185 |     mark_end(s, lexer);
1186 |     // count number of cells
1187 |     size_t cell_count = 0;
1188 |     // also remember if we see starting and ending pipes, as empty headers have
1189 |     // to have both
1190 |     bool starting_pipe = false;
1191 |     bool ending_pipe = false;
1192 |     bool empty = true;  // never modified in this function
1193 |     if (lexer->lookahead == '|') {
1194 |         starting_pipe = true;
1195 |         advance(s, lexer);
1196 |     }
1197 |     while (lexer->lookahead != '\r' && lexer->lookahead != '\n' &&
1198 |            !lexer->eof(lexer)) {
1199 |         if (lexer->lookahead == '|') {
1200 |             cell_count++;
1201 |             ending_pipe = true;
1202 |             advance(s, lexer);
1203 |         } else {
1204 |             if (lexer->lookahead != ' ' && lexer->lookahead != '\t') {
1205 |                 ending_pipe = false;  // non-blank content after the last '|'
1206 |             }
1207 |             if (lexer->lookahead == '\\') {  // skip a backslash and the character it escapes (per is_punctuation)
1208 |                 advance(s, lexer);
1209 |                 if (is_punctuation((char)lexer->lookahead)) {
1210 |                     advance(s, lexer);
1211 |                 }
1212 |             } else {
1213 |                 advance(s, lexer);
1214 |             }
1215 |         }
1216 |     }
1217 |     if (empty && cell_count == 0 && !(starting_pipe && ending_pipe)) {  // no '|' after an optional leading one
1218 |         return false;
1219 |     }
1220 |     if (!ending_pipe) {  // content after the last '|' forms one more cell
1221 |         cell_count++;
1222 |     }
1223 | 
1224 |     // check the following line for a delimiter row
1225 |     // parse a newline ("\n", "\r" or "\r\n"); anything else (e.g. end of file) means no table
1226 |     if (lexer->lookahead == '\n') {
1227 |         advance(s, lexer);
1228 |     } else if (lexer->lookahead == '\r') {
1229 |         advance(s, lexer);
1230 |         if (lexer->lookahead == '\n') {
1231 |             advance(s, lexer);
1232 |         }
1233 |     } else {
1234 |         return false;
1235 |     }
1236 |     s->indentation = 0;  // now at the start of the next line
1237 |     s->column = 0;
1238 |     for (;;) {
1239 |         if (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
1240 |             s->indentation += advance(s, lexer);
1241 |         } else {
1242 |             break;
1243 |         }
1244 |     }
1245 |     s->simulate = true;  // look-ahead only: push_block/pop_block calls in this file are guarded by !s->simulate
1246 |     uint8_t matched_temp = 0;
1247 |     while (matched_temp < (uint8_t)s->open_blocks.size) {  // every open block must match on the delimiter row
1248 |         if (match(s, lexer, s->open_blocks.items[matched_temp])) {
1249 |             matched_temp++;
1250 |         } else {
1251 |             return false;
1252 |         }
1253 |     }
1254 | 
1255 |     // check if delimiter row has the same number of cells and at least one pipe
1256 |     size_t delimiter_cell_count = 0;
1257 |     if (lexer->lookahead == '|') {
1258 |         advance(s, lexer);
1259 |     }
1260 |     for (;;) {  // one iteration per delimiter cell: blanks, optional ':', one or more '-', optional ':', blanks
1261 |         while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
1262 |             advance(s, lexer);
1263 |         }
1264 |         if (lexer->lookahead == '|') {
1265 |             delimiter_cell_count++;
1266 |             advance(s, lexer);
1267 |             continue;
1268 |         }
1269 |         if (lexer->lookahead == ':') {  // a leading ':' must be followed by '-'
1270 |             advance(s, lexer);
1271 |             if (lexer->lookahead != '-') {
1272 |                 return false;
1273 |             }
1274 |         }
1275 |         bool had_one_minus = false;
1276 |         while (lexer->lookahead == '-') {
1277 |             had_one_minus = true;
1278 |             advance(s, lexer);
1279 |         }
1280 |         if (had_one_minus) {
1281 |             delimiter_cell_count++;
1282 |         }
1283 |         if (lexer->lookahead == ':') {  // a trailing ':' is only allowed after at least one '-'
1284 |             if (!had_one_minus) {
1285 |                 return false;
1286 |             }
1287 |             advance(s, lexer);
1288 |         }
1289 |         while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
1290 |             advance(s, lexer);
1291 |         }
1292 |         if (lexer->lookahead == '|') {
1293 |             if (!had_one_minus) {
1294 |                 delimiter_cell_count++;
1295 |             }
1296 |             advance(s, lexer);
1297 |             continue;
1298 |         }
1299 |         if (lexer->lookahead != '\r' && lexer->lookahead != '\n') {  // anything but a line break here rejects the row
1300 |             return false;
1301 |         } else {
1302 |             break;
1303 |         }
1304 |     }
1305 |     // if the cell counts are not equal then this is not a table
1306 |     if (cell_count != delimiter_cell_count) {
1307 |         return false;
1308 |     }
1309 | 
1310 |     lexer->result_symbol = PIPE_TABLE_START;
1311 |     return true;
1312 | }
1313 | // Core scanning routine: chooses the next external token allowed by valid_symbols at the current position.
1314 | static bool scan(Scanner *s, TSLexer *lexer, const bool *valid_symbols) {
1315 |     // A normal tree-sitter rule decided that the current branch is invalid and
1316 |     // now "requests" an error to stop the branch
1317 |     if (valid_symbols[TRIGGER_ERROR]) {
1318 |         return error(lexer);
1319 |     }
1320 | 
1321 |     // Close the innermost block after the next line break as requested. See
1322 |     // `$._close_block` in grammar.js
1323 |     if (valid_symbols[CLOSE_BLOCK]) {
1324 |         s->state |= STATE_CLOSE_BLOCK;
1325 |         lexer->result_symbol = CLOSE_BLOCK;
1326 |         return true;
1327 |     }
1328 | 
1329 |     // if we are at the end of the file and there are still open blocks close
1330 |     // them all (one BLOCK_CLOSE token per call)
1331 |     if (lexer->eof(lexer)) {
1332 |         if (valid_symbols[TOKEN_EOF]) {
1333 |             lexer->result_symbol = TOKEN_EOF;
1334 |             return true;
1335 |         }
1336 |         if (s->open_blocks.size > 0) {
1337 |             lexer->result_symbol = BLOCK_CLOSE;
1338 |             if (!s->simulate)
1339 |                 pop_block(s);
1340 |             return true;
1341 |         }
1342 |         return false;
1343 |     }
1344 | 
1345 |     if (!(s->state & STATE_MATCHING)) {
1346 |         // Parse any preceding whitespace and remember its length. This makes a
1347 |         // lot of parsing quite a bit easier.
1348 |         for (;;) {
1349 |             if (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
1350 |                 s->indentation += advance(s, lexer);
1351 |             } else {
1352 |                 break;
1353 |             }
1354 |         }
1355 |         // We are not matching. This is where the parsing logic for most
1356 |         // "normal" tokens is. Most importantly parsing logic for the start of
1357 |         // new blocks.
1358 |         if (valid_symbols[INDENTED_CHUNK_START] &&
1359 |             !valid_symbols[NO_INDENTED_CHUNK]) {
1360 |             if (s->indentation >= 4 && lexer->lookahead != '\n' &&
1361 |                 lexer->lookahead != '\r') {
1362 |                 lexer->result_symbol = INDENTED_CHUNK_START;
1363 |                 if (!s->simulate)
1364 |                     push_block(s, INDENTED_CODE_BLOCK);
1365 |                 s->indentation -= 4;  // the first 4 columns are accounted for by the code block
1366 |                 return true;
1367 |             }
1368 |         }
1369 |         // Decide which tokens to consider based on the first non-whitespace
1370 |         // character
1371 |         switch (lexer->lookahead) {
1372 |             case '\r':
1373 |             case '\n':
1374 |                 if (valid_symbols[BLANK_LINE_START]) {
1375 |                     // A blank line token is actually just 0 width, so do not
1376 |                     // consume the characters
1377 |                     lexer->result_symbol = BLANK_LINE_START;
1378 |                     return true;
1379 |                 }
1380 |                 break;
1381 |             case '`':
1382 |                 // A backtick could mark the beginning or ending of a fenced
1383 |                 // code block.
1384 |                 return parse_fenced_code_block(s, '`', lexer, valid_symbols);
1385 |             case '~':
1386 |                 // A tilde could mark the beginning or ending of a fenced code
1387 |                 // block.
1388 |                 return parse_fenced_code_block(s, '~', lexer, valid_symbols);
1389 |             case '*':
1390 |                 // A star could either mark a list item or a thematic break.
1391 |                 // This code is similar to the code for '_' and '+'.
1392 |                 return parse_star(s, lexer, valid_symbols);
1393 |             case '_':
1394 |                 return parse_thematic_break_underscore(s, lexer, valid_symbols);
1395 |             case '>':
1396 |                 // A '>' could mark the beginning of a block quote
1397 |                 return parse_block_quote(s, lexer, valid_symbols);
1398 |             case '#':
1399 |                 // A '#' could mark an ATX heading
1400 |                 return parse_atx_heading(s, lexer, valid_symbols);
1401 |             case '=':
1402 |                 // A '=' could mark a setext underline
1403 |                 return parse_setext_underline(s, lexer, valid_symbols);
1404 |             case '+':
1405 |                 // A '+' could be a list marker
1406 |                 return parse_plus(s, lexer, valid_symbols);
1407 |             case '0':
1408 |             case '1':
1409 |             case '2':
1410 |             case '3':
1411 |             case '4':
1412 |             case '5':
1413 |             case '6':
1414 |             case '7':
1415 |             case '8':
1416 |             case '9':
1417 |                 // A number could be a list marker (if followed by a dot or a
1418 |                 // parenthesis)
1419 |                 return parse_ordered_list_marker(s, lexer, valid_symbols);
1420 |             case '-':
1421 |                 // A minus could mark a list marker, a thematic break or a
1422 |                 // setext underline
1423 |                 return parse_minus(s, lexer, valid_symbols);
1424 |             case '<':
1425 |                 // A < could mark the beginning of a html block
1426 |                 return parse_html_block(s, lexer, valid_symbols);
1427 |         }
1428 |         if (lexer->lookahead != '\r' && lexer->lookahead != '\n' &&
1429 |             valid_symbols[PIPE_TABLE_START]) {
1430 |             return parse_pipe_table(s, lexer, valid_symbols);
1431 |         }
1432 |     } else { // we are in the state of trying to match all currently open blocks
1433 |         bool partial_success = false;
1434 |         while (s->matched < (uint8_t)s->open_blocks.size) {
1435 |             if (s->matched == (uint8_t)s->open_blocks.size - 1 &&
1436 |                 (s->state & STATE_CLOSE_BLOCK)) {  // a close was requested: do not try to match the innermost block
1437 |                 if (!partial_success)
1438 |                     s->state &= ~STATE_CLOSE_BLOCK;
1439 |                 break;
1440 |             }
1441 |             if (match(s, lexer, s->open_blocks.items[s->matched])) {
1442 |                 partial_success = true;
1443 |                 s->matched++;
1444 |             } else {
1445 |                 if (s->state & STATE_WAS_SOFT_LINE_BREAK) {
1446 |                     s->state &= (~STATE_MATCHING);
1447 |                 }
1448 |                 break;
1449 |             }
1450 |         }
1451 |         if (partial_success) {  // at least one block matched in this call
1452 |             if (s->matched == s->open_blocks.size) {
1453 |                 s->state &= (~STATE_MATCHING);
1454 |             }
1455 |             lexer->result_symbol = BLOCK_CONTINUATION;
1456 |             return true;
1457 |         }
1458 | 
1459 |         if (!(s->state & STATE_WAS_SOFT_LINE_BREAK)) {  // nothing matched after a hard line break: close the innermost block
1460 |             lexer->result_symbol = BLOCK_CLOSE;
1461 |             pop_block(s);  // NOTE(review): not guarded by !s->simulate, unlike the other pop_block/push_block calls here; confirm intended
1462 |             if (s->matched == s->open_blocks.size) {
1463 |                 s->state &= (~STATE_MATCHING);
1464 |             }
1465 |             return true;
1466 |         }
1467 |     }
1468 | 
1469 |     // The parser just encountered a line break. Setup the state correspondingly
1470 |     if ((valid_symbols[LINE_ENDING] || valid_symbols[SOFT_LINE_ENDING] ||
1471 |          valid_symbols[PIPE_TABLE_LINE_ENDING]) &&
1472 |         (lexer->lookahead == '\n' || lexer->lookahead == '\r')) {
1473 |         if (lexer->lookahead == '\r') {  // consume "\r", "\r\n" or "\n"
1474 |             advance(s, lexer);
1475 |             if (lexer->lookahead == '\n') {
1476 |                 advance(s, lexer);
1477 |             }
1478 |         } else {
1479 |             advance(s, lexer);
1480 |         }
1481 |         s->indentation = 0;
1482 |         s->column = 0;
1483 |         if (!(s->state & STATE_CLOSE_BLOCK) &&
1484 |             (valid_symbols[SOFT_LINE_ENDING] ||
1485 |              valid_symbols[PIPE_TABLE_LINE_ENDING])) {
1486 |             lexer->mark_end(lexer);  // the token ends after the line break; what follows is look-ahead
1487 |             for (;;) {
1488 |                 if (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
1489 |                     s->indentation += advance(s, lexer);
1490 |                 } else {
1491 |                     break;
1492 |                 }
1493 |             }
1494 |             s->simulate = true;  // dry run: guarded push_block/pop_block calls are skipped from here on
1495 |             uint8_t matched_temp = s->matched;
1496 |             s->matched = 0;
1497 |             bool one_will_be_matched = false;
1498 |             while (s->matched < (uint8_t)s->open_blocks.size) {
1499 |                 if (match(s, lexer, s->open_blocks.items[s->matched])) {
1500 |                     s->matched++;
1501 |                     one_will_be_matched = true;
1502 |                 } else {
1503 |                     break;
1504 |                 }
1505 |             }
1506 |             bool all_will_be_matched = s->matched == s->open_blocks.size;
1507 |             if (!lexer->eof(lexer) &&
1508 |                 !scan(s, lexer, paragraph_interrupt_symbols)) {  // recursive dry run: no paragraph_interrupt_symbols token on the next line
1509 |                 s->matched = matched_temp;  // NOTE(review): immediately overwritten by the reset to 0 below
1510 |                 // If the last line break ended a paragraph and no new block
1511 |                 // opened, the last line break should have been a soft line
1512 |                 // break. Reset the counter for matched blocks
1513 |                 s->matched = 0;
1514 |                 s->indentation = 0;
1515 |                 s->column = 0;
1516 |                 // If there is at least one open block, we should be in the
1517 |                 // matching state. Also set the matching flag if a
1518 |                 // `$._soft_line_break_marker` can be emitted so it does get
1519 |                 // emitted.
1520 |                 if (one_will_be_matched) {
1521 |                     s->state |= STATE_MATCHING;
1522 |                 } else {
1523 |                     s->state &= (~STATE_MATCHING);
1524 |                 }
1525 |                 if (valid_symbols[PIPE_TABLE_LINE_ENDING]) {
1526 |                     if (all_will_be_matched) {
1527 |                         lexer->result_symbol = PIPE_TABLE_LINE_ENDING;
1528 |                         return true;
1529 |                     }
1530 |                 } else {
1531 |                     lexer->result_symbol = SOFT_LINE_ENDING;
1532 |                     // reset some state variables
1533 |                     s->state |= STATE_WAS_SOFT_LINE_BREAK;
1534 |                     return true;
1535 |                 }
1536 |             } else {
1537 |                 s->matched = matched_temp;  // restore the counter changed by the dry run
1538 |             }
1539 |             s->indentation = 0;
1540 |             s->column = 0;
1541 |         }
1542 |         if (valid_symbols[LINE_ENDING]) {
1543 |             // If the last line break ended a paragraph and no new block opened,
1544 |             // the last line break should have been a soft line break. Reset the
1545 |             // counter for matched blocks
1546 |             s->matched = 0;
1547 |             // If there is at least one open block, we should be in the matching
1548 |             // state. Also set the matching flag if a
1549 |             // `$._soft_line_break_marker` can be emitted so it does get
1550 |             // emitted.
1551 |             if (s->open_blocks.size > 0) {
1552 |                 s->state |= STATE_MATCHING;
1553 |             } else {
1554 |                 s->state &= (~STATE_MATCHING);
1555 |             }
1556 |             // reset some state variables
1557 |             s->state &= (~STATE_WAS_SOFT_LINE_BREAK);
1558 |             lexer->result_symbol = LINE_ENDING;
1559 |             return true;
1560 |         }
1561 |     }
1562 |     return false;
1563 | }
1564 | // Allocates a Scanner with a one-element block array and initializes it via deserialize(s, NULL, 0).
1565 | void *tree_sitter_markdown_external_scanner_create(void) {
1566 |     Scanner *s = (Scanner *)malloc(sizeof(Scanner));  // NOTE(review): malloc/calloc results are not checked for NULL
1567 |     s->open_blocks.items = (Block *)calloc(1, sizeof(Block));
1568 | #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
1569 |     _Static_assert(ATX_H6_MARKER == ATX_H1_MARKER + 5, "");  // C11 and later: checked at compile time
1570 | #else
1571 |     assert(ATX_H6_MARKER == ATX_H1_MARKER + 5);  // older standards: checked at run time
1572 | #endif
1573 |     deserialize(s, NULL, 0);  // an empty buffer is used to set up the initial state
1574 | 
1575 |     return s;
1576 | }
1577 | // External scanner entry point: clears the simulate flag, then delegates to scan().
1578 | bool tree_sitter_markdown_external_scanner_scan(void *payload, TSLexer *lexer,
1579 |                                                 const bool *valid_symbols) {
1580 |     Scanner *scanner = (Scanner *)payload;
1581 |     scanner->simulate = false;  // scan() and parse_pipe_table() may leave this set to true
1582 |     return scan(scanner, lexer, valid_symbols);
1583 | }
1584 | // Writes the scanner state into buffer by delegating to serialize() and returns its result.
1585 | unsigned tree_sitter_markdown_external_scanner_serialize(void *payload,
1586 |                                                          char *buffer) {
1587 |     Scanner *scanner = (Scanner *)payload;
1588 |     return serialize(scanner, buffer);
1589 | }
1590 | // Loads the scanner state from the first `length` bytes of buffer by delegating to deserialize().
1591 | void tree_sitter_markdown_external_scanner_deserialize(void *payload,
1592 |                                                        const char *buffer,
1593 |                                                        unsigned length) {
1594 |     Scanner *scanner = (Scanner *)payload;
1595 |     deserialize(scanner, buffer, length);
1596 | }
1597 | // Frees the scanner's block array and then the Scanner itself (both allocated in ..._create).
1598 | void tree_sitter_markdown_external_scanner_destroy(void *payload) {
1599 |     Scanner *scanner = (Scanner *)payload;
1600 |     free(scanner->open_blocks.items);  // release the owned array before its owner
1601 |     free(scanner);
1602 | }
1603 | 
```