#
tokens: 56375/50000 1/216 files (page 32/33)
lines: off (toggle) GitHub
raw markdown copy
This is page 32 of 33. Use http://codebase.md/pragmar/mcp_server_webcrawl/crawlers/archivebox/tests.html?page={x} to view the full context.

# Directory Structure

```
├── .gitignore
├── CONTRIBUTING.md
├── docs
│   ├── _images
│   │   ├── interactive.document.webp
│   │   ├── interactive.search.webp
│   │   └── mcpswc.svg
│   ├── _modules
│   │   ├── index.html
│   │   ├── mcp_server_webcrawl
│   │   │   ├── crawlers
│   │   │   │   ├── archivebox
│   │   │   │   │   ├── adapter.html
│   │   │   │   │   ├── crawler.html
│   │   │   │   │   └── tests.html
│   │   │   │   ├── base
│   │   │   │   │   ├── adapter.html
│   │   │   │   │   ├── api.html
│   │   │   │   │   ├── crawler.html
│   │   │   │   │   ├── indexed.html
│   │   │   │   │   └── tests.html
│   │   │   │   ├── httrack
│   │   │   │   │   ├── adapter.html
│   │   │   │   │   ├── crawler.html
│   │   │   │   │   └── tests.html
│   │   │   │   ├── interrobot
│   │   │   │   │   ├── adapter.html
│   │   │   │   │   ├── crawler.html
│   │   │   │   │   └── tests.html
│   │   │   │   ├── katana
│   │   │   │   │   ├── adapter.html
│   │   │   │   │   ├── crawler.html
│   │   │   │   │   └── tests.html
│   │   │   │   ├── siteone
│   │   │   │   │   ├── adapter.html
│   │   │   │   │   ├── crawler.html
│   │   │   │   │   └── tests.html
│   │   │   │   ├── warc
│   │   │   │   │   ├── adapter.html
│   │   │   │   │   ├── crawler.html
│   │   │   │   │   └── tests.html
│   │   │   │   └── wget
│   │   │   │       ├── adapter.html
│   │   │   │       ├── crawler.html
│   │   │   │       └── tests.html
│   │   │   ├── crawlers.html
│   │   │   ├── extras
│   │   │   │   ├── markdown.html
│   │   │   │   ├── regex.html
│   │   │   │   ├── snippets.html
│   │   │   │   ├── thumbnails.html
│   │   │   │   └── xpath.html
│   │   │   ├── interactive
│   │   │   │   ├── highlights.html
│   │   │   │   ├── search.html
│   │   │   │   ├── session.html
│   │   │   │   └── ui.html
│   │   │   ├── main.html
│   │   │   ├── models
│   │   │   │   ├── resources.html
│   │   │   │   └── sites.html
│   │   │   ├── templates
│   │   │   │   └── tests.html
│   │   │   ├── utils
│   │   │   │   ├── blobs.html
│   │   │   │   ├── cli.html
│   │   │   │   ├── logger.html
│   │   │   │   ├── querycache.html
│   │   │   │   ├── server.html
│   │   │   │   └── tools.html
│   │   │   └── utils.html
│   │   └── re.html
│   ├── _sources
│   │   ├── guides
│   │   │   ├── archivebox.rst.txt
│   │   │   ├── httrack.rst.txt
│   │   │   ├── interrobot.rst.txt
│   │   │   ├── katana.rst.txt
│   │   │   ├── siteone.rst.txt
│   │   │   ├── warc.rst.txt
│   │   │   └── wget.rst.txt
│   │   ├── guides.rst.txt
│   │   ├── index.rst.txt
│   │   ├── installation.rst.txt
│   │   ├── interactive.rst.txt
│   │   ├── mcp_server_webcrawl.crawlers.archivebox.rst.txt
│   │   ├── mcp_server_webcrawl.crawlers.base.rst.txt
│   │   ├── mcp_server_webcrawl.crawlers.httrack.rst.txt
│   │   ├── mcp_server_webcrawl.crawlers.interrobot.rst.txt
│   │   ├── mcp_server_webcrawl.crawlers.katana.rst.txt
│   │   ├── mcp_server_webcrawl.crawlers.rst.txt
│   │   ├── mcp_server_webcrawl.crawlers.siteone.rst.txt
│   │   ├── mcp_server_webcrawl.crawlers.warc.rst.txt
│   │   ├── mcp_server_webcrawl.crawlers.wget.rst.txt
│   │   ├── mcp_server_webcrawl.extras.rst.txt
│   │   ├── mcp_server_webcrawl.interactive.rst.txt
│   │   ├── mcp_server_webcrawl.models.rst.txt
│   │   ├── mcp_server_webcrawl.rst.txt
│   │   ├── mcp_server_webcrawl.templates.rst.txt
│   │   ├── mcp_server_webcrawl.utils.rst.txt
│   │   ├── modules.rst.txt
│   │   ├── prompts.rst.txt
│   │   └── usage.rst.txt
│   ├── _static
│   │   ├── _sphinx_javascript_frameworks_compat.js
│   │   ├── basic.css
│   │   ├── css
│   │   │   ├── badge_only.css
│   │   │   ├── fonts
│   │   │   │   ├── fontawesome-webfont.eot
│   │   │   │   ├── fontawesome-webfont.svg
│   │   │   │   ├── fontawesome-webfont.ttf
│   │   │   │   ├── fontawesome-webfont.woff
│   │   │   │   ├── fontawesome-webfont.woff2
│   │   │   │   ├── lato-bold-italic.woff
│   │   │   │   ├── lato-bold-italic.woff2
│   │   │   │   ├── lato-bold.woff
│   │   │   │   ├── lato-bold.woff2
│   │   │   │   ├── lato-normal-italic.woff
│   │   │   │   ├── lato-normal-italic.woff2
│   │   │   │   ├── lato-normal.woff
│   │   │   │   ├── lato-normal.woff2
│   │   │   │   ├── Roboto-Slab-Bold.woff
│   │   │   │   ├── Roboto-Slab-Bold.woff2
│   │   │   │   ├── Roboto-Slab-Regular.woff
│   │   │   │   └── Roboto-Slab-Regular.woff2
│   │   │   └── theme.css
│   │   ├── doctools.js
│   │   ├── documentation_options.js
│   │   ├── file.png
│   │   ├── fonts
│   │   │   ├── Lato
│   │   │   │   ├── lato-bold.eot
│   │   │   │   ├── lato-bold.ttf
│   │   │   │   ├── lato-bold.woff
│   │   │   │   ├── lato-bold.woff2
│   │   │   │   ├── lato-bolditalic.eot
│   │   │   │   ├── lato-bolditalic.ttf
│   │   │   │   ├── lato-bolditalic.woff
│   │   │   │   ├── lato-bolditalic.woff2
│   │   │   │   ├── lato-italic.eot
│   │   │   │   ├── lato-italic.ttf
│   │   │   │   ├── lato-italic.woff
│   │   │   │   ├── lato-italic.woff2
│   │   │   │   ├── lato-regular.eot
│   │   │   │   ├── lato-regular.ttf
│   │   │   │   ├── lato-regular.woff
│   │   │   │   └── lato-regular.woff2
│   │   │   └── RobotoSlab
│   │   │       ├── roboto-slab-v7-bold.eot
│   │   │       ├── roboto-slab-v7-bold.ttf
│   │   │       ├── roboto-slab-v7-bold.woff
│   │   │       ├── roboto-slab-v7-bold.woff2
│   │   │       ├── roboto-slab-v7-regular.eot
│   │   │       ├── roboto-slab-v7-regular.ttf
│   │   │       ├── roboto-slab-v7-regular.woff
│   │   │       └── roboto-slab-v7-regular.woff2
│   │   ├── images
│   │   │   ├── interactive.document.png
│   │   │   ├── interactive.document.webp
│   │   │   ├── interactive.search.png
│   │   │   ├── interactive.search.webp
│   │   │   └── mcpswc.svg
│   │   ├── jquery.js
│   │   ├── js
│   │   │   ├── badge_only.js
│   │   │   ├── theme.js
│   │   │   └── versions.js
│   │   ├── language_data.js
│   │   ├── minus.png
│   │   ├── plus.png
│   │   ├── pygments.css
│   │   ├── searchtools.js
│   │   └── sphinx_highlight.js
│   ├── .buildinfo
│   ├── .nojekyll
│   ├── genindex.html
│   ├── guides
│   │   ├── archivebox.html
│   │   ├── httrack.html
│   │   ├── interrobot.html
│   │   ├── katana.html
│   │   ├── siteone.html
│   │   ├── warc.html
│   │   └── wget.html
│   ├── guides.html
│   ├── index.html
│   ├── installation.html
│   ├── interactive.html
│   ├── mcp_server_webcrawl.crawlers.archivebox.html
│   ├── mcp_server_webcrawl.crawlers.base.html
│   ├── mcp_server_webcrawl.crawlers.html
│   ├── mcp_server_webcrawl.crawlers.httrack.html
│   ├── mcp_server_webcrawl.crawlers.interrobot.html
│   ├── mcp_server_webcrawl.crawlers.katana.html
│   ├── mcp_server_webcrawl.crawlers.siteone.html
│   ├── mcp_server_webcrawl.crawlers.warc.html
│   ├── mcp_server_webcrawl.crawlers.wget.html
│   ├── mcp_server_webcrawl.extras.html
│   ├── mcp_server_webcrawl.html
│   ├── mcp_server_webcrawl.interactive.html
│   ├── mcp_server_webcrawl.models.html
│   ├── mcp_server_webcrawl.templates.html
│   ├── mcp_server_webcrawl.utils.html
│   ├── modules.html
│   ├── objects.inv
│   ├── prompts.html
│   ├── py-modindex.html
│   ├── search.html
│   ├── searchindex.js
│   └── usage.html
├── LICENSE
├── MANIFEST.in
├── prompts
│   ├── audit404.md
│   ├── auditfiles.md
│   ├── auditperf.md
│   ├── auditseo.md
│   ├── gopher.md
│   ├── README.md
│   └── testsearch.md
├── pyproject.toml
├── README.md
├── setup.py
├── sphinx
│   ├── _static
│   │   └── images
│   │       ├── interactive.document.png
│   │       ├── interactive.document.webp
│   │       ├── interactive.search.png
│   │       ├── interactive.search.webp
│   │       └── mcpswc.svg
│   ├── _templates
│   │   └── layout.html
│   ├── conf.py
│   ├── guides
│   │   ├── archivebox.rst
│   │   ├── httrack.rst
│   │   ├── interrobot.rst
│   │   ├── katana.rst
│   │   ├── siteone.rst
│   │   ├── warc.rst
│   │   └── wget.rst
│   ├── guides.rst
│   ├── index.rst
│   ├── installation.rst
│   ├── interactive.rst
│   ├── make.bat
│   ├── Makefile
│   ├── mcp_server_webcrawl.crawlers.archivebox.rst
│   ├── mcp_server_webcrawl.crawlers.base.rst
│   ├── mcp_server_webcrawl.crawlers.httrack.rst
│   ├── mcp_server_webcrawl.crawlers.interrobot.rst
│   ├── mcp_server_webcrawl.crawlers.katana.rst
│   ├── mcp_server_webcrawl.crawlers.rst
│   ├── mcp_server_webcrawl.crawlers.siteone.rst
│   ├── mcp_server_webcrawl.crawlers.warc.rst
│   ├── mcp_server_webcrawl.crawlers.wget.rst
│   ├── mcp_server_webcrawl.extras.rst
│   ├── mcp_server_webcrawl.interactive.rst
│   ├── mcp_server_webcrawl.models.rst
│   ├── mcp_server_webcrawl.rst
│   ├── mcp_server_webcrawl.templates.rst
│   ├── mcp_server_webcrawl.utils.rst
│   ├── modules.rst
│   ├── prompts.rst
│   ├── readme.txt
│   └── usage.rst
└── src
    └── mcp_server_webcrawl
        ├── __init__.py
        ├── crawlers
        │   ├── __init__.py
        │   ├── archivebox
        │   │   ├── __init__.py
        │   │   ├── adapter.py
        │   │   ├── crawler.py
        │   │   └── tests.py
        │   ├── base
        │   │   ├── __init__.py
        │   │   ├── adapter.py
        │   │   ├── api.py
        │   │   ├── crawler.py
        │   │   ├── indexed.py
        │   │   └── tests.py
        │   ├── httrack
        │   │   ├── __init__.py
        │   │   ├── adapter.py
        │   │   ├── crawler.py
        │   │   └── tests.py
        │   ├── interrobot
        │   │   ├── __init__.py
        │   │   ├── adapter.py
        │   │   ├── crawler.py
        │   │   └── tests.py
        │   ├── katana
        │   │   ├── __init__.py
        │   │   ├── adapter.py
        │   │   ├── crawler.py
        │   │   └── tests.py
        │   ├── siteone
        │   │   ├── __init__.py
        │   │   ├── adapter.py
        │   │   ├── crawler.py
        │   │   └── tests.py
        │   ├── warc
        │   │   ├── __init__.py
        │   │   ├── adapter.py
        │   │   ├── crawler.py
        │   │   └── tests.py
        │   └── wget
        │       ├── __init__.py
        │       ├── adapter.py
        │       ├── crawler.py
        │       └── tests.py
        ├── extras
        │   ├── __init__.py
        │   ├── markdown.py
        │   ├── regex.py
        │   ├── snippets.py
        │   ├── thumbnails.py
        │   └── xpath.py
        ├── interactive
        │   ├── __init__.py
        │   ├── highlights.py
        │   ├── search.py
        │   ├── session.py
        │   ├── ui.py
        │   └── views
        │       ├── base.py
        │       ├── document.py
        │       ├── help.py
        │       ├── requirements.py
        │       ├── results.py
        │       └── searchform.py
        ├── main.py
        ├── models
        │   ├── __init__.py
        │   ├── base.py
        │   ├── resources.py
        │   └── sites.py
        ├── settings.py
        ├── templates
        │   ├── __init__.py
        │   ├── markdown.xslt
        │   ├── tests_core.html
        │   └── tests.py
        └── utils
            ├── __init__.py
            ├── cli.py
            ├── logger.py
            ├── parser.py
            ├── parsetab.py
            ├── search.py
            ├── server.py
            ├── tests.py
            └── tools.py
```

# Files

--------------------------------------------------------------------------------
/docs/_modules/mcp_server_webcrawl/crawlers/base/tests.html:
--------------------------------------------------------------------------------

```html


<!DOCTYPE html>
<html class="writer-html5" lang="en" data-content_root="../../../../">
<head>
  <meta charset="utf-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  <title>mcp_server_webcrawl.crawlers.base.tests &mdash; mcp-server-webcrawl  documentation</title>
      <link rel="stylesheet" type="text/css" href="../../../../_static/pygments.css?v=80d5e7a1" />
      <link rel="stylesheet" type="text/css" href="../../../../_static/css/theme.css?v=e59714d7" />

  
      <script src="../../../../_static/jquery.js?v=5d32c60e"></script>
      <script src="../../../../_static/_sphinx_javascript_frameworks_compat.js?v=2cd50e6c"></script>
      <script src="../../../../_static/documentation_options.js?v=5929fcd5"></script>
      <script src="../../../../_static/doctools.js?v=888ff710"></script>
      <script src="../../../../_static/sphinx_highlight.js?v=dc90522c"></script>
    <script src="../../../../_static/js/theme.js"></script>
    <link rel="index" title="Index" href="../../../../genindex.html" />
    <link rel="search" title="Search" href="../../../../search.html" /> 
</head>

<body class="wy-body-for-nav"> 
  <div class="wy-grid-for-nav">
    <nav data-toggle="wy-nav-shift" class="wy-nav-side">
      <div class="wy-side-scroll">
        <div class="wy-side-nav-search" >

          
          
          <a href="../../../../index.html" class="icon icon-home">
            mcp-server-webcrawl
          </a>
<div role="search">
  <form id="rtd-search-form" class="wy-form" action="../../../../search.html" method="get">
    <input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
    <input type="hidden" name="check_keywords" value="yes" />
    <input type="hidden" name="area" value="default" />
  </form>
</div>
        </div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
              <p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../../../../installation.html">Installation</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../guides.html">Setup Guides</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../usage.html">Usage</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../prompts.html">Prompt Routines</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../interactive.html">Interactive Mode</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../../modules.html">mcp_server_webcrawl</a></li>
</ul>

        </div>
      </div>
    </nav>

    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
          <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
          <a href="../../../../index.html">mcp-server-webcrawl</a>
      </nav>

      <div class="wy-nav-content">
        <div class="rst-content">
          <div role="navigation" aria-label="Page navigation">
  <ul class="wy-breadcrumbs">
      <li><a href="../../../../index.html" class="icon icon-home" aria-label="Home"></a></li>
          <li class="breadcrumb-item"><a href="../../../index.html">Module code</a></li>
          <li class="breadcrumb-item"><a href="../../crawlers.html">mcp_server_webcrawl.crawlers</a></li>
      <li class="breadcrumb-item active">mcp_server_webcrawl.crawlers.base.tests</li>
      <li class="wy-breadcrumbs-aside">
      </li>
  </ul>
  <hr/>
</div>
          <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
           <div itemprop="articleBody">
             
  <h1>Source code for mcp_server_webcrawl.crawlers.base.tests</h1><div class="highlight"><pre>
<span></span><span class="kn">import</span> <span class="nn">sys</span>
<span class="kn">import</span> <span class="nn">unittest</span>
<span class="kn">import</span> <span class="nn">asyncio</span>

<span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">Final</span>
<span class="kn">from</span> <span class="nn">datetime</span> <span class="kn">import</span> <span class="n">datetime</span>
<span class="kn">from</span> <span class="nn">logging</span> <span class="kn">import</span> <span class="n">Logger</span>

<span class="kn">from</span> <span class="nn">mcp_server_webcrawl.crawlers.base.crawler</span> <span class="kn">import</span> <span class="n">BaseCrawler</span>
<span class="kn">from</span> <span class="nn">mcp_server_webcrawl.crawlers.wget.crawler</span> <span class="kn">import</span> <span class="n">WgetCrawler</span>
<span class="kn">from</span> <span class="nn">mcp_server_webcrawl.models.resources</span> <span class="kn">import</span> <span class="n">ResourceResultType</span>
<span class="kn">from</span> <span class="nn">mcp_server_webcrawl.crawlers.base.api</span> <span class="kn">import</span> <span class="n">BaseJsonApi</span>
<span class="kn">from</span> <span class="nn">mcp_server_webcrawl.utils.logger</span> <span class="kn">import</span> <span class="n">get_logger</span>

<span class="n">logger</span><span class="p">:</span> <span class="n">Logger</span> <span class="o">=</span> <span class="n">get_logger</span><span class="p">()</span>


<div class="viewcode-block" id="BaseCrawlerTests">
<a class="viewcode-back" href="../../../../mcp_server_webcrawl.crawlers.base.html#mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests">[docs]</a>
<span class="k">class</span> <span class="nc">BaseCrawlerTests</span><span class="p">(</span><span class="n">unittest</span><span class="o">.</span><span class="n">TestCase</span><span class="p">):</span>

    <span class="n">__PRAGMAR_PRIMARY_KEYWORD</span><span class="p">:</span> <span class="n">Final</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="s2">&quot;crawler&quot;</span>
    <span class="n">__PRAGMAR_SECONDARY_KEYWORD</span><span class="p">:</span> <span class="n">Final</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="s2">&quot;privacy&quot;</span>
    <span class="n">__PRAGMAR_HYPHENATED_KEYWORD</span><span class="p">:</span> <span class="n">Final</span><span class="p">[</span><span class="nb">str</span><span class="p">]</span> <span class="o">=</span> <span class="s2">&quot;one-click&quot;</span>

<div class="viewcode-block" id="BaseCrawlerTests.setUp">
<a class="viewcode-back" href="../../../../mcp_server_webcrawl.crawlers.base.html#mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests.setUp">[docs]</a>
    <span class="k">def</span> <span class="nf">setUp</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
        <span class="c1"># quiet asyncio error on tests, occurring after successful completion</span>
        <span class="k">if</span> <span class="n">sys</span><span class="o">.</span><span class="n">platform</span> <span class="o">==</span> <span class="s2">&quot;win32&quot;</span><span class="p">:</span>
            <span class="n">asyncio</span><span class="o">.</span><span class="n">set_event_loop_policy</span><span class="p">(</span><span class="n">asyncio</span><span class="o">.</span><span class="n">WindowsSelectorEventLoopPolicy</span><span class="p">())</span></div>



<div class="viewcode-block" id="BaseCrawlerTests.run_pragmar_search_tests">
<a class="viewcode-back" href="../../../../mcp_server_webcrawl.crawlers.base.html#mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests.run_pragmar_search_tests">[docs]</a>
    <span class="k">def</span> <span class="nf">run_pragmar_search_tests</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">crawler</span><span class="p">:</span> <span class="n">BaseCrawler</span><span class="p">,</span> <span class="n">site_id</span><span class="p">:</span> <span class="nb">int</span><span class="p">):</span>
<span class="w">        </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd">        Run a battery of database checks on the crawler and Boolean validation</span>
<span class="sd">        &quot;&quot;&quot;</span>

        <span class="n">resources_json</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">()</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">resources_json</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Should have some resources in database&quot;</span><span class="p">)</span>

        <span class="n">site_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span><span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">])</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">site_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Pragmar site should have resources&quot;</span><span class="p">)</span>

        <span class="n">primary_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_PRIMARY_KEYWORD</span><span class="p">,</span>
            <span class="n">fields</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;content&quot;</span><span class="p">,</span> <span class="s2">&quot;headers&quot;</span><span class="p">],</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>

        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">primary_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="sa">f</span><span class="s2">&quot;Keyword &#39;</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_PRIMARY_KEYWORD</span><span class="si">}</span><span class="s2">&#39; should return results&quot;</span><span class="p">)</span>

        <span class="n">secondary_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_SECONDARY_KEYWORD</span><span class="p">,</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">secondary_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="sa">f</span><span class="s2">&quot;Keyword &#39;</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_SECONDARY_KEYWORD</span><span class="si">}</span><span class="s2">&#39; should return results&quot;</span><span class="p">)</span>

        <span class="bp">self</span><span class="o">.</span><span class="n">__run_pragmar_search_tests_fulltext</span><span class="p">(</span><span class="n">crawler</span><span class="p">,</span> <span class="n">site_id</span><span class="p">,</span> <span class="n">site_resources</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">__run_pragmar_search_tests_field_status</span><span class="p">(</span><span class="n">crawler</span><span class="p">,</span> <span class="n">site_id</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">__run_pragmar_search_tests_field_headers</span><span class="p">(</span><span class="n">crawler</span><span class="p">,</span> <span class="n">site_id</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">__run_pragmar_search_tests_field_content</span><span class="p">(</span><span class="n">crawler</span><span class="p">,</span> <span class="n">site_id</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">__run_pragmar_search_tests_field_type</span><span class="p">(</span><span class="n">crawler</span><span class="p">,</span> <span class="n">site_id</span><span class="p">,</span> <span class="n">site_resources</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">__run_pragmar_search_tests_extras</span><span class="p">(</span><span class="n">crawler</span><span class="p">,</span> <span class="n">site_id</span><span class="p">,</span> <span class="n">site_resources</span><span class="p">,</span> <span class="n">primary_resources</span><span class="p">,</span> <span class="n">secondary_resources</span><span class="p">)</span></div>



<div class="viewcode-block" id="BaseCrawlerTests.run_pragmar_image_tests">
<a class="viewcode-back" href="../../../../mcp_server_webcrawl.crawlers.base.html#mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests.run_pragmar_image_tests">[docs]</a>
    <span class="k">def</span> <span class="nf">run_pragmar_image_tests</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">crawler</span><span class="p">:</span> <span class="n">BaseCrawler</span><span class="p">,</span> <span class="n">pragmar_site_id</span><span class="p">:</span> <span class="nb">int</span><span class="p">):</span>
<span class="w">        </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd">        Test image type filtering (&quot;type: img&quot;) on the Pragmar site resources.</span>
<span class="sd">        &quot;&quot;&quot;</span>
        <span class="n">img_results</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span><span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">pragmar_site_id</span><span class="p">],</span> <span class="n">query</span><span class="o">=</span><span class="s2">&quot;type: img&quot;</span><span class="p">,</span> <span class="n">limit</span><span class="o">=</span><span class="mi">5</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">img_results</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Image type filter should return results&quot;</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span>
            <span class="nb">all</span><span class="p">(</span><span class="n">r</span><span class="o">.</span><span class="n">type</span><span class="o">.</span><span class="n">value</span> <span class="o">==</span> <span class="s2">&quot;img&quot;</span> <span class="k">for</span> <span class="n">r</span> <span class="ow">in</span> <span class="n">img_results</span><span class="o">.</span><span class="n">_results</span><span class="p">),</span>
            <span class="s2">&quot;All filtered resources should have type &#39;img&#39;&quot;</span>
        <span class="p">)</span></div>


<div class="viewcode-block" id="BaseCrawlerTests.run_sites_resources_tests">
<a class="viewcode-back" href="../../../../mcp_server_webcrawl.crawlers.base.html#mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests.run_sites_resources_tests">[docs]</a>
    <span class="k">def</span> <span class="nf">run_sites_resources_tests</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">crawler</span><span class="p">:</span> <span class="n">BaseCrawler</span><span class="p">,</span> <span class="n">pragmar_site_id</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">example_site_id</span><span class="p">:</span> <span class="nb">int</span><span class="p">):</span>

        <span class="n">resources_json</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">()</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">resources_json</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Should have some resources in database&quot;</span><span class="p">)</span>

        <span class="n">site_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span><span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">pragmar_site_id</span><span class="p">])</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">site_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Pragmar site should have resources&quot;</span><span class="p">)</span>

        <span class="c1"># basic resource retrieval</span>
        <span class="n">resources_json</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">()</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">resources_json</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">)</span>

        <span class="c1"># fulltext keyword search</span>
        <span class="n">query_keyword1</span> <span class="o">=</span> <span class="s2">&quot;privacy&quot;</span>

        <span class="n">timestamp_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">pragmar_site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="n">query_keyword1</span><span class="p">,</span>
            <span class="n">fields</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;created&quot;</span><span class="p">,</span> <span class="s2">&quot;modified&quot;</span><span class="p">,</span> <span class="s2">&quot;time&quot;</span><span class="p">],</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span>
        <span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">timestamp_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Search query should return results&quot;</span><span class="p">)</span>
        <span class="k">for</span> <span class="n">resource</span> <span class="ow">in</span> <span class="n">timestamp_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">:</span>
            <span class="n">resource_dict</span> <span class="o">=</span> <span class="n">resource</span><span class="o">.</span><span class="n">to_dict</span><span class="p">()</span>
            <span class="bp">self</span><span class="o">.</span><span class="n">assertIsNotNone</span><span class="p">(</span><span class="n">resource_dict</span><span class="p">[</span><span class="s2">&quot;created&quot;</span><span class="p">],</span> <span class="s2">&quot;Created timestamp should not be None&quot;</span><span class="p">)</span>
            <span class="bp">self</span><span class="o">.</span><span class="n">assertIsNotNone</span><span class="p">(</span><span class="n">resource_dict</span><span class="p">[</span><span class="s2">&quot;modified&quot;</span><span class="p">],</span> <span class="s2">&quot;Modified timestamp should not be None&quot;</span><span class="p">)</span>
            <span class="bp">self</span><span class="o">.</span><span class="n">assertIsNotNone</span><span class="p">(</span><span class="n">resource_dict</span><span class="p">[</span><span class="s2">&quot;time&quot;</span><span class="p">],</span> <span class="s2">&quot;Modified timestamp should not be None&quot;</span><span class="p">)</span>

        <span class="c1"># resource ID filtering</span>
        <span class="k">if</span> <span class="n">resources_json</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
            <span class="n">first_resource</span> <span class="o">=</span> <span class="n">resources_json</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
            <span class="n">id_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
                <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">first_resource</span><span class="o">.</span><span class="n">site</span><span class="p">],</span>
                <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;id: </span><span class="si">{</span><span class="n">first_resource</span><span class="o">.</span><span class="n">id</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">,</span>
                <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
            <span class="p">)</span>
            <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">id_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
            <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">id_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">id</span><span class="p">,</span> <span class="n">first_resource</span><span class="o">.</span><span class="n">id</span><span class="p">)</span>

        <span class="c1"># site filtering</span>
        <span class="n">site_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span><span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">pragmar_site_id</span><span class="p">])</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">site_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Site filtering should return results&quot;</span><span class="p">)</span>
        <span class="k">for</span> <span class="n">resource</span> <span class="ow">in</span> <span class="n">site_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">:</span>
            <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">resource</span><span class="o">.</span><span class="n">site</span><span class="p">,</span> <span class="n">pragmar_site_id</span><span class="p">)</span>

        <span class="c1"># type filtering for HTML pages</span>
        <span class="n">html_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">pragmar_site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span> <span class="sa">f</span><span class="s2">&quot;type: </span><span class="si">{</span><span class="n">ResourceResultType</span><span class="o">.</span><span class="n">PAGE</span><span class="o">.</span><span class="n">value</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">,</span>
        <span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">html_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;HTML filtering should return results&quot;</span><span class="p">)</span>
        <span class="k">for</span> <span class="n">resource</span> <span class="ow">in</span> <span class="n">html_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">:</span>
            <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">resource</span><span class="o">.</span><span class="n">type</span><span class="p">,</span> <span class="n">ResourceResultType</span><span class="o">.</span><span class="n">PAGE</span><span class="p">)</span>

        <span class="c1"># type filtering for multiple resource types</span>
        <span class="n">mixed_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">pragmar_site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span> <span class="sa">f</span><span class="s2">&quot;type: </span><span class="si">{</span><span class="n">ResourceResultType</span><span class="o">.</span><span class="n">PAGE</span><span class="o">.</span><span class="n">value</span><span class="si">}</span><span class="s2"> OR type: </span><span class="si">{</span><span class="n">ResourceResultType</span><span class="o">.</span><span class="n">SCRIPT</span><span class="o">.</span><span class="n">value</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">,</span>
        <span class="p">)</span>
        <span class="k">if</span> <span class="n">mixed_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
            <span class="n">types_found</span> <span class="o">=</span> <span class="p">{</span><span class="n">r</span><span class="o">.</span><span class="n">type</span> <span class="k">for</span> <span class="n">r</span> <span class="ow">in</span> <span class="n">mixed_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">}</span>
            <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span>
                <span class="nb">len</span><span class="p">(</span><span class="n">types_found</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span>
                <span class="s2">&quot;Should find at least one of the requested resource types&quot;</span>
            <span class="p">)</span>
            <span class="k">for</span> <span class="n">resource_type</span> <span class="ow">in</span> <span class="n">types_found</span><span class="p">:</span>
                <span class="bp">self</span><span class="o">.</span><span class="n">assertIn</span><span class="p">(</span>
                    <span class="n">resource_type</span><span class="p">,</span>
                    <span class="p">[</span><span class="n">ResourceResultType</span><span class="o">.</span><span class="n">PAGE</span><span class="p">,</span> <span class="n">ResourceResultType</span><span class="o">.</span><span class="n">SCRIPT</span><span class="p">]</span>
                <span class="p">)</span>

        <span class="c1"># custom fields in response</span>
        <span class="n">custom_fields</span> <span class="o">=</span> <span class="p">[</span><span class="s2">&quot;content&quot;</span><span class="p">,</span> <span class="s2">&quot;headers&quot;</span><span class="p">,</span> <span class="s2">&quot;time&quot;</span><span class="p">]</span>
        <span class="n">field_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">query</span><span class="o">=</span><span class="s2">&quot;type: html&quot;</span><span class="p">,</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">pragmar_site_id</span><span class="p">],</span>
            <span class="n">fields</span><span class="o">=</span><span class="n">custom_fields</span><span class="p">,</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">field_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">)</span>
        <span class="n">resource_dict</span> <span class="o">=</span> <span class="n">field_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">to_dict</span><span class="p">()</span>
        <span class="k">for</span> <span class="n">field</span> <span class="ow">in</span> <span class="n">custom_fields</span><span class="p">:</span>
            <span class="bp">self</span><span class="o">.</span><span class="n">assertIn</span><span class="p">(</span><span class="n">field</span><span class="p">,</span> <span class="n">resource_dict</span><span class="p">,</span> <span class="sa">f</span><span class="s2">&quot;Field &#39;</span><span class="si">{</span><span class="n">field</span><span class="si">}</span><span class="s2">&#39; should be in response&quot;</span><span class="p">)</span>

        <span class="n">asc_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span><span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">pragmar_site_id</span><span class="p">],</span> <span class="n">sort</span><span class="o">=</span><span class="s2">&quot;+url&quot;</span><span class="p">)</span>
        <span class="k">if</span> <span class="n">asc_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
            <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">asc_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">url</span> <span class="o">&lt;=</span> <span class="n">asc_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">url</span><span class="p">)</span>

        <span class="n">desc_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span><span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">pragmar_site_id</span><span class="p">],</span> <span class="n">sort</span><span class="o">=</span><span class="s2">&quot;-url&quot;</span><span class="p">)</span>
        <span class="k">if</span> <span class="n">desc_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
            <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">desc_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">url</span> <span class="o">&gt;=</span> <span class="n">desc_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">url</span><span class="p">)</span>

        <span class="n">limit_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span><span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">pragmar_site_id</span><span class="p">],</span> <span class="n">limit</span><span class="o">=</span><span class="mi">3</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">limit_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">)</span> <span class="o">&lt;=</span> <span class="mi">3</span><span class="p">)</span>

        <span class="n">offset_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span><span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">pragmar_site_id</span><span class="p">],</span> <span class="n">offset</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">limit</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">offset_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">)</span> <span class="o">&lt;=</span> <span class="mi">2</span><span class="p">)</span>
        <span class="k">if</span> <span class="n">resources_json</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">4</span><span class="p">:</span>
            <span class="bp">self</span><span class="o">.</span><span class="n">assertNotEqual</span><span class="p">(</span>
                <span class="n">resources_json</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">id</span><span class="p">,</span>
                <span class="n">offset_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">id</span><span class="p">,</span>
                <span class="s2">&quot;Offset results should differ from first page&quot;</span>
            <span class="p">)</span>

        <span class="c1"># multi-site search, verify we got results from both sites</span>
        <span class="c1"># limit 100 is large enough to see all the pages; with a smaller limit,</span>
        <span class="c1"># ArchiveBox would need a -url sort (and everything else +url) to float</span>
        <span class="c1"># both sites into a small result set.</span>
        <span class="c1"># limit 100 is slower but more resilient</span>
        <span class="n">multisite_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">example_site_id</span><span class="p">,</span> <span class="n">pragmar_site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span> <span class="sa">f</span><span class="s2">&quot;type: </span><span class="si">{</span><span class="n">ResourceResultType</span><span class="o">.</span><span class="n">PAGE</span><span class="o">.</span><span class="n">value</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">,</span>
            <span class="n">sort</span><span class="o">=</span><span class="s2">&quot;+url&quot;</span><span class="p">,</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span>
        <span class="p">)</span>

        <span class="n">found_sites</span> <span class="o">=</span> <span class="nb">set</span><span class="p">()</span>
        <span class="k">for</span> <span class="n">resource</span> <span class="ow">in</span> <span class="n">multisite_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">:</span>
            <span class="n">found_sites</span><span class="o">.</span><span class="n">add</span><span class="p">(</span><span class="n">resource</span><span class="o">.</span><span class="n">site</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">found_sites</span><span class="p">),</span> <span class="mi">2</span><span class="p">,</span> <span class="s2">&quot;Should have results from both sites&quot;</span><span class="p">)</span></div>


<div class="viewcode-block" id="BaseCrawlerTests.run_pragmar_tokenizer_tests">
<a class="viewcode-back" href="../../../../mcp_server_webcrawl.crawlers.base.html#mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests.run_pragmar_tokenizer_tests">[docs]</a>
    <span class="k">def</span> <span class="nf">run_pragmar_tokenizer_tests</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">crawler</span><span class="p">:</span> <span class="n">BaseCrawler</span><span class="p">,</span> <span class="n">site_id</span><span class="p">:</span><span class="nb">int</span><span class="p">):</span>
<span class="w">        </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd">        Hyphens and underscores are particularly challenging for fts, and</span>
<span class="sd">        thus have a dedicated test. Their handling must be configured in</span>
<span class="sd">        multiple places, including the CREATE TABLE ... tokenizer, as well as</span>
<span class="sd">        handled by the query parser.</span>
<span class="sd">        &quot;&quot;&quot;</span>

        <span class="n">mcp_resources_keyword</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="s1">&#39;&quot;mcp-server-webcrawl&quot;&#39;</span><span class="p">,</span>
            <span class="n">fields</span><span class="o">=</span><span class="p">[],</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>
        <span class="n">mcp_resources_quoted</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="s1">&#39;&quot;mcp-server-webcrawl&quot;&#39;</span><span class="p">,</span>
            <span class="n">fields</span><span class="o">=</span><span class="p">[],</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">mcp_resources_keyword</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Should find mcp-server-webcrawl in HTML&quot;</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">mcp_resources_quoted</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Should find </span><span class="se">\&quot;</span><span class="s2">mcp-server-webcrawl</span><span class="se">\&quot;</span><span class="s2"> (phrase) in HTML&quot;</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">mcp_resources_quoted</span><span class="o">.</span><span class="n">total</span> <span class="o">==</span> <span class="n">mcp_resources_keyword</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="s2">&quot;Quoted and unquoted equivalence expected&quot;</span><span class="p">)</span>
        <span class="n">mcp_resources_wildcarded</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="s1">&#39;mcp*&#39;</span><span class="p">,</span>
            <span class="n">fields</span><span class="o">=</span><span class="p">[],</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">mcp_resources_wildcarded</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Should find mcp-server-* in HTML&quot;</span><span class="p">)</span>

        <span class="n">combo_and_resources_keyword</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="s1">&#39;&quot;mcp-server-webcrawl&quot; AND &quot;one-click&quot;&#39;</span><span class="p">,</span>
            <span class="n">fields</span><span class="o">=</span><span class="p">[],</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>
        <span class="n">combo_and_resources_quoted</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="s1">&#39;mcp-server-webcrawl AND one-click&#39;</span><span class="p">,</span>
            <span class="n">fields</span><span class="o">=</span><span class="p">[],</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">combo_and_resources_keyword</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Should find mcp-server-webcrawl in HTML&quot;</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">combo_and_resources_quoted</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Should find </span><span class="se">\&quot;</span><span class="s2">mcp-server-webcrawl</span><span class="se">\&quot;</span><span class="s2"> (phrase) in HTML&quot;</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">combo_and_resources_keyword</span><span class="o">.</span><span class="n">total</span> <span class="o">==</span> <span class="n">combo_and_resources_quoted</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="s2">&quot;Quoted and unquoted equivalence expected&quot;</span><span class="p">)</span>

        <span class="n">combo_or_resources_keyword</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="s1">&#39;&quot;mcp-server-webcrawl&quot; OR &quot;one-click&quot;&#39;</span><span class="p">,</span>
            <span class="n">fields</span><span class="o">=</span><span class="p">[],</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>
        <span class="n">combo_or_resources_quoted</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="s1">&#39;mcp-server-webcrawl OR one-click&#39;</span><span class="p">,</span>
            <span class="n">fields</span><span class="o">=</span><span class="p">[],</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">combo_or_resources_keyword</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Should find mcp-server-webcrawl in HTML&quot;</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">combo_or_resources_quoted</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Should find </span><span class="se">\&quot;</span><span class="s2">mcp-server-webcrawl</span><span class="se">\&quot;</span><span class="s2"> (phrase) in HTML&quot;</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">combo_or_resources_keyword</span><span class="o">.</span><span class="n">total</span> <span class="o">==</span> <span class="n">combo_or_resources_quoted</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="s2">&quot;Quoted and unquoted equivalence expected&quot;</span><span class="p">)</span>

        <span class="n">combo_not_resources_keyword</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="s1">&#39;&quot;mcp-server-webcrawl&quot; NOT &quot;one-click&quot;&#39;</span><span class="p">,</span>
            <span class="n">fields</span><span class="o">=</span><span class="p">[],</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>
        <span class="n">combo_not_resources_quoted</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="s1">&#39;mcp-server-webcrawl NOT one-click&#39;</span><span class="p">,</span>
            <span class="n">fields</span><span class="o">=</span><span class="p">[],</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>
        <span class="n">combo_and_not_resources_quoted</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="s1">&#39;mcp-server-webcrawl AND NOT one-click&#39;</span><span class="p">,</span>
            <span class="n">fields</span><span class="o">=</span><span class="p">[],</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">combo_not_resources_keyword</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Should find mcp-server-webcrawl in HTML&quot;</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">combo_not_resources_quoted</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Should find </span><span class="se">\&quot;</span><span class="s2">mcp-server-webcrawl</span><span class="se">\&quot;</span><span class="s2"> (phrase) in HTML&quot;</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">combo_not_resources_keyword</span><span class="o">.</span><span class="n">total</span> <span class="o">==</span> <span class="n">combo_not_resources_quoted</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="s2">&quot;Quoted and unquoted equivalence expected&quot;</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">combo_not_resources_keyword</span><span class="o">.</span><span class="n">total</span> <span class="o">==</span> <span class="n">combo_and_not_resources_quoted</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="sa">f</span><span class="s2">&quot;NOT (</span><span class="si">{</span><span class="n">combo_not_resources_keyword</span><span class="o">.</span><span class="n">total</span><span class="si">}</span><span class="s2">) and AND NOT (</span><span class="si">{</span><span class="n">combo_and_not_resources_quoted</span><span class="o">.</span><span class="n">total</span><span class="si">}</span><span class="s2">) equivalence expected&quot;</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">mcp_resources_keyword</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;=</span> <span class="n">combo_and_resources_keyword</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="s2">&quot;Total records should be greater or equal to ANDs.&quot;</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">mcp_resources_keyword</span><span class="o">.</span><span class="n">total</span> <span class="o">&lt;=</span> <span class="n">combo_or_resources_keyword</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="s2">&quot;Total records should be less than or equal to ORs.&quot;</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">mcp_resources_keyword</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="n">combo_not_resources_keyword</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="s2">&quot;Total records should be greater than NOTs.&quot;</span><span class="p">)</span></div>




<div class="viewcode-block" id="BaseCrawlerTests.run_pragmar_site_tests"><!--
  Rendered source of BaseCrawlerTests.run_pragmar_site_tests(self, crawler, site_id).
  Exercises crawler.get_sites_api three ways:
    1. no arguments: asserts the reported total is at least 2;
    2. ids=[site_id]: asserts the reported total is exactly 1;
    3. ids=[site_id] with fields "created" and "modified": asserts both keys are
       present in the to_dict() output of the first result.
  NOTE(review): step 3 indexes _results[0] without checking that a result exists.
-->
<a class="viewcode-back" href="../../../../mcp_server_webcrawl.crawlers.base.html#mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests.run_pragmar_site_tests">[docs]</a>
    <span class="k">def</span> <span class="nf">run_pragmar_site_tests</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">crawler</span><span class="p">:</span> <span class="n">BaseCrawler</span><span class="p">,</span> <span class="n">site_id</span><span class="p">:</span><span class="nb">int</span><span class="p">):</span>

        <span class="c1"># all sites</span>
        <span class="n">sites_json</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_sites_api</span><span class="p">()</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">sites_json</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;=</span> <span class="mi">2</span><span class="p">)</span>

        <span class="c1"># single site</span>
        <span class="n">site_json</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_sites_api</span><span class="p">(</span><span class="n">ids</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">])</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">site_json</span><span class="o">.</span><span class="n">total</span> <span class="o">==</span> <span class="mi">1</span><span class="p">)</span>

        <span class="c1"># site with fields</span>
        <span class="n">site_field_json</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_sites_api</span><span class="p">(</span><span class="n">ids</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span> <span class="n">fields</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;created&quot;</span><span class="p">,</span> <span class="s2">&quot;modified&quot;</span><span class="p">])</span>
        <span class="n">site_field_result</span> <span class="o">=</span> <span class="n">site_field_json</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">to_dict</span><span class="p">()</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="s2">&quot;created&quot;</span> <span class="ow">in</span> <span class="n">site_field_result</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="s2">&quot;modified&quot;</span> <span class="ow">in</span> <span class="n">site_field_result</span><span class="p">)</span></div>


<div class="viewcode-block" id="BaseCrawlerTests.run_pragmar_sort_tests"><!--
  Rendered source of BaseCrawlerTests.run_pragmar_sort_tests(self, crawler, site_id).
  All queries go through crawler.get_resources_api restricted to sites=[site_id].
    * Default, "+url" and "-url" sorts are fetched with limit=3; the url-sorted totals
      must be positive, and when both lists are non-empty the default ordering must
      equal the "+url" ordering.
    * "+size" and "-size" sorts are fetched with limit=3 and fields=["size"]; adjacent
      results are compared pairwise (url attribute for url sorts, to_dict()["size"]
      for size sorts) and must be non-decreasing or non-increasing respectively.
    * sort="?" is fetched twice with limit=20; the first total must be positive, and
      when that total is at least 10 the two id sequences must differ. Otherwise the
      randomness check is skipped and an info message is logged.
  Each pairwise ordering check only runs when the result list has more than one entry.
-->
<a class="viewcode-back" href="../../../../mcp_server_webcrawl.crawlers.base.html#mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests.run_pragmar_sort_tests">[docs]</a>
    <span class="k">def</span> <span class="nf">run_pragmar_sort_tests</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">crawler</span><span class="p">:</span> <span class="n">BaseCrawler</span><span class="p">,</span> <span class="n">site_id</span><span class="p">:</span> <span class="nb">int</span><span class="p">):</span>
<span class="w">        </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd">        Test sorting functionality with performance optimizations.</span>
<span class="sd">        &quot;&quot;&quot;</span>
        <span class="n">sorted_default</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span><span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span> <span class="n">limit</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">fields</span><span class="o">=</span><span class="p">[])</span>
        <span class="n">sorted_url_ascending</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span><span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span> <span class="n">sort</span><span class="o">=</span><span class="s2">&quot;+url&quot;</span><span class="p">,</span> <span class="n">limit</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">fields</span><span class="o">=</span><span class="p">[])</span>
        <span class="n">sorted_url_descending</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span><span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span> <span class="n">sort</span><span class="o">=</span><span class="s2">&quot;-url&quot;</span><span class="p">,</span> <span class="n">limit</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">fields</span><span class="o">=</span><span class="p">[])</span>

        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">sorted_url_ascending</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Database should contain resources&quot;</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">sorted_url_descending</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Database should contain resources&quot;</span><span class="p">)</span>
        <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">sorted_default</span><span class="o">.</span><span class="n">_results</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">0</span> <span class="ow">and</span> <span class="nb">len</span><span class="p">(</span><span class="n">sorted_url_ascending</span><span class="o">.</span><span class="n">_results</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
            <span class="n">default_urls</span> <span class="o">=</span> <span class="p">[</span><span class="n">r</span><span class="o">.</span><span class="n">url</span> <span class="k">for</span> <span class="n">r</span> <span class="ow">in</span> <span class="n">sorted_default</span><span class="o">.</span><span class="n">_results</span><span class="p">]</span>
            <span class="n">ascending_urls</span> <span class="o">=</span> <span class="p">[</span><span class="n">r</span><span class="o">.</span><span class="n">url</span> <span class="k">for</span> <span class="n">r</span> <span class="ow">in</span> <span class="n">sorted_url_ascending</span><span class="o">.</span><span class="n">_results</span><span class="p">]</span>
            <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">default_urls</span><span class="p">,</span> <span class="n">ascending_urls</span><span class="p">,</span> <span class="s2">&quot;Default sort should match +url sort&quot;</span><span class="p">)</span>

        <span class="n">sorted_size_ascending</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span><span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span> <span class="n">sort</span><span class="o">=</span><span class="s2">&quot;+size&quot;</span><span class="p">,</span> <span class="n">limit</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">fields</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;size&quot;</span><span class="p">])</span>
        <span class="n">sorted_size_descending</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span><span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span> <span class="n">sort</span><span class="o">=</span><span class="s2">&quot;-size&quot;</span><span class="p">,</span> <span class="n">limit</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">fields</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;size&quot;</span><span class="p">])</span>
        <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">sorted_url_ascending</span><span class="o">.</span><span class="n">_results</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
            <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">sorted_url_ascending</span><span class="o">.</span><span class="n">_results</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span><span class="p">):</span>
                <span class="bp">self</span><span class="o">.</span><span class="n">assertLessEqual</span><span class="p">(</span><span class="n">sorted_url_ascending</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">url</span><span class="p">,</span>
                        <span class="n">sorted_url_ascending</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="n">i</span> <span class="o">+</span> <span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">url</span><span class="p">,</span> <span class="s2">&quot;URLs should be ascending&quot;</span><span class="p">)</span>
        <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">sorted_url_descending</span><span class="o">.</span><span class="n">_results</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
            <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">sorted_url_descending</span><span class="o">.</span><span class="n">_results</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span><span class="p">):</span>
                <span class="bp">self</span><span class="o">.</span><span class="n">assertGreaterEqual</span><span class="p">(</span><span class="n">sorted_url_descending</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">url</span><span class="p">,</span>
                        <span class="n">sorted_url_descending</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="n">i</span> <span class="o">+</span> <span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">url</span><span class="p">,</span> <span class="s2">&quot;URLs should be descending&quot;</span><span class="p">)</span>
        <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">sorted_size_ascending</span><span class="o">.</span><span class="n">_results</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
            <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">sorted_size_ascending</span><span class="o">.</span><span class="n">_results</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span><span class="p">):</span>
                <span class="bp">self</span><span class="o">.</span><span class="n">assertLessEqual</span><span class="p">(</span><span class="n">sorted_size_ascending</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">to_dict</span><span class="p">()[</span><span class="s2">&quot;size&quot;</span><span class="p">],</span>
                        <span class="n">sorted_size_ascending</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="n">i</span> <span class="o">+</span> <span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">to_dict</span><span class="p">()[</span><span class="s2">&quot;size&quot;</span><span class="p">],</span> <span class="s2">&quot;Sizes should be ascending&quot;</span><span class="p">)</span>
        <span class="k">if</span> <span class="nb">len</span><span class="p">(</span><span class="n">sorted_size_descending</span><span class="o">.</span><span class="n">_results</span><span class="p">)</span> <span class="o">&gt;</span> <span class="mi">1</span><span class="p">:</span>
            <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">sorted_size_descending</span><span class="o">.</span><span class="n">_results</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span><span class="p">):</span>
                <span class="bp">self</span><span class="o">.</span><span class="n">assertGreaterEqual</span><span class="p">(</span><span class="n">sorted_size_descending</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="o">.</span><span class="n">to_dict</span><span class="p">()[</span><span class="s2">&quot;size&quot;</span><span class="p">],</span>
                        <span class="n">sorted_size_descending</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="n">i</span> <span class="o">+</span> <span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">to_dict</span><span class="p">()[</span><span class="s2">&quot;size&quot;</span><span class="p">],</span> <span class="s2">&quot;Sizes should be descending&quot;</span><span class="p">)</span>

        <span class="n">random_1</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span><span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span> <span class="n">sort</span><span class="o">=</span><span class="s2">&quot;?&quot;</span><span class="p">,</span> <span class="n">limit</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">fields</span><span class="o">=</span><span class="p">[])</span>
        <span class="n">random_2</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span><span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span> <span class="n">sort</span><span class="o">=</span><span class="s2">&quot;?&quot;</span><span class="p">,</span> <span class="n">limit</span><span class="o">=</span><span class="mi">20</span><span class="p">,</span> <span class="n">fields</span><span class="o">=</span><span class="p">[])</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">random_1</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Random sort should return results&quot;</span><span class="p">)</span>
        <span class="k">if</span> <span class="n">random_1</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;=</span> <span class="mi">10</span><span class="p">:</span>
            <span class="bp">self</span><span class="o">.</span><span class="n">assertNotEqual</span><span class="p">([</span><span class="n">r</span><span class="o">.</span><span class="n">id</span> <span class="k">for</span> <span class="n">r</span> <span class="ow">in</span> <span class="n">random_1</span><span class="o">.</span><span class="n">_results</span><span class="p">],</span> <span class="p">[</span><span class="n">r</span><span class="o">.</span><span class="n">id</span> <span class="k">for</span> <span class="n">r</span> <span class="ow">in</span> <span class="n">random_2</span><span class="o">.</span><span class="n">_results</span><span class="p">],</span>
                            <span class="s2">&quot;Random sort should produce different orders&quot;</span><span class="p">)</span>
        <span class="k">else</span><span class="p">:</span>
            <span class="n">logger</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Skip randomness verification: Not enough resources (</span><span class="si">{</span><span class="n">random_1</span><span class="o">.</span><span class="n">total</span><span class="si">}</span><span class="s2">)&quot;</span><span class="p">)</span></div>


<div class="viewcode-block" id="BaseCrawlerTests.run_pragmar_content_tests">
<a class="viewcode-back" href="../../../../mcp_server_webcrawl.crawlers.base.html#mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests.run_pragmar_content_tests">[docs]</a>
    <span class="k">def</span> <span class="nf">run_pragmar_content_tests</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">crawler</span><span class="p">:</span> <span class="n">BaseCrawler</span><span class="p">,</span> <span class="n">site_id</span><span class="p">:</span><span class="nb">int</span><span class="p">,</span> <span class="n">html_leniency</span><span class="p">:</span> <span class="nb">bool</span><span class="p">):</span>
<span class="w">        </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd">        Check content and headers of page resources, then the type of script and css resources.</span>
<span class="sd">        Page content must contain a doctype, html, or meta marker unless html_leniency is True.</span>
<span class="sd">        Non-empty headers must contain a Content-Type line. Script and css checks are skipped</span>
<span class="sd">        when the site has no resources of that type.</span>
<span class="sd">        &quot;&quot;&quot;</span>

        <span class="n">html_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span> <span class="sa">f</span><span class="s2">&quot;type: </span><span class="si">{</span><span class="n">ResourceResultType</span><span class="o">.</span><span class="n">PAGE</span><span class="o">.</span><span class="n">value</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">,</span>
            <span class="n">fields</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;content&quot;</span><span class="p">,</span> <span class="s2">&quot;headers&quot;</span><span class="p">]</span>
        <span class="p">)</span>

        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">html_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Should find HTML resources&quot;</span><span class="p">)</span>
        <span class="k">for</span> <span class="n">resource</span> <span class="ow">in</span> <span class="n">html_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">:</span>
            <span class="n">resource_dict</span> <span class="o">=</span> <span class="n">resource</span><span class="o">.</span><span class="n">to_dict</span><span class="p">()</span>
            <span class="k">if</span> <span class="s2">&quot;content&quot;</span> <span class="ow">in</span> <span class="n">resource_dict</span><span class="p">:</span>
                <span class="n">content</span> <span class="o">=</span>  <span class="n">resource_dict</span><span class="p">[</span><span class="s2">&quot;content&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span>
                <span class="c1"># content is lowercased above, so every marker must be lowercase to ever match</span>
                <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span>
                    <span class="s2">&quot;&lt;!doctype html&quot;</span> <span class="ow">in</span> <span class="n">content</span> <span class="ow">or</span>
                    <span class="s2">&quot;&lt;html&quot;</span> <span class="ow">in</span> <span class="n">content</span> <span class="ow">or</span>
                    <span class="s2">&quot;&lt;meta&quot;</span> <span class="ow">in</span> <span class="n">content</span> <span class="ow">or</span>
                    <span class="n">html_leniency</span><span class="p">,</span>
                    <span class="sa">f</span><span class="s2">&quot;HTML content should contain HTML markup: </span><span class="si">{</span><span class="n">resource</span><span class="o">.</span><span class="n">url</span><span class="si">}</span><span class="se">\n\n</span><span class="si">{</span><span class="n">resource</span><span class="o">.</span><span class="n">content</span><span class="si">}</span><span class="s2">&quot;</span>
                <span class="p">)</span>

            <span class="k">if</span> <span class="s2">&quot;headers&quot;</span> <span class="ow">in</span> <span class="n">resource_dict</span> <span class="ow">and</span> <span class="n">resource_dict</span><span class="p">[</span><span class="s2">&quot;headers&quot;</span><span class="p">]:</span>
                <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span>
                    <span class="s2">&quot;Content-Type:&quot;</span> <span class="ow">in</span> <span class="n">resource_dict</span><span class="p">[</span><span class="s2">&quot;headers&quot;</span><span class="p">],</span>
                    <span class="sa">f</span><span class="s2">&quot;Headers should contain Content-Type: </span><span class="si">{</span><span class="n">resource</span><span class="o">.</span><span class="n">url</span><span class="si">}</span><span class="s2">&quot;</span>
                <span class="p">)</span>

        <span class="c1"># script content detection</span>
        <span class="n">script_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span> <span class="sa">f</span><span class="s2">&quot;type: </span><span class="si">{</span><span class="n">ResourceResultType</span><span class="o">.</span><span class="n">SCRIPT</span><span class="o">.</span><span class="n">value</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">,</span>
            <span class="n">fields</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;content&quot;</span><span class="p">,</span> <span class="s2">&quot;headers&quot;</span><span class="p">],</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>
        <span class="k">if</span> <span class="n">script_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
            <span class="k">for</span> <span class="n">resource</span> <span class="ow">in</span> <span class="n">script_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">:</span>
                <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">resource</span><span class="o">.</span><span class="n">type</span><span class="p">,</span> <span class="n">ResourceResultType</span><span class="o">.</span><span class="n">SCRIPT</span><span class="p">)</span>

        <span class="c1"># css content detection</span>
        <span class="n">css_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span> <span class="sa">f</span><span class="s2">&quot;type: </span><span class="si">{</span><span class="n">ResourceResultType</span><span class="o">.</span><span class="n">CSS</span><span class="o">.</span><span class="n">value</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">,</span>
            <span class="n">fields</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;content&quot;</span><span class="p">,</span> <span class="s2">&quot;headers&quot;</span><span class="p">],</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>
        <span class="k">if</span> <span class="n">css_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
            <span class="k">for</span> <span class="n">resource</span> <span class="ow">in</span> <span class="n">css_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">:</span>
                <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">resource</span><span class="o">.</span><span class="n">type</span><span class="p">,</span> <span class="n">ResourceResultType</span><span class="o">.</span><span class="n">CSS</span><span class="p">)</span></div>


<div class="viewcode-block" id="BaseCrawlerTests.run_pragmar_report">
<a class="viewcode-back" href="../../../../mcp_server_webcrawl.crawlers.base.html#mcp_server_webcrawl.crawlers.base.tests.BaseCrawlerTests.run_pragmar_report">[docs]</a>
    <span class="k">def</span> <span class="nf">run_pragmar_report</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">crawler</span><span class="p">:</span> <span class="n">BaseCrawler</span><span class="p">,</span> <span class="n">site_id</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">heading</span><span class="p">:</span> <span class="nb">str</span><span class="p">):</span>
<span class="w">        </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd">        Generate a comprehensive report of all resources for a site.</span>
<span class="sd">        Returns a formatted string with counts and URLs by type.</span>
<span class="sd">        &quot;&quot;&quot;</span>

        <span class="n">site_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="s2">&quot;&quot;</span><span class="p">,</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span>
        <span class="p">)</span>

        <span class="n">html_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;type: </span><span class="si">{</span><span class="n">ResourceResultType</span><span class="o">.</span><span class="n">PAGE</span><span class="o">.</span><span class="n">value</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">,</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span>
        <span class="p">)</span>

        <span class="n">css_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;type: </span><span class="si">{</span><span class="n">ResourceResultType</span><span class="o">.</span><span class="n">CSS</span><span class="o">.</span><span class="n">value</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">,</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span>
        <span class="p">)</span>

        <span class="n">js_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;type: </span><span class="si">{</span><span class="n">ResourceResultType</span><span class="o">.</span><span class="n">SCRIPT</span><span class="o">.</span><span class="n">value</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">,</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span>
        <span class="p">)</span>

        <span class="n">image_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;type: </span><span class="si">{</span><span class="n">ResourceResultType</span><span class="o">.</span><span class="n">IMAGE</span><span class="o">.</span><span class="n">value</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">,</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span>
        <span class="p">)</span>

        <span class="n">mcp_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;type: html AND (mcp)&quot;</span><span class="p">,</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span>
        <span class="p">)</span>

        <span class="n">report_lines</span> <span class="o">=</span> <span class="p">[]</span>
        <span class="n">sections</span> <span class="o">=</span> <span class="p">[</span>
            <span class="p">(</span><span class="s2">&quot;Total pages&quot;</span><span class="p">,</span> <span class="n">site_resources</span><span class="p">),</span>
            <span class="p">(</span><span class="s2">&quot;Total HTML&quot;</span><span class="p">,</span> <span class="n">html_resources</span><span class="p">),</span>
            <span class="p">(</span><span class="s2">&quot;Total MCP search hits&quot;</span><span class="p">,</span> <span class="n">mcp_resources</span><span class="p">),</span>
            <span class="p">(</span><span class="s2">&quot;Total CSS&quot;</span><span class="p">,</span> <span class="n">css_resources</span><span class="p">),</span>
            <span class="p">(</span><span class="s2">&quot;Total JS&quot;</span><span class="p">,</span> <span class="n">js_resources</span><span class="p">),</span>
            <span class="p">(</span><span class="s2">&quot;Total Images&quot;</span><span class="p">,</span> <span class="n">image_resources</span><span class="p">)</span>
        <span class="p">]</span>

        <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="p">(</span><span class="n">section_name</span><span class="p">,</span> <span class="n">resource_obj</span><span class="p">)</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="n">sections</span><span class="p">):</span>
            <span class="n">report_lines</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="n">section_name</span><span class="si">}</span><span class="s2">: </span><span class="si">{</span><span class="n">resource_obj</span><span class="o">.</span><span class="n">total</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
            <span class="k">for</span> <span class="n">resource</span> <span class="ow">in</span> <span class="n">resource_obj</span><span class="o">.</span><span class="n">_results</span><span class="p">:</span>
                <span class="n">report_lines</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">resource</span><span class="o">.</span><span class="n">url</span><span class="p">)</span>
            <span class="k">if</span> <span class="n">i</span> <span class="o">&lt;</span> <span class="nb">len</span><span class="p">(</span><span class="n">sections</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span><span class="p">:</span>
                <span class="n">report_lines</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="s2">&quot;&quot;</span><span class="p">)</span>

        <span class="n">now</span> <span class="o">=</span> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span>
        <span class="n">lines_together</span> <span class="o">=</span> <span class="s2">&quot;</span><span class="se">\n</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">report_lines</span><span class="p">)</span>

        <span class="k">return</span> <span class="sa">f</span><span class="s2">&quot;&quot;&quot;</span>
<span class="s2">**********************************************************************************</span>
<span class="s2">* </span><span class="si">{</span><span class="n">heading</span><span class="si">}</span><span class="s2"> </span><span class="si">{</span><span class="n">now</span><span class="o">.</span><span class="n">isoformat</span><span class="p">()</span><span class="si">}</span><span class="s2">                                                    *</span>
<span class="s2">**********************************************************************************</span>
<span class="si">{</span><span class="n">lines_together</span><span class="si">}</span>
<span class="s2">&quot;&quot;&quot;</span></div>

    <span class="k">def</span> <span class="nf">__run_pragmar_search_tests_field_status</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">crawler</span><span class="p">:</span> <span class="n">BaseCrawler</span><span class="p">,</span> <span class="n">site_id</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>

        <span class="c1"># status code filtering</span>
        <span class="n">status_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;status: 200&quot;</span><span class="p">,</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span>
        <span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">status_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Status filtering should return results&quot;</span><span class="p">)</span>
        <span class="k">for</span> <span class="n">resource</span> <span class="ow">in</span> <span class="n">status_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">:</span>
            <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">resource</span><span class="o">.</span><span class="n">status</span><span class="p">,</span> <span class="mi">200</span><span class="p">)</span>

        <span class="c1"># status code filtering</span>
        <span class="n">appstat_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;status: 200 AND url: https://pragmar.com/appstat*&quot;</span><span class="p">,</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span>
        <span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">appstat_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="s2">&quot;Status filtering should return results&quot;</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertGreaterEqual</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">appstat_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">),</span> <span class="mi">3</span><span class="p">,</span> <span class="sa">f</span><span class="s2">&quot;Should have at least 3 results in appstat resources&quot;</span><span class="p">)</span>

        <span class="c1"># multiple status codes</span>
        <span class="n">multi_status_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;status: 200 OR status: 404&quot;</span><span class="p">,</span>
        <span class="p">)</span>
        <span class="k">if</span> <span class="n">multi_status_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
            <span class="n">found_statuses</span> <span class="o">=</span> <span class="p">{</span><span class="n">r</span><span class="o">.</span><span class="n">status</span> <span class="k">for</span> <span class="n">r</span> <span class="ow">in</span> <span class="n">multi_status_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">}</span>
            <span class="k">for</span> <span class="n">status</span> <span class="ow">in</span> <span class="n">found_statuses</span><span class="p">:</span>
                <span class="bp">self</span><span class="o">.</span><span class="n">assertIn</span><span class="p">(</span><span class="n">status</span><span class="p">,</span> <span class="p">[</span><span class="mi">200</span><span class="p">,</span> <span class="mi">404</span><span class="p">])</span>

    <span class="k">def</span> <span class="nf">__run_pragmar_search_tests_field_headers</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">crawler</span><span class="p">:</span> <span class="n">BaseCrawler</span><span class="p">,</span> <span class="n">site_id</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>

        <span class="c1"># supported crawls only (genuine headers data)</span>
        <span class="k">if</span> <span class="ow">not</span> <span class="bp">self</span><span class="o">.</span><span class="vm">__class__</span><span class="o">.</span><span class="vm">__name__</span> <span class="ow">in</span> <span class="p">(</span><span class="s2">&quot;InterroBotTests&quot;</span><span class="p">,</span><span class="s2">&quot;KatanaTests&quot;</span><span class="p">,</span> <span class="s2">&quot;WarcTests&quot;</span><span class="p">):</span>
            <span class="k">return</span>

        <span class="n">appstat_any</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;appstat&quot;</span><span class="p">,</span>
            <span class="n">extras</span><span class="o">=</span><span class="p">[],</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>

        <span class="n">appstat_headers_js</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;appstat AND headers: javascript&quot;</span><span class="p">,</span>
            <span class="n">extras</span><span class="o">=</span><span class="p">[],</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>

        <span class="c1"># https://pragmar.com/media/static/scripts/js/appstat.min.js</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">appstat_headers_js</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="s2">&quot;Should have exactly one resource in database (appstat.min.js)&quot;</span><span class="p">)</span>

        <span class="n">appstat_headers_nojs</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;appstat NOT headers: javascript&quot;</span><span class="p">,</span>
            <span class="n">extras</span><span class="o">=</span><span class="p">[],</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertGreater</span><span class="p">(</span><span class="n">appstat_headers_nojs</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="s2">&quot;Should have many appstat non-js resources in database&quot;</span><span class="p">)</span>

        <span class="n">appstat_sum</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="n">appstat_headers_js</span><span class="o">.</span><span class="n">total</span> <span class="o">+</span> <span class="n">appstat_headers_nojs</span><span class="o">.</span><span class="n">total</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">appstat_sum</span><span class="p">,</span> <span class="n">appstat_any</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="s2">&quot;appstat non-js + js resources should sum to all appstat&quot;</span><span class="p">)</span>

    <span class="k">def</span> <span class="nf">__run_pragmar_search_tests_field_content</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">crawler</span><span class="p">:</span> <span class="n">BaseCrawler</span><span class="p">,</span> <span class="n">site_id</span><span class="p">:</span> <span class="nb">int</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>

        <span class="n">mcp_any</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;mcp&quot;</span><span class="p">,</span>
            <span class="n">extras</span><span class="o">=</span><span class="p">[],</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>

        <span class="n">mcp_content_configuration</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;mcp AND content: configuration&quot;</span><span class="p">,</span>
            <span class="n">extras</span><span class="o">=</span><span class="p">[],</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>

        <span class="c1"># https://pragmar.com/mcp-server-webcrawl/</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertGreaterEqual</span><span class="p">(</span><span class="n">mcp_content_configuration</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="s2">&quot;Should have one, possibly more resources (mcp-server-webcrawl)&quot;</span><span class="p">)</span>

        <span class="n">mcp_content_no_configuration</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;mcp NOT content: configuration&quot;</span><span class="p">,</span>
            <span class="n">extras</span><span class="o">=</span><span class="p">[],</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertGreater</span><span class="p">(</span><span class="n">mcp_content_no_configuration</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="s2">&quot;Should have many mcp non-configuration resources&quot;</span><span class="p">)</span>

        <span class="n">mcp_sum</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="n">mcp_content_configuration</span><span class="o">.</span><span class="n">total</span> <span class="o">+</span> <span class="n">mcp_content_no_configuration</span><span class="o">.</span><span class="n">total</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">mcp_sum</span><span class="p">,</span> <span class="n">mcp_any</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="s2">&quot;mcp non-config + config resources should sum to all mcp&quot;</span><span class="p">)</span>

        <span class="n">mcp_html_content_config</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;type: html AND mcp AND content: configuration&quot;</span><span class="p">,</span>
            <span class="n">extras</span><span class="o">=</span><span class="p">[],</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span>
            <span class="n">mcp_html_content_config</span><span class="o">.</span><span class="n">total</span> <span class="o">&lt;=</span> <span class="n">mcp_content_configuration</span><span class="o">.</span><span class="n">total</span><span class="p">,</span>
            <span class="s2">&quot;Adding type constraint should not increase results&quot;</span>
        <span class="p">)</span>

        <span class="n">wildcard_content_search</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s1">&#39;content: config*&#39;</span><span class="p">,</span>
            <span class="n">extras</span><span class="o">=</span><span class="p">[],</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>
        <span class="n">exact_config_search</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s1">&#39;content: configuration&#39;</span><span class="p">,</span>
            <span class="n">extras</span><span class="o">=</span><span class="p">[],</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span>
            <span class="n">wildcard_content_search</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;=</span> <span class="n">exact_config_search</span><span class="o">.</span><span class="n">total</span><span class="p">,</span>
            <span class="s2">&quot;Wildcard content search should return at least as many results as exact match&quot;</span>
        <span class="p">)</span>

    <span class="k">def</span> <span class="nf">__run_pragmar_search_tests_field_type</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">crawler</span><span class="p">:</span> <span class="n">BaseCrawler</span><span class="p">,</span> <span class="n">site_id</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span> <span class="n">site_resources</span><span class="p">:</span><span class="n">BaseJsonApi</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>

        <span class="n">html_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="s2">&quot;type: html&quot;</span><span class="p">,</span>
            <span class="n">extras</span><span class="o">=</span><span class="p">[],</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>

        <span class="c1"># page count varies by crawler, 10 is conservative low end</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertGreater</span><span class="p">(</span><span class="n">html_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="s2">&quot;Should have greater than 10 HTML resources&quot;</span><span class="p">)</span>

        <span class="n">not_html_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="s2">&quot;NOT type: html&quot;</span><span class="p">,</span>
            <span class="n">extras</span><span class="o">=</span><span class="p">[],</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>
        <span class="c1"># wget is HTML-only fixture</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertGreater</span><span class="p">(</span><span class="n">not_html_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="s2">&quot;Should have greater than 10 non-HTML resources&quot;</span><span class="p">)</span>

        <span class="n">html_sum</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="n">html_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">+</span> <span class="n">not_html_resources</span><span class="o">.</span><span class="n">total</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">html_sum</span><span class="p">,</span> <span class="n">site_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="s2">&quot;HTML + non-HTML should sum to all resources&quot;</span><span class="p">)</span>

        <span class="c1"># keyword + type combination</span>
        <span class="n">appstat_any</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="s2">&quot;appstat&quot;</span><span class="p">,</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">10</span><span class="p">,</span>
        <span class="p">)</span>

        <span class="n">appstat_script</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="s2">&quot;appstat AND type: script&quot;</span><span class="p">,</span>
            <span class="n">extras</span><span class="o">=</span><span class="p">[],</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>

        <span class="c1"># https://pragmar.com/media/static/scripts/js/appstat.min.js</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">appstat_script</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="s2">&quot;Should have exactly one appstat script (appstat.min.js)&quot;</span><span class="p">)</span>

        <span class="n">appstat_not_script</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="s2">&quot;appstat NOT type: script&quot;</span><span class="p">,</span>
            <span class="n">extras</span><span class="o">=</span><span class="p">[],</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertGreater</span><span class="p">(</span><span class="n">appstat_not_script</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="s2">&quot;Should have many appstat non-script resources&quot;</span><span class="p">)</span>

        <span class="n">appstat_sum</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="n">appstat_script</span><span class="o">.</span><span class="n">total</span> <span class="o">+</span> <span class="n">appstat_not_script</span><span class="o">.</span><span class="n">total</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">appstat_sum</span><span class="p">,</span> <span class="n">appstat_any</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="s2">&quot;appstat script + non-script should sum to all appstat&quot;</span><span class="p">)</span>

        <span class="c1"># type OR combinations</span>
        <span class="n">html_or_img</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="s2">&quot;type: html OR type: img&quot;</span><span class="p">,</span>
            <span class="n">extras</span><span class="o">=</span><span class="p">[],</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>

        <span class="bp">self</span><span class="o">.</span><span class="n">assertGreater</span><span class="p">(</span><span class="n">html_or_img</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="mi">20</span><span class="p">,</span> <span class="s2">&quot;HTML + IMG should be greater than 20 resources&quot;</span><span class="p">)</span>

        <span class="n">img_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="s2">&quot;type: img&quot;</span><span class="p">,</span>
            <span class="n">extras</span><span class="o">=</span><span class="p">[],</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span>
            <span class="n">html_or_img</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;=</span> <span class="n">html_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span>
            <span class="s2">&quot;OR should include all HTML resources&quot;</span>
        <span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span>
            <span class="n">html_or_img</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;=</span> <span class="n">img_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span>
            <span class="s2">&quot;OR should include all IMG resources&quot;</span>
        <span class="p">)</span>

        <span class="c1"># combined filtering</span>
        <span class="n">combined_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span> <span class="sa">f</span><span class="s2">&quot;style AND type: </span><span class="si">{</span><span class="n">ResourceResultType</span><span class="o">.</span><span class="n">PAGE</span><span class="o">.</span><span class="n">value</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">,</span>
            <span class="n">fields</span><span class="o">=</span><span class="p">[],</span>
            <span class="n">sort</span><span class="o">=</span><span class="s2">&quot;+url&quot;</span><span class="p">,</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span>
        <span class="p">)</span>

        <span class="k">if</span> <span class="n">combined_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">:</span>
            <span class="k">for</span> <span class="n">resource</span> <span class="ow">in</span> <span class="n">combined_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">:</span>
                <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">resource</span><span class="o">.</span><span class="n">site</span><span class="p">,</span> <span class="n">site_id</span><span class="p">)</span>
                <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="n">resource</span><span class="o">.</span><span class="n">type</span><span class="p">,</span> <span class="n">ResourceResultType</span><span class="o">.</span><span class="n">PAGE</span><span class="p">)</span>

    <span class="k">def</span> <span class="nf">__run_pragmar_search_tests_fulltext</span><span class="p">(</span>
            <span class="bp">self</span><span class="p">,</span>
            <span class="n">crawler</span><span class="p">:</span> <span class="n">BaseCrawler</span><span class="p">,</span>
            <span class="n">site_id</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span>
            <span class="n">site_resources</span><span class="p">:</span><span class="n">BaseJsonApi</span>
        <span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>

        <span class="c1"># Boolean workout</span>
        <span class="c1"># result counts are fragile, intersections should not be</span>
        <span class="c1"># counts are worth the fragility, for now</span>

        <span class="n">boolean_primary_resources</span>  <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;type: html AND (</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_PRIMARY_KEYWORD</span><span class="si">}</span><span class="s2">)&quot;</span><span class="p">,</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span>
        <span class="p">)</span>

        <span class="c1"># varies by crawler, katana doesn&#39;t crawl /help/ depth by default</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">boolean_primary_resources</span> <span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="sa">f</span><span class="s2">&quot;Primary search should return results&quot;</span><span class="p">)</span>

        <span class="n">boolean_secondary_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;type: html AND (</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_SECONDARY_KEYWORD</span><span class="si">}</span><span class="s2">)&quot;</span><span class="p">,</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">12</span><span class="p">,</span>
        <span class="p">)</span>

        <span class="c1"># re: all these &gt; 0 checks, result counts vary by crawler, all have default crawl behaviors/depths/externals</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">boolean_secondary_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="sa">f</span><span class="s2">&quot;Secondary search should return results&quot;</span><span class="p">)</span>

        <span class="c1"># AND</span>
        <span class="n">primary_and_secondary_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;type: html AND (</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_PRIMARY_KEYWORD</span><span class="si">}</span><span class="s2"> AND </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_SECONDARY_KEYWORD</span><span class="si">}</span><span class="s2">)&quot;</span><span class="p">,</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">primary_and_secondary_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;=</span> <span class="mi">0</span><span class="p">,</span> <span class="sa">f</span><span class="s2">&quot;Primary AND Secondary should return results&quot;</span><span class="p">)</span>

        <span class="c1"># OR</span>
        <span class="n">primary_or_secondary_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;type: html AND (</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_PRIMARY_KEYWORD</span><span class="si">}</span><span class="s2"> OR </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_SECONDARY_KEYWORD</span><span class="si">}</span><span class="s2">)&quot;</span><span class="p">,</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">primary_or_secondary_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="sa">f</span><span class="s2">&quot;Primary OR Secondary should return results (union)&quot;</span><span class="p">)</span>

        <span class="c1"># NOT</span>
        <span class="n">primary_not_secondary_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;type: html AND (</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_PRIMARY_KEYWORD</span><span class="si">}</span><span class="s2"> NOT </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_SECONDARY_KEYWORD</span><span class="si">}</span><span class="s2">)&quot;</span><span class="p">,</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>

        <span class="n">secondary_not_primary_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;type: html AND (</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_SECONDARY_KEYWORD</span><span class="si">}</span><span class="s2"> NOT </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_PRIMARY_KEYWORD</span><span class="si">}</span><span class="s2">)&quot;</span><span class="p">,</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">secondary_not_primary_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;=</span> <span class="mi">0</span><span class="p">,</span> <span class="sa">f</span><span class="s2">&quot;Secondary NOT Primary should return results&quot;</span><span class="p">)</span>

        <span class="c1"># logical relationships</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span>
            <span class="n">primary_and_secondary_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span>
            <span class="n">boolean_primary_resources</span> <span class="o">.</span><span class="n">total</span> <span class="o">+</span> <span class="n">boolean_secondary_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">-</span> <span class="n">primary_or_secondary_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span>
            <span class="s2">&quot;Intersection should equal A + B - Union (inclusion-exclusion principle)&quot;</span>
        <span class="p">)</span>

        <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span>
            <span class="n">primary_not_secondary_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">+</span> <span class="n">primary_and_secondary_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span>
            <span class="n">boolean_primary_resources</span> <span class="o">.</span><span class="n">total</span><span class="p">,</span>
            <span class="s2">&quot;Primary NOT Secondary + Primary AND Secondary should equal total Primary results&quot;</span>
        <span class="p">)</span>

        <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span>
            <span class="n">secondary_not_primary_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">+</span> <span class="n">primary_and_secondary_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span>
            <span class="n">boolean_secondary_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span>
            <span class="s2">&quot;Secondary NOT Primary + Primary AND Secondary should equal total Secondary results&quot;</span>
        <span class="p">)</span>

        <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span>
            <span class="n">primary_not_secondary_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">+</span> <span class="n">secondary_not_primary_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">+</span> <span class="n">primary_and_secondary_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span>
            <span class="n">primary_or_secondary_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span>
            <span class="s2">&quot;Sum of exclusive sets plus intersection should equal union&quot;</span>
        <span class="p">)</span>

        <span class="c1"># complex boolean with field constraints</span>
        <span class="n">primary_and_html_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;type: html AND (</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_PRIMARY_KEYWORD</span><span class="si">}</span><span class="s2">)&quot;</span><span class="p">,</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">primary_and_html_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="sa">f</span><span class="s2">&quot;Primary AND type:html should return results&quot;</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span>
            <span class="n">primary_and_html_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&lt;=</span> <span class="n">boolean_primary_resources</span> <span class="o">.</span><span class="n">total</span><span class="p">,</span>
            <span class="s2">&quot;Adding AND constraints should not increase result count&quot;</span>
        <span class="p">)</span>

        <span class="c1"># Parentheses grouping</span>
        <span class="n">grouped_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;type: html AND (</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_PRIMARY_KEYWORD</span><span class="si">}</span><span class="s2"> OR </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_SECONDARY_KEYWORD</span><span class="si">}</span><span class="s2">)&quot;</span><span class="p">,</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">grouped_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="sa">f</span><span class="s2">&quot;Grouped OR with HTML filter should return results&quot;</span><span class="p">)</span>


        <span class="n">hyphenated_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_HYPHENATED_KEYWORD</span><span class="p">,</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">hyphenated_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&gt;</span> <span class="mi">0</span><span class="p">,</span> <span class="sa">f</span><span class="s2">&quot;Keyword &#39;</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_HYPHENATED_KEYWORD</span><span class="si">}</span><span class="s2">&#39; should return results&quot;</span><span class="p">)</span>

        <span class="n">double_or_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;(</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_PRIMARY_KEYWORD</span><span class="si">}</span><span class="s2"> OR </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_SECONDARY_KEYWORD</span><span class="si">}</span><span class="s2"> OR moffitor)&quot;</span>
        <span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertGreater</span><span class="p">(</span>
            <span class="n">double_or_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span>
            <span class="sa">f</span><span class="s2">&quot;OR query should return some results&quot;</span>
        <span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertLessEqual</span><span class="p">(</span>
            <span class="n">double_or_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="n">site_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span>
            <span class="sa">f</span><span class="s2">&quot;OR query should be less than, or equal to all results&quot;</span>
        <span class="p">)</span>
        <span class="n">parens_or_and_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;(</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_PRIMARY_KEYWORD</span><span class="si">}</span><span class="s2"> OR </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_SECONDARY_KEYWORD</span><span class="si">}</span><span class="s2">) AND collaborations &quot;</span>
        <span class="p">)</span>
        <span class="c1"># respect the AND, there should be only one result</span>
        <span class="c1"># (A OR B) AND C vs. A OR B AND C</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span>
            <span class="n">parens_or_and_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span>
            <span class="sa">f</span><span class="s2">&quot;(A OR B) AND C should be 1 result (AND collaborations, unless fixture changed)&quot;</span>
        <span class="p">)</span>

        <span class="n">parens_or_and_resources_reverse</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;collaborations AND (</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_PRIMARY_KEYWORD</span><span class="si">}</span><span class="s2"> OR </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_SECONDARY_KEYWORD</span><span class="si">}</span><span class="s2">) &quot;</span>
        <span class="p">)</span>
        <span class="c1"># respect the AND, there should be only one result</span>
        <span class="c1"># A AND (B OR C) vs. A AND B OR C</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span>
            <span class="n">parens_or_and_resources_reverse</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span>
            <span class="sa">f</span><span class="s2">&quot;A AND (B OR C) should be 1 result (collaborations AND, unless fixture changed)&quot;</span>
        <span class="p">)</span>

        <span class="n">wide_type_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;type: script OR type: style OR type: iframe OR type: font OR type: text OR type: rss OR type: other&quot;</span>
        <span class="p">)</span>

        <span class="bp">self</span><span class="o">.</span><span class="n">assertLess</span><span class="p">(</span>
            <span class="n">wide_type_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="n">site_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span>
            <span class="sa">f</span><span class="s2">&quot;A long chained OR should not return all results&quot;</span>
        <span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertGreater</span><span class="p">(</span>
            <span class="n">wide_type_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span>
            <span class="sa">f</span><span class="s2">&quot;A long chained OR should return some results&quot;</span>
        <span class="p">)</span>

        <span class="n">complex_and</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_PRIMARY_KEYWORD</span><span class="si">}</span><span class="s2"> AND type:html AND status:200&quot;</span>
        <span class="p">)</span>

        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">complex_and</span><span class="o">.</span><span class="n">total</span> <span class="o">&lt;=</span> <span class="n">boolean_primary_resources</span> <span class="o">.</span><span class="n">total</span><span class="p">,</span>
                <span class="s2">&quot;Adding AND conditions should not increase results&quot;</span><span class="p">)</span>

        <span class="n">grouped_or</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;(</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_PRIMARY_KEYWORD</span><span class="si">}</span><span class="s2"> OR </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_SECONDARY_KEYWORD</span><span class="si">}</span><span class="s2">) AND type:html AND status:200&quot;</span>
        <span class="p">)</span>

        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">grouped_or</span><span class="o">.</span><span class="n">total</span> <span class="o">&lt;=</span> <span class="n">primary_or_secondary_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span>
                <span class="s2">&quot;Adding AND conditions to OR should not increase results&quot;</span><span class="p">)</span>

        <span class="c1"># URL OR parsing, url is a special case, an fts5 field searched with SQL LIKE</span>
        <span class="n">url_or_simple</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span> <span class="n">query</span><span class="o">=</span><span class="s2">&quot;url: pragmar.com OR url: example.com&quot;</span><span class="p">,</span> <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
        <span class="n">url_or_with_type</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span> <span class="n">query</span><span class="o">=</span><span class="s2">&quot;type: html AND (url: pragmar.com OR url: example.com)&quot;</span><span class="p">,</span> <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
        <span class="n">html_total</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span> <span class="n">query</span><span class="o">=</span><span class="s2">&quot;type: html&quot;</span><span class="p">,</span> <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">url_or_with_type</span><span class="o">.</span><span class="n">total</span> <span class="o">&lt;=</span> <span class="n">url_or_simple</span><span class="o">.</span><span class="n">total</span><span class="p">,</span>
            <span class="sa">f</span><span class="s2">&quot;AND constraint should not increase results&quot;</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">url_or_with_type</span><span class="o">.</span><span class="n">total</span> <span class="o">&lt;=</span> <span class="n">html_total</span><span class="o">.</span><span class="n">total</span><span class="p">,</span>
            <span class="sa">f</span><span class="s2">&quot;URL filter should not exceed HTML total&quot;</span><span class="p">)</span>

    <span class="k">def</span> <span class="nf">__run_pragmar_search_tests_extras</span><span class="p">(</span>
            <span class="bp">self</span><span class="p">,</span>
            <span class="n">crawler</span><span class="p">:</span> <span class="n">BaseCrawler</span><span class="p">,</span>
            <span class="n">site_id</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span>
            <span class="n">site_resources</span><span class="p">:</span><span class="n">BaseJsonApi</span><span class="p">,</span>
            <span class="n">primary_resources</span><span class="p">:</span><span class="n">BaseJsonApi</span><span class="p">,</span>
            <span class="n">secondary_resources</span><span class="p">:</span><span class="n">BaseJsonApi</span><span class="p">,</span>
        <span class="p">)</span> <span class="o">-&gt;</span> <span class="kc">None</span><span class="p">:</span>

        <span class="n">snippet_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_PRIMARY_KEYWORD</span><span class="si">}</span><span class="s2"> AND type: html&quot;</span><span class="p">,</span>
            <span class="n">extras</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;snippets&quot;</span><span class="p">],</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertIn</span><span class="p">(</span><span class="s2">&quot;snippets&quot;</span><span class="p">,</span> <span class="n">snippet_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">to_dict</span><span class="p">()[</span><span class="s2">&quot;extras&quot;</span><span class="p">],</span>
                <span class="s2">&quot;First result should have snippets in extras&quot;</span><span class="p">)</span>

        <span class="n">xpath_count_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_PRIMARY_KEYWORD</span><span class="p">,</span>
            <span class="n">extras</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;markdown&quot;</span><span class="p">],</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertIn</span><span class="p">(</span><span class="s2">&quot;markdown&quot;</span><span class="p">,</span> <span class="n">xpath_count_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">to_dict</span><span class="p">()[</span><span class="s2">&quot;extras&quot;</span><span class="p">],</span>
                <span class="s2">&quot;First result should have markdown in extras&quot;</span><span class="p">)</span>

        <span class="n">xpath_count_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="s2">&quot;url: pragmar.com AND status: 200&quot;</span><span class="p">,</span>
            <span class="n">extras</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;xpath&quot;</span><span class="p">],</span>
            <span class="n">extrasXpath</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;count(//h1)&quot;</span><span class="p">],</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
            <span class="n">sort</span><span class="o">=</span><span class="s2">&quot;-url&quot;</span>
        <span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertIn</span><span class="p">(</span><span class="s2">&quot;xpath&quot;</span><span class="p">,</span> <span class="n">xpath_count_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">to_dict</span><span class="p">()[</span><span class="s2">&quot;extras&quot;</span><span class="p">],</span>
                <span class="s2">&quot;First result should have xpath in extras&quot;</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertEqual</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">xpath_count_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">to_dict</span><span class="p">()[</span><span class="s2">&quot;extras&quot;</span><span class="p">][</span><span class="s2">&quot;xpath&quot;</span><span class="p">]),</span>
                <span class="mi">1</span><span class="p">,</span> <span class="s2">&quot;Should be exactly one H1 hit in xpath extras&quot;</span><span class="p">)</span>

        <span class="c1"># this test inadvertently also covers t_URL_FIELD parser testing</span>
        <span class="n">xpath_h1_text_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="s2">&quot;url: https://pragmar.com AND status: 200&quot;</span><span class="p">,</span>
            <span class="n">extras</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;xpath&quot;</span><span class="p">],</span>
            <span class="n">extrasXpath</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;//h1/text()&quot;</span><span class="p">],</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
            <span class="n">sort</span><span class="o">=</span><span class="s2">&quot;+url&quot;</span>
        <span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertIn</span><span class="p">(</span><span class="s2">&quot;xpath&quot;</span><span class="p">,</span> <span class="n">xpath_h1_text_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">to_dict</span><span class="p">()[</span><span class="s2">&quot;extras&quot;</span><span class="p">],</span>
                <span class="s2">&quot;First result should have xpath in extras&quot;</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span> <span class="n">xpath_h1_text_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">to_dict</span><span class="p">()[</span><span class="s2">&quot;extras&quot;</span><span class="p">]</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">,</span>
                <span class="s2">&quot;Should have pragmar in fixture h1&quot;</span><span class="p">)</span>

        <span class="c1"># should be pragmar homepage, assert &quot;pragmar&quot; in h1</span>
        <span class="n">first_xpath_result</span> <span class="o">=</span> <span class="n">xpath_h1_text_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">to_dict</span><span class="p">()[</span><span class="s2">&quot;extras&quot;</span><span class="p">][</span><span class="s2">&quot;xpath&quot;</span><span class="p">][</span><span class="mi">0</span><span class="p">][</span><span class="s2">&quot;value&quot;</span><span class="p">]</span><span class="o">.</span><span class="n">lower</span><span class="p">()</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="s2">&quot;pragmar&quot;</span> <span class="ow">in</span> <span class="n">first_xpath_result</span><span class="p">,</span>
                <span class="sa">f</span><span class="s2">&quot;Should have pragmar in fixture homepage h1 (</span><span class="si">{</span><span class="n">first_xpath_result</span><span class="si">}</span><span class="s2">)&quot;</span><span class="p">)</span>

        <span class="n">combined_resources</span> <span class="o">=</span> <span class="n">crawler</span><span class="o">.</span><span class="n">get_resources_api</span><span class="p">(</span>
            <span class="n">sites</span><span class="o">=</span><span class="p">[</span><span class="n">site_id</span><span class="p">],</span>
            <span class="n">query</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">__PRAGMAR_PRIMARY_KEYWORD</span><span class="p">,</span>
            <span class="n">extras</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;snippets&quot;</span><span class="p">,</span> <span class="s2">&quot;markdown&quot;</span><span class="p">],</span>
            <span class="n">limit</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
        <span class="p">)</span>
        <span class="n">first_result</span> <span class="o">=</span> <span class="n">combined_resources</span><span class="o">.</span><span class="n">_results</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">to_dict</span><span class="p">()</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertIn</span><span class="p">(</span><span class="s2">&quot;extras&quot;</span><span class="p">,</span> <span class="n">first_result</span><span class="p">,</span> <span class="s2">&quot;First result should have extras field&quot;</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertIn</span><span class="p">(</span><span class="s2">&quot;snippets&quot;</span><span class="p">,</span> <span class="n">first_result</span><span class="p">[</span><span class="s2">&quot;extras&quot;</span><span class="p">],</span> <span class="s2">&quot;First result should have snippets in extras&quot;</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertIn</span><span class="p">(</span><span class="s2">&quot;markdown&quot;</span><span class="p">,</span> <span class="n">first_result</span><span class="p">[</span><span class="s2">&quot;extras&quot;</span><span class="p">],</span> <span class="s2">&quot;First result should have markdown in extras&quot;</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">primary_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&lt;=</span> <span class="n">site_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span>
                <span class="s2">&quot;Search should return less than or equivalent results to site total&quot;</span><span class="p">)</span>
        <span class="bp">self</span><span class="o">.</span><span class="n">assertTrue</span><span class="p">(</span><span class="n">secondary_resources</span><span class="o">.</span><span class="n">total</span> <span class="o">&lt;=</span> <span class="n">site_resources</span><span class="o">.</span><span class="n">total</span><span class="p">,</span>
                <span class="s2">&quot;Search should return less than or equivalent results to site total&quot;</span><span class="p">)</span></div>

</pre></div>

           </div>
          </div>
          <footer>

  <hr/>

  <div role="contentinfo">
    <p>&#169; Copyright 2025, pragmar.</p>
  </div>

  Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
    <a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
    provided by <a href="https://readthedocs.org">Read the Docs</a>.
   

</footer>
        </div>
      </div>
    </section>
  </div>
  <script>
      // Run once the DOM is ready: switch on the Read the Docs theme navigation.
      jQuery(function () {
          var themeNavigation = SphinxRtdTheme.Navigation;
          // NOTE(review): the `true` argument presumably turns on sticky navigation — confirm against the theme's JS.
          themeNavigation.enable(true);
      });
  </script> 

</body>
</html>
```
Page 32/33 · First · Prev · Next · Last