feat(models): add dedicated tables field to CrawlResult
- Add tables field to CrawlResult model while maintaining backward compatibility
- Update async_webcrawler.py to extract tables from media and pass to tables field
- Update crypto_analysis_example.py to use the new tables field
- Add /config/dump examples to demo_docker_api.py
- Bump version to 0.6.1
parent ad4dfb21e1
commit ccec40ed17
@@ -5,6 +5,15 @@ All notable changes to Crawl4AI will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.6.1] - 2025-04-24
+
+### Added
+
+- New dedicated `tables` field in `CrawlResult` model for better table extraction handling
+- Updated crypto_analysis_example.py to use the new tables field with backward compatibility
+
+### Changed
+
+- Improved playground UI in Docker deployment with better endpoint handling and UI feedback
+
 ## [0.6.0] - 2025-04-22
 
 ### Added
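A minimal usage sketch of the new field (not part of the commit), assuming the usual `AsyncWebCrawler` entry point and a placeholder URL that actually serves HTML tables; it mirrors the backward-compatible fallback used in crypto_analysis_example.py:

```python
import asyncio

from crawl4ai import AsyncWebCrawler


async def main():
    async with AsyncWebCrawler() as crawler:
        result = await crawler.arun(url="https://example.com/page-with-tables")  # placeholder URL
        # Prefer the dedicated field introduced in 0.6.1; fall back to
        # media["tables"] for results produced by older versions.
        tables = result.tables if getattr(result, "tables", None) else result.media.get("tables", [])
        print(f"Extracted {len(tables)} table(s)")


asyncio.run(main())
```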
@@ -1,3 +1,3 @@
 # crawl4ai/_version.py
-__version__ = "0.6.0"
+__version__ = "0.6.1"
@@ -496,11 +496,13 @@ class AsyncWebCrawler:
             cleaned_html = sanitize_input_encode(
                 result.get("cleaned_html", ""))
             media = result.get("media", {})
+            tables = media.pop("tables", []) if isinstance(media, dict) else []
             links = result.get("links", {})
             metadata = result.get("metadata", {})
         else:
             cleaned_html = sanitize_input_encode(result.cleaned_html)
             media = result.media.model_dump()
+            tables = media.pop("tables", [])
             links = result.links.model_dump()
             metadata = result.metadata
@@ -627,6 +629,7 @@ class AsyncWebCrawler:
             cleaned_html=cleaned_html,
             markdown=markdown_result,
             media=media,
+            tables=tables,  # NEW
             links=links,
             metadata=metadata,
             screenshot=screenshot_data,
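To make the intent of the two added `tables = media.pop(...)` lines explicit: `dict.pop` both returns the extracted tables and removes them from `media`, so the tables reach `CrawlResult` only through the new dedicated field. A tiny sketch with assumed inputs:

```python
# Minimal illustration (assumed inputs) of how tables are split out of media
media = {"images": [], "videos": [], "tables": [{"headers": ["h"], "rows": [["v"]]}]}
tables = media.pop("tables", []) if isinstance(media, dict) else []
assert "tables" not in media          # media no longer carries the tables
assert tables[0]["headers"] == ["h"]  # they travel via the new CrawlResult field
```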
@@ -1,4 +1,4 @@
-from pydantic import BaseModel, HttpUrl, PrivateAttr
+from pydantic import BaseModel, HttpUrl, PrivateAttr, Field
 from typing import List, Dict, Optional, Callable, Awaitable, Union, Any
 from typing import AsyncGenerator
 from typing import Generic, TypeVar
@@ -150,6 +150,7 @@ class CrawlResult(BaseModel):
     redirected_url: Optional[str] = None
     network_requests: Optional[List[Dict[str, Any]]] = None
     console_messages: Optional[List[Dict[str, Any]]] = None
+    tables: List[Dict] = Field(default_factory=list)  # NEW – [{headers, rows, caption, summary}]
 
     class Config:
         arbitrary_types_allowed = True
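The inline comment on the new field indicates each entry carries headers, rows, caption, and summary. Below is a small illustrative sketch (not from the commit) of consuming one such entry with pandas; the dict values are placeholders:

```python
import pandas as pd

# Entry shape assumed from the field comment: [{headers, rows, caption, summary}]
table = {
    "headers": ["Coin", "Price", "Market Cap"],   # placeholder column names
    "rows": [["BTC", "64000", "1.2T"],
             ["ETH", "3100", "380B"]],            # placeholder row data
    "caption": "Example market table",
    "summary": "",
}

# Build a DataFrame the same way crypto_analysis_example.py does
df = pd.DataFrame(table["rows"], columns=table["headers"])
print(df.head())
```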
@@ -193,7 +193,48 @@
         <textarea id="urls" class="w-full bg-dark border border-border rounded p-2 h-32 text-sm mb-4"
             spellcheck="false">https://example.com</textarea>
 
-        <details class="mb-4">
+        <!-- Specific options for /md endpoint -->
+        <details id="md-options" class="mb-4 hidden">
+            <summary class="text-sm text-secondary cursor-pointer">/md Options</summary>
+            <div class="mt-2 space-y-3 p-2 border border-border rounded">
+                <div>
+                    <label for="md-filter" class="block text-xs text-secondary mb-1">Filter Type</label>
+                    <select id="md-filter" class="bg-dark border border-border rounded px-2 py-1 text-sm w-full">
+                        <option value="fit">fit - Adaptive content filtering</option>
+                        <option value="raw">raw - No filtering</option>
+                        <option value="bm25">bm25 - BM25 keyword relevance</option>
+                        <option value="llm">llm - LLM-based filtering</option>
+                    </select>
+                </div>
+                <div>
+                    <label for="md-query" class="block text-xs text-secondary mb-1">Query (for BM25/LLM filters)</label>
+                    <input id="md-query" type="text" placeholder="Enter search terms or instructions"
+                        class="bg-dark border border-border rounded px-2 py-1 text-sm w-full">
+                </div>
+                <div>
+                    <label for="md-cache" class="block text-xs text-secondary mb-1">Cache Mode</label>
+                    <select id="md-cache" class="bg-dark border border-border rounded px-2 py-1 text-sm w-full">
+                        <option value="0">Write-Only (0)</option>
+                        <option value="1">Enabled (1)</option>
+                    </select>
+                </div>
+            </div>
+        </details>
+
+        <!-- Specific options for /llm endpoint -->
+        <details id="llm-options" class="mb-4 hidden">
+            <summary class="text-sm text-secondary cursor-pointer">/llm Options</summary>
+            <div class="mt-2 space-y-3 p-2 border border-border rounded">
+                <div>
+                    <label for="llm-question" class="block text-xs text-secondary mb-1">Question</label>
+                    <input id="llm-question" type="text" value="What is this page about?"
+                        class="bg-dark border border-border rounded px-2 py-1 text-sm w-full">
+                </div>
+            </div>
+        </details>
+
+        <!-- Advanced config for /crawl endpoints -->
+        <details id="adv-config" class="mb-4">
             <summary class="text-sm text-secondary cursor-pointer">Advanced Config <span
                     class="text-xs text-primary">(Python → auto‑JSON)</span></summary>
@@ -437,6 +478,33 @@
         cm.setValue(TEMPLATES[e.target.value]);
         document.getElementById('cfg-status').textContent = '';
     });
 
+    // Handle endpoint selection change to show appropriate options
+    document.getElementById('endpoint').addEventListener('change', function(e) {
+        const endpoint = e.target.value;
+        const mdOptions = document.getElementById('md-options');
+        const llmOptions = document.getElementById('llm-options');
+        const advConfig = document.getElementById('adv-config');
+
+        // Hide all option sections first
+        mdOptions.classList.add('hidden');
+        llmOptions.classList.add('hidden');
+        advConfig.classList.add('hidden');
+
+        // Show the appropriate section based on endpoint
+        if (endpoint === 'md') {
+            mdOptions.classList.remove('hidden');
+            // Auto-open the /md options
+            mdOptions.setAttribute('open', '');
+        } else if (endpoint === 'llm') {
+            llmOptions.classList.remove('hidden');
+            // Auto-open the /llm options
+            llmOptions.setAttribute('open', '');
+        } else {
+            // For /crawl endpoints, show the advanced config
+            advConfig.classList.remove('hidden');
+        }
+    });
+
     async function pyConfigToJson() {
         const code = cm.getValue().trim();
@@ -494,10 +562,18 @@
     }
 
     // Generate code snippets
-    function generateSnippets(api, payload) {
+    function generateSnippets(api, payload, method = 'POST') {
         // Python snippet
         const pyCodeEl = document.querySelector('#python-content code');
-        const pySnippet = `import httpx\n\nasync def crawl():\n    async with httpx.AsyncClient() as client:\n        response = await client.post(\n            "${window.location.origin}${api}",\n            json=${JSON.stringify(payload, null, 4).replace(/\n/g, '\n            ')}\n        )\n        return response.json()`;
+        let pySnippet;
+
+        if (method === 'GET') {
+            // GET request (for /llm endpoint)
+            pySnippet = `import httpx\n\nasync def crawl():\n    async with httpx.AsyncClient() as client:\n        response = await client.get(\n            "${window.location.origin}${api}"\n        )\n        return response.json()`;
+        } else {
+            // POST request (for /crawl and /md endpoints)
+            pySnippet = `import httpx\n\nasync def crawl():\n    async with httpx.AsyncClient() as client:\n        response = await client.post(\n            "${window.location.origin}${api}",\n            json=${JSON.stringify(payload, null, 4).replace(/\n/g, '\n            ')}\n        )\n        return response.json()`;
+        }
+
         pyCodeEl.textContent = pySnippet;
         pyCodeEl.className = 'python hljs'; // Reset classes
@@ -505,7 +581,15 @@
 
         // cURL snippet
         const curlCodeEl = document.querySelector('#curl-content code');
-        const curlSnippet = `curl -X POST ${window.location.origin}${api} \\\n  -H "Content-Type: application/json" \\\n  -d '${JSON.stringify(payload)}'`;
+        let curlSnippet;
+
+        if (method === 'GET') {
+            // GET request (for /llm endpoint)
+            curlSnippet = `curl -X GET "${window.location.origin}${api}"`;
+        } else {
+            // POST request (for /crawl and /md endpoints)
+            curlSnippet = `curl -X POST ${window.location.origin}${api} \\\n  -H "Content-Type: application/json" \\\n  -d '${JSON.stringify(payload)}'`;
+        }
+
         curlCodeEl.textContent = curlSnippet;
         curlCodeEl.className = 'bash hljs'; // Reset classes
@@ -536,20 +620,39 @@
 
         const endpointMap = {
             crawl: '/crawl',
-        };
-        /*const endpointMap = {
-            crawl: '/crawl',
-            crawl_stream: '/crawl/stream',
+            // crawl_stream: '/crawl/stream',
             md: '/md',
             llm: '/llm'
-        };*/
+        };
 
         const api = endpointMap[endpoint];
-        const payload = {
-            urls,
-            ...advConfig
-        };
+        let payload;
+
+        // Create appropriate payload based on endpoint type
+        if (endpoint === 'md') {
+            // Get values from the /md specific inputs
+            const filterType = document.getElementById('md-filter').value;
+            const query = document.getElementById('md-query').value.trim();
+            const cache = document.getElementById('md-cache').value;
+
+            // MD endpoint expects: { url, f, q, c }
+            payload = {
+                url: urls[0],       // Take first URL
+                f: filterType,      // Lowercase filter type as required by server
+                q: query || null,   // Use the query if provided, otherwise null
+                c: cache
+            };
+        } else if (endpoint === 'llm') {
+            // LLM endpoint has a different URL pattern and uses query params
+            // This will be handled directly in the fetch below
+            payload = null;
+        } else {
+            // Default payload for /crawl and /crawl/stream
+            payload = {
+                urls,
+                ...advConfig
+            };
+        }
 
         updateStatus('processing');
@@ -557,7 +660,18 @@
         const startTime = performance.now();
         let response, responseData;
 
-        if (endpoint === 'crawl_stream') {
+        if (endpoint === 'llm') {
+            // Special handling for LLM endpoint which uses URL pattern: /llm/{encoded_url}?q={query}
+            const url = urls[0];
+            const encodedUrl = encodeURIComponent(url);
+            // Get the question from the LLM-specific input
+            const question = document.getElementById('llm-question').value.trim() || "What is this page about?";
+
+            response = await fetch(`${api}/${encodedUrl}?q=${encodeURIComponent(question)}`, {
+                method: 'GET',
+                headers: { 'Accept': 'application/json' }
+            });
+        } else if (endpoint === 'crawl_stream') {
             // Stream processing
             response = await fetch(api, {
                 method: 'POST',
@@ -597,7 +711,7 @@
             document.querySelector('#response-content code').className = 'json hljs'; // Reset classes
             forceHighlightElement(document.querySelector('#response-content code'));
         } else {
-            // Regular request
+            // Regular request (handles /crawl and /md)
             response = await fetch(api, {
                 method: 'POST',
                 headers: { 'Content-Type': 'application/json' },
@@ -625,7 +739,16 @@
         }
 
         forceHighlightElement(document.querySelector('#response-content code'));
-        generateSnippets(api, payload);
+
+        // For generateSnippets, handle the LLM case specially
+        if (endpoint === 'llm') {
+            const url = urls[0];
+            const encodedUrl = encodeURIComponent(url);
+            const question = document.getElementById('llm-question').value.trim() || "What is this page about?";
+            generateSnippets(`${api}/${encodedUrl}?q=${encodeURIComponent(question)}`, null, 'GET');
+        } else {
+            generateSnippets(api, payload);
+        }
     } catch (error) {
         console.error('Error:', error);
         updateStatus('error');
@@ -807,9 +930,24 @@
             });
         });
     }
 
-    // Call this in your DOMContentLoaded or initialization
-    initCopyButtons();
+    // Function to initialize UI based on selected endpoint
+    function initUI() {
+        // Trigger the endpoint change handler to set initial UI state
+        const endpointSelect = document.getElementById('endpoint');
+        const event = new Event('change');
+        endpointSelect.dispatchEvent(event);
+
+        // Initialize copy buttons
+        initCopyButtons();
+    }
+
+    // Initialize on page load
+    document.addEventListener('DOMContentLoaded', initUI);
+    // Also call it immediately in case the script runs after DOM is already loaded
+    if (document.readyState !== 'loading') {
+        initUI();
+    }
 
 </script>
 </body>
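For context, here is a small httpx sketch (not part of the commit) that issues the same request shapes the playground now builds: a POST to /md with a {url, f, q, c} body and a GET to /llm/{encoded_url}?q=... . The base URL and target page are assumptions; adjust them to your deployment.

```python
import asyncio
import urllib.parse

import httpx

BASE_URL = "http://localhost:11235"  # assumed local Docker deployment; adjust as needed


async def main():
    async with httpx.AsyncClient(base_url=BASE_URL, timeout=120.0) as client:
        # /md takes a POST body of {url, f, q, c}, mirroring the playground payload
        md_resp = await client.post(
            "/md",
            json={"url": "https://example.com", "f": "fit", "q": None, "c": "0"},
        )
        print(md_resp.json().get("markdown", "")[:200])

        # /llm is a GET with the target URL encoded into the path and the question in ?q=
        enc = urllib.parse.quote_plus("https://example.com", safe="")
        llm_resp = await client.get(f"/llm/{enc}", params={"q": "What is this page about?"})
        print(llm_resp.json().get("answer", ""))


asyncio.run(main())
```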
@@ -391,12 +391,14 @@ async def main():
     # Process results
     raw_df = pd.DataFrame()
     for result in results:
-        if result.success and result.media["tables"]:
+        # Use the new tables field, falling back to media["tables"] for backward compatibility
+        tables = result.tables if hasattr(result, "tables") and result.tables else result.media.get("tables", [])
+        if result.success and tables:
             # Extract primary market table
             # DataFrame
             raw_df = pd.DataFrame(
-                result.media["tables"][0]["rows"],
-                columns=result.media["tables"][0]["headers"],
+                tables[0]["rows"],
+                columns=tables[0]["headers"],
             )
             break
@@ -4,6 +4,8 @@ import json
 import os
 import time
 from typing import List, Dict, Any, AsyncGenerator, Optional
+import textwrap       # ← new: for pretty code literals
+import urllib.parse   # ← needed for URL-safe /llm calls
 from dotenv import load_dotenv
 from rich.console import Console
 from rich.syntax import Syntax
@@ -969,13 +971,111 @@ async def demo_deep_with_ssl(client: httpx.AsyncClient):
         else:
             console.print(f"  [red]✘[/] URL: [link={result['url']}]{result['url']}[/link] | Crawl failed.")
 
+
+# 7. Markdown helper endpoint
+async def demo_markdown_endpoint(client: httpx.AsyncClient):
+    """
+    One-shot helper around /md.
+    Fetches PYTHON_URL with FIT filter and prints the first 500 chars of Markdown.
+    """
+    target_url = PYTHON_URL
+    payload = {"url": target_url, "f": "fit", "q": None, "c": "0"}
+
+    console.rule("[bold blue]Demo 7a: /md Endpoint[/]", style="blue")
+    print_payload(payload)
+
+    try:
+        t0 = time.time()
+        resp = await client.post("/md", json=payload)
+        dt = time.time() - t0
+        console.print(f"Response Status: [bold {'green' if resp.is_success else 'red'}]{resp.status_code}[/] (took {dt:.2f}s)")
+        resp.raise_for_status()
+        md = resp.json().get("markdown", "")
+        snippet = (md[:500] + "...") if len(md) > 500 else md
+        console.print(Panel(snippet, title="Markdown snippet", border_style="cyan", expand=False))
+    except Exception as e:
+        console.print(f"[bold red]Error hitting /md:[/] {e}")
+
+
+# 8. LLM QA helper endpoint
+async def demo_llm_endpoint(client: httpx.AsyncClient):
+    """
+    Quick QA round-trip with /llm.
+    Asks a trivial question against SIMPLE_URL just to show wiring.
+    """
+    page_url = SIMPLE_URL
+    question = "What is the title of this page?"
+
+    console.rule("[bold magenta]Demo 7b: /llm Endpoint[/]", style="magenta")
+    enc = urllib.parse.quote_plus(page_url, safe="")
+    console.print(f"GET /llm/{enc}?q={question}")
+
+    try:
+        t0 = time.time()
+        resp = await client.get(f"/llm/{enc}", params={"q": question})
+        dt = time.time() - t0
+        console.print(f"Response Status: [bold {'green' if resp.is_success else 'red'}]{resp.status_code}[/] (took {dt:.2f}s)")
+        resp.raise_for_status()
+        answer = resp.json().get("answer", "")
+        console.print(Panel(answer or "No answer returned", title="LLM answer", border_style="magenta", expand=False))
+    except Exception as e:
+        console.print(f"[bold red]Error hitting /llm:[/] {e}")
+
+
+# 9. /config/dump helpers --------------------------------------------------
+
+async def demo_config_dump_valid(client: httpx.AsyncClient):
+    """
+    Send a single top-level CrawlerRunConfig(...) expression and show the dump.
+    """
+    code_snippet = "CrawlerRunConfig(cache_mode='BYPASS', screenshot=True)"
+    payload = {"code": code_snippet}
+
+    console.rule("[bold blue]Demo 8a: /config/dump (valid)[/]", style="blue")
+    print_payload(payload)
+
+    try:
+        t0 = time.time()
+        resp = await client.post("/config/dump", json=payload)
+        dt = time.time() - t0
+        console.print(f"Response Status: [bold {'green' if resp.is_success else 'red'}]{resp.status_code}[/] (took {dt:.2f}s)")
+        resp.raise_for_status()
+        dump_json = resp.json()
+        console.print(Panel(Syntax(json.dumps(dump_json, indent=2), "json", theme="monokai"), title="Dump()", border_style="cyan"))
+    except Exception as e:
+        console.print(f"[bold red]Error in valid /config/dump call:[/] {e}")
+
+
+async def demo_config_dump_invalid(client: httpx.AsyncClient):
+    """
+    Purposely break the rule (nested call) to show the 400 parse error.
+    """
+    bad_code = textwrap.dedent("""
+        BrowserConfig(headless=True); CrawlerRunConfig()
+    """).strip()
+    payload = {"code": bad_code}
+
+    console.rule("[bold magenta]Demo 8b: /config/dump (invalid)[/]", style="magenta")
+    print_payload(payload)
+
+    try:
+        resp = await client.post("/config/dump", json=payload)
+        console.print(f"Response Status: [bold {'green' if resp.is_success else 'red'}]{resp.status_code}[/]")
+        resp.raise_for_status()  # should throw -> except
+    except httpx.HTTPStatusError as e:
+        console.print("[cyan]Expected parse/validation failure captured:[/]")
+        try:
+            console.print(Panel(Syntax(json.dumps(e.response.json(), indent=2), "json", theme="fruity"), title="Error payload"))
+        except Exception:
+            console.print(e.response.text)
+    except Exception as e:
+        console.print(f"[bold red]Unexpected error during invalid test:[/] {e}")
+
+
 # --- Update Main Runner to include new demo ---
 async def main_demo():
     async with httpx.AsyncClient(base_url=BASE_URL, timeout=300.0) as client:
         if not await check_server_health(client):
             return
 
         # --- Run Demos ---
         await demo_basic_single_url(client)
         await demo_basic_multi_url(client)
@@ -1001,7 +1101,15 @@ async def main_demo():
         await demo_deep_with_css_extraction(client)
         await demo_deep_with_llm_extraction(client)  # Skips if no common LLM key env var
         await demo_deep_with_proxy(client)  # Skips if no PROXIES env var
         await demo_deep_with_ssl(client)  # Added the new demo
+
+        # --- Helper endpoints ---
+        await demo_markdown_endpoint(client)
+        await demo_llm_endpoint(client)
+
+        # --- /config/dump sanity checks ---
+        await demo_config_dump_valid(client)
+        await demo_config_dump_invalid(client)
 
         console.rule("[bold green]Demo Complete[/]", style="green")