crawl4ai/tests/mcp/test_mcp_socket.py
UncleCode 5297e362f3 feat(mcp): Implement MCP protocol and enhance server capabilities
This commit introduces several significant enhancements to the Crawl4AI Docker deployment:

  1. Add MCP Protocol Support:
     - Implement WebSocket and SSE transport layers for MCP server communication
     - Create mcp_bridge.py to expose existing API endpoints via MCP protocol
     - Add comprehensive tests for both socket and SSE transport methods

  2. Enhance Docker Server Capabilities:
     - Add PDF generation endpoint with file saving functionality
     - Add screenshot capture endpoint with configurable wait time
     - Implement JavaScript execution endpoint for dynamic page interaction
     - Add intelligent file path handling for saving generated assets

  3. Improve Search and Context Functionality:
     - Implement syntax-aware code function chunking using AST parsing
     - Add BM25-based intelligent document search with relevance scoring
     - Create separate code and documentation context endpoints
     - Enhance response format with structured results and scores

  4. Rename and Fix File Organization:
     - Fix typo in test_docker_config_gen.py filename
     - Update import statements and dependencies
     - Add FileResponse for context endpoints

  This enhancement significantly improves the machine-to-machine communication
  capabilities of Crawl4AI, making it more suitable for integration with LLM agents
  and other automated systems.

  The CHANGELOG update has been applied successfully, highlighting the key features and improvements made in this release. The commit message provides a detailed explanation of all the
  changes, which will be helpful for tracking the project's evolution.
2025-04-21 22:22:02 +08:00

120 lines
3.8 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# pip install "mcp-sdk[ws]" anyio
import anyio, json
from mcp.client.websocket import websocket_client
from mcp.client.session import ClientSession
async def test_list():
async with websocket_client("ws://localhost:8020/mcp/ws") as (r, w):
async with ClientSession(r, w) as s:
await s.initialize()
print("tools :", [t.name for t in (await s.list_tools()).tools])
print("resources :", [r.name for r in (await s.list_resources()).resources])
print("templates :", [t.name for t in (await s.list_resource_templates()).resource_templates])
async def test_crawl(s: ClientSession) -> None:
"""Hit the @mcp_tool('crawl') endpoint."""
res = await s.call_tool(
"crawl",
{
"urls": ["https://example.com"],
"browser_config": {},
"crawler_config": {},
},
)
print("crawl →", json.loads(res.content[0].text))
async def test_md(s: ClientSession) -> None:
"""Hit the @mcp_tool('md') endpoint."""
res = await s.call_tool(
"md",
{
"url": "https://example.com",
"f": "fit", # or RAW, BM25, LLM
"q": None,
"c": "0",
},
)
result = json.loads(res.content[0].text)
print("md →", result['markdown'][:100], "...")
async def test_screenshot(s: ClientSession):
res = await s.call_tool(
"screenshot",
{
"url": "https://example.com",
"screenshot_wait_for": 1.0,
},
)
png_b64 = json.loads(res.content[0].text)["screenshot"]
print("screenshot →", png_b64[:60], "… (base64)")
async def test_pdf(s: ClientSession):
res = await s.call_tool(
"pdf",
{
"url": "https://example.com",
},
)
pdf_b64 = json.loads(res.content[0].text)["pdf"]
print("pdf →", pdf_b64[:60], "… (base64)")
async def test_execute_js(s: ClientSession):
# click the “More” link on Hacker News front page and wait 1 s
res = await s.call_tool(
"execute_js",
{
"url": "https://news.ycombinator.com/news",
"js_code": [
"await page.click('a.morelink')",
"await page.waitForTimeout(1000)",
],
},
)
crawl_result = json.loads(res.content[0].text)
print("execute_js → status", crawl_result["success"], "| html len:", len(crawl_result["html"]))
async def test_html(s: ClientSession):
# click the “More” link on Hacker News front page and wait 1 s
res = await s.call_tool(
"html",
{
"url": "https://news.ycombinator.com/news",
},
)
crawl_result = json.loads(res.content[0].text)
print("execute_js → status", crawl_result["success"], "| html len:", len(crawl_result["html"]))
async def test_context(s: ClientSession):
# click the “More” link on Hacker News front page and wait 1 s
res = await s.call_tool(
"ask",
{
"query": "I hv a question about Crawl4ai library, how to extract internal links when crawling a page?"
},
)
crawl_result = json.loads(res.content[0].text)
print("execute_js → status", crawl_result["success"], "| html len:", len(crawl_result["html"]))
async def main() -> None:
async with websocket_client("ws://localhost:8020/mcp/ws") as (r, w):
async with ClientSession(r, w) as s:
await s.initialize() # handshake
tools = (await s.list_tools()).tools
print("tools:", [t.name for t in tools])
# await test_list()
# await test_crawl(s)
# await test_md(s)
# await test_screenshot(s)
# await test_pdf(s)
# await test_execute_js(s)
# await test_html(s)
await test_context(s)
anyio.run(main)