From 146f9d415fe4451265eeefc9249e384f14da6a31 Mon Sep 17 00:00:00 2001
From: UncleCode
Date: Wed, 23 Apr 2025 19:50:33 +0800
Subject: [PATCH] Update README

---
 README.md                                | 31 ++++++++++++++++++---
 docs/examples/crypto_analysis_example.py | 34 ++++++++++++------------
 2 files changed, 45 insertions(+), 20 deletions(-)

diff --git a/README.md b/README.md
index 7892a30..2147ee2 100644
--- a/README.md
+++ b/README.md
@@ -523,8 +523,33 @@ async def test_news_crawl():
 
 - **📊 Table-to-DataFrame Extraction**: Extract HTML tables directly to CSV or pandas DataFrames:
   ```python
-  crawler_config = CrawlerRunConfig(extract_tables=True)
-  # Access tables via result.tables or result.tables_as_dataframe
+  crawler = AsyncWebCrawler(config=browser_config)
+  await crawler.start()
+
+  try:
+      # Set up scraping parameters
+      crawl_config = CrawlerRunConfig(
+          table_score_threshold=8,  # Strict table detection
+      )
+
+      # Execute market data extraction
+      results: List[CrawlResult] = await crawler.arun(
+          url="https://coinmarketcap.com/?page=1", config=crawl_config
+      )
+
+      # Process results
+      raw_df = pd.DataFrame()
+      for result in results:
+          if result.success and result.media["tables"]:
+              raw_df = pd.DataFrame(
+                  result.media["tables"][0]["rows"],
+                  columns=result.media["tables"][0]["headers"],
+              )
+              break
+      print(raw_df.head())
+
+  finally:
+      await crawler.stop()
   ```
 
 - **🚀 Browser Pooling**: Pages launch hot with pre-warmed browser instances for lower latency and memory usage
@@ -544,7 +569,7 @@ async def test_news_crawl():
   claude mcp add --transport sse c4ai-sse http://localhost:11235/mcp/sse
   ```
 
-- **🖥️ Interactive Playground**: Test configurations and generate API requests with the built-in web interface at `/playground`
+- **🖥️ Interactive Playground**: Test configurations and generate API requests with the built-in web interface at `http://localhost:11235/playground`
 
 - **🐳 Revamped Docker Deployment**: Streamlined multi-architecture Docker image with improved resource efficiency
 
diff --git a/docs/examples/crypto_analysis_example.py b/docs/examples/crypto_analysis_example.py
index 3cdba2c..10b9e7a 100644
--- a/docs/examples/crypto_analysis_example.py
+++ b/docs/examples/crypto_analysis_example.py
@@ -383,29 +383,29 @@ async def main():
             scroll_delay=0.2,
         )
 
-        # # Execute market data extraction
-        # results: List[CrawlResult] = await crawler.arun(
-        #     url="https://coinmarketcap.com/?page=1", config=crawl_config
-        # )
+        # Execute market data extraction
+        results: List[CrawlResult] = await crawler.arun(
+            url="https://coinmarketcap.com/?page=1", config=crawl_config
+        )
 
-        # # Process results
-        # raw_df = pd.DataFrame()
-        # for result in results:
-        #     if result.success and result.media["tables"]:
-        #         # Extract primary market table
-        #         # DataFrame
-        #         raw_df = pd.DataFrame(
-        #             result.media["tables"][0]["rows"],
-        #             columns=result.media["tables"][0]["headers"],
-        #         )
-        #         break
+        # Process results
+        raw_df = pd.DataFrame()
+        for result in results:
+            if result.success and result.media["tables"]:
+                # Extract the primary market table
+                # into a pandas DataFrame
+                raw_df = pd.DataFrame(
+                    result.media["tables"][0]["rows"],
+                    columns=result.media["tables"][0]["headers"],
+                )
+                break
 
         # This is for debugging only
         # ////// Remove this in production from here..
 
         # Save raw data for debugging
-        # raw_df.to_csv(f"{__current_dir__}/tmp/raw_crypto_data.csv", index=False)
-        # print("🔍 Raw data saved to 'raw_crypto_data.csv'")
+        raw_df.to_csv(f"{__current_dir__}/tmp/raw_crypto_data.csv", index=False)
+        print("🔍 Raw data saved to 'raw_crypto_data.csv'")
 
         # Read from file for debugging
         raw_df = pd.read_csv(f"{__current_dir__}/tmp/raw_crypto_data.csv")
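For anyone trying the new table extraction outside the README, below is a self-contained sketch of the flow this patch documents. The imports, the `BrowserConfig(headless=True)` setup, and the `asyncio.run` entry point are assumptions filled in for illustration (the README snippet references a pre-existing `browser_config`); the `arun` call and `result.media["tables"]` access mirror the patch itself.

```python
import asyncio
from typing import List

import pandas as pd

# Assumed imports: crawl4ai exposes these names in the version this patch targets.
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CrawlResult


async def main() -> None:
    # Hypothetical browser setup; the README snippet assumes an existing
    # `browser_config` without defining it.
    browser_config = BrowserConfig(headless=True)
    crawler = AsyncWebCrawler(config=browser_config)
    await crawler.start()

    try:
        crawl_config = CrawlerRunConfig(
            table_score_threshold=8,  # Higher = stricter table detection (per the patch comment)
        )
        results: List[CrawlResult] = await crawler.arun(
            url="https://coinmarketcap.com/?page=1", config=crawl_config
        )

        # Each detected table is a dict with "headers" and "rows" keys.
        df = pd.DataFrame()
        for result in results:
            if result.success and result.media["tables"]:
                table = result.media["tables"][0]
                df = pd.DataFrame(table["rows"], columns=table["headers"])
                break
        print(df.head())
    finally:
        await crawler.stop()


if __name__ == "__main__":
    asyncio.run(main())
```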