
Rename LlmConfig to LLMConfig across the codebase to follow consistent naming conventions. Update all imports, usages, documentation, and examples to reflect the new name. BREAKING CHANGE: LlmConfig has been renamed to LLMConfig; users must update their imports and usage.
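For users migrating, a minimal before/after sketch (the old import path is assumed to mirror the new one used in the example below; adjust it to match your installed version):

import os

# Before the rename (old class name; exact import path may vary by release):
# from crawl4ai.types import LlmConfig
# llm_config = LlmConfig(provider="groq/llama-3.1-70b-versatile", api_token=os.getenv("GROQ_API_KEY"))

# After the rename:
from crawl4ai.types import LLMConfig
llm_config = LLMConfig(provider="groq/llama-3.1-70b-versatile", api_token=os.getenv("GROQ_API_KEY"))

The full example below already uses the new name.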
from crawl4ai.types import LLMConfig
from crawl4ai import AsyncWebCrawler, LLMExtractionStrategy
import asyncio
import os
import json
from pydantic import BaseModel, Field

url = "https://openai.com/api/pricing/"


class OpenAIModelFee(BaseModel):
    model_name: str = Field(..., description="Name of the OpenAI model.")
    input_fee: str = Field(..., description="Fee for input token for the OpenAI model.")
    output_fee: str = Field(
        ..., description="Fee for output token for the OpenAI model."
    )


async def main():
    # Use AsyncWebCrawler as an async context manager
    async with AsyncWebCrawler() as crawler:
        result = await crawler.arun(
            url=url,
            word_count_threshold=1,
            extraction_strategy=LLMExtractionStrategy(
                # Alternative provider: llm_config=LLMConfig(provider="openai/gpt-4o", api_token=os.getenv("OPENAI_API_KEY")),
                llm_config=LLMConfig(provider="groq/llama-3.1-70b-versatile", api_token=os.getenv("GROQ_API_KEY")),
                schema=OpenAIModelFee.model_json_schema(),
                extraction_type="schema",
                instruction="From the crawled content, extract all mentioned model names along with their "
                "fees for input and output tokens. Make sure not to miss anything in the entire content. "
                "One extracted model JSON format should look like this: "
                '{ "model_name": "GPT-4", "input_fee": "US$10.00 / 1M tokens", "output_fee": "US$30.00 / 1M tokens" }',
            ),
        )
        print("Success:", result.success)
        model_fees = json.loads(result.extracted_content)
        print(len(model_fees))

        # Ensure the output directory exists before writing the extracted data
        os.makedirs(".data", exist_ok=True)
        with open(".data/data.json", "w", encoding="utf-8") as f:
            f.write(result.extracted_content)


asyncio.run(main())