Spaces:
Running
Running
Update main.py
Browse files
main.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
import os
|
2 |
import asyncio
|
3 |
from fastapi import FastAPI, HTTPException
|
4 |
-
from pydantic import BaseModel, Field
|
5 |
from typing import List, Optional
|
6 |
from crawl4ai import AsyncWebCrawler
|
7 |
from crawl4ai.extraction_strategy import JsonCssExtractionStrategy, LLMExtractionStrategy
|
@@ -31,15 +31,22 @@ async def crawl(input: CrawlerInput):
|
|
31 |
# Create a dictionary with columns as keys and descriptions as values
|
32 |
extraction_info = {col: desc for col, desc in zip(input.columns, input.descriptions)}
|
33 |
|
34 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
instruction = f"Extract the following information: {json.dumps(extraction_info)}"
|
36 |
|
37 |
async with AsyncWebCrawler(verbose=True) as crawler:
|
38 |
result = await crawler.arun(
|
39 |
url=input.url,
|
40 |
extraction_strategy=LLMExtractionStrategy(
|
41 |
-
provider="openai/gpt-
|
42 |
api_token=os.getenv('OPENAI_API_KEY'),
|
|
|
43 |
extraction_type="schema",
|
44 |
verbose=True,
|
45 |
instruction=instruction
|
@@ -56,4 +63,4 @@ async def test():
|
|
56 |
|
57 |
if __name__ == "__main__":
|
58 |
import uvicorn
|
59 |
-
uvicorn.run(app, host="0.0.0.0", port=
|
|
|
1 |
import os
|
2 |
import asyncio
|
3 |
from fastapi import FastAPI, HTTPException
|
4 |
+
from pydantic import BaseModel, Field, create_model
|
5 |
from typing import List, Optional
|
6 |
from crawl4ai import AsyncWebCrawler
|
7 |
from crawl4ai.extraction_strategy import JsonCssExtractionStrategy, LLMExtractionStrategy
|
|
|
31 |
# Create a dictionary with columns as keys and descriptions as values
|
32 |
extraction_info = {col: desc for col, desc in zip(input.columns, input.descriptions)}
|
33 |
|
34 |
+
# Create a dynamic Pydantic model based on the input columns and descriptions
|
35 |
+
dynamic_model = create_model(
|
36 |
+
'DynamicModel',
|
37 |
+
**{col: (str, Field(..., description=desc)) for col, desc in extraction_info.items()}
|
38 |
+
)
|
39 |
+
|
40 |
+
# Convert the dictionary to a JSON string for the instruction
|
41 |
instruction = f"Extract the following information: {json.dumps(extraction_info)}"
|
42 |
|
43 |
async with AsyncWebCrawler(verbose=True) as crawler:
|
44 |
result = await crawler.arun(
|
45 |
url=input.url,
|
46 |
extraction_strategy=LLMExtractionStrategy(
|
47 |
+
provider="openai/gpt-3.5-turbo",
|
48 |
api_token=os.getenv('OPENAI_API_KEY'),
|
49 |
+
schema=dynamic_model.schema(),
|
50 |
extraction_type="schema",
|
51 |
verbose=True,
|
52 |
instruction=instruction
|
|
|
63 |
|
64 |
if __name__ == "__main__":
|
65 |
import uvicorn
|
66 |
+
uvicorn.run(app, host="0.0.0.0", port=8000)
|