pvanand committed on
Commit
7b62481
·
verified ·
1 Parent(s): 0d8a725

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +11 -4
main.py CHANGED
@@ -1,7 +1,7 @@
1
  import os
2
  import asyncio
3
  from fastapi import FastAPI, HTTPException
4
- from pydantic import BaseModel, Field
5
  from typing import List, Optional
6
  from crawl4ai import AsyncWebCrawler
7
  from crawl4ai.extraction_strategy import JsonCssExtractionStrategy, LLMExtractionStrategy
@@ -31,15 +31,22 @@ async def crawl(input: CrawlerInput):
31
  # Create a dictionary with columns as keys and descriptions as values
32
  extraction_info = {col: desc for col, desc in zip(input.columns, input.descriptions)}
33
 
34
- # Convert the dictionary to a JSON string
 
 
 
 
 
 
35
  instruction = f"Extract the following information: {json.dumps(extraction_info)}"
36
 
37
  async with AsyncWebCrawler(verbose=True) as crawler:
38
  result = await crawler.arun(
39
  url=input.url,
40
  extraction_strategy=LLMExtractionStrategy(
41
- provider="openai/gpt-4o-mini",
42
  api_token=os.getenv('OPENAI_API_KEY'),
 
43
  extraction_type="schema",
44
  verbose=True,
45
  instruction=instruction
@@ -56,4 +63,4 @@ async def test():
56
 
57
  if __name__ == "__main__":
58
  import uvicorn
59
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
  import os
2
  import asyncio
3
  from fastapi import FastAPI, HTTPException
4
+ from pydantic import BaseModel, Field, create_model
5
  from typing import List, Optional
6
  from crawl4ai import AsyncWebCrawler
7
  from crawl4ai.extraction_strategy import JsonCssExtractionStrategy, LLMExtractionStrategy
 
31
  # Create a dictionary with columns as keys and descriptions as values
32
  extraction_info = {col: desc for col, desc in zip(input.columns, input.descriptions)}
33
 
34
+ # Create a dynamic Pydantic model based on the input columns and descriptions
35
+ dynamic_model = create_model(
36
+ 'DynamicModel',
37
+ **{col: (str, Field(..., description=desc)) for col, desc in extraction_info.items()}
38
+ )
39
+
40
+ # Convert the dictionary to a JSON string for the instruction
41
  instruction = f"Extract the following information: {json.dumps(extraction_info)}"
42
 
43
  async with AsyncWebCrawler(verbose=True) as crawler:
44
  result = await crawler.arun(
45
  url=input.url,
46
  extraction_strategy=LLMExtractionStrategy(
47
+ provider="openai/gpt-3.5-turbo",
48
  api_token=os.getenv('OPENAI_API_KEY'),
49
+ schema=dynamic_model.schema(),
50
  extraction_type="schema",
51
  verbose=True,
52
  instruction=instruction
 
63
 
64
  if __name__ == "__main__":
65
  import uvicorn
66
+ uvicorn.run(app, host="0.0.0.0", port=8000)