NightFury2710 committed on
Commit
85f58d9
·
1 Parent(s): 6c4f9d7

update api handle 3

Browse files
Files changed (1) hide show
  1. app.py +4 -6
app.py CHANGED
@@ -23,7 +23,6 @@ class CrawlRequest(BaseModel):
23
  cache_mode: str = "ENABLED"
24
  excluded_tags: list[str] = ["nav", "footer", "aside", "header", "script", "style"]
25
  remove_overlay_elements: bool = True
26
- max_pages: int = 1 # Limit number of pages to crawl
27
  timeout: int = 30 # Timeout in seconds
28
 
29
  class Article(BaseModel):
@@ -169,15 +168,14 @@ async def crawl_url(request: CrawlRequest):
169
  try:
170
  cache_mode = getattr(CacheMode, request.cache_mode)
171
 
172
- # Create crawler with improved configuration
173
  async with AsyncWebCrawler() as crawler:
174
  config = CrawlerRunConfig(
175
  cache_mode=cache_mode,
176
  excluded_tags=request.excluded_tags,
177
  remove_overlay_elements=request.remove_overlay_elements,
178
- max_pages=request.max_pages,
179
  timeout=request.timeout,
180
- # Added from quickstart examples
181
  remove_ads=True,
182
  extract_text=True,
183
  extract_links=True,
@@ -189,11 +187,11 @@ async def crawl_url(request: CrawlRequest):
189
  config=config
190
  )
191
 
192
- # Use both markdown and HTML results for better extraction
193
  markdown = result.markdown_v2.raw_markdown
194
  html = result.html
195
 
196
- # Extract content using both markdown and HTML
197
  articles = extract_articles(markdown)
198
  metadata = extract_metadata(markdown, html)
199
 
 
23
  cache_mode: str = "ENABLED"
24
  excluded_tags: list[str] = ["nav", "footer", "aside", "header", "script", "style"]
25
  remove_overlay_elements: bool = True
 
26
  timeout: int = 30 # Timeout in seconds
27
 
28
  class Article(BaseModel):
 
168
  try:
169
  cache_mode = getattr(CacheMode, request.cache_mode)
170
 
171
+ # Create crawler with correct configuration parameters
172
  async with AsyncWebCrawler() as crawler:
173
  config = CrawlerRunConfig(
174
  cache_mode=cache_mode,
175
  excluded_tags=request.excluded_tags,
176
  remove_overlay_elements=request.remove_overlay_elements,
 
177
  timeout=request.timeout,
178
+ # Core features from documentation
179
  remove_ads=True,
180
  extract_text=True,
181
  extract_links=True,
 
187
  config=config
188
  )
189
 
190
+ # Use both markdown and HTML results
191
  markdown = result.markdown_v2.raw_markdown
192
  html = result.html
193
 
194
+ # Extract content
195
  articles = extract_articles(markdown)
196
  metadata = extract_metadata(markdown, html)
197