webscrapper

Sleeping

Arafath10 committed on Oct 11

Commit

cebdc58

•

1 Parent(s): b8a08e6

Update main.py

Files changed (1) hide show

main.py CHANGED Viewed

@@ -37,6 +37,25 @@ async def get_data(url: str):
             return {"title": "error", "URL": url, "Content": "none"}
 # FastAPI route to scrape the website
 @app.get("/scrape")
 async def scrape_website(url: str):

             return {"title": "error", "URL": url, "Content": "none"}
+@app.get("/fast_scrape")
+async def fast_scrape(url: str):
+    """Fetch ``url`` with a plain HTTP GET and return the text of its ``<body>``.
+
+    The page is downloaded with ``requests`` (no JavaScript rendering) and
+    parsed with BeautifulSoup's ``html.parser``.
+
+    Args:
+        url: Address of the page to download.
+
+    Returns:
+        The body text with whitespace-stripped fragments joined by single
+        spaces, or ``''`` when the document has no ``<body>`` tag. If the
+        request itself fails (connection error, timeout, malformed URL), the
+        same error payload used by the other routes in this file is returned:
+        ``{"title": "error", "URL": url, "Content": "none"}``.
+    """
+    import asyncio
+    import functools
+
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+    }
+    # requests is a blocking client: calling it directly inside an async route
+    # would stall the event loop for every other request, so run it in the
+    # default thread pool. The timeout keeps an unresponsive host from hanging
+    # the handler forever.
+    fetch = functools.partial(requests.get, url, headers=headers, timeout=10)
+    try:
+        response = await asyncio.get_running_loop().run_in_executor(None, fetch)
+    except requests.RequestException:
+        # Mirror the error shape returned by the sibling scraping routes
+        # instead of surfacing an unhandled exception to the client.
+        return {"title": "error", "URL": url, "Content": "none"}
+    # Parse the raw bytes so BeautifulSoup can detect the document encoding.
+    soup = BeautifulSoup(response.content, 'html.parser')
+    body = soup.find('body')
+    return body.get_text(separator=' ', strip=True) if body else ''
 # FastAPI route to scrape the website
 @app.get("/scrape")
 async def scrape_website(url: str):