Arafath10 committed on
Commit
cebdc58
1 Parent(s): b8a08e6

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +19 -0
main.py CHANGED
@@ -37,6 +37,25 @@ async def get_data(url: str):
37
  return {"title": "error", "URL": url, "Content": "none"}
38
 
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  # FastAPI route to scrape the website
41
  @app.get("/scrape")
42
  async def scrape_website(url: str):
 
37
  return {"title": "error", "URL": url, "Content": "none"}
38
 
39
 
40
+
41
@app.get("/fast_scrape")
async def fast_scrape(url: str):
    """Fetch ``url`` with a single HTTP GET and return the text of its <body>.

    The page is downloaded with ``requests`` (no browser, no JavaScript
    execution) and parsed with BeautifulSoup's ``html.parser``.

    Args:
        url: Address of the page to download.

    Returns:
        The text found inside the ``<body>`` tag, with fragments stripped and
        joined by single spaces; an empty string when the document has no
        ``<body>``. If the request itself fails (connection error, timeout,
        invalid URL, ...), the same error payload used elsewhere in this file
        is returned: ``{"title": "error", "URL": url, "Content": "none"}``.
    """
    # Local imports keep this block self-contained; both are stdlib.
    import asyncio
    import functools

    # Upper bound (seconds) for connecting to / reading from the remote host.
    # Without it, requests waits forever on an unresponsive server.
    fetch_timeout = 10

    # Browser-like User-Agent sent with the request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    # Step 1: Send a request to the webpage.
    # requests.get is blocking; calling it directly inside an async route would
    # stall the event loop, so run it in the default thread-pool executor.
    fetch = functools.partial(
        requests.get, url, headers=headers, timeout=fetch_timeout
    )
    try:
        response = await asyncio.get_running_loop().run_in_executor(None, fetch)
    except requests.RequestException:
        return {"title": "error", "URL": url, "Content": "none"}

    # Step 2: Parse the HTML content using BeautifulSoup.
    # NOTE(review): the HTTP status code is not checked, so the body of an
    # error page (4xx/5xx) is parsed and returned like any other page.
    soup = BeautifulSoup(response.content, 'html.parser')

    # Step 3: Extract the body tag and get all text within it.
    body = soup.find('body')
    body_text = body.get_text(separator=' ', strip=True) if body else ''

    # Step 4: Output the body text.
    return body_text
58
+
59
  # FastAPI route to scrape the website
60
  @app.get("/scrape")
61
  async def scrape_website(url: str):