from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware

import requests
from bs4 import BeautifulSoup

try:
    from pip._internal.operations import freeze
except ImportError:  # pip < 10.0
    from pip.operations import freeze

# Startup diagnostic: print the installed packages so the runtime
# environment can be checked in the deployment logs.
for pkg in freeze.freeze():
    print(pkg)

app = FastAPI()

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


@app.get("/get_scraped_data")
def get_data(url: str):
    # Declared as a plain `def` so FastAPI runs the handler in a threadpool:
    # the blocking requests call below would otherwise stall the event loop.
    #
    # Example URL to scrape:
    # "https://www.imf.org/en/News/Articles/2024/03/21/pr2494-sri-lanka-imf-staff-level-agreement-for-second-review-sla"

    # Send a GET request to the URL
    try:
        response = requests.get(url, timeout=30)
    except requests.RequestException as e:
        raise HTTPException(status_code=502, detail=str(e))

    # Check if the request was successful
    if response.status_code != 200:
        raise HTTPException(status_code=502, detail="Failed to retrieve the webpage")

    # Parse the page content
    soup = BeautifulSoup(response.content, "html.parser")

    # Extract all text content (paragraphs and headers)
    elements = soup.find_all(["p", "h1", "h2", "h3", "h4", "h5", "h6"])
    body_text = "\n".join(element.get_text().strip() for element in elements)

    # Extract all links
    links = [a_tag["href"] for a_tag in soup.find_all("a", href=True)]

    # Return the extracted information to the caller instead of printing it
    return {"body_text": body_text, "links": links}
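
# A minimal sketch of how to run and exercise this service locally, assuming
# this file is saved as main.py and uvicorn is installed (neither is stated
# in the original code):
#
#   uvicorn main:app --reload
#   curl "http://127.0.0.1:8000/get_scraped_data?url=https://example.com"
if __name__ == "__main__":
    # Optional convenience entry point so `python main.py` also starts the app.
    import uvicorn

    uvicorn.run(app, host="127.0.0.1", port=8000)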