Engineer786 committed on
Commit 6ef2bb8 · verified · 1 Parent(s): d5ca0a3

Update tariff_scraper.py

Files changed (1)
  1. tariff_scraper.py +44 -7
tariff_scraper.py CHANGED
@@ -1,7 +1,7 @@
 import requests
 from bs4 import BeautifulSoup
 
-# Dictionary mapping companies to their URLs
+# URLs for all electricity companies
 TARIFF_URLS = {
     "IESCO": "https://iesco.com.pk/index.php/customer-services/tariff-guide",
     "FESCO": "https://fesco.com.pk/tariff",
@@ -16,25 +16,62 @@ TARIFF_URLS = {
 def scrape_tariff_data(url):
     """
     Scrape tariff data from the given URL.
+
+    Args:
+        url (str): The URL of the tariff page to scrape.
+
+    Returns:
+        list: A list of strings representing the rows of tariff data.
     """
     try:
+        # Send an HTTP GET request to the specified URL
         response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"})
-        response.raise_for_status()  # Raise an error for bad responses
+        response.raise_for_status()  # Raise an error for HTTP issues
+
+        # Parse the webpage content using BeautifulSoup
         soup = BeautifulSoup(response.text, 'html.parser')
 
-        # Extract specific elements based on the webpage structure
-        tariff_sections = soup.find_all('table')  # Assume tariff data is in <table> tags
+        # Try to find all <table> elements in the page
+        tariff_sections = soup.find_all('table')
+        if not tariff_sections:
+            return ["No tables found on the webpage."]
 
         data = []
         for section in tariff_sections:
             table_rows = section.find_all('tr')
             for row in table_rows:
+                # Extract text from each <td> or <th> within the row
                 row_text = ' | '.join(
                     col.get_text(strip=True) for col in row.find_all(['th', 'td'])
                 )
-                if row_text:  # Add the row text only if it contains data
+                if row_text:  # Add only rows that have meaningful data
                     data.append(row_text)
 
-        return data if data else ["No data found on the webpage."]
+        return data if data else ["No data found in the tables."]
+    except requests.exceptions.RequestException as e:
+        # Handle request errors (e.g., connection issues, timeout)
+        return [f"Request error: {e}"]
     except Exception as e:
-        return [f"An error occurred: {e}"]
+        # Handle other potential errors
+        return [f"An unexpected error occurred: {e}"]
+
+if __name__ == "__main__":
+    # Let the user select a company and fetch the corresponding tariff data
+    print("Available Companies:")
+    for idx, company in enumerate(TARIFF_URLS.keys(), start=1):
+        print(f"{idx}. {company}")
+
+    try:
+        # User selects a company
+        selection = int(input("Enter the number corresponding to the company: "))
+        selected_company = list(TARIFF_URLS.keys())[selection - 1]
+        url = TARIFF_URLS[selected_company]
+        print(f"\nFetching tariff data for {selected_company} ({url})...\n")
+
+        # Scrape and display the data
+        tariff_data = scrape_tariff_data(url)
+        print("Tariff Data:")
+        for row in tariff_data[:10]:  # Show a preview of the first 10 rows
+            print(row)
+    except (ValueError, IndexError):
+        print("Invalid selection. Please choose a valid company number.")