Spaces:
Build error
Build error
Update tariff_scraper.py
Browse files- tariff_scraper.py +44 -7
tariff_scraper.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
import requests
|
2 |
from bs4 import BeautifulSoup
|
3 |
|
4 |
-
#
|
5 |
TARIFF_URLS = {
|
6 |
"IESCO": "https://iesco.com.pk/index.php/customer-services/tariff-guide",
|
7 |
"FESCO": "https://fesco.com.pk/tariff",
|
@@ -16,25 +16,62 @@ TARIFF_URLS = {
|
|
16 |
def scrape_tariff_data(url):
|
17 |
"""
|
18 |
Scrape tariff data from the given URL.
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
"""
|
20 |
try:
|
|
|
21 |
response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"})
|
22 |
-
response.raise_for_status() # Raise an error for
|
|
|
|
|
23 |
soup = BeautifulSoup(response.text, 'html.parser')
|
24 |
|
25 |
-
#
|
26 |
-
tariff_sections = soup.find_all('table')
|
|
|
|
|
27 |
|
28 |
data = []
|
29 |
for section in tariff_sections:
|
30 |
table_rows = section.find_all('tr')
|
31 |
for row in table_rows:
|
|
|
32 |
row_text = ' | '.join(
|
33 |
col.get_text(strip=True) for col in row.find_all(['th', 'td'])
|
34 |
)
|
35 |
-
if row_text: # Add
|
36 |
data.append(row_text)
|
37 |
|
38 |
-
return data if data else ["No data found
|
|
|
|
|
|
|
39 |
except Exception as e:
|
40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import requests
|
2 |
from bs4 import BeautifulSoup
|
3 |
|
4 |
+
# URLs for all electricity companies
|
5 |
TARIFF_URLS = {
|
6 |
"IESCO": "https://iesco.com.pk/index.php/customer-services/tariff-guide",
|
7 |
"FESCO": "https://fesco.com.pk/tariff",
|
|
|
16 |
def scrape_tariff_data(url, timeout=30):
    """
    Scrape tariff data from the given URL.

    Every <table> on the page is flattened into one string per row, with
    the text of the row's <th>/<td> cells joined by ' | '.

    Args:
        url (str): The URL of the tariff page to scrape.
        timeout (float): Seconds to wait for the server before giving up.
            Without a timeout, requests waits indefinitely on a stalled
            connection, so a finite default is applied.

    Returns:
        list: A list of strings representing the rows of tariff data.
            Failures are reported in-band rather than raised: the list
            then holds a single human-readable message (no tables found,
            no data found, request error, or unexpected error).
    """
    try:
        # Send an HTTP GET request; the browser-like User-Agent is kept
        # from the original request headers.
        response = requests.get(
            url,
            headers={"User-Agent": "Mozilla/5.0"},
            timeout=timeout,
        )
        response.raise_for_status()  # Raise an error for HTTP issues

        # Parse the webpage content using BeautifulSoup
        soup = BeautifulSoup(response.text, 'html.parser')

        # Try to find all <table> elements in the page
        tariff_sections = soup.find_all('table')
        if not tariff_sections:
            return ["No tables found on the webpage."]

        data = []
        for section in tariff_sections:
            for row in section.find_all('tr'):
                # Extract text from each <td> or <th> within the row
                cells = [
                    col.get_text(strip=True)
                    for col in row.find_all(['th', 'td'])
                ]
                # Test the cells, not the joined string: a row of several
                # empty cells joins to ' | ', which is truthy and would
                # otherwise be kept as a data row.
                if any(cells):
                    data.append(' | '.join(cells))

        return data if data else ["No data found in the tables."]
    except requests.exceptions.RequestException as e:
        # Handle request errors (e.g., connection issues, timeout)
        return [f"Request error: {e}"]
    except Exception as e:
        # Handle other potential errors (e.g., parser failures) so the
        # caller always receives a list of strings
        return [f"An unexpected error occurred: {e}"]
|
57 |
+
|
58 |
+
if __name__ == "__main__":
    # Interactive entry point: list the known companies, let the user pick
    # one by number, then print a preview of the scraped tariff rows.
    companies = list(TARIFF_URLS)

    print("Available Companies:")
    for idx, company in enumerate(companies, start=1):
        print(f"{idx}. {company}")

    try:
        # User selects a company
        selection = int(input("Enter the number corresponding to the company: "))
    except ValueError:
        selection = None

    # Validate the range explicitly: with plain indexing, 0 or a negative
    # number would wrap around and silently select a company from the end
    # of the list instead of being rejected.
    if selection is None or not 1 <= selection <= len(companies):
        print("Invalid selection. Please choose a valid company number.")
    else:
        selected_company = companies[selection - 1]
        url = TARIFF_URLS[selected_company]
        print(f"\nFetching tariff data for {selected_company} ({url})...\n")

        # Scrape and display the data
        tariff_data = scrape_tariff_data(url)
        print("Tariff Data:")
        for row in tariff_data[:10]:  # Show a preview of the first 10 rows
            print(row)
|