Engineer786 committed on
Commit 6ef2bb8 · verified · 1 Parent(s): d5ca0a3

Update tariff_scraper.py

Files changed (1)
  1. tariff_scraper.py +44 -7
tariff_scraper.py CHANGED
@@ -1,7 +1,7 @@
 import requests
 from bs4 import BeautifulSoup
 
-# Dictionary mapping companies to their URLs
+# URLs for all electricity companies
 TARIFF_URLS = {
     "IESCO": "https://iesco.com.pk/index.php/customer-services/tariff-guide",
     "FESCO": "https://fesco.com.pk/tariff",
@@ -16,25 +16,62 @@ TARIFF_URLS = {
 def scrape_tariff_data(url):
     """
     Scrape tariff data from the given URL.
+
+    Args:
+        url (str): The URL of the tariff page to scrape.
+
+    Returns:
+        list: A list of strings representing the rows of tariff data.
     """
     try:
+        # Send an HTTP GET request to the specified URL
         response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"})
-        response.raise_for_status()  # Raise an error for bad responses
+        response.raise_for_status()  # Raise an error for HTTP issues
+
+        # Parse the webpage content using BeautifulSoup
         soup = BeautifulSoup(response.text, 'html.parser')
 
-        # Extract specific elements based on the webpage structure
-        tariff_sections = soup.find_all('table')  # Assume tariff data is in <table> tags
+        # Try to find all <table> elements in the page
+        tariff_sections = soup.find_all('table')
+        if not tariff_sections:
+            return ["No tables found on the webpage."]
 
         data = []
         for section in tariff_sections:
             table_rows = section.find_all('tr')
             for row in table_rows:
+                # Extract text from each <td> or <th> within the row
                 row_text = ' | '.join(
                     col.get_text(strip=True) for col in row.find_all(['th', 'td'])
                 )
-                if row_text:  # Add the row text only if it contains data
+                if row_text:  # Add only rows that have meaningful data
                     data.append(row_text)
 
-        return data if data else ["No data found on the webpage."]
+        return data if data else ["No data found in the tables."]
+    except requests.exceptions.RequestException as e:
+        # Handle request errors (e.g., connection issues, timeout)
+        return [f"Request error: {e}"]
     except Exception as e:
-        return [f"An error occurred: {e}"]
+        # Handle other potential errors
+        return [f"An unexpected error occurred: {e}"]
+
+if __name__ == "__main__":
+    # Let the user select a company and fetch the corresponding tariff data
+    print("Available Companies:")
+    for idx, company in enumerate(TARIFF_URLS.keys(), start=1):
+        print(f"{idx}. {company}")
+
+    try:
+        # User selects a company
+        selection = int(input("Enter the number corresponding to the company: "))
+        selected_company = list(TARIFF_URLS.keys())[selection - 1]
+        url = TARIFF_URLS[selected_company]
+        print(f"\nFetching tariff data for {selected_company} ({url})...\n")
+
+        # Scrape and display the data
+        tariff_data = scrape_tariff_data(url)
+        print("Tariff Data:")
+        for row in tariff_data[:10]:  # Show a preview of the first 10 rows
+            print(row)
+    except (ValueError, IndexError):
+        print("Invalid selection. Please choose a valid company number.")