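# Spaces: Sleeping
# (The line above appears to be page-status text captured along with the script; it is not part of the program.)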
# Import necessary libraries
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
""" | |
This code uses Selenium to scrape data from a webpage.
It initializes a Chrome webdriver and loads the webpage defined by the url variable.
It then clicks the modal pop-up that appears when the webpage is loaded.
The code then enters a loop to scrape data from each page of the table.
It locates the data tables and extracts the table rows.
It loops through each row of the table and extracts the name, region, and description from the row.
It appends these values to the dai_values list.
The code then finds the 'Next' button and checks whether it is disabled.
If the button is disabled, the loop ends.
If the button is not disabled, the code scrolls to it, waits 2 seconds, and clicks it to move on to the next page.
Finally, the code quits the webdriver.
"""
# Define the URL of the webpage to be scraped
url = 'https://www.hdrn.ca/en/inventory/'

# Create an empty list to store the scraped data: one dict per table row
# with the keys 'name', 'region' and 'description'
dai_values = []

# Initialize a Chrome webdriver
driver = webdriver.Chrome()

# Everything after the browser starts runs under try/finally so the browser
# process is shut down even when a lookup or a click raises.
try:
    driver.get(url)

    # Explicit wait (2-second timeout) for elements that may render late
    wait = WebDriverWait(driver, 2)

    # Click the modal pop-up that appears when the webpage is loaded; wait
    # until it is clickable instead of assuming it is already displayed
    wait.until(EC.element_to_be_clickable((By.ID, 'myModal'))).click()

    # Loop through the webpage to scrape data from each page
    while True:
        # Locate the data tables (TimeoutException if none show up within
        # the wait) and extract the rows of the last one
        scroll_bodies = wait.until(
            lambda d: d.find_elements(By.CLASS_NAME, 'dataTables_scrollBody')
        )
        rows = scroll_bodies[-1].find_elements(By.TAG_NAME, 'tr')

        # Loop through each row of the table
        for row in rows:
            cells = row.find_elements(By.TAG_NAME, 'td')
            # Skip rows that do not carry the three expected cells; taking
            # only the first three keeps wider rows from breaking the unpack
            if len(cells) < 3:
                continue
            name, region, description = cells[:3]
            dai_values.append({
                'name': name.text,
                'region': region.text,
                'description': description.text,
            })

        # Find the 'Next' button; no button at all means there is no
        # further page to visit
        next_buttons = driver.find_elements(By.ID, 'thelist_next')
        if not next_buttons:
            break
        next_button = next_buttons[0]

        # A 'disabled' class marks the last page (the class attribute may be
        # missing, hence the empty-string fallback)
        if 'disabled' in (next_button.get_attribute('class') or ''):
            break

        # Scroll the button into view, pause 2 seconds, then click the
        # button itself (the webdriver object has no click() method)
        driver.execute_script("arguments[0].scrollIntoView();", next_button)
        time.sleep(2)
        next_button.click()
finally:
    # Quit the webdriver
    driver.quit()