Spaces:
Sleeping
Sleeping
import requests | |
from bs4 import BeautifulSoup | |
import pandas as pd | |
def _tag_text(tag, default=""):
    """Return the stripped text of a BeautifulSoup tag, or *default* if the tag is None."""
    return tag.get_text(strip=True) if tag is not None else default


def _split_duration_lessons(text):
    """Split a combined "<n> Hours<lessons>" string into (duration, lessons).

    When the " Hours" marker is absent the whole text is returned as the
    duration and lessons is left empty, instead of raising IndexError.
    """
    head, marker, tail = text.partition(" Hours")
    if not marker:
        return text, ""
    return head + marker, tail.strip()


def scrape_courses(
    url="https://courses.analyticsvidhya.com/pages/all-free-courses",
    timeout=30,
):
    """Scrape the course listing page into a DataFrame, one row per course card.

    Args:
        url: Page that lists the courses. Defaults to the free-courses page.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A pandas DataFrame with the columns 'title', 'duration', 'lessons',
        'rating' and 'price'. The columns are present even when no course
        card is found, so a CSV written from it always has a header.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    # Without a timeout requests can block forever on a stalled connection.
    response = requests.get(url, timeout=timeout)
    # Fail loudly rather than parsing an error page as if it were the listing.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    courses = []
    for course_card in soup.find_all("a", class_="card-link"):
        title_tag = course_card.find("h2", class_="card-text")
        if title_tag is None:
            # A card without a title cannot be identified; skip it rather
            # than crash the whole scrape on one malformed card.
            continue
        title = title_tag.get_text(strip=True)

        # Duration and lessons share one paragraph; it may be missing.
        duration_lessons = _tag_text(course_card.find("p", class_="fs-14"))
        duration, lessons = _split_duration_lessons(duration_lessons)

        # NOTE(review): the "rating" and "price" span classes are assumptions
        # carried over from the original code - confirm against the live page.
        rating = _tag_text(course_card.find("span", class_="rating"), "No rating")
        # Courses without a price element are treated as free.
        price = _tag_text(course_card.find("span", class_="price"), "Free")

        courses.append({
            'title': title,
            'duration': duration,
            'lessons': lessons,
            'rating': rating,
            'price': price,
        })

    return pd.DataFrame(
        courses,
        columns=['title', 'duration', 'lessons', 'rating', 'price'],
    )
# Script entry point: scrape the course list, show it, and store it as CSV.
if __name__ == "__main__":
    scraped = scrape_courses()
    # Echo the table so the scraped data can be checked at a glance.
    print(scraped)
    # index=False keeps the DataFrame's row index out of the CSV file.
    scraped.to_csv("courses_data.csv", index=False)