"""Scrape free courses from courses.analyticsvidhya.com into two JSON files.

The script walks the paginated course listing, keeps every card whose price
label is exactly "Free", opens each such course page and records its heading,
the first paragraph of its description and its curriculum.

Outputs (written to the current working directory):
    course_data.json -- list of {"course_heading", "description_text",
                        "chapters"} objects, one per scraped course.
    link_dict.json   -- mapping of course heading -> course page URL.
"""
import json
import sys
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

BASE_URL = 'https://courses.analyticsvidhya.com'
PAGE_COUNT = 9  # number of listing pages to walk (pages 1..PAGE_COUNT)
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests can block forever


def _fetch_soup(url):
    """Download *url* and return the parsed page, or None if the request fails.

    Failures are reported on stderr so that one unreachable page does not
    abort the whole run or pollute the normal stdout output.
    """
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as err:
        print(f"Skipping {url}: {err}", file=sys.stderr)
        return None
    return BeautifulSoup(response.text, 'html.parser')


def _parse_chapters(soup):
    """Return {chapter title: lessons string} for a course page.

    Each lesson title is followed by " , " (including the last one), which is
    the format the output file has always used.
    """
    chapters = {}
    for chapter in soup.find_all('li', class_='course-curriculum__chapter'):
        title_tag = chapter.find('h5', class_='course-curriculum__chapter-title')
        if title_tag is None:
            # A chapter without a title cannot be keyed; skip it.
            continue
        lessons = chapter.find_all(
            'span', class_='course-curriculum__chapter-lesson'
        )
        chapters[title_tag.get_text(strip=True)] = "".join(
            lesson.get_text(strip=True) + " , " for lesson in lessons
        )
    return chapters


def _scrape_course(url):
    """Scrape one course page and return its record, or None if unusable.

    The record has the keys "course_heading", "description_text" and
    "chapters". Heading, description and chapters are echoed to stdout as
    they are found.
    """
    soup = _fetch_soup(url)
    if soup is None:
        return None

    heading_tag = soup.find('h1', class_='section__heading')
    description_section = soup.find('div', class_='fr-view')
    description_paragraph = (
        description_section.find('p') if description_section is not None else None
    )
    if heading_tag is None or description_paragraph is None:
        print(f"Skipping {url}: heading or description not found",
              file=sys.stderr)
        return None

    # The heading is deliberately taken unstripped (.text), as before, because
    # it is also used as the key in link_dict.json.
    course_heading = heading_tag.text
    print(course_heading)

    description_text = description_paragraph.get_text(strip=True)
    print(description_text)

    content_dict = _parse_chapters(soup)
    print(content_dict)

    return {
        "course_heading": course_heading,
        "description_text": description_text,
        "chapters": content_dict,
    }


list_of_courses = []
link_dict = {}

for page in range(1, PAGE_COUNT + 1):
    listing = _fetch_soup(f"{BASE_URL}/collections/courses?page={page}")
    course_list = (
        listing.find('ul', class_='products__list') if listing is not None else None
    )
    if course_list is None:
        # Page failed to load or has no product list; try the next page.
        continue

    for course in course_list.find_all('li', class_='products__list-item'):
        link_tag = course.find('a', class_='course-card')
        price_tag = course.find('span', class_='course-card__price')
        if link_tag is None or price_tag is None:
            continue

        link = link_tag.get('href')
        # Skip (not break on) paid courses: free courses may follow a paid
        # one on the same listing page.
        if not link or price_tag.get_text(strip=True) != "Free":
            continue

        course_url = urljoin(BASE_URL, link)
        courses = _scrape_course(course_url)
        if courses is None:
            continue

        link_dict[courses["course_heading"]] = course_url
        list_of_courses.append(courses)

with open('course_data.json', 'w', encoding='utf-8') as f:
    json.dump(list_of_courses, f, ensure_ascii=False, indent=4)

with open('link_dict.json', 'w', encoding='utf-8') as f:
    json.dump(link_dict, f, ensure_ascii=False, indent=4)