import gradio as gr from bs4 import BeautifulSoup import requests from jinja2 import Template from urllib.parse import urljoin import os import warnings import logging warnings.filterwarnings("ignore") # Set up logging logging.basicConfig(level=logging.DEBUG, filename='app.log', filemode='w', format='%(name)s - %(levelname)s - %(message)s') books = { "College Physics AP": { "conceptual_link": "https://openstax.org/books/college-physics-ap-courses-2e/pages/{}-conceptual-questions", "problem_link": "https://openstax.org/books/college-physics-ap-courses-2e/pages/{}-problems-exercises", "href_base_url": "https://openstax.org/books/college-physics-ap-courses-2e/pages/" }, "University Physics Vol. 1": { "conceptual_link": "https://openstax.org/books/university-physics-volume-1/pages/{}-conceptual-questions", "problem_link": "https://openstax.org/books/university-physics-volume-1/pages/{}-problems", "href_base_url": "https://openstax.org/books/university-physics-volume-1/pages/" }, "University Physics Vol. 2": { "conceptual_link": "https://openstax.org/books/university-physics-volume-2/pages/{}-conceptual-questions", "problem_link": "https://openstax.org/books/university-physics-volume-2/pages/{}-problems", "href_base_url": "https://openstax.org/books/university-physics-volume-2/pages/" } } img_base_url = 'https://openstax.org' def get_html(url): headers = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3" } try: response = requests.get(url, headers=headers) response.encoding = 'utf-8' response.raise_for_status() # Raise an HTTPError for bad responses soup = BeautifulSoup(response.text, 'html.parser') return soup except requests.RequestException as e: logging.error(f"An error occurred when getting HTML: {e}") return None def get_question(exercises, question_index, img_base_url, href_base_url): question_index = question_index - 1 if question_index >= len(exercises): return "Question index out of range." exercise_div = exercises[question_index] logging.debug(f"Processing exercise: {exercise_div}") for img_tag in exercise_div.find_all('img'): img_tag['src'] = urljoin(img_base_url, img_tag['data-lazy-src']) for a_tag in exercise_div.find_all('a'): a_tag['href'] = urljoin(href_base_url, a_tag['href']) return str(exercise_div) def get_all_questions(book_key, unit_num, conceptual_list, problem_list): book = books[book_key] conceptual_url = book["conceptual_link"].format(int(unit_num)) problem_url = book["problem_link"].format(int(unit_num)) conceptual_html = get_html(conceptual_url) if conceptual_html is None: return [] problem_html = get_html(problem_url) if problem_html is None: return [] conceptual_exercises = conceptual_html.find_all('div', {'data-type': 'exercise'}) problem_exercises = problem_html.find_all('div', {'data-type': 'exercise'}) questions = [] for i in conceptual_list: questions.append(get_question(conceptual_exercises, i, img_base_url, book["href_base_url"])) for i in problem_list: questions.append(get_question(problem_exercises, i, img_base_url, book["href_base_url"])) return questions def generate_html(book_key, chapter_num, conceptual_input, problem_input, path): conceptual_list = list(map(int, conceptual_input.split(",")) if conceptual_input else []) problem_list = list(map(int, problem_input.split(",")) if problem_input else []) questions = get_all_questions(book_key, int(chapter_num), conceptual_list, problem_list) template_str = '''