Spaces:
Runtime error
Runtime error
| import PyPDF2 | |
| import requests | |
| from bs4 import BeautifulSoup | |
def get_text_from_website(url, timeout=10):
    """Fetch a web page and return the text of its main content elements.

    The text of every ``<p>``, ``<h1>``, ``<h2>``, ``<h3>`` and ``<li>``
    element is collected in document order and joined with single spaces.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. It is
            passed straight to ``requests.get``; without a timeout a
            stalled server would block the caller indefinitely.

    Returns:
        The extracted text when the server answers with HTTP 200.
        For any other status code, a human-readable failure message that
        includes the status code is returned instead of raising.

    Raises:
        requests.exceptions.RequestException: Connection errors and
            timeouts raised by ``requests.get`` are not caught here.
    """
    response = requests.get(url, timeout=timeout)

    # Guard clause: report non-200 responses as a message string, which is
    # the contract existing callers rely on.
    if response.status_code != 200:
        return f"Failed to retrieve the webpage. Status code: {response.status_code}"

    soup = BeautifulSoup(response.text, 'html.parser')
    content_elements = soup.find_all(['p', 'h1', 'h2', 'h3', 'li'])
    return ' '.join(
        element.get_text(separator=' ', strip=True)
        for element in content_elements
    )
def get_text_from_pdf(file_path):
    """Return the concatenated text of every page in a PDF file.

    Args:
        file_path: Path of the PDF to read; the file is opened in binary
            mode.

    Returns:
        The text extracted from each page, in page order, joined with no
        separator between pages.
    """
    with open(file_path, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # Extract inside the ``with`` block so the file handle is still
        # open while the pages are being read. A single join replaces the
        # index loop with repeated ``+=`` concatenation.
        return ''.join(page.extract_text() for page in pdf_reader.pages)