"""Gradio app that scrapes an article's title and paragraphs into a .txt file."""

import os
import posixpath
import re
import tempfile
from typing import Optional, Tuple
from urllib.parse import unquote, urlsplit

import gradio as gr
import requests
from bs4 import BeautifulSoup

# Seconds to wait on the remote server before giving up on a request.
REQUEST_TIMEOUT = 10
# Number of content characters shown in the on-page preview.
PREVIEW_LENGTH = 500
# Upper bound on the generated file stem, to stay clear of OS filename limits.
MAX_FILENAME_STEM = 100


def scrape_article(url: str, timeout: float = REQUEST_TIMEOUT) -> Tuple[str, str]:
    """
    Scrape the title and paragraph text from the given article URL.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before aborting the request.

    Returns:
        A ``(title, content)`` tuple. On any failure (network error, invalid
        URL, non-200 status) the first element is an error message and the
        second is an empty string.
    """
    url = (url or "").strip()

    # Without a timeout a stalled server would block the UI handler forever;
    # connection problems and malformed URLs are reported the same way as a
    # bad status code instead of propagating as an exception.
    try:
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException as exc:
        return f"Failed to retrieve the webpage. Error: {exc}", ""

    if response.status_code != 200:
        return "Failed to retrieve the webpage. Status code: " + str(response.status_code), ""

    soup = BeautifulSoup(response.text, 'html.parser')

    # The first <h1> is taken as the article title; an absent or empty
    # heading falls back to a placeholder.
    heading = soup.find('h1')
    title = heading.text.strip() if heading else ""
    if not title:
        title = "No title found"

    # Empty paragraphs are dropped so a page made only of blank <p> tags is
    # reported as having no content.
    paragraphs = (paragraph.text.strip() for paragraph in soup.find_all('p'))
    content = "\n".join(text for text in paragraphs if text)

    return title, content


def _filename_from_url(url: str) -> str:
    """Derive a filesystem-safe ``<slug>.txt`` name from the URL's last path segment."""
    url = (url or "").strip()
    try:
        path = urlsplit(url).path
    except ValueError:
        # Malformed URL (e.g. unbalanced IPv6 brackets): use the raw string.
        path = url

    # Ignore a trailing slash so ".../my-article/" still yields "my-article";
    # the query string and fragment are already excluded by using only .path.
    stem = unquote(posixpath.basename(path.rstrip('/')))
    # Replace anything that is not a word character, dot or dash, then trim
    # leading/trailing dots and underscores so names like ".." cannot survive.
    stem = re.sub(r'[^\w.-]+', '_', stem).strip('._')
    stem = stem[:MAX_FILENAME_STEM]
    return f"{stem or 'article'}.txt"


def save_as_txt(title: str, content: str, url: str) -> str:
    """
    Save the scraped article to a .txt file in the system temp directory.

    Args:
        title: Article title written on the first line.
        content: Article body written after the title.
        url: Source URL; its last path segment is used to name the file.

    Returns:
        Path of the written file.
    """
    file_path = os.path.join(tempfile.gettempdir(), _filename_from_url(url))

    with open(file_path, "w", encoding="utf-8") as file:
        file.write("Title: " + title + "\n\n")
        file.write("Content:\n" + content)

    return file_path


def scrape_and_download(url: str) -> Tuple[str, Optional[str]]:
    """
    Combine scraping and file saving for the Gradio interface.

    Args:
        url: Address of the article to scrape.

    Returns:
        A ``(preview_text, file_path)`` tuple. ``file_path`` is ``None`` when
        the page could not be fetched or contained no paragraph text.
    """
    title, content = scrape_article(url)
    if not content:
        return "No content found or failed to retrieve the page.", None

    file_path = save_as_txt(title, content, url)

    # Only flag the preview as truncated when text was actually cut off.
    preview = content[:PREVIEW_LENGTH]
    if len(content) > PREVIEW_LENGTH:
        preview += "... (truncated)"
    return f"Title: {title}\n\nContent:\n{preview}", file_path


# Gradio Interface
description = "Input an article URL to scrape its title and content. A .txt file will be generated for download."
# Page layout: heading, description, URL input, read-only preview,
# download slot and the button that triggers scraping.
with gr.Blocks() as demo:
    gr.Markdown(value="## Web Article Scraper with Download")
    gr.Markdown(value=description)

    url_input = gr.Textbox(label="Enter Article URL")
    output_text = gr.Textbox(
        label="Extracted Content Preview",
        interactive=False,
    )
    download_button = gr.File(label="Download Article as .txt")
    submit_button = gr.Button(value="Scrape Article")

    # Clicking the button scrapes the URL and fills both the preview and the
    # downloadable file.
    submit_button.click(
        fn=scrape_and_download,
        inputs=[url_input],
        outputs=[output_text, download_button],
    )

# Start the Gradio server.
demo.launch()