# World_News / app.py
# Shreyas94's picture
# Update app.py
# 871b845 verified
# raw
# history blame
# 3.03 kB
import logging
from urllib.parse import urlparse

import requests
from bs4 import BeautifulSoup
from googlesearch import search
from transformers import pipeline, BartTokenizer
# Load the tokenizer and the summarization pipeline once, at import time.
# Both are created from the same checkpoint id, so it is spelled out only once.
# NOTE(review): the tokenizer class is BART while the checkpoint name suggests
# a Llama model -- confirm the two are actually compatible.
_SUMMARY_MODEL_ID = 'letgoofthepizza/Llama-3-8B-Instruct-ko-news-summary'
tokenizer = BartTokenizer.from_pretrained(_SUMMARY_MODEL_ID)
summarizer = pipeline("summarization", model=_SUMMARY_MODEL_ID)
# Function to perform Google search and retrieve URLs, filtering by domain
def google_search(query: str, num_results: int = 10):
    """Search Google for *query* and return result URLs from a fixed set of domains.

    The search is first attempted with a ``domains`` keyword so the search
    backend can restrict the results itself. If ``search`` rejects that call
    with ``TypeError`` (e.g. because it has no ``domains`` parameter), the
    search is repeated without it and the results are filtered locally by
    hostname.

    Args:
        query: Search string passed to ``search``.
        num_results: Number of results requested from ``search``.

    Returns:
        A list of result URLs, in the order the search yielded them.
    """
    allowed_domains = (
        "tesla.com",
        "cnbc.com",
        "reuters.com",
        "bloomberg.com",
        "investopedia.com",
    )

    def is_allowed(url: str) -> bool:
        # Match on the parsed hostname rather than anywhere in the URL: a plain
        # substring test would also accept "nottesla.com" or a URL that merely
        # mentions an allowed domain in its path or query string.
        try:
            host = (urlparse(url).hostname or "").lower()
        except ValueError:
            # Malformed URL (e.g. a broken IPv6 literal): treat as not allowed.
            return False
        return any(
            host == domain or host.endswith("." + domain)
            for domain in allowed_domains
        )

    try:
        # Materialise inside the ``try`` so that a TypeError raised part-way
        # through iteration cannot leave partial results behind for the
        # fallback to duplicate.
        return list(
            search(query, num_results=num_results, domains=list(allowed_domains))
        )
    except TypeError:
        return [
            url
            for url in search(query, num_results=num_results)
            if is_allowed(url)
        ]
# Function to fetch content from a URL and summarize it
def fetch_and_summarize_url(url: str, timeout: float = 10.0):
    """Download *url* and return a summary of its first few paragraphs.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``summary_text`` of the first result produced by ``summarizer``,
        or ``None`` when the request fails or the first three ``<p>``
        elements of the page contain no text.
    """
    try:
        # Without a timeout, requests waits indefinitely, so a single
        # unresponsive host would stall the whole run.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as exc:
        # Best-effort: an unreachable page is skipped, but record why.
        logging.getLogger(__name__).warning("Could not fetch %s: %s", url, exc)
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    # Only the first three paragraphs are fed to the summarizer.
    paragraphs = [p.text for p in soup.find_all('p')]
    combined_text = " ".join(paragraphs[:3])
    if not combined_text.strip():
        # Nothing to summarize.
        return None

    summary = summarizer(combined_text, max_length=200, min_length=50, do_sample=False)
    return summary[0]['summary_text']
# Function to perform Google search and aggregate summaries
def google_search_and_answer(question: str, keywords: str):
    """Search for *question* plus *keywords* and join the relevant page summaries.

    Each URL returned by ``google_search`` is summarized with
    ``fetch_and_summarize_url``. A summary is kept when it contains the whole
    question or at least one of the whitespace-separated keyword terms
    (case-insensitive).

    Args:
        question: The user's query.
        keywords: Space-separated terms used both in the search query and in
            the relevance filter.

    Returns:
        The kept summaries joined by blank lines, or the string
        ``"No relevant information found."`` when none were kept.
    """
    search_query = f"{question} {keywords}"

    # Lower-case once, outside the loop.
    phrase = question.strip().lower()
    # Match keyword terms individually: a multi-word keyword string almost
    # never appears verbatim inside a generated summary.
    terms = keywords.lower().split()
    # A blank question or keyword string cannot discriminate between summaries
    # (the empty string is a substring of every text), so nothing is filtered
    # out in that case.
    filter_active = bool(phrase) and bool(terms)

    summaries = []
    for url in google_search(search_query):
        fetched_summary = fetch_and_summarize_url(url)
        if not fetched_summary:
            continue
        lowered = fetched_summary.lower()
        if (
            not filter_active
            or phrase in lowered
            or any(term in lowered for term in terms)
        ):
            summaries.append(fetched_summary)

    if summaries:
        return "\n\n".join(summaries)
    return "No relevant information found."
# Main function to run the script
def main():
    """Prompt for a query and keywords on stdin and print the aggregated answer."""
    print("Intelligent Assistant")
    user_question = input("Enter your query: ")
    user_keywords = input("Enter specific keywords (e.g., 'Q1 2024 financial results Tesla'): ")
    print("Answer:", google_search_and_answer(user_question, user_keywords))
# Start the interactive prompt only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()