# Source: Hugging Face Space (page status at capture time: "Sleeping")
from transformers import pipeline, BartTokenizer
from googlesearch import search
from bs4 import BeautifulSoup
import requests

# Initialize the tokenizer and summarization pipeline once at import time
# so every call to fetch_and_summarize_url reuses the same loaded model.
# NOTE(review): the checkpoint id says "Llama-3-8B-Instruct", yet it is
# loaded with BartTokenizer and a "summarization" pipeline — confirm the
# checkpoint is actually BART-compatible.
tokenizer = BartTokenizer.from_pretrained('letgoofthepizza/Llama-3-8B-Instruct-ko-news-summary')
summarizer = pipeline("summarization", model="letgoofthepizza/Llama-3-8B-Instruct-ko-news-summary")
# Perform a Google search and retrieve result URLs, filtered to trusted domains.
def google_search(query: str, num_results: int = 10):
    """Perform a Google search and return result URLs from trusted domains.

    Parameters
    ----------
    query : str
        The search query string.
    num_results : int, optional
        Maximum number of results to request (default 10).

    Returns
    -------
    list[str]
        URLs whose host matches one of the trusted finance/news domains.
    """
    # Single source of truth for the allow-list (the original duplicated
    # this list in both the try branch and the TypeError fallback).
    trusted_domains = ["tesla.com", "cnbc.com", "reuters.com", "bloomberg.com", "investopedia.com"]
    search_results = []
    try:
        # Some versions of the googlesearch package accept a `domains`
        # keyword and filter server-side.
        for url in search(query, num_results=num_results, domains=trusted_domains):
            search_results.append(url)
    except TypeError:
        # Fallback for package versions without `domains`: fetch unfiltered
        # results and filter client-side by substring match on the URL.
        for url in search(query, num_results=num_results):
            if any(domain in url for domain in trusted_domains):
                search_results.append(url)
    return search_results
# Fetch content from a URL and summarize it with the module-level pipeline.
def fetch_and_summarize_url(url: str):
    """Fetch a web page and return a model-generated summary of its lead text.

    Parameters
    ----------
    url : str
        The page to fetch.

    Returns
    -------
    str | None
        The summary text, or None when the HTTP request fails or the page
        has no extractable paragraph text.
    """
    try:
        # Timeout prevents a hung server from stalling the whole run
        # (the original call had no timeout and could block indefinitely).
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        # Keep only the first few paragraphs so the input stays within the
        # summarization model's practical input size.
        paragraphs = [p.text for p in soup.find_all('p')]
        combined_text = " ".join(paragraphs[:3])
        if not combined_text.strip():
            return None  # nothing worth summarizing on this page
        # `summarizer` is the module-level transformers pipeline.
        summary = summarizer(combined_text, max_length=200, min_length=50, do_sample=False)
        return summary[0]['summary_text']
    except requests.RequestException:
        # Network/HTTP errors are treated as "no summary available" so one
        # bad URL does not abort the aggregation loop in the caller.
        return None
# Perform a Google search and aggregate the summaries of relevant results.
def google_search_and_answer(question: str, keywords: str):
    """Search the web for `question` + `keywords` and aggregate page summaries.

    Parameters
    ----------
    question : str
        The user's natural-language question.
    keywords : str
        Extra terms appended to the query to focus the search.

    Returns
    -------
    str
        Relevant summaries joined by blank lines, or a fixed
        "No relevant information found." message when none qualify.
    """
    search_query = f"{question} {keywords}"
    search_results = google_search(search_query)
    summaries = []
    for url in search_results:
        fetched_summary = fetch_and_summarize_url(url)
        if fetched_summary:
            # Crude relevance filter: keep a summary only if it mentions
            # the question or the keywords verbatim (case-insensitive).
            if question.lower() in fetched_summary.lower() or keywords.lower() in fetched_summary.lower():
                summaries.append(fetched_summary)
    if summaries:
        return "\n\n".join(summaries)
    return "No relevant information found."
# Main function to run the script.
def main():
    """Interactive entry point: prompt for a query and print the answer."""
    print("Intelligent Assistant")
    question = input("Enter your query: ")
    keywords = input("Enter specific keywords (e.g., 'Q1 2024 financial results Tesla'): ")
    answer = google_search_and_answer(question, keywords)
    print("Answer:", answer)
# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()