Spaces:
Sleeping
Sleeping
shrivarshan
committed on
Commit
•
e0d9d37
1
Parent(s):
d567d1e
Upload app3.py
Browse files
app3.py
ADDED
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import requests
|
3 |
+
from transformers import pipeline
|
4 |
+
import spacy
|
5 |
+
|
6 |
+
# Initialize the summarizer pipeline using Hugging Face Transformers
|
7 |
+
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
|
8 |
+
|
9 |
+
# Load spaCy model
|
10 |
+
nlp = spacy.load("en_core_web_sm")
|
11 |
+
|
# Function to perform search using Google Custom Search API
def perform_search(query):
    """Query the Google Custom Search JSON API and return the decoded response.

    Parameters
    ----------
    query : str
        Free-text search query; URL-encoded automatically via `params`.

    Returns
    -------
    dict
        Parsed JSON payload from the API (contains an 'items' list on
        success, or an error structure otherwise).
    """
    # SECURITY(review): these credentials are hard-coded and committed to the
    # repository. They should be rotated and loaded from st.secrets or
    # environment variables instead of living in source control.
    api_key = 'AIzaSyAgKac39wfstboizc1StYGjqlT2rdQqVQ4'
    cx = "7394b4ca2ca1040ef"
    # Pass the query through `params` so spaces/special characters are
    # properly URL-encoded (the original f-string interpolation was not),
    # and bound the request time so the Streamlit UI cannot hang forever.
    response = requests.get(
        "https://www.googleapis.com/customsearch/v1",
        params={"q": query, "key": api_key, "cx": cx},
        timeout=10,
    )
    return response.json()
# Produce the long-form summary shown at the top of the results page.
def summarize_overall_content(content):
    """Summarize combined snippet text into a longer overview paragraph."""
    # Cap input at 3000 characters before feeding the model.
    truncated = content[:3000] if len(content) > 3000 else content
    result = summarizer(truncated, max_length=300, min_length=100, do_sample=False)
    return result[0]['summary_text']
# Produce the short per-result summary shown under each link.
def summarize_individual_content(content):
    """Summarize a single search-result snippet into a brief blurb."""
    # Only the first 1000 characters are considered, for brevity.
    text = content if len(content) <= 1000 else content[:1000]
    result = summarizer(text, max_length=50, min_length=30, do_sample=False)
    return result[0]['summary_text']
# Hook for custom result ranking.
def rank_sources(results):
    """Return search results in display order.

    Currently a pass-through: the API's own ordering is kept unchanged.
    """
    return results
# Function to extract related topics using spaCy
def extract_related_topics(query_list):
    """Derive up to three related-topic suggestions from past queries.

    Parameters
    ----------
    query_list : list[str]
        Every query the user has entered during this session.

    Returns
    -------
    list[str]
        At most three topics: a fixed "Deep Learning" suggestion followed
        by keywords/entities found in the queries, in first-seen order.
    """
    doc = nlp(" ".join(query_list))

    # Candidate topics: non-stopword alphabetic tokens plus named entities.
    keywords = [token.text for token in doc if token.is_alpha and not token.is_stop]
    entities = [ent.text for ent in doc.ents]

    # BUGFIX: list(set(...)) produced a different topic ordering on every
    # run (set iteration order is unstable across processes), so the UI's
    # suggestions changed randomly. dict.fromkeys deduplicates while
    # preserving first-seen order, making the output deterministic.
    related_topics = list(dict.fromkeys(keywords + entities))
    related_topics.insert(0, "Deep Learning")
    return related_topics[:3]  # Limit to 3 related topics
# Render search results: an overall summary followed by per-result blurbs.
def display_results(query):
    """Search for `query` and render summaries into the Streamlit page."""
    st.write(f"Searching for: {query}")

    # Fetch raw results from the Custom Search API.
    search_results = perform_search(query)

    # Guard clause: nothing to show when the API returned no items.
    if 'items' not in search_results:
        st.write("No results found.")
        return

    ranked_results = rank_sources(search_results['items'])

    # Big combined summary built from every snippet.
    st.write("### Overall Summary:")
    combined_content = " ".join(item['snippet'] for item in ranked_results)
    st.write(summarize_overall_content(combined_content))

    # One short summary per result, each with a link to its source.
    st.write("### Individual Results:")
    for item in ranked_results:
        st.write(f"**[{item['title']}]({item['link']})**")
        st.write(summarize_individual_content(item['snippet']))
        st.write("---")
# ---------------------------------------------------------------------------
# Main Streamlit App UI
# ---------------------------------------------------------------------------
st.title("AI-Powered Information Retrieval and Summarization")

# Keep a history of every query entered this session so related-topic
# extraction can consider all of them.
if 'querylist' not in st.session_state:
    st.session_state.querylist = []

# Search input by user
query = st.text_input("Enter your search query:")

# Trending Topics Section with clickable buttons.
# BUGFIX: this loop must run BEFORE the `if query:` block below — in the
# original, a button click set `query` only after the search block had
# already executed, so clicking a trending topic never triggered a search.
st.sidebar.title("Trending Topics")
trending_topics = ["AI", "Machine Learning", "Sustainability", "Technology Trends"]
for idx, topic in enumerate(trending_topics):
    if st.sidebar.button(topic, key=f'topic_button_{idx}'):
        query = topic  # Automatically search for this topic when clicked

# If a query came from the text box or a trending-topic button, run it.
if query:
    st.session_state.querylist.append(query)
    display_results(query)

# Related topics derived from the full query history.
related_topics = extract_related_topics(st.session_state.querylist)
st.write("### Related Topics:")
for topic in related_topics:
    st.write(f"- **[{topic}]({requests.utils.requote_uri(f'https://www.google.com/search?q={topic}')})**")

# Feedback Section (visible after results).
# BUGFIX: the original condition `query or any(st.sidebar.button(t) ...)`
# rendered a second, duplicate set of sidebar buttons whose fresh return
# values were almost always False. `query` already reflects button clicks
# (folded in by the loop above), so testing it alone is both correct and
# avoids the duplicate widgets.
if query:
    st.write("### Feedback")
    feedback = st.radio("Was this summary helpful?", ["Yes", "No"])
    if feedback == "Yes":
        st.write("Thank you for your feedback!")
    else:
        st.write("We will try to improve!")