shrivarshan committed on
Commit
e0d9d37
1 Parent(s): d567d1e

Upload app3.py

Browse files
Files changed (1) hide show
  1. app3.py +115 -0
app3.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import requests
3
+ from transformers import pipeline
4
+ import spacy
5
+
6
# Streamlit re-executes the whole script on every widget interaction, so the
# expensive model loads are wrapped in st.cache_resource: each loader runs once
# per server process and later reruns reuse the same object.
@st.cache_resource
def _load_summarizer():
    """Build the Hugging Face summarization pipeline (facebook/bart-large-cnn)."""
    return pipeline("summarization", model="facebook/bart-large-cnn")


@st.cache_resource
def _load_spacy_model():
    """Load the spaCy ``en_core_web_sm`` model."""
    return spacy.load("en_core_web_sm")


# Initialize the summarizer pipeline using Hugging Face Transformers
summarizer = _load_summarizer()

# Load spaCy model
nlp = _load_spacy_model()
11
+
12
+ # Function to perform search using Google Custom Search API
13
def perform_search(query):
    """Query the Google Custom Search JSON API and return the decoded response.

    Args:
        query: Free-text search string.

    Returns:
        The parsed JSON body of the HTTP response (``response.json()``).
        No status check is performed here; the response is returned as-is.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            the request exceeds the timeout.
    """
    import os

    # SECURITY NOTE(review): an API key is committed in source. It is kept only
    # as a fallback so existing deployments keep working; it should be rotated
    # and supplied through the GOOGLE_API_KEY environment variable instead.
    api_key = os.environ.get("GOOGLE_API_KEY", 'AIzaSyAgKac39wfstboizc1StYGjqlT2rdQqVQ4')
    cx = os.environ.get("GOOGLE_CSE_ID", "7394b4ca2ca1040ef")

    # Passing the values via `params` lets requests URL-encode them, so queries
    # containing '&', '#', '+' or spaces are no longer mangled.
    response = requests.get(
        "https://www.googleapis.com/customsearch/v1",
        params={"q": query, "key": api_key, "cx": cx},
        timeout=10,  # avoid hanging the app indefinitely on a stalled connection
    )
    return response.json()
19
+
20
+ # Function to summarize the overall combined content (make it longer)
21
def summarize_overall_content(content):
    """Produce the longer, overall summary for the combined search content.

    Args:
        content: Text to summarize. Only the first 3000 characters are used.

    Returns:
        The summary text from the summarizer, or an empty string when
        ``content`` is empty or whitespace-only.
    """
    # Nothing to summarize: skip the model call instead of asking it to
    # generate at least 100 tokens from empty input.
    if not content or not content.strip():
        return ""

    summary = summarizer(
        content[:3000],  # slicing is a no-op for shorter strings
        max_length=300,
        min_length=100,
        do_sample=False,
        # The character cap does not bound the token count; let the tokenizer
        # truncate to the model's maximum input length if needed.
        truncation=True,
    )
    return summary[0]['summary_text']
26
+
27
+ # Function to summarize individual search results (keep shorter)
28
def summarize_individual_content(content):
    """Produce the short summary shown under a single search result.

    Args:
        content: Text to summarize. Only the first 1000 characters are used.

    Returns:
        The summary text from the summarizer, or an empty string when
        ``content`` is empty or whitespace-only.
    """
    # Nothing to summarize: skip the model call instead of asking it to
    # generate at least 30 tokens from empty input.
    if not content or not content.strip():
        return ""

    summary = summarizer(
        content[:1000],  # slicing is a no-op for shorter strings
        max_length=50,
        min_length=30,
        do_sample=False,
        # The character cap does not bound the token count; let the tokenizer
        # truncate to the model's maximum input length if needed.
        truncation=True,
    )
    return summary[0]['summary_text']
33
+
34
+ # Function to rank search results based on custom criteria
35
def rank_sources(results):
    """Return the search results in ranked order.

    Ranking is currently a pass-through: the object passed in is handed back
    unchanged, so the caller sees whatever order it supplied.
    """
    ranked = results
    return ranked
38
+
39
+ # Function to extract related topics using spaCy
40
def extract_related_topics(query_list):
    """Derive up to three related topics from the queries seen so far.

    The queries are joined into one string and run through the spaCy model.
    Alphabetic non-stop-word tokens and named entities become candidate topics.

    Args:
        query_list: Iterable of query strings.

    Returns:
        A list of at most three topic strings. The first entry is always the
        hard-coded seed topic "Deep Learning".
    """
    doc = nlp(" ".join(query_list))

    # Extract keywords or named entities
    keywords = [token.text for token in doc if token.is_alpha and not token.is_stop]
    entities = [ent.text for ent in doc.ents]

    # dict.fromkeys de-duplicates while keeping first-seen order. The previous
    # list(set(...)) had arbitrary ordering, so the topics shown were not
    # reproducible, and the seed topic could appear twice.
    related_topics = list(dict.fromkeys(["Deep Learning", *keywords, *entities]))
    return related_topics[:3]  # Limit to 3 related topics
52
+
53
+ # Function to display search results and summaries
54
def display_results(query):
    """Run a search for ``query`` and render the summaries in the Streamlit page.

    Writes an overall summary built from all result snippets, followed by one
    linked title and short summary per result. Writes "No results found." when
    the search response has no ``'items'`` key.

    Args:
        query: Search string passed to ``perform_search``.
    """
    st.write(f"Searching for: {query}")

    # Perform search and get results
    search_results = perform_search(query)

    if 'items' not in search_results:
        st.write("No results found.")
        return

    ranked_results = rank_sources(search_results['items'])

    # Overall summary (bigger). Results without a snippet are skipped rather
    # than raising KeyError.
    st.write("### Overall Summary:")
    snippets = [item.get('snippet', '') for item in ranked_results]
    combined_content = " ".join(text for text in snippets if text)
    if combined_content.strip():
        st.write(summarize_overall_content(combined_content))
    else:
        # Avoid sending empty text to the summarizer.
        st.write("No summary available.")

    # Individual results (shorter)
    st.write("### Individual Results:")
    for item, snippet in zip(ranked_results, snippets):
        st.write(f"**[{item['title']}]({item['link']})**")
        if snippet.strip():
            st.write(summarize_individual_content(snippet))
        st.write("---")
78
+
79
+ # Main Streamlit App UI
80
st.title("AI-Powered Information Retrieval and Summarization")

# Initialize query list to store search queries
if 'querylist' not in st.session_state:
    st.session_state.querylist = []

# Search input by user
query = st.text_input("Enter your search query:")

# Trending Topics Section with clickable buttons. This runs BEFORE the search
# so that a clicked topic replaces the query for the current run; previously
# the assignment happened after the results were rendered and had no effect.
st.sidebar.title("Trending Topics")
trending_topics = ["AI", "Machine Learning", "Sustainability", "Technology Trends"]
for idx, topic in enumerate(trending_topics):
    if st.sidebar.button(topic, key=f'topic_button_{idx}'):
        query = topic  # Automatically search for this topic when clicked

# If query is provided, display results and update query list
if query:
    # The script reruns on every widget interaction; only record new queries
    # so the history is not flooded with duplicates.
    if query not in st.session_state.querylist:
        st.session_state.querylist.append(query)
    display_results(query)

    # Generate related topics based on query list
    related_topics = extract_related_topics(st.session_state.querylist)

    st.write("### Related Topics:")
    for related in related_topics:
        search_link = requests.utils.requote_uri(f'https://www.google.com/search?q={related}')
        st.write(f"- **[{related}]({search_link})**")

# Feedback Section (Visible after results). A clicked trending topic is already
# reflected in `query`, so the sidebar buttons are not created a second time
# here (that produced a duplicate set of buttons in the sidebar).
if query:
    st.write("### Feedback")
    feedback = st.radio("Was this summary helpful?", ["Yes", "No"])
    if feedback == "Yes":
        st.write("Thank you for your feedback!")
    else:
        st.write("We will try to improve!")