tjl8 committed on
Commit
60bbe7d
·
verified ·
1 Parent(s): c134681

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -23
app.py CHANGED
@@ -87,14 +87,14 @@
87
  # df2 = df.copy()
88
  # if cat:
89
  # df2 = df2[df2['category_&_subcategory_standardized'] == cat]
90
- # st.info(f"🔎 Filtering by category: **{cat}**")
91
  # if yr:
92
  # df2 = df2[df2['status_date'].dt.year == yr]
93
  # if mon:
94
  # df2 = df2[df2['status_date'].dt.month == mon]
95
- # st.info(f"🔎 Filtering by date: **{datetime(yr, mon, 1):%B %Y}**")
96
  # else:
97
- # st.info(f"🔎 Filtering by year: **{yr}**")
98
 
99
  # if df2.empty:
100
  # st.warning("No matching records found.")
@@ -139,7 +139,7 @@ from sklearn.metrics.pairwise import cosine_similarity
139
  from sklearn.feature_extraction.text import TfidfVectorizer
140
  from datetime import datetime
141
 
142
- # ------------------ Load Data ------------------ #
143
  @st.cache_data
144
  def load_data():
145
  df = pd.read_csv("Illinois_Entire_Data_Insights_Final_v2.csv")
@@ -150,25 +150,22 @@ def load_data():
150
  df["summary_insight"] = df["llama_trend_summary"] + "\n" + df["llama_insight"]
151
  return df
152
 
153
- # ------------------ Load Models ------------------ #
154
  @st.cache_resource
155
  def load_models():
156
  embed_model = SentenceTransformer('all-MiniLM-L6-v2')
157
  summarizer = pipeline("summarization", model="t5-small", tokenizer="t5-small")
158
  return embed_model, summarizer
159
 
160
- # ------------------ Compute Embeddings ------------------ #
161
  @st.cache_data
162
  def compute_embeddings(texts, _model):
163
  return _model.encode(texts, show_progress_bar=True)
164
 
165
- # ------------------ Semantic Search ------------------ #
166
- def semantic_search(query, embeddings, model, threshold=0.5): # Increased threshold to 0.7
167
  query_embedding = model.encode([query])
168
  sims = cosine_similarity(query_embedding, embeddings)[0]
169
  return [(i, s) for i, s in enumerate(sims) if s > threshold]
170
 
171
- # ------------------ RAG Summarizer ------------------ #
172
  def rag_summarize(texts, summarizer, top_k=5):
173
  if not texts:
174
  return "No relevant content to summarize."
@@ -182,7 +179,6 @@ def rag_summarize(texts, summarizer, top_k=5):
182
  out = summarizer(prompt, max_length=60, min_length=30, do_sample=False)
183
  return out[0]['summary_text']
184
 
185
- # ------------------ Extract Month/Year from Query ------------------ #
186
  def extract_month_year(q):
187
  month_map = {m: i for i, m in enumerate(
188
  ["january", "february", "march", "april", "may", "june",
@@ -193,7 +189,6 @@ def extract_month_year(q):
193
  yr = int(ym.group()) if ym else None
194
  return mon, yr
195
 
196
- # ------------------ Topic-Based Matching ------------------ #
197
  def extract_topic_match(query, df):
198
  query_lower = query.lower()
199
  matched_rows = df[
@@ -204,33 +199,29 @@ def extract_topic_match(query, df):
204
  ]
205
  return matched_rows
206
 
207
- # ------------------ Streamlit UI ------------------ #
208
  st.set_page_config(page_title="IL Trends Q&A", layout="wide")
209
  st.title("Illinois Legislative Trends Q&A")
210
- st.markdown("Ask about **topics** like education, housing, mental health, higher education, etc.\nAlso supports filtering by **month/year**!")
211
 
212
  df = load_data()
213
  embed_model, summarizer = load_models()
214
 
215
- query = st.text_input("🔍 Ask a question (e.g., ‘Higher education in 2024’):")
216
 
217
  if query:
218
- # Extract filters
219
  mon, yr = extract_month_year(query)
220
  df2 = extract_topic_match(query, df)
221
 
222
- # Fallback to full dataset if nothing found on topic
223
  if df2.empty:
224
  df2 = df
225
-
226
- # Apply year/month filters
227
  if yr:
228
  df2 = df2[df2['status_date'].dt.year == yr]
229
  if mon:
230
  df2 = df2[df2['status_date'].dt.month == mon]
231
- st.info(f"🔎 Filtering by date: **{datetime(yr, mon, 1):%B %Y}**")
232
  else:
233
- st.info(f"🔎 Filtering by year: **{yr}**")
234
 
235
  if df2.empty:
236
  st.warning("No matching records found.")
@@ -254,15 +245,14 @@ if query:
254
  stance = row['stance_standardized']
255
  trend_summary = row['llama_trend_summary'].strip()
256
 
257
- st.markdown(f"- ** Date:** {date} | **🔗 Score:** {score:.2f}")
258
  st.markdown(f" - ** Category:** {cat_std}")
259
  st.markdown(f" - ** Goal:** {goal}")
260
- st.markdown(f" - ** Intent:** {intent} | **⚖️ Stance:** {stance}")
261
  st.markdown(f" > ** Trend Summary:** {trend_summary}")
262
 
263
  collected.append(row['summary_insight'])
264
 
265
- # RAG Summary
266
  st.subheader(" RAG-Generated Summary")
267
  summary = rag_summarize(collected, summarizer)
268
  st.success(summary)
 
87
  # df2 = df.copy()
88
  # if cat:
89
  # df2 = df2[df2['category_&_subcategory_standardized'] == cat]
90
+ # st.info(f"Filtering by category: **{cat}**")
91
  # if yr:
92
  # df2 = df2[df2['status_date'].dt.year == yr]
93
  # if mon:
94
  # df2 = df2[df2['status_date'].dt.month == mon]
95
+ # st.info(f"Filtering by date: **{datetime(yr, mon, 1):%B %Y}**")
96
  # else:
97
+ # st.info(f" Filtering by year: **{yr}**")
98
 
99
  # if df2.empty:
100
  # st.warning("No matching records found.")
 
139
  from sklearn.feature_extraction.text import TfidfVectorizer
140
  from datetime import datetime
141
 
142
+ # loading data
143
  @st.cache_data
144
  def load_data():
145
  df = pd.read_csv("Illinois_Entire_Data_Insights_Final_v2.csv")
 
150
  df["summary_insight"] = df["llama_trend_summary"] + "\n" + df["llama_insight"]
151
  return df
152
 
 
153
  @st.cache_resource
154
  def load_models():
155
  embed_model = SentenceTransformer('all-MiniLM-L6-v2')
156
  summarizer = pipeline("summarization", model="t5-small", tokenizer="t5-small")
157
  return embed_model, summarizer
158
 
 
159
  @st.cache_data
160
  def compute_embeddings(texts, _model):
161
  return _model.encode(texts, show_progress_bar=True)
162
 
163
+ def semantic_search(query, embeddings, model, threshold=0.5):
 
164
  query_embedding = model.encode([query])
165
  sims = cosine_similarity(query_embedding, embeddings)[0]
166
  return [(i, s) for i, s in enumerate(sims) if s > threshold]
167
 
168
+
169
  def rag_summarize(texts, summarizer, top_k=5):
170
  if not texts:
171
  return "No relevant content to summarize."
 
179
  out = summarizer(prompt, max_length=60, min_length=30, do_sample=False)
180
  return out[0]['summary_text']
181
 
 
182
  def extract_month_year(q):
183
  month_map = {m: i for i, m in enumerate(
184
  ["january", "february", "march", "april", "may", "june",
 
189
  yr = int(ym.group()) if ym else None
190
  return mon, yr
191
 
 
192
  def extract_topic_match(query, df):
193
  query_lower = query.lower()
194
  matched_rows = df[
 
199
  ]
200
  return matched_rows
201
 
202
+
203
  st.set_page_config(page_title="IL Trends Q&A", layout="wide")
204
  st.title("Illinois Legislative Trends Q&A")
205
+ st.markdown("Ask about trends in **topics** like education, higher education, etc!")
206
 
207
  df = load_data()
208
  embed_model, summarizer = load_models()
209
 
210
+ query = st.text_input(" Ask a question (e.g., ‘trends in Higher education in 2024’):")
211
 
212
  if query:
 
213
  mon, yr = extract_month_year(query)
214
  df2 = extract_topic_match(query, df)
215
 
 
216
  if df2.empty:
217
  df2 = df
 
 
218
  if yr:
219
  df2 = df2[df2['status_date'].dt.year == yr]
220
  if mon:
221
  df2 = df2[df2['status_date'].dt.month == mon]
222
+ st.info(f"Filtering by date: **{datetime(yr, mon, 1):%B %Y}**")
223
  else:
224
+ st.info(f"Filtering by year: **{yr}**")
225
 
226
  if df2.empty:
227
  st.warning("No matching records found.")
 
245
  stance = row['stance_standardized']
246
  trend_summary = row['llama_trend_summary'].strip()
247
 
248
+ st.markdown(f"- ** Date:** {date} | ** Score:** {score:.2f}")
249
  st.markdown(f" - ** Category:** {cat_std}")
250
  st.markdown(f" - ** Goal:** {goal}")
251
+ st.markdown(f" - ** Intent:** {intent} | ** Stance:** {stance}")
252
  st.markdown(f" > ** Trend Summary:** {trend_summary}")
253
 
254
  collected.append(row['summary_insight'])
255
 
 
256
  st.subheader(" RAG-Generated Summary")
257
  summary = rag_summarize(collected, summarizer)
258
  st.success(summary)