Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -87,14 +87,14 @@
|
|
| 87 |
# df2 = df.copy()
|
| 88 |
# if cat:
|
| 89 |
# df2 = df2[df2['category_&_subcategory_standardized'] == cat]
|
| 90 |
-
# st.info(f"
|
| 91 |
# if yr:
|
| 92 |
# df2 = df2[df2['status_date'].dt.year == yr]
|
| 93 |
# if mon:
|
| 94 |
# df2 = df2[df2['status_date'].dt.month == mon]
|
| 95 |
-
# st.info(f"
|
| 96 |
# else:
|
| 97 |
-
# st.info(f"
|
| 98 |
|
| 99 |
# if df2.empty:
|
| 100 |
# st.warning("No matching records found.")
|
|
@@ -139,7 +139,7 @@ from sklearn.metrics.pairwise import cosine_similarity
|
|
| 139 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 140 |
from datetime import datetime
|
| 141 |
|
| 142 |
-
#
|
| 143 |
@st.cache_data
|
| 144 |
def load_data():
|
| 145 |
df = pd.read_csv("Illinois_Entire_Data_Insights_Final_v2.csv")
|
|
@@ -150,25 +150,22 @@ def load_data():
|
|
| 150 |
df["summary_insight"] = df["llama_trend_summary"] + "\n" + df["llama_insight"]
|
| 151 |
return df
|
| 152 |
|
| 153 |
-
# ------------------ Load Models ------------------ #
|
| 154 |
@st.cache_resource
|
| 155 |
def load_models():
|
| 156 |
embed_model = SentenceTransformer('all-MiniLM-L6-v2')
|
| 157 |
summarizer = pipeline("summarization", model="t5-small", tokenizer="t5-small")
|
| 158 |
return embed_model, summarizer
|
| 159 |
|
| 160 |
-
# ------------------ Compute Embeddings ------------------ #
|
| 161 |
@st.cache_data
|
| 162 |
def compute_embeddings(texts, _model):
|
| 163 |
return _model.encode(texts, show_progress_bar=True)
|
| 164 |
|
| 165 |
-
|
| 166 |
-
def semantic_search(query, embeddings, model, threshold=0.5): # Increased threshold to 0.7
|
| 167 |
query_embedding = model.encode([query])
|
| 168 |
sims = cosine_similarity(query_embedding, embeddings)[0]
|
| 169 |
return [(i, s) for i, s in enumerate(sims) if s > threshold]
|
| 170 |
|
| 171 |
-
|
| 172 |
def rag_summarize(texts, summarizer, top_k=5):
|
| 173 |
if not texts:
|
| 174 |
return "No relevant content to summarize."
|
|
@@ -182,7 +179,6 @@ def rag_summarize(texts, summarizer, top_k=5):
|
|
| 182 |
out = summarizer(prompt, max_length=60, min_length=30, do_sample=False)
|
| 183 |
return out[0]['summary_text']
|
| 184 |
|
| 185 |
-
# ------------------ Extract Month/Year from Query ------------------ #
|
| 186 |
def extract_month_year(q):
|
| 187 |
month_map = {m: i for i, m in enumerate(
|
| 188 |
["january", "february", "march", "april", "may", "june",
|
|
@@ -193,7 +189,6 @@ def extract_month_year(q):
|
|
| 193 |
yr = int(ym.group()) if ym else None
|
| 194 |
return mon, yr
|
| 195 |
|
| 196 |
-
# ------------------ Topic-Based Matching ------------------ #
|
| 197 |
def extract_topic_match(query, df):
|
| 198 |
query_lower = query.lower()
|
| 199 |
matched_rows = df[
|
|
@@ -204,33 +199,29 @@ def extract_topic_match(query, df):
|
|
| 204 |
]
|
| 205 |
return matched_rows
|
| 206 |
|
| 207 |
-
|
| 208 |
st.set_page_config(page_title="IL Trends Q&A", layout="wide")
|
| 209 |
st.title("Illinois Legislative Trends Q&A")
|
| 210 |
-
st.markdown("Ask about **topics** like education,
|
| 211 |
|
| 212 |
df = load_data()
|
| 213 |
embed_model, summarizer = load_models()
|
| 214 |
|
| 215 |
-
query = st.text_input("
|
| 216 |
|
| 217 |
if query:
|
| 218 |
-
# Extract filters
|
| 219 |
mon, yr = extract_month_year(query)
|
| 220 |
df2 = extract_topic_match(query, df)
|
| 221 |
|
| 222 |
-
# Fallback to full dataset if nothing found on topic
|
| 223 |
if df2.empty:
|
| 224 |
df2 = df
|
| 225 |
-
|
| 226 |
-
# Apply year/month filters
|
| 227 |
if yr:
|
| 228 |
df2 = df2[df2['status_date'].dt.year == yr]
|
| 229 |
if mon:
|
| 230 |
df2 = df2[df2['status_date'].dt.month == mon]
|
| 231 |
-
st.info(f"
|
| 232 |
else:
|
| 233 |
-
st.info(f"
|
| 234 |
|
| 235 |
if df2.empty:
|
| 236 |
st.warning("No matching records found.")
|
|
@@ -254,15 +245,14 @@ if query:
|
|
| 254 |
stance = row['stance_standardized']
|
| 255 |
trend_summary = row['llama_trend_summary'].strip()
|
| 256 |
|
| 257 |
-
st.markdown(f"- ** Date:** {date} | **
|
| 258 |
st.markdown(f" - ** Category:** {cat_std}")
|
| 259 |
st.markdown(f" - ** Goal:** {goal}")
|
| 260 |
-
st.markdown(f" - ** Intent:** {intent} | **
|
| 261 |
st.markdown(f" > ** Trend Summary:** {trend_summary}")
|
| 262 |
|
| 263 |
collected.append(row['summary_insight'])
|
| 264 |
|
| 265 |
-
# RAG Summary
|
| 266 |
st.subheader(" RAG-Generated Summary")
|
| 267 |
summary = rag_summarize(collected, summarizer)
|
| 268 |
st.success(summary)
|
|
|
|
| 87 |
# df2 = df.copy()
|
| 88 |
# if cat:
|
| 89 |
# df2 = df2[df2['category_&_subcategory_standardized'] == cat]
|
| 90 |
+
# st.info(f"Filtering by category: **{cat}**")
|
| 91 |
# if yr:
|
| 92 |
# df2 = df2[df2['status_date'].dt.year == yr]
|
| 93 |
# if mon:
|
| 94 |
# df2 = df2[df2['status_date'].dt.month == mon]
|
| 95 |
+
# st.info(f"Filtering by date: **{datetime(yr, mon, 1):%B %Y}**")
|
| 96 |
# else:
|
| 97 |
+
# st.info(f" Filtering by year: **{yr}**")
|
| 98 |
|
| 99 |
# if df2.empty:
|
| 100 |
# st.warning("No matching records found.")
|
|
|
|
| 139 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 140 |
from datetime import datetime
|
| 141 |
|
| 142 |
+
# loading data
|
| 143 |
@st.cache_data
|
| 144 |
def load_data():
|
| 145 |
df = pd.read_csv("Illinois_Entire_Data_Insights_Final_v2.csv")
|
|
|
|
| 150 |
df["summary_insight"] = df["llama_trend_summary"] + "\n" + df["llama_insight"]
|
| 151 |
return df
|
| 152 |
|
|
|
|
| 153 |
@st.cache_resource
|
| 154 |
def load_models():
|
| 155 |
embed_model = SentenceTransformer('all-MiniLM-L6-v2')
|
| 156 |
summarizer = pipeline("summarization", model="t5-small", tokenizer="t5-small")
|
| 157 |
return embed_model, summarizer
|
| 158 |
|
|
|
|
| 159 |
@st.cache_data
|
| 160 |
def compute_embeddings(texts, _model):
|
| 161 |
return _model.encode(texts, show_progress_bar=True)
|
| 162 |
|
| 163 |
+
def semantic_search(query, embeddings, model, threshold=0.5):
|
|
|
|
| 164 |
query_embedding = model.encode([query])
|
| 165 |
sims = cosine_similarity(query_embedding, embeddings)[0]
|
| 166 |
return [(i, s) for i, s in enumerate(sims) if s > threshold]
|
| 167 |
|
| 168 |
+
|
| 169 |
def rag_summarize(texts, summarizer, top_k=5):
|
| 170 |
if not texts:
|
| 171 |
return "No relevant content to summarize."
|
|
|
|
| 179 |
out = summarizer(prompt, max_length=60, min_length=30, do_sample=False)
|
| 180 |
return out[0]['summary_text']
|
| 181 |
|
|
|
|
| 182 |
def extract_month_year(q):
|
| 183 |
month_map = {m: i for i, m in enumerate(
|
| 184 |
["january", "february", "march", "april", "may", "june",
|
|
|
|
| 189 |
yr = int(ym.group()) if ym else None
|
| 190 |
return mon, yr
|
| 191 |
|
|
|
|
| 192 |
def extract_topic_match(query, df):
|
| 193 |
query_lower = query.lower()
|
| 194 |
matched_rows = df[
|
|
|
|
| 199 |
]
|
| 200 |
return matched_rows
|
| 201 |
|
| 202 |
+
|
| 203 |
st.set_page_config(page_title="IL Trends Q&A", layout="wide")
|
| 204 |
st.title("Illinois Legislative Trends Q&A")
|
| 205 |
+
st.markdown("Ask about trends in **topics** like education, higher education, etc!")
|
| 206 |
|
| 207 |
df = load_data()
|
| 208 |
embed_model, summarizer = load_models()
|
| 209 |
|
| 210 |
+
query = st.text_input(" Ask a question (e.g., “trends in Higher education in 2024”):")
|
| 211 |
|
| 212 |
if query:
|
|
|
|
| 213 |
mon, yr = extract_month_year(query)
|
| 214 |
df2 = extract_topic_match(query, df)
|
| 215 |
|
|
|
|
| 216 |
if df2.empty:
|
| 217 |
df2 = df
|
|
|
|
|
|
|
| 218 |
if yr:
|
| 219 |
df2 = df2[df2['status_date'].dt.year == yr]
|
| 220 |
if mon:
|
| 221 |
df2 = df2[df2['status_date'].dt.month == mon]
|
| 222 |
+
st.info(f"Filtering by date: **{datetime(yr, mon, 1):%B %Y}**")
|
| 223 |
else:
|
| 224 |
+
st.info(f"Filtering by year: **{yr}**")
|
| 225 |
|
| 226 |
if df2.empty:
|
| 227 |
st.warning("No matching records found.")
|
|
|
|
| 245 |
stance = row['stance_standardized']
|
| 246 |
trend_summary = row['llama_trend_summary'].strip()
|
| 247 |
|
| 248 |
+
st.markdown(f"- ** Date:** {date} | ** Score:** {score:.2f}")
|
| 249 |
st.markdown(f" - ** Category:** {cat_std}")
|
| 250 |
st.markdown(f" - ** Goal:** {goal}")
|
| 251 |
+
st.markdown(f" - ** Intent:** {intent} | ** Stance:** {stance}")
|
| 252 |
st.markdown(f" > ** Trend Summary:** {trend_summary}")
|
| 253 |
|
| 254 |
collected.append(row['summary_insight'])
|
| 255 |
|
|
|
|
| 256 |
st.subheader(" RAG-Generated Summary")
|
| 257 |
summary = rag_summarize(collected, summarizer)
|
| 258 |
st.success(summary)
|