Spaces:
Running
Running
Update pages/5 Burst Detection.py
Browse files
pages/5 Burst Detection.py
CHANGED
@@ -135,7 +135,10 @@ def clean_data(df):
|
|
135 |
df['processed'] = df.apply(lambda row: preprocess_text(f"{row.get(col_name, '')}"), axis=1)
|
136 |
|
137 |
# Vectorize processed text
|
138 |
-
|
|
|
|
|
|
|
139 |
X = vectorizer.fit_transform(df['processed'].tolist())
|
140 |
|
141 |
# Create DataFrame from the Document-Term Matrix (DTM)
|
@@ -350,14 +353,16 @@ uploaded_file = st.file_uploader('', type=['csv', 'txt'], on_change=reset_all)
|
|
350 |
|
351 |
if uploaded_file is not None:
|
352 |
try:
|
353 |
-
c1, c2, c3 = st.columns([
|
354 |
top_n = c1.number_input("Number of top words to analyze", min_value=5, value=10, step=1, on_change=reset_all)
|
355 |
viz_selected = c2.selectbox("Option for visualization",
|
356 |
("Line graph", "Scatter plot"), on_change=reset_all)
|
357 |
-
running_total = c3.selectbox("
|
358 |
("Running total", "By occurrences each year"), on_change=reset_all)
|
|
|
|
|
359 |
|
360 |
-
d1, d2 = st.columns([
|
361 |
df, coldf, MIN, MAX, GAP = load_data(uploaded_file)
|
362 |
col_name = d1.selectbox("Select column to analyze",
|
363 |
(coldf), on_change=reset_all)
|
|
|
135 |
df['processed'] = df.apply(lambda row: preprocess_text(f"{row.get(col_name, '')}"), axis=1)
|
136 |
|
137 |
# Vectorize processed text
|
138 |
+
if count_method == "Document Frequency":
|
139 |
+
vectorizer = CountVectorizer(lowercase=False, tokenizer=lambda x: x.split(), binary=True)
|
140 |
+
else:
|
141 |
+
vectorizer = CountVectorizer(lowercase=False, tokenizer=lambda x: x.split())
|
142 |
X = vectorizer.fit_transform(df['processed'].tolist())
|
143 |
|
144 |
# Create DataFrame from the Document-Term Matrix (DTM)
|
|
|
353 |
|
354 |
if uploaded_file is not None:
|
355 |
try:
|
356 |
+
c1, c2, c3, c4 = st.columns([2,2,3,3])
|
357 |
top_n = c1.number_input("Number of top words to analyze", min_value=5, value=10, step=1, on_change=reset_all)
|
358 |
viz_selected = c2.selectbox("Option for visualization",
|
359 |
("Line graph", "Scatter plot"), on_change=reset_all)
|
360 |
+
running_total = c3.selectbox("Calculation method",
|
361 |
("Running total", "By occurrences each year"), on_change=reset_all)
|
362 |
+
count_method = c4.selectbox("Count by",
|
363 |
+
("Term Frequency", "Document Frequency"), on_change=reset_all)
|
364 |
|
365 |
+
d1, d2 = st.columns([2,8])
|
366 |
df, coldf, MIN, MAX, GAP = load_data(uploaded_file)
|
367 |
col_name = d1.selectbox("Select column to analyze",
|
368 |
(coldf), on_change=reset_all)
|