Spaces:
Sleeping
Sleeping
Commit
·
a4b6d10
1
Parent(s):
6f88b44
Add nltk.download for wordnet resource
Browse files
app.py
CHANGED
|
@@ -12,13 +12,17 @@ from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS
|
|
| 12 |
import os
|
| 13 |
import time
|
| 14 |
import zipfile
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
# Load models and label mapping
|
| 17 |
lda = joblib.load("lda_model.joblib")
|
| 18 |
vectorizer = joblib.load("vectorizer.joblib")
|
| 19 |
auto_labels = joblib.load("topic_labels.joblib")
|
| 20 |
|
| 21 |
-
#Optional topic summaries
|
| 22 |
topic_summaries = {
|
| 23 |
"Politics & Gun Rights": "Discussions about government policies, laws, gun control, and rights.",
|
| 24 |
"Computing & Hardware": "Technical issues and terms related to computer hardware and drivers.",
|
|
@@ -32,7 +36,7 @@ topic_summaries = {
|
|
| 32 |
"Middle East Politics & Conflicts": "Topics involving Israel, Armenia, conflict regions."
|
| 33 |
}
|
| 34 |
|
| 35 |
-
#Tokenizer and lemmatizer
|
| 36 |
tokenizer = TreebankWordTokenizer()
|
| 37 |
lemmatizer = WordNetLemmatizer()
|
| 38 |
|
|
@@ -85,7 +89,8 @@ def cleanup_old_predictions(directory=".", extension=".txt", max_age_minutes=10)
|
|
| 85 |
def download_log():
|
| 86 |
zip_filename = "lda_predictions_log.zip"
|
| 87 |
with zipfile.ZipFile(zip_filename, "w", zipfile.ZIP_DEFLATED) as zipf:
|
| 88 |
-
|
|
|
|
| 89 |
return zip_filename
|
| 90 |
|
| 91 |
def save_feedback(text, feedback):
|
|
|
|
| 12 |
import os
|
| 13 |
import time
|
| 14 |
import zipfile
|
| 15 |
+
import nltk
|
| 16 |
+
|
| 17 |
+
# Download wordnet resource to avoid LookupError
|
| 18 |
+
nltk.download('wordnet')
|
| 19 |
|
| 20 |
# Load models and label mapping
|
| 21 |
lda = joblib.load("lda_model.joblib")
|
| 22 |
vectorizer = joblib.load("vectorizer.joblib")
|
| 23 |
auto_labels = joblib.load("topic_labels.joblib")
|
| 24 |
|
| 25 |
+
# Optional topic summaries
|
| 26 |
topic_summaries = {
|
| 27 |
"Politics & Gun Rights": "Discussions about government policies, laws, gun control, and rights.",
|
| 28 |
"Computing & Hardware": "Technical issues and terms related to computer hardware and drivers.",
|
|
|
|
| 36 |
"Middle East Politics & Conflicts": "Topics involving Israel, Armenia, conflict regions."
|
| 37 |
}
|
| 38 |
|
| 39 |
+
# Tokenizer and lemmatizer
|
| 40 |
tokenizer = TreebankWordTokenizer()
|
| 41 |
lemmatizer = WordNetLemmatizer()
|
| 42 |
|
|
|
|
| 89 |
def download_log():
|
| 90 |
zip_filename = "lda_predictions_log.zip"
|
| 91 |
with zipfile.ZipFile(zip_filename, "w", zipfile.ZIP_DEFLATED) as zipf:
|
| 92 |
+
if os.path.exists("lda_predictions_log.csv"):
|
| 93 |
+
zipf.write("lda_predictions_log.csv")
|
| 94 |
return zip_filename
|
| 95 |
|
| 96 |
def save_feedback(text, feedback):
|