Merge branch 'main' of https://huggingface.co/spaces/haotle/sdkTest
pages/2 Topic Modeling.py (+19 -20)
@@ -749,6 +749,9 @@ with st.popover("🔗 Menu"):
 st.page_link("pages/5 Burst Detection.py", label="Burst Detection", icon="5️⃣")
 st.page_link("pages/6 Keywords Stem.py", label="Keywords Stem", icon="6️⃣")
 st.page_link("pages/7 Sentiment Analysis.py", label="Sentiment Analysis", icon="7️⃣")
+st.page_link("pages/8 Shifterator.py", label="Shifterator", icon="8️⃣")
+st.page_link("pages/9 Summarization.py", label = "Summarization",icon ="9️⃣")
+st.page_link("pages/10 WordCloud.py", label = "WordCloud", icon = "🔟")

 st.header("Topic Modeling", anchor=False)
 st.subheader('Put your file here...', anchor=False)
@@ -871,7 +874,7 @@ if uploaded_file is not None:
 method = c1.selectbox(
 'Choose method',
 ('Choose...', 'pyLDA', 'Biterm', 'BERTopic'))
-ColCho = c2.selectbox('Choose column', (["Title","Abstract"]))
+ColCho = c2.selectbox('Choose column', (["Abstract","Title", "Abstract + Title"]))
 num_cho = c3.number_input('Choose number of topics', min_value=2, max_value=30, value=5)

 d1, d2 = st.columns([3,7])
@@ -910,8 +913,8 @@ if uploaded_file is not None:
 if fine_tuning:
 topic_labelling = st.toggle("Automatic topic labelling")
 if topic_labelling:
-…
-if …
+llm_provider = st.selectbox("Model",["OpenAI/gpt-4o","Google/flan-t5","LiquidAI/LFM2-350M"])
+if llm_provider == "OpenAI/gpt-4o":
 api_key = st.text_input("API Key")

 else:
@@ -920,6 +923,10 @@ if uploaded_file is not None:
 #===clean csv===
 @st.cache_data(ttl=3600, show_spinner=False)
 def clean_csv(extype):
+if (ColCho=="Abstract + Title"):
+papers["Abstract + Title"] = papers["Title"] + " " + papers["Abstract"]
+st.write(papers["Abstract + Title"])
+
 paper = papers.dropna(subset=[ColCho])

 #===mapping===
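
The combined column added in clean_csv() relies on pandas NaN propagation: concatenating two text columns yields NaN wherever either side is missing, so the existing dropna(subset=[ColCho]) still filters incomplete records. A minimal sketch, assuming a DataFrame named papers as in the diff (the toy rows are made up for illustration):

# Hedged sketch: column names follow the diff; the rows are illustrative only.
import pandas as pd

papers = pd.DataFrame({
    "Title": ["A study of X", None, "Notes on Z"],
    "Abstract": ["Detailed abstract.", "Orphan abstract.", None],
})

# Same combination as in clean_csv(): missing values propagate to the new column.
papers["Abstract + Title"] = papers["Title"] + " " + papers["Abstract"]

ColCho = "Abstract + Title"
paper = papers.dropna(subset=[ColCho])    # keeps only rows with both Title and Abstract
print(paper[ColCho].tolist())             # ['A study of X Detailed abstract.']
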
@@ -1202,37 +1209,30 @@ if uploaded_file is not None:
 "MMR": mmr,
 }
 if topic_labelling:
-if …
+if llm_provider == "OpenAI/gpt-4o":
 client = openai.OpenAI(api_key=api_key)
 representation_model = {
 "KeyBERT": keybert,
 "MMR": mmr,
 "test": OpenAI(client, model = "gpt-4o-mini", delay_in_seconds=10)
 }
-elif …
-…
-clientmod = TextGeneration(…
+elif llm_provider == "Google/flan-t5":
+pipe = pipeline("text2text-generation", model = "google/flan-t5-base")
+clientmod = TextGeneration(pipe)
 representation_model = {
 "KeyBERT": keybert,
 "MMR": mmr,
 "test": clientmod
 }
-elif …
-…
-…
-torch_dtype = "auto",
-device_map = "auto",
-)
-clientmod = TextGeneration(gen)
-…
+elif llm_provider == "LiquidAI/LFM2-350M":
+pipe = pipeline("text-generation", model = "LiquidAI/LFM2-350M")
+clientmod = TextGeneration(pipe)
 representation_model = {
 "KeyBERT": keybert,
 "MMR": mmr,
-"test": …
+"test": clientmod
 }

-…
-…
 vectorizer_model = CountVectorizer(ngram_range=(1, xgram), stop_words='english')
 topic_model = BERTopic(representation_model = representation_model, embedding_model=model, hdbscan_model=cluster_model, language=lang, umap_model=umap_model, vectorizer_model=vectorizer_model, top_n_words=bert_top_n_words)
 topics, probs = topic_model.fit_transform(topic_abs, embeddings=embeddings)
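
The labelling branches above use BERTopic's multi-aspect representation API. A minimal sketch of the local "Google/flan-t5" branch, assuming keybert, mmr and the document list are stand-ins built here for illustration rather than the app's actual objects:

# Hedged sketch: mirrors the diff's flan-t5 branch; keybert, mmr and docs are illustrative.
from transformers import pipeline
from bertopic import BERTopic
from bertopic.representation import KeyBERTInspired, MaximalMarginalRelevance, TextGeneration

keybert = KeyBERTInspired()
mmr = MaximalMarginalRelevance(diversity=0.3)      # diversity value is an assumption

# Local text2text model, so no API key is required (unlike the OpenAI branch).
pipe = pipeline("text2text-generation", model="google/flan-t5-base")
clientmod = TextGeneration(pipe)

representation_model = {
    "KeyBERT": keybert,
    "MMR": mmr,
    "test": clientmod,                             # aspect name copied from the diff
}

topic_model = BERTopic(representation_model=representation_model)
# topics, probs = topic_model.fit_transform(docs)  # docs: list[str] of titles/abstracts
# topic_model.get_topic(0, full=True)              # exposes the "KeyBERT", "MMR", "test" aspects
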
@@ -1343,8 +1343,7 @@ if uploaded_file is not None:
 st.button("Download Results")
 st.text("Click Download results button at bottom of page")

-except …
+except:
 st.error("Please ensure that your file is correct. Please contact us if you find that this is an error.", icon="🚨")
-st.write(e)
 st.stop()
 >>>>>>> e52d4a30c18f770eb968980667fa8e5a7b287580
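
The last hunk replaces the pre-merge handler, which bound the exception and echoed it, with a bare except:. A minimal sketch of the surrounding pattern, where run_model() is a hypothetical placeholder for the modeling code above; catching Exception rather than using a bare except: keeps the error object available for display:

# Hedged sketch: run_model() is a hypothetical stand-in for the topic-modeling code.
import streamlit as st

try:
    run_model()
except Exception as e:    # narrower than a bare "except:", and keeps the exception object
    st.error("Please ensure that your file is correct. Please contact us if you find that this is an error.", icon="🚨")
    st.write(e)           # the pre-merge branch surfaced the error text like this
    st.stop()
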