Spaces:

RamAnanth1
/

iclr2023

Sleeping

App Files Files Community

RamAnanth1 committed on Feb 14, 2023

Commit

553b9ec

•

1 Parent(s): 5601530

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -3

app.py CHANGED Viewed

@@ -6,6 +6,8 @@ import pandas as pd
 import tqdm
 import cohere
 from topically import Topically
 from bertopic import BERTopic
@@ -47,10 +49,34 @@ st.write("Number of submissions accepted at ICLR 2023:", len(df))
 df_filtered = df[['id', 'content.title', 'content.keywords', 'content.abstract']]
 df = df_filtered
-list_of_abstracts = list(df["content.title"].values)
-x = st.slider('Select a value')
-st.write(x, 'squared is', x * x)

 import tqdm
 import cohere
+import os
 from topically import Topically
 from bertopic import BERTopic
 df_filtered = df[['id', 'content.title', 'content.keywords', 'content.abstract']]
 df = df_filtered
+if "CO_API_KEY" not in os.environ:
+    raise KeyError("CO_API_KEY not found in st.secrets or os.environ. Please set it in "
+                   ".streamlit/secrets.toml or as an environment variable.")
+co = cohere.Client(os.environ["CO_API_KEY"])
+list_of_titles = list(df["content.title"].values)
+embeds = co.embed(texts=list_of_titles,
+  					model="small").embeddings
+embeds_npy = np.array(embeds)
+# Load and initialize BERTopic to use KMeans clustering with 8 clusters only.
+cluster_model = KMeans(n_clusters=8)
+topic_model = BERTopic(hdbscan_model=cluster_model)
+# df is a dataframe. df['content.title'] is the column of text we're modeling
+df['topic'], probabilities = topic_model.fit_transform(df['content.title'], embeds_npy)
+app = Topically(os.environ["CO_API_KEY"])
+df['topic_name'], topic_names = app.name_topics((df['content.title'], df['topic']), num_generations=5)
+st.write("Topics extracted are:", topic_names)
+topic_model.set_topic_labels(topic_names)
+# topic_model.visualize_documents(df['content.title'].values,
+#                                 embeddings=embeds_npy,
+#                                 topics = list(range(8)),
+#                                 custom_labels=True,
+#                                 width=900,
+#                                 height=600)