Commit
•
33dca10
1
Parent(s):
3121bfd
Upload 7 files
Browse files
- app.py +45 -0
- clusters.joblib +3 -0
- embeddings_tsne.joblib +3 -0
- language_list.joblib +3 -0
- requirements.txt +3 -0
- sentence_list.joblib +3 -0
app.py
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
|
3 |
+
from joblib import load
|
4 |
+
import pandas as pd
|
5 |
+
import plotly.express as px
|
6 |
+
|
7 |
+
|
8 |
+
@st.cache_data(show_spinner=False)
def _load_artifact(path):
    """Load one joblib artifact, cached across Streamlit reruns.

    Streamlit re-executes this script from the top on every widget
    interaction. Caching the result keeps each file from being re-read and
    deserialised on every rerun. The spinner is disabled so the page looks
    the same as it did with plain module-level loads.

    NOTE(security): joblib.load is pickle-based and can execute arbitrary
    code while loading. Only use it on artifacts from a trusted source.

    Args:
        path: Path of the ``.joblib`` file to load.

    Returns:
        The object stored in the file.
    """
    return load(path)


# Precomputed inputs for the visualisation. main() combines them column-wise
# into a single DataFrame, so they are expected to have equal length.
reduced_embeddings = _load_artifact("embeddings_tsne.joblib")
combined_sentences = _load_artifact("sentence_list.joblib")
languages = _load_artifact("language_list.joblib")
clusters = _load_artifact("clusters.joblib")
|
12 |
+
|
13 |
+
def main():
    """Build and render the Streamlit page.

    Assembles the module-level ``reduced_embeddings``, ``languages``,
    ``combined_sentences`` and ``clusters`` into one DataFrame, lets the
    user choose which clusters to show, and draws a Plotly scatter plot of
    the selected rows coloured by language.
    """
    st.title("LASER Multilingual Sentence Embeddings Visualization")

    x_axis = 'TSNE Component 1'
    y_axis = 'TSNE Component 2'

    # Columns 0 and 1 of the reduced embeddings are the plot coordinates.
    columns = {
        x_axis: reduced_embeddings[:, 0],
        y_axis: reduced_embeddings[:, 1],
        'Language': languages,
        'Sentence': combined_sentences,
        'Cluster': [f'Cluster {cluster_id}' for cluster_id in clusters],
    }
    df = pd.DataFrame(columns)

    show_everything = st.checkbox("Select All Clusters")
    unique_clusters = df['Cluster'].unique()

    # The multiselect is only rendered while 'Select All Clusters' is
    # unchecked; it starts with up to the first ten cluster labels selected.
    if show_everything:
        selected_clusters = unique_clusters
    else:
        selected_clusters = st.multiselect(
            "Select clusters to display",
            unique_clusters,
            default=unique_clusters[:10],
        )

    in_selection = df['Cluster'].isin(selected_clusters)
    filtered_df = df[in_selection]

    fig = px.scatter(
        filtered_df,
        x=x_axis,
        y=y_axis,
        color='Language',
        hover_data=['Sentence', 'Cluster'],
    )
    fig.update_layout(
        title="Multilingual Sentence Embeddings Visualization",
        xaxis_title=x_axis,
        yaxis_title=y_axis,
        legend_title="Language",
    )

    st.plotly_chart(fig, use_container_width=True)
|
43 |
+
|
44 |
+
# Script entry point: build the page only when this file is executed as the
# main module, not when it is imported.
if __name__ == "__main__":
    main()
|
clusters.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dfebc019aa4c59a92263fd8827382a7740cc3b834ee0b36718862eb9f0269363
|
3 |
+
size 1025
|
embeddings_tsne.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e2a4a720720a90789b2ab0243275d129d85106bc1183226927285b7748376933
|
3 |
+
size 1825
|
language_list.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ecf1dc86b022f7223afee99db1ccd1aae834733bd6a13c0ef0962fa81ed96a52
|
3 |
+
size 495
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
plotly
|
2 |
+
streamlit
|
3 |
+
joblib
|
sentence_list.joblib
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84d94a24b9bbde9827cf8a75517d09c3add35717e20cae1ced20f65d9aee3999
|
3 |
+
size 7669
|