Spaces:

rianders
/

live_view_embeddings

Running

App Files Files Community

rianders commited on May 13

Commit

4a49186

•

1 Parent(s): 4bb5140

Create app.py

Browse files

Files changed (1) hide show

app.py +93 -0

app.py ADDED Viewed

	@@ -0,0 +1,93 @@

+import streamlit as st
+from transformers import BertModel, BertTokenizer
+import torch
+from sklearn.decomposition import PCA
+import plotly.graph_objs as go
+import numpy as np
+# BERT embeddings function
+def get_bert_embeddings(words):
+    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+    model = BertModel.from_pretrained('bert-base-uncased')
+    embeddings = []
+    # Extract embeddings
+    for word in words:
+        inputs = tokenizer(word, return_tensors='pt')
+        outputs = model(**inputs)
+        embeddings.append(outputs.last_hidden_state[0][0].detach().numpy())
+    # Reduce dimensions to 3 using PCA
+    if len(embeddings) > 0:
+        pca = PCA(n_components=3)
+        reduced_embeddings = pca.fit_transform(np.array(embeddings))
+        return reduced_embeddings
+    return []
+# Plotly plotting function
+def plot_interactive_bert_embeddings(embeddings, words):
+    if len(words) < 4:
+        st.error("Please provide at least 4 words/phrases for effective visualization.")
+        return None
+    data = []
+    for i, word in enumerate(words):
+        trace = go.Scatter3d(
+            x=[embeddings[i][0]],
+            y=[embeddings[i][1]],
+            z=[embeddings[i][2]],
+            mode='markers+text',
+            text=[word],
+            name=word
+        )
+        data.append(trace)
+    layout = go.Layout(
+        title='3D Scatter Plot of BERT Embeddings',
+        scene=dict(
+            xaxis=dict(title='PCA Component 1'),
+            yaxis=dict(title='PCA Component 2'),
+            zaxis=dict(title='PCA Component 3')
+        ),
+        autosize=False,
+        width=800,
+        height=600
+    )
+    fig = go.Figure(data=data, layout=layout)
+    return fig
+def main():
+    st.title("BERT Embeddings Visualization")
+    # Initialize or get existing words list from the session state
+    if 'words' not in st.session_state:
+        st.session_state.words = []
+    # Text input for new words
+    new_words_input = st.text_input("Enter a new word/phrase:")
+    # Button to add new words
+    if st.button("Add Word/Phrase"):
+        if new_words_input:
+            st.session_state.words.append(new_words_input)
+            st.success(f"Added: {new_words_input}")
+    # Display current list of words
+    if st.session_state.words:
+        st.write("Current list of words/phrases:", ', '.join(st.session_state.words))
+    # Generate embeddings and plot
+    if st.button("Generate Embeddings"):
+        with st.spinner('Generating embeddings...'):
+            embeddings = get_bert_embeddings(st.session_state.words)
+            fig = plot_interactive_bert_embeddings(embeddings, st.session_state.words)
+            if fig is not None:
+                st.plotly_chart(fig, use_container_width=True)
+    # Reset button
+    if st.button("Reset"):
+        st.session_state.words = []
+if __name__ == "__main__":
+    main()