Spaces:

imh0
/

transformers-p1-embeddings

Runtime error

App Files Files Community

im committed on Jul 26, 2023

Commit

4bb4754

1 Parent(s): 348c0ea

add heatmap and vector search examples

Browse files

Files changed (2) hide show

app.py +34 -3
requirements.txt +4 -1

app.py CHANGED Viewed

@@ -444,7 +444,6 @@ with st.expander("References:"):
 divider()
 st.header("Embeddings")
-st.caption("TBD...")
 st.write("""\
     Following tokenization, each token is transformed into a vector of numeric characteristics, a process
@@ -473,9 +472,11 @@ st.write("""\
     characteristics using numbers, not words.
 """)
 col1, col2 = st.columns(2)
-token_king = col1.text_input("Choose words to compare embeddings:", value="king")
-token_queen = col2.text_input("Choose words to compare embeddings:", value="queen")
 from torch import nn
 from transformers import AutoConfig
@@ -516,8 +517,38 @@ fig.update_layout(legend=dict(orientation="h"))
 st.plotly_chart(fig, use_container_width=True)
 with st.expander("References:"):
     st.write("""\

 divider()
 st.header("Embeddings")
 st.write("""\
     Following tokenization, each token is transformed into a vector of numeric characteristics, a process
     characteristics using numbers, not words.
 """)
+# TODO: cache
 col1, col2 = st.columns(2)
+token_king = col1.text_input("Choose a word to compare embeddings:", value="king")
+token_queen = col2.text_input("Choose a word to compare embeddings:", value="queen")
 from torch import nn
 from transformers import AutoConfig
 st.plotly_chart(fig, use_container_width=True)
+import numpy as np
+sentence = st.text_input(label="words to explore embeddings", value="a the king queen space sit eat from on")
+sentence = sentence.split()
+def get_embeddings(text):
+  return np.array(openai.Embedding.create(input=text, model=EMBEDDING_MODEL)["data"][0]["embedding"])
+input = {word: get_embeddings(word) for word in sentence}
+scores_matrix = np.zeros((len(sentence), len(sentence)))
+for i, word_i in enumerate(sentence):
+    for j, word_j in enumerate(sentence):
+        scores_matrix[i, j] = np.dot(input[word_i], input[word_j])
+fig = px.imshow(scores_matrix, x=sentence, y=sentence, color_continuous_scale="hot_r")
+fig.update_layout(coloraxis_showscale=False)
+fig.update_layout(width=6000, title_text='Similar words have similar embeddings')
+st.plotly_chart(fig, use_container_width=True)
+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.schema.document import Document
+db = FAISS.from_documents([Document(page_content="king"), Document(page_content="queen")], OpenAIEmbeddings(model=EMBEDDING_MODEL))
+embeddings_query = st.text_input(label="search term")
+if embeddings_query is not None and embeddings_query != '':
+    embedding_vector = OpenAIEmbeddings(model=EMBEDDING_MODEL).embed_query(embeddings_query)
+    docs = db.similarity_search_by_vector(embedding_vector)
+    st.write(docs[0].page_content)
+st.caption("PCA explanation (optional materials) TBD...")
 with st.expander("References:"):
     st.write("""\

requirements.txt CHANGED Viewed

@@ -3,4 +3,7 @@ tokenizers~=0.13.3
 transformers~=4.31.0
 torch~=2.0.1
 openai~=0.27.8
-plotly~=5.15.0

 transformers~=4.31.0
 torch~=2.0.1
 openai~=0.27.8
+plotly~=5.15.0
+langchain~=0.0.242
+faiss-cpu~=1.7.4
+tiktoken~=0.4.0