# NOTE(review): removed non-Python extraction residue that preceded the code
# (Hugging Face Spaces page chrome, git commit hashes, and a line-number gutter).
import streamlit as st
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from sentence_transformers import SentenceTransformer
@st.cache_resource
def _load_model():
    """Load and cache the sentence-transformers model.

    Without caching, Streamlit re-runs the whole script on every widget
    interaction, re-instantiating (and potentially re-downloading) the model
    each time.  `st.cache_resource` keeps a single shared instance alive
    across reruns and sessions.
    """
    return SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")


def app():
    """Render the text-similarity page.

    Embeds two user-supplied texts with the (cached) all-MiniLM-L6-v2
    sentence-transformer and displays their cosine similarity as a
    percentage when the user presses the Compare button.
    """
    st.title("Text Similarity")

    with st.expander("ℹ️ - About this app", expanded=True):
        st.write(
            """
            Information cartography - Get your word/phrase/sentence/paragraph embedded and visualized.
            The (English) sentence-transformers model "all-MiniLM-L6-v2" maps sentences & paragraphs to a 384 dimensional dense vector space. This is normally used for tasks like clustering or semantic search, but in this case, we use it to calculate the (cosine) similarity. The sentence transformer is context sensitive and works best with whole sentences.
            Simply put in your text and press COMPARE, the higher the similarity the closer the text in the embedding space (max 1).
            """)

    model = _load_model()

    # Two side-by-side inputs pre-filled with related example sentences.
    with st.container():
        col1, col2 = st.columns(2)
        with col1:
            word_to_embed1 = st.text_input(
                "Text 1",
                value="The most vulnerable countries are seeking ‘loss and damage’ compensation from the biggest polluters.",
            )
        with col2:
            word_to_embed2 = st.text_input(
                "Text 2",
                value="COP27 opens amid compounding crises of war, warming and economic instability.",
            )

    # Button label typo fixed ("Comapre" -> "Compare").
    if st.button("Compare"):
        with st.spinner("Embedding comparing your inputs"):
            document = [word_to_embed1, word_to_embed2]
            # Encode paragraphs
            document_embeddings = model.encode(document, show_progress_bar=False)
            # Compute cosine similarity between labels sentences and paragraphs
            similarity_matrix = cosine_similarity(document_embeddings)
            # Off-diagonal entry [0][1] is the similarity between the two texts.
            st.write("Text similarity:", round(similarity_matrix[0][1] * 100, 2), "%")