"""Streamlit app that scores how similar two user-supplied sentences are.

The two sentences are encoded together as a sentence pair, passed through a
sequence-classification model, and the softmax probability of class index 1
is reported as the "similarity score".
"""

import streamlit as st
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# NOTE(review): this looks like a *base* checkpoint rather than one fine-tuned
# for sentence-pair (paraphrase/similarity) classification. If so, the
# classification head is presumably untrained and the reported score is not
# meaningful -- confirm, and point MODEL_NAME at a fine-tuned checkpoint.
MODEL_NAME = "distilbert-base-multilingual-cased"

# Upper bound on the number of tokens in the encoded sentence pair; longer
# inputs are truncated by the tokenizer.
MAX_LENGTH = 250


@st.cache_resource
def load_model_and_tokenizer(model_name: str = MODEL_NAME):
    """Load the tokenizer and classification model once per server process.

    Streamlit re-executes the whole script on every widget interaction, so
    without caching the model would be re-loaded on every rerun.
    (``st.cache_resource`` requires a reasonably recent Streamlit release.)

    Args:
        model_name: Hugging Face model identifier or local path.

    Returns:
        A ``(tokenizer, model)`` tuple, with the model in evaluation mode.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    model.eval()  # make inference mode explicit (disables dropout)
    return tokenizer, model


def compute_similarity(tokenizer, model, sentence1: str, sentence2: str) -> float:
    """Return the model's probability for class index 1 for a sentence pair.

    Args:
        tokenizer: Tokenizer used to encode the sentence pair.
        model: Sequence-classification model producing ``.logits``.
        sentence1: First sentence.
        sentence2: Second sentence.

    Returns:
        The softmax probability of the second class as a Python float.
    """
    inputs = tokenizer(
        sentence1,
        sentence2,
        padding=True,
        truncation=True,
        max_length=MAX_LENGTH,
        return_tensors="pt",
    )
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        logits = model(**inputs).logits
    probs = logits.softmax(dim=1)
    # Single pair in the batch -> row 0; class index 1 is treated as "similar".
    return probs[0][1].item()


def main() -> None:
    """Render the UI and display the similarity score for the entered pair."""
    tokenizer, model = load_model_and_tokenizer()

    st.title("Sentence Similarity Checker")

    sentence1 = st.text_input("Enter the first sentence:")
    sentence2 = st.text_input("Enter the second sentence:")

    # Only score once both inputs contain something other than whitespace.
    if sentence1.strip() and sentence2.strip():
        similarity_score = compute_similarity(tokenizer, model, sentence1, sentence2)
        st.write("Similarity score:", similarity_score)


if __name__ == "__main__":
    main()