emiliosheinz committed on
Commit
30f532a
·
1 Parent(s): 6ddfa35

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -15
app.py CHANGED
@@ -1,22 +1,27 @@
1
  import streamlit as st
2
- import torch
3
  from transformers import AutoTokenizer, AutoModel
 
 
 
 
 
4
 
5
- # load the pre-trained model
6
- model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
7
 
8
- # set the app title
9
- st.title("Brazilian Portuguese Sentence Similarity Checker")
 
10
 
11
- # get the input sentences from the user
12
- sentence1 = st.text_input("Enter the first sentence:")
13
- sentence2 = st.text_input("Enter the second sentence:")
 
 
 
14
 
15
- # check if both sentences are not empty
16
- if sentence1 and sentence2:
17
- embedding_1= model.encode(sentence1, convert_to_tensor=True)
18
- embedding_2 = model.encode(sentence2, convert_to_tensor=True)
19
 
20
- similarity = util.pytorch_cos_sim(embedding_1, embedding_2)
21
-
22
- st.write("Similarity score between the sentences:", similarity)
 
1
  import streamlit as st
 
2
  from transformers import AutoTokenizer, AutoModel
3
+ import torch
4
+
5
+ # load the model and the tokenizer (NOTE(review): this runs at script top level; Streamlit presumably re-executes the script on each interaction, so the model may be reloaded every time - confirm and consider caching)
6
+ model = AutoModel.from_pretrained("neuralmind/bert-large-portuguese-cased")
7
+ tokenizer = AutoTokenizer.from_pretrained("neuralmind/bert-large-portuguese-cased")
8
 
9
+ st.title("Comparador de Similaridade de Textos em Português")
10
+ st.write("Este aplicativo usa o modelo BERT-large treinado para o português para calcular a similaridade entre dois textos.")
11
 
12
+ # define the input boxes for the two sentences
13
+ frase1 = st.text_input("Insira a primeira frase:")
14
+ frase2 = st.text_input("Insira a segunda frase:")
15
 
16
+ # check that both sentences were entered and, if so, compute the similarity
17
+ if frase1 and frase2:
18
+ # tokenize the sentences and get the embeddings (NOTE(review): both sentences are passed to one encode_plus call as a pair, which presumably yields a single combined sequence, i.e. batch size 1, padded to max length - confirm)
19
+ input_ids = tokenizer.encode_plus(frase1, frase2, padding='max_length', truncation=True, return_tensors='pt')
20
+ with torch.no_grad():
21
+ embeddings = model(input_ids['input_ids'], attention_mask=input_ids['attention_mask'])[0]
22
 
23
+ # compute the cosine similarity between the embeddings (NOTE(review): if the model output has batch size 1, embeddings[1] would be out of range; the sentences likely need to be encoded separately and pooled - confirm against the output shape)
24
+ cos_sim = torch.nn.functional.cosine_similarity(embeddings[0], embeddings[1], dim=0)
 
 
25
 
26
+ # print the result
27
+ st.write("A similaridade entre as frases é:", cos_sim.item())