Jerry0203 committed on
Commit
7bd2349
1 Parent(s): fabef1a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -6
app.py CHANGED
@@ -142,12 +142,7 @@ def cut_sent(para):
142
 
143
  def embed(document):
144
  model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')
145
- pattern = re.compile(r’[\u4e00-\u9fa5]+')
146
- result = pattern.search(document)
147
- if result:
148
- sentences = cut_sent(document)
149
- else:
150
- sentences = nltk.sent_tokenize(document)
151
  embeddings = model.encode(sentences, convert_to_tensor=True)
152
  #Compute the pair-wise cosine similarities
153
  cos_scores = util.pytorch_cos_sim(embeddings, embeddings).cpu().numpy()
 
142
 
143
  def embed(document):
144
  model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')
145
+ sentences = cut_sent(document)
 
 
 
 
 
146
  embeddings = model.encode(sentences, convert_to_tensor=True)
147
  #Compute the pair-wise cosine similarities
148
  cos_scores = util.pytorch_cos_sim(embeddings, embeddings).cpu().numpy()