nam194 committed on
Commit
246d50e
·
1 Parent(s): ebfa8f4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -9
app.py CHANGED
@@ -31,21 +31,18 @@ model_topic.resize_token_embeddings(len(tokenizer_topic))
31
 
32
 
33
  def sentiment(sent: str):
34
- try:
35
- sent_ = normalize(text=sent_) # segment input sentence, maybe raise ConnectionError: HTTPConnectionPool())
36
- except:
37
- pass
38
  input_sent = torch.tensor([tokenizer_sent.encode(sent_)]).to(device)
39
  with torch.no_grad():
40
  out_sent = model_sent(input_sent)
41
  logits_sent = out_sent.logits.softmax(dim=-1).tolist()[0]
42
  pred_sent = dict_[np.argmax(logits_sent)]
43
 
44
- try:
45
- sent = replace_all(text=sent) # segment input sentence, maybe raise ConnectionError: HTTPConnectionPool())
46
- except:
47
- pass
48
- sent_segment = rdrsegmenter.tokenize(sent)
49
  dump = [[i, 'O'] for s in sent_segment for i in s]
50
  dump_set = NerDataset(feature_for_phobert([dump], tokenizer=tokenizer_topic, use_crf=True))
51
  dump_iter = DataLoader(dump_set, batch_size=1)
 
31
 
32
 
33
  def sentiment(sent: str):
34
+ sent_ = normalize(text=sent) # segment input sentence, maybe raise ConnectionError: HTTPConnectionPool())
 
 
 
35
  input_sent = torch.tensor([tokenizer_sent.encode(sent_)]).to(device)
36
  with torch.no_grad():
37
  out_sent = model_sent(input_sent)
38
  logits_sent = out_sent.logits.softmax(dim=-1).tolist()[0]
39
  pred_sent = dict_[np.argmax(logits_sent)]
40
 
41
+ sent = replace_all(text=sent) # segment input sentence, maybe raise ConnectionError: HTTPConnectionPool())
42
+ sent_segment = sent.split(".")
43
+ for i, s in enumerate(sent_segment):
44
+ s = s.strip()
45
+ sent_segment[i] = underthesea.word_tokenize(s, format="text").split()
46
  dump = [[i, 'O'] for s in sent_segment for i in s]
47
  dump_set = NerDataset(feature_for_phobert([dump], tokenizer=tokenizer_topic, use_crf=True))
48
  dump_iter = DataLoader(dump_set, batch_size=1)