any0019 committed on
Commit
b3a0a26
1 Parent(s): f5382f0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -1
app.py CHANGED
@@ -3,7 +3,10 @@ import streamlit as st
3
  from termcolor import colored
4
  import torch
5
  from transformers import BertTokenizer, BertForMaskedLM, BertForSequenceClassification
 
6
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
 
7
  @st.cache
8
  def load_models():
9
  tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
@@ -11,7 +14,11 @@ def load_models():
11
  bert_mlm_negative = BertForMaskedLM.from_pretrained('text_style_mlm_negative', return_dict=True).to(device).train(True)
12
  bert_classifier = BertForSequenceClassification.from_pretrained('text_style_classifier', num_labels=2).to(device).train(True)
13
  return tokenizer, bert_mlm_positive, bert_mlm_negative, bert_classifier
 
 
14
  tokenizer, bert_mlm_positive, bert_mlm_negative, bert_classifier = load_models()
 
 
15
  def highlight_diff(sent, sent_main):
16
  tokens = tokenizer.tokenize(sent)
17
  tokens_main = tokenizer.tokenize(sent_main)
@@ -24,11 +31,14 @@ def highlight_diff(sent, sent_main):
24
  new_toks.append(tok)
25
 
26
  return ' '.join(new_toks)
27
-
 
28
  def get_classifier_prob(sent):
29
  bert_classifier.eval()
30
  with torch.no_grad():
31
  return bert_classifier(**{k: v.to(device) for k, v in tokenizer(sent, return_tensors='pt').items()}).logits.softmax(dim=-1)[0].cpu().numpy()
 
 
32
  def beam_get_replacements(current_beam, beam_size, epsilon=1e-3, used_positions=[]):
33
  """
34
  - for each sentence in :current_beam: - split the sentence into tokens using the INGSOC-approved BERT tokenizer
@@ -74,6 +84,8 @@ def beam_get_replacements(current_beam, beam_size, epsilon=1e-3, used_positions=
74
  else:
75
  st.write("No more new hypotheses")
76
  return current_beam, None
 
 
77
  def get_best_hypotheses(sentence, beam_size, max_steps, epsilon=1e-3, pretty_output=False):
78
  current_beam = {sentence: get_classifier_prob(sentence)[1]}
79
  used_poss = []
@@ -94,10 +106,14 @@ def get_best_hypotheses(sentence, beam_size, max_steps, epsilon=1e-3, pretty_out
94
  used_poss.append(used_pos)
95
 
96
  return current_beam, used_poss
 
 
97
  st.title("Correcting opinions")
 
98
  default_value = "write your review here (in lower case - vocab reasons)"
99
  sentence = st.text_area("Text", default_value, height = 275)
100
  beam_size = st.sidebar.slider("Beam size", value = 3, min_value = 1, max_value=20, step=1)
101
  max_steps = st.sidebar.slider("Max steps", value = 3, min_value = 1, max_value=10, step=1)
102
  prettyfy = st.sidebar.slider("Higlight changes", value = 0, min_value = 0, max_value=1, step=1)
 
103
  beam, used_poss = get_best_hypotheses(sentence, beam_size=beam_size, max_steps=max_steps, pretty_output=bool(prettyfy))
 
3
  from termcolor import colored
4
  import torch
5
  from transformers import BertTokenizer, BertForMaskedLM, BertForSequenceClassification
6
+
7
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
8
+
9
+
10
  @st.cache
11
  def load_models():
12
  tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
 
14
  bert_mlm_negative = BertForMaskedLM.from_pretrained('text_style_mlm_negative', return_dict=True).to(device).train(True)
15
  bert_classifier = BertForSequenceClassification.from_pretrained('text_style_classifier', num_labels=2).to(device).train(True)
16
  return tokenizer, bert_mlm_positive, bert_mlm_negative, bert_classifier
17
+
18
+
19
  tokenizer, bert_mlm_positive, bert_mlm_negative, bert_classifier = load_models()
20
+
21
+
22
  def highlight_diff(sent, sent_main):
23
  tokens = tokenizer.tokenize(sent)
24
  tokens_main = tokenizer.tokenize(sent_main)
 
31
  new_toks.append(tok)
32
 
33
  return ' '.join(new_toks)
34
+
35
+
36
  def get_classifier_prob(sent):
37
  bert_classifier.eval()
38
  with torch.no_grad():
39
  return bert_classifier(**{k: v.to(device) for k, v in tokenizer(sent, return_tensors='pt').items()}).logits.softmax(dim=-1)[0].cpu().numpy()
40
+
41
+
42
  def beam_get_replacements(current_beam, beam_size, epsilon=1e-3, used_positions=[]):
43
  """
44
  - for each sentence in :current_beam: - split the sentence into tokens using the INGSOC-approved BERT tokenizer
 
84
  else:
85
  st.write("No more new hypotheses")
86
  return current_beam, None
87
+
88
+
89
  def get_best_hypotheses(sentence, beam_size, max_steps, epsilon=1e-3, pretty_output=False):
90
  current_beam = {sentence: get_classifier_prob(sentence)[1]}
91
  used_poss = []
 
106
  used_poss.append(used_pos)
107
 
108
  return current_beam, used_poss
109
+
110
+
111
  st.title("Correcting opinions")
112
+
113
  default_value = "write your review here (in lower case - vocab reasons)"
114
  sentence = st.text_area("Text", default_value, height = 275)
115
  beam_size = st.sidebar.slider("Beam size", value = 3, min_value = 1, max_value=20, step=1)
116
  max_steps = st.sidebar.slider("Max steps", value = 3, min_value = 1, max_value=10, step=1)
117
  prettyfy = st.sidebar.slider("Higlight changes", value = 0, min_value = 0, max_value=1, step=1)
118
+
119
  beam, used_poss = get_best_hypotheses(sentence, beam_size=beam_size, max_steps=max_steps, pretty_output=bool(prettyfy))