AhmedSSabir committed on
Commit
9be3938
1 Parent(s): 02397eb

Update app.py

Files changed (1)
  1. app.py +29 -26
app.py CHANGED
@@ -48,20 +48,22 @@ def get_sim(x):
 # Load pre-trained model
 
 #model = GPT2LMHeadModel.from_pretrained('distilgpt2', output_hidden_states = True, output_attentions = True)
-model = GPT2LMHeadModel.from_pretrained('gpt2', output_hidden_states = True, output_attentions = True)
+#model = GPT2LMHeadModel.from_pretrained('gpt2', output_hidden_states = True, output_attentions = True)
 #model = gr.Interface.load('huggingface/distilgpt2', output_hidden_states = True, output_attentions = True)
 
 #model.eval()
 #tokenizer = gr.Interface.load('huggingface/distilgpt2')
 
 #tokenizer = GPT2Tokenizer.from_pretrained('distilgpt2')
-tokenizer = GPT2Tokenizer.from_pretrained('distilgpt2')
 #tokenizer = GPT2Tokenizer.from_pretrained('distilgpt2')
+#tokenizer = GPT2Tokenizer.from_pretrained('distilgpt2')
+
+
 
-tokenizer = GPT2Tokenizer.from_pretrained('distilgpt2')
-model = GPT2LMHeadModel.from_pretrained('distilgpt2')
+tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
+model = GPT2LMHeadModel.from_pretrained('gpt2')
 
 
 
 def sentence_prob_mean(text):
     # Tokenize the input text and add special tokens
@@ -85,37 +87,38 @@ def sentence_prob_mean(text):
     # Compute the mean probability across the tokens
     mean_prob = torch.mean(gathered_probs).item()
 
+    return mean_prob
 
 
-def cloze_prob(text):
+# def cloze_prob(text):
 
-    whole_text_encoding = tokenizer.encode(text)
-    text_list = text.split()
-    stem = ' '.join(text_list[:-1])
-    stem_encoding = tokenizer.encode(stem)
-    cw_encoding = whole_text_encoding[len(stem_encoding):]
-    tokens_tensor = torch.tensor([whole_text_encoding])
+#     whole_text_encoding = tokenizer.encode(text)
+#     text_list = text.split()
+#     stem = ' '.join(text_list[:-1])
+#     stem_encoding = tokenizer.encode(stem)
+#     cw_encoding = whole_text_encoding[len(stem_encoding):]
+#     tokens_tensor = torch.tensor([whole_text_encoding])
 
-    with torch.no_grad():
-        outputs = model(tokens_tensor)
-        predictions = outputs[0]
-
-    logprobs = []
-    start = -1-len(cw_encoding)
-    for j in range(start,-1,1):
-        raw_output = []
-        for i in predictions[-1][j]:
-            raw_output.append(i.item())
+#     with torch.no_grad():
+#         outputs = model(tokens_tensor)
+#         predictions = outputs[0]
+
+#     logprobs = []
+#     start = -1-len(cw_encoding)
+#     for j in range(start,-1,1):
+#         raw_output = []
+#         for i in predictions[-1][j]:
+#             raw_output.append(i.item())
 
-        logprobs.append(np.log(softmax(raw_output)))
+#         logprobs.append(np.log(softmax(raw_output)))
 
 
-    conditional_probs = []
-    for cw,prob in zip(cw_encoding,logprobs):
-        conditional_probs.append(prob[cw])
+#     conditional_probs = []
+#     for cw,prob in zip(cw_encoding,logprobs):
+#         conditional_probs.append(prob[cw])
 
 
-    return np.exp(np.sum(conditional_probs))
+#     return np.exp(np.sum(conditional_probs))
 
 
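The now-disabled `cloze_prob` computed the probability of the final whitespace-delimited word given the preceding stem, as a product of per-token conditional probabilities. A compact sketch of the same computation, using `torch.log_softmax` in place of the removed NumPy `softmax` helper (the names mirror the removed code, but this rewrite is an assumption, not part of the commit):

```python
import numpy as np
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')
model.eval()

def cloze_prob(text):
    whole_text_encoding = tokenizer.encode(text)
    stem = ' '.join(text.split()[:-1])
    stem_encoding = tokenizer.encode(stem)
    # Token ids that make up the final word
    cw_encoding = whole_text_encoding[len(stem_encoding):]

    tokens_tensor = torch.tensor([whole_text_encoding])
    with torch.no_grad():
        logits = model(tokens_tensor).logits[0]  # [seq_len, vocab]

    logprobs = torch.log_softmax(logits, dim=-1)
    # Position i's logits predict token i+1, so the prediction for the
    # j-th critical-word token sits one index earlier than the token itself.
    start = len(whole_text_encoding) - len(cw_encoding) - 1
    conditional_logprobs = [
        logprobs[start + j, cw].item() for j, cw in enumerate(cw_encoding)
    ]
    return float(np.exp(np.sum(conditional_logprobs)))
```

For example, `cloze_prob("The boy ran to school")` multiplies the conditional probabilities of each sub-token of "school" given the stem "The boy ran to".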