AhmedSSabir committed on
Commit
5ea9bf1
1 Parent(s): 0b41e50

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -10
app.py CHANGED
@@ -20,8 +20,8 @@ from sentence_transformers import SentenceTransformer, util
20
 
21
  #model_sts = gr.Interface.load('huggingface/sentence-transformers/stsb-distilbert-base')
22
 
23
- #model_sts = SentenceTransformer('stsb-distilbert-base')
24
- model_sts = SentenceTransformer('roberta-large-nli-stsb-mean-tokens')
25
  #batch_size = 1
26
  #scorer = LMScorer.from_pretrained('gpt2' , device=device, batch_size=batch_size)
27
 
@@ -72,11 +72,7 @@ def cloze_prob(text):
72
  text_list = text.split()
73
  stem = ' '.join(text_list[:-1])
74
  stem_encoding = tokenizer.encode(stem)
75
- # cw_encoding is just the difference between whole_text_encoding and stem_encoding
76
- # note: this might not correspond exactly to the word itself
77
  cw_encoding = whole_text_encoding[len(stem_encoding):]
78
- # Run the entire sentence through the model. Then go "back in time" to look at what the model predicted for each token, starting at the stem.
79
- # Put the whole text encoding into a tensor, and get the model's comprehensive output
80
  tokens_tensor = torch.tensor([whole_text_encoding])
81
 
82
  with torch.no_grad():
@@ -93,10 +89,7 @@ def cloze_prob(text):
93
 
94
  logprobs.append(np.log(softmax(raw_output)))
95
 
96
- # if the critical word is three tokens long, the raw_probabilities should look something like this:
97
- # [ [0.412, 0.001, ... ] ,[0.213, 0.004, ...], [0.002,0.001, 0.93 ...]]
98
- # Then for the i'th token we want to find its associated probability
99
- # this is just: raw_probabilities[i][token_index]
100
  conditional_probs = []
101
  for cw,prob in zip(cw_encoding,logprobs):
102
  conditional_probs.append(prob[cw])
 
20
 
21
  #model_sts = gr.Interface.load('huggingface/sentence-transformers/stsb-distilbert-base')
22
 
23
+ model_sts = SentenceTransformer('stsb-distilbert-base')
24
+ #model_sts = SentenceTransformer('roberta-large-nli-stsb-mean-tokens')
25
  #batch_size = 1
26
  #scorer = LMScorer.from_pretrained('gpt2' , device=device, batch_size=batch_size)
27
 
 
72
  text_list = text.split()
73
  stem = ' '.join(text_list[:-1])
74
  stem_encoding = tokenizer.encode(stem)
 
 
75
  cw_encoding = whole_text_encoding[len(stem_encoding):]
 
 
76
  tokens_tensor = torch.tensor([whole_text_encoding])
77
 
78
  with torch.no_grad():
 
89
 
90
  logprobs.append(np.log(softmax(raw_output)))
91
 
92
+
 
 
 
93
  conditional_probs = []
94
  for cw,prob in zip(cw_encoding,logprobs):
95
  conditional_probs.append(prob[cw])