pedramyazdipoor
committed on
Commit
•
7730914
1
Parent(s):
2963ed6
Update README.md
Browse files
README.md
CHANGED
@@ -54,7 +54,7 @@ There are some considerations for inference:
|
|
54 |
3) The selected span must be the most probable choice among N pairs of candidates.
|
55 |
|
56 |
```python
|
57 |
-
def generate_indexes(start_logits, end_logits, N,
|
58 |
|
59 |
output_start = start_logits
|
60 |
output_end = end_logits
|
@@ -71,7 +71,7 @@ def generate_indexes(start_logits, end_logits, N, min_index_list):
|
|
71 |
for a in range(0,N):
|
72 |
for b in range(0,N):
|
73 |
if (sorted_start_list[a][1] + sorted_end_list[b][1]) > prob :
|
74 |
-
if (sorted_start_list[a][0] <= sorted_end_list[b][0]) and (
|
75 |
prob = sorted_start_list[a][1] + sorted_end_list[b][1]
|
76 |
start_idx = sorted_start_list[a][0]
|
77 |
end_idx = sorted_end_list[b][0]
|
@@ -97,7 +97,7 @@ encoding = tokenizer(text,question,add_special_tokens = True,
|
|
97 |
|
98 |
out = model(encoding['input_ids'].to(device),encoding['attention_mask'].to(device), encoding['token_type_ids'].to(device))
|
99 |
#we had to change some pieces of code to make it compatible with one answer generation at a time.
|
100 |
-
#you can initialize
|
101 |
start_index, end_index = generate_indexes(out['start_logits'][0], out['end_logits'][0], 5, 0)
|
102 |
print(tokenizer.tokenize(text + question)[start_index:end_index+1])
|
103 |
>>> ['اسمم', 'پدرام', '##ه', '.', 'اسمم', 'چیه', '؟']
|
|
|
54 |
3) The selected span must be the most probable choice among N pairs of candidates.
|
55 |
|
56 |
```python
|
57 |
+
def generate_indexes(start_logits, end_logits, N, max_index):
|
58 |
|
59 |
output_start = start_logits
|
60 |
output_end = end_logits
|
|
|
71 |
for a in range(0,N):
|
72 |
for b in range(0,N):
|
73 |
if (sorted_start_list[a][1] + sorted_end_list[b][1]) > prob :
|
74 |
+
if (sorted_start_list[a][0] <= sorted_end_list[b][0]) and (sorted_end_list[a][0] < max_index) :
|
75 |
prob = sorted_start_list[a][1] + sorted_end_list[b][1]
|
76 |
start_idx = sorted_start_list[a][0]
|
77 |
end_idx = sorted_end_list[b][0]
|
|
|
97 |
|
98 |
out = model(encoding['input_ids'].to(device),encoding['attention_mask'].to(device), encoding['token_type_ids'].to(device))
|
99 |
#we had to change some pieces of code to make it compatible with one answer generation at a time.
|
100 |
+
#you can initialize max_index in generate_indexes() to force the chosen tokens to lie within the context (the end index must be less than the separator token's index).
|
101 |
start_index, end_index = generate_indexes(out['start_logits'][0], out['end_logits'][0], 5, 0)
|
102 |
print(tokenizer.tokenize(text + question)[start_index:end_index+1])
|
103 |
>>> ['اسمم', 'پدرام', '##ه', '.', 'اسمم', 'چیه', '؟']
|