CosmoAI committed on
Commit
42cf287
1 Parent(s): 64d575d

Update app.py

Files changed (1)
  1. app.py +4 -2
app.py CHANGED
@@ -151,8 +151,10 @@ def prepare_training_data(qa_pairs, tokenizer):
     # Here, we assume the context is a single long string.
     context = "your_bhagavad_gita_text_here"  # Replace with your preprocessed Bhagavad Gita text
     context_encoded = tokenizer(context, add_special_tokens=True, return_tensors="pt")
-    start_positions = answer_encoded.input_ids == tokenizer.convert_tokens_to_ids(tokenizer.sep_token)[0]  # Find first SEP token
-    end_positions = answer_encoded.input_ids == tokenizer.convert_tokens_to_ids(tokenizer.eos_token)[0]  # Find first EOS token
+    # start_positions = answer_encoded.input_ids == tokenizer.convert_tokens_to_ids(tokenizer.sep_token)[0]  # Find first SEP token
+    start_positions = answer_encoded.input_ids == [tokenizer.convert_tokens_to_ids(tokenizer.sep_token)[0]]
+
+    end_positions = answer_encoded.input_ids == [tokenizer.convert_tokens_to_ids(tokenizer.eos_token)[0]]  # Find first EOS token
 
     # Combine all data into a dictionary for each QA pair
     encoded_data.append({
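
For context, the changed lines are trying to locate the position of the first SEP (and EOS) token inside the tokenized answer. Below is a minimal sketch of that lookup, assuming a Hugging Face tokenizer and PyTorch tensors; the checkpoint name and answer_text are placeholders, not part of the commit. Note that tokenizer.convert_tokens_to_ids() returns a plain int when given a single token string, so it cannot be indexed with [0].

import torch
from transformers import AutoTokenizer

# Illustrative only: the checkpoint and the answer text are placeholders.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
answer_text = "your_answer_text_here"
answer_encoded = tokenizer(answer_text, add_special_tokens=True, return_tensors="pt")

# convert_tokens_to_ids() on a single token string returns an int (not a list),
# so it should not be indexed with [0].
sep_id = tokenizer.convert_tokens_to_ids(tokenizer.sep_token)

# Boolean mask over the (1, seq_len) input_ids; nonzero() gives the matching
# positions, and the first one is the "first SEP token" the diff comment refers to.
sep_mask = answer_encoded.input_ids[0] == sep_id
start_positions = torch.nonzero(sep_mask, as_tuple=True)[0][:1]

# The same pattern applies to an end token for tokenizers that define one
# (BERT-style tokenizers expose sep_token but have no eos_token).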