CosmoAI committed on
Commit
4fcb874
1 Parent(s): 3d00337

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -2
app.py CHANGED
@@ -152,9 +152,10 @@ def prepare_training_data(qa_pairs, tokenizer):
152
  context = "your_bhagavad_gita_text_here" # Replace with your preprocessed Bhagavad Gita text
153
  context_encoded = tokenizer(context, add_special_tokens=True, return_tensors="pt")
154
  # start_positions = answer_encoded.input_ids == tokenizer.convert_tokens_to_ids(tokenizer.sep_token)[0] # Find first SEP token
155
- start_positions = answer_encoded.input_ids == [tokenizer.convert_tokens_to_ids(tokenizer.sep_token)[0]]
 
156
 
157
- end_positions = answer_encoded.input_ids == [tokenizer.convert_tokens_to_ids(tokenizer.eos_token)[0]] # Find first EOS token
158
 
159
  # Combine all data into a dictionary for each QA pair
160
  encoded_data.append({
 
152
  context = "your_bhagavad_gita_text_here" # Replace with your preprocessed Bhagavad Gita text
153
  context_encoded = tokenizer(context, add_special_tokens=True, return_tensors="pt")
154
  # start_positions = answer_encoded.input_ids == tokenizer.convert_tokens_to_ids(tokenizer.sep_token)[0] # Find first SEP token
155
+ # start_positions = answer_encoded.input_ids == [tokenizer.convert_tokens_to_ids(tokenizer.sep_token)[0]]
156
+ start_positions = answer_encoded.input_ids == [[tokenizer.convert_tokens_to_ids(tokenizer.sep_token)]] # Double square brackets for list of list
157
 
158
+ end_positions = answer_encoded.input_ids == [[tokenizer.convert_tokens_to_ids(tokenizer.eos_token)]] # Find first EOS token
159
 
160
  # Combine all data into a dictionary for each QA pair
161
  encoded_data.append({