Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -152,9 +152,10 @@ def prepare_training_data(qa_pairs, tokenizer):
|
|
152 |
context = "your_bhagavad_gita_text_here" # Replace with your preprocessed Bhagavad Gita text
|
153 |
context_encoded = tokenizer(context, add_special_tokens=True, return_tensors="pt")
|
154 |
# start_positions = answer_encoded.input_ids == tokenizer.convert_tokens_to_ids(tokenizer.sep_token)[0] # Find first SEP token
|
155 |
-
start_positions = answer_encoded.input_ids == [tokenizer.convert_tokens_to_ids(tokenizer.sep_token)[0]]
|
|
|
156 |
|
157 |
-
end_positions = answer_encoded.input_ids == [tokenizer.convert_tokens_to_ids(tokenizer.eos_token)
|
158 |
|
159 |
# Combine all data into a dictionary for each QA pair
|
160 |
encoded_data.append({
|
|
|
152 |
context = "your_bhagavad_gita_text_here" # Replace with your preprocessed Bhagavad Gita text
|
153 |
context_encoded = tokenizer(context, add_special_tokens=True, return_tensors="pt")
|
154 |
# start_positions = answer_encoded.input_ids == tokenizer.convert_tokens_to_ids(tokenizer.sep_token)[0] # Find first SEP token
|
155 |
+
# start_positions = answer_encoded.input_ids == [tokenizer.convert_tokens_to_ids(tokenizer.sep_token)[0]]
|
156 |
+
start_positions = answer_encoded.input_ids == [[tokenizer.convert_tokens_to_ids(tokenizer.sep_token)]] # Double square brackets to form a list of lists
|
157 |
|
158 |
+
end_positions = answer_encoded.input_ids == [[tokenizer.convert_tokens_to_ids(tokenizer.eos_token)]] # Find first EOS token
|
159 |
|
160 |
# Combine all data into a dictionary for each QA pair
|
161 |
encoded_data.append({
|