Update generation_test_hf_script.py
generation_test_hf_script.py
@@ -13,7 +13,7 @@ def load_rag_benchmark_tester_ds():
 
     dataset = load_dataset(ds_name)
 
-    print("update: loading test dataset - ", dataset)
+    print("update: loading RAG Benchmark test dataset - ", dataset)
 
     test_set = []
     for i, samples in enumerate(dataset["train"]):
@@ -29,7 +29,9 @@ def run_test(model_name, test_ds):
 
     device = "cuda" if torch.cuda.is_available() else "cpu"
 
-    print("
+    print("\nRAG Performance Test - 200 questions")
+    print("update: model - ", model_name)
+    print("update: device - ", device)
 
     model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
     model.to(device)
@@ -39,6 +41,7 @@ def run_test(model_name, test_ds):
     for i, entries in enumerate(test_ds):
 
         # prepare prompt packaging used in fine-tuning process
+        # note: in our testing, Yi model performed better with trailing "\n" at end of prompt
         new_prompt = "<human>: " + entries["context"] + "\n" + entries["query"] + "\n" + "<bot>:" + "\n"
 
         inputs = tokenizer(new_prompt, return_tensors="pt")
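For reference, the pieces touched by this commit fit together roughly as sketched below. This is a hedged reconstruction, not the repo's verbatim script: the dataset name passed as ds_name, the AutoTokenizer loading step, and the generate() parameters are assumptions filled in so the sketch is self-contained and runnable; only the lines shown in the diff above are confirmed.

# Hedged sketch of the full test flow around the changed lines.
# Assumptions (not confirmed by the diff): the default ds_name value,
# the AutoTokenizer loading step, and the generation parameters.

import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer

def load_rag_benchmark_tester_ds(ds_name="llmware/rag_instruct_benchmark_tester"):
    # ds_name default above is an assumed placeholder for the benchmark dataset
    dataset = load_dataset(ds_name)
    print("update: loading RAG Benchmark test dataset - ", dataset)

    test_set = []
    for i, samples in enumerate(dataset["train"]):
        test_set.append(samples)
    return test_set

def run_test(model_name, test_ds):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print("\nRAG Performance Test - 200 questions")
    print("update: model - ", model_name)
    print("update: device - ", device)

    model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
    model.to(device)
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)  # assumed step

    for i, entries in enumerate(test_ds):
        # prompt packaging used in fine-tuning; per the diff's note, the
        # trailing "\n" helped the Yi model
        new_prompt = "<human>: " + entries["context"] + "\n" + entries["query"] + "\n" + "<bot>:" + "\n"
        inputs = tokenizer(new_prompt, return_tensors="pt").to(device)

        with torch.no_grad():
            # max_new_tokens is an illustrative choice, not from the diff
            outputs = model.generate(**inputs, max_new_tokens=100)

        # decode only the tokens generated after the prompt
        answer = tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
        print("update: answer - ", i, answer)

A call such as run_test("01-ai/Yi-6B", load_rag_benchmark_tester_ds()) would exercise the flow end to end; the model name here is only an example consistent with the Yi note in the diff.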