alKoGolik's picture
Upload 169 files
c87c295 verified
JSON_PATH = "/home/seungyoun/llama_related/llama_code_interpreter/eval/grade-school-math/grade_school_math/data/test.jsonl"
import os, sys
import random
import json
import re
from tqdm import tqdm
def load_jsonl(file_path, num_lines_to_display=5):
data = []
with open(file_path, "r") as f:
for line in f:
out_dict = json.loads(line)
answer_raw = out_dict["answer"]
answer_refined = answer_raw.split("####")[-1].strip()
out_dict["answer_refined"] = answer_refined
data.append(out_dict) # dict_keys(['question', 'answer'])
return data
few_shot_1 = [
{
"role": "user",
"content": "Sandra wants to buy some sweets. She saved $10 for this purpose. Her mother gave her an additional $4, and her father twice as much as her mother. One candy costs $0.5, and one jelly bean $0.2. She wants to buy 14 candies and 20 jelly beans. How much money will she be left with after the purchase?",
},
{
"role": "assistant",
"content": "```python\n# Saved Money\nsandra_money = 10\n# mother gave her\nmother_money = 4\n# father gave twice as much as her mother\nfather_money = 2 * mother_money\n# Cost\ncandy_cost = 0.5\njelly_bean_cost = 0.2\n\n# Calculations\ntotal_money = sandra_money + mother_money + father_money\n#Desired to buy\nShe wants to buy 14 candies and 20 jelly beans so the total cost would be\ntotal_cost = 14 * candy_cost + 20 * jelly_bean_cost\n# After the purchase\nremaining_money = total_money - total_cost\n\nprint(remaining_money)\n```\n```RESULT\n11.0\n```\nAfter buying 14 candies and 20 jelly beans, Sandra will be left with $11.",
},
]
few_shot_2 = [
{
"role": "user",
"content": "Keanu bought some fish from the store for his pets. He gave his cat half as many fish as he gave to his dog. If he gave his dog 40 fish, and each fish cost him $4, calculate the total amount of money he spent at the store to buy all the fish.",
},
{
"role": "assistant",
"content": "```python\ndef calculate_total_cost(dog_fish, fish_price):\n cat_fish = dog_fish / 2\n total_fish = dog_fish + cat_fish\n total_cost = total_fish * fish_price\n return total_cost\n\ntotal_cost = calculate_total_cost(40, 4)\nprint(total_cost)\n```\n```RESULT\n240.0\n```\nKeanu spent a total of $240 to buy all the fish from the store.\n### Answer : 240",
},
]
few_shot_3 = [
{
"role": "user",
"content": "Mike is feeding the birds at the park. He counts out the seeds as he throws them. He throws 20 seeds to the birds on the left. He throws twice as much to the bigger group of birds on the right. Some more birds flutter over to join the others for lunch and Mike throws 30 more seeds for them. If Mike has 30 seeds left to feed the last of the birds, how many seeds did Mike start with?",
},
{
"role": "assistant",
"content": "Let's break down the problem:\n\n- Mike throws 20 seeds to the birds on the left.\n- He throws twice as much to the bigger group of birds on the right, which is 2 * 20 = 40 seeds.\n- Some more birds join and Mike throws 30 more seeds for them.\n- Mike has 30 seeds left to feed the last of the birds.\n\nSo, the total number of seeds Mike started with is the sum of all these seeds.\n\n```python\nseeds_left = 20\nseeds_right = 2 * seeds_left\nseeds_more_birds = 30\nseeds_last = 30\n\ntotal_seeds = seeds_left + seeds_right + seeds_more_birds + seeds_last\nprint(total_seeds)\n```\n```RESULT\n120\n```\nFrom the calculations, we can conclude that Mike started with 120 seeds to feed the birds at the park.\n### Answer : 120",
},
]
if __name__ == "__main__":
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from code_interpreter.LlamaCodeInterpreter import LlamaCodeInterpreter
import argparse
parser = argparse.ArgumentParser(description="Process path for LLAMA2_FINETUNEED.")
parser.add_argument(
"--path",
type=str,
required=True,
help="Path to the finetuned LLAMA2 model.",
default='"./output/llama-2-7b-chat-ci"',
)
args = parser.parse_args()
LLAMA2_FINETUNEED_PATH = args.path
interpreter = LlamaCodeInterpreter(
model_path=LLAMA2_FINETUNEED_PATH,
# load_in_4bit=True
)
sample_data = load_jsonl(JSON_PATH)
correct = 0
for idx, data_dict in enumerate(tqdm(sample_data)):
question = data_dict["question"]
answer = data_dict["answer"]
answer_only = data_dict["answer_refined"]
# reset dilag : fewshot to follow answering format
interpreter.dialog = [
{
"role": "system",
"content": "You are helpful robot that can generate code , excute it and debug then answer",
}
] # this will replaced in template conversion
# interpreter.dialog += few_shot_1
# interpreter.dialog += few_shot_2
# interpreter.dialog += few_shot_3
output = interpreter.chat(
user_message=f"{question}",
VERBOSE=True,
)
pattern = r"\[RESULT_TOK\]\s*(\d+(\.\d+)?)\s*\[/RESULT_TOK\]"
pred = -9212323 # for no code output (which is wrong answer)
if re.search(pattern, output["content"]):
pred = re.search(pattern, output["content"]).group(1)
pred = str(pred)
answer_only = str(answer_only)
if float(pred.replace(",", "")) == float(answer_only.replace(",", "")):
correct += 1
print("-" * 30)
print(f"\tThe question was : {question}")
print(f"\tThe answer was : [{answer_only}]")
print(f"\tModel pred is : [{pred}]")
print(f"\t Accuracy : [{correct/(idx+1)}]")
print("-" * 30)