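"""Evaluate a causal language model on a logical reasoning dataset.

Configuration is read from environment variables (loaded from a .env file);
an optional command line argument limits how many test entries are evaluated.
Predictions are saved with save_results and, when reference labels are
available, scored with calc_metrics.

Usage (script name is illustrative):
    python eval_logical_reasoning.py        # evaluate the full test split
    python eval_logical_reasoning.py 10     # evaluate the first 10 entries
"""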
import os
import sys
import torch
from dotenv import find_dotenv, load_dotenv

# Locate the .env file, falling back to .env.example when none is found.
found_dotenv = find_dotenv(".env")
if not found_dotenv:
    found_dotenv = find_dotenv(".env.example")
print(f"loading env vars from: {found_dotenv}")
load_dotenv(found_dotenv, override=False)

# Add the directory containing the .env file (the project root) to sys.path
# so the llm_toolkit package below can be imported.
path = os.path.dirname(found_dotenv)
print(f"Adding {path} to sys.path")
sys.path.append(path)

# These wildcard imports depend on the sys.path entry added above; between
# them they provide the helpers used below (load_model, eval_model,
# save_results, calc_metrics, ...).
from llm_toolkit.llm_utils import *
from llm_toolkit.logical_reasoning_utils import *

# Read the evaluation configuration from environment variables.
model_name = os.getenv("MODEL_NAME")
adapter_name_or_path = os.getenv("ADAPTER_NAME_OR_PATH")
load_in_4bit = os.getenv("LOAD_IN_4BIT") == "true"
data_path = os.getenv("LOGICAL_REASONING_DATA_PATH")
results_path = os.getenv("LOGICAL_REASONING_RESULTS_PATH")
use_english_datasets = os.getenv("USE_ENGLISH_DATASETS") == "true"
using_p1 = os.getenv("USING_P1_PROMPT_TEMPLATE") == "true"
test_data = os.getenv("TEST_DATA")
using_llama_factory = os.getenv("USING_LLAMA_FACTORY") == "true"
max_new_tokens = int(os.getenv("MAX_NEW_TOKENS", "16"))
repetition_penalty = float(os.getenv("REPETITION_PENALTY", "1.0"))
batch_size = int(os.getenv("BATCH_SIZE", "2"))

# Select the inference dtype: float32 or bfloat16 can be forced via env
# flags; float16 is the default.
if os.getenv("USE_FLOAT32_FOR_INFERENCE") == "true":
    dtype = torch.float32
elif os.getenv("USE_BF16_FOR_INFERENCE") == "true":
    dtype = torch.bfloat16
else:
    dtype = torch.float16

print(model_name, adapter_name_or_path, load_in_4bit, data_path, results_path)

# Helper to report GPU memory usage at key stages of the run.
def print_gpu_stats(stage: int) -> None:
    gpu_stats = torch.cuda.get_device_properties(0)
    reserved_memory = round(torch.cuda.max_memory_reserved() / 1024**3, 3)
    max_memory = round(gpu_stats.total_memory / 1024**3, 3)
    print(f"({stage}) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
    print(f"{reserved_memory} GB of memory reserved.")


print_gpu_stats(1)

model, tokenizer = load_model(
    model_name,
    load_in_4bit=load_in_4bit,
    adapter_name_or_path=adapter_name_or_path,
    using_llama_factory=using_llama_factory,
    dtype=dtype,
)
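# Note: load_model comes from the llm_toolkit wildcard imports; based on its
# arguments it is assumed to handle 4-bit quantization and to attach the
# adapter weights when adapter_name_or_path is set. See llm_toolkit for the
# actual behavior.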

print_gpu_stats(2)

datasets = load_logical_reasoning_dataset(
    data_path,
    tokenizer=tokenizer,
    chinese_prompt=not use_english_datasets,
    using_p1=using_p1,
    test_data=test_data,
)
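# load_logical_reasoning_dataset is assumed to return a dataset dict with at
# least a "test" split; only that split is used below.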

# An optional CLI argument limits evaluation to the first N test entries.
if len(sys.argv) > 1:
    num = int(sys.argv[1])
    if num > 0:
        print(f"--- evaluating {num} entries")
        datasets["test"] = datasets["test"].select(range(num))

# Print the first and last test rows as a sanity check.
print_row_details(datasets["test"].to_pandas(), indices=[0, -1])

print("Evaluating model: " + model_name)
predictions = eval_model(
    model,
    tokenizer,
    datasets["test"],
    max_new_tokens=max_new_tokens,
    repetition_penalty=repetition_penalty,
    batch_size=batch_size,
)
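# eval_model is assumed to return one decoded prediction per test row, in
# dataset order, so predictions stays aligned with the test labels.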

print_gpu_stats(3)

# Record the adapter checkpoint name alongside the base model name so the
# results column identifies the exact configuration evaluated.
if adapter_name_or_path is not None:
    model_name += "/" + adapter_name_or_path.split("/")[-1]

# When TEST_DATA is set, store predictions under a plain "answer" column;
# otherwise encode the model, dtype, and quantization flags in the column name.
if test_data:
    result_column = "answer"
else:
    result_column = (
        f"{model_name}_{dtype}"
        f"{'_4bit' if load_in_4bit else ''}"
        f"{'_lf' if using_llama_factory else ''}"
    )

save_results(
    result_column,
    results_path,
    datasets["test"],
    predictions,
    debug=True,
)

# Metrics are only computed when TEST_DATA is not set, since reference labels
# are assumed to be available only in that case.
if not test_data:
    metrics = calc_metrics(datasets["test"]["label"], predictions, debug=True)
    print(metrics)