inflaton committed
Commit 5002792 · 1 parent: 36cb2cb

InternLM 2.5 results

competition/04_InternLM_T4.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
competition/05_InternLM_NV4080.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
llm_toolkit/{chat_mac.py → chat.py} RENAMED
File without changes
llm_toolkit/{eval_mac.py → eval.py} RENAMED
File without changes
llm_toolkit/eval_lf.py ADDED
@@ -0,0 +1,110 @@
+ import os
+ import sys
+ import torch
+ from dotenv import find_dotenv, load_dotenv
+ from llamafactory.chat import ChatModel
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+
+ found_dotenv = find_dotenv(".env")
+
+ if len(found_dotenv) == 0:
+     found_dotenv = find_dotenv(".env.example")
+ print(f"loading env vars from: {found_dotenv}")
+ load_dotenv(found_dotenv, override=False)
+
+ path = os.path.dirname(found_dotenv)
+ print(f"Adding {path} to sys.path")
+ sys.path.append(path)
+
+ from llm_toolkit.translation_utils import *
+
+ model_name = os.getenv("MODEL_NAME")
+ adapter_name_or_path = os.getenv("ADAPTER_NAME_OR_PATH")
+ load_in_4bit = os.getenv("LOAD_IN_4BIT") == "true"
+ data_path = os.getenv("DATA_PATH")
+ results_path = os.getenv("RESULTS_PATH")
+
+ print(model_name, adapter_name_or_path, load_in_4bit, data_path, results_path)
+
+
+ def load_model(
+     model_name,
+     max_seq_length=2048,
+     dtype=torch.bfloat16,
+     load_in_4bit=False,
+     adapter_name_or_path=None,
+ ):
+     print(f"loading model: {model_name}")
+
+     if adapter_name_or_path:
+         template = "llama3" if "llama-3" in model_name.lower() else "chatml"
+
+         args = dict(
+             model_name_or_path=model_name,
+             adapter_name_or_path=adapter_name_or_path,  # load the saved LoRA adapters
+             template=template,  # must match the template used in training
+             finetuning_type="lora",  # must match the finetuning type used in training
+             quantization_bit=4 if load_in_4bit else None,  # load a 4-bit quantized model
+         )
+         chat_model = ChatModel(args)
+         return chat_model.engine.model, chat_model.engine.tokenizer
+
+     tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+     bnb_config = BitsAndBytesConfig(
+         load_in_4bit=load_in_4bit,
+         bnb_4bit_quant_type="nf4",
+         bnb_4bit_use_double_quant=False,
+         bnb_4bit_compute_dtype=dtype,
+     )
+
+     model = AutoModelForCausalLM.from_pretrained(
+         model_name,
+         quantization_config=bnb_config,
+         torch_dtype=dtype,
+         trust_remote_code=True,
+         device_map="auto",
+     )
+
+     return model, tokenizer
+
+
+ gpu_stats = torch.cuda.get_device_properties(0)
+ start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
+ max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
+ print(f"(1) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
+ print(f"{start_gpu_memory} GB of memory reserved.")
+
+ model, tokenizer = load_model(
+     model_name, load_in_4bit=load_in_4bit, adapter_name_or_path=adapter_name_or_path
+ )
+
+ gpu_stats = torch.cuda.get_device_properties(0)
+ start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
+ max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
+ print(f"(2) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
+ print(f"{start_gpu_memory} GB of memory reserved.")
+
+ datasets = load_translation_dataset(data_path, tokenizer)
+
+ print("Evaluating model: " + model_name)
+ predictions = eval_model(model, tokenizer, datasets["test"])
+
+ gpu_stats = torch.cuda.get_device_properties(0)
+ start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
+ max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
+ print(f"(3) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
+ print(f"{start_gpu_memory} GB of memory reserved.")
+
+ if adapter_name_or_path is not None:
+     model_name += "_" + adapter_name_or_path.split("/")[-1]
+
+ save_results(
+     model_name,
+     results_path,
+     datasets["test"],
+     predictions,
+     debug=True,
+ )
+
+ metrics = calc_metrics(datasets["test"]["english"], predictions, debug=True)
+ print(metrics)
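Note: eval_lf.py is configured entirely through environment variables read via dotenv. A minimal invocation sketch follows; the model ID, adapter path, and data/results paths below are placeholder assumptions, not values taken from this commit.

import os
import runpy

# Placeholder settings; load_dotenv(..., override=False) will not overwrite them.
os.environ["MODEL_NAME"] = "internlm/internlm2_5-7b-chat"             # hypothetical model ID
os.environ["ADAPTER_NAME_OR_PATH"] = "saves/internlm2_5-7b/lora/sft"  # hypothetical adapter dir
os.environ["LOAD_IN_4BIT"] = "true"
os.environ["DATA_PATH"] = "datasets/mac/mac.tsv"                      # hypothetical dataset path
os.environ["RESULTS_PATH"] = "results/mac-results_lf.csv"             # hypothetical output CSV

# Run the script as if it had been launched from the command line.
runpy.run_path("llm_toolkit/eval_lf.py", run_name="__main__")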
llm_toolkit/eval_logical_reasoning.py ADDED
@@ -0,0 +1,73 @@
+ import os
+ import sys
+ import torch
+ from dotenv import find_dotenv, load_dotenv
+
+ found_dotenv = find_dotenv(".env")
+
+ if len(found_dotenv) == 0:
+     found_dotenv = find_dotenv(".env.example")
+ print(f"loading env vars from: {found_dotenv}")
+ load_dotenv(found_dotenv, override=False)
+
+ path = os.path.dirname(found_dotenv)
+ print(f"Adding {path} to sys.path")
+ sys.path.append(path)
+
+ from llm_toolkit.llm_utils import *
+ from llm_toolkit.logical_reasoning_utils import *
+
+ model_name = os.getenv("MODEL_NAME")
+ adapter_name_or_path = os.getenv("ADAPTER_NAME_OR_PATH")
+ load_in_4bit = os.getenv("LOAD_IN_4BIT") == "true"
+ data_path = os.getenv("LOGICAL_REASONING_DATA_PATH")
+ results_path = os.getenv("LOGICAL_REASONING_RESULTS_PATH")
+
+ print(model_name, adapter_name_or_path, load_in_4bit, data_path, results_path)
+
+ gpu_stats = torch.cuda.get_device_properties(0)
+ start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
+ max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
+ print(f"(1) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
+ print(f"{start_gpu_memory} GB of memory reserved.")
+
+ model, tokenizer = load_model(
+     model_name, load_in_4bit=load_in_4bit, adapter_name_or_path=adapter_name_or_path
+ )
+
+ gpu_stats = torch.cuda.get_device_properties(0)
+ start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
+ max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
+ print(f"(2) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
+ print(f"{start_gpu_memory} GB of memory reserved.")
+
+ datasets = load_logical_reasoning_dataset(data_path, tokenizer)
+ if len(sys.argv) > 1:
+     num = int(sys.argv[1])
+     if num > 0:
+         print(f"--- evaluating {num} entries")
+         # keep only the first `num` test entries
+         datasets["test"] = datasets["test"].select(range(num))
+
+ print("Evaluating model: " + model_name)
+ predictions = eval_model(model, tokenizer, datasets["test"])
+
+ gpu_stats = torch.cuda.get_device_properties(0)
+ start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
+ max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
+ print(f"(3) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
+ print(f"{start_gpu_memory} GB of memory reserved.")
+
+ if adapter_name_or_path is not None:
+     model_name += "_" + adapter_name_or_path.split("/")[-1]
+
+ save_results(
+     model_name,
+     results_path,
+     datasets["test"],
+     predictions,
+     debug=True,
+ )
+
+ metrics = calc_metrics(datasets["test"]["label"], predictions, debug=True)
+ print(metrics)
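Note: the optional positional argument limits how many test entries are evaluated, which is handy as a quick smoke test before a full run. A small sketch, assuming the same environment variables are already exported:

import subprocess

# Evaluate only the first 10 entries of the dev split; omit the argument for a full run.
subprocess.run(
    ["python", "llm_toolkit/eval_logical_reasoning.py", "10"],
    check=True,
)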
llm_toolkit/llm_utils.py ADDED
@@ -0,0 +1,160 @@
+ import os
+ import re
+ import sys
+ import torch
+ from llamafactory.chat import ChatModel
+ from tqdm import tqdm  # required by eval_model below; missing from the file as committed
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TextStreamer
+
+
+ def load_model(
+     model_name,
+     max_seq_length=2048,
+     dtype=torch.bfloat16,
+     load_in_4bit=False,
+     adapter_name_or_path=None,
+ ):
+     print(f"loading model: {model_name}")
+
+     if adapter_name_or_path:
+         template = "llama3" if "llama-3" in model_name.lower() else "chatml"
+
+         args = dict(
+             model_name_or_path=model_name,
+             adapter_name_or_path=adapter_name_or_path,  # load the saved LoRA adapters
+             template=template,  # must match the template used in training
+             finetuning_type="lora",  # must match the finetuning type used in training
+             quantization_bit=4 if load_in_4bit else None,  # load a 4-bit quantized model
+         )
+         chat_model = ChatModel(args)
+         return chat_model.engine.model, chat_model.engine.tokenizer
+
+     tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+     bnb_config = BitsAndBytesConfig(
+         load_in_4bit=load_in_4bit,
+         bnb_4bit_quant_type="nf4",
+         bnb_4bit_use_double_quant=False,
+         bnb_4bit_compute_dtype=dtype,
+     )
+
+     model = AutoModelForCausalLM.from_pretrained(
+         model_name,
+         quantization_config=bnb_config,
+         torch_dtype=dtype,
+         trust_remote_code=True,
+         device_map="auto",
+     )
+
+     return model, tokenizer
+
+ def test_model(model, tokenizer, prompt):
+     inputs = tokenizer(
+         [prompt],
+         return_tensors="pt",
+     ).to("cuda")
+
+     text_streamer = TextStreamer(tokenizer)
+
+     _ = model.generate(
+         **inputs, max_new_tokens=2048, streamer=text_streamer, use_cache=True
+     )
+
+
+ def extract_answer(text, debug=False):
+     if text:
+         # Remove the begin and end tokens
+         text = re.sub(
+             r".*?(assistant|\[/INST\]).+?\b", "", text, flags=re.DOTALL | re.MULTILINE
+         )
+         if debug:
+             print("--------\nstep 1:", text)
+
+         text = re.sub(r"<.+?>.*", "", text, flags=re.DOTALL | re.MULTILINE)
+         if debug:
+             print("--------\nstep 2:", text)
+
+         text = re.sub(
+             r".*?end_header_id\|>\n\n", "", text, flags=re.DOTALL | re.MULTILINE
+         )
+         if debug:
+             print("--------\nstep 3:", text)
+
+     return text
+
+ def eval_model(model, tokenizer, eval_dataset):
+     total = len(eval_dataset)
+     predictions = []
+     for i in tqdm(range(total)):
+         inputs = tokenizer(
+             eval_dataset["prompt"][i : i + 1],
+             return_tensors="pt",
+         ).to("cuda")
+
+         outputs = model.generate(**inputs, max_new_tokens=4096, use_cache=False)
+         decoded_output = tokenizer.batch_decode(outputs)
+         debug = i == 0
+         decoded_output = [
+             extract_answer(output, debug=debug) for output in decoded_output
+         ]
+         predictions.extend(decoded_output)
+
+     return predictions
+
+ def save_model(
+     model,
+     tokenizer,
+     include_gguf=True,
+     include_merged=True,
+     publish=True,
+ ):
+     try:
+         token = os.getenv("HF_TOKEN") or None
+         model_name = os.getenv("MODEL_NAME")
+
+         save_method = "lora"
+         quantization_method = "q5_k_m"
+
+         model_names = get_model_names(
+             model_name, save_method=save_method, quantization_method=quantization_method
+         )
+
+         model.save_pretrained(model_names["local"])
+         tokenizer.save_pretrained(model_names["local"])
+
+         if publish:
+             model.push_to_hub(
+                 model_names["hub"],
+                 token=token,
+             )
+             tokenizer.push_to_hub(
+                 model_names["hub"],
+                 token=token,
+             )
+
+         if include_merged:
+             model.save_pretrained_merged(
+                 model_names["local"] + "-merged", tokenizer, save_method=save_method
+             )
+             if publish:
+                 model.push_to_hub_merged(
+                     model_names["hub"] + "-merged",
+                     tokenizer,
+                     save_method="lora",
+                     token="",
+                 )
+
+         if include_gguf:
+             model.save_pretrained_gguf(
+                 model_names["local-gguf"],
+                 tokenizer,
+                 quantization_method=quantization_method,
+             )
+
+             if publish:
+                 model.push_to_hub_gguf(
+                     model_names["hub-gguf"],
+                     tokenizer,
+                     quantization_method=quantization_method,
+                     token=token,
+                 )
+     except Exception as e:
+         print(e)
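Note: extract_answer strips chat-template markers from a decoded generation in three regex passes. A small usage sketch follows; the decoded string is a made-up ChatML-style completion, not output captured from this commit.

from llm_toolkit.llm_utils import extract_answer

decoded = (
    "<|im_start|>user\n谜题: ...<|im_end|>\n"
    "<|im_start|>assistant\n不是<|im_end|>"
)
# The substitutions drop everything up to the assistant turn and the trailing
# special tokens, leaving only the answer text ("不是").
print(extract_answer(decoded, debug=True))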
llm_toolkit/logical_reasoning_utils.py ADDED
@@ -0,0 +1,222 @@
+ import os
+ import re
+ import pandas as pd
+ import seaborn as sns
+ import matplotlib.pyplot as plt
+ from datasets import load_dataset
+ from llm_toolkit.llm_utils import extract_answer
+ from tqdm import tqdm
+
+ print(f"loading {__file__}")
+
+ def calc_metrics(references, predictions, debug=False):
+     assert len(references) == len(
+         predictions
+     ), f"lengths are different: {len(references)} != {len(predictions)}"
+
+     predictions = [extract_answer(text) for text in predictions]
+
+     correct = [1 if ref == pred else 0 for ref, pred in zip(references, predictions)]
+     accuracy = sum(correct) / len(references)
+
+     results = {"accuracy": accuracy}
+     if debug:
+         incorrect_ids = [i for i, c in enumerate(correct) if c == 0]
+         results["incorrect_ids"] = incorrect_ids
+
+     return results
+
+
+ def save_results(model_name, results_path, dataset, predictions, debug=False):
+     if not os.path.exists(results_path):
+         # Get the directory part of the file path
+         dir_path = os.path.dirname(results_path)
+
+         # Create all directories in the path (if they don't exist)
+         os.makedirs(dir_path, exist_ok=True)
+         df = dataset.to_pandas()
+         df.drop(columns=["answer", "prompt", "train_text"], inplace=True)
+     else:
+         df = pd.read_csv(results_path, on_bad_lines="warn")
+
+     df[model_name] = predictions
+
+     if debug:
+         print(df.head(1))
+
+     df.to_csv(results_path, index=False)
+
+
+ def load_logical_reasoning_dataset(data_path, tokenizer=None):
+     train_data_file = data_path + "/train.csv"
+     test_data_file = data_path + "/dev.csv"
+
+     print("loading train/test data files")
+     datasets = load_dataset(
+         "csv",
+         data_files={"train": train_data_file, "test": test_data_file},
+     )
+
+     if tokenizer:
+         reasoning_prompt = """你是一个逻辑游戏的主持人。游戏规则如下:
+
+ 1. 参与者会得到一个谜题。
+ 2. 参与者可以通过提问来获取线索,尝试解开谜题。
+ 3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。
+ 4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。
+ 5. 参与者需要根据回答来推理,并最终找出谜题的正确答案。
+
+ 请严格按照这些规则回答参与者提出的问题。
+
+ 谜题: {}
+
+ 实际情况: {}
+
+ 参与者提出的问题: {}
+ """
+         def formatting_prompts_func(examples):
+             inputs = examples["text"]
+             outputs = examples["label"]
+             puzzles = examples["puzzle"]
+             truths = examples["truth"]
+
+             messages = [
+                 {
+                     "role": "system",
+                     "content": "You are an expert in logical reasoning.",
+                 },
+                 None,
+             ]
+
+             model_name = os.getenv("MODEL_NAME")
+
+             if "mistral" in model_name.lower():
+                 messages = messages[1:]
+
+             texts = []
+             prompts = []
+             for input, output, puzzle, truth in zip(inputs, outputs, puzzles, truths):
+                 prompt = reasoning_prompt.format(puzzle, truth, input)
+                 messages[-1] = {"role": "user", "content": prompt}
+
+                 prompt = tokenizer.apply_chat_template(
+                     messages, tokenize=False, add_generation_prompt=True
+                 )
+                 prompts.append(prompt)
+                 texts.append(prompt + output + tokenizer.eos_token)
+             return {"train_text": texts, "prompt": prompts}
+
+         datasets = datasets.map(
+             formatting_prompts_func,
+             batched=True,
+         )
+
+     print(datasets)
+     return datasets
+
+
+ def eval_model(model, tokenizer, eval_dataset):
+     total = len(eval_dataset)
+     predictions = []
+     for i in tqdm(range(total)):
+         inputs = tokenizer(
+             eval_dataset["prompt"][i : i + 1],
+             return_tensors="pt",
+         ).to("cuda")
+
+         outputs = model.generate(**inputs, max_new_tokens=4096, use_cache=False)
+         decoded_output = tokenizer.batch_decode(outputs)
+         debug = i == 0
+         decoded_output = [
+             extract_answer(output, debug=debug) for output in decoded_output
+         ]
+         predictions.extend(decoded_output)
+
+     return predictions
+
+
+ def save_model(
+     model,
+     tokenizer,
+     include_gguf=True,
+     include_merged=True,
+     publish=True,
+ ):
+     try:
+         token = os.getenv("HF_TOKEN") or None
+         model_name = os.getenv("MODEL_NAME")
+
+         save_method = "lora"
+         quantization_method = "q5_k_m"
+
+         model_names = get_model_names(
+             model_name, save_method=save_method, quantization_method=quantization_method
+         )
+
+         model.save_pretrained(model_names["local"])
+         tokenizer.save_pretrained(model_names["local"])
+
+         if publish:
+             model.push_to_hub(
+                 model_names["hub"],
+                 token=token,
+             )
+             tokenizer.push_to_hub(
+                 model_names["hub"],
+                 token=token,
+             )
+
+         if include_merged:
+             model.save_pretrained_merged(
+                 model_names["local"] + "-merged", tokenizer, save_method=save_method
+             )
+             if publish:
+                 model.push_to_hub_merged(
+                     model_names["hub"] + "-merged",
+                     tokenizer,
+                     save_method="lora",
+                     token="",
+                 )
+
+         if include_gguf:
+             model.save_pretrained_gguf(
+                 model_names["local-gguf"],
+                 tokenizer,
+                 quantization_method=quantization_method,
+             )
+
+             if publish:
+                 model.push_to_hub_gguf(
+                     model_names["hub-gguf"],
+                     tokenizer,
+                     quantization_method=quantization_method,
+                     token=token,
+                 )
+     except Exception as e:
+         print(e)
+
+
+ def get_metrics(df):
+     metrics_df = pd.DataFrame(df.columns.T)[2:]
+     metrics_df.rename(columns={0: "model"}, inplace=True)
+     metrics_df["model"] = metrics_df["model"].apply(lambda x: x.split("/")[-1])
+     metrics_df.reset_index(inplace=True)
+     metrics_df = metrics_df.drop(columns=["index"])
+
+     accuracy = []
+     all_metrics = []
+     for col in df.columns[2:]:
+         # reference answers are in the "label" column ("english" in the original is a leftover from the translation utils)
+         metrics = calc_metrics(df["label"], df[col], debug=True)
+         print(f"{col}: {metrics}")
+
+         accuracy.append(metrics["accuracy"])
+         all_metrics.append(metrics)
+
+     metrics_df["accuracy"] = accuracy
+     metrics_df["all_metrics"] = all_metrics
+
+     return metrics_df
+
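Note: calc_metrics reduces to exact-match accuracy after extract_answer normalizes each prediction. A toy illustration with made-up references and predictions:

from llm_toolkit.logical_reasoning_utils import calc_metrics

references = ["是", "不是", "不重要"]
predictions = ["是", "不是", "回答正确"]

# Two of the three predictions match exactly, so accuracy is 2/3;
# debug=True also reports the indices of the mismatches.
print(calc_metrics(references, predictions, debug=True))
# -> {'accuracy': 0.6666666666666666, 'incorrect_ids': [2]}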
llm_toolkit/{tune_mac.py → tune.py} RENAMED
@@ -22,7 +22,6 @@ model_name = os.getenv("MODEL_NAME")
  load_in_4bit = os.getenv("LOAD_IN_4BIT") == "true"
  eval_base_model = os.getenv("EVAL_BASE_MODEL") == "true"
  eval_fine_tuned = os.getenv("EVAL_FINE_TUNED") == "true"
- do_fine_tuning = os.getenv("DO_FINE_TUNING") == "true"
  save_fine_tuned_model = os.getenv("SAVE_FINE_TUNED") == "true"
  num_train_epochs = int(os.getenv("NUM_TRAIN_EPOCHS") or 0)
  data_path = os.getenv("DATA_PATH")
@@ -42,7 +41,6 @@ print(
      data_path,
      results_path,
      eval_base_model,
-     do_fine_tuning,
      eval_fine_tuned,
      save_fine_tuned_model,
  )
@@ -84,8 +82,9 @@ print(f"(3) GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
  print(f"{start_gpu_memory} GB of memory reserved.")
 
 
- if not do_fine_tuning:
-     sys.exit(0)
+ def is_bfloat16_supported():
+     return True
+
 
  trainer = load_trainer(
      model,
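Note: the is_bfloat16_supported() stub introduced above always returns True. A more defensive variant (an assumption, not part of this commit) would ask PyTorch instead, so fp16-only GPUs such as the T4 fall back cleanly:

import torch

def is_bfloat16_supported() -> bool:
    # Prefer bf16 only when CUDA is available and the device actually supports it.
    return torch.cuda.is_available() and torch.cuda.is_bf16_supported()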
novel-translation/00_Data_Analysis.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
novel-translation/07r2_tune-lf-py3.11.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
novel-translation/08r2_eval-lf-py3.11.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
novel-translation/09_tune-lf-medium-py3.11.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
results/mgtv-results.csv ADDED
The diff for this file is too large to render. See raw diff
 
results/mgtv-results_nv4080.csv ADDED
The diff for this file is too large to render. See raw diff