HaotianHu committed on
Commit b370fd5 · 1 Parent(s): 6f26410
competition/13_Qwen2_7b_finetuning_l40.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
competition/13_Qwen2_7b_finetuning_l40.py ADDED
@@ -0,0 +1,155 @@
+ # %%
+ %load_ext autoreload
+ %autoreload 2
+
+ # %%
+ import os
+ import sys
+ from pathlib import Path
+
+ working_dir = str(Path.cwd().parent)
+ os.chdir(working_dir)
+ sys.path.append(working_dir)
+ print("working dir:", working_dir)
+
+ # %%
+ need_to_setup_env = False
+ need_to_setup_env
+
+ # %%
+ if need_to_setup_env:
+     !pip config set global.index-url https://pypi.org/simple
+     # %pip install tf-keras
+     # %pip install -q --upgrade accelerate einops xformers torchvision
+     # %pip install -r requirements.txt
+     !cd LLaMA-Factory && pip install -e .[torch,bitsandbytes] && FLASH_ATTENTION_FORCE_BUILD=TRUE pip install --upgrade flash-attn
+
+ # %%
+ from dotenv import find_dotenv, load_dotenv
+
+ found_dotenv = find_dotenv(".env.qwen2_7b")
+
+ if len(found_dotenv) == 0:
+     found_dotenv = find_dotenv(".env.example")
+ print(f"loading env vars from: {found_dotenv}")
+ load_dotenv(found_dotenv, override=True)
+
+ # %%
+ import os
+
+ model_name = os.getenv("MODEL_NAME")
+ adapter_name_or_path = os.getenv("ADAPTER_NAME_OR_PATH")
+ load_in_4bit = os.getenv("LOAD_IN_4BIT") == "true"
+ data_path = os.getenv("LOGICAL_REASONING_DATA_PATH")
+ results_path = os.getenv("LOGICAL_REASONING_RESULTS_PATH")
+ use_english_datasets = os.getenv("USE_ENGLISH_DATASETS") == "true"
+
+ print(model_name, adapter_name_or_path, load_in_4bit, data_path, results_path, use_english_datasets)
+
+ # %%
+ import pandas as pd
+ df = pd.read_csv("datasets/mgtv/train.csv")
+ df.head()
+
+ # %%
+ # Build Alpaca-style records. `instruction` is a fixed Chinese system prompt:
+ # the model acts as the host of a situation-puzzle game and must answer each
+ # question with exactly one of 是 / 不是 / 不重要 / 回答正确 / 问法错误.
+ # Its trailing {} placeholders are left unfilled; the concrete puzzle, truth,
+ # and question go into `input` instead.
+ dataset_data = [
+     {
+         "instruction": "你是一个逻辑游戏的主持人。游戏规则如下:1. 参与者会得到一个谜题。2. 参与者可以通过提问来获取线索,尝试解开谜题。3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。5. 参与者需要根据回答来推理,并最终找出谜题的正确答案。请严格按照这些规则回答参与者提出的问题。谜题: {}实际情况: {}参与者提出的问题: {}",
+         "input": "谜题: " + row_dict["title"] + " " + row_dict["puzzle"] + "实际情况: " + row_dict["truth"] + "参与者提出的问题: " + row_dict["text"],
+         "output": row_dict["label"],
+     }
+     for row_dict in df.to_dict(orient="records")
+ ]
+
+ dataset_data[0]
+
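+ # %%
+ # Quick sanity check (a sketch, not part of the original pipeline): since the
+ # prompt allows exactly five answers, every `output` should be one of them;
+ # anything else signals a data problem.
+ allowed = {"是", "不是", "不重要", "回答正确", "问法错误"}
+ bad = [d for d in dataset_data if d["output"] not in allowed]
+ print(f"{len(dataset_data)} records, {len(bad)} with unexpected labels")
+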
+ # %%
+ import os
+ import json
+
+ # Define the directory where you want to save the JSON file
+ output_dir = "LLaMA-Factory/data/"
+
+ # Ensure the directory exists
+ os.makedirs(output_dir, exist_ok=True)
+
+ # Define the full path for the JSON file
+ json_file_path = os.path.join(output_dir, "mgtv_train.json")
+
+ # Save the dataset; ensure_ascii=False keeps the Chinese text readable
+ # instead of escaping it to \uXXXX sequences
+ with open(json_file_path, "w", encoding="utf-8") as f:
+     json.dump(dataset_data, f, ensure_ascii=False)
+
+ print(f"JSON file saved to {json_file_path}")
+
+
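+ # %%
+ # LLaMA-Factory only trains on datasets declared in data/dataset_info.json,
+ # and the configs below refer to this one as `mgtv_train`. A minimal sketch of
+ # that registration, assuming the default alpaca-style field names used above;
+ # skip this cell if your LLaMA-Factory checkout already has the entry.
+ info_path = os.path.join(output_dir, "dataset_info.json")
+ dataset_info = {}
+ if os.path.exists(info_path):
+     with open(info_path, encoding="utf-8") as f:
+         dataset_info = json.load(f)
+
+ dataset_info["mgtv_train"] = {"file_name": "mgtv_train.json"}
+
+ with open(info_path, "w", encoding="utf-8") as f:
+     json.dump(dataset_info, f, ensure_ascii=False, indent=2)
+
+ print(f"registered mgtv_train in {info_path}")
+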
+ # %%
+ !nvidia-smi
+
+ # %%
+ %%time
+ import subprocess
+ import os
+
+ script_path = "scripts/tune-lf.sh"
+ config_path = "config/qwen2_7b_lora_sft_4bit.yaml"
+
+ # Print paths for debugging
+ print(f"Script Path: {script_path}")
+ print(f"Config Path: {config_path}")
+
+ # Check that the input files exist before launching
+ if not os.path.exists(script_path):
+     print(f"Script not found at {script_path}")
+ if not os.path.exists(config_path):
+     print(f"Config file not found at {config_path}")
+
+ # Launch fine-tuning via bash (Linux/WSL); a Windows/Git-Bash variant is
+ # kept, commented out, below
+ subprocess.run(["bash", script_path, config_path], check=True)
+
+
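+ # %%
+ # Alternative, assuming scripts/tune-lf.sh is a thin wrapper around the
+ # LLaMA-Factory CLI (a sketch -- verify against the wrapper script first):
+ # !llamafactory-cli train config/qwen2_7b_lora_sft_4bit.yaml
+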
+ # %%
+ # import subprocess
+ # import os
+
+ # git_bash_path = r"C:/Program Files/Git/bin/bash.exe"
+
+ # script_path = "scripts/tune-lf.sh"
+ # config_path = "config/qwen2_7b_lora_sft_4bit.yaml"
+
+ # # Print paths for debugging
+ # print(f"Git Bash Path: {git_bash_path}")
+ # print(f"Script Path: {script_path}")
+ # print(f"Config Path: {config_path}")
+
+ # # Check if the files exist
+ # if not os.path.exists(git_bash_path):
+ #     print(f"Git Bash not found at {git_bash_path}")
+ # if not os.path.exists(script_path):
+ #     print(f"Script not found at {script_path}")
+ # if not os.path.exists(config_path):
+ #     print(f"Config file not found at {config_path}")
+
+ # # Ensure the script has execution permissions
+ # os.chmod(script_path, 0o755)
+
+ # # Using Git Bash or WSL
+ # try:
+ #     result = subprocess.run(
+ #         [git_bash_path, '-c', f'bash {script_path} {config_path}'],
+ #         check=True, capture_output=True, text=True
+ #     )
+ #     print("Output:", result.stdout)
+ #     print("Error:", result.stderr)
+ # except subprocess.CalledProcessError as e:
+ #     print("Command failed with error code:", e.returncode)
+ #     print("Output:", e.stdout)
+ #     print("Error:", e.stderr)
+
+
llama-factory/config/qwen2_0.5b_lora_sft_4bit.yaml ADDED
@@ -0,0 +1,46 @@
+ ### model
+ model_name_or_path: Qwen/Qwen2-0.5B
+
+ ### method
+ stage: sft
+ do_train: true
+ finetuning_type: lora
+ lora_target: all
+ quantization_bit: 4 # use 4-bit QLoRA
+ loraplus_lr_ratio: 16.0 # use LoRA+ with lambda=16.0
+ upcast_layernorm: true
+
+ ### dataset
+ dataset: mgtv_train
+ template: qwen
+ cutoff_len: 4096
+ max_samples: 25000
+ overwrite_cache: true
+ preprocessing_num_workers: 16
+
+ ### output
+ output_dir: saves/qwen2_0.5b/lora/sft_4bit
+ logging_steps: 562
+ save_steps: 562
+ plot_loss: true
+ # overwrite_output_dir: true
+ # resume_from_checkpoint: true
+
+ ### train
+ per_device_train_batch_size: 8
+ gradient_accumulation_steps: 8
+ learning_rate: 1.0e-4
+ num_train_epochs: 3.0
+ lr_scheduler_type: cosine
+ warmup_ratio: 0.1
+ bf16: true
+ ddp_timeout: 180000000
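+ # note: on a single GPU the settings above give an effective batch size of
+ # 8 (per-device) x 8 (grad accum) = 64 samples per optimizer step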
+
+ ### eval
+ val_size: 0.1
+ per_device_eval_batch_size: 1
+ eval_strategy: steps
+ eval_steps: 562
+
+ report_to: none
+ run_name: qwen2_0.5b # optional
llama-factory/config/qwen2_7b_lora_sft_4bit.yaml CHANGED
@@ -23,11 +23,11 @@ output_dir: saves/qwen2_7b/lora/sft_4bit
  logging_steps: 562
  save_steps: 562
  plot_loss: true
- overwrite_output_dir: true
+ # overwrite_output_dir: true
  # resume_from_checkpoint: true

  ### train
- per_device_train_batch_size: 32
+ per_device_train_batch_size: 8
  gradient_accumulation_steps: 8
  learning_rate: 1.0e-4
  num_train_epochs: 3.0
@@ -42,5 +42,5 @@ per_device_eval_batch_size: 1
  eval_strategy: steps
  eval_steps: 562

- report_to: wandb
+ report_to: none
  run_name: qwen2_7b # optional
scripts/tune-mgtv-qwen2_7b.sh CHANGED
@@ -13,8 +13,8 @@ cat /etc/os-release
  lscpu
  grep MemTotal /proc/meminfo

- #pip install -r requirements.txt
- #cd ../LLaMA-Factory && pip install -e .[torch,bitsandbytes]
+ pip install -r requirements.txt
+ cd ../LLaMA-Factory && pip install -e .[torch,bitsandbytes]

  export LOGICAL_REASONING_DATA_PATH=datasets/mgtv