inflaton committed on
Commit
df3521b
·
1 Parent(s): 43b88d5

h100 results

Browse files
.env.example CHANGED
@@ -1,15 +1,11 @@
 
 
1
  OPENAI_API_KEY=
2
  HF_TOKEN=
 
3
 
4
- MODEL_NAME=Qwen2/Qwen2-1.5B-Instruct
5
-
6
- LOAD_IN_4BIT=true
7
- NUM_TRAIN_EPOCHS=10
8
-
9
- DATA_PATH=datasets/mac/mac.tsv
10
- RESULTS_PATH=results/mac-results_lf.csv
11
 
12
- EVAL_BASE_MODEL=true
13
- EVAL_FINE_TUNED=true
14
- SAVE_FINE_TUNED=true
15
- DO_FINE_TUNING=true
 
1
+ MODEL_NAME=internlm/internlm2_5-7b-chat-1m
2
+
3
  OPENAI_API_KEY=
4
  HF_TOKEN=
5
+ WANDB_API_KEY=
6
 
7
+ LOAD_IN_4BIT=false
8
+ NUM_TRAIN_EPOCHS=3
 
 
 
 
 
9
 
10
+ LOGICAL_REASONING_DATA_PATH=datasets/mgtv
11
+ LOGICAL_REASONING_RESULTS_PATH=results/mgtv-results_l40.csv
 
 
.gitattributes CHANGED
@@ -52,3 +52,6 @@ results/mgtv-results_t4_r3.csv filter=lfs diff=lfs merge=lfs -text
52
  llama-factory/data/alpaca_mac.json filter=lfs diff=lfs merge=lfs -text
53
  llama-factory/data/alpaca_mgtv_p2.json filter=lfs diff=lfs merge=lfs -text
54
  llama-factory/data/dataset_info.json filter=lfs diff=lfs merge=lfs -text
 
 
 
 
52
  llama-factory/data/alpaca_mac.json filter=lfs diff=lfs merge=lfs -text
53
  llama-factory/data/alpaca_mgtv_p2.json filter=lfs diff=lfs merge=lfs -text
54
  llama-factory/data/dataset_info.json filter=lfs diff=lfs merge=lfs -text
55
+ results/mgtv-results_colab_p2.csv filter=lfs diff=lfs merge=lfs -text
56
+ results/mgtv-results_h100.csv filter=lfs diff=lfs merge=lfs -text
57
+ results/mgtv-results_bf16.csv filter=lfs diff=lfs merge=lfs -text
competition/{09b_InternLM_bf16_p2_analysis.ipynb → 09c_InternLM_bf16_p2_analysis.ipynb} RENAMED
File without changes
competition/10_InterLM_h100_eval.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
competition/10a_InternLM_h100_analysis.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
llm_toolkit/eval_logical_reasoning_all_epochs.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import subprocess
4
+ from dotenv import find_dotenv, load_dotenv
5
+
6
+
7
def evaluate_model_all_epochs_v2(
    model_name,
    adapter_path_base=None,
    start_epoch=0,
    load_in_4bit=True,
    num_of_entries=-1,
    result_file=None,
):
    """Evaluate a model at epoch 0 (base model) and at every saved checkpoint.

    For each epoch, ``llm_toolkit/eval_logical_reasoning.py`` is run in a
    subprocess whose environment selects the model/adapter; its stdout is
    captured to ``./logs/<model>_epoch_<i>.txt``.

    Args:
        model_name: HF model id, e.g. "internlm/internlm2_5-7b-chat-1m".
        adapter_path_base: directory containing ``checkpoint-N`` subdirs;
            when None only the base model (epoch 0) is evaluated.
        start_epoch: first epoch to evaluate (0 = base model, no adapter).
        load_in_4bit: forwarded to the eval script via LOAD_IN_4BIT.
        num_of_entries: row limit passed to the eval script (-1 = all).
        result_file: overrides LOGICAL_REASONING_RESULTS_PATH when given.
    """
    new_env = os.environ.copy()
    new_env["MODEL_NAME"] = model_name
    model = model_name.split("/")[-1]

    new_env["LOAD_IN_4BIT"] = "true" if load_in_4bit else "false"
    if result_file is not None:
        new_env["LOGICAL_REASONING_RESULTS_PATH"] = result_file

    if adapter_path_base is None:
        subdirs = []
        num_train_epochs = 0
        print(f"No adapter path provided. Running with base model:{model_name}")
    else:
        # Checkpoint dirs are named like "checkpoint-70"; sort numerically by
        # the trailing step number so epoch i maps to the i-th checkpoint.
        subdirs = [
            d
            for d in os.listdir(adapter_path_base)
            if os.path.isdir(os.path.join(adapter_path_base, d))
        ]
        subdirs = sorted(subdirs, key=lambda x: int(x.split("-")[-1]))
        num_train_epochs = len(subdirs)
        print(f"found {num_train_epochs} checkpoints: {subdirs}")

    # The log directory may not exist on a fresh clone; open() below would fail.
    os.makedirs("./logs", exist_ok=True)

    for i in range(start_epoch, num_train_epochs + 1):
        print(f"Epoch {i}")
        if i == 0:
            # BUG FIX: os.unsetenv() mutates the *current* process environment,
            # not the copied dict handed to the subprocess — so epoch 0 could
            # still inherit an adapter. Drop the key from new_env instead.
            new_env.pop("ADAPTER_NAME_OR_PATH", None)
        else:
            # BUG FIX: subdirs holds bare directory names; join with the base
            # path so the eval script receives a usable adapter path.
            adapter_path = os.path.join(adapter_path_base, subdirs[i - 1])
            new_env["ADAPTER_NAME_OR_PATH"] = adapter_path

        print(f"adapter path: {new_env.get('ADAPTER_NAME_OR_PATH')}")

        log_file = "./logs/{}_epoch_{}.txt".format(model, i)
        with open(log_file, "w") as f_obj:
            subprocess.run(
                f"python llm_toolkit/eval_logical_reasoning.py {num_of_entries}",
                shell=True,
                env=new_env,
                stdout=f_obj,
                text=True,
            )
58
+
59
+
60
if __name__ == "__main__":
    # Prefer a real .env; fall back to the checked-in example config.
    found_dotenv = find_dotenv(".env")

    if len(found_dotenv) == 0:
        found_dotenv = find_dotenv(".env.example")
    print(f"loading env vars from: {found_dotenv}")
    load_dotenv(found_dotenv, override=False)

    # Run from the repo root (where the .env file lives) so the relative
    # dataset/result paths used by the eval script resolve correctly.
    workding_dir = os.path.dirname(found_dotenv)
    os.chdir(workding_dir)
    print("workding dir:", workding_dir)
    print(f"adding {workding_dir} to sys.path")
    # BUG FIX: sys.path.append(workding_dir) was called twice; once suffices.
    sys.path.append(workding_dir)

    model_name = os.getenv("MODEL_NAME")
    adapter_path_base = os.getenv("ADAPTER_PATH_BASE")
    start_epoch = int(os.getenv("START_EPOCH", 0))
    load_in_4bit = os.getenv("LOAD_IN_4BIT", "true").lower() == "true"
    result_file = os.getenv("LOGICAL_REASONING_RESULTS_PATH", None)

    # Optional CLI arg: number of dataset entries to evaluate (-1 = all).
    num_of_entries = int(sys.argv[1]) if len(sys.argv) > 1 else -1

    evaluate_model_all_epochs_v2(
        model_name,
        adapter_path_base=adapter_path_base,
        start_epoch=start_epoch,
        load_in_4bit=load_in_4bit,
        num_of_entries=num_of_entries,
        result_file=result_file,
    )
{results → logs}/h100_p1.txt RENAMED
File without changes
{results → logs}/l40_p2.txt RENAMED
File without changes
results/mgtv-results_bf16.csv CHANGED
The diff for this file is too large to render. See raw diff
 
results/mgtv-results_colab_p2.csv CHANGED
The diff for this file is too large to render. See raw diff
 
results/mgtv-results_h100.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12a51462b06704ff4867c91a3c7a305371c706ceefe02be5473f27f7f612f4f9
3
+ size 2812796
scripts/eval-mgtv-internlm.sh ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/sh
# Evaluate the fine-tuned InternLM model on the MGTV logical-reasoning task,
# running the eval across all saved checkpoints.

# Run from the repository root. BUG FIX: quote the path and abort if cd fails,
# otherwise the eval would silently run from the wrong directory.
BASEDIR=$(dirname "$0")
cd "$BASEDIR/.." || exit 1
echo Current Directory:
pwd

BASEDIR=$(pwd)

# Print hardware/OS diagnostics so they appear in the captured run log.
nvidia-smi
uname -a
cat /etc/os-release
lscpu
grep MemTotal /proc/meminfo

#pip install -r requirements.txt
#cd ../LLaMA-Factory && pip install -e .[torch,bitsandbytes]

export MODEL_NAME=internlm/internlm2_5-7b-chat-1m
export LOGICAL_REASONING_DATA_PATH=datasets/mgtv
export LOGICAL_REASONING_RESULTS_PATH=results/mgtv-results_p2_full.csv
export ADAPTER_PATH_BASE=llama-factory/saves/internlm2_5_7b/lora/sft_bf16_p2_full

echo "Eval $MODEL_NAME with $ADAPTER_PATH_BASE"
python llm_toolkit/eval_logical_reasoning_all_epochs.py
scripts/eval-mgtv.sh ADDED
@@ -0,0 +1 @@
 
 
1
+ eval-mgtv-internlm.sh
scripts/tune-mgtv-internlm.sh ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/sh
# Fine-tune InternLM on the MGTV logical-reasoning dataset via LLaMA-Factory
# (p2 prompt config; the p1 run is kept commented out for reference).

# Run from the repository root. BUG FIX: quote the path and abort if cd fails,
# otherwise tuning would silently run from the wrong directory.
BASEDIR=$(dirname "$0")
cd "$BASEDIR/.." || exit 1
echo Current Directory:
pwd

BASEDIR=$(pwd)

# Print hardware/OS diagnostics so they appear in the captured run log.
nvidia-smi
uname -a
cat /etc/os-release
lscpu
grep MemTotal /proc/meminfo

#pip install -r requirements.txt
#cd ../LLaMA-Factory && pip install -e .[torch,bitsandbytes]

export LOGICAL_REASONING_DATA_PATH=datasets/mgtv

export LOGICAL_REASONING_RESULTS_PATH=results/mgtv-results_p1_full.csv
export CONFIG_FILE=config/internlm2_5_7b_lora_sft_bf16_p1_full.yaml
#echo "Tuning with $CONFIG_FILE"
#$BASEDIR/scripts/tune-lf.sh $CONFIG_FILE

export LOGICAL_REASONING_RESULTS_PATH=results/mgtv-results_p2_full.csv
export CONFIG_FILE=config/internlm2_5_7b_lora_sft_bf16_p2_full.yaml
echo "Tuning with $CONFIG_FILE"
$BASEDIR/scripts/tune-lf.sh $CONFIG_FILE
scripts/tune-mgtv.sh DELETED
@@ -1,29 +0,0 @@
1
- #!/bin/sh
2
-
3
- BASEDIR=$(dirname "$0")
4
- cd $BASEDIR/..
5
- echo Current Directory:
6
- pwd
7
-
8
- BASEDIR=`pwd`
9
-
10
- nvidia-smi
11
- uname -a
12
- cat /etc/os-release
13
- lscpu
14
- grep MemTotal /proc/meminfo
15
-
16
- #pip install -r requirements.txt
17
- #cd ../LLaMA-Factory && pip install -e .[torch,bitsandbytes]
18
-
19
- export LOGICAL_REASONING_DATA_PATH=datasets/mgtv
20
-
21
- export LOGICAL_REASONING_RESULTS_PATH=results/mgtv-results_p1_full.csv
22
- export CONFIG_FILE=config/internlm2_5_7b_lora_sft_bf16_p1_full.yaml
23
- #echo "Tuning with $CONFIG_FILE"
24
- #$BASEDIR/scripts/tune-lf.sh $CONFIG_FILE
25
-
26
- export LOGICAL_REASONING_RESULTS_PATH=results/mgtv-results_p2_full.csv
27
- export CONFIG_FILE=config/internlm2_5_7b_lora_sft_bf16_p2_full.yaml
28
- cho "Tuning with $CONFIG_FILE"
29
- $BASEDIR/scripts/tune-lf.sh $CONFIG_FILE
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/tune-mgtv.sh ADDED
@@ -0,0 +1 @@
 
 
1
+ tune-mgtv-internlm.sh