Spaces:
Build error
Build error
test_b results
Browse files
.gitattributes
CHANGED
@@ -69,3 +69,4 @@ llama-factory/data/Icon
|
|
69 |
llama-factory/data/alpaca_mgtv_p1_en.json filter=lfs diff=lfs merge=lfs -text
|
70 |
llama-factory/data/alpaca_mgtv_p2_en.json filter=lfs diff=lfs merge=lfs -text
|
71 |
datasets/mgtv/test_b.csv filter=lfs diff=lfs merge=lfs -text
|
|
|
|
69 |
llama-factory/data/alpaca_mgtv_p1_en.json filter=lfs diff=lfs merge=lfs -text
|
70 |
llama-factory/data/alpaca_mgtv_p2_en.json filter=lfs diff=lfs merge=lfs -text
|
71 |
datasets/mgtv/test_b.csv filter=lfs diff=lfs merge=lfs -text
|
72 |
+
results/test_b-results.csv filter=lfs diff=lfs merge=lfs -text
|
competition/15_InternLM_NV4090_test.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
llm_toolkit/eval_logical_reasoning.py
CHANGED
@@ -24,6 +24,7 @@ data_path = os.getenv("LOGICAL_REASONING_DATA_PATH")
|
|
24 |
results_path = os.getenv("LOGICAL_REASONING_RESULTS_PATH")
|
25 |
use_english_datasets = os.getenv("USE_ENGLISH_DATASETS") == "true"
|
26 |
using_p1 = os.getenv("USING_P1_PROMPT_TEMPLATE") == "true"
|
|
|
27 |
using_llama_factory = os.getenv("USING_LLAMA_FACTORY") == "true"
|
28 |
max_new_tokens = int(os.getenv("MAX_NEW_TOKENS", 16))
|
29 |
repetition_penalty = float(os.getenv("REPETITION_PENALTY", 1.0))
|
@@ -65,6 +66,7 @@ datasets = load_logical_reasoning_dataset(
|
|
65 |
tokenizer=tokenizer,
|
66 |
chinese_prompt=not use_english_datasets,
|
67 |
using_p1=using_p1,
|
|
|
68 |
)
|
69 |
|
70 |
if len(sys.argv) > 1:
|
@@ -94,12 +96,17 @@ if adapter_name_or_path is not None:
|
|
94 |
model_name += "/" + adapter_name_or_path.split("/")[-1]
|
95 |
|
96 |
save_results(
|
97 |
-
|
|
|
|
|
|
|
|
|
98 |
results_path,
|
99 |
datasets["test"],
|
100 |
predictions,
|
101 |
debug=True,
|
102 |
)
|
103 |
|
104 |
-
|
105 |
-
|
|
|
|
24 |
results_path = os.getenv("LOGICAL_REASONING_RESULTS_PATH")
|
25 |
use_english_datasets = os.getenv("USE_ENGLISH_DATASETS") == "true"
|
26 |
using_p1 = os.getenv("USING_P1_PROMPT_TEMPLATE") == "true"
|
27 |
+
test_data = os.getenv("TEST_DATA", None)
|
28 |
using_llama_factory = os.getenv("USING_LLAMA_FACTORY") == "true"
|
29 |
max_new_tokens = int(os.getenv("MAX_NEW_TOKENS", 16))
|
30 |
repetition_penalty = float(os.getenv("REPETITION_PENALTY", 1.0))
|
|
|
66 |
tokenizer=tokenizer,
|
67 |
chinese_prompt=not use_english_datasets,
|
68 |
using_p1=using_p1,
|
69 |
+
test_data=test_data,
|
70 |
)
|
71 |
|
72 |
if len(sys.argv) > 1:
|
|
|
96 |
model_name += "/" + adapter_name_or_path.split("/")[-1]
|
97 |
|
98 |
save_results(
|
99 |
+
(
|
100 |
+
"answer"
|
101 |
+
if test_data
|
102 |
+
else f"{model_name}_{dtype}{'_4bit' if load_in_4bit else ''}{'_lf' if using_llama_factory else ''}"
|
103 |
+
),
|
104 |
results_path,
|
105 |
datasets["test"],
|
106 |
predictions,
|
107 |
debug=True,
|
108 |
)
|
109 |
|
110 |
+
if not test_data:
|
111 |
+
metrics = calc_metrics(datasets["test"]["label"], predictions, debug=True)
|
112 |
+
print(metrics)
|
llm_toolkit/logical_reasoning_utils.py
CHANGED
@@ -167,11 +167,11 @@ def save_results(model_name, results_path, dataset, predictions, debug=False):
|
|
167 |
|
168 |
|
169 |
def load_logical_reasoning_dataset(
|
170 |
-
data_path, tokenizer=None, using_p1=True, chinese_prompt=True
|
171 |
):
|
172 |
postfix = "" if chinese_prompt else "_en"
|
173 |
train_data_file = data_path + f"/train{postfix}.csv"
|
174 |
-
test_data_file = data_path + f"/dev{postfix}.csv"
|
175 |
|
176 |
print("loading train/test data files")
|
177 |
datasets = load_dataset(
|
@@ -211,7 +211,7 @@ def load_logical_reasoning_dataset(
|
|
211 |
messages, tokenize=False, add_generation_prompt=True
|
212 |
)
|
213 |
prompts.append(prompt)
|
214 |
-
texts.append(prompt + output + tokenizer.eos_token)
|
215 |
return {"train_text": texts, "prompt": prompts}
|
216 |
|
217 |
datasets = datasets.map(
|
|
|
167 |
|
168 |
|
169 |
def load_logical_reasoning_dataset(
|
170 |
+
data_path, tokenizer=None, using_p1=True, chinese_prompt=True, test_data=None
|
171 |
):
|
172 |
postfix = "" if chinese_prompt else "_en"
|
173 |
train_data_file = data_path + f"/train{postfix}.csv"
|
174 |
+
test_data_file = data_path + f"/{test_data if test_data else 'dev'}{postfix}.csv"
|
175 |
|
176 |
print("loading train/test data files")
|
177 |
datasets = load_dataset(
|
|
|
211 |
messages, tokenize=False, add_generation_prompt=True
|
212 |
)
|
213 |
prompts.append(prompt)
|
214 |
+
texts.append(prompt + output + tokenizer.eos_token if output else "")
|
215 |
return {"train_text": texts, "prompt": prompts}
|
216 |
|
217 |
datasets = datasets.map(
|
results/test_b-results.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f8efee9e847b45b8d1ccf187a7e214e73cfbd368840b1d699af6b228eca3b22a
|
3 |
+
size 23116718
|
scripts/test-mgtv.sh
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/sh
|
2 |
+
|
3 |
+
BASEDIR=$(dirname "$0")
|
4 |
+
cd $BASEDIR/..
|
5 |
+
echo Current Directory:
|
6 |
+
pwd
|
7 |
+
|
8 |
+
BASEDIR=`pwd`
|
9 |
+
|
10 |
+
nvidia-smi
|
11 |
+
uname -a
|
12 |
+
cat /etc/os-release
|
13 |
+
lscpu
|
14 |
+
grep MemTotal /proc/meminfo
|
15 |
+
|
16 |
+
export LOAD_IN_4BIT=false
|
17 |
+
|
18 |
+
export MODEL_NAME=internlm/internlm2_5-7b-chat-1m
|
19 |
+
export ADAPTER_NAME_OR_PATH=inflaton-ai/InternLM_2_5-7b_LoRA-Adapter
|
20 |
+
export LOGICAL_REASONING_DATA_PATH=datasets/mgtv
|
21 |
+
|
22 |
+
export TEST_DATA=test_b
|
23 |
+
export LOGICAL_REASONING_RESULTS_PATH=results/$TEST_DATA-results.csv
|
24 |
+
|
25 |
+
echo "Eval $MODEL_NAME with $ADAPTER_NAME_OR_PATH"
|
26 |
+
python llm_toolkit/eval_logical_reasoning.py
|