dh-mc committed on
Commit
7670f2e
·
1 Parent(s): 0e7344a

test_b results

Browse files
.gitattributes CHANGED
@@ -69,3 +69,4 @@ llama-factory/data/Icon
69
  llama-factory/data/alpaca_mgtv_p1_en.json filter=lfs diff=lfs merge=lfs -text
70
  llama-factory/data/alpaca_mgtv_p2_en.json filter=lfs diff=lfs merge=lfs -text
71
  datasets/mgtv/test_b.csv filter=lfs diff=lfs merge=lfs -text
 
 
69
  llama-factory/data/alpaca_mgtv_p1_en.json filter=lfs diff=lfs merge=lfs -text
70
  llama-factory/data/alpaca_mgtv_p2_en.json filter=lfs diff=lfs merge=lfs -text
71
  datasets/mgtv/test_b.csv filter=lfs diff=lfs merge=lfs -text
72
+ results/test_b-results.csv filter=lfs diff=lfs merge=lfs -text
competition/15_InternLM_NV4090_test.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
llm_toolkit/eval_logical_reasoning.py CHANGED
@@ -24,6 +24,7 @@ data_path = os.getenv("LOGICAL_REASONING_DATA_PATH")
24
  results_path = os.getenv("LOGICAL_REASONING_RESULTS_PATH")
25
  use_english_datasets = os.getenv("USE_ENGLISH_DATASETS") == "true"
26
  using_p1 = os.getenv("USING_P1_PROMPT_TEMPLATE") == "true"
 
27
  using_llama_factory = os.getenv("USING_LLAMA_FACTORY") == "true"
28
  max_new_tokens = int(os.getenv("MAX_NEW_TOKENS", 16))
29
  repetition_penalty = float(os.getenv("REPETITION_PENALTY", 1.0))
@@ -65,6 +66,7 @@ datasets = load_logical_reasoning_dataset(
65
  tokenizer=tokenizer,
66
  chinese_prompt=not use_english_datasets,
67
  using_p1=using_p1,
 
68
  )
69
 
70
  if len(sys.argv) > 1:
@@ -94,12 +96,17 @@ if adapter_name_or_path is not None:
94
  model_name += "/" + adapter_name_or_path.split("/")[-1]
95
 
96
  save_results(
97
- f"{model_name}_{dtype}{'_4bit' if load_in_4bit else ''}{'_lf' if using_llama_factory else ''}",
 
 
 
 
98
  results_path,
99
  datasets["test"],
100
  predictions,
101
  debug=True,
102
  )
103
 
104
- metrics = calc_metrics(datasets["test"]["label"], predictions, debug=True)
105
- print(metrics)
 
 
24
  results_path = os.getenv("LOGICAL_REASONING_RESULTS_PATH")
25
  use_english_datasets = os.getenv("USE_ENGLISH_DATASETS") == "true"
26
  using_p1 = os.getenv("USING_P1_PROMPT_TEMPLATE") == "true"
27
+ test_data = os.getenv("TEST_DATA", None)
28
  using_llama_factory = os.getenv("USING_LLAMA_FACTORY") == "true"
29
  max_new_tokens = int(os.getenv("MAX_NEW_TOKENS", 16))
30
  repetition_penalty = float(os.getenv("REPETITION_PENALTY", 1.0))
 
66
  tokenizer=tokenizer,
67
  chinese_prompt=not use_english_datasets,
68
  using_p1=using_p1,
69
+ test_data=test_data,
70
  )
71
 
72
  if len(sys.argv) > 1:
 
96
  model_name += "/" + adapter_name_or_path.split("/")[-1]
97
 
98
  save_results(
99
+ (
100
+ "answer"
101
+ if test_data
102
+ else f"{model_name}_{dtype}{'_4bit' if load_in_4bit else ''}{'_lf' if using_llama_factory else ''}"
103
+ ),
104
  results_path,
105
  datasets["test"],
106
  predictions,
107
  debug=True,
108
  )
109
 
110
+ if not test_data:
111
+ metrics = calc_metrics(datasets["test"]["label"], predictions, debug=True)
112
+ print(metrics)
llm_toolkit/logical_reasoning_utils.py CHANGED
@@ -167,11 +167,11 @@ def save_results(model_name, results_path, dataset, predictions, debug=False):
167
 
168
 
169
  def load_logical_reasoning_dataset(
170
- data_path, tokenizer=None, using_p1=True, chinese_prompt=True
171
  ):
172
  postfix = "" if chinese_prompt else "_en"
173
  train_data_file = data_path + f"/train{postfix}.csv"
174
- test_data_file = data_path + f"/dev{postfix}.csv"
175
 
176
  print("loading train/test data files")
177
  datasets = load_dataset(
@@ -211,7 +211,7 @@ def load_logical_reasoning_dataset(
211
  messages, tokenize=False, add_generation_prompt=True
212
  )
213
  prompts.append(prompt)
214
- texts.append(prompt + output + tokenizer.eos_token)
215
  return {"train_text": texts, "prompt": prompts}
216
 
217
  datasets = datasets.map(
 
167
 
168
 
169
  def load_logical_reasoning_dataset(
170
+ data_path, tokenizer=None, using_p1=True, chinese_prompt=True, test_data=None
171
  ):
172
  postfix = "" if chinese_prompt else "_en"
173
  train_data_file = data_path + f"/train{postfix}.csv"
174
+ test_data_file = data_path + f"/{test_data if test_data else 'dev'}{postfix}.csv"
175
 
176
  print("loading train/test data files")
177
  datasets = load_dataset(
 
211
  messages, tokenize=False, add_generation_prompt=True
212
  )
213
  prompts.append(prompt)
214
+ texts.append(prompt + output + tokenizer.eos_token if output else "")
215
  return {"train_text": texts, "prompt": prompts}
216
 
217
  datasets = datasets.map(
results/test_b-results.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8efee9e847b45b8d1ccf187a7e214e73cfbd368840b1d699af6b228eca3b22a
3
+ size 23116718
scripts/test-mgtv.sh ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/sh
2
+
3
+ BASEDIR=$(dirname "$0")
4
+ cd $BASEDIR/..
5
+ echo Current Directory:
6
+ pwd
7
+
8
+ BASEDIR=`pwd`
9
+
10
+ nvidia-smi
11
+ uname -a
12
+ cat /etc/os-release
13
+ lscpu
14
+ grep MemTotal /proc/meminfo
15
+
16
+ export LOAD_IN_4BIT=false
17
+
18
+ export MODEL_NAME=internlm/internlm2_5-7b-chat-1m
19
+ export ADAPTER_NAME_OR_PATH=inflaton-ai/InternLM_2_5-7b_LoRA-Adapter
20
+ export LOGICAL_REASONING_DATA_PATH=datasets/mgtv
21
+
22
+ export TEST_DATA=test_b
23
+ export LOGICAL_REASONING_RESULTS_PATH=results/$TEST_DATA-results.csv
24
+
25
+ echo "Eval $MODEL_NAME with $ADAPTER_NAME_OR_PATH"
26
+ python llm_toolkit/eval_logical_reasoning.py