Spaces:

inflaton-ai
/

logical-reasoning

Build error

App Files Community

dh-mc committed on Jul 23, 2024

Commit

58a3992

1 Parent(s): 144336f

llama3 en

Browse files

Files changed (17) hide show

.gitattributes +3 -0
competition/00a_InternLM2.5_Llama3_GLM4_Results.ipynb +0 -0
competition/00a_r2_InternLM2.5_Llama3_GLM4_Results.ipynb +0 -0
competition/03_EDA_en.ipynb +0 -0
competition/11b_Llama-3_8b_p1_r2_analysis.ipynb +0 -0
competition/11c_Llama-3_8b_p2_r2_analysis.ipynb +0 -0
llama-factory/config/llama3-8b_lora_sft_bf16-p1_en.yaml +46 -0
llama-factory/config/llama3-8b_lora_sft_bf16-p2_en.yaml +46 -0
llama-factory/data/alpaca_mgtv_p1_en.json +3 -0
llama-factory/data/alpaca_mgtv_p2_en.json +3 -0
llama-factory/data/dataset_info.json +2 -2
llm_toolkit/logical_reasoning_utils.py +52 -26
results/mgtv-llama3_p1_r2_full_metrics.csv +6 -0
results/mgtv-llama3_p2_r2_full_metrics.csv +6 -0
scripts/eval-mgtv-llama3_8b_en.sh +37 -0
scripts/tune-mgtv-llama3_8b_en.sh +37 -0
scripts/tune-mgtv.sh +1 -1

.gitattributes CHANGED Viewed

@@ -64,3 +64,6 @@ results/mgtv-results_merged_model.csv filter=lfs diff=lfs merge=lfs -text
 results/mgtv-results_p1_full_metrics.csv filter=lfs diff=lfs merge=lfs -text
 results/mgtv-results_p2_full_metrics.csv filter=lfs diff=lfs merge=lfs -text
 results/llama3-8b_lora_sft_bf16-p1.csv filter=lfs diff=lfs merge=lfs -text
 filter=lfs diff=lfs merge=lfs -text

 results/mgtv-results_p1_full_metrics.csv filter=lfs diff=lfs merge=lfs -text
 results/mgtv-results_p2_full_metrics.csv filter=lfs diff=lfs merge=lfs -text
 results/llama3-8b_lora_sft_bf16-p1.csv filter=lfs diff=lfs merge=lfs -text
+llama-factory/data/Icon
 filter=lfs diff=lfs merge=lfs -text
+llama-factory/data/alpaca_mgtv_p1_en.json filter=lfs diff=lfs merge=lfs -text
+llama-factory/data/alpaca_mgtv_p2_en.json filter=lfs diff=lfs merge=lfs -text

competition/00a_InternLM2.5_Llama3_GLM4_Results.ipynb CHANGED Viewed

The diff for this file is too large to render. See raw diff

competition/00a_r2_InternLM2.5_Llama3_GLM4_Results.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

competition/03_EDA_en.ipynb CHANGED Viewed

The diff for this file is too large to render. See raw diff

competition/11b_Llama-3_8b_p1_r2_analysis.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

competition/11c_Llama-3_8b_p2_r2_analysis.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

llama-factory/config/llama3-8b_lora_sft_bf16-p1_en.yaml ADDED Viewed

	@@ -0,0 +1,46 @@

+### model
+model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
+### method
+stage: sft
+do_train: true
+finetuning_type: lora
+lora_target: all
+# quantization_bit: 4                     # use 4-bit QLoRA
+loraplus_lr_ratio: 16.0                 # use LoRA+ with lambda=16.0
+# use_unsloth: true                       # use UnslothAI's LoRA optimization for 2x faster training
+upcast_layernorm: true
+### dataset
+dataset: alpaca_mgtv_p1_en
+template: llama3
+cutoff_len: 4096
+max_samples: 25000
+overwrite_cache: true
+preprocessing_num_workers: 16
+### output
+output_dir: saves/llama3-8b/lora/sft_bf16_p1_full_en
+logging_steps: 10
+save_steps: 175
+plot_loss: true
+# overwrite_output_dir: true
+### train
+per_device_train_batch_size: 16
+gradient_accumulation_steps: 8
+learning_rate: 1.0e-4
+num_train_epochs: 1.0
+lr_scheduler_type: cosine
+warmup_ratio: 0.1
+bf16: true
+ddp_timeout: 180000000
+### eval
+val_size: 0.1
+per_device_eval_batch_size: 1
+eval_strategy: steps
+eval_steps: 175
+report_to: wandb
+run_name: llama3_8b_p1_en # optional

llama-factory/config/llama3-8b_lora_sft_bf16-p2_en.yaml ADDED Viewed

	@@ -0,0 +1,46 @@

+### model
+model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
+### method
+stage: sft
+do_train: true
+finetuning_type: lora
+lora_target: all
+# quantization_bit: 4                     # use 4-bit QLoRA
+loraplus_lr_ratio: 16.0                 # use LoRA+ with lambda=16.0
+# use_unsloth: true                       # use UnslothAI's LoRA optimization for 2x faster training
+upcast_layernorm: true
+### dataset
+dataset: alpaca_mgtv_p2_en
+template: llama3
+cutoff_len: 4096
+max_samples: 25000
+overwrite_cache: true
+preprocessing_num_workers: 16
+### output
+output_dir: saves/llama3-8b/lora/sft_bf16_p2_full_en
+logging_steps: 10
+save_steps: 175
+plot_loss: true
+# overwrite_output_dir: true
+### train
+per_device_train_batch_size: 16
+gradient_accumulation_steps: 8
+learning_rate: 1.0e-4
+num_train_epochs: 1.0
+lr_scheduler_type: cosine
+warmup_ratio: 0.1
+bf16: true
+ddp_timeout: 180000000
+### eval
+val_size: 0.1
+per_device_eval_batch_size: 1
+eval_strategy: steps
+eval_steps: 175
+report_to: wandb
+run_name: llama3_8b_p2_en # optional

llama-factory/data/alpaca_mgtv_p1_en.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d88d2715925c53b13b1e393a1e5c1b0a62a98ddd273a4a6ae6dbbfd6ef96c32c
+size 44594582

llama-factory/data/alpaca_mgtv_p2_en.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:89cd205feb846c67906926afdf1776ae05a210c6f1653a04e753f071f8506859
+size 57644582

llama-factory/data/dataset_info.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a491ee69f583486d9649f5fb4ef2f06c72d5f4397d98afec654b1e917901e66a
-size 13750

 version https://git-lfs.github.com/spec/v1
+oid sha256:f1f4455a35b493baea9718535166c71778ba14994522057129a99b3614b659aa
+size 13894

llm_toolkit/logical_reasoning_utils.py CHANGED Viewed

@@ -24,6 +24,22 @@ P1 = """你是一个逻辑游戏的主持人。游戏规则如下：
 参与者提出的问题: {}
 """
 P2 = """你是一个情景猜谜游戏的主持人。游戏规则如下：
 1. 参与者会得到一个谜面，谜面会描述一个简单又难以理解的事件。
@@ -45,6 +61,34 @@ P2 = """你是一个情景猜谜游戏的主持人。游戏规则如下：
 **参与者提出的问题:** {}
 """
 def extract_answer(text, debug=False):
     if text:
@@ -136,30 +180,7 @@ def load_logical_reasoning_dataset(
     )
     if tokenizer:
-        reasoning_prompt = (
-            (P1 if using_p1 else P2)
-            if chinese_prompt
-            else """You are the host of a situational guessing game. The rules of the game are as follows:
-1. Participants will receive a riddle that describes a simple yet difficult to understand event.
-2. The host knows the answer, which is the solution to the riddle.
-3. Participants can ask any closed-ended questions to uncover the truth of the event.
-4. For each question, the host will respond with one of the following five options based on the actual situation: Yes, No, Unimportant, Correct answer, or Incorrect questioning. The criteria for each response are as follows:
-   - If the riddle and answer can provide an answer to the question, respond with: Yes or No
-   - If the riddle and answer cannot directly or indirectly infer an answer to the question, respond with: Unimportant
-   - If the participant's question is not a closed-ended question or is difficult to understand, respond with: Incorrect questioning
-   - If the participant's question essentially reveals the truth of the answer, respond with: Correct answer
-5. The response must not include any additional information, nor should any word be omitted from the options. For example, "No" cannot be abbreviated to "N".
-Please strictly follow these rules when answering the participant's questions.
-**Riddle:** {}
-**Answer:** {}
-**Participant's question:** {}
-"""
-        )
         def formatting_prompts_func(examples):
             inputs = examples["text"]
@@ -234,17 +255,22 @@ def load_alpaca_data(data_path, using_p1=True, use_english_datasets=False):
         else "llama-factory/data/alpaca_mgtv_p2.json"
     )
     if os.path.exists(alpaca_data_path):
         print("loading existing data from:", alpaca_data_path)
         data = pd.read_json(alpaca_data_path, orient="records", lines=False)
         return data
     print("loading new data from:", alpaca_data_path)
     datasets = load_logical_reasoning_dataset(
-        data_path, chinese_prompt=not use_english_datasets
     )
-    prompt_template = P1 if using_p1 else P2
     df_train = datasets["train"].to_pandas()
     df_train["instruction"] = df_train.apply(
         lambda x: prompt_template.format(x["puzzle"], x["truth"], x["text"]), axis=1

 参与者提出的问题: {}
 """
+P1_en = """You are the host of a logic game. The rules of the game are as follows:
+	1.	Participants will receive a puzzle.
+	2.	Participants can ask questions to obtain clues and try to solve the puzzle.
+	3.	For each question, the host will answer with one of the following five options based on the actual situation: Yes, No, Unimportant, Correct answer, or Incorrect questioning.
+	4.	The answer cannot include any additional information, nor can any word in the options be omitted. For example, “No” cannot be shortened to “N”.
+	5.	Participants need to infer and ultimately find the correct answer to the puzzle based on the responses.
+Please strictly adhere to these rules when answering participants’ questions.
+Puzzle: {}
+Actual situation: {}
+Question from participants: {}"""
 P2 = """你是一个情景猜谜游戏的主持人。游戏规则如下：
 1. 参与者会得到一个谜面，谜面会描述一个简单又难以理解的事件。
 **参与者提出的问题:** {}
 """
+P2_en = """You are the host of a situational guessing game. The rules of the game are as follows:
+1. Participants will receive a riddle that describes a simple yet difficult to understand event.
+2. The host knows the answer, which is the solution to the riddle.
+3. Participants can ask any closed-ended questions to uncover the truth of the event.
+4. For each question, the host will respond with one of the following five options based on the actual situation: Yes, No, Unimportant, Correct answer, or Incorrect questioning. The criteria for each response are as follows:
+   - If the riddle and answer can provide an answer to the question, respond with: Yes or No
+   - If the riddle and answer cannot directly or indirectly infer an answer to the question, respond with: Unimportant
+   - If the participant's question is not a closed-ended question or is difficult to understand, respond with: Incorrect questioning
+   - If the participant's question essentially reveals the truth of the answer, respond with: Correct answer
+5. The response must not include any additional information, nor should any word be omitted from the options. For example, "No" cannot be abbreviated to "N".
+Please strictly follow these rules when answering the participant's questions.
+**Riddle:** {}
+**Answer:** {}
+**Participant's question:** {}
+"""
+def get_prompt_template(using_p1=True, chinese_prompt=True):
+    if using_p1:
+        return P1 if chinese_prompt else P1_en
+    else:
+        return P2 if chinese_prompt else P2_en
 def extract_answer(text, debug=False):
     if text:
     )
     if tokenizer:
+        reasoning_prompt = get_prompt_template(using_p1, chinese_prompt)
         def formatting_prompts_func(examples):
             inputs = examples["text"]
         else "llama-factory/data/alpaca_mgtv_p2.json"
     )
+    if use_english_datasets:
+        alpaca_data_path = alpaca_data_path.replace(".json", "_en.json")
     if os.path.exists(alpaca_data_path):
         print("loading existing data from:", alpaca_data_path)
         data = pd.read_json(alpaca_data_path, orient="records", lines=False)
         return data
     print("loading new data from:", alpaca_data_path)
+    chinese_prompt = not use_english_datasets
     datasets = load_logical_reasoning_dataset(
+        data_path, using_p1=using_p1, chinese_prompt=chinese_prompt
     )
+    prompt_template = get_prompt_template(using_p1, chinese_prompt)
     df_train = datasets["train"].to_pandas()
     df_train["instruction"] = df_train.apply(
         lambda x: prompt_template.format(x["puzzle"], x["truth"], x["text"]), axis=1

results/mgtv-llama3_p1_r2_full_metrics.csv ADDED Viewed

	@@ -0,0 +1,6 @@

+epoch,model,accuracy,precision,recall,f1
+0,shenzhi-wang/Llama3-8B-Chinese-Chat_torch.bfloat16,0.7836666666666666,0.7667122897184859,0.7929173693086004,0.7679400621793133
+1,shenzhi-wang/Llama3-8B-Chinese-Chat/checkpoint-175_torch.bfloat16,0.7706666666666667,0.8072750943858197,0.7706666666666667,0.7835719791561528
+2,shenzhi-wang/Llama3-8B-Chinese-Chat/checkpoint-350_torch.bfloat16,0.724,0.8118050163437011,0.724,0.7562266825513707
+3,shenzhi-wang/Llama3-8B-Chinese-Chat/checkpoint-525_torch.bfloat16,0.6756666666666666,0.7811762160181578,0.6756666666666666,0.7108457483297581
+4,shenzhi-wang/Llama3-8B-Chinese-Chat/checkpoint-700_torch.bfloat16,0.6496666666666666,0.779896556141616,0.6496666666666666,0.6931844557591907

results/mgtv-llama3_p2_r2_full_metrics.csv ADDED Viewed

	@@ -0,0 +1,6 @@

+epoch,model,accuracy,precision,recall,f1
+0,shenzhi-wang/Llama3-8B-Chinese-Chat_torch.bfloat16,0.73,0.7709739363586101,0.73,0.7462914191370829
+1,shenzhi-wang/Llama3-8B-Chinese-Chat/checkpoint-175_torch.bfloat16,0.718,0.8113087212796575,0.718,0.75010620119596
+2,shenzhi-wang/Llama3-8B-Chinese-Chat/checkpoint-350_torch.bfloat16,0.7273333333333334,0.8025119533060184,0.7273333333333334,0.7549822040428019
+3,shenzhi-wang/Llama3-8B-Chinese-Chat/checkpoint-525_torch.bfloat16,0.6883333333333334,0.7816168321128566,0.6883333333333334,0.716763388345211
+4,shenzhi-wang/Llama3-8B-Chinese-Chat/checkpoint-700_torch.bfloat16,0.6406666666666667,0.7636301286950402,0.6406666666666667,0.6807929233309169

scripts/eval-mgtv-llama3_8b_en.sh ADDED Viewed

	@@ -0,0 +1,37 @@

+#!/bin/sh
+BASEDIR=$(dirname "$0")
+cd $BASEDIR/..
+echo Current Directory:
+pwd
+BASEDIR=`pwd`
+nvidia-smi
+uname -a
+cat /etc/os-release
+lscpu
+grep MemTotal /proc/meminfo
+#pip install -r requirements.txt
+#cd ../LLaMA-Factory && pip install -e .[torch,bitsandbytes]
+export LOGICAL_REASONING_DATA_PATH=datasets/mgtv
+export RESIZE_TOKEN_EMBEDDINGS=true
+export START_EPOCH=0
+export USING_LLAMA_FACTORY=true
+export MODEL_NAME=shenzhi-wang/Llama3-8B-Chinese-Chat
+export MODEL_PREFIX=llama3-8b_lora_sft_bf16
+export LOGICAL_REASONING_RESULTS_PATH=results/$MODEL_PREFIX-p1_en.csv
+export ADAPTER_PATH_BASE=llama-factory/saves/llama3-8b/lora/sft_bf16_p1_full_en
+export USING_P1_PROMPT_TEMPLATE=true
+echo "Eval $MODEL_NAME with $ADAPTER_PATH_BASE"
+python llm_toolkit/eval_logical_reasoning_all_epochs.py
+export LOGICAL_REASONING_RESULTS_PATH=results/$MODEL_PREFIX-p2_en.csv
+export ADAPTER_PATH_BASE=llama-factory/saves/llama3-8b/lora/sft_bf16_p2_full_en
+export USING_P1_PROMPT_TEMPLATE=false
+echo "Eval $MODEL_NAME with $ADAPTER_PATH_BASE"
+python llm_toolkit/eval_logical_reasoning_all_epochs.py

scripts/tune-mgtv-llama3_8b_en.sh ADDED Viewed

	@@ -0,0 +1,37 @@

+#!/bin/sh
+BASEDIR=$(dirname "$0")
+cd $BASEDIR/..
+echo Current Directory:
+pwd
+BASEDIR=`pwd`
+nvidia-smi
+uname -a
+cat /etc/os-release
+lscpu
+grep MemTotal /proc/meminfo
+#pip install -r requirements.txt
+#cd ../LLaMA-Factory && pip install -e .[torch,bitsandbytes] && cd $BASEDIR
+#pip install transformers==4.41.2
+#pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
+export LOGICAL_REASONING_DATA_PATH=datasets/mgtv
+export MODEL_NAME=meta-llama/Meta-Llama-3-8B-Instruct
+export MODEL_PREFIX=llama3-8b_lora_sft_bf16
+export CONFIG_FILE=config/$MODEL_PREFIX-p1_en.yaml
+echo "Tuning $MODEL_NAME with $CONFIG_FILE"
+$BASEDIR/scripts/tune-lf.sh $CONFIG_FILE
+export CONFIG_FILE=config/$MODEL_PREFIX-p2_en.yaml
+echo "Tuning $MODEL_NAME with $CONFIG_FILE"
+$BASEDIR/scripts/tune-lf.sh $CONFIG_FILE
+$BASEDIR/scripts/eval-mgtv-llama3_8b_en.sh

scripts/tune-mgtv.sh CHANGED Viewed

	@@ -1 +1 @@
1	- tune-mgtv-llama3_8b.sh


1	+ tune-mgtv-llama3_8b_en.sh