HaotianHu committed on
Commit b370fd5 · 1 Parent(s): 6f26410
competition/13_Qwen2_7b_finetuning_l40.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
competition/13_Qwen2_7b_finetuning_l40.py ADDED
@@ -0,0 +1,155 @@
+ # %%
+ %load_ext autoreload
+ %autoreload 2
+
+ # %%
+ import os
+ import sys
+ from pathlib import Path
+
+ working_dir = str(Path.cwd().parent)
+ os.chdir(working_dir)
+ sys.path.append(working_dir)
+ print("working dir:", working_dir)
+
+ # %%
+ need_to_setup_env = False
+ need_to_setup_env
+
+ # %%
+ if need_to_setup_env:
+     !pip config set global.index-url https://pypi.org/simple
+     # %pip install tf-keras
+     # %pip install -q --upgrade accelerate einops xformers torchvision
+     # %pip install -r requirements.txt
+     !cd LLaMA-Factory && pip install -e .[torch,bitsandbytes] && FLASH_ATTENTION_FORCE_BUILD=TRUE pip install --upgrade flash-attn
+
+ # %%
+ from dotenv import find_dotenv, load_dotenv
+
+ found_dotenv = find_dotenv(".env.qwen2_7b")
+
+ if len(found_dotenv) == 0:
+     found_dotenv = find_dotenv(".env.example")
+ print(f"loading env vars from: {found_dotenv}")
+ load_dotenv(found_dotenv, override=True)
+
+ # %%
+ import os
+
+ model_name = os.getenv("MODEL_NAME")
+ adapter_name_or_path = os.getenv("ADAPTER_NAME_OR_PATH")
+ load_in_4bit = os.getenv("LOAD_IN_4BIT") == "true"
+ data_path = os.getenv("LOGICAL_REASONING_DATA_PATH")
+ results_path = os.getenv("LOGICAL_REASONING_RESULTS_PATH")
+ use_english_datasets = os.getenv("USE_ENGLISH_DATASETS") == "true"
+
+ print(model_name, adapter_name_or_path, load_in_4bit, data_path, results_path, use_english_datasets)
+
+ # %%
+ import pandas as pd
+ df = pd.read_csv("datasets/mgtv/train.csv")
+ df.head()
+
+ # %%
+ # Build Alpaca-style records. `instruction` is a fixed Chinese system prompt:
+ # the model acts as the host of a situation-puzzle game and must answer each
+ # question with exactly one of 是 / 不是 / 不重要 / 回答正确 / 问法错误.
+ # Its trailing {} placeholders are left unfilled; the concrete puzzle, truth,
+ # and question go into `input` instead.
+ dataset_data = [
+     {
+         "instruction": "你是一个逻辑游戏的主持人。游戏规则如下:1. 参与者会得到一个谜题。2. 参与者可以通过提问来获取线索,尝试解开谜题。3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。5. 参与者需要根据回答来推理,并最终找出谜题的正确答案。请严格按照这些规则回答参与者提出的问题。谜题: {}实际情况: {}参与者提出的问题: {}",
+         "input": "谜题: " + row_dict["title"] + " " + row_dict["puzzle"] + "实际情况: " + row_dict["truth"] + "参与者提出的问题: " + row_dict["text"],
+         "output": row_dict["label"],
+     }
+     for row_dict in df.to_dict(orient="records")
+ ]
+
+ dataset_data[0]
+
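+ # %%
+ # Quick sanity check (a sketch, not part of the original pipeline): since the
+ # prompt allows exactly five answers, every `output` should be one of them;
+ # anything else signals a data problem.
+ allowed = {"是", "不是", "不重要", "回答正确", "问法错误"}
+ bad = [d for d in dataset_data if d["output"] not in allowed]
+ print(f"{len(dataset_data)} records, {len(bad)} with unexpected labels")
+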
+ # %%
+ import os
+ import json
+
+ # Define the directory where you want to save the JSON file
+ output_dir = "LLaMA-Factory/data/"
+
+ # Ensure the directory exists
+ os.makedirs(output_dir, exist_ok=True)
+
+ # Define the full path for the JSON file
+ json_file_path = os.path.join(output_dir, "mgtv_train.json")
+
+ # Save the dataset; ensure_ascii=False keeps the Chinese text readable
+ # instead of escaping it to \uXXXX sequences
+ with open(json_file_path, "w", encoding="utf-8") as f:
+     json.dump(dataset_data, f, ensure_ascii=False)
+
+ print(f"JSON file saved to {json_file_path}")
+
+
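+ # %%
+ # LLaMA-Factory only trains on datasets declared in data/dataset_info.json,
+ # and the configs below refer to this one as `mgtv_train`. A minimal sketch of
+ # that registration, assuming the default alpaca-style field names used above;
+ # skip this cell if your LLaMA-Factory checkout already has the entry.
+ info_path = os.path.join(output_dir, "dataset_info.json")
+ dataset_info = {}
+ if os.path.exists(info_path):
+     with open(info_path, encoding="utf-8") as f:
+         dataset_info = json.load(f)
+
+ dataset_info["mgtv_train"] = {"file_name": "mgtv_train.json"}
+
+ with open(info_path, "w", encoding="utf-8") as f:
+     json.dump(dataset_info, f, ensure_ascii=False, indent=2)
+
+ print(f"registered mgtv_train in {info_path}")
+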
+ # %%
+ !nvidia-smi
+
+ # %%
+ %%time
+ import subprocess
+ import os
+
+ script_path = "scripts/tune-lf.sh"
+ config_path = "config/qwen2_7b_lora_sft_4bit.yaml"
+
+ # Print paths for debugging
+ print(f"Script Path: {script_path}")
+ print(f"Config Path: {config_path}")
+
+ # Check that the input files exist before launching
+ if not os.path.exists(script_path):
+     print(f"Script not found at {script_path}")
+ if not os.path.exists(config_path):
+     print(f"Config file not found at {config_path}")
+
+ # Launch fine-tuning via bash (Linux/WSL); a Windows/Git-Bash variant is
+ # kept, commented out, below
+ subprocess.run(["bash", script_path, config_path], check=True)
+
+
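+ # %%
+ # Alternative, assuming scripts/tune-lf.sh is a thin wrapper around the
+ # LLaMA-Factory CLI (a sketch -- verify against the wrapper script first):
+ # !llamafactory-cli train config/qwen2_7b_lora_sft_4bit.yaml
+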
+ # %%
+ # import subprocess
+ # import os
+
+ # git_bash_path = r"C:/Program Files/Git/bin/bash.exe"
+
+ # script_path = "scripts/tune-lf.sh"
+ # config_path = "config/qwen2_7b_lora_sft_4bit.yaml"
+
+ # # Print paths for debugging
+ # print(f"Git Bash Path: {git_bash_path}")
+ # print(f"Script Path: {script_path}")
+ # print(f"Config Path: {config_path}")
+
+ # # Check if the files exist
+ # if not os.path.exists(git_bash_path):
+ #     print(f"Git Bash not found at {git_bash_path}")
+ # if not os.path.exists(script_path):
+ #     print(f"Script not found at {script_path}")
+ # if not os.path.exists(config_path):
+ #     print(f"Config file not found at {config_path}")
+
+ # # Ensure the script has execution permissions
+ # os.chmod(script_path, 0o755)
+
+ # # Using Git Bash or WSL
+ # try:
+ #     result = subprocess.run(
+ #         [git_bash_path, '-c', f'bash {script_path} {config_path}'],
+ #         check=True, capture_output=True, text=True
+ #     )
+ #     print("Output:", result.stdout)
+ #     print("Error:", result.stderr)
+ # except subprocess.CalledProcessError as e:
+ #     print("Command failed with error code:", e.returncode)
+ #     print("Output:", e.stdout)
+ #     print("Error:", e.stderr)
+
+
llama-factory/config/qwen2_0.5b_lora_sft_4bit.yaml ADDED
@@ -0,0 +1,46 @@
+ ### model
+ model_name_or_path: Qwen/Qwen2-0.5B
+
+ ### method
+ stage: sft
+ do_train: true
+ finetuning_type: lora
+ lora_target: all
+ quantization_bit: 4 # use 4-bit QLoRA
+ loraplus_lr_ratio: 16.0 # use LoRA+ with lambda=16.0
+ upcast_layernorm: true
+
+ ### dataset
+ dataset: mgtv_train
+ template: qwen
+ cutoff_len: 4096
+ max_samples: 25000
+ overwrite_cache: true
+ preprocessing_num_workers: 16
+
+ ### output
+ output_dir: saves/qwen2_0.5b/lora/sft_4bit
+ logging_steps: 562
+ save_steps: 562
+ plot_loss: true
+ # overwrite_output_dir: true
+ # resume_from_checkpoint: true
+
+ ### train
+ per_device_train_batch_size: 8
+ gradient_accumulation_steps: 8
+ learning_rate: 1.0e-4
+ num_train_epochs: 3.0
+ lr_scheduler_type: cosine
+ warmup_ratio: 0.1
+ bf16: true
+ ddp_timeout: 180000000
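+ # note: on a single GPU the settings above give an effective batch size of
+ # 8 (per-device) x 8 (grad accum) = 64 samples per optimizer step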
+
+ ### eval
+ val_size: 0.1
+ per_device_eval_batch_size: 1
+ eval_strategy: steps
+ eval_steps: 562
+
+ report_to: none
+ run_name: qwen2_0.5b # optional
llama-factory/config/qwen2_7b_lora_sft_4bit.yaml CHANGED
@@ -23,11 +23,11 @@ output_dir: saves/qwen2_7b/lora/sft_4bit
  logging_steps: 562
  save_steps: 562
  plot_loss: true
- overwrite_output_dir: true
+ # overwrite_output_dir: true
  # resume_from_checkpoint: true

  ### train
- per_device_train_batch_size: 32
+ per_device_train_batch_size: 8
  gradient_accumulation_steps: 8
  learning_rate: 1.0e-4
  num_train_epochs: 3.0
@@ -42,5 +42,5 @@ per_device_eval_batch_size: 1
  eval_strategy: steps
  eval_steps: 562

- report_to: wandb
+ report_to: none
  run_name: qwen2_7b # optional
scripts/tune-mgtv-qwen2_7b.sh CHANGED
@@ -13,8 +13,8 @@ cat /etc/os-release
  lscpu
  grep MemTotal /proc/meminfo

- #pip install -r requirements.txt
- #cd ../LLaMA-Factory && pip install -e .[torch,bitsandbytes]
+ pip install -r requirements.txt
+ cd ../LLaMA-Factory && pip install -e .[torch,bitsandbytes]

  export LOGICAL_REASONING_DATA_PATH=datasets/mgtv