Spaces:
Build error
Build error
ready for tuning glm-4
Browse files- competition/10c_InternLM_M3_eval.ipynb +1 -1
- competition/10d_InternLM_M3_analysis.ipynb +0 -1
- competition/10f_InternLM_best_analysis.ipynb +0 -0
- competition/14_GLM-4_M3_eval.ipynb +1 -1
- llama-factory/config/glm-4-9b_lora_sft_bf16-p1.yaml +46 -0
- llama-factory/config/glm-4-9b_lora_sft_bf16-p2.yaml +46 -0
- results/mgtv-results_internlm_best.csv +0 -1
- results/mgtv-results_internlm_best.csv +0 -0
- results/mgtv-results_m3.csv +0 -0
- scripts/eval-mgtv-glm-4-9b.sh +36 -0
- scripts/tune-mgtv-glm-4-9b.sh +36 -0
- scripts/tune-mgtv.sh +1 -1
competition/10c_InternLM_M3_eval.ipynb
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"cells":[{"cell_type":"code","execution_count":1,"metadata":{"executionInfo":{"elapsed":476,"status":"ok","timestamp":1720679526275,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"uWKRSV6eZsCn"},"outputs":[],"source":["%load_ext autoreload\n","%autoreload 2"]},{"cell_type":"code","execution_count":2,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"eb33b19f-1206-41ee-84e2-e6258a12eef7","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":2534,"status":"ok","timestamp":1720679529344,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"xwFh14uiZBrI","outputId":"d767799c-34c2-46a5-f052-378146a55321"},"outputs":[],"source":["from pathlib import Path\n","\n","try:\n"," from google.colab import drive\n","\n"," drive.mount(\"/content/drive\")\n"," workding_dir = \"/content/drive/MyDrive/logical-reasoning/\"\n","except ModuleNotFoundError:\n"," workding_dir = str(Path.cwd().parent)"]},{"cell_type":"code","execution_count":3,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"6d394937-6c99-4a7c-9d32-7600a280032f","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":5,"status":"ok","timestamp":1720679529345,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"G5pNu3zgZBrL","outputId":"160a554f-fb08-4aa0-bc00-0422fb7c1fac"},"outputs":[{"name":"stdout","output_type":"stream","text":["workding dir: /Users/inflaton/code/engd/projects/logical-reasoning\n"]}],"source":["import os\n","import sys\n","from pathlib import Path\n","\n","os.chdir(workding_dir)\n","sys.path.append(workding_dir)\n","print(\"workding dir:\", 
workding_dir)"]},{"cell_type":"code","execution_count":4,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"ac667aba-076e-4de6-9984-8f6a67cb09cd","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":4,"status":"ok","timestamp":1720679529345,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"0dVRAabNZBrL","outputId":"b977e116-df16-47cd-9160-a24f611da687"},"outputs":[{"data":{"text/plain":["False"]},"execution_count":4,"metadata":{},"output_type":"execute_result"}],"source":["need_to_setup_env = False\n","need_to_setup_env"]},{"cell_type":"code","execution_count":5,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"72f9cf79-7b0d-4d9e-90a0-1fa5251b947f","showTitle":false,"title":""},"executionInfo":{"elapsed":4,"status":"ok","timestamp":1720679529345,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"hKUOfP2HZBrL"},"outputs":[],"source":["if need_to_setup_env:\n"," %pip install -r requirements.txt\n"," %cd /content/\n"," %rm -rf LLaMA-Factory\n"," !git clone https://github.com/hiyouga/LLaMA-Factory.git\n"," %cd LLaMA-Factory\n"," %ls\n"," %pip install -e .[torch,bitsandbytes]\n"," \n"," os.chdir(workding_dir)\n"," sys.path.append(workding_dir)\n"," print(\"workding dir:\", workding_dir)"]},{"cell_type":"code","execution_count":6,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"9f67ec60-2f24-411c-84eb-0dd664b44775","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":3,"status":"ok","timestamp":1720679529345,"user":{"displayName":"HUANG DONGHAO 
_","userId":"00977795705617022768"},"user_tz":-480},"id":"hPCC-6m7ZBrM","outputId":"c7aa2c96-5e99-440a-c148-201d79465ff9"},"outputs":[{"name":"stdout","output_type":"stream","text":["loading env vars from: /Users/inflaton/code/engd/projects/logical-reasoning/.env\n"]},{"data":{"text/plain":["True"]},"execution_count":6,"metadata":{},"output_type":"execute_result"}],"source":["from dotenv import find_dotenv, load_dotenv\n","\n","found_dotenv = find_dotenv(\".env\")\n","\n","if len(found_dotenv) == 0:\n"," found_dotenv = find_dotenv(\".env.example\")\n","print(f\"loading env vars from: {found_dotenv}\")\n","load_dotenv(found_dotenv, override=True)"]},{"cell_type":"code","execution_count":7,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"f1597656-8042-4878-9d3b-9ebfb8dd86dc","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":3,"status":"ok","timestamp":1720679529345,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"1M3IraVtZBrM","outputId":"29ab35f6-2970-4ade-d85d-3174acf8cda0"},"outputs":[{"name":"stdout","output_type":"stream","text":["internlm/internlm2_5-7b-chat-1m llama-factory/saves/internlm2_5_7b/lora/sft_bf16_p2_full/checkpoint-88 False datasets/mgtv results/mgtv-results_m3.csv\n"]}],"source":["import os\n","\n","model_name = os.getenv(\"MODEL_NAME\")\n","adapter_name_or_path = os.getenv(\"ADAPTER_NAME_OR_PATH\")\n","load_in_4bit = os.getenv(\"LOAD_IN_4BIT\") == \"true\"\n","data_path = os.getenv(\"LOGICAL_REASONING_DATA_PATH\")\n","results_path = os.getenv(\"LOGICAL_REASONING_RESULTS_PATH\")\n","use_english_datasets = os.getenv(\"USE_ENGLISH_DATASETS\") == \"true\"\n","\n","print(model_name, adapter_name_or_path, load_in_4bit, data_path, 
results_path)"]},{"cell_type":"code","execution_count":8,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"b2a43943-9324-4839-9a47-cfa72de2244b","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":564,"status":"ok","timestamp":1720679529907,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"UgMvt6dIZBrM","outputId":"ce37581c-fd26-46c2-ad87-d933d99f68f7"},"outputs":[{"name":"stdout","output_type":"stream","text":["Python 3.11.9\n","\u001b[33mWARNING: Package(s) not found: flash-attn\u001b[0m\u001b[33m\n","\u001b[0mCPU times: user 38.9 ms, sys: 26.7 ms, total: 65.7 ms\n","Wall time: 2.97 s\n"]}],"source":["%%time\n","!python --version\n","!pip show flash-attn"]},{"cell_type":"code","execution_count":9,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":1685,"status":"ok","timestamp":1720679531591,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"ZuS_FsLyZBrN","outputId":"2cba0105-c505-4395-afbd-2f2fee6581d0"},"outputs":[{"name":"stdout","output_type":"stream","text":["loading /Users/inflaton/code/engd/projects/logical-reasoning/llm_toolkit/logical_reasoning_utils.py\n","MPS is available\n"]}],"source":["from llm_toolkit.llm_utils import *\n","from llm_toolkit.logical_reasoning_utils import *\n","\n","device = check_gpu()"]},{"cell_type":"code","execution_count":10,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["loading model: internlm/internlm2_5-7b-chat-1m with adapter: llama-factory/saves/internlm2_5_7b/lora/sft_bf16_p2_full/checkpoint-88\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"443302f393fe45e3a5150cb5e1f35a11","version_major":2,"version_minor":0},"text/plain":["Loading checkpoint shards: 0%| | 0/8 [00:00<?, 
?it/s]"]},"metadata":{},"output_type":"display_data"},{"name":"stdout","output_type":"stream","text":["CPU times: user 3.13 s, sys: 6.52 s, total: 9.65 s\n","Wall time: 30.7 s\n"]}],"source":["%%time\n","\n","model, tokenizer = load_model(model_name, adapter_name_or_path=adapter_name_or_path, using_llama_factory=False)"]},{"cell_type":"code","execution_count":11,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["loading train/test data files\n","DatasetDict({\n"," train: Dataset({\n"," features: ['text', 'label', 'answer', 'title', 'puzzle', 'truth', 'train_text', 'prompt'],\n"," num_rows: 25000\n"," })\n"," test: Dataset({\n"," features: ['text', 'label', 'answer', 'title', 'puzzle', 'truth', 'train_text', 'prompt'],\n"," num_rows: 3000\n"," })\n","})\n"]}],"source":["datasets = load_logical_reasoning_dataset(\n"," data_path,\n"," tokenizer=tokenizer,\n"," chinese_prompt=not use_english_datasets,\n"," using_p1=False,\n",")"]},{"cell_type":"code","execution_count":12,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["--------------------------------------------------\n","text: 甄加索是自杀吗\n","--------------------------------------------------\n","label: 不是\n","--------------------------------------------------\n","answer: nan\n","--------------------------------------------------\n","title: 海岸之谜\n","--------------------------------------------------\n","puzzle: 在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任何打斗的迹象。请问甄加索的死因是什么?\n","--------------------------------------------------\n","truth: 甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在创作一幅描绘海洋生物的画作。在画即将完成的前一天晚上,他骑着自行车外出,打算在海边观赏夜景。然而,他在沙滩上意外发现了一只搁浅的海豚,为了救助这只海豚,他耗费了极大的体力,最终成功将其送回海中。筋疲力尽的甄加索在沙滩上睡着了,由于他患有严重的心脏病,却未告知旁人,在寒冷的海风中,他的心脏停止了跳动。因此,警方在现场只发现了车轮痕迹和未完成的画作,而没有发现任何他杀的迹象。\n","--------------------------------------------------\n","train_text: <s><|im_start|>system\n","You are an expert in logical reasoning.<|im_end|>\n","<|im_start|>user\n","你是一个情景猜谜游戏的主持人。游戏规则如下:\n","\n","1. 
参与者会得到一个谜面,谜面会描述一个简单又难以理解的事件。\n","2. 主持人知道谜底,谜底是谜面的答案。\n","3. 参与者可以询问任何封闭式问题来找寻事件的真相。\n","4. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。各回答的判断标准如下:\n"," - 若谜面和谜底能找到问题的答案,回答:是或者不是\n"," - 若谜面和谜底不能直接或者间接推断出问题的答案,回答:不重要\n"," - 若参与者提问不是一个封闭式问题或者问题难以理解,回答:问法错误\n"," - 若参与者提问基本还原了谜底真相,回答:回答正确\n","5. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n","\n","请严格按照这些规则回答参与者提出的问题。\n","\n","**谜面:** 在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任何打斗的迹象。请问甄加索的死因是什么?\n","\n","**谜底:** 甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在创作一幅描绘海洋生物的画作。在画即将完成的前一天晚上,他骑着自行车外出,打算在海边观赏夜景。然而,他在沙滩上意外发现了一只搁浅的海豚,为了救助这只海豚,他耗费了极大的体力,最终成功将其送回海中。筋疲力尽的甄加索在沙滩上睡着了,由于他患有严重的心脏病,却未告知旁人,在寒冷的海风中,他的心脏停止了跳动。因此,警方在现场只发现了车轮痕迹和未完成的画作,而没有发现任何他杀的迹象。\n","\n","**参与者提出的问题:** 甄加索是自杀吗\n","<|im_end|>\n","<|im_start|>assistant\n","不是</s>\n","--------------------------------------------------\n","prompt: <s><|im_start|>system\n","You are an expert in logical reasoning.<|im_end|>\n","<|im_start|>user\n","你是一个情景猜谜游戏的主持人。游戏规则如下:\n","\n","1. 参与者会得到一个谜面,谜面会描述一个简单又难以理解的事件。\n","2. 主持人知道谜底,谜底是谜面的答案。\n","3. 参与者可以询问任何封闭式问题来找寻事件的真相。\n","4. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。各回答的判断标准如下:\n"," - 若谜面和谜底能找到问题的答案,回答:是或者不是\n"," - 若谜面和谜底不能直接或者间接推断出问题的答案,回答:不重要\n"," - 若参与者提问不是一个封闭式问题或者问题难以理解,回答:问法错误\n"," - 若参与者提问基本还原了谜底真相,回答:回答正确\n","5. 
回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n","\n","请严格按照这些规则回答参与者提出的问题。\n","\n","**谜面:** 在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任何打斗的迹象。请问甄加索的死因是什么?\n","\n","**谜底:** 甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在创作一幅描绘海洋生物的画作。在画即将完成的前一天晚上,他骑着自行车外出,打算在海边观赏夜景。然而,他在沙滩上意外发现了一只搁浅的海豚,为了救助这只海豚,他耗费了极大的体力,最终成功将其送回海中。筋疲力尽的甄加索在沙滩上睡着了,由于他患有严重的心脏病,却未告知旁人,在寒冷的海风中,他的心脏停止了跳动。因此,警方在现场只发现了车轮痕迹和未完成的画作,而没有发现任何他杀的迹象。\n","\n","**参与者提出的问题:** 甄加索是自杀吗\n","<|im_end|>\n","<|im_start|>assistant\n","\n"]}],"source":["print_row_details(datasets[\"test\"].to_pandas())"]},{"cell_type":"code","execution_count":13,"metadata":{},"outputs":[],"source":["def evaluate_model(model, tokenizer, model_name, dataset, batch_size=1):\n"," print(f\"Evaluating model: {model_name} on {device}\")\n"," predictions = eval_model(\n"," model, tokenizer, dataset, device=device, batch_size=batch_size\n"," )\n","\n"," save_results(\n"," model_name,\n"," results_path,\n"," dataset,\n"," predictions,\n"," debug=False,\n"," )\n","\n"," metrics = calc_metrics(dataset[\"label\"], predictions, debug=False)\n"," print(metrics)"]},{"cell_type":"code","execution_count":13,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["Evaluating model: internlm/internlm2_5-7b-chat-1m_llama-factory/saves/internlm2_5_7b/lora/sft_bf16_p2_full/checkpoint-88 on mps\n"]},{"name":"stderr","output_type":"stream","text":[" 0%| | 1/3000 [00:03<2:33:23, 3.07s/it]"]},{"name":"stdout","output_type":"stream","text":["--------\n","step 1: 不是</s>\n","--------\n","step 2: 不是\n","--------\n","step 3: 不是\n","--------\n","step 4: 不是\n","--------\n","step 5: 不是\n"]},{"name":"stderr","output_type":"stream","text":["100%|██████████| 3000/3000 [10:20:50<00:00, 12.42s/it] "]},{"name":"stdout","output_type":"stream","text":["{'accuracy': 0.7836666666666666}\n","CPU times: user 12min 26s, sys: 11min 38s, total: 24min 4s\n","Wall time: 10h 20min 
50s\n"]},{"name":"stderr","output_type":"stream","text":["\n"]}],"source":["%%time\n","\n","evaluate_model(model, tokenizer, f\"{model_name}_{adapter_name_or_path}\", datasets[\"test\"])"]},{"cell_type":"code","execution_count":15,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["Evaluating model: internlm/internlm2_5-7b-chat-1m_llama-factory/saves/internlm2_5_7b/lora/sft_bf16_p2_full/checkpoint-88_batch_16 on mps\n"]},{"name":"stderr","output_type":"stream","text":[" 1%| | 1/188 [01:28<4:34:52, 88.20s/it]"]},{"name":"stdout","output_type":"stream","text":["--------\n","step 1: 不是\n","--------\n","step 2: 不是\n","--------\n","step 3: 不是\n","--------\n","step 4: 不是\n","--------\n","step 5: 不是\n"]},{"name":"stderr","output_type":"stream","text":[" 2%|▏ | 4/188 [04:32<3:21:43, 65.78s/it]"]}],"source":["%%time\n","\n","evaluate_model(model, tokenizer, f\"{model_name}_{adapter_name_or_path}_batch_16\", datasets[\"test\"], batch_size=16)"]}],"metadata":{"accelerator":"GPU","application/vnd.databricks.v1+notebook":{"dashboards":[],"environmentMetadata":null,"language":"python","notebookMetadata":{"mostRecentlyExecutedCommandWithImplicitDF":{"commandId":-1,"dataframes":["_sqldf"]},"pythonIndentUnit":4},"notebookName":"10_eval-lf-medium-py3.11","widgets":{}},"colab":{"gpuType":"L4","provenance":[]},"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.11.9"}},"nbformat":4,"nbformat_minor":0}
|
|
|
1 |
+
{"cells":[{"cell_type":"code","execution_count":1,"metadata":{"executionInfo":{"elapsed":476,"status":"ok","timestamp":1720679526275,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"uWKRSV6eZsCn"},"outputs":[],"source":["%load_ext autoreload\n","%autoreload 2"]},{"cell_type":"code","execution_count":2,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"eb33b19f-1206-41ee-84e2-e6258a12eef7","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":2534,"status":"ok","timestamp":1720679529344,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"xwFh14uiZBrI","outputId":"d767799c-34c2-46a5-f052-378146a55321"},"outputs":[],"source":["from pathlib import Path\n","\n","try:\n"," from google.colab import drive\n","\n"," drive.mount(\"/content/drive\")\n"," workding_dir = \"/content/drive/MyDrive/logical-reasoning/\"\n","except ModuleNotFoundError:\n"," workding_dir = str(Path.cwd().parent)"]},{"cell_type":"code","execution_count":3,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"6d394937-6c99-4a7c-9d32-7600a280032f","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":5,"status":"ok","timestamp":1720679529345,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"G5pNu3zgZBrL","outputId":"160a554f-fb08-4aa0-bc00-0422fb7c1fac"},"outputs":[{"name":"stdout","output_type":"stream","text":["workding dir: /Users/inflaton/code/engd/projects/logical-reasoning\n"]}],"source":["import os\n","import sys\n","from pathlib import Path\n","\n","os.chdir(workding_dir)\n","sys.path.append(workding_dir)\n","print(\"workding dir:\", 
workding_dir)"]},{"cell_type":"code","execution_count":4,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"ac667aba-076e-4de6-9984-8f6a67cb09cd","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":4,"status":"ok","timestamp":1720679529345,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"0dVRAabNZBrL","outputId":"b977e116-df16-47cd-9160-a24f611da687"},"outputs":[{"data":{"text/plain":["False"]},"execution_count":4,"metadata":{},"output_type":"execute_result"}],"source":["need_to_setup_env = False\n","need_to_setup_env"]},{"cell_type":"code","execution_count":5,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"72f9cf79-7b0d-4d9e-90a0-1fa5251b947f","showTitle":false,"title":""},"executionInfo":{"elapsed":4,"status":"ok","timestamp":1720679529345,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"hKUOfP2HZBrL"},"outputs":[],"source":["if need_to_setup_env:\n"," %pip install -r requirements.txt\n"," %cd /content/\n"," %rm -rf LLaMA-Factory\n"," !git clone https://github.com/hiyouga/LLaMA-Factory.git\n"," %cd LLaMA-Factory\n"," %ls\n"," %pip install -e .[torch,bitsandbytes]\n"," \n"," os.chdir(workding_dir)\n"," sys.path.append(workding_dir)\n"," print(\"workding dir:\", workding_dir)"]},{"cell_type":"code","execution_count":6,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"9f67ec60-2f24-411c-84eb-0dd664b44775","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":3,"status":"ok","timestamp":1720679529345,"user":{"displayName":"HUANG DONGHAO 
_","userId":"00977795705617022768"},"user_tz":-480},"id":"hPCC-6m7ZBrM","outputId":"c7aa2c96-5e99-440a-c148-201d79465ff9"},"outputs":[{"name":"stdout","output_type":"stream","text":["loading env vars from: /Users/inflaton/code/engd/projects/logical-reasoning/.env\n"]},{"data":{"text/plain":["True"]},"execution_count":6,"metadata":{},"output_type":"execute_result"}],"source":["from dotenv import find_dotenv, load_dotenv\n","\n","found_dotenv = find_dotenv(\".env\")\n","\n","if len(found_dotenv) == 0:\n"," found_dotenv = find_dotenv(\".env.example\")\n","print(f\"loading env vars from: {found_dotenv}\")\n","load_dotenv(found_dotenv, override=True)"]},{"cell_type":"code","execution_count":7,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"f1597656-8042-4878-9d3b-9ebfb8dd86dc","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":3,"status":"ok","timestamp":1720679529345,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"1M3IraVtZBrM","outputId":"29ab35f6-2970-4ade-d85d-3174acf8cda0"},"outputs":[{"name":"stdout","output_type":"stream","text":["internlm/internlm2_5-7b-chat-1m llama-factory/saves/internlm2_5_7b/lora/sft_bf16_p2_full/checkpoint-88 False datasets/mgtv results/mgtv-results_m3.csv\n"]}],"source":["import os\n","\n","model_name = os.getenv(\"MODEL_NAME\")\n","adapter_name_or_path = os.getenv(\"ADAPTER_NAME_OR_PATH\")\n","load_in_4bit = os.getenv(\"LOAD_IN_4BIT\") == \"true\"\n","data_path = os.getenv(\"LOGICAL_REASONING_DATA_PATH\")\n","results_path = os.getenv(\"LOGICAL_REASONING_RESULTS_PATH\")\n","use_english_datasets = os.getenv(\"USE_ENGLISH_DATASETS\") == \"true\"\n","\n","print(model_name, adapter_name_or_path, load_in_4bit, data_path, 
results_path)"]},{"cell_type":"code","execution_count":8,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"b2a43943-9324-4839-9a47-cfa72de2244b","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":564,"status":"ok","timestamp":1720679529907,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"UgMvt6dIZBrM","outputId":"ce37581c-fd26-46c2-ad87-d933d99f68f7"},"outputs":[{"name":"stdout","output_type":"stream","text":["Python 3.11.9\n","\u001b[33mWARNING: Package(s) not found: flash-attn\u001b[0m\u001b[33m\n","\u001b[0mCPU times: user 38.9 ms, sys: 26.7 ms, total: 65.7 ms\n","Wall time: 2.97 s\n"]}],"source":["%%time\n","!python --version\n","!pip show flash-attn"]},{"cell_type":"code","execution_count":9,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":1685,"status":"ok","timestamp":1720679531591,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"ZuS_FsLyZBrN","outputId":"2cba0105-c505-4395-afbd-2f2fee6581d0"},"outputs":[{"name":"stdout","output_type":"stream","text":["loading /Users/inflaton/code/engd/projects/logical-reasoning/llm_toolkit/logical_reasoning_utils.py\n","MPS is available\n"]}],"source":["from llm_toolkit.llm_utils import *\n","from llm_toolkit.logical_reasoning_utils import *\n","\n","device = check_gpu()"]},{"cell_type":"code","execution_count":10,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["loading model: internlm/internlm2_5-7b-chat-1m with adapter: llama-factory/saves/internlm2_5_7b/lora/sft_bf16_p2_full/checkpoint-88\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"443302f393fe45e3a5150cb5e1f35a11","version_major":2,"version_minor":0},"text/plain":["Loading checkpoint shards: 0%| | 0/8 [00:00<?, 
?it/s]"]},"metadata":{},"output_type":"display_data"},{"name":"stdout","output_type":"stream","text":["CPU times: user 3.13 s, sys: 6.52 s, total: 9.65 s\n","Wall time: 30.7 s\n"]}],"source":["%%time\n","\n","model, tokenizer = load_model(model_name, adapter_name_or_path=adapter_name_or_path, using_llama_factory=False)"]},{"cell_type":"code","execution_count":11,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["loading train/test data files\n","DatasetDict({\n"," train: Dataset({\n"," features: ['text', 'label', 'answer', 'title', 'puzzle', 'truth', 'train_text', 'prompt'],\n"," num_rows: 25000\n"," })\n"," test: Dataset({\n"," features: ['text', 'label', 'answer', 'title', 'puzzle', 'truth', 'train_text', 'prompt'],\n"," num_rows: 3000\n"," })\n","})\n"]}],"source":["datasets = load_logical_reasoning_dataset(\n"," data_path,\n"," tokenizer=tokenizer,\n"," chinese_prompt=not use_english_datasets,\n"," using_p1=False,\n",")"]},{"cell_type":"code","execution_count":12,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["--------------------------------------------------\n","text: 甄加索是自杀吗\n","--------------------------------------------------\n","label: 不是\n","--------------------------------------------------\n","answer: nan\n","--------------------------------------------------\n","title: 海岸之谜\n","--------------------------------------------------\n","puzzle: 在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任何打斗的迹象。请问甄加索的死因是什么?\n","--------------------------------------------------\n","truth: 甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在创作一幅描绘海洋生物的画作。在画即将完成的前一天晚上,他骑着自行车外出,打算在海边观赏夜景。然而,他在沙滩上意外发现了一只搁浅的海豚,为了救助这只海豚,他耗费了极大的体力,最终成功将其送回海中。筋疲力尽的甄加索在沙滩上睡着了,由于他患有严重的心脏病,却未告知旁人,在寒冷的海风中,他的心脏停止了跳动。因此,警方在现场只发现了车轮痕迹和未完成的画作,而没有发现任何他杀的迹象。\n","--------------------------------------------------\n","train_text: <s><|im_start|>system\n","You are an expert in logical reasoning.<|im_end|>\n","<|im_start|>user\n","你是一个情景猜谜游戏的主持人。游戏规则如下:\n","\n","1. 
参与者会得到一个谜面,谜面会描述一个简单又难以理解的事件。\n","2. 主持人知道谜底,谜底是谜面的答案。\n","3. 参与者可以询问任何封闭式问题来找寻事件的真相。\n","4. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。各回答的判断标准如下:\n"," - 若谜面和谜底能找到问题的答案,回答:是或者不是\n"," - 若谜面和谜底不能直接或者间接推断出问题的答案,回答:不重要\n"," - 若参与者提问不是一个封闭式问题或者问题难以理解,回答:问法错误\n"," - 若参与者提问基本还原了谜底真相,回答:回答正确\n","5. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n","\n","请严格按照这些规则回答参与者提出的问题。\n","\n","**谜面:** 在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任何打斗的迹象。请问甄加索的死因是什么?\n","\n","**谜底:** 甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在创作一幅描绘海洋生物的画作。在画即将完成的前一天晚上,他骑着自行车外出,打算在海边观赏夜景。然而,他在沙滩上意外发现了一只搁浅的海豚,为了救助这只海豚,他耗费了极大的体力,最终成功将其送回海中。筋疲力尽的甄加索在沙滩上睡着了,由于他患有严重的心脏病,却未告知旁人,在寒冷的海风中,他的心脏停止了跳动。因此,警方在现场只发现了车轮痕迹和未完成的画作,而没有发现任何他杀的迹象。\n","\n","**参与者提出的问题:** 甄加索是自杀吗\n","<|im_end|>\n","<|im_start|>assistant\n","不是</s>\n","--------------------------------------------------\n","prompt: <s><|im_start|>system\n","You are an expert in logical reasoning.<|im_end|>\n","<|im_start|>user\n","你是一个情景猜谜游戏的主持人。游戏规则如下:\n","\n","1. 参与者会得到一个谜面,谜面会描述一个简单又难以理解的事件。\n","2. 主持人知道谜底,谜底是谜面的答案。\n","3. 参与者可以询问任何封闭式问题来找寻事件的真相。\n","4. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。各回答的判断标准如下:\n"," - 若谜面和谜底能找到问题的答案,回答:是或者不是\n"," - 若谜面和谜底不能直接或者间接推断出问题的答案,回答:不重要\n"," - 若参与者提问不是一个封闭式问题或者问题难以理解,回答:问法错误\n"," - 若参与者提问基本还原了谜底真相,回答:回答正确\n","5. 
回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。\n","\n","请严格按照这些规则回答参与者提出的问题。\n","\n","**谜面:** 在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任何打斗的迹象。请问甄加索的死因是什么?\n","\n","**谜底:** 甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在创作一幅描绘海洋生物的画作。在画即将完成的前一天晚上,他骑着自行车外出,打算在海边观赏夜景。然而,他在沙滩上意外发现了一只搁浅的海豚,为了救助这只海豚,他耗费了极大的体力,最终成功将其送回海中。筋疲力尽的甄加索在沙滩上睡着了,由于他患有严重的心脏病,却未告知旁人,在寒冷的海风中,他的心脏停止了跳动。因此,警方在现场只发现了车轮痕迹和未完成的画作,而没有发现任何他杀的迹象。\n","\n","**参与者提出的问题:** 甄加索是自杀吗\n","<|im_end|>\n","<|im_start|>assistant\n","\n"]}],"source":["print_row_details(datasets[\"test\"].to_pandas())"]},{"cell_type":"code","execution_count":13,"metadata":{},"outputs":[],"source":["def evaluate_model(model, tokenizer, model_name, dataset, batch_size=1):\n"," print(f\"Evaluating model: {model_name} on {device}\")\n"," predictions = eval_model(\n"," model, tokenizer, dataset, device=device, batch_size=batch_size\n"," )\n","\n"," save_results(\n"," model_name,\n"," results_path,\n"," dataset,\n"," predictions,\n"," debug=False,\n"," )\n","\n"," metrics = calc_metrics(dataset[\"label\"], predictions, debug=False)\n"," print(metrics)"]},{"cell_type":"code","execution_count":13,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["Evaluating model: internlm/internlm2_5-7b-chat-1m_llama-factory/saves/internlm2_5_7b/lora/sft_bf16_p2_full/checkpoint-88 on mps\n"]},{"name":"stderr","output_type":"stream","text":[" 0%| | 1/3000 [00:03<2:33:23, 3.07s/it]"]},{"name":"stdout","output_type":"stream","text":["--------\n","step 1: 不是</s>\n","--------\n","step 2: 不是\n","--------\n","step 3: 不是\n","--------\n","step 4: 不是\n","--------\n","step 5: 不是\n"]},{"name":"stderr","output_type":"stream","text":["100%|██████████| 3000/3000 [10:20:50<00:00, 12.42s/it] "]},{"name":"stdout","output_type":"stream","text":["{'accuracy': 0.7836666666666666}\n","CPU times: user 12min 26s, sys: 11min 38s, total: 24min 4s\n","Wall time: 10h 20min 
50s\n"]},{"name":"stderr","output_type":"stream","text":["\n"]}],"source":["%%time\n","\n","evaluate_model(model, tokenizer, f\"{model_name}_{adapter_name_or_path}\", datasets[\"test\"])"]}],"metadata":{"accelerator":"GPU","application/vnd.databricks.v1+notebook":{"dashboards":[],"environmentMetadata":null,"language":"python","notebookMetadata":{"mostRecentlyExecutedCommandWithImplicitDF":{"commandId":-1,"dataframes":["_sqldf"]},"pythonIndentUnit":4},"notebookName":"10_eval-lf-medium-py3.11","widgets":{}},"colab":{"gpuType":"L4","provenance":[]},"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.11.9"}},"nbformat":4,"nbformat_minor":0}
|
competition/10d_InternLM_M3_analysis.ipynb
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
{"cells":[{"cell_type":"code","execution_count":1,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{},"inputWidgets":{},"nuid":"0ea8b46b-839b-445b-8043-ccdf4e920ace","showTitle":false,"title":""},"id":"YLH80COBzi_F"},"outputs":[],"source":["%load_ext autoreload\n","%autoreload 2"]},{"cell_type":"code","execution_count":2,"metadata":{"id":"63B5exAuzq4M"},"outputs":[],"source":["from pathlib import Path\n","\n","try:\n"," from google.colab import drive\n"," drive.mount('/content/drive')\n"," workding_dir = \"/content/drive/MyDrive/logical-reasoning/\"\n","except ModuleNotFoundError:\n"," workding_dir = str(Path.cwd().parent)"]},{"cell_type":"code","execution_count":3,"metadata":{"executionInfo":{"elapsed":368,"status":"ok","timestamp":1719461634865,"user":{"displayName":"Donghao Huang","userId":"00463591218503521679"},"user_tz":-480},"id":"zFulf0bg0H-9","outputId":"debdd535-c828-40b9-efc0-8a180e5830dd"},"outputs":[{"name":"stdout","output_type":"stream","text":["workding dir: /Users/inflaton/code/engd/projects/logical-reasoning\n"]}],"source":["import os\n","import sys\n","\n","os.chdir(workding_dir)\n","sys.path.append(workding_dir)\n","print(\"workding dir:\", workding_dir)"]},{"cell_type":"code","execution_count":4,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{},"inputWidgets":{},"nuid":"9f67ec60-2f24-411c-84eb-0dd664b44775","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":589,"status":"ok","timestamp":1719462011879,"user":{"displayName":"Donghao Huang","userId":"00463591218503521679"},"user_tz":-480},"id":"DIUiweYYzi_I","outputId":"e16e9247-9077-4b0c-f8ea-17059f05a1c4"},"outputs":[{"name":"stdout","output_type":"stream","text":["loading env vars from: /Users/inflaton/code/engd/projects/logical-reasoning/.env\n"]},{"data":{"text/plain":["True"]},"execution_count":4,"metadata":{},"output_type":"execute_result"}],"source":["from dotenv import find_dotenv, 
load_dotenv\n","\n","found_dotenv = find_dotenv(\".env\")\n","\n","if len(found_dotenv) == 0:\n"," found_dotenv = find_dotenv(\".env.example\")\n","print(f\"loading env vars from: {found_dotenv}\")\n","load_dotenv(found_dotenv, override=True)"]},{"cell_type":"code","execution_count":5,"metadata":{"id":"W2QyVreqhOGM","outputId":"68b9590e-1ac6-4c6f-e0c4-e273ec816419"},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>text</th>\n"," <th>label</th>\n"," <th>title</th>\n"," <th>puzzle</th>\n"," <th>truth</th>\n"," <th>internlm/internlm2_5-7b-chat-1m_llama-factory/saves/internlm2_5_7b/lora/sft_bf16_p2_full/checkpoint-88</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>甄加索是自杀吗</td>\n"," <td>不是</td>\n"," <td>海岸之谜</td>\n"," <td>在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任...</td>\n"," <td>甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在...</td>\n"," <td>不是</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>甄加索有身体上的疾病吗</td>\n"," <td>是</td>\n"," <td>海岸之谜</td>\n"," <td>在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任...</td>\n"," <td>甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在...</td>\n"," <td>是</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>画作是甄的</td>\n"," <td>是</td>\n"," <td>海岸之谜</td>\n"," <td>在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任...</td>\n"," <td>甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在...</td>\n"," <td>是</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>甄有心脏病吗</td>\n"," <td>是</td>\n"," <td>海岸之谜</td>\n"," <td>在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任...</td>\n"," <td>甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在...</td>\n"," <td>是</td>\n"," </tr>\n"," <tr>\n"," 
<th>4</th>\n"," <td>车轮是凶手留下的</td>\n"," <td>不是</td>\n"," <td>海岸之谜</td>\n"," <td>在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任...</td>\n"," <td>甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在...</td>\n"," <td>不是</td>\n"," </tr>\n"," <tr>\n"," <th>...</th>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," </tr>\n"," <tr>\n"," <th>2995</th>\n"," <td>哭泣者必须在晚上祭奠吗</td>\n"," <td>是</td>\n"," <td>甄庄哭声</td>\n"," <td>在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着...</td>\n"," <td>原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖...</td>\n"," <td>不重要</td>\n"," </tr>\n"," <tr>\n"," <th>2996</th>\n"," <td>尸体在湖里吗</td>\n"," <td>不是</td>\n"," <td>甄庄哭声</td>\n"," <td>在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着...</td>\n"," <td>原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖...</td>\n"," <td>不是</td>\n"," </tr>\n"," <tr>\n"," <th>2997</th>\n"," <td>哭泣者和死者有特殊关系吗</td>\n"," <td>是</td>\n"," <td>甄庄哭声</td>\n"," <td>在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着...</td>\n"," <td>原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖...</td>\n"," <td>是</td>\n"," </tr>\n"," <tr>\n"," <th>2998</th>\n"," <td>是帽子的主人去世了吗</td>\n"," <td>不是</td>\n"," <td>甄庄哭声</td>\n"," <td>在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着...</td>\n"," <td>原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖...</td>\n"," <td>是</td>\n"," </tr>\n"," <tr>\n"," <th>2999</th>\n"," <td>死者受伤了吗</td>\n"," <td>不是</td>\n"," <td>甄庄哭声</td>\n"," <td>在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着...</td>\n"," <td>原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖...</td>\n"," <td>不是</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>3000 rows × 6 columns</p>\n","</div>"],"text/plain":[" text label title \\\n","0 甄加索是自杀吗 不是 海岸之谜 \n","1 甄加索有身体上的疾病吗 是 海岸之谜 \n","2 画作是甄的 是 海岸之谜 \n","3 甄有心脏病吗 是 海岸之谜 \n","4 车轮是凶手留下的 不是 海岸之谜 \n","... ... ... ... 
\n","2995 哭泣者必须在晚上祭奠吗 是 甄庄哭声 \n","2996 尸体在湖里吗 不是 甄庄哭声 \n","2997 哭泣者和死者有特殊关系吗 是 甄庄哭声 \n","2998 是帽子的主人去世了吗 不是 甄庄哭声 \n","2999 死者受伤了吗 不是 甄庄哭声 \n","\n"," puzzle \\\n","0 在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任... \n","1 在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任... \n","2 在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任... \n","3 在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任... \n","4 在远离城市喧嚣的海边小屋,一天清晨,邻居发现甄加索僵卧在沙滩上,已无生命迹象。现场没有发现任... \n","... ... \n","2995 在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着... \n","2996 在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。��二天早晨,村长甄锐发现湖边的石头上放着... \n","2997 在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着... \n","2998 在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着... \n","2999 在一个安静的夜晚,小村庄的湖边突然传来了阵阵哭泣声。第二天早晨,村长甄锐发现湖边的石头上放着... \n","\n"," truth \\\n","0 甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在... \n","1 甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在... \n","2 甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在... \n","3 甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在... \n","4 甄加索是一位热爱自然的画家,他每年都会来到这个海边小屋寻找灵感。在他生命的最后几天,他一直在... \n","... ... \n","2995 原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖... \n","2996 原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖... \n","2997 原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖... \n","2998 原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖... \n","2999 原来,这顶破旧的帽子属于一个小男孩,他小时候与爷爷在湖边生活。爷爷教他钓鱼、游泳,还告诉他湖... \n","\n"," internlm/internlm2_5-7b-chat-1m_llama-factory/saves/internlm2_5_7b/lora/sft_bf16_p2_full/checkpoint-88 \n","0 不是 \n","1 是 \n","2 是 \n","3 是 \n","4 不是 \n","... ... 
\n","2995 不重要 \n","2996 不是 \n","2997 是 \n","2998 是 \n","2999 不是 \n","\n","[3000 rows x 6 columns]"]},"execution_count":5,"metadata":{},"output_type":"execute_result"}],"source":["import pandas as pd\n","\n","df = pd.read_csv(\"results/mgtv-results_m3.csv\")\n","df"]},{"cell_type":"code","execution_count":6,"metadata":{},"outputs":[],"source":["import matplotlib.pyplot as plt\n","from matplotlib import rcParams\n","\n","def plot_value_counts(df, column):\n"," font_family = rcParams[\"font.family\"]\n"," # Set the font to SimHei to support Chinese characters\n"," rcParams[\"font.family\"] = \"STHeiti\"\n"," rcParams[\"axes.unicode_minus\"] = False # This is to support the minus sign in Chinese.\n","\n"," plt.figure(figsize=(12, 6))\n"," df[column].value_counts().plot(kind=\"bar\")\n"," # add values on top of bars\n"," for i, v in enumerate(df[column].value_counts()):\n"," plt.text(i, v + 0.1, str(v), ha=\"center\")\n"," plt.show()\n"," \n"," rcParams[\"font.family\"] = font_family\n"]},{"cell_type":"code","execution_count":7,"metadata":{},"outputs":[{"data":{"text/plain":["['text',\n"," 'label',\n"," 'title',\n"," 'puzzle',\n"," 'truth',\n"," 'internlm/internlm2_5-7b-chat-1m_llama-factory/saves/internlm2_5_7b/lora/sft_bf16_p2_full/checkpoint-88']"]},"execution_count":7,"metadata":{},"output_type":"execute_result"}],"source":["df.columns.to_list()"]},{"cell_type":"code","execution_count":8,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["********** internlm/internlm2_5-7b-chat-1m_llama-factory/saves/internlm2_5_7b/lora/sft_bf16_p2_full/checkpoint-88 **********\n","internlm/internlm2_5-7b-chat-1m_llama-factory/saves/internlm2_5_7b/lora/sft_bf16_p2_full/checkpoint-88\n","不是 1505\n","是 1140\n","不重要 264\n","问法错误 53\n","回答正确 38\n","Name: count, dtype: int64\n"]},{"data":{"image/png":"","text/plain":["<Figure size 1200x600 with 1 Axes>"]},"metadata":{},"output_type":"display_data"}],"source":["for col in df.columns[5:]:\n"," print(\"*\" * 10, col, 
\"*\" * 10)\n"," print(df[col].value_counts())\n"," plot_value_counts(df, col)"]},{"cell_type":"code","execution_count":9,"metadata":{},"outputs":[],"source":["import pandas as pd\n","import numpy as np\n","from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score\n","\n","\n","def calc_metrics_for_col(df, col):\n"," y_true = df[\"label\"]\n"," y_pred = df[col]\n","\n"," accuracy = accuracy_score(y_true, y_pred)\n"," precision = precision_score(y_true, y_pred, average=\"weighted\", labels=np.unique(y_pred))\n"," recall = recall_score(y_true, y_pred, average=\"weighted\", labels=np.unique(y_pred))\n"," f1 = f1_score(y_true, y_pred, average=\"weighted\", labels=np.unique(y_pred))\n","\n"," return accuracy, float(precision), float(recall), float(f1)"]},{"cell_type":"code","execution_count":10,"metadata":{},"outputs":[{"name":"stderr","output_type":"stream","text":["/var/folders/7x/56svhln929zdh2xhr3mwqg4r0000gn/T/ipykernel_73014/961288552.py:9: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. 
To retain the old behavior, exclude the relevant entries before the concat operation.\n"," perf_df = pd.concat([perf_df, pd.DataFrame([new_model_metrics])], ignore_index=True)\n"]},{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>epoch</th>\n"," <th>model</th>\n"," <th>accuracy</th>\n"," <th>precision</th>\n"," <th>recall</th>\n"," <th>f1</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>0</td>\n"," <td>internlm/internlm2_5-7b-chat-1m_llama-factory/...</td>\n"," <td>0.783667</td>\n"," <td>0.809455</td>\n"," <td>0.783667</td>\n"," <td>0.794048</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>"],"text/plain":[" epoch model accuracy \\\n","0 0 internlm/internlm2_5-7b-chat-1m_llama-factory/... 
0.783667 \n","\n"," precision recall f1 \n","0 0.809455 0.783667 0.794048 "]},"execution_count":10,"metadata":{},"output_type":"execute_result"}],"source":["import pandas as pd\n","\n","perf_df = pd.DataFrame(columns=[\"epoch\", \"model\", \"accuracy\", \"precision\", \"recall\", \"f1\"])\n","for i, col in enumerate(df.columns[5:]):\n"," accuracy, precision, recall, f1 = calc_metrics_for_col(df, col)\n"," new_model_metrics = {\"epoch\": i, \"model\": col, \"accuracy\": accuracy, \"precision\": precision, \"recall\": recall, \"f1\": f1}\n","\n"," # Convert the dictionary to a DataFrame and concatenate it with the existing DataFrame\n"," perf_df = pd.concat([perf_df, pd.DataFrame([new_model_metrics])], ignore_index=True)\n","\n","perf_df"]},{"cell_type":"code","execution_count":13,"metadata":{},"outputs":[{"data":{"image/png":"","text/plain":["<Figure size 1200x500 with 1 Axes>"]},"metadata":{},"output_type":"display_data"}],"source":["# plot metrics for each model\n","import matplotlib.pyplot as plt\n","\n","fig, ax = plt.subplots(1, 1, figsize=(12, 5))\n","\n","perf_df.plot(x=\"model\", y=[\"accuracy\", \"precision\", \"recall\", \"f1\"], kind=\"bar\", ax=ax)\n","\n","# add values on top of bars\n","for p in ax.patches:\n"," ax.annotate(\n"," f\"{p.get_height():.3f}\",\n"," (p.get_x() + p.get_width() / 2, p.get_height()),\n"," ha=\"center\",\n"," va=\"bottom\",\n"," fontsize=10,\n"," )\n","\n","# add title and labels\n","# ax.set_title(\"Metrics for different settings\")\n","# ax.set_ylabel(\"Value\")\n","# ax.set_xlabel(\"Epoch (0: base model, 1-4: fine-tuned models)\")\n","# rotate x labels\n","plt.xticks(rotation=0)\n","\n","# set legend at the right to avoid overlapping with bars\n","plt.legend(loc=\"center left\", bbox_to_anchor=(1.0, 0.5))\n","# plt.tight_layout()\n","\n","plt.show()"]},{"cell_type":"code","execution_count":14,"metadata":{},"outputs":[],"source":["perf_df.to_csv(\"results/mgtv-results_p2_full_metrics.csv\", 
index=False)"]}],"metadata":{"accelerator":"GPU","application/vnd.databricks.v1+notebook":{"dashboards":[],"environmentMetadata":null,"language":"python","notebookMetadata":{"pythonIndentUnit":4},"notebookName":"07_MAC_+_Qwen2-7B-Instructi_Unsloth_train","widgets":{}},"colab":{"gpuType":"T4","provenance":[]},"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.11.9"}},"nbformat":4,"nbformat_minor":0}
|
|
|
|
competition/10f_InternLM_best_analysis.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
competition/14_GLM-4_M3_eval.ipynb
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"cells":[{"cell_type":"code","execution_count":1,"metadata":{"executionInfo":{"elapsed":476,"status":"ok","timestamp":1720679526275,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"uWKRSV6eZsCn"},"outputs":[],"source":["%load_ext autoreload\n","%autoreload 2"]},{"cell_type":"code","execution_count":2,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"eb33b19f-1206-41ee-84e2-e6258a12eef7","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":2534,"status":"ok","timestamp":1720679529344,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"xwFh14uiZBrI","outputId":"d767799c-34c2-46a5-f052-378146a55321"},"outputs":[],"source":["from pathlib import Path\n","\n","try:\n"," from google.colab import drive\n","\n"," drive.mount(\"/content/drive\")\n"," workding_dir = \"/content/drive/MyDrive/logical-reasoning/\"\n","except ModuleNotFoundError:\n"," workding_dir = str(Path.cwd().parent)"]},{"cell_type":"code","execution_count":3,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"6d394937-6c99-4a7c-9d32-7600a280032f","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":5,"status":"ok","timestamp":1720679529345,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"G5pNu3zgZBrL","outputId":"160a554f-fb08-4aa0-bc00-0422fb7c1fac"},"outputs":[{"name":"stdout","output_type":"stream","text":["workding dir: /Users/inflaton/code/engd/projects/logical-reasoning\n"]}],"source":["import os\n","import sys\n","from pathlib import Path\n","\n","os.chdir(workding_dir)\n","sys.path.append(workding_dir)\n","print(\"workding dir:\", 
workding_dir)"]},{"cell_type":"code","execution_count":4,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"ac667aba-076e-4de6-9984-8f6a67cb09cd","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":4,"status":"ok","timestamp":1720679529345,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"0dVRAabNZBrL","outputId":"b977e116-df16-47cd-9160-a24f611da687"},"outputs":[{"data":{"text/plain":["False"]},"execution_count":4,"metadata":{},"output_type":"execute_result"}],"source":["need_to_setup_env = False\n","need_to_setup_env"]},{"cell_type":"code","execution_count":5,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"72f9cf79-7b0d-4d9e-90a0-1fa5251b947f","showTitle":false,"title":""},"executionInfo":{"elapsed":4,"status":"ok","timestamp":1720679529345,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"hKUOfP2HZBrL"},"outputs":[],"source":["if need_to_setup_env:\n"," %pip install -r requirements.txt\n"," %cd /content/\n"," %rm -rf LLaMA-Factory\n"," !git clone https://github.com/hiyouga/LLaMA-Factory.git\n"," %cd LLaMA-Factory\n"," %ls\n"," %pip install -e .[torch,bitsandbytes]\n"," \n"," os.chdir(workding_dir)\n"," sys.path.append(workding_dir)\n"," print(\"workding dir:\", workding_dir)"]},{"cell_type":"code","execution_count":6,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"9f67ec60-2f24-411c-84eb-0dd664b44775","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":3,"status":"ok","timestamp":1720679529345,"user":{"displayName":"HUANG DONGHAO 
_","userId":"00977795705617022768"},"user_tz":-480},"id":"hPCC-6m7ZBrM","outputId":"c7aa2c96-5e99-440a-c148-201d79465ff9"},"outputs":[{"name":"stdout","output_type":"stream","text":["loading env vars from: /Users/inflaton/code/engd/projects/logical-reasoning/.env\n"]},{"data":{"text/plain":["True"]},"execution_count":6,"metadata":{},"output_type":"execute_result"}],"source":["from dotenv import find_dotenv, load_dotenv\n","\n","found_dotenv = find_dotenv(\".env\")\n","\n","if len(found_dotenv) == 0:\n"," found_dotenv = find_dotenv(\".env.example\")\n","print(f\"loading env vars from: {found_dotenv}\")\n","load_dotenv(found_dotenv, override=True)"]},{"cell_type":"code","execution_count":7,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"f1597656-8042-4878-9d3b-9ebfb8dd86dc","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":3,"status":"ok","timestamp":1720679529345,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"1M3IraVtZBrM","outputId":"29ab35f6-2970-4ade-d85d-3174acf8cda0"},"outputs":[{"name":"stdout","output_type":"stream","text":["THUDM/glm-4-9b-chat-1m None False datasets/mgtv results/mgtv-results_m3.csv\n"]}],"source":["import os\n","\n","model_name = os.getenv(\"MODEL_NAME\")\n","adapter_name_or_path = os.getenv(\"ADAPTER_NAME_OR_PATH\")\n","load_in_4bit = os.getenv(\"LOAD_IN_4BIT\") == \"true\"\n","data_path = os.getenv(\"LOGICAL_REASONING_DATA_PATH\")\n","results_path = os.getenv(\"LOGICAL_REASONING_RESULTS_PATH\")\n","use_english_datasets = os.getenv(\"USE_ENGLISH_DATASETS\") == \"true\"\n","\n","print(model_name, adapter_name_or_path, load_in_4bit, data_path, 
results_path)"]},{"cell_type":"code","execution_count":8,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"b2a43943-9324-4839-9a47-cfa72de2244b","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":564,"status":"ok","timestamp":1720679529907,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"UgMvt6dIZBrM","outputId":"ce37581c-fd26-46c2-ad87-d933d99f68f7"},"outputs":[{"name":"stdout","output_type":"stream","text":["Python 3.11.9\n","\u001b[33mWARNING: Package(s) not found: flash-attn\u001b[0m\u001b[33m\n","\u001b[0mCPU times: user 4.12 ms, sys: 9.4 ms, total: 13.5 ms\n","Wall time: 651 ms\n"]}],"source":["%%time\n","!python --version\n","!pip show flash-attn"]},{"cell_type":"code","execution_count":9,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":1685,"status":"ok","timestamp":1720679531591,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"ZuS_FsLyZBrN","outputId":"2cba0105-c505-4395-afbd-2f2fee6581d0"},"outputs":[{"name":"stdout","output_type":"stream","text":["loading /Users/inflaton/code/engd/projects/logical-reasoning/llm_toolkit/logical_reasoning_utils.py\n","MPS is available\n"]}],"source":["from llm_toolkit.llm_utils import *\n","from llm_toolkit.logical_reasoning_utils import *\n","\n","device = check_gpu()"]},{"cell_type":"code","execution_count":14,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["loading model: THUDM/glm-4-9b-chat-1m with adapter: None\n"]},{"name":"stderr","output_type":"stream","text":["Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or 
trained.\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"19aebcc5f4934933878ce3bb1fdc4b32","version_major":2,"version_minor":0},"text/plain":["Downloading shards: 0%| | 0/10 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"c161cec46c72435a8b8d0beab1a329fa","version_major":2,"version_minor":0},"text/plain":["model-00006-of-00010.safetensors: 96%|#########6| 1.91G/1.99G [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"13d593455bc94919846a6b5bd288740a","version_major":2,"version_minor":0},"text/plain":["model-00007-of-00010.safetensors: 0%| | 0.00/1.95G [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"6154990ed2f047348720919a884f67d0","version_major":2,"version_minor":0},"text/plain":["model-00008-of-00010.safetensors: 0%| | 0.00/1.84G [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"name":"stderr","output_type":"stream","text":["Error while downloading from 
https://cdn-lfs-us-1.huggingface.co/repos/8f/69/8f69006a64acf627b9dd5b8ed9962abf5ef416d57fc370004ac5f598cea0df41/ea76696ccd8b266eff2e755d9286a789a2b8d644a19b2cdccd365933e97cfa94?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27model-00008-of-00010.safetensors%3B+filename%3D%22model-00008-of-00010.safetensors%22%3B&Expires=1721695460&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyMTY5NTQ2MH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zLzhmLzY5LzhmNjkwMDZhNjRhY2Y2MjdiOWRkNWI4ZWQ5OTYyYWJmNWVmNDE2ZDU3ZmMzNzAwMDRhYzVmNTk4Y2VhMGRmNDEvZWE3NjY5NmNjZDhiMjY2ZWZmMmU3NTVkOTI4NmE3ODlhMmI4ZDY0NGExOWIyY2RjY2QzNjU5MzNlOTdjZmE5ND9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=BFlf2hKIdIMTSR%7E5dNEt5kHkOVcTfJOy3l8sseMydbGlzagX5bSyy18zLAc450pgxun6NKtv1ke8Db3nCDnl4DSSFOkzxH0zEzNzWN0Jt7P7axDubD%7EE7qeD1VMn1NB-r8OI0QDaF9Z%7EnAd9--fXyiXX7hacB3aCvmnsiwQbHkNy2DO89UKGcKIrf1yvXQMY-uO3RYtypVGqwjpgBBE9pf3n-Z1SEjAHwpxLeqowvj3Jc8yI5M-R60ymjlGe24Zcrdg25ScWJxnzKlqrAPB2p3P9clz7LdxLiI-7Ip0k8TsbkM-5BMe-yc99ED77-qzb6t9qQhlAFCOznu67KjFb-w__&Key-Pair-Id=K24J24Z295AEI9: HTTPSConnectionPool(host='cdn-lfs-us-1.huggingface.co', port=443): Read timed out.\n","Trying to resume download...\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"0c0b15e2f9e344b9ad4274fcc150d55d","version_major":2,"version_minor":0},"text/plain":["model-00008-of-00010.safetensors: 31%|### | 566M/1.84G [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"name":"stderr","output_type":"stream","text":["Error while downloading from 
https://cdn-lfs-us-1.huggingface.co/repos/8f/69/8f69006a64acf627b9dd5b8ed9962abf5ef416d57fc370004ac5f598cea0df41/ea76696ccd8b266eff2e755d9286a789a2b8d644a19b2cdccd365933e97cfa94?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27model-00008-of-00010.safetensors%3B+filename%3D%22model-00008-of-00010.safetensors%22%3B&Expires=1721695460&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyMTY5NTQ2MH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zLzhmLzY5LzhmNjkwMDZhNjRhY2Y2MjdiOWRkNWI4ZWQ5OTYyYWJmNWVmNDE2ZDU3ZmMzNzAwMDRhYzVmNTk4Y2VhMGRmNDEvZWE3NjY5NmNjZDhiMjY2ZWZmMmU3NTVkOTI4NmE3ODlhMmI4ZDY0NGExOWIyY2RjY2QzNjU5MzNlOTdjZmE5ND9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=BFlf2hKIdIMTSR%7E5dNEt5kHkOVcTfJOy3l8sseMydbGlzagX5bSyy18zLAc450pgxun6NKtv1ke8Db3nCDnl4DSSFOkzxH0zEzNzWN0Jt7P7axDubD%7EE7qeD1VMn1NB-r8OI0QDaF9Z%7EnAd9--fXyiXX7hacB3aCvmnsiwQbHkNy2DO89UKGcKIrf1yvXQMY-uO3RYtypVGqwjpgBBE9pf3n-Z1SEjAHwpxLeqowvj3Jc8yI5M-R60ymjlGe24Zcrdg25ScWJxnzKlqrAPB2p3P9clz7LdxLiI-7Ip0k8TsbkM-5BMe-yc99ED77-qzb6t9qQhlAFCOznu67KjFb-w__&Key-Pair-Id=K24J24Z295AEI9: HTTPSConnectionPool(host='cdn-lfs-us-1.huggingface.co', port=443): Read timed out.\n","Trying to resume download...\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"82f48613f82047a7abc23a99939275ac","version_major":2,"version_minor":0},"text/plain":["model-00008-of-00010.safetensors: 34%|###3 | 619M/1.84G [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"name":"stderr","output_type":"stream","text":["Error while downloading from 
https://cdn-lfs-us-1.huggingface.co/repos/8f/69/8f69006a64acf627b9dd5b8ed9962abf5ef416d57fc370004ac5f598cea0df41/ea76696ccd8b266eff2e755d9286a789a2b8d644a19b2cdccd365933e97cfa94?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27model-00008-of-00010.safetensors%3B+filename%3D%22model-00008-of-00010.safetensors%22%3B&Expires=1721695460&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyMTY5NTQ2MH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zLzhmLzY5LzhmNjkwMDZhNjRhY2Y2MjdiOWRkNWI4ZWQ5OTYyYWJmNWVmNDE2ZDU3ZmMzNzAwMDRhYzVmNTk4Y2VhMGRmNDEvZWE3NjY5NmNjZDhiMjY2ZWZmMmU3NTVkOTI4NmE3ODlhMmI4ZDY0NGExOWIyY2RjY2QzNjU5MzNlOTdjZmE5ND9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=BFlf2hKIdIMTSR%7E5dNEt5kHkOVcTfJOy3l8sseMydbGlzagX5bSyy18zLAc450pgxun6NKtv1ke8Db3nCDnl4DSSFOkzxH0zEzNzWN0Jt7P7axDubD%7EE7qeD1VMn1NB-r8OI0QDaF9Z%7EnAd9--fXyiXX7hacB3aCvmnsiwQbHkNy2DO89UKGcKIrf1yvXQMY-uO3RYtypVGqwjpgBBE9pf3n-Z1SEjAHwpxLeqowvj3Jc8yI5M-R60ymjlGe24Zcrdg25ScWJxnzKlqrAPB2p3P9clz7LdxLiI-7Ip0k8TsbkM-5BMe-yc99ED77-qzb6t9qQhlAFCOznu67KjFb-w__&Key-Pair-Id=K24J24Z295AEI9: HTTPSConnectionPool(host='cdn-lfs-us-1.huggingface.co', port=443): Read timed out.\n","Trying to resume download...\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"d028a93b6cc146c5abc72f0469ea7481","version_major":2,"version_minor":0},"text/plain":["model-00008-of-00010.safetensors: 37%|###6 | 671M/1.84G [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"c838854922d141779d3b2e42067ae07c","version_major":2,"version_minor":0},"text/plain":["model-00009-of-00010.safetensors: 0%| | 0.00/1.99G [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"}],"source":["%%time\n","\n","model, tokenizer = load_model(model_name, adapter_name_or_path=adapter_name_or_path, 
using_llama_factory=False)"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[{"ename":"NameError","evalue":"name 'tokenizer' is not defined","output_type":"error","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)","Cell \u001b[0;32mIn[13], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m datasets \u001b[38;5;241m=\u001b[39m load_logical_reasoning_dataset(\n\u001b[1;32m 2\u001b[0m data_path,\n\u001b[0;32m----> 3\u001b[0m tokenizer\u001b[38;5;241m=\u001b[39m\u001b[43mtokenizer\u001b[49m,\n\u001b[1;32m 4\u001b[0m chinese_prompt\u001b[38;5;241m=\u001b[39m\u001b[38;5;129;01mnot\u001b[39;00m use_english_datasets,\n\u001b[1;32m 5\u001b[0m using_p1\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[1;32m 6\u001b[0m )\n","\u001b[0;31mNameError\u001b[0m: name 'tokenizer' is not defined"]}],"source":["datasets = load_logical_reasoning_dataset(\n"," data_path,\n"," tokenizer=tokenizer,\n"," chinese_prompt=not use_english_datasets,\n"," using_p1=False,\n",")"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["def evaluate_model(model, tokenizer, model_name, dataset, batch_size=1):\n"," print(f\"Evaluating model: {model_name} on {device}\")\n"," predictions = eval_model(\n"," model, tokenizer, dataset, device=device, batch_size=batch_size\n"," )\n","\n"," save_results(\n"," model_name,\n"," results_path,\n"," dataset,\n"," predictions,\n"," debug=False,\n"," )\n","\n"," metrics = calc_metrics(dataset[\"label\"], predictions, debug=False)\n"," print(metrics)"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["Evaluating model: internlm/internlm2_5-7b-chat-1m_llama-factory/saves/internlm2_5_7b/lora/sft_bf16_p2_full/checkpoint-88 on mps\n"]},{"name":"stderr","output_type":"stream","text":[" 0%| | 1/3000 [00:06<5:23:35, 
6.47s/it]"]},{"name":"stdout","output_type":"stream","text":["--------\n","step 1: 不是</s>\n","--------\n","step 2: 不是\n","--------\n","step 3: 不是\n","--------\n","step 4: 不是\n","--------\n","step 5: 不是\n"]},{"name":"stderr","output_type":"stream","text":[" 22%|██▏ | 657/3000 [40:38<2:18:00, 3.53s/it]"]}],"source":["%%time\n","\n","evaluate_model(model, tokenizer, f\"{model_name}{'_' + adapter_name_or_path if adapter_name_or_path else ''}\", datasets[\"test\"], batch_size=16)"]}],"metadata":{"accelerator":"GPU","application/vnd.databricks.v1+notebook":{"dashboards":[],"environmentMetadata":null,"language":"python","notebookMetadata":{"mostRecentlyExecutedCommandWithImplicitDF":{"commandId":-1,"dataframes":["_sqldf"]},"pythonIndentUnit":4},"notebookName":"10_eval-lf-medium-py3.11","widgets":{}},"colab":{"gpuType":"L4","provenance":[]},"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.11.9"}},"nbformat":4,"nbformat_minor":0}
|
|
|
1 |
+
{"cells":[{"cell_type":"code","execution_count":1,"metadata":{"executionInfo":{"elapsed":476,"status":"ok","timestamp":1720679526275,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"uWKRSV6eZsCn"},"outputs":[],"source":["%load_ext autoreload\n","%autoreload 2"]},{"cell_type":"code","execution_count":2,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"eb33b19f-1206-41ee-84e2-e6258a12eef7","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":2534,"status":"ok","timestamp":1720679529344,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"xwFh14uiZBrI","outputId":"d767799c-34c2-46a5-f052-378146a55321"},"outputs":[],"source":["from pathlib import Path\n","\n","try:\n"," from google.colab import drive\n","\n"," drive.mount(\"/content/drive\")\n"," workding_dir = \"/content/drive/MyDrive/logical-reasoning/\"\n","except ModuleNotFoundError:\n"," workding_dir = str(Path.cwd().parent)"]},{"cell_type":"code","execution_count":3,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"6d394937-6c99-4a7c-9d32-7600a280032f","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":5,"status":"ok","timestamp":1720679529345,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"G5pNu3zgZBrL","outputId":"160a554f-fb08-4aa0-bc00-0422fb7c1fac"},"outputs":[{"name":"stdout","output_type":"stream","text":["workding dir: /Users/inflaton/code/engd/projects/logical-reasoning\n"]}],"source":["import os\n","import sys\n","from pathlib import Path\n","\n","os.chdir(workding_dir)\n","sys.path.append(workding_dir)\n","print(\"workding dir:\", 
workding_dir)"]},{"cell_type":"code","execution_count":4,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"ac667aba-076e-4de6-9984-8f6a67cb09cd","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":4,"status":"ok","timestamp":1720679529345,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"0dVRAabNZBrL","outputId":"b977e116-df16-47cd-9160-a24f611da687"},"outputs":[{"data":{"text/plain":["False"]},"execution_count":4,"metadata":{},"output_type":"execute_result"}],"source":["need_to_setup_env = False\n","need_to_setup_env"]},{"cell_type":"code","execution_count":5,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"72f9cf79-7b0d-4d9e-90a0-1fa5251b947f","showTitle":false,"title":""},"executionInfo":{"elapsed":4,"status":"ok","timestamp":1720679529345,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"hKUOfP2HZBrL"},"outputs":[],"source":["if need_to_setup_env:\n"," %pip install -r requirements.txt\n"," %cd /content/\n"," %rm -rf LLaMA-Factory\n"," !git clone https://github.com/hiyouga/LLaMA-Factory.git\n"," %cd LLaMA-Factory\n"," %ls\n"," %pip install -e .[torch,bitsandbytes]\n"," \n"," os.chdir(workding_dir)\n"," sys.path.append(workding_dir)\n"," print(\"workding dir:\", workding_dir)"]},{"cell_type":"code","execution_count":6,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"9f67ec60-2f24-411c-84eb-0dd664b44775","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":3,"status":"ok","timestamp":1720679529345,"user":{"displayName":"HUANG DONGHAO 
_","userId":"00977795705617022768"},"user_tz":-480},"id":"hPCC-6m7ZBrM","outputId":"c7aa2c96-5e99-440a-c148-201d79465ff9"},"outputs":[{"name":"stdout","output_type":"stream","text":["loading env vars from: /Users/inflaton/code/engd/projects/logical-reasoning/.env\n"]},{"data":{"text/plain":["True"]},"execution_count":6,"metadata":{},"output_type":"execute_result"}],"source":["from dotenv import find_dotenv, load_dotenv\n","\n","found_dotenv = find_dotenv(\".env\")\n","\n","if len(found_dotenv) == 0:\n"," found_dotenv = find_dotenv(\".env.example\")\n","print(f\"loading env vars from: {found_dotenv}\")\n","load_dotenv(found_dotenv, override=True)"]},{"cell_type":"code","execution_count":7,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"f1597656-8042-4878-9d3b-9ebfb8dd86dc","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":3,"status":"ok","timestamp":1720679529345,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"1M3IraVtZBrM","outputId":"29ab35f6-2970-4ade-d85d-3174acf8cda0"},"outputs":[{"name":"stdout","output_type":"stream","text":["THUDM/glm-4-9b-chat-1m None False datasets/mgtv results/mgtv-results_m3.csv\n"]}],"source":["import os\n","\n","model_name = os.getenv(\"MODEL_NAME\")\n","adapter_name_or_path = os.getenv(\"ADAPTER_NAME_OR_PATH\")\n","load_in_4bit = os.getenv(\"LOAD_IN_4BIT\") == \"true\"\n","data_path = os.getenv(\"LOGICAL_REASONING_DATA_PATH\")\n","results_path = os.getenv(\"LOGICAL_REASONING_RESULTS_PATH\")\n","use_english_datasets = os.getenv(\"USE_ENGLISH_DATASETS\") == \"true\"\n","\n","print(model_name, adapter_name_or_path, load_in_4bit, data_path, 
results_path)"]},{"cell_type":"code","execution_count":8,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{"byteLimit":2048000,"rowLimit":10000},"inputWidgets":{},"nuid":"b2a43943-9324-4839-9a47-cfa72de2244b","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":564,"status":"ok","timestamp":1720679529907,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"UgMvt6dIZBrM","outputId":"ce37581c-fd26-46c2-ad87-d933d99f68f7"},"outputs":[{"name":"stdout","output_type":"stream","text":["Python 3.11.9\n","\u001b[33mWARNING: Package(s) not found: flash-attn\u001b[0m\u001b[33m\n","\u001b[0mCPU times: user 4.12 ms, sys: 9.4 ms, total: 13.5 ms\n","Wall time: 651 ms\n"]}],"source":["%%time\n","!python --version\n","!pip show flash-attn"]},{"cell_type":"code","execution_count":9,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":1685,"status":"ok","timestamp":1720679531591,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"ZuS_FsLyZBrN","outputId":"2cba0105-c505-4395-afbd-2f2fee6581d0"},"outputs":[{"name":"stdout","output_type":"stream","text":["loading /Users/inflaton/code/engd/projects/logical-reasoning/llm_toolkit/logical_reasoning_utils.py\n","MPS is available\n"]}],"source":["from llm_toolkit.llm_utils import *\n","from llm_toolkit.logical_reasoning_utils import *\n","\n","device = check_gpu()"]},{"cell_type":"code","execution_count":14,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["loading model: THUDM/glm-4-9b-chat-1m with adapter: None\n"]},{"name":"stderr","output_type":"stream","text":["Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or 
trained.\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"19aebcc5f4934933878ce3bb1fdc4b32","version_major":2,"version_minor":0},"text/plain":["Downloading shards: 0%| | 0/10 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"c161cec46c72435a8b8d0beab1a329fa","version_major":2,"version_minor":0},"text/plain":["model-00006-of-00010.safetensors: 96%|#########6| 1.91G/1.99G [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"13d593455bc94919846a6b5bd288740a","version_major":2,"version_minor":0},"text/plain":["model-00007-of-00010.safetensors: 0%| | 0.00/1.95G [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"6154990ed2f047348720919a884f67d0","version_major":2,"version_minor":0},"text/plain":["model-00008-of-00010.safetensors: 0%| | 0.00/1.84G [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"name":"stderr","output_type":"stream","text":["Error while downloading from 
https://cdn-lfs-us-1.huggingface.co/repos/8f/69/8f69006a64acf627b9dd5b8ed9962abf5ef416d57fc370004ac5f598cea0df41/ea76696ccd8b266eff2e755d9286a789a2b8d644a19b2cdccd365933e97cfa94?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27model-00008-of-00010.safetensors%3B+filename%3D%22model-00008-of-00010.safetensors%22%3B&Expires=1721695460&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyMTY5NTQ2MH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zLzhmLzY5LzhmNjkwMDZhNjRhY2Y2MjdiOWRkNWI4ZWQ5OTYyYWJmNWVmNDE2ZDU3ZmMzNzAwMDRhYzVmNTk4Y2VhMGRmNDEvZWE3NjY5NmNjZDhiMjY2ZWZmMmU3NTVkOTI4NmE3ODlhMmI4ZDY0NGExOWIyY2RjY2QzNjU5MzNlOTdjZmE5ND9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=BFlf2hKIdIMTSR%7E5dNEt5kHkOVcTfJOy3l8sseMydbGlzagX5bSyy18zLAc450pgxun6NKtv1ke8Db3nCDnl4DSSFOkzxH0zEzNzWN0Jt7P7axDubD%7EE7qeD1VMn1NB-r8OI0QDaF9Z%7EnAd9--fXyiXX7hacB3aCvmnsiwQbHkNy2DO89UKGcKIrf1yvXQMY-uO3RYtypVGqwjpgBBE9pf3n-Z1SEjAHwpxLeqowvj3Jc8yI5M-R60ymjlGe24Zcrdg25ScWJxnzKlqrAPB2p3P9clz7LdxLiI-7Ip0k8TsbkM-5BMe-yc99ED77-qzb6t9qQhlAFCOznu67KjFb-w__&Key-Pair-Id=K24J24Z295AEI9: HTTPSConnectionPool(host='cdn-lfs-us-1.huggingface.co', port=443): Read timed out.\n","Trying to resume download...\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"0c0b15e2f9e344b9ad4274fcc150d55d","version_major":2,"version_minor":0},"text/plain":["model-00008-of-00010.safetensors: 31%|### | 566M/1.84G [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"name":"stderr","output_type":"stream","text":["Error while downloading from 
https://cdn-lfs-us-1.huggingface.co/repos/8f/69/8f69006a64acf627b9dd5b8ed9962abf5ef416d57fc370004ac5f598cea0df41/ea76696ccd8b266eff2e755d9286a789a2b8d644a19b2cdccd365933e97cfa94?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27model-00008-of-00010.safetensors%3B+filename%3D%22model-00008-of-00010.safetensors%22%3B&Expires=1721695460&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyMTY5NTQ2MH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zLzhmLzY5LzhmNjkwMDZhNjRhY2Y2MjdiOWRkNWI4ZWQ5OTYyYWJmNWVmNDE2ZDU3ZmMzNzAwMDRhYzVmNTk4Y2VhMGRmNDEvZWE3NjY5NmNjZDhiMjY2ZWZmMmU3NTVkOTI4NmE3ODlhMmI4ZDY0NGExOWIyY2RjY2QzNjU5MzNlOTdjZmE5ND9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=BFlf2hKIdIMTSR%7E5dNEt5kHkOVcTfJOy3l8sseMydbGlzagX5bSyy18zLAc450pgxun6NKtv1ke8Db3nCDnl4DSSFOkzxH0zEzNzWN0Jt7P7axDubD%7EE7qeD1VMn1NB-r8OI0QDaF9Z%7EnAd9--fXyiXX7hacB3aCvmnsiwQbHkNy2DO89UKGcKIrf1yvXQMY-uO3RYtypVGqwjpgBBE9pf3n-Z1SEjAHwpxLeqowvj3Jc8yI5M-R60ymjlGe24Zcrdg25ScWJxnzKlqrAPB2p3P9clz7LdxLiI-7Ip0k8TsbkM-5BMe-yc99ED77-qzb6t9qQhlAFCOznu67KjFb-w__&Key-Pair-Id=K24J24Z295AEI9: HTTPSConnectionPool(host='cdn-lfs-us-1.huggingface.co', port=443): Read timed out.\n","Trying to resume download...\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"82f48613f82047a7abc23a99939275ac","version_major":2,"version_minor":0},"text/plain":["model-00008-of-00010.safetensors: 34%|###3 | 619M/1.84G [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"name":"stderr","output_type":"stream","text":["Error while downloading from 
https://cdn-lfs-us-1.huggingface.co/repos/8f/69/8f69006a64acf627b9dd5b8ed9962abf5ef416d57fc370004ac5f598cea0df41/ea76696ccd8b266eff2e755d9286a789a2b8d644a19b2cdccd365933e97cfa94?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27model-00008-of-00010.safetensors%3B+filename%3D%22model-00008-of-00010.safetensors%22%3B&Expires=1721695460&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyMTY5NTQ2MH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zLzhmLzY5LzhmNjkwMDZhNjRhY2Y2MjdiOWRkNWI4ZWQ5OTYyYWJmNWVmNDE2ZDU3ZmMzNzAwMDRhYzVmNTk4Y2VhMGRmNDEvZWE3NjY5NmNjZDhiMjY2ZWZmMmU3NTVkOTI4NmE3ODlhMmI4ZDY0NGExOWIyY2RjY2QzNjU5MzNlOTdjZmE5ND9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=BFlf2hKIdIMTSR%7E5dNEt5kHkOVcTfJOy3l8sseMydbGlzagX5bSyy18zLAc450pgxun6NKtv1ke8Db3nCDnl4DSSFOkzxH0zEzNzWN0Jt7P7axDubD%7EE7qeD1VMn1NB-r8OI0QDaF9Z%7EnAd9--fXyiXX7hacB3aCvmnsiwQbHkNy2DO89UKGcKIrf1yvXQMY-uO3RYtypVGqwjpgBBE9pf3n-Z1SEjAHwpxLeqowvj3Jc8yI5M-R60ymjlGe24Zcrdg25ScWJxnzKlqrAPB2p3P9clz7LdxLiI-7Ip0k8TsbkM-5BMe-yc99ED77-qzb6t9qQhlAFCOznu67KjFb-w__&Key-Pair-Id=K24J24Z295AEI9: HTTPSConnectionPool(host='cdn-lfs-us-1.huggingface.co', port=443): Read timed out.\n","Trying to resume download...\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"d028a93b6cc146c5abc72f0469ea7481","version_major":2,"version_minor":0},"text/plain":["model-00008-of-00010.safetensors: 37%|###6 | 671M/1.84G [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"c838854922d141779d3b2e42067ae07c","version_major":2,"version_minor":0},"text/plain":["model-00009-of-00010.safetensors: 0%| | 0.00/1.99G [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"name":"stderr","output_type":"stream","text":["Error while downloading from 
https://cdn-lfs-us-1.huggingface.co/repos/8f/69/8f69006a64acf627b9dd5b8ed9962abf5ef416d57fc370004ac5f598cea0df41/6100cba5825057f51778ab70e4c4fdd4485a401a8ef97893bf6b36a8efd48ce3?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27model-00009-of-00010.safetensors%3B+filename%3D%22model-00009-of-00010.safetensors%22%3B&Expires=1721697773&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyMTY5Nzc3M319LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zLzhmLzY5LzhmNjkwMDZhNjRhY2Y2MjdiOWRkNWI4ZWQ5OTYyYWJmNWVmNDE2ZDU3ZmMzNzAwMDRhYzVmNTk4Y2VhMGRmNDEvNjEwMGNiYTU4MjUwNTdmNTE3NzhhYjcwZTRjNGZkZDQ0ODVhNDAxYThlZjk3ODkzYmY2YjM2YThlZmQ0OGNlMz9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=aGPzuWpi1BvTr2gbos2WuuTyyC-JMAAE1YWe3nZNSwTDzc3vlHUly2jUSCRCa8K2exeOZQsiXFCRSuHsk4Qa6gM1Qa--pmB0zsashQLoK2aazR8uUzzM8H4M7UQoOYTmGsEg9znoKZIMnmajJBWYDyp9MEXMkw%7ELDooaZO-G0DHjYi-bAbarua-D2tCMw%7EuyylMzUisJ2bywAnfDjpJ%7EU9GRVEGzcMYjI3178sOu09es4IYPG-D2beO217KOJsO6W4QtwUsbMmGBKzq2GRV2U6ZNn0wm%7EHogTyk0mIaVYm8a-kHi5JfypmzDGGwyJJCK-XvsTMSRB33rOT84muusGg__&Key-Pair-Id=K24J24Z295AEI9: HTTPSConnectionPool(host='cdn-lfs-us-1.huggingface.co', port=443): Read timed out.\n","Trying to resume download...\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"276e5b1f083e4c40a08a7a94f63a8394","version_major":2,"version_minor":0},"text/plain":["model-00009-of-00010.safetensors: 62%|######2 | 1.24G/1.99G [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"7a92b91006b745aa9e3c471a8d33f32b","version_major":2,"version_minor":0},"text/plain":["model-00010-of-00010.safetensors: 0%| | 0.00/1.65G [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"name":"stderr","output_type":"stream","text":["Error while downloading from 
https://cdn-lfs-us-1.huggingface.co/repos/8f/69/8f69006a64acf627b9dd5b8ed9962abf5ef416d57fc370004ac5f598cea0df41/e12b9195e943b6561c6541008881350dc7488520b9938427c1a7b97ddc147283?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27model-00010-of-00010.safetensors%3B+filename%3D%22model-00010-of-00010.safetensors%22%3B&Expires=1721699528&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyMTY5OTUyOH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zLzhmLzY5LzhmNjkwMDZhNjRhY2Y2MjdiOWRkNWI4ZWQ5OTYyYWJmNWVmNDE2ZDU3ZmMzNzAwMDRhYzVmNTk4Y2VhMGRmNDEvZTEyYjkxOTVlOTQzYjY1NjFjNjU0MTAwODg4MTM1MGRjNzQ4ODUyMGI5OTM4NDI3YzFhN2I5N2RkYzE0NzI4Mz9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=ug6MzTDVzWb5Z6HTBIeFu%7EJkbAaGC%7E%7ECPnM04d9ZtcPtPMBqF-D1i2mc37EggfJDQ7YGHoON-qgUP%7E1GOw6R9klaND3IXCIvugfHfu6IY2k5kuYpN5JIL9yWS0ocYCPdU28fJMXAasjwLLYdXiACgzOEQd8hVQQmCBzw6QPzzAdnkM5ARG5%7EQW%7EVD1nY0aySU1DeYT9gMj74HsJuoJGBbuQSTwZw9i3Wrn9pDbEBW6fJ5uEM-hJIziGhvAX28dW0UKcYVP-8bfvvbcMmax20pZDHUgksqXTbMgJ9f7BdN7GhsH-AzYu9d3FvNXgF2jtoiHrjXA6VcFZyyTOknLr9zw__&Key-Pair-Id=K24J24Z295AEI9: HTTPSConnectionPool(host='cdn-lfs-us-1.huggingface.co', port=443): Read timed out.\n","Trying to resume download...\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"86aa33621fd746e6ae3f0303aba3bb25","version_major":2,"version_minor":0},"text/plain":["model-00010-of-00010.safetensors: 35%|###4 | 577M/1.65G [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"name":"stderr","output_type":"stream","text":["Error while downloading from 
https://cdn-lfs-us-1.huggingface.co/repos/8f/69/8f69006a64acf627b9dd5b8ed9962abf5ef416d57fc370004ac5f598cea0df41/e12b9195e943b6561c6541008881350dc7488520b9938427c1a7b97ddc147283?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27model-00010-of-00010.safetensors%3B+filename%3D%22model-00010-of-00010.safetensors%22%3B&Expires=1721699528&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyMTY5OTUyOH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zLzhmLzY5LzhmNjkwMDZhNjRhY2Y2MjdiOWRkNWI4ZWQ5OTYyYWJmNWVmNDE2ZDU3ZmMzNzAwMDRhYzVmNTk4Y2VhMGRmNDEvZTEyYjkxOTVlOTQzYjY1NjFjNjU0MTAwODg4MTM1MGRjNzQ4ODUyMGI5OTM4NDI3YzFhN2I5N2RkYzE0NzI4Mz9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=ug6MzTDVzWb5Z6HTBIeFu%7EJkbAaGC%7E%7ECPnM04d9ZtcPtPMBqF-D1i2mc37EggfJDQ7YGHoON-qgUP%7E1GOw6R9klaND3IXCIvugfHfu6IY2k5kuYpN5JIL9yWS0ocYCPdU28fJMXAasjwLLYdXiACgzOEQd8hVQQmCBzw6QPzzAdnkM5ARG5%7EQW%7EVD1nY0aySU1DeYT9gMj74HsJuoJGBbuQSTwZw9i3Wrn9pDbEBW6fJ5uEM-hJIziGhvAX28dW0UKcYVP-8bfvvbcMmax20pZDHUgksqXTbMgJ9f7BdN7GhsH-AzYu9d3FvNXgF2jtoiHrjXA6VcFZyyTOknLr9zw__&Key-Pair-Id=K24J24Z295AEI9: HTTPSConnectionPool(host='cdn-lfs-us-1.huggingface.co', port=443): Read timed out.\n","Trying to resume download...\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"f7b297755911496a9ee53e3560136e23","version_major":2,"version_minor":0},"text/plain":["model-00010-of-00010.safetensors: 64%|######4 | 1.06G/1.65G [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"},{"name":"stderr","output_type":"stream","text":["Error while downloading from 
https://cdn-lfs-us-1.huggingface.co/repos/8f/69/8f69006a64acf627b9dd5b8ed9962abf5ef416d57fc370004ac5f598cea0df41/e12b9195e943b6561c6541008881350dc7488520b9938427c1a7b97ddc147283?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27model-00010-of-00010.safetensors%3B+filename%3D%22model-00010-of-00010.safetensors%22%3B&Expires=1721699528&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyMTY5OTUyOH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zLzhmLzY5LzhmNjkwMDZhNjRhY2Y2MjdiOWRkNWI4ZWQ5OTYyYWJmNWVmNDE2ZDU3ZmMzNzAwMDRhYzVmNTk4Y2VhMGRmNDEvZTEyYjkxOTVlOTQzYjY1NjFjNjU0MTAwODg4MTM1MGRjNzQ4ODUyMGI5OTM4NDI3YzFhN2I5N2RkYzE0NzI4Mz9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=ug6MzTDVzWb5Z6HTBIeFu%7EJkbAaGC%7E%7ECPnM04d9ZtcPtPMBqF-D1i2mc37EggfJDQ7YGHoON-qgUP%7E1GOw6R9klaND3IXCIvugfHfu6IY2k5kuYpN5JIL9yWS0ocYCPdU28fJMXAasjwLLYdXiACgzOEQd8hVQQmCBzw6QPzzAdnkM5ARG5%7EQW%7EVD1nY0aySU1DeYT9gMj74HsJuoJGBbuQSTwZw9i3Wrn9pDbEBW6fJ5uEM-hJIziGhvAX28dW0UKcYVP-8bfvvbcMmax20pZDHUgksqXTbMgJ9f7BdN7GhsH-AzYu9d3FvNXgF2jtoiHrjXA6VcFZyyTOknLr9zw__&Key-Pair-Id=K24J24Z295AEI9: HTTPSConnectionPool(host='cdn-lfs-us-1.huggingface.co', port=443): Read timed out.\n","Trying to resume download...\n"]},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"16b66bf89ab24bad9bd8732e382e7262","version_major":2,"version_minor":0},"text/plain":["model-00010-of-00010.safetensors: 64%|######4 | 1.06G/1.65G [00:00<?, ?B/s]"]},"metadata":{},"output_type":"display_data"}],"source":["%%time\n","\n","model, tokenizer = load_model(model_name, adapter_name_or_path=adapter_name_or_path, using_llama_factory=False)"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[{"ename":"NameError","evalue":"name 'tokenizer' is not defined","output_type":"error","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mNameError\u001b[0m Traceback (most 
recent call last)","Cell \u001b[0;32mIn[13], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m datasets \u001b[38;5;241m=\u001b[39m load_logical_reasoning_dataset(\n\u001b[1;32m 2\u001b[0m data_path,\n\u001b[0;32m----> 3\u001b[0m tokenizer\u001b[38;5;241m=\u001b[39m\u001b[43mtokenizer\u001b[49m,\n\u001b[1;32m 4\u001b[0m chinese_prompt\u001b[38;5;241m=\u001b[39m\u001b[38;5;129;01mnot\u001b[39;00m use_english_datasets,\n\u001b[1;32m 5\u001b[0m using_p1\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[1;32m 6\u001b[0m )\n","\u001b[0;31mNameError\u001b[0m: name 'tokenizer' is not defined"]}],"source":["datasets = load_logical_reasoning_dataset(\n"," data_path,\n"," tokenizer=tokenizer,\n"," chinese_prompt=not use_english_datasets,\n"," using_p1=False,\n",")"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["def evaluate_model(model, tokenizer, model_name, dataset, batch_size=1):\n"," print(f\"Evaluating model: {model_name} on {device}\")\n"," predictions = eval_model(\n"," model, tokenizer, dataset, device=device, batch_size=batch_size\n"," )\n","\n"," save_results(\n"," model_name,\n"," results_path,\n"," dataset,\n"," predictions,\n"," debug=False,\n"," )\n","\n"," metrics = calc_metrics(dataset[\"label\"], predictions, debug=False)\n"," print(metrics)"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["Evaluating model: internlm/internlm2_5-7b-chat-1m_llama-factory/saves/internlm2_5_7b/lora/sft_bf16_p2_full/checkpoint-88 on mps\n"]},{"name":"stderr","output_type":"stream","text":[" 0%| | 1/3000 [00:06<5:23:35, 6.47s/it]"]},{"name":"stdout","output_type":"stream","text":["--------\n","step 1: 不是</s>\n","--------\n","step 2: 不是\n","--------\n","step 3: 不是\n","--------\n","step 4: 不是\n","--------\n","step 5: 不是\n"]},{"name":"stderr","output_type":"stream","text":[" 22%|██▏ | 657/3000 [40:38<2:18:00, 
3.53s/it]"]}],"source":["%%time\n","\n","evaluate_model(model, tokenizer, f\"{model_name}{'_' + adapter_name_or_path if adapter_name_or_path else ''}\", datasets[\"test\"], batch_size=16)"]}],"metadata":{"accelerator":"GPU","application/vnd.databricks.v1+notebook":{"dashboards":[],"environmentMetadata":null,"language":"python","notebookMetadata":{"mostRecentlyExecutedCommandWithImplicitDF":{"commandId":-1,"dataframes":["_sqldf"]},"pythonIndentUnit":4},"notebookName":"10_eval-lf-medium-py3.11","widgets":{}},"colab":{"gpuType":"L4","provenance":[]},"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.11.9"}},"nbformat":4,"nbformat_minor":0}
|
llama-factory/config/glm-4-9b_lora_sft_bf16-p1.yaml
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# LLaMA-Factory LoRA SFT config: GLM-4-9B-Chat-1M on the MGTV logical-reasoning
# dataset, P1 prompt variant, bf16 training.

### model
model_name_or_path: THUDM/glm-4-9b-chat-1m

### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
# quantization_bit: 4  # use 4-bit QLoRA
loraplus_lr_ratio: 16.0  # use LoRA+ with lambda=16.0
# use_unsloth: true  # use UnslothAI's LoRA optimization for 2x faster training
upcast_layernorm: true

### dataset
dataset: alpaca_mgtv_p1
# The chat template has to match the model family. The base model above is
# GLM-4, whose LLaMA-Factory template is `glm4`; the previous `llama3` value
# formatted prompts with Llama-3 markers that GLM-4 was not trained on.
template: glm4
cutoff_len: 4096
max_samples: 25000
overwrite_cache: true
preprocessing_num_workers: 16

### output
# Relative path; the eval script looks for these adapters under
# llama-factory/saves/glm-4-9b/lora/sft_bf16_p1_full.
output_dir: saves/glm-4-9b/lora/sft_bf16_p1_full
logging_steps: 10
save_steps: 175  # same cadence as eval_steps below
plot_loss: true
# overwrite_output_dir: true

### train
# 16 sequences x 8 accumulation steps = 128 sequences per optimizer step per device.
per_device_train_batch_size: 16
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 4.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000

### eval
val_size: 0.1
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 175

report_to: wandb
run_name: glm-4-9b_p1_full  # optional
|
llama-factory/config/glm-4-9b_lora_sft_bf16-p2.yaml
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# LLaMA-Factory LoRA SFT config: GLM-4-9B-Chat-1M on the MGTV logical-reasoning
# dataset, P2 prompt variant, bf16 training.

### model
model_name_or_path: THUDM/glm-4-9b-chat-1m

### method
stage: sft
do_train: true
finetuning_type: lora
lora_target: all
# quantization_bit: 4  # use 4-bit QLoRA
loraplus_lr_ratio: 16.0  # use LoRA+ with lambda=16.0
# use_unsloth: true  # use UnslothAI's LoRA optimization for 2x faster training
upcast_layernorm: true

### dataset
# P2 prompt variant. This file previously pointed at alpaca_mgtv_p1, which made
# the "p2" run a duplicate of the p1 run.
# NOTE(review): confirm `alpaca_mgtv_p2` is the name registered in
# LLaMA-Factory's data/dataset_info.json.
dataset: alpaca_mgtv_p2
# The chat template has to match the model family. The base model above is
# GLM-4, whose LLaMA-Factory template is `glm4`; the previous `llama3` value
# formatted prompts with Llama-3 markers that GLM-4 was not trained on.
template: glm4
cutoff_len: 4096
max_samples: 25000
overwrite_cache: true
preprocessing_num_workers: 16

### output
# Must be the P2 directory: scripts/eval-mgtv-glm-4-9b.sh reads the P2 adapters
# from llama-factory/saves/glm-4-9b/lora/sft_bf16_p2_full. Writing to the P1
# directory would also collide with the P1 run's checkpoints.
output_dir: saves/glm-4-9b/lora/sft_bf16_p2_full
logging_steps: 10
save_steps: 175  # same cadence as eval_steps below
plot_loss: true
# overwrite_output_dir: true

### train
# 16 sequences x 8 accumulation steps = 128 sequences per optimizer step per device.
per_device_train_batch_size: 16
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
num_train_epochs: 4.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
ddp_timeout: 180000000

### eval
val_size: 0.1
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 175

report_to: wandb
run_name: glm-4-9b_p2_full  # optional
|
results/mgtv-results_internlm_best.csv
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
mgtv-results_m3.csv
|
|
|
|
results/mgtv-results_internlm_best.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
results/mgtv-results_m3.csv
DELETED
The diff for this file is too large to render.
See raw diff
|
|
scripts/eval-mgtv-glm-4-9b.sh
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/sh
# Evaluate the GLM-4-9B LoRA adapters on the MGTV logical-reasoning dataset,
# once for the P1 prompt variant and once for the P2 prompt variant.
#
# Usage: scripts/eval-mgtv-glm-4-9b.sh    (can be started from any directory)
#
# Configuration is passed to llm_toolkit/eval_logical_reasoning_all_epochs.py
# through the environment variables exported below; the Python script is
# invoked without arguments.

# Switch to the repository root (parent of this script's directory) so the
# relative paths below resolve. Stop if that fails instead of evaluating from
# an unrelated working directory.
BASEDIR=$(dirname "$0")
cd "$BASEDIR/.." || exit 1
echo Current Directory:
pwd

# From here on BASEDIR is the absolute path of the repository root.
BASEDIR=$(pwd)

# Log the host environment. Informational only: the script has no `set -e`,
# so a missing tool (e.g. nvidia-smi on a non-NVIDIA host) does not stop it.
nvidia-smi
uname -a
cat /etc/os-release
lscpu
grep MemTotal /proc/meminfo

#pip install -r requirements.txt
#cd ../LLaMA-Factory && pip install -e .[torch,bitsandbytes]

# Settings shared by both evaluation passes.
# NOTE(review): RESIZE_TOKEN_EMBEDDINGS and START_EPOCH are consumed by the
# Python evaluation code, which is not visible here - confirm their meaning.
export LOGICAL_REASONING_DATA_PATH=datasets/mgtv
export RESIZE_TOKEN_EMBEDDINGS=true
export START_EPOCH=0

export MODEL_NAME=THUDM/glm-4-9b-chat-1m
export MODEL_PREFIX=glm-4-9b_lora_sft_bf16

# Pass 1: adapters trained with the P1 prompt.
export LOGICAL_REASONING_RESULTS_PATH="results/${MODEL_PREFIX}-p1.csv"
export ADAPTER_PATH_BASE=llama-factory/saves/glm-4-9b/lora/sft_bf16_p1_full
export USING_P1_PROMPT_TEMPLATE=true
echo "Eval $MODEL_NAME with $ADAPTER_PATH_BASE"
python llm_toolkit/eval_logical_reasoning_all_epochs.py

# Pass 2: adapters trained with the P2 prompt.
export LOGICAL_REASONING_RESULTS_PATH="results/${MODEL_PREFIX}-p2.csv"
export ADAPTER_PATH_BASE=llama-factory/saves/glm-4-9b/lora/sft_bf16_p2_full
export USING_P1_PROMPT_TEMPLATE=false
echo "Eval $MODEL_NAME with $ADAPTER_PATH_BASE"
python llm_toolkit/eval_logical_reasoning_all_epochs.py
|
scripts/tune-mgtv-glm-4-9b.sh
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/sh
# Fine-tune GLM-4-9B with LoRA on the MGTV logical-reasoning dataset for both
# prompt variants (P1, then P2), then run the matching evaluation script.
#
# Usage: scripts/tune-mgtv-glm-4-9b.sh    (can be started from any directory)
#
# Each tuning run is delegated to scripts/tune-lf.sh, which receives the
# config file path as its only argument.

# Switch to the repository root (parent of this script's directory). Stop if
# that fails instead of launching training from an unrelated directory.
BASEDIR=$(dirname "$0")
cd "$BASEDIR/.." || exit 1
echo Current Directory:
pwd

# From here on BASEDIR is the absolute path of the repository root; it is used
# to locate the helper scripts below.
BASEDIR=$(pwd)

# Log the host environment. Informational only: the script has no `set -e`,
# so a missing tool (e.g. nvidia-smi on a non-NVIDIA host) does not stop it.
nvidia-smi
uname -a
cat /etc/os-release
lscpu
grep MemTotal /proc/meminfo

#pip install -r requirements.txt
#cd ../LLaMA-Factory && pip install -e .[torch,bitsandbytes] && cd $BASEDIR
#pip install transformers==4.41.2
#pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

export LOGICAL_REASONING_DATA_PATH=datasets/mgtv

export MODEL_NAME=THUDM/glm-4-9b-chat-1m
export MODEL_PREFIX=glm-4-9b_lora_sft_bf16

# Run 1: P1 prompt variant.
# NOTE(review): the config path is relative; presumably tune-lf.sh resolves it
# against the llama-factory directory - confirm in that script.
export CONFIG_FILE="config/${MODEL_PREFIX}-p1.yaml"
echo "Tuning $MODEL_NAME with $CONFIG_FILE"
"$BASEDIR/scripts/tune-lf.sh" "$CONFIG_FILE"

# Run 2: P2 prompt variant. As before, this still runs if run 1 failed.
export CONFIG_FILE="config/${MODEL_PREFIX}-p2.yaml"
echo "Tuning $MODEL_NAME with $CONFIG_FILE"
"$BASEDIR/scripts/tune-lf.sh" "$CONFIG_FILE"

# Evaluate the adapters produced by both runs.
"$BASEDIR/scripts/eval-mgtv-glm-4-9b.sh"
|
scripts/tune-mgtv.sh
CHANGED
@@ -1 +1 @@
|
|
1 |
-
tune-mgtv-
|
|
|
1 |
+
tune-mgtv-glm-4-9b.sh
|