{"cells":[{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{},"inputWidgets":{},"nuid":"0ea8b46b-839b-445b-8043-ccdf4e920ace","showTitle":false,"title":""},"id":"YLH80COBzi_F"},"outputs":[],"source":["%load_ext autoreload\n","%autoreload 2"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":3258,"status":"ok","timestamp":1720184815416,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"63B5exAuzq4M","outputId":"0e375aef-9a56-4dc7-99de-c4c5c86816d7"},"outputs":[{"name":"stdout","output_type":"stream","text":["Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"]}],"source":["from google.colab import drive\n","drive.mount('/content/drive')"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":152949,"status":"ok","timestamp":1720184968361,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"lU1-raSK_s-n","outputId":"b82539bf-d5a1-42f8-bb8e-a56a5c8f97d1"},"outputs":[{"name":"stdout","output_type":"stream","text":["/content\n","Cloning into 'LLaMA-Factory'...\n","remote: Enumerating objects: 15261, done.\u001b[K\n","remote: Counting objects: 100% (211/211), done.\u001b[K\n","remote: Compressing objects: 100% (91/91), done.\u001b[K\n","remote: Total 15261 (delta 128), reused 179 (delta 120), pack-reused 15050\u001b[K\n","Receiving objects: 100% (15261/15261), 221.42 MiB | 14.45 MiB/s, done.\n","Resolving deltas: 100% (11177/11177), done.\n","/content/LLaMA-Factory\n","\u001b[0m\u001b[01;34massets\u001b[0m/ \u001b[01;34mdocker\u001b[0m/ LICENSE pyproject.toml requirements.txt \u001b[01;34msrc\u001b[0m/\n","CITATION.cff \u001b[01;34mevaluation\u001b[0m/ Makefile README.md \u001b[01;34mscripts\u001b[0m/ \u001b[01;34mtests\u001b[0m/\n","\u001b[01;34mdata\u001b[0m/ \u001b[01;34mexamples\u001b[0m/ MANIFEST.in README_zh.md setup.py\n","\u001b[33mWARNING: Ignoring invalid distribution -yarrow (/usr/local/lib/python3.10/dist-packages)\u001b[0m\u001b[33m\n","\u001b[0mObtaining file:///content/LLaMA-Factory\n"," Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n"," Checking if build backend supports build_editable ... \u001b[?25l\u001b[?25hdone\n"," Getting requirements to build editable ... \u001b[?25l\u001b[?25hdone\n"," Preparing editable metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n","\u001b[33m WARNING: Ignoring invalid distribution -yarrow (/usr/local/lib/python3.10/dist-packages)\u001b[0m\u001b[33m\n","\u001b[0mRequirement already satisfied: transformers>=4.41.2 in /usr/local/lib/python3.10/dist-packages (from llamafactory==0.8.3.dev0) (4.41.2)\n","Collecting datasets>=2.16.0 (from llamafactory==0.8.3.dev0)\n"," Using cached datasets-2.20.0-py3-none-any.whl (547 kB)\n","Collecting accelerate>=0.30.1 (from llamafactory==0.8.3.dev0)\n"," Using cached accelerate-0.32.1-py3-none-any.whl (314 kB)\n","Collecting peft>=0.11.1 (from llamafactory==0.8.3.dev0)\n"," Using cached peft-0.11.1-py3-none-any.whl (251 kB)\n","Collecting trl>=0.8.6 (from llamafactory==0.8.3.dev0)\n"," Using cached trl-0.9.4-py3-none-any.whl (226 kB)\n","Collecting gradio>=4.0.0 (from llamafactory==0.8.3.dev0)\n"," Using cached gradio-4.37.2-py3-none-any.whl (12.3 MB)\n","Requirement already satisfied: pandas>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from llamafactory==0.8.3.dev0) (2.0.3)\n","Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from llamafactory==0.8.3.dev0) (1.11.4)\n","Collecting einops (from llamafactory==0.8.3.dev0)\n"," Using cached einops-0.8.0-py3-none-any.whl (43 kB)\n","Requirement already satisfied: sentencepiece in /usr/local/lib/python3.10/dist-packages (from llamafactory==0.8.3.dev0) (0.1.99)\n","Collecting tiktoken (from llamafactory==0.8.3.dev0)\n"," Using cached tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)\n","Requirement already satisfied: protobuf in /usr/local/lib/python3.10/dist-packages (from llamafactory==0.8.3.dev0) (3.20.3)\n","Collecting uvicorn (from llamafactory==0.8.3.dev0)\n"," Using cached uvicorn-0.30.1-py3-none-any.whl (62 kB)\n","Requirement already satisfied: pydantic in /usr/local/lib/python3.10/dist-packages (from llamafactory==0.8.3.dev0) (2.8.0)\n","Collecting fastapi (from llamafactory==0.8.3.dev0)\n"," Using cached fastapi-0.111.0-py3-none-any.whl (91 kB)\n","Collecting sse-starlette (from llamafactory==0.8.3.dev0)\n"," Using cached sse_starlette-2.1.2-py3-none-any.whl (9.3 kB)\n","Requirement already satisfied: matplotlib>=3.7.0 in /usr/local/lib/python3.10/dist-packages (from llamafactory==0.8.3.dev0) (3.7.1)\n","Collecting fire (from llamafactory==0.8.3.dev0)\n"," Using cached fire-0.6.0-py2.py3-none-any.whl\n","Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from llamafactory==0.8.3.dev0) (24.1)\n","Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from llamafactory==0.8.3.dev0) (6.0.1)\n","Requirement already satisfied: numpy<2.0.0 in /usr/local/lib/python3.10/dist-packages (from llamafactory==0.8.3.dev0) (1.25.2)\n","Collecting bitsandbytes>=0.39.0 (from llamafactory==0.8.3.dev0)\n"," Using cached bitsandbytes-0.43.1-py3-none-manylinux_2_24_x86_64.whl (119.8 MB)\n","Requirement already satisfied: torch>=1.13.1 in /usr/local/lib/python3.10/dist-packages (from llamafactory==0.8.3.dev0) (2.3.0+cu121)\n","Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate>=0.30.1->llamafactory==0.8.3.dev0) (5.9.5)\n","Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.10/dist-packages (from accelerate>=0.30.1->llamafactory==0.8.3.dev0) (0.23.4)\n","Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from accelerate>=0.30.1->llamafactory==0.8.3.dev0) (0.4.3)\n","Requirement already 
satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->llamafactory==0.8.3.dev0) (3.15.4)\n","Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->llamafactory==0.8.3.dev0) (16.1.0)\n","Requirement already satisfied: pyarrow-hotfix in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->llamafactory==0.8.3.dev0) (0.6)\n","Collecting dill<0.3.9,>=0.3.0 (from datasets>=2.16.0->llamafactory==0.8.3.dev0)\n"," Using cached dill-0.3.8-py3-none-any.whl (116 kB)\n","Requirement already satisfied: requests>=2.32.2 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->llamafactory==0.8.3.dev0) (2.32.3)\n","Requirement already satisfied: tqdm>=4.66.3 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->llamafactory==0.8.3.dev0) (4.66.4)\n","Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->llamafactory==0.8.3.dev0) (3.4.1)\n","Collecting multiprocess (from datasets>=2.16.0->llamafactory==0.8.3.dev0)\n"," Using cached multiprocess-0.70.16-py310-none-any.whl (134 kB)\n","Requirement already satisfied: fsspec[http]<=2024.5.0,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->llamafactory==0.8.3.dev0) (2023.6.0)\n","Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->llamafactory==0.8.3.dev0) (3.9.5)\n","Collecting aiofiles<24.0,>=22.0 (from gradio>=4.0.0->llamafactory==0.8.3.dev0)\n"," Using cached aiofiles-23.2.1-py3-none-any.whl (15 kB)\n","Requirement already satisfied: altair<6.0,>=4.2.0 in /usr/local/lib/python3.10/dist-packages (from gradio>=4.0.0->llamafactory==0.8.3.dev0) (4.2.2)\n","Requirement already satisfied: ffmpy in /usr/local/lib/python3.10/dist-packages (from gradio>=4.0.0->llamafactory==0.8.3.dev0) (0.3.2)\n","Collecting gradio-client==1.0.2 (from gradio>=4.0.0->llamafactory==0.8.3.dev0)\n"," Using cached gradio_client-1.0.2-py3-none-any.whl (318 kB)\n","Collecting httpx>=0.24.1 (from gradio>=4.0.0->llamafactory==0.8.3.dev0)\n"," Using cached httpx-0.27.0-py3-none-any.whl (75 kB)\n","Requirement already satisfied: importlib-resources<7.0,>=1.3 in /usr/local/lib/python3.10/dist-packages (from gradio>=4.0.0->llamafactory==0.8.3.dev0) (6.4.0)\n","Requirement already satisfied: jinja2<4.0 in /usr/local/lib/python3.10/dist-packages (from gradio>=4.0.0->llamafactory==0.8.3.dev0) (3.1.4)\n","Requirement already satisfied: markupsafe~=2.0 in /usr/local/lib/python3.10/dist-packages (from gradio>=4.0.0->llamafactory==0.8.3.dev0) (2.1.5)\n","Collecting orjson~=3.0 (from gradio>=4.0.0->llamafactory==0.8.3.dev0)\n"," Using cached orjson-3.10.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (141 kB)\n","Requirement already satisfied: pillow<11.0,>=8.0 in /usr/local/lib/python3.10/dist-packages (from gradio>=4.0.0->llamafactory==0.8.3.dev0) (9.4.0)\n","Requirement already satisfied: pydub in /usr/local/lib/python3.10/dist-packages (from gradio>=4.0.0->llamafactory==0.8.3.dev0) (0.25.1)\n","Requirement already satisfied: python-multipart>=0.0.9 in /usr/local/lib/python3.10/dist-packages (from gradio>=4.0.0->llamafactory==0.8.3.dev0) (0.0.9)\n","Requirement already satisfied: ruff>=0.2.2 in /usr/local/lib/python3.10/dist-packages (from gradio>=4.0.0->llamafactory==0.8.3.dev0) (0.5.0)\n","Requirement already satisfied: semantic-version~=2.0 in /usr/local/lib/python3.10/dist-packages (from gradio>=4.0.0->llamafactory==0.8.3.dev0) 
(2.10.0)\n","Requirement already satisfied: tomlkit==0.12.0 in /usr/local/lib/python3.10/dist-packages (from gradio>=4.0.0->llamafactory==0.8.3.dev0) (0.12.0)\n","Requirement already satisfied: typer<1.0,>=0.12 in /usr/local/lib/python3.10/dist-packages (from gradio>=4.0.0->llamafactory==0.8.3.dev0) (0.12.3)\n","Requirement already satisfied: typing-extensions~=4.0 in /usr/local/lib/python3.10/dist-packages (from gradio>=4.0.0->llamafactory==0.8.3.dev0) (4.12.2)\n","Requirement already satisfied: urllib3~=2.0 in /usr/local/lib/python3.10/dist-packages (from gradio>=4.0.0->llamafactory==0.8.3.dev0) (2.0.7)\n","Requirement already satisfied: websockets<12.0,>=10.0 in /usr/local/lib/python3.10/dist-packages (from gradio-client==1.0.2->gradio>=4.0.0->llamafactory==0.8.3.dev0) (11.0.3)\n","Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.7.0->llamafactory==0.8.3.dev0) (1.2.1)\n","Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.7.0->llamafactory==0.8.3.dev0) (0.12.1)\n","Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.7.0->llamafactory==0.8.3.dev0) (4.53.0)\n","Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.7.0->llamafactory==0.8.3.dev0) (1.4.5)\n","Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.7.0->llamafactory==0.8.3.dev0) (3.1.2)\n","Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib>=3.7.0->llamafactory==0.8.3.dev0) (2.8.2)\n","Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=2.0.0->llamafactory==0.8.3.dev0) (2023.4)\n","Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=2.0.0->llamafactory==0.8.3.dev0) (2024.1)\n","Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic->llamafactory==0.8.3.dev0) (0.7.0)\n","Requirement already satisfied: pydantic-core==2.20.0 in /usr/local/lib/python3.10/dist-packages (from pydantic->llamafactory==0.8.3.dev0) (2.20.0)\n","Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.1->llamafactory==0.8.3.dev0) (1.12.1)\n","Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.1->llamafactory==0.8.3.dev0) (3.3)\n","Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.13.1->llamafactory==0.8.3.dev0)\n"," Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)\n","Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.13.1->llamafactory==0.8.3.dev0)\n"," Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)\n","Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.13.1->llamafactory==0.8.3.dev0)\n"," Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)\n","Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.13.1->llamafactory==0.8.3.dev0)\n"," Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)\n","Collecting nvidia-cublas-cu12==12.1.3.1 (from torch>=1.13.1->llamafactory==0.8.3.dev0)\n"," Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)\n","Collecting 
nvidia-cufft-cu12==11.0.2.54 (from torch>=1.13.1->llamafactory==0.8.3.dev0)\n"," Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)\n","Collecting nvidia-curand-cu12==10.3.2.106 (from torch>=1.13.1->llamafactory==0.8.3.dev0)\n"," Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)\n","Collecting nvidia-cusolver-cu12==11.4.5.107 (from torch>=1.13.1->llamafactory==0.8.3.dev0)\n"," Using cached nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)\n","Collecting nvidia-cusparse-cu12==12.1.0.106 (from torch>=1.13.1->llamafactory==0.8.3.dev0)\n"," Using cached nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)\n","Collecting nvidia-nccl-cu12==2.20.5 (from torch>=1.13.1->llamafactory==0.8.3.dev0)\n"," Using cached nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl (176.2 MB)\n","Collecting nvidia-nvtx-cu12==12.1.105 (from torch>=1.13.1->llamafactory==0.8.3.dev0)\n"," Using cached nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)\n","Requirement already satisfied: triton==2.3.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.1->llamafactory==0.8.3.dev0) (2.3.0)\n","Collecting nvidia-nvjitlink-cu12 (from nvidia-cusolver-cu12==11.4.5.107->torch>=1.13.1->llamafactory==0.8.3.dev0)\n"," Using cached nvidia_nvjitlink_cu12-12.5.82-py3-none-manylinux2014_x86_64.whl (21.3 MB)\n","Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.41.2->llamafactory==0.8.3.dev0) (2024.5.15)\n","Requirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.41.2->llamafactory==0.8.3.dev0) (0.19.1)\n","Collecting tyro>=0.5.11 (from trl>=0.8.6->llamafactory==0.8.3.dev0)\n"," Using cached tyro-0.8.5-py3-none-any.whl (103 kB)\n","Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.10/dist-packages (from uvicorn->llamafactory==0.8.3.dev0) (8.1.7)\n","Collecting h11>=0.8 (from uvicorn->llamafactory==0.8.3.dev0)\n"," Using cached h11-0.14.0-py3-none-any.whl (58 kB)\n","Collecting starlette<0.38.0,>=0.37.2 (from fastapi->llamafactory==0.8.3.dev0)\n"," Using cached starlette-0.37.2-py3-none-any.whl (71 kB)\n","Collecting fastapi-cli>=0.0.2 (from fastapi->llamafactory==0.8.3.dev0)\n"," Using cached fastapi_cli-0.0.4-py3-none-any.whl (9.5 kB)\n","Requirement already satisfied: ujson!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0,>=4.0.1 in /usr/local/lib/python3.10/dist-packages (from fastapi->llamafactory==0.8.3.dev0) (5.10.0)\n","Collecting email_validator>=2.0.0 (from fastapi->llamafactory==0.8.3.dev0)\n"," Using cached email_validator-2.2.0-py3-none-any.whl (33 kB)\n","Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from fire->llamafactory==0.8.3.dev0) (1.16.0)\n","Requirement already satisfied: termcolor in /usr/local/lib/python3.10/dist-packages (from fire->llamafactory==0.8.3.dev0) (2.4.0)\n","Requirement already satisfied: anyio in /usr/local/lib/python3.10/dist-packages (from sse-starlette->llamafactory==0.8.3.dev0) (3.7.1)\n","Requirement already satisfied: entrypoints in /usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio>=4.0.0->llamafactory==0.8.3.dev0) (0.4)\n","Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio>=4.0.0->llamafactory==0.8.3.dev0) (4.19.2)\n","Requirement already satisfied: toolz in 
/usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio>=4.0.0->llamafactory==0.8.3.dev0) (0.12.1)\n","Collecting dnspython>=2.0.0 (from email_validator>=2.0.0->fastapi->llamafactory==0.8.3.dev0)\n"," Using cached dnspython-2.6.1-py3-none-any.whl (307 kB)\n","Requirement already satisfied: idna>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from email_validator>=2.0.0->fastapi->llamafactory==0.8.3.dev0) (3.7)\n","Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->llamafactory==0.8.3.dev0) (1.3.1)\n","Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->llamafactory==0.8.3.dev0) (23.2.0)\n","Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->llamafactory==0.8.3.dev0) (1.4.1)\n","Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->llamafactory==0.8.3.dev0) (6.0.5)\n","Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->llamafactory==0.8.3.dev0) (1.9.4)\n","Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->llamafactory==0.8.3.dev0) (4.0.3)\n","Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx>=0.24.1->gradio>=4.0.0->llamafactory==0.8.3.dev0) (2024.6.2)\n","Collecting httpcore==1.* (from httpx>=0.24.1->gradio>=4.0.0->llamafactory==0.8.3.dev0)\n"," Using cached httpcore-1.0.5-py3-none-any.whl (77 kB)\n","Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from httpx>=0.24.1->gradio>=4.0.0->llamafactory==0.8.3.dev0) (1.3.1)\n","Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets>=2.16.0->llamafactory==0.8.3.dev0) (3.3.2)\n","Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio->sse-starlette->llamafactory==0.8.3.dev0) (1.2.1)\n","Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0,>=0.12->gradio>=4.0.0->llamafactory==0.8.3.dev0) (1.5.4)\n","Requirement already satisfied: rich>=10.11.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0,>=0.12->gradio>=4.0.0->llamafactory==0.8.3.dev0) (13.7.1)\n","Requirement already satisfied: docstring-parser>=0.16 in /usr/local/lib/python3.10/dist-packages (from tyro>=0.5.11->trl>=0.8.6->llamafactory==0.8.3.dev0) (0.16)\n","Requirement already satisfied: shtab>=1.5.6 in /usr/local/lib/python3.10/dist-packages (from tyro>=0.5.11->trl>=0.8.6->llamafactory==0.8.3.dev0) (1.7.1)\n","Collecting httptools>=0.5.0 (from uvicorn->llamafactory==0.8.3.dev0)\n"," Using cached httptools-0.6.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (341 kB)\n","Requirement already satisfied: python-dotenv>=0.13 in /usr/local/lib/python3.10/dist-packages (from uvicorn->llamafactory==0.8.3.dev0) (1.0.1)\n","Requirement already satisfied: uvloop!=0.15.0,!=0.15.1,>=0.14.0 in /usr/local/lib/python3.10/dist-packages (from uvicorn->llamafactory==0.8.3.dev0) (0.19.0)\n","Collecting watchfiles>=0.13 (from uvicorn->llamafactory==0.8.3.dev0)\n"," Using cached 
watchfiles-0.22.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n","Requirement already satisfied: mpmath<1.4.0,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.13.1->llamafactory==0.8.3.dev0) (1.3.0)\n","Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio>=4.0.0->llamafactory==0.8.3.dev0) (2023.12.1)\n","Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio>=4.0.0->llamafactory==0.8.3.dev0) (0.35.1)\n","Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio>=4.0.0->llamafactory==0.8.3.dev0) (0.18.1)\n","Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio>=4.0.0->llamafactory==0.8.3.dev0) (3.0.0)\n","Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio>=4.0.0->llamafactory==0.8.3.dev0) (2.16.1)\n","Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0,>=0.12->gradio>=4.0.0->llamafactory==0.8.3.dev0) (0.1.2)\n","Building wheels for collected packages: llamafactory\n"," Building editable for llamafactory (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n"," Created wheel for llamafactory: filename=llamafactory-0.8.3.dev0-0.editable-py3-none-any.whl size=20620 sha256=ed6af715859ef89b7f8e95dd3d81f76ee5b45d73d7ea25d8898ff45eaec0e9eb\n"," Stored in directory: /tmp/pip-ephem-wheel-cache-3cdhd6d2/wheels/de/aa/c5/27b5682c5592b7c0eecc3e208f176dedf6b11a61cf2a910b85\n","Successfully built llamafactory\n","\u001b[33mWARNING: Ignoring invalid distribution -yarrow (/usr/local/lib/python3.10/dist-packages)\u001b[0m\u001b[33m\n","\u001b[0mInstalling collected packages: orjson, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, httptools, h11, fire, einops, dnspython, dill, aiofiles, watchfiles, uvicorn, tiktoken, starlette, nvidia-cusparse-cu12, nvidia-cudnn-cu12, multiprocess, httpcore, email_validator, tyro, sse-starlette, nvidia-cusolver-cu12, httpx, gradio-client, fastapi-cli, datasets, fastapi, bitsandbytes, accelerate, trl, peft, gradio, llamafactory\n","Successfully installed accelerate-0.32.1 aiofiles-23.2.1 bitsandbytes-0.43.1 datasets-2.20.0 dill-0.3.8 dnspython-2.6.1 einops-0.8.0 email_validator-2.2.0 fastapi-0.111.0 fastapi-cli-0.0.4 fire-0.6.0 gradio-4.37.2 gradio-client-1.0.2 h11-0.14.0 httpcore-1.0.5 httptools-0.6.1 httpx-0.27.0 llamafactory-0.8.3.dev0 multiprocess-0.70.16 nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.20.5 nvidia-nvjitlink-cu12-12.5.82 nvidia-nvtx-cu12-12.1.105 orjson-3.10.6 peft-0.11.1 sse-starlette-2.1.2 starlette-0.37.2 tiktoken-0.7.0 trl-0.9.4 tyro-0.8.5 uvicorn-0.30.1 watchfiles-0.22.0\n","\u001b[33mWARNING: Ignoring invalid distribution -yarrow 
(/usr/local/lib/python3.10/dist-packages)\u001b[0m\u001b[33m\n","\u001b[0mCollecting unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git\n"," Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-n3xjjsbs/unsloth_c789449b7d96442aa069adc1f212e181\n"," Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-n3xjjsbs/unsloth_c789449b7d96442aa069adc1f212e181\n"," Resolved https://github.com/unslothai/unsloth.git to commit 9b4cc934efec66abd0a77df011779b393a99c026\n"," Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n"," Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n"," Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n","Requirement already satisfied: tyro in /usr/local/lib/python3.10/dist-packages (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.8.5)\n","Collecting transformers>=4.42.3 (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)\n"," Downloading transformers-4.42.3-py3-none-any.whl (9.3 MB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.3/9.3 MB\u001b[0m \u001b[31m30.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: datasets>=2.16.0 in /usr/local/lib/python3.10/dist-packages (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2.20.0)\n","Collecting sentencepiece>=0.2.0 (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)\n"," Downloading sentencepiece-0.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m68.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (4.66.4)\n","Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (5.9.5)\n","Requirement already satisfied: wheel>=0.42.0 in /usr/local/lib/python3.10/dist-packages (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.43.0)\n","Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (1.25.2)\n","Requirement already satisfied: protobuf<4.0.0 in /usr/local/lib/python3.10/dist-packages (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (3.20.3)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (3.15.4)\n","Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (16.1.0)\n","Requirement already satisfied: pyarrow-hotfix in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.6)\n","Requirement already satisfied: dill<0.3.9,>=0.3.0 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.3.8)\n","Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from 
datasets>=2.16.0->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2.0.3)\n","Requirement already satisfied: requests>=2.32.2 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2.32.3)\n","Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (3.4.1)\n","Requirement already satisfied: multiprocess in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.70.16)\n","Requirement already satisfied: fsspec[http]<=2024.5.0,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2023.6.0)\n","Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (3.9.5)\n","Requirement already satisfied: huggingface-hub>=0.21.2 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.23.4)\n","Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (24.1)\n","Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (6.0.1)\n","Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.42.3->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2024.5.15)\n","Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.42.3->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.4.3)\n","Requirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.42.3->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.19.1)\n","Requirement already satisfied: docstring-parser>=0.16 in /usr/local/lib/python3.10/dist-packages (from tyro->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.16)\n","Requirement already satisfied: typing-extensions>=4.7.0 in /usr/local/lib/python3.10/dist-packages (from tyro->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (4.12.2)\n","Requirement already satisfied: rich>=11.1.0 in /usr/local/lib/python3.10/dist-packages (from tyro->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (13.7.1)\n","Requirement already satisfied: shtab>=1.5.6 in /usr/local/lib/python3.10/dist-packages (from tyro->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (1.7.1)\n","Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (1.3.1)\n","Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (23.2.0)\n","Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->unsloth[colab-new]@ 
git+https://github.com/unslothai/unsloth.git) (1.4.1)\n","Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (6.0.5)\n","Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (1.9.4)\n","Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (4.0.3)\n","Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets>=2.16.0->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (3.3.2)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets>=2.16.0->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (3.7)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets>=2.16.0->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2.0.7)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets>=2.16.0->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2024.6.2)\n","Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich>=11.1.0->tyro->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (3.0.0)\n","Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich>=11.1.0->tyro->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2.16.1)\n","Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets>=2.16.0->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2.8.2)\n","Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets>=2.16.0->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2023.4)\n","Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets>=2.16.0->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (2024.1)\n","Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich>=11.1.0->tyro->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (0.1.2)\n","Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->datasets>=2.16.0->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git) (1.16.0)\n","Building wheels for collected packages: unsloth\n"," Building wheel for unsloth (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n"," Created wheel for unsloth: filename=unsloth-2024.7-py3-none-any.whl size=124373 sha256=ab915741c324eb241728fe07c2e8ddddbac78ee486da7acd0da8e30ec7de33fa\n"," Stored in directory: /tmp/pip-ephem-wheel-cache-3grz2oh6/wheels/ed/d4/e9/76fb290ee3df0a5fc21ce5c2c788e29e9607a2353d8342fd0d\n","Successfully built unsloth\n","\u001b[33mWARNING: Ignoring invalid distribution -yarrow (/usr/local/lib/python3.10/dist-packages)\u001b[0m\u001b[33m\n","\u001b[0mInstalling collected packages: sentencepiece, unsloth, transformers\n"," Attempting uninstall: sentencepiece\n"," Found existing installation: sentencepiece 0.1.99\n"," Uninstalling sentencepiece-0.1.99:\n"," Successfully uninstalled sentencepiece-0.1.99\n"," Attempting uninstall: transformers\n"," Found existing installation: transformers 4.41.2\n"," Uninstalling transformers-4.41.2:\n"," Successfully uninstalled transformers-4.41.2\n","Successfully installed sentencepiece-0.2.0 transformers-4.42.3 unsloth-2024.7\n","\u001b[33mWARNING: Ignoring invalid distribution -yarrow (/usr/local/lib/python3.10/dist-packages)\u001b[0m\u001b[33m\n","\u001b[0mCollecting xformers\n"," Downloading xformers-0.0.26.post1-cp310-cp310-manylinux2014_x86_64.whl (222.7 MB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m222.7/222.7 MB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting trl<0.9.0\n"," Downloading trl-0.8.6-py3-none-any.whl (245 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m245.2/245.2 kB\u001b[0m \u001b[31m29.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: peft in /usr/local/lib/python3.10/dist-packages (0.11.1)\n","Requirement already satisfied: accelerate in /usr/local/lib/python3.10/dist-packages (0.32.1)\n","Requirement already satisfied: bitsandbytes in /usr/local/lib/python3.10/dist-packages (0.43.1)\n","Installing collected packages: xformers, trl\n"," Attempting uninstall: trl\n"," Found existing installation: trl 0.9.4\n"," Uninstalling trl-0.9.4:\n"," Successfully uninstalled trl-0.9.4\n","Successfully installed trl-0.8.6 xformers-0.0.26.post1\n","CPU times: user 1.1 s, sys: 172 ms, total: 1.27 s\n","Wall time: 2min 33s\n"]}],"source":["%%time\n","\n","%cd /content/\n","%rm -rf LLaMA-Factory\n","!git clone https://github.com/hiyouga/LLaMA-Factory.git\n","%cd LLaMA-Factory\n","%ls\n","!pip install -e .[torch,bitsandbytes]\n","# Installs Unsloth, Xformers (Flash Attention) and all other packages!\n","!pip install \"unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git\"\n","!pip install --no-deps xformers \"trl<0.9.0\" peft accelerate bitsandbytes"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{},"inputWidgets":{},"nuid":"6d394937-6c99-4a7c-9d32-7600a280032f","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":422,"status":"ok","timestamp":1720189503740,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"Rzln0ffbzi_H","outputId":"f9e117e8-59ad-494d-d3f4-996325e4d3d7"},"outputs":[{"name":"stdout","output_type":"stream","text":["workding dir: /content/drive/MyDrive/logical-reasoning/\n"]}],"source":["import os\n","import sys\n","from pathlib import Path\n","\n","workding_dir = 
\"/content/drive/MyDrive/logical-reasoning/\"\n","os.chdir(workding_dir)\n","sys.path.append(workding_dir)\n","print(\"workding dir:\", workding_dir)"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":14303,"status":"ok","timestamp":1720189069046,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"zUqv4IUmU6Yv","outputId":"ffc5edce-70d5-474a-f5a9-3dabcf9f601e"},"outputs":[{"name":"stdout","output_type":"stream","text":["\u001b[33mWARNING: Ignoring invalid distribution -yarrow (/usr/local/lib/python3.10/dist-packages)\u001b[0m\u001b[33m\n","\u001b[0mRequirement already satisfied: huggingface_hub==0.23.2 in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 1)) (0.23.2)\n","Requirement already satisfied: nltk==3.8.1 in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 2)) (3.8.1)\n","Requirement already satisfied: python-dotenv==1.0.1 in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 3)) (1.0.1)\n","Requirement already satisfied: black==24.4.0 in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 4)) (24.4.0)\n","Requirement already satisfied: evaluate==0.4.2 in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 5)) (0.4.2)\n","Requirement already satisfied: rouge_score==0.1.2 in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 6)) (0.1.2)\n","Requirement already satisfied: pytest==8.2.1 in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 7)) (8.2.1)\n","Requirement already satisfied: seaborn==0.13.2 in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 8)) (0.13.2)\n","Requirement already satisfied: scikit-learn==1.5.0 in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 9)) (1.5.0)\n","Requirement already satisfied: jupyter in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 10)) (1.0.0)\n","Requirement already satisfied: ipywidgets in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 11)) (7.7.1)\n","Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 12)) (24.1)\n","Collecting langchain_openai==0.1.13 (from -r requirements.txt (line 13))\n"," Downloading langchain_openai-0.1.13-py3-none-any.whl (45 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.9/45.9 kB\u001b[0m \u001b[31m1.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: wandb==0.17.4 in /usr/local/lib/python3.10/dist-packages (from -r requirements.txt (line 14)) (0.17.4)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface_hub==0.23.2->-r requirements.txt (line 1)) (3.15.4)\n","Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface_hub==0.23.2->-r requirements.txt (line 1)) (2023.6.0)\n","Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from huggingface_hub==0.23.2->-r requirements.txt (line 1)) (6.0.1)\n","Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface_hub==0.23.2->-r requirements.txt (line 1)) (2.32.3)\n","Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from huggingface_hub==0.23.2->-r 
requirements.txt (line 1)) (4.66.4)\n","Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface_hub==0.23.2->-r requirements.txt (line 1)) (4.12.2)\n","Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk==3.8.1->-r requirements.txt (line 2)) (8.1.7)\n","Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk==3.8.1->-r requirements.txt (line 2)) (1.4.2)\n","Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.10/dist-packages (from nltk==3.8.1->-r requirements.txt (line 2)) (2024.5.15)\n","Requirement already satisfied: mypy-extensions>=0.4.3 in /usr/local/lib/python3.10/dist-packages (from black==24.4.0->-r requirements.txt (line 4)) (1.0.0)\n","Requirement already satisfied: pathspec>=0.9.0 in /usr/local/lib/python3.10/dist-packages (from black==24.4.0->-r requirements.txt (line 4)) (0.12.1)\n","Requirement already satisfied: platformdirs>=2 in /usr/local/lib/python3.10/dist-packages (from black==24.4.0->-r requirements.txt (line 4)) (4.2.2)\n","Requirement already satisfied: tomli>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from black==24.4.0->-r requirements.txt (line 4)) (2.0.1)\n","Requirement already satisfied: datasets>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from evaluate==0.4.2->-r requirements.txt (line 5)) (2.20.0)\n","Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from evaluate==0.4.2->-r requirements.txt (line 5)) (1.25.2)\n","Requirement already satisfied: dill in /usr/local/lib/python3.10/dist-packages (from evaluate==0.4.2->-r requirements.txt (line 5)) (0.3.8)\n","Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from evaluate==0.4.2->-r requirements.txt (line 5)) (2.0.3)\n","Requirement already satisfied: xxhash in /usr/local/lib/python3.10/dist-packages (from evaluate==0.4.2->-r requirements.txt (line 5)) (3.4.1)\n","Requirement already satisfied: multiprocess in /usr/local/lib/python3.10/dist-packages (from evaluate==0.4.2->-r requirements.txt (line 5)) (0.70.16)\n","Requirement already satisfied: absl-py in /usr/local/lib/python3.10/dist-packages (from rouge_score==0.1.2->-r requirements.txt (line 6)) (1.4.0)\n","Requirement already satisfied: six>=1.14.0 in /usr/local/lib/python3.10/dist-packages (from rouge_score==0.1.2->-r requirements.txt (line 6)) (1.16.0)\n","Requirement already satisfied: iniconfig in /usr/local/lib/python3.10/dist-packages (from pytest==8.2.1->-r requirements.txt (line 7)) (2.0.0)\n","Requirement already satisfied: pluggy<2.0,>=1.5 in /usr/local/lib/python3.10/dist-packages (from pytest==8.2.1->-r requirements.txt (line 7)) (1.5.0)\n","Requirement already satisfied: exceptiongroup>=1.0.0rc8 in /usr/local/lib/python3.10/dist-packages (from pytest==8.2.1->-r requirements.txt (line 7)) (1.2.1)\n","Requirement already satisfied: matplotlib!=3.6.1,>=3.4 in /usr/local/lib/python3.10/dist-packages (from seaborn==0.13.2->-r requirements.txt (line 8)) (3.7.1)\n","Requirement already satisfied: scipy>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn==1.5.0->-r requirements.txt (line 9)) (1.11.4)\n","Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn==1.5.0->-r requirements.txt (line 9)) (3.5.0)\n","Collecting langchain-core<0.3,>=0.2.2 (from langchain_openai==0.1.13->-r requirements.txt (line 13))\n"," Downloading 
langchain_core-0.2.11-py3-none-any.whl (337 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m337.4/337.4 kB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting openai<2.0.0,>=1.32.0 (from langchain_openai==0.1.13->-r requirements.txt (line 13))\n"," Downloading openai-1.35.10-py3-none-any.whl (328 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m328.3/328.3 kB\u001b[0m \u001b[31m13.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: tiktoken<1,>=0.7 in /usr/local/lib/python3.10/dist-packages (from langchain_openai==0.1.13->-r requirements.txt (line 13)) (0.7.0)\n","Requirement already satisfied: docker-pycreds>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from wandb==0.17.4->-r requirements.txt (line 14)) (0.4.0)\n","Requirement already satisfied: gitpython!=3.1.29,>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from wandb==0.17.4->-r requirements.txt (line 14)) (3.1.43)\n","Requirement already satisfied: protobuf!=4.21.0,<6,>=3.19.0 in /usr/local/lib/python3.10/dist-packages (from wandb==0.17.4->-r requirements.txt (line 14)) (3.20.3)\n","Requirement already satisfied: psutil>=5.0.0 in /usr/local/lib/python3.10/dist-packages (from wandb==0.17.4->-r requirements.txt (line 14)) (5.9.5)\n","Requirement already satisfied: sentry-sdk>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from wandb==0.17.4->-r requirements.txt (line 14)) (2.7.1)\n","Requirement already satisfied: setproctitle in /usr/local/lib/python3.10/dist-packages (from wandb==0.17.4->-r requirements.txt (line 14)) (1.3.3)\n","Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from wandb==0.17.4->-r requirements.txt (line 14)) (67.7.2)\n","Requirement already satisfied: notebook in /usr/local/lib/python3.10/dist-packages (from jupyter->-r requirements.txt (line 10)) (6.5.5)\n","Requirement already satisfied: qtconsole in /usr/local/lib/python3.10/dist-packages (from jupyter->-r requirements.txt (line 10)) (5.5.2)\n","Requirement already satisfied: jupyter-console in /usr/local/lib/python3.10/dist-packages (from jupyter->-r requirements.txt (line 10)) (6.1.0)\n","Requirement already satisfied: nbconvert in /usr/local/lib/python3.10/dist-packages (from jupyter->-r requirements.txt (line 10)) (6.5.4)\n","Requirement already satisfied: ipykernel in /usr/local/lib/python3.10/dist-packages (from jupyter->-r requirements.txt (line 10)) (5.5.6)\n","Requirement already satisfied: ipython-genutils~=0.2.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->-r requirements.txt (line 11)) (0.2.0)\n","Requirement already satisfied: traitlets>=4.3.1 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->-r requirements.txt (line 11)) (5.7.1)\n","Requirement already satisfied: widgetsnbextension~=3.6.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->-r requirements.txt (line 11)) (3.6.6)\n","Requirement already satisfied: ipython>=4.0.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->-r requirements.txt (line 11)) (7.34.0)\n","Requirement already satisfied: jupyterlab-widgets>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->-r requirements.txt (line 11)) (3.0.11)\n","Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.0.0->evaluate==0.4.2->-r requirements.txt (line 5)) (16.1.0)\n","Requirement already satisfied: pyarrow-hotfix 
in /usr/local/lib/python3.10/dist-packages (from datasets>=2.0.0->evaluate==0.4.2->-r requirements.txt (line 5)) (0.6)\n","Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets>=2.0.0->evaluate==0.4.2->-r requirements.txt (line 5)) (3.9.5)\n","Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.10/dist-packages (from gitpython!=3.1.29,>=1.0.0->wandb==0.17.4->-r requirements.txt (line 14)) (4.0.11)\n","Requirement already satisfied: jupyter-client in /usr/local/lib/python3.10/dist-packages (from ipykernel->jupyter->-r requirements.txt (line 10)) (6.1.12)\n","Requirement already satisfied: tornado>=4.2 in /usr/local/lib/python3.10/dist-packages (from ipykernel->jupyter->-r requirements.txt (line 10)) (6.3.3)\n","Requirement already satisfied: jedi>=0.16 in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->-r requirements.txt (line 11)) (0.19.1)\n","Requirement already satisfied: decorator in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->-r requirements.txt (line 11)) (4.4.2)\n","Requirement already satisfied: pickleshare in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->-r requirements.txt (line 11)) (0.7.5)\n","Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->-r requirements.txt (line 11)) (3.0.47)\n","Requirement already satisfied: pygments in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->-r requirements.txt (line 11)) (2.16.1)\n","Requirement already satisfied: backcall in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->-r requirements.txt (line 11)) (0.2.0)\n","Requirement already satisfied: matplotlib-inline in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->-r requirements.txt (line 11)) (0.1.7)\n","Requirement already satisfied: pexpect>4.3 in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets->-r requirements.txt (line 11)) (4.9.0)\n","Collecting jsonpatch<2.0,>=1.33 (from langchain-core<0.3,>=0.2.2->langchain_openai==0.1.13->-r requirements.txt (line 13))\n"," Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)\n","Collecting langsmith<0.2.0,>=0.1.75 (from langchain-core<0.3,>=0.2.2->langchain_openai==0.1.13->-r requirements.txt (line 13))\n"," Downloading langsmith-0.1.83-py3-none-any.whl (127 kB)\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m127.5/127.5 kB\u001b[0m \u001b[31m8.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: pydantic<3,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.3,>=0.2.2->langchain_openai==0.1.13->-r requirements.txt (line 13)) (2.8.0)\n","Requirement already satisfied: tenacity!=8.4.0,<9.0.0,>=8.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain-core<0.3,>=0.2.2->langchain_openai==0.1.13->-r requirements.txt (line 13)) (8.4.2)\n","Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib!=3.6.1,>=3.4->seaborn==0.13.2->-r requirements.txt (line 8)) (1.2.1)\n","Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib!=3.6.1,>=3.4->seaborn==0.13.2->-r requirements.txt (line 8)) (0.12.1)\n","Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from 
matplotlib!=3.6.1,>=3.4->seaborn==0.13.2->-r requirements.txt (line 8)) (4.53.0)\n","Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib!=3.6.1,>=3.4->seaborn==0.13.2->-r requirements.txt (line 8)) (1.4.5)\n","Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib!=3.6.1,>=3.4->seaborn==0.13.2->-r requirements.txt (line 8)) (9.4.0)\n","Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib!=3.6.1,>=3.4->seaborn==0.13.2->-r requirements.txt (line 8)) (3.1.2)\n","Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib!=3.6.1,>=3.4->seaborn==0.13.2->-r requirements.txt (line 8)) (2.8.2)\n","Requirement already satisfied: anyio<5,>=3.5.0 in /usr/local/lib/python3.10/dist-packages (from openai<2.0.0,>=1.32.0->langchain_openai==0.1.13->-r requirements.txt (line 13)) (3.7.1)\n","Requirement already satisfied: distro<2,>=1.7.0 in /usr/lib/python3/dist-packages (from openai<2.0.0,>=1.32.0->langchain_openai==0.1.13->-r requirements.txt (line 13)) (1.7.0)\n","Requirement already satisfied: httpx<1,>=0.23.0 in /usr/local/lib/python3.10/dist-packages (from openai<2.0.0,>=1.32.0->langchain_openai==0.1.13->-r requirements.txt (line 13)) (0.27.0)\n","Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from openai<2.0.0,>=1.32.0->langchain_openai==0.1.13->-r requirements.txt (line 13)) (1.3.1)\n","Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->evaluate==0.4.2->-r requirements.txt (line 5)) (2023.4)\n","Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas->evaluate==0.4.2->-r requirements.txt (line 5)) (2024.1)\n","Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface_hub==0.23.2->-r requirements.txt (line 1)) (3.3.2)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface_hub==0.23.2->-r requirements.txt (line 1)) (3.7)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface_hub==0.23.2->-r requirements.txt (line 1)) (2.0.7)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface_hub==0.23.2->-r requirements.txt (line 1)) (2024.6.2)\n","Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from notebook->jupyter->-r requirements.txt (line 10)) (3.1.4)\n","Requirement already satisfied: pyzmq<25,>=17 in /usr/local/lib/python3.10/dist-packages (from notebook->jupyter->-r requirements.txt (line 10)) (24.0.1)\n","Requirement already satisfied: argon2-cffi in /usr/local/lib/python3.10/dist-packages (from notebook->jupyter->-r requirements.txt (line 10)) (23.1.0)\n","Requirement already satisfied: jupyter-core>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from notebook->jupyter->-r requirements.txt (line 10)) (5.7.2)\n","Requirement already satisfied: nbformat in /usr/local/lib/python3.10/dist-packages (from notebook->jupyter->-r requirements.txt (line 10)) (5.10.4)\n","Requirement already satisfied: nest-asyncio>=1.5 in /usr/local/lib/python3.10/dist-packages (from notebook->jupyter->-r requirements.txt (line 10)) (1.6.0)\n","Requirement already satisfied: 
Send2Trash>=1.8.0 in /usr/local/lib/python3.10/dist-packages (from notebook->jupyter->-r requirements.txt (line 10)) (1.8.3)\n","Requirement already satisfied: terminado>=0.8.3 in /usr/local/lib/python3.10/dist-packages (from notebook->jupyter->-r requirements.txt (line 10)) (0.18.1)\n","Requirement already satisfied: prometheus-client in /usr/local/lib/python3.10/dist-packages (from notebook->jupyter->-r requirements.txt (line 10)) (0.20.0)\n","Requirement already satisfied: nbclassic>=0.4.7 in /usr/local/lib/python3.10/dist-packages (from notebook->jupyter->-r requirements.txt (line 10)) (1.1.0)\n","Requirement already satisfied: lxml in /usr/local/lib/python3.10/dist-packages (from nbconvert->jupyter->-r requirements.txt (line 10)) (4.9.4)\n","Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.10/dist-packages (from nbconvert->jupyter->-r requirements.txt (line 10)) (4.12.3)\n","Requirement already satisfied: bleach in /usr/local/lib/python3.10/dist-packages (from nbconvert->jupyter->-r requirements.txt (line 10)) (6.1.0)\n","Requirement already satisfied: defusedxml in /usr/local/lib/python3.10/dist-packages (from nbconvert->jupyter->-r requirements.txt (line 10)) (0.7.1)\n","Requirement already satisfied: entrypoints>=0.2.2 in /usr/local/lib/python3.10/dist-packages (from nbconvert->jupyter->-r requirements.txt (line 10)) (0.4)\n","Requirement already satisfied: jupyterlab-pygments in /usr/local/lib/python3.10/dist-packages (from nbconvert->jupyter->-r requirements.txt (line 10)) (0.3.0)\n","Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from nbconvert->jupyter->-r requirements.txt (line 10)) (2.1.5)\n","Requirement already satisfied: mistune<2,>=0.8.1 in /usr/local/lib/python3.10/dist-packages (from nbconvert->jupyter->-r requirements.txt (line 10)) (0.8.4)\n","Requirement already satisfied: nbclient>=0.5.0 in /usr/local/lib/python3.10/dist-packages (from nbconvert->jupyter->-r requirements.txt (line 10)) (0.10.0)\n","Requirement already satisfied: pandocfilters>=1.4.1 in /usr/local/lib/python3.10/dist-packages (from nbconvert->jupyter->-r requirements.txt (line 10)) (1.5.1)\n","Requirement already satisfied: tinycss2 in /usr/local/lib/python3.10/dist-packages (from nbconvert->jupyter->-r requirements.txt (line 10)) (1.3.0)\n","Requirement already satisfied: qtpy>=2.4.0 in /usr/local/lib/python3.10/dist-packages (from qtconsole->jupyter->-r requirements.txt (line 10)) (2.4.1)\n","Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.0.0->evaluate==0.4.2->-r requirements.txt (line 5)) (1.3.1)\n","Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.0.0->evaluate==0.4.2->-r requirements.txt (line 5)) (23.2.0)\n","Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.0.0->evaluate==0.4.2->-r requirements.txt (line 5)) (1.4.1)\n","Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.0.0->evaluate==0.4.2->-r requirements.txt (line 5)) (6.0.5)\n","Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.0.0->evaluate==0.4.2->-r requirements.txt (line 5)) (1.9.4)\n","Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.0.0->evaluate==0.4.2->-r 
requirements.txt (line 5)) (4.0.3)\n","Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.10/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.29,>=1.0.0->wandb==0.17.4->-r requirements.txt (line 14)) (5.0.1)\n","Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.23.0->openai<2.0.0,>=1.32.0->langchain_openai==0.1.13->-r requirements.txt (line 13)) (1.0.5)\n","Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.10/dist-packages (from httpcore==1.*->httpx<1,>=0.23.0->openai<2.0.0,>=1.32.0->langchain_openai==0.1.13->-r requirements.txt (line 13)) (0.14.0)\n","Requirement already satisfied: parso<0.9.0,>=0.8.3 in /usr/local/lib/python3.10/dist-packages (from jedi>=0.16->ipython>=4.0.0->ipywidgets->-r requirements.txt (line 11)) (0.8.4)\n","Collecting jsonpointer>=1.9 (from jsonpatch<2.0,>=1.33->langchain-core<0.3,>=0.2.2->langchain_openai==0.1.13->-r requirements.txt (line 13))\n"," Downloading jsonpointer-3.0.0-py2.py3-none-any.whl (7.6 kB)\n","Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /usr/local/lib/python3.10/dist-packages (from langsmith<0.2.0,>=0.1.75->langchain-core<0.3,>=0.2.2->langchain_openai==0.1.13->-r requirements.txt (line 13)) (3.10.6)\n","Requirement already satisfied: notebook-shim>=0.2.3 in /usr/local/lib/python3.10/dist-packages (from nbclassic>=0.4.7->notebook->jupyter->-r requirements.txt (line 10)) (0.2.4)\n","Requirement already satisfied: fastjsonschema>=2.15 in /usr/local/lib/python3.10/dist-packages (from nbformat->notebook->jupyter->-r requirements.txt (line 10)) (2.20.0)\n","Requirement already satisfied: jsonschema>=2.6 in /usr/local/lib/python3.10/dist-packages (from nbformat->notebook->jupyter->-r requirements.txt (line 10)) (4.19.2)\n","Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.10/dist-packages (from pexpect>4.3->ipython>=4.0.0->ipywidgets->-r requirements.txt (line 11)) (0.7.0)\n","Requirement already satisfied: wcwidth in /usr/local/lib/python3.10/dist-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->ipython>=4.0.0->ipywidgets->-r requirements.txt (line 11)) (0.2.13)\n","Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain-core<0.3,>=0.2.2->langchain_openai==0.1.13->-r requirements.txt (line 13)) (0.7.0)\n","Requirement already satisfied: pydantic-core==2.20.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain-core<0.3,>=0.2.2->langchain_openai==0.1.13->-r requirements.txt (line 13)) (2.20.0)\n","Requirement already satisfied: argon2-cffi-bindings in /usr/local/lib/python3.10/dist-packages (from argon2-cffi->notebook->jupyter->-r requirements.txt (line 10)) (21.2.0)\n","Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4->nbconvert->jupyter->-r requirements.txt (line 10)) (2.5)\n","Requirement already satisfied: webencodings in /usr/local/lib/python3.10/dist-packages (from bleach->nbconvert->jupyter->-r requirements.txt (line 10)) (0.5.1)\n","Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat->notebook->jupyter->-r requirements.txt (line 10)) (2023.12.1)\n","Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat->notebook->jupyter->-r requirements.txt (line 10)) (0.35.1)\n","Requirement 
already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat->notebook->jupyter->-r requirements.txt (line 10)) (0.18.1)\n","Requirement already satisfied: jupyter-server<3,>=1.8 in /usr/local/lib/python3.10/dist-packages (from notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook->jupyter->-r requirements.txt (line 10)) (1.24.0)\n","Requirement already satisfied: cffi>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from argon2-cffi-bindings->argon2-cffi->notebook->jupyter->-r requirements.txt (line 10)) (1.16.0)\n","Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.0.1->argon2-cffi-bindings->argon2-cffi->notebook->jupyter->-r requirements.txt (line 10)) (2.22)\n","Requirement already satisfied: websocket-client in /usr/local/lib/python3.10/dist-packages (from jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook->jupyter->-r requirements.txt (line 10)) (1.8.0)\n","\u001b[33mWARNING: Ignoring invalid distribution -yarrow (/usr/local/lib/python3.10/dist-packages)\u001b[0m\u001b[33m\n","\u001b[0mInstalling collected packages: jsonpointer, jsonpatch, openai, langsmith, langchain-core, langchain_openai\n","Successfully installed jsonpatch-1.33 jsonpointer-3.0.0 langchain-core-0.2.11 langchain_openai-0.1.13 langsmith-0.1.83 openai-1.35.10\n","CPU times: user 118 ms, sys: 33.3 ms, total: 151 ms\n","Wall time: 14 s\n"]}],"source":["%%time\n","\n","!pip install -r requirements.txt"]},{"cell_type":"code","execution_count":null,"metadata":{"application/vnd.databricks.v1+cell":{"cellMetadata":{},"inputWidgets":{},"nuid":"9f67ec60-2f24-411c-84eb-0dd664b44775","showTitle":false,"title":""},"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":462,"status":"ok","timestamp":1720189512175,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"DIUiweYYzi_I","outputId":"d5e1122d-ac3f-4f8d-ea3f-4a487d66fd45"},"outputs":[{"name":"stdout","output_type":"stream","text":["loading env vars from: /content/drive/MyDrive/logical-reasoning/.env\n"]},{"data":{"text/plain":["True"]},"execution_count":7,"metadata":{},"output_type":"execute_result"}],"source":["from dotenv import find_dotenv, load_dotenv\n","\n","found_dotenv = find_dotenv(\".env\")\n","\n","if len(found_dotenv) == 0:\n"," found_dotenv = find_dotenv(\".env.example\")\n","print(f\"loading env vars from: {found_dotenv}\")\n","load_dotenv(found_dotenv, override=True)"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":1137,"status":"ok","timestamp":1720184997832,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"4hQO8gkFzi_K","outputId":"64b7d619-e784-49ef-c682-bfa594e170dd"},"outputs":[{"name":"stdout","output_type":"stream","text":["Python 3.10.12\n","\u001b[33mWARNING: Ignoring invalid distribution -yarrow (/usr/local/lib/python3.10/dist-packages)\u001b[0m\u001b[33m\n","\u001b[0m\u001b[33mWARNING: Package(s) not found: flash-attn\u001b[0m\u001b[33m\n","\u001b[0mCPU times: user 14.6 ms, sys: 1.4 ms, total: 16 ms\n","Wall time: 1.01 s\n"]}],"source":["%%time\n","!python --version\n","!pip show flash-attn"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":955306,"status":"ok","timestamp":1720188776118,"user":{"displayName":"HUANG DONGHAO 
_","userId":"00977795705617022768"},"user_tz":-480},"id":"zteOIWQ0-59D","outputId":"0eba2bfd-2c36-4c6f-f9ca-36719f5ee4d3"},"outputs":[{"name":"stdout","output_type":"stream","text":["Current Directory:\n","/content/drive/MyDrive/logical-reasoning/llama-factory\n","config/qwen2_7b_lora_sft.yaml:\n"," {\n"," \"model_name_or_path\": \"Qwen/Qwen2-7B-Instruct\",\n"," \"stage\": \"sft\",\n"," \"do_train\": true,\n"," \"finetuning_type\": \"lora\",\n"," \"lora_target\": \"all\",\n"," \"quantization_bit\": 4,\n"," \"loraplus_lr_ratio\": 16.0,\n"," \"dataset\": \"alpaca_mac\",\n"," \"template\": \"chatml\",\n"," \"cutoff_len\": 1024,\n"," \"max_samples\": 50,\n"," \"overwrite_cache\": true,\n"," \"preprocessing_num_workers\": 16,\n"," \"output_dir\": \"/content/qwen2-7b/\",\n"," \"logging_steps\": 10,\n"," \"save_steps\": 10,\n"," \"plot_loss\": true,\n"," \"overwrite_output_dir\": true,\n"," \"per_device_train_batch_size\": 1,\n"," \"gradient_accumulation_steps\": 8,\n"," \"learning_rate\": 0.0001,\n"," \"num_train_epochs\": 6.0,\n"," \"lr_scheduler_type\": \"cosine\",\n"," \"warmup_ratio\": 0.1,\n"," \"bf16\": true,\n"," \"ddp_timeout\": 180000000,\n"," \"val_size\": 0.02,\n"," \"per_device_eval_batch_size\": 1,\n"," \"eval_strategy\": \"steps\",\n"," \"eval_steps\": 10,\n"," \"report_to\": \"wandb\",\n"," \"run_name\": \"qwen2_7b_mac_colab\"\n","}\n","2024-07-05 13:57:04.827629: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n","2024-07-05 13:57:04.827683: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n","2024-07-05 13:57:04.829098: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n","2024-07-05 13:57:04.836922: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n","To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n","2024-07-05 13:57:06.140607: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n","07/05/2024 13:57:14 - WARNING - llamafactory.hparams.parser - We recommend enable `upcast_layernorm` in quantized training.\n","07/05/2024 13:57:14 - INFO - llamafactory.hparams.parser - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, compute dtype: torch.bfloat16\n","[INFO|tokenization_utils_base.py:2161] 2024-07-05 13:57:15,127 >> loading file vocab.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/vocab.json\n","[INFO|tokenization_utils_base.py:2161] 2024-07-05 13:57:15,127 >> loading file merges.txt from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/merges.txt\n","[INFO|tokenization_utils_base.py:2161] 2024-07-05 13:57:15,127 >> loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/tokenizer.json\n","[INFO|tokenization_utils_base.py:2161] 2024-07-05 13:57:15,128 >> loading file 
added_tokens.json from cache at None\n","[INFO|tokenization_utils_base.py:2161] 2024-07-05 13:57:15,128 >> loading file special_tokens_map.json from cache at None\n","[INFO|tokenization_utils_base.py:2161] 2024-07-05 13:57:15,128 >> loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/tokenizer_config.json\n","[WARNING|logging.py:313] 2024-07-05 13:57:15,481 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n","07/05/2024 13:57:15 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n","07/05/2024 13:57:15 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n","07/05/2024 13:57:15 - INFO - llamafactory.data.loader - Loading dataset alpaca_mac.json...\n","/usr/local/lib/python3.10/dist-packages/multiprocess/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n"," self.pid = os.fork()\n","Converting format of dataset (num_proc=16): 100% 50/50 [00:00<00:00, 74.48 examples/s]\n","Running tokenizer on dataset (num_proc=16): 100% 50/50 [00:06<00:00, 8.23 examples/s]\n","input_ids:\n","[151644, 872, 198, 5501, 14683, 279, 2701, 8453, 1467, 1119, 6364, 323, 3410, 1172, 279, 24531, 2213, 11, 4302, 770, 624, 35987, 102895, 99164, 100324, 100717, 100095, 99509, 1773, 151645, 198, 151644, 77091, 198, 17949, 358, 572, 2617, 553, 264, 38835, 44486, 13, 151645]\n","inputs:\n","<|im_start|>user\n","Please translate the following Chinese text into English and provide only the translated content, nothing else.\n","全仗着狐仙搭救。<|im_end|>\n","<|im_start|>assistant\n","Because I was protected by a fox fairy.<|im_end|>\n","label_ids:\n","[-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 17949, 358, 572, 2617, 553, 264, 38835, 44486, 13, 151645]\n","labels:\n","Because I was protected by a fox fairy.<|im_end|>\n","[INFO|configuration_utils.py:733] 2024-07-05 13:57:23,938 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/config.json\n","[INFO|configuration_utils.py:800] 2024-07-05 13:57:23,940 >> Model config Qwen2Config {\n"," \"_name_or_path\": \"Qwen/Qwen2-7B-Instruct\",\n"," \"architectures\": [\n"," \"Qwen2ForCausalLM\"\n"," ],\n"," \"attention_dropout\": 0.0,\n"," \"bos_token_id\": 151643,\n"," \"eos_token_id\": 151645,\n"," \"hidden_act\": \"silu\",\n"," \"hidden_size\": 3584,\n"," \"initializer_range\": 0.02,\n"," \"intermediate_size\": 18944,\n"," \"max_position_embeddings\": 32768,\n"," \"max_window_layers\": 28,\n"," \"model_type\": \"qwen2\",\n"," \"num_attention_heads\": 28,\n"," \"num_hidden_layers\": 28,\n"," \"num_key_value_heads\": 4,\n"," \"rms_norm_eps\": 1e-06,\n"," \"rope_theta\": 1000000.0,\n"," \"sliding_window\": 131072,\n"," \"tie_word_embeddings\": false,\n"," \"torch_dtype\": \"bfloat16\",\n"," \"transformers_version\": \"4.42.3\",\n"," \"use_cache\": true,\n"," \"use_sliding_window\": false,\n"," \"vocab_size\": 152064\n","}\n","\n","07/05/2024 13:57:23 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n","[INFO|modeling_utils.py:3556] 2024-07-05 
13:57:23,981 >> loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/model.safetensors.index.json\n","[INFO|modeling_utils.py:1531] 2024-07-05 13:57:23,984 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n","[INFO|configuration_utils.py:1000] 2024-07-05 13:57:23,986 >> Generate config GenerationConfig {\n"," \"bos_token_id\": 151643,\n"," \"eos_token_id\": 151645\n","}\n","\n","Loading checkpoint shards: 100% 4/4 [01:08<00:00, 17.17s/it]\n","[INFO|modeling_utils.py:4364] 2024-07-05 13:58:35,245 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n","\n","[INFO|modeling_utils.py:4372] 2024-07-05 13:58:35,245 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-7B-Instruct.\n","If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n","[INFO|configuration_utils.py:955] 2024-07-05 13:58:35,351 >> loading configuration file generation_config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/generation_config.json\n","[INFO|configuration_utils.py:1000] 2024-07-05 13:58:35,351 >> Generate config GenerationConfig {\n"," \"bos_token_id\": 151643,\n"," \"do_sample\": true,\n"," \"eos_token_id\": [\n"," 151645,\n"," 151643\n"," ],\n"," \"pad_token_id\": 151643,\n"," \"repetition_penalty\": 1.05,\n"," \"temperature\": 0.7,\n"," \"top_k\": 20,\n"," \"top_p\": 0.8\n","}\n","\n","07/05/2024 13:58:35 - INFO - llamafactory.model.model_utils.checkpointing - Gradient checkpointing enabled.\n","07/05/2024 13:58:35 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n","07/05/2024 13:58:35 - INFO - llamafactory.model.adapter - Upcasting trainable params to float32.\n","07/05/2024 13:58:35 - INFO - llamafactory.model.adapter - Fine-tuning method: LoRA\n","07/05/2024 13:58:35 - INFO - llamafactory.model.model_utils.misc - Found linear modules: up_proj,k_proj,v_proj,o_proj,q_proj,down_proj,gate_proj\n","07/05/2024 13:58:36 - INFO - llamafactory.model.loader - trainable params: 20,185,088 || all params: 7,635,801,600 || trainable%: 0.2643\n","[INFO|trainer.py:642] 2024-07-05 13:58:36,229 >> Using auto half precision backend\n","07/05/2024 13:58:36 - INFO - llamafactory.train.trainer_utils - Using LoRA+ optimizer with loraplus lr ratio 16.00.\n","[INFO|trainer.py:2128] 2024-07-05 13:58:36,735 >> ***** Running training *****\n","[INFO|trainer.py:2129] 2024-07-05 13:58:36,735 >> Num examples = 49\n","[INFO|trainer.py:2130] 2024-07-05 13:58:36,735 >> Num Epochs = 6\n","[INFO|trainer.py:2131] 2024-07-05 13:58:36,735 >> Instantaneous batch size per device = 1\n","[INFO|trainer.py:2134] 2024-07-05 13:58:36,735 >> Total train batch size (w. 
parallel, distributed & accumulation) = 8\n","[INFO|trainer.py:2135] 2024-07-05 13:58:36,735 >> Gradient Accumulation steps = 8\n","[INFO|trainer.py:2136] 2024-07-05 13:58:36,735 >> Total optimization steps = 36\n","[INFO|trainer.py:2137] 2024-07-05 13:58:36,740 >> Number of trainable parameters = 20,185,088\n","[INFO|integration_utils.py:750] 2024-07-05 13:58:36,744 >> Automatic Weights & Biases logging enabled, to disable set os.environ[\"WANDB_DISABLED\"] = \"true\"\n","\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33minflaton-sg\u001b[0m (\u001b[33minflaton-ai\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n","\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.17.4\n","\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m/content/drive/MyDrive/logical-reasoning/llama-factory/wandb/run-20240705_135838-asc7inzt\u001b[0m\n","\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n","\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mqwen2_7b_mac_colab\u001b[0m\n","\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface\u001b[0m\n","\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface/runs/asc7inzt\u001b[0m\n","{'loss': 1.6899, 'grad_norm': 1.2224072217941284, 'learning_rate': 9.157348061512727e-05, 'epoch': 1.63}\n"," 28% 10/36 [03:47<10:03, 23.20s/it][INFO|trainer.py:3788] 2024-07-05 14:02:28,192 >> \n","***** Running Evaluation *****\n","[INFO|trainer.py:3790] 2024-07-05 14:02:28,193 >> Num examples = 1\n","[INFO|trainer.py:3793] 2024-07-05 14:02:28,193 >> Batch size = 1\n","\n"," \n","\u001b[A{'eval_loss': 1.853132724761963, 'eval_accuracy': 0.5957446808510638, 'eval_runtime': 1.1046, 'eval_samples_per_second': 0.905, 'eval_steps_per_second': 0.905, 'epoch': 1.63}\n"," 28% 10/36 [03:48<10:03, 23.20s/it]\n","100% 1/1 [00:00<00:00, 116.95it/s]\u001b[A\n"," \u001b[A[INFO|trainer.py:3478] 2024-07-05 14:02:29,300 >> Saving model checkpoint to /content/qwen2-7b/checkpoint-10\n","[INFO|configuration_utils.py:733] 2024-07-05 14:02:29,563 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/config.json\n","[INFO|configuration_utils.py:800] 2024-07-05 14:02:29,566 >> Model config Qwen2Config {\n"," \"architectures\": [\n"," \"Qwen2ForCausalLM\"\n"," ],\n"," \"attention_dropout\": 0.0,\n"," \"bos_token_id\": 151643,\n"," \"eos_token_id\": 151645,\n"," \"hidden_act\": \"silu\",\n"," \"hidden_size\": 3584,\n"," \"initializer_range\": 0.02,\n"," \"intermediate_size\": 18944,\n"," \"max_position_embeddings\": 32768,\n"," \"max_window_layers\": 28,\n"," \"model_type\": \"qwen2\",\n"," \"num_attention_heads\": 28,\n"," \"num_hidden_layers\": 28,\n"," \"num_key_value_heads\": 4,\n"," \"rms_norm_eps\": 1e-06,\n"," \"rope_theta\": 1000000.0,\n"," \"sliding_window\": 131072,\n"," \"tie_word_embeddings\": false,\n"," \"torch_dtype\": \"bfloat16\",\n"," \"transformers_version\": \"4.42.3\",\n"," \"use_cache\": true,\n"," \"use_sliding_window\": false,\n"," \"vocab_size\": 152064\n","}\n","\n","[INFO|tokenization_utils_base.py:2574] 2024-07-05 14:02:29,732 >> tokenizer config file saved in /content/qwen2-7b/checkpoint-10/tokenizer_config.json\n","[INFO|tokenization_utils_base.py:2583] 2024-07-05 14:02:29,732 >> Special tokens file saved in 
/content/qwen2-7b/checkpoint-10/special_tokens_map.json\n","{'loss': 0.469, 'grad_norm': 0.5590318441390991, 'learning_rate': 5e-05, 'epoch': 3.27}\n"," 56% 20/36 [07:45<06:07, 22.95s/it][INFO|trainer.py:3788] 2024-07-05 14:06:26,185 >> \n","***** Running Evaluation *****\n","[INFO|trainer.py:3790] 2024-07-05 14:06:26,185 >> Num examples = 1\n","[INFO|trainer.py:3793] 2024-07-05 14:06:26,186 >> Batch size = 1\n","\n"," \n","\u001b[A{'eval_loss': 2.2268199920654297, 'eval_accuracy': 0.6595744680851063, 'eval_runtime': 1.0803, 'eval_samples_per_second': 0.926, 'eval_steps_per_second': 0.926, 'epoch': 3.27}\n"," 56% 20/36 [07:46<06:07, 22.95s/it]\n","100% 1/1 [00:00<00:00, 267.73it/s]\u001b[A\n"," \u001b[A[INFO|trainer.py:3478] 2024-07-05 14:06:27,269 >> Saving model checkpoint to /content/qwen2-7b/checkpoint-20\n","[INFO|configuration_utils.py:733] 2024-07-05 14:06:27,661 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/config.json\n","[INFO|configuration_utils.py:800] 2024-07-05 14:06:27,663 >> Model config Qwen2Config {\n"," \"architectures\": [\n"," \"Qwen2ForCausalLM\"\n"," ],\n"," \"attention_dropout\": 0.0,\n"," \"bos_token_id\": 151643,\n"," \"eos_token_id\": 151645,\n"," \"hidden_act\": \"silu\",\n"," \"hidden_size\": 3584,\n"," \"initializer_range\": 0.02,\n"," \"intermediate_size\": 18944,\n"," \"max_position_embeddings\": 32768,\n"," \"max_window_layers\": 28,\n"," \"model_type\": \"qwen2\",\n"," \"num_attention_heads\": 28,\n"," \"num_hidden_layers\": 28,\n"," \"num_key_value_heads\": 4,\n"," \"rms_norm_eps\": 1e-06,\n"," \"rope_theta\": 1000000.0,\n"," \"sliding_window\": 131072,\n"," \"tie_word_embeddings\": false,\n"," \"torch_dtype\": \"bfloat16\",\n"," \"transformers_version\": \"4.42.3\",\n"," \"use_cache\": true,\n"," \"use_sliding_window\": false,\n"," \"vocab_size\": 152064\n","}\n","\n","[INFO|tokenization_utils_base.py:2574] 2024-07-05 14:06:27,911 >> tokenizer config file saved in /content/qwen2-7b/checkpoint-20/tokenizer_config.json\n","[INFO|tokenization_utils_base.py:2583] 2024-07-05 14:06:27,912 >> Special tokens file saved in /content/qwen2-7b/checkpoint-20/special_tokens_map.json\n","{'loss': 0.0697, 'grad_norm': 0.2154325395822525, 'learning_rate': 8.426519384872733e-06, 'epoch': 4.9}\n"," 83% 30/36 [11:38<02:18, 23.16s/it][INFO|trainer.py:3788] 2024-07-05 14:10:19,020 >> \n","***** Running Evaluation *****\n","[INFO|trainer.py:3790] 2024-07-05 14:10:19,021 >> Num examples = 1\n","[INFO|trainer.py:3793] 2024-07-05 14:10:19,021 >> Batch size = 1\n","\n"," \n","\u001b[A{'eval_loss': 2.377502202987671, 'eval_accuracy': 0.6595744680851063, 'eval_runtime': 1.0766, 'eval_samples_per_second': 0.929, 'eval_steps_per_second': 0.929, 'epoch': 4.9}\n"," 83% 30/36 [11:39<02:18, 23.16s/it]\n","100% 1/1 [00:00<00:00, 355.18it/s]\u001b[A\n"," \u001b[A[INFO|trainer.py:3478] 2024-07-05 14:10:20,099 >> Saving model checkpoint to /content/qwen2-7b/checkpoint-30\n","[INFO|configuration_utils.py:733] 2024-07-05 14:10:20,398 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/config.json\n","[INFO|configuration_utils.py:800] 2024-07-05 14:10:20,400 >> Model config Qwen2Config {\n"," \"architectures\": [\n"," \"Qwen2ForCausalLM\"\n"," ],\n"," \"attention_dropout\": 0.0,\n"," \"bos_token_id\": 151643,\n"," \"eos_token_id\": 151645,\n"," \"hidden_act\": 
\"silu\",\n"," \"hidden_size\": 3584,\n"," \"initializer_range\": 0.02,\n"," \"intermediate_size\": 18944,\n"," \"max_position_embeddings\": 32768,\n"," \"max_window_layers\": 28,\n"," \"model_type\": \"qwen2\",\n"," \"num_attention_heads\": 28,\n"," \"num_hidden_layers\": 28,\n"," \"num_key_value_heads\": 4,\n"," \"rms_norm_eps\": 1e-06,\n"," \"rope_theta\": 1000000.0,\n"," \"sliding_window\": 131072,\n"," \"tie_word_embeddings\": false,\n"," \"torch_dtype\": \"bfloat16\",\n"," \"transformers_version\": \"4.42.3\",\n"," \"use_cache\": true,\n"," \"use_sliding_window\": false,\n"," \"vocab_size\": 152064\n","}\n","\n","[INFO|tokenization_utils_base.py:2574] 2024-07-05 14:10:20,639 >> tokenizer config file saved in /content/qwen2-7b/checkpoint-30/tokenizer_config.json\n","[INFO|tokenization_utils_base.py:2583] 2024-07-05 14:10:20,640 >> Special tokens file saved in /content/qwen2-7b/checkpoint-30/special_tokens_map.json\n","100% 36/36 [13:59<00:00, 22.57s/it][INFO|trainer.py:3478] 2024-07-05 14:12:39,968 >> Saving model checkpoint to /content/qwen2-7b/checkpoint-36\n","[INFO|configuration_utils.py:733] 2024-07-05 14:12:40,225 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/config.json\n","[INFO|configuration_utils.py:800] 2024-07-05 14:12:40,227 >> Model config Qwen2Config {\n"," \"architectures\": [\n"," \"Qwen2ForCausalLM\"\n"," ],\n"," \"attention_dropout\": 0.0,\n"," \"bos_token_id\": 151643,\n"," \"eos_token_id\": 151645,\n"," \"hidden_act\": \"silu\",\n"," \"hidden_size\": 3584,\n"," \"initializer_range\": 0.02,\n"," \"intermediate_size\": 18944,\n"," \"max_position_embeddings\": 32768,\n"," \"max_window_layers\": 28,\n"," \"model_type\": \"qwen2\",\n"," \"num_attention_heads\": 28,\n"," \"num_hidden_layers\": 28,\n"," \"num_key_value_heads\": 4,\n"," \"rms_norm_eps\": 1e-06,\n"," \"rope_theta\": 1000000.0,\n"," \"sliding_window\": 131072,\n"," \"tie_word_embeddings\": false,\n"," \"torch_dtype\": \"bfloat16\",\n"," \"transformers_version\": \"4.42.3\",\n"," \"use_cache\": true,\n"," \"use_sliding_window\": false,\n"," \"vocab_size\": 152064\n","}\n","\n","[INFO|tokenization_utils_base.py:2574] 2024-07-05 14:12:40,385 >> tokenizer config file saved in /content/qwen2-7b/checkpoint-36/tokenizer_config.json\n","[INFO|tokenization_utils_base.py:2583] 2024-07-05 14:12:40,385 >> Special tokens file saved in /content/qwen2-7b/checkpoint-36/special_tokens_map.json\n","[INFO|trainer.py:2383] 2024-07-05 14:12:41,447 >> \n","\n","Training completed. 
Do not forget to share your model on huggingface.co/models =)\n","\n","\n","{'train_runtime': 844.7078, 'train_samples_per_second': 0.348, 'train_steps_per_second': 0.043, 'train_loss': 0.6206485204812553, 'epoch': 5.88}\n","100% 36/36 [14:00<00:00, 23.35s/it]\n","[INFO|trainer.py:3478] 2024-07-05 14:12:41,453 >> Saving model checkpoint to /content/qwen2-7b/\n","[INFO|configuration_utils.py:733] 2024-07-05 14:12:41,775 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/config.json\n","[INFO|configuration_utils.py:800] 2024-07-05 14:12:41,777 >> Model config Qwen2Config {\n"," \"architectures\": [\n"," \"Qwen2ForCausalLM\"\n"," ],\n"," \"attention_dropout\": 0.0,\n"," \"bos_token_id\": 151643,\n"," \"eos_token_id\": 151645,\n"," \"hidden_act\": \"silu\",\n"," \"hidden_size\": 3584,\n"," \"initializer_range\": 0.02,\n"," \"intermediate_size\": 18944,\n"," \"max_position_embeddings\": 32768,\n"," \"max_window_layers\": 28,\n"," \"model_type\": \"qwen2\",\n"," \"num_attention_heads\": 28,\n"," \"num_hidden_layers\": 28,\n"," \"num_key_value_heads\": 4,\n"," \"rms_norm_eps\": 1e-06,\n"," \"rope_theta\": 1000000.0,\n"," \"sliding_window\": 131072,\n"," \"tie_word_embeddings\": false,\n"," \"torch_dtype\": \"bfloat16\",\n"," \"transformers_version\": \"4.42.3\",\n"," \"use_cache\": true,\n"," \"use_sliding_window\": false,\n"," \"vocab_size\": 152064\n","}\n","\n","[INFO|tokenization_utils_base.py:2574] 2024-07-05 14:12:41,998 >> tokenizer config file saved in /content/qwen2-7b/tokenizer_config.json\n","[INFO|tokenization_utils_base.py:2583] 2024-07-05 14:12:41,999 >> Special tokens file saved in /content/qwen2-7b/special_tokens_map.json\n","***** train metrics *****\n"," epoch = 5.8776\n"," total_flos = 964581GF\n"," train_loss = 0.6206\n"," train_runtime = 0:14:04.70\n"," train_samples_per_second = 0.348\n"," train_steps_per_second = 0.043\n","Figure saved at: /content/qwen2-7b/training_loss.png\n","Figure saved at: /content/qwen2-7b/training_eval_loss.png\n","Figure saved at: /content/qwen2-7b/training_eval_accuracy.png\n","[INFO|trainer.py:3788] 2024-07-05 14:12:43,408 >> \n","***** Running Evaluation *****\n","[INFO|trainer.py:3790] 2024-07-05 14:12:43,408 >> Num examples = 1\n","[INFO|trainer.py:3793] 2024-07-05 14:12:43,408 >> Batch size = 1\n","100% 1/1 [00:00<00:00, 682.67it/s]\n","***** eval metrics *****\n"," epoch = 5.8776\n"," eval_accuracy = 0.6383\n"," eval_loss = 2.4015\n"," eval_runtime = 0:00:01.19\n"," eval_samples_per_second = 0.835\n"," eval_steps_per_second = 0.835\n","[INFO|modelcard.py:449] 2024-07-05 14:12:44,609 >> Dropping the following result as it does not have all the necessary fields:\n","{'task': {'name': 'Causal Language Modeling', 'type': 'text-generation'}, 'metrics': [{'name': 'Accuracy', 'type': 'accuracy', 'value': 0.6382978723404256}]}\n","\u001b[34m\u001b[1mwandb\u001b[0m: \n","\u001b[34m\u001b[1mwandb\u001b[0m: Run history:\n","\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy ▁██▆\n","\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss ▁▆██\n","\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime ▃▁▁█\n","\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second ▆██▁\n","\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second ▆██▁\n","\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch ▁▁▄▄▆▆██\n","\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step ▁▁▄▄▆▆██\n","\u001b[34m\u001b[1mwandb\u001b[0m: train/grad_norm █▃▁\n","\u001b[34m\u001b[1mwandb\u001b[0m: 
train/learning_rate █▄▁\n","\u001b[34m\u001b[1mwandb\u001b[0m: train/loss █▃▁\n","\u001b[34m\u001b[1mwandb\u001b[0m: \n","\u001b[34m\u001b[1mwandb\u001b[0m: Run summary:\n","\u001b[34m\u001b[1mwandb\u001b[0m: eval/accuracy 0.6383\n","\u001b[34m\u001b[1mwandb\u001b[0m: eval/loss 2.40147\n","\u001b[34m\u001b[1mwandb\u001b[0m: eval/runtime 1.1974\n","\u001b[34m\u001b[1mwandb\u001b[0m: eval/samples_per_second 0.835\n","\u001b[34m\u001b[1mwandb\u001b[0m: eval/steps_per_second 0.835\n","\u001b[34m\u001b[1mwandb\u001b[0m: total_flos 1035711228174336.0\n","\u001b[34m\u001b[1mwandb\u001b[0m: train/epoch 5.87755\n","\u001b[34m\u001b[1mwandb\u001b[0m: train/global_step 36\n","\u001b[34m\u001b[1mwandb\u001b[0m: train/grad_norm 0.21543\n","\u001b[34m\u001b[1mwandb\u001b[0m: train/learning_rate 1e-05\n","\u001b[34m\u001b[1mwandb\u001b[0m: train/loss 0.0697\n","\u001b[34m\u001b[1mwandb\u001b[0m: train_loss 0.62065\n","\u001b[34m\u001b[1mwandb\u001b[0m: train_runtime 844.7078\n","\u001b[34m\u001b[1mwandb\u001b[0m: train_samples_per_second 0.348\n","\u001b[34m\u001b[1mwandb\u001b[0m: train_steps_per_second 0.043\n","\u001b[34m\u001b[1mwandb\u001b[0m: \n","\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mqwen2_7b_mac_colab\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface/runs/asc7inzt\u001b[0m\n","\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at: \u001b[34m\u001b[4mhttps://wandb.ai/inflaton-ai/huggingface\u001b[0m\n","\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)\n","\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20240705_135838-asc7inzt/logs\u001b[0m\n","\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m The new W&B backend becomes opt-out in version 0.18.0; try it out with `wandb.require(\"core\")`! 
See https://wandb.me/wandb-core for more information.\n","CPU times: user 5.21 s, sys: 759 ms, total: 5.97 s\n","Wall time: 15min 54s\n"]}],"source":["%%time\n","\n","!chmod +x ./scripts/tune-lf.sh\n","!./scripts/tune-lf.sh config/qwen2_7b_lora_sft.yaml"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"CGfEYNxdOU_c"},"outputs":[],"source":["def evaluate_model_all_epochs(model_name, num_train_epochs, adapter_path_base, load_in_4bit=True):\n"," os.environ[\"MODEL_NAME\"] = model_name\n"," os.environ[\"LOAD_IN_4BIT\"] = \"true\" if load_in_4bit else \"false\"\n"," for i in range(num_train_epochs + 1):\n"," print(f\"Epoch {i}\")\n"," if i == 0:\n"," os.unsetenv(\"ADAPTER_NAME_OR_PATH\")\n"," else:\n"," adapter_path = f\"{adapter_path_base}/checkpoint-{560 * i}\"\n"," os.environ[\"ADAPTER_NAME_OR_PATH\"] = adapter_path\n","\n"," !python llm_toolkit/eval_lf.py"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":2572012,"status":"ok","timestamp":1720192682753,"user":{"displayName":"HUANG DONGHAO _","userId":"00977795705617022768"},"user_tz":-480},"id":"-pBANc_IOfyo","outputId":"ae0c77c6-bf6d-4396-dbf7-25467d67d8a1"},"outputs":[{"name":"stdout","output_type":"stream","text":["Epoch 0\n","2024-07-05 14:35:13.920041: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n","2024-07-05 14:35:13.920114: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n","2024-07-05 14:35:13.921639: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n","2024-07-05 14:35:13.929529: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n","To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n","2024-07-05 14:35:15.218976: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n","loading env vars from: /content/drive/MyDrive/logical-reasoning/.env\n","Adding /content/drive/MyDrive/logical-reasoning to sys.path\n","loading /content/drive/MyDrive/logical-reasoning/llm_toolkit/translation_utils.py\n","[nltk_data] Downloading package wordnet to /root/nltk_data...\n","[nltk_data] Package wordnet is already up-to-date!\n","[nltk_data] Downloading package punkt to /root/nltk_data...\n","[nltk_data] Package punkt is already up-to-date!\n","[nltk_data] Downloading package omw-1.4 to /root/nltk_data...\n","[nltk_data] Package omw-1.4 is already up-to-date!\n","Qwen/Qwen2-7B-Instruct None True datasets/mac/mac.tsv results/mac-results_lf.csv\n","(1) GPU = Tesla T4. Max memory = 14.748 GB.\n","0.002 GB of memory reserved.\n","loading model: Qwen/Qwen2-7B-Instruct\n","Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n","Loading checkpoint shards: 100% 4/4 [01:28<00:00, 22.12s/it]\n","(2) GPU = Tesla T4. 
Max memory = 14.748 GB.\n","6.232 GB of memory reserved.\n","loading train/test data files\n","Generating train split: 4528 examples [00:00, 60385.20 examples/s]\n","Generating test split: 10 examples [00:00, 1826.63 examples/s]\n","Map: 100% 4528/4528 [00:00<00:00, 21640.94 examples/s]\n","Map: 100% 10/10 [00:00<00:00, 1769.00 examples/s]\n","DatasetDict({\n"," train: Dataset({\n"," features: ['chinese', 'english', 'text', 'prompt'],\n"," num_rows: 4528\n"," })\n"," test: Dataset({\n"," features: ['chinese', 'english', 'text', 'prompt'],\n"," num_rows: 10\n"," })\n","})\n","Evaluating model: Qwen/Qwen2-7B-Instruct\n"," 0% 0/10 [00:00\n","--------\n","step 2: Old Ander raised his gun, squinted one of his triangular eyes, and with a pull of the trigger, the bullet roared forth, sending golden sparrows raining down like hailstones. Iron sand spewed through the willow branches, making a sizzling sound.\n","--------\n","step 3: Old Ander raised his gun, squinted one of his triangular eyes, and with a pull of the trigger, the bullet roared forth, sending golden sparrows raining down like hailstones. Iron sand spewed through the willow branches, making a sizzling sound.\n","100% 10/10 [08:28<00:00, 50.89s/it]\n","(3) GPU = Tesla T4. Max memory = 14.748 GB.\n","6.576 GB of memory reserved.\n"," chinese ... Qwen/Qwen2-7B-Instruct\n","0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... Old Ander raised his gun, squinted one of his ...\n","\n","[1 rows x 3 columns]\n","{'accuracy': 0.0, 'correct_ids': [], 'meteor': 0.31555917339095263, 'bleu_scores': {'bleu': 0.09496927858571295, 'precisions': [0.4173027989821883, 0.1227154046997389, 0.058981233243967826, 0.03581267217630854], 'brevity_penalty': 0.9312320176895069, 'length_ratio': 0.9334916864608076, 'translation_length': 393, 'reference_length': 421}, 'rouge_scores': {'rouge1': 0.42640287214698525, 'rouge2': 0.12123999788237569, 'rougeL': 0.34396454062304865, 'rougeLsum': 0.3434014470826111}}\n","Epoch 1\n","2024-07-05 14:45:41.113642: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n","2024-07-05 14:45:41.113738: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n","2024-07-05 14:45:41.115795: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n","2024-07-05 14:45:41.133846: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n","To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n","2024-07-05 14:45:42.538508: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n","loading env vars from: /content/drive/MyDrive/logical-reasoning/.env\n","Adding /content/drive/MyDrive/logical-reasoning to sys.path\n","loading /content/drive/MyDrive/logical-reasoning/llm_toolkit/translation_utils.py\n","[nltk_data] Downloading package wordnet to /root/nltk_data...\n","[nltk_data] Package wordnet is already up-to-date!\n","[nltk_data] Downloading package punkt to /root/nltk_data...\n","[nltk_data] Package punkt is already 
up-to-date!\n","[nltk_data] Downloading package omw-1.4 to /root/nltk_data...\n","[nltk_data] Package omw-1.4 is already up-to-date!\n","Qwen/Qwen2-7B-Instruct /content/qwen2-7b/checkpoint-10 True datasets/mac/mac.tsv results/mac-results_lf.csv\n","(1) GPU = Tesla T4. Max memory = 14.748 GB.\n","0.002 GB of memory reserved.\n","loading model: Qwen/Qwen2-7B-Instruct\n","[INFO|tokenization_utils_base.py:2161] 2024-07-05 14:45:54,050 >> loading file vocab.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/vocab.json\n","[INFO|tokenization_utils_base.py:2161] 2024-07-05 14:45:54,051 >> loading file merges.txt from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/merges.txt\n","[INFO|tokenization_utils_base.py:2161] 2024-07-05 14:45:54,051 >> loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/tokenizer.json\n","[INFO|tokenization_utils_base.py:2161] 2024-07-05 14:45:54,051 >> loading file added_tokens.json from cache at None\n","[INFO|tokenization_utils_base.py:2161] 2024-07-05 14:45:54,051 >> loading file special_tokens_map.json from cache at None\n","[INFO|tokenization_utils_base.py:2161] 2024-07-05 14:45:54,051 >> loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/tokenizer_config.json\n","[WARNING|logging.py:313] 2024-07-05 14:45:54,320 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n","07/05/2024 14:45:54 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n","07/05/2024 14:45:54 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n","[INFO|configuration_utils.py:733] 2024-07-05 14:45:54,422 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/config.json\n","[INFO|configuration_utils.py:800] 2024-07-05 14:45:54,423 >> Model config Qwen2Config {\n"," \"_name_or_path\": \"Qwen/Qwen2-7B-Instruct\",\n"," \"architectures\": [\n"," \"Qwen2ForCausalLM\"\n"," ],\n"," \"attention_dropout\": 0.0,\n"," \"bos_token_id\": 151643,\n"," \"eos_token_id\": 151645,\n"," \"hidden_act\": \"silu\",\n"," \"hidden_size\": 3584,\n"," \"initializer_range\": 0.02,\n"," \"intermediate_size\": 18944,\n"," \"max_position_embeddings\": 32768,\n"," \"max_window_layers\": 28,\n"," \"model_type\": \"qwen2\",\n"," \"num_attention_heads\": 28,\n"," \"num_hidden_layers\": 28,\n"," \"num_key_value_heads\": 4,\n"," \"rms_norm_eps\": 1e-06,\n"," \"rope_theta\": 1000000.0,\n"," \"sliding_window\": 131072,\n"," \"tie_word_embeddings\": false,\n"," \"torch_dtype\": \"bfloat16\",\n"," \"transformers_version\": \"4.42.3\",\n"," \"use_cache\": true,\n"," \"use_sliding_window\": false,\n"," \"vocab_size\": 152064\n","}\n","\n","07/05/2024 14:45:54 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n","07/05/2024 14:45:54 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n","[INFO|modeling_utils.py:3556] 2024-07-05 14:45:54,477 >> loading weights file model.safetensors from cache at 
/root/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/model.safetensors.index.json\n","[INFO|modeling_utils.py:1531] 2024-07-05 14:45:54,481 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n","[INFO|configuration_utils.py:1000] 2024-07-05 14:45:54,482 >> Generate config GenerationConfig {\n"," \"bos_token_id\": 151643,\n"," \"eos_token_id\": 151645\n","}\n","\n","Loading checkpoint shards: 100% 4/4 [01:19<00:00, 19.98s/it]\n","[INFO|modeling_utils.py:4364] 2024-07-05 14:47:17,104 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n","\n","[INFO|modeling_utils.py:4372] 2024-07-05 14:47:17,104 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-7B-Instruct.\n","If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n","[INFO|configuration_utils.py:955] 2024-07-05 14:47:17,209 >> loading configuration file generation_config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/generation_config.json\n","[INFO|configuration_utils.py:1000] 2024-07-05 14:47:17,210 >> Generate config GenerationConfig {\n"," \"bos_token_id\": 151643,\n"," \"do_sample\": true,\n"," \"eos_token_id\": [\n"," 151645,\n"," 151643\n"," ],\n"," \"pad_token_id\": 151643,\n"," \"repetition_penalty\": 1.05,\n"," \"temperature\": 0.7,\n"," \"top_k\": 20,\n"," \"top_p\": 0.8\n","}\n","\n","07/05/2024 14:47:17 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n","07/05/2024 14:47:18 - INFO - llamafactory.model.adapter - Loaded adapter(s): /content/qwen2-7b/checkpoint-10\n","07/05/2024 14:47:18 - INFO - llamafactory.model.loader - all params: 7,635,801,600\n","(2) GPU = Tesla T4. Max memory = 14.748 GB.\n","5.924 GB of memory reserved.\n","loading train/test data files\n","Map: 100% 4528/4528 [00:00<00:00, 31066.89 examples/s]\n","Map: 100% 10/10 [00:00<00:00, 2164.36 examples/s]\n","DatasetDict({\n"," train: Dataset({\n"," features: ['chinese', 'english', 'text', 'prompt'],\n"," num_rows: 4528\n"," })\n"," test: Dataset({\n"," features: ['chinese', 'english', 'text', 'prompt'],\n"," num_rows: 10\n"," })\n","})\n","Evaluating model: Qwen/Qwen2-7B-Instruct\n"," 0% 0/10 [00:00\n","--------\n","step 2: Gao Ang raised his gun and squinted one of his triangular eyes. 'Bang!' He pulled the trigger. The golden sparrows were falling like a shower of hailstones. The iron shot flew from between the willow branches, making a sharp cracking noise.\n","--------\n","step 3: Gao Ang raised his gun and squinted one of his triangular eyes. 'Bang!' He pulled the trigger. The golden sparrows were falling like a shower of hailstones. The iron shot flew from between the willow branches, making a sharp cracking noise.\n","100% 10/10 [08:13<00:00, 49.36s/it]\n","(3) GPU = Tesla T4. Max memory = 14.748 GB.\n","6.162 GB of memory reserved.\n"," chinese ... Qwen/Qwen2-7B-Instruct_checkpoint-10\n","0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... 
Gao Ang raised his gun and squinted one of his...\n","\n","[1 rows x 4 columns]\n","{'accuracy': 0.0, 'correct_ids': [], 'meteor': 0.31477199342150325, 'bleu_scores': {'bleu': 0.10019593837476333, 'precisions': [0.4350132625994695, 0.1307901907356948, 0.0700280112044818, 0.040345821325648415], 'brevity_penalty': 0.8898424313725933, 'length_ratio': 0.8954869358669834, 'translation_length': 377, 'reference_length': 421}, 'rouge_scores': {'rouge1': 0.43683076930784703, 'rouge2': 0.12351426862697071, 'rougeL': 0.33952193264860053, 'rougeLsum': 0.33803585641861583}}\n","Epoch 2\n","2024-07-05 14:55:43.050465: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n","2024-07-05 14:55:43.050555: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n","2024-07-05 14:55:43.052764: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n","2024-07-05 14:55:43.072851: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n","To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n","2024-07-05 14:55:44.668152: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n","loading env vars from: /content/drive/MyDrive/logical-reasoning/.env\n","Adding /content/drive/MyDrive/logical-reasoning to sys.path\n","loading /content/drive/MyDrive/logical-reasoning/llm_toolkit/translation_utils.py\n","[nltk_data] Downloading package wordnet to /root/nltk_data...\n","[nltk_data] Package wordnet is already up-to-date!\n","[nltk_data] Downloading package punkt to /root/nltk_data...\n","[nltk_data] Package punkt is already up-to-date!\n","[nltk_data] Downloading package omw-1.4 to /root/nltk_data...\n","[nltk_data] Package omw-1.4 is already up-to-date!\n","Qwen/Qwen2-7B-Instruct /content/qwen2-7b/checkpoint-20 True datasets/mac/mac.tsv results/mac-results_lf.csv\n","(1) GPU = Tesla T4. 
Max memory = 14.748 GB.\n","0.002 GB of memory reserved.\n","loading model: Qwen/Qwen2-7B-Instruct\n","[INFO|tokenization_utils_base.py:2161] 2024-07-05 14:55:56,676 >> loading file vocab.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/vocab.json\n","[INFO|tokenization_utils_base.py:2161] 2024-07-05 14:55:56,676 >> loading file merges.txt from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/merges.txt\n","[INFO|tokenization_utils_base.py:2161] 2024-07-05 14:55:56,676 >> loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/tokenizer.json\n","[INFO|tokenization_utils_base.py:2161] 2024-07-05 14:55:56,676 >> loading file added_tokens.json from cache at None\n","[INFO|tokenization_utils_base.py:2161] 2024-07-05 14:55:56,677 >> loading file special_tokens_map.json from cache at None\n","[INFO|tokenization_utils_base.py:2161] 2024-07-05 14:55:56,677 >> loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/tokenizer_config.json\n","[WARNING|logging.py:313] 2024-07-05 14:55:56,943 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n","07/05/2024 14:55:56 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n","07/05/2024 14:55:56 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n","[INFO|configuration_utils.py:733] 2024-07-05 14:55:57,043 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/config.json\n","[INFO|configuration_utils.py:800] 2024-07-05 14:55:57,045 >> Model config Qwen2Config {\n"," \"_name_or_path\": \"Qwen/Qwen2-7B-Instruct\",\n"," \"architectures\": [\n"," \"Qwen2ForCausalLM\"\n"," ],\n"," \"attention_dropout\": 0.0,\n"," \"bos_token_id\": 151643,\n"," \"eos_token_id\": 151645,\n"," \"hidden_act\": \"silu\",\n"," \"hidden_size\": 3584,\n"," \"initializer_range\": 0.02,\n"," \"intermediate_size\": 18944,\n"," \"max_position_embeddings\": 32768,\n"," \"max_window_layers\": 28,\n"," \"model_type\": \"qwen2\",\n"," \"num_attention_heads\": 28,\n"," \"num_hidden_layers\": 28,\n"," \"num_key_value_heads\": 4,\n"," \"rms_norm_eps\": 1e-06,\n"," \"rope_theta\": 1000000.0,\n"," \"sliding_window\": 131072,\n"," \"tie_word_embeddings\": false,\n"," \"torch_dtype\": \"bfloat16\",\n"," \"transformers_version\": \"4.42.3\",\n"," \"use_cache\": true,\n"," \"use_sliding_window\": false,\n"," \"vocab_size\": 152064\n","}\n","\n","07/05/2024 14:55:57 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n","07/05/2024 14:55:57 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n","[INFO|modeling_utils.py:3556] 2024-07-05 14:55:57,085 >> loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/model.safetensors.index.json\n","[INFO|modeling_utils.py:1531] 2024-07-05 14:55:57,089 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n","[INFO|configuration_utils.py:1000] 2024-07-05 14:55:57,090 >> Generate config 
GenerationConfig {\n"," \"bos_token_id\": 151643,\n"," \"eos_token_id\": 151645\n","}\n","\n","Loading checkpoint shards: 100% 4/4 [01:24<00:00, 21.15s/it]\n","[INFO|modeling_utils.py:4364] 2024-07-05 14:57:24,512 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n","\n","[INFO|modeling_utils.py:4372] 2024-07-05 14:57:24,512 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-7B-Instruct.\n","If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n","[INFO|configuration_utils.py:955] 2024-07-05 14:57:24,618 >> loading configuration file generation_config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/generation_config.json\n","[INFO|configuration_utils.py:1000] 2024-07-05 14:57:24,619 >> Generate config GenerationConfig {\n"," \"bos_token_id\": 151643,\n"," \"do_sample\": true,\n"," \"eos_token_id\": [\n"," 151645,\n"," 151643\n"," ],\n"," \"pad_token_id\": 151643,\n"," \"repetition_penalty\": 1.05,\n"," \"temperature\": 0.7,\n"," \"top_k\": 20,\n"," \"top_p\": 0.8\n","}\n","\n","07/05/2024 14:57:25 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n","07/05/2024 14:57:26 - INFO - llamafactory.model.adapter - Loaded adapter(s): /content/qwen2-7b/checkpoint-20\n","07/05/2024 14:57:26 - INFO - llamafactory.model.loader - all params: 7,635,801,600\n","(2) GPU = Tesla T4. Max memory = 14.748 GB.\n","5.924 GB of memory reserved.\n","loading train/test data files\n","DatasetDict({\n"," train: Dataset({\n"," features: ['chinese', 'english', 'text', 'prompt'],\n"," num_rows: 4528\n"," })\n"," test: Dataset({\n"," features: ['chinese', 'english', 'text', 'prompt'],\n"," num_rows: 10\n"," })\n","})\n","Evaluating model: Qwen/Qwen2-7B-Instruct\n"," 0% 0/10 [00:00\n","--------\n","step 2: Da-ying picked up his rifle and narrowed one of his triangular eyes as he cocked the hammer and fired. The lead shot fell out of the sky like a flock of golden sparrows, breaking the branches of the poplar trees as they flew past with a crackling sound.\n","--------\n","step 3: Da-ying picked up his rifle and narrowed one of his triangular eyes as he cocked the hammer and fired. The lead shot fell out of the sky like a flock of golden sparrows, breaking the branches of the poplar trees as they flew past with a crackling sound.\n","100% 10/10 [09:45<00:00, 58.52s/it]\n","(3) GPU = Tesla T4. Max memory = 14.748 GB.\n","6.162 GB of memory reserved.\n"," chinese ... Qwen/Qwen2-7B-Instruct_checkpoint-20\n","0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... 
Da-ying picked up his rifle and narrowed one o...\n","\n","[1 rows x 5 columns]\n","{'accuracy': 0.0, 'correct_ids': [], 'meteor': 0.34782623384004024, 'bleu_scores': {'bleu': 0.10822083067201967, 'precisions': [0.40749414519906324, 0.1342925659472422, 0.06633906633906633, 0.037783375314861464], 'brevity_penalty': 1.0, 'length_ratio': 1.0142517814726841, 'translation_length': 427, 'reference_length': 421}, 'rouge_scores': {'rouge1': 0.4276595156489868, 'rouge2': 0.13951949543176667, 'rougeL': 0.3555178735824066, 'rougeLsum': 0.3582851565031803}}\n","Epoch 3\n","2024-07-05 15:07:22.824807: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n","2024-07-05 15:07:22.824860: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n","2024-07-05 15:07:22.826427: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n","2024-07-05 15:07:22.841647: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n","To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n","2024-07-05 15:07:24.374184: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n","loading env vars from: /content/drive/MyDrive/logical-reasoning/.env\n","Adding /content/drive/MyDrive/logical-reasoning to sys.path\n","loading /content/drive/MyDrive/logical-reasoning/llm_toolkit/translation_utils.py\n","[nltk_data] Downloading package wordnet to /root/nltk_data...\n","[nltk_data] Package wordnet is already up-to-date!\n","[nltk_data] Downloading package punkt to /root/nltk_data...\n","[nltk_data] Package punkt is already up-to-date!\n","[nltk_data] Downloading package omw-1.4 to /root/nltk_data...\n","[nltk_data] Package omw-1.4 is already up-to-date!\n","Qwen/Qwen2-7B-Instruct /content/qwen2-7b/checkpoint-30 True datasets/mac/mac.tsv results/mac-results_lf.csv\n","(1) GPU = Tesla T4. 
Max memory = 14.748 GB.\n","0.002 GB of memory reserved.\n","loading model: Qwen/Qwen2-7B-Instruct\n","[INFO|tokenization_utils_base.py:2161] 2024-07-05 15:07:37,165 >> loading file vocab.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/vocab.json\n","[INFO|tokenization_utils_base.py:2161] 2024-07-05 15:07:37,165 >> loading file merges.txt from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/merges.txt\n","[INFO|tokenization_utils_base.py:2161] 2024-07-05 15:07:37,165 >> loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/tokenizer.json\n","[INFO|tokenization_utils_base.py:2161] 2024-07-05 15:07:37,165 >> loading file added_tokens.json from cache at None\n","[INFO|tokenization_utils_base.py:2161] 2024-07-05 15:07:37,165 >> loading file special_tokens_map.json from cache at None\n","[INFO|tokenization_utils_base.py:2161] 2024-07-05 15:07:37,165 >> loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/tokenizer_config.json\n","[WARNING|logging.py:313] 2024-07-05 15:07:37,429 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n","07/05/2024 15:07:37 - INFO - llamafactory.data.template - Replace eos token: <|im_end|>\n","07/05/2024 15:07:37 - INFO - llamafactory.data.template - Add <|im_start|> to stop words.\n","[INFO|configuration_utils.py:733] 2024-07-05 15:07:37,527 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/config.json\n","[INFO|configuration_utils.py:800] 2024-07-05 15:07:37,529 >> Model config Qwen2Config {\n"," \"_name_or_path\": \"Qwen/Qwen2-7B-Instruct\",\n"," \"architectures\": [\n"," \"Qwen2ForCausalLM\"\n"," ],\n"," \"attention_dropout\": 0.0,\n"," \"bos_token_id\": 151643,\n"," \"eos_token_id\": 151645,\n"," \"hidden_act\": \"silu\",\n"," \"hidden_size\": 3584,\n"," \"initializer_range\": 0.02,\n"," \"intermediate_size\": 18944,\n"," \"max_position_embeddings\": 32768,\n"," \"max_window_layers\": 28,\n"," \"model_type\": \"qwen2\",\n"," \"num_attention_heads\": 28,\n"," \"num_hidden_layers\": 28,\n"," \"num_key_value_heads\": 4,\n"," \"rms_norm_eps\": 1e-06,\n"," \"rope_theta\": 1000000.0,\n"," \"sliding_window\": 131072,\n"," \"tie_word_embeddings\": false,\n"," \"torch_dtype\": \"bfloat16\",\n"," \"transformers_version\": \"4.42.3\",\n"," \"use_cache\": true,\n"," \"use_sliding_window\": false,\n"," \"vocab_size\": 152064\n","}\n","\n","07/05/2024 15:07:37 - INFO - llamafactory.model.model_utils.quantization - Quantizing model to 4 bit with bitsandbytes.\n","07/05/2024 15:07:37 - INFO - llamafactory.model.patcher - Using KV cache for faster generation.\n","[INFO|modeling_utils.py:3556] 2024-07-05 15:07:37,571 >> loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/model.safetensors.index.json\n","[INFO|modeling_utils.py:1531] 2024-07-05 15:07:37,575 >> Instantiating Qwen2ForCausalLM model under default dtype torch.bfloat16.\n","[INFO|configuration_utils.py:1000] 2024-07-05 15:07:37,576 >> Generate config 
GenerationConfig {\n"," \"bos_token_id\": 151643,\n"," \"eos_token_id\": 151645\n","}\n","\n","Loading checkpoint shards: 100% 4/4 [01:25<00:00, 21.33s/it]\n","[INFO|modeling_utils.py:4364] 2024-07-05 15:09:05,971 >> All model checkpoint weights were used when initializing Qwen2ForCausalLM.\n","\n","[INFO|modeling_utils.py:4372] 2024-07-05 15:09:05,971 >> All the weights of Qwen2ForCausalLM were initialized from the model checkpoint at Qwen/Qwen2-7B-Instruct.\n","If your task is similar to the task the model of the checkpoint was trained on, you can already use Qwen2ForCausalLM for predictions without further training.\n","[INFO|configuration_utils.py:955] 2024-07-05 15:09:06,127 >> loading configuration file generation_config.json from cache at /root/.cache/huggingface/hub/models--Qwen--Qwen2-7B-Instruct/snapshots/41c66b0be1c3081f13defc6bdf946c2ef240d6a6/generation_config.json\n","[INFO|configuration_utils.py:1000] 2024-07-05 15:09:06,127 >> Generate config GenerationConfig {\n"," \"bos_token_id\": 151643,\n"," \"do_sample\": true,\n"," \"eos_token_id\": [\n"," 151645,\n"," 151643\n"," ],\n"," \"pad_token_id\": 151643,\n"," \"repetition_penalty\": 1.05,\n"," \"temperature\": 0.7,\n"," \"top_k\": 20,\n"," \"top_p\": 0.8\n","}\n","\n","07/05/2024 15:09:06 - INFO - llamafactory.model.model_utils.attention - Using torch SDPA for faster training and inference.\n","07/05/2024 15:09:07 - INFO - llamafactory.model.adapter - Loaded adapter(s): /content/qwen2-7b/checkpoint-30\n","07/05/2024 15:09:07 - INFO - llamafactory.model.loader - all params: 7,635,801,600\n","(2) GPU = Tesla T4. Max memory = 14.748 GB.\n","5.924 GB of memory reserved.\n","loading train/test data files\n","DatasetDict({\n"," train: Dataset({\n"," features: ['chinese', 'english', 'text', 'prompt'],\n"," num_rows: 4528\n"," })\n"," test: Dataset({\n"," features: ['chinese', 'english', 'text', 'prompt'],\n"," num_rows: 10\n"," })\n","})\n","Evaluating model: Qwen/Qwen2-7B-Instruct\n"," 0% 0/10 [00:00\n","--------\n","step 2: Daogang raised his rifle and squinted one of his triangular eyes as he pulled the trigger. The sound of the shot was followed by a shower of golden sparrows. Iron shot flew between the branches of the willow tree, making a crackling noise as it fell.\n","--------\n","step 3: Daogang raised his rifle and squinted one of his triangular eyes as he pulled the trigger. The sound of the shot was followed by a shower of golden sparrows. Iron shot flew between the branches of the willow tree, making a crackling noise as it fell.\n","100% 10/10 [08:47<00:00, 52.75s/it]\n","(3) GPU = Tesla T4. Max memory = 14.748 GB.\n","6.162 GB of memory reserved.\n"," chinese ... Qwen/Qwen2-7B-Instruct_checkpoint-30\n","0 老耿端起枪,眯缝起一只三角眼,一搂扳机响了枪,冰雹般的金麻雀劈哩啪啦往下落,铁砂子在柳枝间飞... ... 
Daogang raised his rifle and squinted one of h...\n","\n","[1 rows x 6 columns]\n","{'accuracy': 0.0, 'correct_ids': [], 'meteor': 0.3054342491260197, 'bleu_scores': {'bleu': 0.07742715215519567, 'precisions': [0.3879093198992443, 0.10077519379844961, 0.04774535809018567, 0.02452316076294278], 'brevity_penalty': 0.9413376338558703, 'length_ratio': 0.9429928741092637, 'translation_length': 397, 'reference_length': 421}, 'rouge_scores': {'rouge1': 0.4081002220991562, 'rouge2': 0.12336697491249377, 'rougeL': 0.31562784036214553, 'rougeLsum': 0.3172987190744452}}\n","CPU times: user 13.9 s, sys: 1.99 s, total: 15.9 s\n","Wall time: 42min 50s\n"]}],"source":["%%time\n","# number of training epochs whose saved checkpoints will be evaluated\n","num_train_epochs = 3\n","# evaluate the model with the LoRA checkpoint saved for each training epoch under /content/qwen2-7b\n","evaluate_model_all_epochs(\"Qwen/Qwen2-7B-Instruct\", num_train_epochs, \"/content/qwen2-7b\")"]}],"metadata":{"accelerator":"GPU","application/vnd.databricks.v1+notebook":{"dashboards":[],"environmentMetadata":null,"language":"python","notebookMetadata":{"pythonIndentUnit":4},"notebookName":"07_MAC_+_Qwen2-7B-Instructi_Unsloth_train","widgets":{}},"colab":{"gpuType":"T4","provenance":[]},"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.14"}},"nbformat":4,"nbformat_minor":0}