# Environment setup, kept disabled on purpose; run once in a fresh environment:
#   pip install transformers datasets

# Dataset/metric loaders used by the cells that follow.
from datasets import load_dataset, load_metric

# ---------------------------------------------------------------------------
# Run configuration -- everything a reader might want to tune lives here.
# ---------------------------------------------------------------------------

# Task name and per-step batch size.
# NOTE(review): a later cell loads "financial_phrasebank" rather than a task
# called "sst2", so `task` looks like a leftover value -- confirm before
# relying on it.
task, batch_size = "sst2", 32

# Checkpoint identifiers (presumably Hugging Face Hub model ids -- confirm
# against the cells that consume them).
model_checkpoint = "distilbert-base-uncased"
model_checkpoint2 = "bigscience/bloom-560m"
import random

import datasets
import pandas as pd
from IPython.display import display, HTML

# Seed for the train/test partition. Without it every kernel restart yields a
# different split, so displayed samples and later scores are not reproducible.
SPLIT_SEED = 42

# `task` comes from the configuration cell; "mnli-mm" is mapped onto "mnli".
actual_task = "mnli" if task == "mnli-mm" else task

# NOTE(review): `load_metric` is deprecated in `datasets` (the captured stderr
# recommends `evaluate.load`). Switching needs the separate `evaluate`
# package, so it is flagged here rather than silently replaced.
metric = load_metric('accuracy')

# Keep the un-split data under its own name. Rebinding `dataset` from the raw
# Dataset to the split DatasetDict made the split cell fail when re-run,
# because a DatasetDict has no `train_test_split`.
full_dataset = load_dataset("financial_phrasebank", 'sentences_50agree')['train']

# 90% train / 10% test, seeded so the partition is stable across runs.
dataset = full_dataset.train_test_split(test_size=0.1, seed=SPLIT_SEED)

dataset


def show_random_elements(dataset, num_examples=10):
    """Display `num_examples` distinct, randomly chosen rows of `dataset`.

    The selected rows are loaded into a pandas DataFrame, every column whose
    feature is a `datasets.ClassLabel` has its integer ids replaced by the
    corresponding label names, and the frame is rendered as an HTML table.

    Args:
        dataset: Object supporting `len()`, indexing with a list of integer
            positions, and a `.features` mapping of column name to feature
            type (as used below).
        num_examples: Number of distinct rows to show; must not exceed
            `len(dataset)`.

    Returns:
        None. The table is emitted through `IPython.display.display`.

    Raises:
        AssertionError: If `num_examples` is larger than `len(dataset)`.
        ValueError: From `random.sample` if `num_examples` is negative.
    """
    assert num_examples <= len(dataset), "Can't pick more elements than there are in the dataset."

    # Draw without replacement in one call. The previous rejection loop
    # re-rolled on every duplicate and could never terminate if the guard
    # above was stripped (python -O) and too many rows were requested.
    picks = random.sample(range(len(dataset)), num_examples)

    df = pd.DataFrame(dataset[picks])
    for column, typ in dataset.features.items():
        if isinstance(typ, datasets.ClassLabel):
            # Bind the names list now instead of closing over the loop variable.
            df[column] = df[column].transform(lambda i, names=typ.names: names[i])
    display(HTML(df.to_html()))
\n", " | sentence | \n", "label | \n", "
---|---|---|
0 | \n", "Margin call of Zanadvorov has given the chance to make such purchase under the credit of Deutsche Bank for USD 560 million . | \n", "neutral | \n", "
1 | \n", "Again , the most significant sales increase of 18.6 % was in Russia . | \n", "positive | \n", "
2 | \n", "Shares of Nokia Corp. rose Thursday after the cell phone maker said its third-quarter earnings almost doubled and its share of the global handset market increased . | \n", "positive | \n", "
3 | \n", "Finnish plumbing and heating systems supplier Uponor 's net sales from continuing operations decreased by 9.4 % in 2008 to EUR 949.2 mn from EUR 1,047.4 mn in 2007 . | \n", "negative | \n", "
4 | \n", "Pretax profit totalled EUR 2.0 mn , compared to a loss of EUR 159.2 mn in the fourth quarter of 2008 . | \n", "positive | \n", "
5 | \n", "The Segu companies and PKC 's Polish unit will form a business servicing the western European markets with `` annual sales of about hundred million '' and about 2,500 employees , the Finnish company 's CEO , Harri Suutari , said . | \n", "neutral | \n", "
6 | \n", "We serve a broad range of customers , including construction and process industries , shipyards , the public sector and households . | \n", "neutral | \n", "
7 | \n", "Nokia said it still expects to sell 150 more million Symbian devices in years to come , giving an indication of how it expects the Phone 7 handoff to work . | \n", "neutral | \n", "
8 | \n", "Thanks to its extensive industry and operations experience , Cybercom offers strategic and technological expertise to these markets : telecom , industry , media , public sector , retail , and banking and financial services . | \n", "neutral | \n", "
9 | \n", "Aldata said that there are still a number of operational aspects to be defined between it and Microsoft and further details of the product and market initiatives resulting from this agreement will be available at a later date . | \n", "neutral | \n", "