"
],
"text/html": [
"\n",
" \n",
" \n",
"
\n",
" [10000/10000 2:22:19, Epoch 10/10]\n",
"
\n",
" \n",
" \n",
" \n",
" Step | \n",
" Training Loss | \n",
" Validation Loss | \n",
"
\n",
" \n",
" \n",
" \n",
" 500 | \n",
" 0.746900 | \n",
" 0.728293 | \n",
"
\n",
" \n",
" 1000 | \n",
" 0.691000 | \n",
" 0.645709 | \n",
"
\n",
" \n",
" 1500 | \n",
" 0.582800 | \n",
" 0.674053 | \n",
"
\n",
" \n",
" 2000 | \n",
" 0.572300 | \n",
" 0.617940 | \n",
"
\n",
" \n",
" 2500 | \n",
" 0.414500 | \n",
" 0.840968 | \n",
"
\n",
" \n",
" 3000 | \n",
" 0.404400 | \n",
" 0.780854 | \n",
"
\n",
" \n",
" 3500 | \n",
" 0.259200 | \n",
" 1.119162 | \n",
"
\n",
" \n",
" 4000 | \n",
" 0.293200 | \n",
" 1.270593 | \n",
"
\n",
" \n",
" 4500 | \n",
" 0.162000 | \n",
" 1.355937 | \n",
"
\n",
" \n",
" 5000 | \n",
" 0.184600 | \n",
" 1.293014 | \n",
"
\n",
" \n",
" 5500 | \n",
" 0.097500 | \n",
" 1.493697 | \n",
"
\n",
" \n",
" 6000 | \n",
" 0.122800 | \n",
" 1.567413 | \n",
"
\n",
" \n",
" 6500 | \n",
" 0.071800 | \n",
" 1.670874 | \n",
"
\n",
" \n",
" 7000 | \n",
" 0.068000 | \n",
" 1.864547 | \n",
"
\n",
" \n",
" 7500 | \n",
" 0.047800 | \n",
" 2.023101 | \n",
"
\n",
" \n",
" 8000 | \n",
" 0.050400 | \n",
" 1.838262 | \n",
"
\n",
" \n",
" 8500 | \n",
" 0.024500 | \n",
" 2.052710 | \n",
"
\n",
" \n",
" 9000 | \n",
" 0.036600 | \n",
" 2.001531 | \n",
"
\n",
" \n",
" 9500 | \n",
" 0.019700 | \n",
" 2.118986 | \n",
"
\n",
" \n",
" 10000 | \n",
" 0.014700 | \n",
" 2.106975 | \n",
"
\n",
" \n",
"
"
]
},
"metadata": {}
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"TrainOutput(global_step=10000, training_loss=0.24322458848953246, metrics={'train_runtime': 8540.4084, 'train_samples_per_second': 9.366, 'train_steps_per_second': 1.171, 'total_flos': 2.104644228406272e+16, 'train_loss': 0.24322458848953246, 'epoch': 10.0})"
]
},
"metadata": {},
"execution_count": 96
}
],
"source": [
"# Launch the learning process: training\n",
"trainer.train()"
]
},
{
"cell_type": "code",
"source": [
"#To push the trained model into hugging face model hub\n",
"trainer.push_to_hub()\n"
],
"metadata": {
"id": "BcrMys9ejMGp",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 36
},
"outputId": "1ed4e019-32d7-438a-c786-0f1a85bdc918"
},
"execution_count": 97,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"'https://huggingface.co/NewtonKimathi/Covid_Vaccine_Sentiment_Analysis_Bert_based_Model/tree/main/'"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 97
}
]
},
{
"cell_type": "code",
"source": [
"tokenizer.push_to_hub(\"NewtonKimathi/Covid_Vaccine_Sentiment_Analysis_Bert_based_Model\")"
],
"metadata": {
"id": "mqrhPcEk94Ye",
"outputId": "e0b3e662-bb9a-4cc6-cb45-958c0ff96b92",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"execution_count": 99,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"CommitInfo(commit_url='https://huggingface.co/NewtonKimathi/Covid_Vaccine_Sentiment_Analysis_Bert_based_Model/commit/4e7f1e6b1a4f0714c04446d2bc10467724e57711', commit_message='Upload tokenizer', commit_description='', oid='4e7f1e6b1a4f0714c04446d2bc10467724e57711', pr_url=None, pr_revision=None, pr_num=None)"
]
},
"metadata": {},
"execution_count": 99
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "lGUIpe2sBcJc"
},
"source": [
"Don't worry the above issue, it is a `KeyboardInterrupt` that means I stopped the training to avoid taking a long time to finish."
]
},
{
"cell_type": "code",
"execution_count": 100,
"metadata": {
"id": "CWqTjv-TBcJd"
},
"outputs": [],
"source": [
"import numpy as np\n",
"from datasets import load_metric\n",
"\n",
"metric = load_metric(\"accuracy\")\n",
"\n",
"def compute_metrics(eval_pred):\n",
" logits, labels = eval_pred\n",
" predictions = np.argmax(logits, axis=-1)\n",
" return metric.compute(predictions=predictions, references=labels)"
]
},
{
"cell_type": "code",
"execution_count": 101,
"metadata": {
"id": "1Wr8MMcLBcJd"
},
"outputs": [],
"source": [
"trainer = Trainer(\n",
" model=model,\n",
" args=training_args,\n",
" train_dataset=train_dataset,\n",
" eval_dataset=eval_dataset,\n",
" compute_metrics=compute_metrics,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 102,
"metadata": {
"id": "O329L-KzBcJe",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 124
},
"outputId": "511fc98c-0d23-4da6-8ff7-79b269be87ee"
},
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
""
],
"text/html": [
"\n",
" \n",
" \n",
"
\n",
" [250/250 01:02]\n",
"
\n",
" "
]
},
"metadata": {}
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'eval_loss': 0.6179400682449341,\n",
" 'eval_accuracy': 0.744,\n",
" 'eval_runtime': 63.1494,\n",
" 'eval_samples_per_second': 31.671,\n",
" 'eval_steps_per_second': 3.959}"
]
},
"metadata": {},
"execution_count": 102
}
],
"source": [
"# Launch the final evaluation\n",
"trainer.evaluate()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "1WTFTUyKBcJf"
},
"source": [
"# Model 2 : Roberta\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "eaXyqAwMBcJf"
},
"source": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "PNAvKF5mBcJf"
},
"source": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "0aC9UonYBcJg"
},
"source": []
},
{
"cell_type": "code",
"source": [
"from transformers import RobertaForSequenceClassification, RobertaTokenizer\n",
"\n",
"# Load pre-trained RoBERTa model and tokenizer\n",
"tokenizer_2 = RobertaTokenizer.from_pretrained(\"roberta-base\")\n",
"model2 = RobertaForSequenceClassification.from_pretrained(\"roberta-base\", num_labels=3)\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 200,
"referenced_widgets": [
"766de4aff3eb46d1afa1ec2cb5878093",
"12664abd14314ba39629e7389ef5a23a",
"1e2f87928822461e97c857b55765d909",
"763a8b4438904867ab511fe4e0dc4138",
"6a1d98f7f00f4439b91b202deb4ac053",
"962e3f12c9ca419dbdc5a3d1b1cdfd0d",
"0019cdc29f6d4547b592a18c345058d4",
"3c77946e63f747d3a44ea6f30d46ab5b",
"8c50014ef0a34cbfa2f71d6dece52521",
"dcf9f876e392475a8a674b290be450e4",
"83a250cae9c343eb8e60100646bf5c1e",
"e8f0e68d49f44820a080d293f83883ff",
"dd4c586bbe5f43a8a1d6f1fc2191c514",
"7d7cbd47c206433c99c6867363b09dc6",
"87cf87b552054e659576304f8fb1e614",
"31cdc430e6ac47a6a17c9c29e6c235a2",
"98c69ec7e52b40af883ddd636b296fd1",
"3460728d9b384c5599f8d7b7739ceb4f",
"9a68fd19ec7e432fa887ec9a5a59a56e",
"a4a3aa1a28c749a78f6badad2fe1f199",
"ab8505fd5fbb4ebda6f60c1b351282d3",
"09661d7cc7284ddcb4f1345b38a79fad",
"caddb7cef2cd4a9dab1c987869a61845",
"0ac8aad650124ceaa0e34f227dfd25f8",
"c86361a1f20b490ea809409deb489d1c",
"219362f895c1489e9a3bde4642623ecb",
"a44205fc233544109bc1f6b3d175a6c4",
"7aecee18dc094c0687dcefe04cd30438",
"54ea8de5061541ba923f5495ffa38e23",
"69e918aa207a428b97954f71b3a6515d",
"1c94657af1f349ba8f104983c5608ebf",
"b938b351c44b4a549823d3f4e3848076",
"bb738b59a85e45618dfde1252a912d38",
"f3d9696377f4437287eca013b5c3ead0",
"7783ce6114ab4c379a31e8cc84c95cea",
"c46c79e58c0249de849d6381f9ebecce",
"cf63b0d463d2456384b8eef3d2b33250",
"4b0fefa2f80a4aa99e71b8aae8b8ff24",
"2933dd37184e435b9db1b11507b15441",
"7bc268e21bfb4f67b6bb5df1f9a64669",
"38da502fd6ba449791aa30a235d8f892",
"649e24bc4637459f84f86e303c37cd3f",
"15c3f73d7a054b9ba56b7d740b884d9a",
"f0a27ec1bf2a45e5b266a4e3672b551b"
]
},
"id": "aj7xFArtUJqc",
"outputId": "f24ca48c-9132-4a05-b450-935db97635bb"
},
"execution_count": 61,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading (…)olve/main/vocab.json: 0%| | 0.00/899k [00:00, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "766de4aff3eb46d1afa1ec2cb5878093"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading (…)olve/main/merges.txt: 0%| | 0.00/456k [00:00, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "e8f0e68d49f44820a080d293f83883ff"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading (…)lve/main/config.json: 0%| | 0.00/481 [00:00, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "caddb7cef2cd4a9dab1c987869a61845"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading model.safetensors: 0%| | 0.00/499M [00:00, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "f3d9696377f4437287eca013b5c3ead0"
}
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# have the tokenize function with the Tokenizer 2\n",
"\n",
"def tokenize_function_2(df):\n",
" return tokenizer_2(df['clean_text'], padding=\"max_length\")"
],
"metadata": {
"id": "q_zFwXv3Unus"
},
"execution_count": 62,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# using the load_dataset function to load CSV files as datasets\n",
"dataset_2 = load_dataset('csv',\n",
" data_files={'train': './LP5 Dataset/train_subset.csv',\n",
" 'eval': './LP5 Dataset/eval_subset.csv'}, encoding = \"ISO-8859-1\")"
],
"metadata": {
"id": "n2ZXCiN0UwCC"
},
"execution_count": 63,
"outputs": []
},
{
"cell_type": "code",
"source": [
"dataset_2"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "9iNhA2HEVpjZ",
"outputId": "2a76404b-46ec-446a-9cbf-36eeed162717"
},
"execution_count": 64,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"DatasetDict({\n",
" train: Dataset({\n",
" features: ['tweet_id', 'clean_text', 'label', 'agreement'],\n",
" num_rows: 7999\n",
" })\n",
" eval: Dataset({\n",
" features: ['tweet_id', 'clean_text', 'label', 'agreement'],\n",
" num_rows: 2000\n",
" })\n",
"})"
]
},
"metadata": {},
"execution_count": 64
}
]
},
{
"cell_type": "code",
"source": [
"# Tokenize the dataset\n",
"# Changing the tweets into tokens our model can explot\n",
"\n",
"dataset_2 = dataset_2.map(tokenize_function_2, batched=True)"
],
"metadata": {
"id": "aWwrFrYSVtvq",
"outputId": "e74e99e4-7601-4ba7-c342-e9ee3a77d557",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 81,
"referenced_widgets": [
"4afa2fc7ba0c4e769bf7889b06c4894d",
"0471ef46afe64ebbbae390391fc209ae",
"1ee6434822f14f659773e2fa8a1d324c",
"2966b337d2984968994d02db414cf035",
"49b7f6201e544e30939c0c6b27340973",
"0023473834c944239d761d644cf7d362",
"06ba02c941574aa387ef1513f68373bb",
"1a81290ec39b491e82302c45e726f0e8",
"081f1af42c4742239f71291f67218f80",
"46dc6fb4eeaf49e5bda220bfe3843509",
"0ac3a38c97004a249d61b90bafacf2dc",
"ff1fc4cfd1684d669c932b3acfe7feb5",
"3eb7c736683a49578ad1495dfc0ef8b9",
"58d0d8a5e8b44b9ca8cfb87345cb4af9",
"1df5f6316f5947eebe683685649e0249",
"f980c38f33ab421faa400d38c2883f21",
"aa903ab499224be2acb85927698ca7b7",
"2868c8390a2b4dd9b7caf652b70f9132",
"fb323ba1553144c5b2ccbed8bcaa472d",
"f8121190370e44cfb861970df25a6963",
"40c8f1ec50c54b3c82ebdebd12e1033d",
"f11a18a5693c417c89484272b5dad562"
]
}
},
"execution_count": 65,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"Map: 0%| | 0/7999 [00:00, ? examples/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "4afa2fc7ba0c4e769bf7889b06c4894d"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Map: 0%| | 0/2000 [00:00, ? examples/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "ff1fc4cfd1684d669c932b3acfe7feb5"
}
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": [
"dataset_2"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "WnIx090Meyea",
"outputId": "90066f2d-d3c9-42d2-e142-aff0871e51ca"
},
"execution_count": 66,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"DatasetDict({\n",
" train: Dataset({\n",
" features: ['tweet_id', 'clean_text', 'label', 'agreement', 'input_ids', 'attention_mask'],\n",
" num_rows: 7999\n",
" })\n",
" eval: Dataset({\n",
" features: ['tweet_id', 'clean_text', 'label', 'agreement', 'input_ids', 'attention_mask'],\n",
" num_rows: 2000\n",
" })\n",
"})"
]
},
"metadata": {},
"execution_count": 66
}
]
},
{
"cell_type": "code",
"source": [
"def transform_labels(data):\n",
" label = data['label'] # extracts the value of the 'label' from the data input\n",
" num = 0\n",
" if label == -1: # 'Negative' sentiment\n",
" num = 0\n",
" elif label == 0: # 'Neutral' sentiment\n",
" num = 1\n",
" elif label == 1: # 'Positive' sentiment\n",
" num = 2\n",
" return {\"labels\": num}"
],
"metadata": {
"id": "shpbwb29XZN6"
},
"execution_count": 67,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Assuming you are using the 'transform_labels' function for the mapping\n",
"drop = ['tweet_id', 'clean_text', 'label', 'agreement']\n",
"dataset_2 = dataset_2.map(transform_labels, remove_columns=drop)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 81,
"referenced_widgets": [
"b158451ad0e243b5a614799f16887cf2",
"c01439433f7e4f0f90072ef2a71baa10",
"e7d9956869c24da49c43e2c2cca0c7f7",
"26e89266e3584c939b9ab82fbf13142e",
"0ae58f106901467a856148dbd46fa8e6",
"f7780ae5bb1140f081d29a3fd68d8486",
"cd6355736c66405384579085703a19f4",
"442bb92adb1740c5adb9e20ba17df65a",
"7dc9c17af48b430e9cd08a554a6b5675",
"35fa17bc5706447388d0c1366ef79bb5",
"4b78033fcc684b44ad0d720fa1e4b1c3",
"e7ff8991ab554c68a40759313e77bd71",
"6fff6b2f9cf84966a7e6ada46d67aa58",
"7008755f6cc5496b865649fc4872cae1",
"93e528af2cde4165b69008b5cb345f29",
"2a4fea73006c48aba9fbd523532993bd",
"c0703a9643f24acb9b7aaea68ba60e9d",
"e97800390bc443c4819eca3c92efd427",
"80ce451c75044f3197a57c3ae185dab5",
"182c173f50dc40aaba23b574a2744dd7",
"241316c94ba54d37bc23f6354550ffe7",
"09e9669d5ed646519b0311d06cbdd917"
]
},
"id": "dXUCCzUXXhCH",
"outputId": "042b7156-b42b-49b0-9d4b-76a2dd294cc2"
},
"execution_count": 68,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"Map: 0%| | 0/7999 [00:00, ? examples/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "b158451ad0e243b5a614799f16887cf2"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Map: 0%| | 0/2000 [00:00, ? examples/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "e7ff8991ab554c68a40759313e77bd71"
}
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": [
"dataset_2"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "kWtF-dmleTzz",
"outputId": "49cff4e6-d007-4b92-bc30-7cee5bf5da51"
},
"execution_count": 69,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"DatasetDict({\n",
" train: Dataset({\n",
" features: ['input_ids', 'attention_mask', 'labels'],\n",
" num_rows: 7999\n",
" })\n",
" eval: Dataset({\n",
" features: ['input_ids', 'attention_mask', 'labels'],\n",
" num_rows: 2000\n",
" })\n",
"})"
]
},
"metadata": {},
"execution_count": 69
}
]
},
{
"cell_type": "code",
"source": [
"\n",
"# Shuffle the dataset\n",
"\n",
"roberta_train_dataset = dataset_2[\"train\"].shuffle(seed=50)#.take(subset_size)\n",
"roberta_eval_dataset = dataset_2[\"eval\"].shuffle(seed=50)"
],
"metadata": {
"id": "Y6Husc4_XrDd"
},
"execution_count": 70,
"outputs": []
},
{
"cell_type": "code",
"source": [
"roberta_eval_dataset"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "t59mqqWGYVWR",
"outputId": "16bb91f6-0ed8-4842-de63-9aff2c55ae6c"
},
"execution_count": 71,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Dataset({\n",
" features: ['input_ids', 'attention_mask', 'labels'],\n",
" num_rows: 2000\n",
"})"
]
},
"metadata": {},
"execution_count": 71
}
]
},
{
"cell_type": "code",
"source": [
"# SPecifying the training arguments\n",
"from transformers import TrainingArguments\n",
"\n",
"# Configure the trianing parameters like `num_train_epochs`:\n",
"# the number of time the model will repeat the training loop over the dataset\n",
"training_args_2 = TrainingArguments(\"Covid_Vaccine_Sentiment_Analysis_Roberta_Model\",\n",
" num_train_epochs=5,\n",
" load_best_model_at_end=True,\n",
" push_to_hub=True,\n",
" evaluation_strategy=\"steps\",\n",
" save_strategy=\"steps\")"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "DaRYQqMUYjh_",
"outputId": "9bfa5ca1-dcc7-4657-8553-be263363b63a"
},
"execution_count": 74,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# WE import the Trainer class from the Transformers library.\n",
"from transformers import Trainer\n",
"# Create a trainer\n",
"\n",
"trainer_2 = Trainer(model = model2,args = training_args_2,train_dataset = roberta_train_dataset,\n",
" eval_dataset = roberta_eval_dataset)"
],
"metadata": {
"id": "S3YpKJ7GYl9h"
},
"execution_count": 78,
"outputs": []
},
{
"cell_type": "code",
"source": [
"trainer_2.train()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 441
},
"id": "t_Fdt9upaWdX",
"outputId": "3af23c77-0f1e-4218-dce3-12fb7a0738cb"
},
"execution_count": 79,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
""
],
"text/html": [
"\n",
" \n",
" \n",
"
\n",
" [5000/5000 1:10:34, Epoch 5/5]\n",
"
\n",
" \n",
" \n",
" \n",
" Step | \n",
" Training Loss | \n",
" Validation Loss | \n",
"
\n",
" \n",
" \n",
" \n",
" 500 | \n",
" 0.793300 | \n",
" 0.714130 | \n",
"
\n",
" \n",
" 1000 | \n",
" 0.786900 | \n",
" 0.762703 | \n",
"
\n",
" \n",
" 1500 | \n",
" 0.732500 | \n",
" 0.691696 | \n",
"
\n",
" \n",
" 2000 | \n",
" 0.729200 | \n",
" 0.686629 | \n",
"
\n",
" \n",
" 2500 | \n",
" 0.686100 | \n",
" 0.717395 | \n",
"
\n",
" \n",
" 3000 | \n",
" 0.680000 | \n",
" 0.693970 | \n",
"
\n",
" \n",
" 3500 | \n",
" 0.622000 | \n",
" 0.686874 | \n",
"
\n",
" \n",
" 4000 | \n",
" 0.627800 | \n",
" 0.700868 | \n",
"
\n",
" \n",
" 4500 | \n",
" 0.563100 | \n",
" 0.732568 | \n",
"
\n",
" \n",
" 5000 | \n",
" 0.558900 | \n",
" 0.702088 | \n",
"
\n",
" \n",
"
"
]
},
"metadata": {}
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"TrainOutput(global_step=5000, training_loss=0.6779822937011719, metrics={'train_runtime': 4238.9541, 'train_samples_per_second': 9.435, 'train_steps_per_second': 1.18, 'total_flos': 1.052322114203136e+16, 'train_loss': 0.6779822937011719, 'epoch': 5.0})"
]
},
"metadata": {},
"execution_count": 79
}
]
},
{
"cell_type": "code",
"source": [
"trainer_2.push_to_hub()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 36
},
"id": "_0FKKWbcp1_O",
"outputId": "a1570b1e-c589-4b49-dad2-5f75004fe0cc"
},
"execution_count": 80,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"'https://huggingface.co/NewtonKimathi/Covid_Vaccine_Sentiment_Analysis_Roberta_Model/tree/main/'"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 80
}
]
},
{
"cell_type": "code",
"source": [
"import numpy as np\n",
"from datasets import load_metric\n",
"\n",
"metric = load_metric(\"accuracy\")\n",
"\n",
"def compute_Metrics(eval_pred):\n",
" logits, labels = eval_pred\n",
" predictions = np.argmax(logits, axis=-1)\n",
" return metric.compute(predictions=predictions, references=labels)"
],
"metadata": {
"id": "Ydipwhdxqebj",
"outputId": "1ab9b2ea-16ab-425e-e73f-6d32e6c453a2",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 49,
"referenced_widgets": [
"63614e8c81c244e587f21aa84000c8c7",
"af816b2f0f594499bfd708d2acd4e980",
"b0491c595fe54887aaf9caf8e47a30de",
"515b037f6c6b4e0ba8301f30c8bbfc56",
"5552a066f8cf4fe3be5cabfe9753b9da",
"cab547e195664df3b29021cf6cf00056",
"12fe103d32de4740a764bfc18378b2c3",
"27f6a665d61847edbc02c0e4c52c81e6",
"202a1d55ae59426191e9e8b88633e3c9",
"90b2c03d6d9648e79b340b0cb9a5ebe7",
"4ea0df6dd8744c4fbed9b53f03bbfac5"
]
}
},
"execution_count": 81,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading builder script: 0%| | 0.00/1.65k [00:00, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "63614e8c81c244e587f21aa84000c8c7"
}
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": [
"trainer_2 = Trainer(\n",
" model=model2,\n",
" args=training_args_2,\n",
" train_dataset=roberta_train_dataset,\n",
" eval_dataset=roberta_eval_dataset,\n",
" compute_metrics=compute_Metrics,\n",
")"
],
"metadata": {
"id": "kmSUS16hrnHZ"
},
"execution_count": 83,
"outputs": []
},
{
"cell_type": "code",
"source": [
"trainer_2.evaluate()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 124
},
"id": "aMbqJ-1Lr9Zp",
"outputId": "1ff644c9-f060-4885-f451-44e99f3b7caf"
},
"execution_count": 84,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
""
],
"text/html": [
"\n",
" \n",
" \n",
"
\n",
" [250/250 00:57]\n",
"
\n",
" "
]
},
"metadata": {}
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'eval_loss': 0.6866294145584106,\n",
" 'eval_accuracy': 0.7365,\n",
" 'eval_runtime': 57.6464,\n",
" 'eval_samples_per_second': 34.694,\n",
" 'eval_steps_per_second': 4.337}"
]
},
"metadata": {},
"execution_count": 84
}
]
},
{
"cell_type": "code",
"source": [
"tokenizer_2.push_to_hub(\"NewtonKimathi/Covid_Vaccine_Sentiment_Analysis_Roberta_Model\")"
],
"metadata": {
"id": "PBLIA_5fYY28",
"outputId": "98f0fe64-79a6-4828-ded7-f2981f0103f7",
"colab": {
"base_uri": "https://localhost:8080/"
}
},
"execution_count": 85,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"CommitInfo(commit_url='https://huggingface.co/NewtonKimathi/Covid_Vaccine_Sentiment_Analysis_Roberta_Model/commit/f1e0044365b7ed0a6eae830cf416228679f2e49f', commit_message='Upload tokenizer', commit_description='', oid='f1e0044365b7ed0a6eae830cf416228679f2e49f', pr_url=None, pr_revision=None, pr_num=None)"
]
},
"metadata": {},
"execution_count": 85
}
]
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "3UnGIx7OYilg"
},
"execution_count": null,
"outputs": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6 (default, Aug 5 2022, 15:21:02) \n[Clang 14.0.0 (clang-1400.0.29.102)]"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "1ab24538aa0da4b2d8c48eaca591ff7ffc54671225fb0511b432fd9e26a098ba"
}
},
"colab": {
"provenance": [],
"gpuType": "T4"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"4d8aeed8328944059c653d9aed6458e3": {
"model_module": "@jupyter-widgets/controls",
"model_name": "VBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "VBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "VBoxView",
"box_style": "",
"children": [
"IPY_MODEL_fad83b2c46e640c88cc503618024fb7c",
"IPY_MODEL_ab6fc9e2911b4bae8c35e37d622e6007",
"IPY_MODEL_f7c73e5c461445298e8e7dd8d6092af6",
"IPY_MODEL_4c2c7f6c85f94240ba31118e5820b403"
],
"layout": "IPY_MODEL_e14f2967414b4845bda9f18c242713c4"
}
},
"17c26aae58f149a4852d97dbd79920f5": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_765c9eb707754bd38706ffecd427d9dc",
"placeholder": "",
"style": "IPY_MODEL_169b4657fa8f4b35a2408f0978b68f6b",
"value": "
Copy a token from your Hugging Face\ntokens page and paste it below.
Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. "
}
},
"a81c3d888d334d18a3ec6da073d8d1c1": {
"model_module": "@jupyter-widgets/controls",
"model_name": "PasswordModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "PasswordModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "PasswordView",
"continuous_update": true,
"description": "Token:",
"description_tooltip": null,
"disabled": false,
"layout": "IPY_MODEL_3a19c2504fb849a087f8b1e5792a506b",
"placeholder": "",
"style": "IPY_MODEL_c4d3d6badcf44396937ed3b01463c319",
"value": ""
}
},
"eec59ab891214a4c99fc912cc903096d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "CheckboxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "CheckboxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "CheckboxView",
"description": "Add token as git credential?",
"description_tooltip": null,
"disabled": false,
"indent": true,
"layout": "IPY_MODEL_9281f152b0744ef89b7a7d94c4549ea3",
"style": "IPY_MODEL_e0a93c8ca5b449d1ab61f865a59a4c67",
"value": true
}
},
"3c7978fe2fb34620810f0f03c024216b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ButtonModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ButtonModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ButtonView",
"button_style": "",
"description": "Login",
"disabled": false,
"icon": "",
"layout": "IPY_MODEL_f15216c7eaf24cb692bf79f93ec822a2",
"style": "IPY_MODEL_b16f0df38ced4325affa5b824c546f84",
"tooltip": ""
}
},
"800dfaaac72548f8818f0242fb11a542": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_bb44286e41274bbdb651098c127406ed",
"placeholder": "",
"style": "IPY_MODEL_a9825e6f5f68442191e1772a970367f3",
"value": "\nPro Tip: If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks.