joshcarp committed
Commit bcffb9c
0 Parent(s)

Initial huggingface commit

Files changed (26)
  1. .gitattributes +35 -0
  2. HumanEval.jsonl +0 -0
  3. README.md +3 -0
  4. another.ipynb +112 -0
  5. calendar copy.ipynb +587 -0
  6. calendar.ipynb +524 -0
  7. data-flattened.json +470 -0
  8. data.json +620 -0
  9. data2.json +310 -0
  10. data3.jsonl +80 -0
  11. dataset.csv +0 -0
  12. elif.ipynb +452 -0
  13. foo.py +15 -0
  14. foobar +0 -0
  15. foobar.txt +25 -0
  16. has_closest_elements.evy +38 -0
  17. ner.ipynb +363 -0
  18. nltk.ipynb +0 -0
  19. notebook.ipynb +268 -0
  20. nuner.ipynb +124 -0
  21. prompt.md +469 -0
  22. python +0 -0
  23. sft.ipynb +181 -0
  24. squash.py +29 -0
  25. translate.py +10 -0
  26. youtube-tutorial.ipynb +24 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
HumanEval.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
README.md ADDED
@@ -0,0 +1,3 @@
+ ## Calendar LLM
+
+ An experiment in fine-tuning LLMs to generate calendar events from natural language.
another.ipynb ADDED
@@ -0,0 +1,112 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Entity: L V | Type: ('Let', 'VB')\n",
+ "Entity: ' P | Type: (\"'s\", 'POS')\n",
+ "Entity: m N | Type: ('meet', 'NN')\n",
+ "Entity: f I | Type: ('for', 'IN')\n",
+ "Entity: l N | Type: ('lunch', 'NN')\n",
+ "Entity: t N | Type: ('tomorrow', 'NN')\n",
+ "Entity: a I | Type: ('at', 'IN')\n",
+ "Entity: 1 C | Type: ('12', 'CD')\n",
+ "Entity: P N | Type: ('PM', 'NNP')\n",
+ "Entity: a I | Type: ('at', 'IN')\n",
+ "Entity: t D | Type: ('the', 'DT')\n",
+ "Entity: Italian | Type: (GPE Italian/JJ)\n",
+ "Entity: r N | Type: ('restaurant', 'NN')\n",
+ "Entity: o I | Type: ('on', 'IN')\n",
+ "Entity: Main Street | Type: (FACILITY Main/NNP Street/NNP)\n",
+ "Entity: . . | Type: ('.', '.')\n"
+ ]
+ }
+ ],
+ "source": [
+ "\n",
+ "from nltk import ne_chunk, pos_tag\n",
+ "from nltk.tokenize import word_tokenize\n",
+ "\n",
+ "# Sample text for demonstration\n",
+ "text = \"Let's meet for lunch tomorrow at 12 PM at the Italian restaurant on Main Street.\"\n",
+ "\n",
+ "# Tokenize the text into words\n",
+ "tokens = word_tokenize(text)\n",
+ "\n",
+ "# Apply NER using NLTK's pre-trained models\n",
+ "ner_tags = ne_chunk(pos_tag(tokens))\n",
+ "\n",
+ "# Print the named entities\n",
+ "for chunk in ner_tags:\n",
+ "    if hasattr(chunk, 'label'):\n",
+ "        print(f\"Entity: {' '.join(c[0] for c in chunk)} | Type: {chunk}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Entity: Lunch Tomorrow | Type: PERSON\n",
+ "Entity: Italian | Type: GPE\n",
+ "Entity: Main Street | Type: FACILITY\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Apply NER using NLTK's pre-trained models\n",
+ "ner_tags = ne_chunk(pos_tag(tokens))\n",
+ "\n",
+ "# Print the named entities\n",
+ "for chunk in ner_tags:\n",
+ "    if hasattr(chunk, 'label'):\n",
+ "        print(f\"Entity: {' '.join(c[0] for c in chunk)} | Type: {chunk.label()}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.1"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+ }
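
For reference, the NER experiment in this notebook reduces to the short standalone script below. The two cells differ only in the print: the first formats `{chunk}` (the whole subtree), while the second's `{chunk.label()}` gives the clean "Entity: Main Street | Type: FACILITY" output. This is a minimal sketch assuming the standard NLTK data packages (punkt, averaged_perceptron_tagger, maxent_ne_chunker, words) are available; the download loop fetches them if not.

import nltk
from nltk import ne_chunk, pos_tag
from nltk.tokenize import word_tokenize
from nltk.tree import Tree

# One-time model downloads (no-ops if the packages are already present).
for pkg in ("punkt", "averaged_perceptron_tagger", "maxent_ne_chunker", "words"):
    nltk.download(pkg, quiet=True)

text = "Let's meet for lunch tomorrow at 12 PM at the Italian restaurant on Main Street."

tokens = word_tokenize(text)   # split the sentence into word tokens
tagged = pos_tag(tokens)       # attach a part-of-speech tag to each token
chunks = ne_chunk(tagged)      # group tagged tokens into named-entity subtrees

for chunk in chunks:
    # Named entities come back as Tree nodes; ordinary tokens stay (word, tag) tuples,
    # so an isinstance check is a more explicit guard than hasattr(chunk, 'label').
    if isinstance(chunk, Tree):
        entity = " ".join(word for word, tag in chunk.leaves())
        print(f"Entity: {entity} | Type: {chunk.label()}")

On this sentence the output should resemble the second cell's; note that the "Lunch Tomorrow"/PERSON entry there is an apparent false positive of the off-the-shelf chunker.
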
calendar copy.ipynb ADDED
@@ -0,0 +1,587 @@
+ {
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from datasets import load_dataset\n",
+ "\n",
+ "dataset = load_dataset(\"json\", data_files=\"data-flattened.json\", split=\"train\")\n",
+ "\n",
+ "labels = [\"datetime\", \"description\", \"location\"]\n",
+ "dataset = dataset.train_test_split(test_size=0.1)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 60,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Some weights of T5ForSequenceClassification were not initialized from the model checkpoint at google-t5/t5-small and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']\n",
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "7f612c075ba5465b85b56fa25e5c8e91",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Map: 0%| | 0/69 [00:00<?, ? examples/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "No chat template is defined for this tokenizer - using a default chat template that implements the ChatML format (without BOS/EOS tokens!). If the default is not appropriate for your model, please set `tokenizer.chat_template` to an appropriate template. See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n",
+ "\n"
+ ]
+ },
+ {
+ "ename": "KeyError",
+ "evalue": "'summary'",
+ "output_type": "error",
+ "traceback": [
+ "---------------------------------------------------------------------------",
+ "KeyError                                 Traceback (most recent call last)",
+ "Cell In[60], line 28\n---> 28 tokenized_data_set = dataset.map(preprocess_function, batched=True)\n",
+ "File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/datasets/dataset_dict.py:869, in DatasetDict.map(self, function, with_indices, with_rank, input_columns, batched, batch_size, drop_last_batch, remove_columns, keep_in_memory, load_from_cache_file, cache_file_names, writer_batch_size, features, disable_nullable, fn_kwargs, num_proc, desc)\n--> 869 k: dataset.map(\n",
+ "File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/datasets/arrow_dataset.py:593, in transmit_tasks.<locals>.wrapper(*args, **kwargs)\n--> 593 out: Union[\"Dataset\", \"DatasetDict\"] = func(self, *args, **kwargs)\n",
+ "File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/datasets/arrow_dataset.py:558, in transmit_format.<locals>.wrapper(*args, **kwargs)\n--> 558 out: Union[\"Dataset\", \"DatasetDict\"] = func(self, *args, **kwargs)\n",
+ "File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/datasets/arrow_dataset.py:3105, in Dataset.map(self, function, with_indices, with_rank, input_columns, batched, batch_size, drop_last_batch, remove_columns, keep_in_memory, load_from_cache_file, cache_file_name, writer_batch_size, features, disable_nullable, fn_kwargs, num_proc, suffix_template, new_fingerprint, desc)\n-> 3105 for rank, done, content in Dataset._map_single(**dataset_kwargs):\n",
+ "File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/datasets/arrow_dataset.py:3482, in Dataset._map_single(shard, function, with_indices, with_rank, input_columns, batched, batch_size, drop_last_batch, remove_columns, keep_in_memory, cache_file_name, writer_batch_size, features, disable_nullable, fn_kwargs, new_fingerprint, rank, offset)\n-> 3482 batch = apply_function_on_filtered_inputs(\n",
+ "File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/datasets/arrow_dataset.py:3361, in Dataset._map_single.<locals>.apply_function_on_filtered_inputs(pa_inputs, indices, check_same_num_examples, offset)\n-> 3361 processed_inputs = function(*fn_args, *additional_args, **fn_kwargs)\n",
+ "Cell In[60], line 23, in preprocess_function(examples)\n---> 23 labels = tokenizer(text_target=examples[\"summary\"], max_length=128, truncation=True, padding=\"max_length\")\n",
+ "File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/datasets/formatting/formatting.py:270, in LazyDict.__getitem__(self, key)\n--> 270 value = self.data[key]\n",
+ "KeyError: 'summary'"
+ ]
+ }
+ ],
+ "source": [
+ "from transformers import (\n",
+ "    AutoModelForSequenceClassification,\n",
+ "    AutoTokenizer,\n",
+ "    Trainer,\n",
+ "    TextClassificationPipeline,\n",
+ "    TrainingArguments,\n",
+ ")\n",
+ "\n",
+ "# Model and tokenizer selection\n",
+ "checkpoint = \"google-t5/t5-small\" # Ensure correct model name\n",
+ "\n",
+ "\n",
+ "# Configure model for multi-label classification\n",
+ "model = AutoModelForSequenceClassification.from_pretrained(\n",
+ "    checkpoint, num_labels=len(labels)\n",
+ ")\n",
+ "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
+ "\n",
+ "def preprocess_function(examples):\n",
+ "    inputs = [doc for doc in examples[\"message\"]]\n",
+ "    model_inputs = tokenizer(inputs, max_length=1024, truncation=True, padding=\"max_length\")\n",
+ "\n",
+ "    labels = tokenizer(text_target=examples[\"summary\"], max_length=128, truncation=True, padding=\"max_length\")\n",
+ "\n",
+ "    model_inputs[\"labels\"] = labels[\"input_ids\"]\n",
+ "    return model_inputs\n",
+ "\n",
+ "tokenized_data_set = dataset.map(preprocess_function, batched=True)\n",
+ "\n",
+ "# Training setup (assuming you have data in optimal JSON format)\n",
+ "training_args = TrainingArguments(\n",
+ "    output_dir=\"calendar_model\",\n",
+ "    evaluation_strategy=\"epoch\",\n",
+ "    learning_rate=5e-5,\n",
+ "    per_device_train_batch_size=16,\n",
+ "    per_device_eval_batch_size=16,\n",
+ "    weight_decay=0.01,\n",
+ "    save_total_limit=3,\n",
+ "    num_train_epochs=1,\n",
+ "    use_mps_device=True,\n",
+ "    # fp16=True,\n",
+ "    # push_to_hub=True,\n",
+ ")\n",
+ "\n",
+ "# Train the model\n",
+ "trainer = Trainer(\n",
+ "    model=model,\n",
+ "    args=training_args,\n",
+ "    train_dataset=dataset[\"train\"],\n",
+ "    eval_dataset=dataset[\"test\"],\n",
+ ")\n",
+ "trainer.train()\n",
+ "\n",
+ "# Create pipeline for multi-label prediction\n",
+ "pipe = TextClassificationPipeline(model=model, tokenizer=tokenizer, labels=labels)\n",
+ "\n",
+ "# Example usage for multi-label prediction\n",
+ "text = \"Meeting with John at 2 pm tomorrow in the conference room\"\n",
+ "calendar_entry = pipe(text)\n",
+ "\n",
+ "print(calendar_entry) # Output will be a list of dictionaries, one per label\n",
+ "\n",
+ "# Example: Accessing scores for the \"datetime\" label\n",
+ "datetime_predictions = calendar_entry[0]\n",
+ "print(datetime_predictions[\"score\"]) # List of prediction scores for \"datetime\"\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "506a9ad72c324024a186fda4e1fd7156",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Map: 0%| | 0/69 [00:00<?, ? examples/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "ename": "ValueError",
+ "evalue": "text input must be of type `str` (single example), `List[str]` (batch or single pretokenized example) or `List[List[str]]` (batch of pretokenized examples).",
+ "output_type": "error",
+ "traceback": [
+ "---------------------------------------------------------------------------",
+ "ValueError                                Traceback (most recent call last)",
+ "Cell In[6], line 1\n----> 1 tokenized_data_set = data_set.map(preprocess_function, batched=True)\n",
+ "File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/datasets/dataset_dict.py:869, in DatasetDict.map(self, function, with_indices, with_rank, input_columns, batched, batch_size, drop_last_batch, remove_columns, keep_in_memory, load_from_cache_file, cache_file_names, writer_batch_size, features, disable_nullable, fn_kwargs, num_proc, desc)\n--> 869 k: dataset.map(\n",
+ "File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/datasets/arrow_dataset.py:593, in transmit_tasks.<locals>.wrapper(*args, **kwargs)\n--> 593 out: Union[\"Dataset\", \"DatasetDict\"] = func(self, *args, **kwargs)\n",
+ "File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/datasets/arrow_dataset.py:558, in transmit_format.<locals>.wrapper(*args, **kwargs)\n--> 558 out: Union[\"Dataset\", \"DatasetDict\"] = func(self, *args, **kwargs)\n",
+ "File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/datasets/arrow_dataset.py:3105, in Dataset.map(self, function, with_indices, with_rank, input_columns, batched, batch_size, drop_last_batch, remove_columns, keep_in_memory, load_from_cache_file, cache_file_name, writer_batch_size, features, disable_nullable, fn_kwargs, num_proc, suffix_template, new_fingerprint, desc)\n-> 3105 for rank, done, content in Dataset._map_single(**dataset_kwargs):\n",
+ "File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/datasets/arrow_dataset.py:3482, in Dataset._map_single(shard, function, with_indices, with_rank, input_columns, batched, batch_size, drop_last_batch, remove_columns, keep_in_memory, cache_file_name, writer_batch_size, features, disable_nullable, fn_kwargs, new_fingerprint, rank, offset)\n-> 3482 batch = apply_function_on_filtered_inputs(\n",
+ "File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/datasets/arrow_dataset.py:3361, in Dataset._map_single.<locals>.apply_function_on_filtered_inputs(pa_inputs, indices, check_same_num_examples, offset)\n-> 3361 processed_inputs = function(*fn_args, *additional_args, **fn_kwargs)\n",
+ "Cell In[5], line 14, in preprocess_function(examples)\n---> 14 labels = tokenizer(examples[\"labels\"], max_length=128, truncation=True, padding=\"max_length\")\n",
+ "File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/transformers/tokenization_utils_base.py:2829, in PreTrainedTokenizerBase.__call__(self, text, text_pair, text_target, text_pair_target, add_special_tokens, padding, truncation, max_length, stride, is_split_into_words, pad_to_multiple_of, return_tensors, return_token_type_ids, return_attention_mask, return_overflowing_tokens, return_special_tokens_mask, return_offsets_mapping, return_length, verbose, **kwargs)\n-> 2829 encodings = self._call_one(text=text, text_pair=text_pair, **all_kwargs)\n",
+ "File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/transformers/tokenization_utils_base.py:2887, in PreTrainedTokenizerBase._call_one(self, text, text_pair, add_special_tokens, padding, truncation, max_length, stride, is_split_into_words, pad_to_multiple_of, return_tensors, return_token_type_ids, return_attention_mask, return_overflowing_tokens, return_special_tokens_mask, return_offsets_mapping, return_length, verbose, **kwargs)\n-> 2887 raise ValueError(\n",
+ "ValueError: text input must be of type `str` (single example), `List[str]` (batch or single pretokenized example) or `List[List[str]]` (batch of pretokenized examples)."
+ ]
+ }
+ ],
+ "source": [
+ "tokenized_data_set = data_set.map(preprocess_function, batched=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from transformers import DataCollatorForSeq2Seq"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=checkpoint)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import evaluate"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "rouge = evaluate.load(\"rouge\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "\n",
+ "def compute_metrics(eval_pred):\n",
+ "    predictions, labels = eval_pred\n",
+ "    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)\n",
+ "    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)\n",
+ "    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)\n",
+ "\n",
+ "    result = rouge.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)\n",
+ "\n",
+ "    prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in predictions]\n",
+ "    result[\"gen_len\"] = np.mean(prediction_lens)\n",
+ "\n",
+ "    return {k: round(v, 4) for k, v in result.items()}\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from transformers import AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer\n",
+ "model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Model moved to MPS device\n"
+ ]
+ }
+ ],
+ "source": [
+ "import torch\n",
+ "\n",
+ "# Check that MPS is available\n",
+ "if not torch.backends.mps.is_available():\n",
+ "    if not torch.backends.mps.is_built():\n",
+ "        print(\"MPS not available because the current PyTorch install was not \"\n",
+ "              \"built with MPS enabled.\")\n",
+ "    else:\n",
+ "        print(\"MPS not available because the current MacOS version is not 12.3+ \"\n",
+ "              \"and/or you do not have an MPS-enabled device on this machine.\")\n",
+ "\n",
+ "else:\n",
+ "    mps_device = torch.device(\"mps\")\n",
+ "    model.to(mps_device)\n",
+ "    print(\"Model moved to MPS device\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/transformers/training_args.py:1951: UserWarning: `use_mps_device` is deprecated and will be removed in version 5.0 of 🤗 Transformers. `mps` device will be used by default if available similar to the way `cuda` device is used.Therefore, no action from user is required. \n",
+ "  warnings.warn(\n"
+ ]
+ }
+ ],
+ "source": [
+ "training_args = Seq2SeqTrainingArguments(\n",
+ "    output_dir=\"calendar_model\",\n",
+ "    evaluation_strategy=\"epoch\",\n",
+ "    learning_rate=2e-5,\n",
+ "    per_device_train_batch_size=16,\n",
+ "    per_device_eval_batch_size=16,\n",
+ "    weight_decay=0.01,\n",
+ "    save_total_limit=3,\n",
+ "    num_train_epochs=3,\n",
+ "    predict_with_generate=True,\n",
+ "    use_mps_device=True,\n",
+ "    # fp16=True,\n",
+ "    # push_to_hub=True,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "DatasetDict({\n",
+ "    train: Dataset({\n",
+ "        features: ['details', 'message'],\n",
+ "        num_rows: 69\n",
+ "    })\n",
+ "    test: Dataset({\n",
+ "        features: ['details', 'message'],\n",
+ "        num_rows: 8\n",
+ "    })\n",
+ "})\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(data_set)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "trainer = Seq2SeqTrainer(\n",
+ "    model=model,\n",
+ "    args=training_args,\n",
+ "    train_dataset=tokenized_data_set[\"train\"],\n",
+ "    eval_dataset=tokenized_data_set[\"test\"],\n",
+ "    tokenizer=tokenizer,\n",
+ "    data_collator=data_collator,\n",
+ "    compute_metrics=compute_metrics,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "9452caa67e26493eb4c189fd55a68c32",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/15 [00:00<?, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/transformers/generation/utils.py:1178: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. We recommend setting `max_new_tokens` to control the maximum length of the generation.\n",
+ "  warnings.warn(\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "9829c1db68244e7b827c76f106a353a8",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/1 [00:00<?, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'eval_loss': 14.770042419433594, 'eval_rouge1': 0.2492, 'eval_rouge2': 0.132, 'eval_rougeL': 0.2098, 'eval_rougeLsum': 0.2078, 'eval_gen_len': 18.5, 'eval_runtime': 3.1599, 'eval_samples_per_second': 2.532, 'eval_steps_per_second': 0.316, 'epoch': 1.0}\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "3e8f24890d4848e5958d51b2fad39827",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/1 [00:00<?, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'eval_loss': 13.279829978942871, 'eval_rouge1': 0.191, 'eval_rouge2': 0.0841, 'eval_rougeL': 0.171, 'eval_rougeLsum': 0.1669, 'eval_gen_len': 18.5, 'eval_runtime': 0.6868, 'eval_samples_per_second': 11.648, 'eval_steps_per_second': 1.456, 'epoch': 2.0}\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "812b05e2e2234a87ab283f4771f8f615",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/1 [00:00<?, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'eval_loss': 12.672184944152832, 'eval_rouge1': 0.1767, 'eval_rouge2': 0.0792, 'eval_rougeL': 0.1555, 'eval_rougeLsum': 0.1518, 'eval_gen_len': 19.0, 'eval_runtime': 0.6063, 'eval_samples_per_second': 13.195, 'eval_steps_per_second': 1.649, 'epoch': 3.0}\n",
446
+ "{'train_runtime': 12.159, 'train_samples_per_second': 17.024, 'train_steps_per_second': 1.234, 'train_loss': 12.712192789713542, 'epoch': 3.0}\n"
447
+ ]
448
+ },
449
+ {
450
+ "data": {
451
+ "text/plain": [
452
+ "TrainOutput(global_step=15, training_loss=12.712192789713542, metrics={'train_runtime': 12.159, 'train_samples_per_second': 17.024, 'train_steps_per_second': 1.234, 'train_loss': 12.712192789713542, 'epoch': 3.0})"
453
+ ]
454
+ },
455
+ "execution_count": 14,
456
+ "metadata": {},
457
+ "output_type": "execute_result"
458
+ }
459
+ ],
460
+ "source": [
461
+ "trainer.train()"
462
+ ]
463
+ },
464
+ {
465
+ "cell_type": "code",
466
+ "execution_count": 15,
467
+ "metadata": {},
468
+ "outputs": [
469
+ {
470
+ "data": {
471
+ "application/vnd.jupyter.widget-view+json": {
472
+ "model_id": "7350298fb1d24de696d2fdce2b167cb7",
473
+ "version_major": 2,
474
+ "version_minor": 0
475
+ },
476
+ "text/plain": [
477
+ "Upload 2 LFS files: 0%| | 0/2 [00:00<?, ?it/s]"
478
+ ]
479
+ },
480
+ "metadata": {},
481
+ "output_type": "display_data"
482
+ },
483
+ {
484
+ "data": {
485
+ "application/vnd.jupyter.widget-view+json": {
486
+ "model_id": "bdd17cc0c7624ab0babcf12b19157c75",
487
+ "version_major": 2,
488
+ "version_minor": 0
489
+ },
490
+ "text/plain": [
491
+ "model.safetensors: 0%| | 0.00/242M [00:00<?, ?B/s]"
492
+ ]
493
+ },
494
+ "metadata": {},
495
+ "output_type": "display_data"
496
+ },
497
+ {
498
+ "data": {
499
+ "application/vnd.jupyter.widget-view+json": {
500
+ "model_id": "040b39bf6bdc4e939cf56a47e1f4451e",
501
+ "version_major": 2,
502
+ "version_minor": 0
503
+ },
504
+ "text/plain": [
505
+ "training_args.bin: 0%| | 0.00/4.98k [00:00<?, ?B/s]"
506
+ ]
507
+ },
508
+ "metadata": {},
509
+ "output_type": "display_data"
510
+ },
511
+ {
512
+ "data": {
513
+ "text/plain": [
514
+ "CommitInfo(commit_url='https://huggingface.co/joshcarp/calendar_model/commit/ef13304ccc7e109ab97007e944f01405ce9b1409', commit_message='End of training', commit_description='', oid='ef13304ccc7e109ab97007e944f01405ce9b1409', pr_url=None, pr_revision=None, pr_num=None)"
515
+ ]
516
+ },
517
+ "execution_count": 15,
518
+ "metadata": {},
519
+ "output_type": "execute_result"
520
+ }
521
+ ],
522
+ "source": [
523
+ "trainer.push_to_hub()"
524
+ ]
525
+ },
526
+ {
527
+ "cell_type": "code",
528
+ "execution_count": 24,
529
+ "metadata": {},
530
+ "outputs": [
531
+ {
532
+ "name": "stdout",
533
+ "output_type": "stream",
534
+ "text": [
535
+ "convert to summary: Doctor's appointment on Friday at 9:00 AM.\n",
536
+ "[{'generated_text': \"Umgekehrt: Doctor's appointment on Friday at 9:00 AM.\"}]\n"
537
+ ]
538
+ }
539
+ ],
540
+ "source": [
541
+ "from transformers import pipeline\n",
542
+ "\n",
543
+ "hub_model_id = \"joshcarp/calendar_model\"\n",
544
+ "summarizer = pipeline(\"textclassificationpipeline\", model=hub_model_id)\n",
545
+ "text = \"convert to summary: Doctor's appointment on Friday at 9:00 AM.\"\n",
546
+ "summary = summarizer(text, max_length=50, min_length=6)\n",
547
+ "print(text)\n",
548
+ "print(summary)"
549
+ ]
550
+ },
551
+ {
552
+ "cell_type": "code",
553
+ "execution_count": null,
554
+ "metadata": {},
555
+ "outputs": [],
556
+ "source": []
557
+ },
558
+ {
559
+ "cell_type": "code",
560
+ "execution_count": null,
561
+ "metadata": {},
562
+ "outputs": [],
563
+ "source": []
564
+ }
565
+ ],
566
+ "metadata": {
567
+ "kernelspec": {
568
+ "display_name": "Python 3",
569
+ "language": "python",
570
+ "name": "python3"
571
+ },
572
+ "language_info": {
573
+ "codemirror_mode": {
574
+ "name": "ipython",
575
+ "version": 3
576
+ },
577
+ "file_extension": ".py",
578
+ "mimetype": "text/x-python",
579
+ "name": "python",
580
+ "nbconvert_exporter": "python",
581
+ "pygments_lexer": "ipython3",
582
+ "version": "3.12.1"
583
+ }
584
+ },
585
+ "nbformat": 4,
586
+ "nbformat_minor": 2
587
+ }
calendar.ipynb ADDED
@@ -0,0 +1,524 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 43,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "from datasets import load_dataset\n",
10
+ "\n",
11
+ "data_set = load_dataset(\"json\", data_files=\"data3.json\", split=\"train\")\n",
12
+ "# convert data_set details field to string\n",
13
+ "#\n",
14
+ "# data_set = data_set.map(lambda x: {\"details\": str(x[\"details\"])})\n",
15
+ "data_set = data_set.train_test_split(test_size=0.1)\n",
16
+ "# print(data_set.data[\"train\"][0])\n",
17
+ "# print(type(data_set.data[\"train\"]))"
18
+ ]
19
+ },
20
+ {
21
+ "cell_type": "code",
22
+ "execution_count": 44,
23
+ "metadata": {},
24
+ "outputs": [
25
+ {
26
+ "ename": "OSError",
27
+ "evalue": "flan-t5-small is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'\nIf this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`",
28
+ "output_type": "error",
29
+ "traceback": [
30
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
31
+ "\u001b[0;31mHTTPError\u001b[0m Traceback (most recent call last)",
32
+ "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/huggingface_hub/utils/_errors.py:304\u001b[0m, in \u001b[0;36mhf_raise_for_status\u001b[0;34m(response, endpoint_name)\u001b[0m\n\u001b[1;32m 303\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 304\u001b[0m \u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mraise_for_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 305\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m HTTPError \u001b[38;5;28;01mas\u001b[39;00m e:\n",
33
+ "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/requests/models.py:1021\u001b[0m, in \u001b[0;36mResponse.raise_for_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1020\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m http_error_msg:\n\u001b[0;32m-> 1021\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m HTTPError(http_error_msg, response\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m)\n",
34
+ "\u001b[0;31mHTTPError\u001b[0m: 404 Client Error: Not Found for url: https://huggingface.co/flan-t5-small/resolve/main/tokenizer_config.json",
35
+ "\nThe above exception was the direct cause of the following exception:\n",
36
+ "\u001b[0;31mRepositoryNotFoundError\u001b[0m Traceback (most recent call last)",
37
+ "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/transformers/utils/hub.py:398\u001b[0m, in \u001b[0;36mcached_file\u001b[0;34m(path_or_repo_id, filename, cache_dir, force_download, resume_download, proxies, token, revision, local_files_only, subfolder, repo_type, user_agent, _raise_exceptions_for_gated_repo, _raise_exceptions_for_missing_entries, _raise_exceptions_for_connection_errors, _commit_hash, **deprecated_kwargs)\u001b[0m\n\u001b[1;32m 396\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 397\u001b[0m \u001b[38;5;66;03m# Load from URL or cache if already cached\u001b[39;00m\n\u001b[0;32m--> 398\u001b[0m resolved_file \u001b[38;5;241m=\u001b[39m \u001b[43mhf_hub_download\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 399\u001b[0m \u001b[43m \u001b[49m\u001b[43mpath_or_repo_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 400\u001b[0m \u001b[43m \u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 401\u001b[0m \u001b[43m \u001b[49m\u001b[43msubfolder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43msubfolder\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m==\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43msubfolder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 402\u001b[0m \u001b[43m \u001b[49m\u001b[43mrepo_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrepo_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 403\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 404\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 405\u001b[0m \u001b[43m \u001b[49m\u001b[43muser_agent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muser_agent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 406\u001b[0m \u001b[43m \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 407\u001b[0m \u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 408\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 409\u001b[0m \u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 410\u001b[0m \u001b[43m \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 411\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 412\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m GatedRepoError \u001b[38;5;28;01mas\u001b[39;00m e:\n",
38
+ "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/huggingface_hub/utils/_validators.py:118\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.<locals>._inner_fn\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 116\u001b[0m kwargs \u001b[38;5;241m=\u001b[39m smoothly_deprecate_use_auth_token(fn_name\u001b[38;5;241m=\u001b[39mfn\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, has_token\u001b[38;5;241m=\u001b[39mhas_token, kwargs\u001b[38;5;241m=\u001b[39mkwargs)\n\u001b[0;32m--> 118\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
39
+ "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/huggingface_hub/file_download.py:1403\u001b[0m, in \u001b[0;36mhf_hub_download\u001b[0;34m(repo_id, filename, subfolder, repo_type, revision, library_name, library_version, cache_dir, local_dir, local_dir_use_symlinks, user_agent, force_download, force_filename, proxies, etag_timeout, resume_download, token, local_files_only, legacy_cache_layout, endpoint)\u001b[0m\n\u001b[1;32m 1401\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(head_call_error, RepositoryNotFoundError) \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(head_call_error, GatedRepoError):\n\u001b[1;32m 1402\u001b[0m \u001b[38;5;66;03m# Repo not found or gated => let's raise the actual error\u001b[39;00m\n\u001b[0;32m-> 1403\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m head_call_error\n\u001b[1;32m 1404\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1405\u001b[0m \u001b[38;5;66;03m# Otherwise: most likely a connection issue or Hub downtime => let's warn the user\u001b[39;00m\n",
40
+ "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/huggingface_hub/file_download.py:1261\u001b[0m, in \u001b[0;36mhf_hub_download\u001b[0;34m(repo_id, filename, subfolder, repo_type, revision, library_name, library_version, cache_dir, local_dir, local_dir_use_symlinks, user_agent, force_download, force_filename, proxies, etag_timeout, resume_download, token, local_files_only, legacy_cache_layout, endpoint)\u001b[0m\n\u001b[1;32m 1260\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1261\u001b[0m metadata \u001b[38;5;241m=\u001b[39m \u001b[43mget_hf_file_metadata\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1262\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1263\u001b[0m \u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1264\u001b[0m \u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1265\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43metag_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1266\u001b[0m \u001b[43m \u001b[49m\u001b[43mlibrary_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlibrary_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1267\u001b[0m \u001b[43m \u001b[49m\u001b[43mlibrary_version\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlibrary_version\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1268\u001b[0m \u001b[43m \u001b[49m\u001b[43muser_agent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muser_agent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1269\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1270\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m EntryNotFoundError \u001b[38;5;28;01mas\u001b[39;00m http_error:\n\u001b[1;32m 1271\u001b[0m \u001b[38;5;66;03m# Cache the non-existence of the file and raise\u001b[39;00m\n",
41
+ "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/huggingface_hub/utils/_validators.py:118\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.<locals>._inner_fn\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 116\u001b[0m kwargs \u001b[38;5;241m=\u001b[39m smoothly_deprecate_use_auth_token(fn_name\u001b[38;5;241m=\u001b[39mfn\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, has_token\u001b[38;5;241m=\u001b[39mhas_token, kwargs\u001b[38;5;241m=\u001b[39mkwargs)\n\u001b[0;32m--> 118\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
42
+ "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/huggingface_hub/file_download.py:1667\u001b[0m, in \u001b[0;36mget_hf_file_metadata\u001b[0;34m(url, token, proxies, timeout, library_name, library_version, user_agent)\u001b[0m\n\u001b[1;32m 1666\u001b[0m \u001b[38;5;66;03m# Retrieve metadata\u001b[39;00m\n\u001b[0;32m-> 1667\u001b[0m r \u001b[38;5;241m=\u001b[39m \u001b[43m_request_wrapper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1668\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mHEAD\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1669\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1670\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1671\u001b[0m \u001b[43m \u001b[49m\u001b[43mallow_redirects\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 1672\u001b[0m \u001b[43m \u001b[49m\u001b[43mfollow_relative_redirects\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 1673\u001b[0m \u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1674\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1675\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1676\u001b[0m hf_raise_for_status(r)\n",
43
+ "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/huggingface_hub/file_download.py:385\u001b[0m, in \u001b[0;36m_request_wrapper\u001b[0;34m(method, url, follow_relative_redirects, **params)\u001b[0m\n\u001b[1;32m 384\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m follow_relative_redirects:\n\u001b[0;32m--> 385\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43m_request_wrapper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 386\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 387\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 388\u001b[0m \u001b[43m \u001b[49m\u001b[43mfollow_relative_redirects\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 389\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 390\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 392\u001b[0m \u001b[38;5;66;03m# If redirection, we redirect only relative paths.\u001b[39;00m\n\u001b[1;32m 393\u001b[0m \u001b[38;5;66;03m# This is useful in case of a renamed repository.\u001b[39;00m\n",
44
+ "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/huggingface_hub/file_download.py:409\u001b[0m, in \u001b[0;36m_request_wrapper\u001b[0;34m(method, url, follow_relative_redirects, **params)\u001b[0m\n\u001b[1;32m 408\u001b[0m response \u001b[38;5;241m=\u001b[39m get_session()\u001b[38;5;241m.\u001b[39mrequest(method\u001b[38;5;241m=\u001b[39mmethod, url\u001b[38;5;241m=\u001b[39murl, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mparams)\n\u001b[0;32m--> 409\u001b[0m \u001b[43mhf_raise_for_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresponse\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 410\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m response\n",
45
+ "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/huggingface_hub/utils/_errors.py:352\u001b[0m, in \u001b[0;36mhf_raise_for_status\u001b[0;34m(response, endpoint_name)\u001b[0m\n\u001b[1;32m 344\u001b[0m message \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 345\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;241m.\u001b[39mstatus_code\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m Client Error.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 346\u001b[0m \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 350\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m make sure you are authenticated.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 351\u001b[0m )\n\u001b[0;32m--> 352\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m RepositoryNotFoundError(message, response) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01me\u001b[39;00m\n\u001b[1;32m 354\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m response\u001b[38;5;241m.\u001b[39mstatus_code \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m400\u001b[39m:\n",
46
+ "\u001b[0;31mRepositoryNotFoundError\u001b[0m: 404 Client Error. (Request ID: Root=1-65f984a1-7346ead41ef1f7332a940212;3782e90a-db2f-4d61-bc70-34daff4938f4)\n\nRepository Not Found for url: https://huggingface.co/flan-t5-small/resolve/main/tokenizer_config.json.\nPlease make sure you specified the correct `repo_id` and `repo_type`.\nIf you are trying to access a private or gated repo, make sure you are authenticated.",
47
+ "\nThe above exception was the direct cause of the following exception:\n",
48
+ "\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)",
49
+ "Cell \u001b[0;32mIn[44], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AutoTokenizer\n\u001b[1;32m 2\u001b[0m checkpoint \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mflan-t5-small\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 3\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m \u001b[43mAutoTokenizer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcheckpoint\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4\u001b[0m prefix \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\"\"\u001b[39m\u001b[38;5;124mextract the calendar event details from the following message. The details should be specified in the following json format:\u001b[39m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;124m{\u001b[39m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;124m \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdatetime\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m: \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m2024-03-12T12:00:00\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m,\u001b[39m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[38;5;124m}\u001b[39m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;124m\"\"\"\u001b[39m\n\u001b[1;32m 11\u001b[0m prefix \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m\n",
50
+ "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/transformers/models/auto/tokenization_auto.py:767\u001b[0m, in \u001b[0;36mAutoTokenizer.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[1;32m 764\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m tokenizer_class\u001b[38;5;241m.\u001b[39mfrom_pretrained(pretrained_model_name_or_path, \u001b[38;5;241m*\u001b[39minputs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 766\u001b[0m \u001b[38;5;66;03m# Next, let's try to use the tokenizer_config file to get the tokenizer class.\u001b[39;00m\n\u001b[0;32m--> 767\u001b[0m tokenizer_config \u001b[38;5;241m=\u001b[39m \u001b[43mget_tokenizer_config\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 768\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_commit_hash\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m tokenizer_config:\n\u001b[1;32m 769\u001b[0m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_commit_hash\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m tokenizer_config[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_commit_hash\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n",
51
+ "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/transformers/models/auto/tokenization_auto.py:600\u001b[0m, in \u001b[0;36mget_tokenizer_config\u001b[0;34m(pretrained_model_name_or_path, cache_dir, force_download, resume_download, proxies, token, revision, local_files_only, subfolder, **kwargs)\u001b[0m\n\u001b[1;32m 597\u001b[0m token \u001b[38;5;241m=\u001b[39m use_auth_token\n\u001b[1;32m 599\u001b[0m commit_hash \u001b[38;5;241m=\u001b[39m kwargs\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_commit_hash\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[0;32m--> 600\u001b[0m resolved_config_file \u001b[38;5;241m=\u001b[39m \u001b[43mcached_file\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 601\u001b[0m \u001b[43m \u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 602\u001b[0m \u001b[43m \u001b[49m\u001b[43mTOKENIZER_CONFIG_FILE\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 603\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 604\u001b[0m \u001b[43m \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 605\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 606\u001b[0m \u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 607\u001b[0m \u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 608\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 609\u001b[0m \u001b[43m \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 610\u001b[0m \u001b[43m \u001b[49m\u001b[43msubfolder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msubfolder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 611\u001b[0m \u001b[43m \u001b[49m\u001b[43m_raise_exceptions_for_gated_repo\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 612\u001b[0m \u001b[43m \u001b[49m\u001b[43m_raise_exceptions_for_missing_entries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 613\u001b[0m \u001b[43m \u001b[49m\u001b[43m_raise_exceptions_for_connection_errors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 614\u001b[0m \u001b[43m \u001b[49m\u001b[43m_commit_hash\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcommit_hash\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 615\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 616\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m resolved_config_file \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 617\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCould not locate the tokenizer configuration file, will try to use the model config 
instead.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
52
+ "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/transformers/utils/hub.py:421\u001b[0m, in \u001b[0;36mcached_file\u001b[0;34m(path_or_repo_id, filename, cache_dir, force_download, resume_download, proxies, token, revision, local_files_only, subfolder, repo_type, user_agent, _raise_exceptions_for_gated_repo, _raise_exceptions_for_missing_entries, _raise_exceptions_for_connection_errors, _commit_hash, **deprecated_kwargs)\u001b[0m\n\u001b[1;32m 416\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mEnvironmentError\u001b[39;00m(\n\u001b[1;32m 417\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mYou are trying to access a gated repo.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mMake sure to have access to it at \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 418\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhttps://huggingface.co/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpath_or_repo_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mstr\u001b[39m(e)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 419\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01me\u001b[39;00m\n\u001b[1;32m 420\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m RepositoryNotFoundError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m--> 421\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mEnvironmentError\u001b[39;00m(\n\u001b[1;32m 422\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpath_or_repo_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m is not a local folder and is not a valid model identifier \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 423\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlisted on \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mhttps://huggingface.co/models\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mIf this is a private repository, make sure to pass a token \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 424\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhaving permission to this repo either by logging in with `huggingface-cli login` or by passing \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 425\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m`token=<your_token>`\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 426\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01me\u001b[39;00m\n\u001b[1;32m 427\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m RevisionNotFoundError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 428\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mEnvironmentError\u001b[39;00m(\n\u001b[1;32m 429\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mrevision\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m is not a valid git identifier (branch name, tag name or commit id) that exists \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 430\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfor this model name. 
Check the model page at \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 431\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mhttps://huggingface.co/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpath_or_repo_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m for available revisions.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 432\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01me\u001b[39;00m\n",
53
+ "\u001b[0;31mOSError\u001b[0m: flan-t5-small is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'\nIf this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`"
54
+ ]
55
+ }
56
+ ],
57
+ "source": [
58
+ "from transformers import AutoTokenizer\n",
59
+ "checkpoint = \"flan-t5-small\"\n",
60
+ "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
61
+ "prefix = \"\"\"extract the calendar event details from the following message. The details should be specified in the following json format:\n",
62
+ "{\n",
63
+ " \"datetime\": \"2024-03-12T12:00:00\",\n",
64
+ " \"description\": \"Lunch meeting\",\n",
65
+ " \"location\": \"Italian restaurant on Main Street\"\n",
66
+ "}\n",
67
+ "\"\"\"\n",
68
+ "prefix = \"\"\n",
69
+ "\n",
70
+ "def preprocess_function(examples):\n",
71
+ " inputs = [prefix + doc for doc in examples[\"message\"]]\n",
72
+ " target = [doc for doc in examples[\"details\"]]\n",
73
+ " model_inputs = tokenizer(inputs, text_target=target, max_length=1024, truncation=True, padding=\"max_length\") \n",
74
+ "\n",
75
+ " # labels = tokenizer(text_target=examples[\"details\"], max_length=128, truncation=True, padding=\"max_length\")\n",
76
+ "\n",
77
+ " # model_inputs[\"labels\"] = labels[\"input_ids\"]\n",
78
+ " return model_inputs\n"
79
+ ]
80
+ },
81
+ {
82
+ "cell_type": "code",
83
+ "execution_count": null,
84
+ "metadata": {},
85
+ "outputs": [
86
+ {
87
+ "data": {
88
+ "application/vnd.jupyter.widget-view+json": {
89
+ "model_id": "175dd8b79c984d4ab51850288906a808",
90
+ "version_major": 2,
91
+ "version_minor": 0
92
+ },
93
+ "text/plain": [
94
+ "Map: 0%| | 0/69 [00:00<?, ? examples/s]"
95
+ ]
96
+ },
97
+ "metadata": {},
98
+ "output_type": "display_data"
99
+ },
100
+ {
101
+ "data": {
102
+ "application/vnd.jupyter.widget-view+json": {
103
+ "model_id": "c80154f297054a54b7eada6199f8a3ab",
104
+ "version_major": 2,
105
+ "version_minor": 0
106
+ },
107
+ "text/plain": [
108
+ "Map: 0%| | 0/8 [00:00<?, ? examples/s]"
109
+ ]
110
+ },
111
+ "metadata": {},
112
+ "output_type": "display_data"
113
+ }
114
+ ],
115
+ "source": [
116
+ "tokenized_data_set = data_set.map(preprocess_function, batched=True)"
117
+ ]
118
+ },
119
+ {
120
+ "cell_type": "code",
121
+ "execution_count": null,
122
+ "metadata": {},
123
+ "outputs": [],
124
+ "source": [
125
+ "from transformers import DataCollatorForSeq2Seq"
126
+ ]
127
+ },
128
+ {
129
+ "cell_type": "code",
130
+ "execution_count": null,
131
+ "metadata": {},
132
+ "outputs": [],
133
+ "source": [
134
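+ "# note: model= is given the checkpoint string here rather than the loaded model, so the collator skips preparing decoder_input_ids from labels (T5 still derives them in forward)\n",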
+ "data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=checkpoint)"
135
+ ]
136
+ },
137
+ {
138
+ "cell_type": "code",
139
+ "execution_count": null,
140
+ "metadata": {},
141
+ "outputs": [],
142
+ "source": [
143
+ "import evaluate"
144
+ ]
145
+ },
146
+ {
147
+ "cell_type": "code",
148
+ "execution_count": null,
149
+ "metadata": {},
150
+ "outputs": [],
151
+ "source": [
152
+ "rouge = evaluate.load(\"rouge\")"
153
+ ]
154
+ },
155
+ {
156
+ "cell_type": "code",
157
+ "execution_count": null,
158
+ "metadata": {},
159
+ "outputs": [],
160
+ "source": [
161
+ "import numpy as np\n",
162
+ "\n",
163
+ "def compute_metrics(eval_pred):\n",
164
+ " predictions, labels = eval_pred.predictions, eval_pred.label_ids\n",
165
+ " predicted_strings = tokenizer.batch_decode(predictions, skip_special_tokens=True)\n",
166
+ " actual_strings = tokenizer.batch_decode(labels, skip_special_tokens=True)\n",
167
+ "\n",
168
+ " token_diffs = []\n",
169
+ " for predicted, actual in zip(predicted_strings, actual_strings):\n",
170
+ " predicted_tokens = tokenizer(predicted)[\"input_ids\"]\n",
171
+ " actual_tokens = tokenizer(actual)[\"input_ids\"]\n",
172
+ " token_diff = abs(len(predicted_tokens) - len(actual_tokens))\n",
173
+ " token_diffs.append(token_diff)\n",
174
+ "\n",
175
+ " avg_token_diff = sum(token_diffs) / len(token_diffs)\n",
176
+ " return {\"average_token_difference\": avg_token_diff}\n",
177
+ "\n"
178
+ ]
179
+ },
180
+ {
181
+ "cell_type": "code",
182
+ "execution_count": null,
183
+ "metadata": {},
184
+ "outputs": [],
185
+ "source": [
186
+ "from transformers import AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer\n",
187
+ "model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)"
188
+ ]
189
+ },
190
+ {
191
+ "cell_type": "code",
192
+ "execution_count": null,
193
+ "metadata": {},
194
+ "outputs": [
195
+ {
196
+ "name": "stdout",
197
+ "output_type": "stream",
198
+ "text": [
199
+ "Model moved to MPS device\n"
200
+ ]
201
+ }
202
+ ],
203
+ "source": [
204
+ "import torch\n",
205
+ "\n",
206
+ "# Check that MPS is available\n",
207
+ "if not torch.backends.mps.is_available():\n",
208
+ " if not torch.backends.mps.is_built():\n",
209
+ " print(\"MPS not available because the current PyTorch install was not \"\n",
210
+ " \"built with MPS enabled.\")\n",
211
+ " else:\n",
212
+ " print(\"MPS not available because the current MacOS version is not 12.3+ \"\n",
213
+ " \"and/or you do not have an MPS-enabled device on this machine.\")\n",
214
+ "\n",
215
+ "else:\n",
216
+ " mps_device = torch.device(\"mps\")\n",
217
+ " model.to(mps_device)\n",
218
+ " print(\"Model moved to MPS device\")"
219
+ ]
220
+ },
221
+ {
222
+ "cell_type": "code",
223
+ "execution_count": null,
224
+ "metadata": {},
225
+ "outputs": [
226
+ {
227
+ "name": "stderr",
228
+ "output_type": "stream",
229
+ "text": [
230
+ "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/transformers/training_args.py:1951: UserWarning: `use_mps_device` is deprecated and will be removed in version 5.0 of 🤗 Transformers. `mps` device will be used by default if available similar to the way `cuda` device is used.Therefore, no action from user is required. \n",
231
+ " warnings.warn(\n"
232
+ ]
233
+ }
234
+ ],
235
+ "source": [
236
+ "training_args = Seq2SeqTrainingArguments(\n",
237
+ " output_dir=\"calendar_model\",\n",
238
+ " evaluation_strategy=\"epoch\",\n",
239
+ " learning_rate=5e-5,\n",
240
+ " per_device_train_batch_size=8,\n",
241
+ " per_device_eval_batch_size=8,\n",
242
+ " weight_decay=0.01,\n",
243
+ " save_total_limit=3,\n",
244
+ " num_train_epochs=1,\n",
245
+ " predict_with_generate=True,\n",
246
+ " use_mps_device=True,\n",
247
+ " # fp16=True,\n",
248
+ " # push_to_hub=True,\n",
249
+ ")"
250
+ ]
251
+ },
252
+ {
253
+ "cell_type": "code",
254
+ "execution_count": null,
255
+ "metadata": {},
256
+ "outputs": [
257
+ {
258
+ "name": "stdout",
259
+ "output_type": "stream",
260
+ "text": [
261
+ "DatasetDict({\n",
262
+ " train: Dataset({\n",
263
+ " features: ['details', 'message'],\n",
264
+ " num_rows: 69\n",
265
+ " })\n",
266
+ " test: Dataset({\n",
267
+ " features: ['details', 'message'],\n",
268
+ " num_rows: 8\n",
269
+ " })\n",
270
+ "})\n"
271
+ ]
272
+ }
273
+ ],
274
+ "source": [
275
+ "print(data_set)"
276
+ ]
277
+ },
278
+ {
279
+ "cell_type": "code",
280
+ "execution_count": null,
281
+ "metadata": {},
282
+ "outputs": [],
283
+ "source": [
284
+ "trainer = Seq2SeqTrainer(\n",
285
+ " model=model,\n",
286
+ " args=training_args,\n",
287
+ " train_dataset=tokenized_data_set[\"train\"],\n",
288
+ " eval_dataset=tokenized_data_set[\"test\"],\n",
289
+ " tokenizer=tokenizer,\n",
290
+ " data_collator=data_collator,\n",
291
+ " compute_metrics=compute_metrics,\n",
292
+ ")"
293
+ ]
294
+ },
295
+ {
296
+ "cell_type": "code",
297
+ "execution_count": null,
298
+ "metadata": {},
299
+ "outputs": [
300
+ {
301
+ "data": {
302
+ "application/vnd.jupyter.widget-view+json": {
303
+ "model_id": "fddcfbbfca9944309199f434f94b8577",
304
+ "version_major": 2,
305
+ "version_minor": 0
306
+ },
307
+ "text/plain": [
308
+ " 0%| | 0/9 [00:00<?, ?it/s]"
309
+ ]
310
+ },
311
+ "metadata": {},
312
+ "output_type": "display_data"
313
+ },
314
+ {
315
+ "name": "stderr",
316
+ "output_type": "stream",
317
+ "text": [
318
+ "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/transformers/generation/utils.py:1178: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. We recommend setting `max_new_tokens` to control the maximum length of the generation.\n",
319
+ " warnings.warn(\n"
320
+ ]
321
+ },
322
+ {
323
+ "data": {
324
+ "application/vnd.jupyter.widget-view+json": {
325
+ "model_id": "71611216935e4ddca1f16114070609f9",
326
+ "version_major": 2,
327
+ "version_minor": 0
328
+ },
329
+ "text/plain": [
330
+ " 0%| | 0/1 [00:00<?, ?it/s]"
331
+ ]
332
+ },
333
+ "metadata": {},
334
+ "output_type": "display_data"
335
+ },
336
+ {
337
+ "name": "stdout",
338
+ "output_type": "stream",
339
+ "text": [
340
+ "{'eval_loss': 9.526269912719727, 'eval_average_token_difference': 9.875, 'eval_runtime': 3.2711, 'eval_samples_per_second': 2.446, 'eval_steps_per_second': 0.306, 'epoch': 1.0}\n",
341
+ "{'train_runtime': 270.5513, 'train_samples_per_second': 0.255, 'train_steps_per_second': 0.033, 'train_loss': 10.85148451063368, 'epoch': 1.0}\n"
342
+ ]
343
+ },
344
+ {
345
+ "data": {
346
+ "text/plain": [
347
+ "TrainOutput(global_step=9, training_loss=10.85148451063368, metrics={'train_runtime': 270.5513, 'train_samples_per_second': 0.255, 'train_steps_per_second': 0.033, 'train_loss': 10.85148451063368, 'epoch': 1.0})"
348
+ ]
349
+ },
350
+ "execution_count": 31,
351
+ "metadata": {},
352
+ "output_type": "execute_result"
353
+ }
354
+ ],
355
+ "source": [
356
+ "trainer.train()"
357
+ ]
358
+ },
359
+ {
360
+ "cell_type": "code",
361
+ "execution_count": null,
362
+ "metadata": {},
363
+ "outputs": [
364
+ {
365
+ "data": {
366
+ "application/vnd.jupyter.widget-view+json": {
367
+ "model_id": "f6f6f4eab8b44af285b2921106c718ae",
368
+ "version_major": 2,
369
+ "version_minor": 0
370
+ },
371
+ "text/plain": [
372
+ "model.safetensors: 0%| | 0.00/242M [00:00<?, ?B/s]"
373
+ ]
374
+ },
375
+ "metadata": {},
376
+ "output_type": "display_data"
377
+ },
378
+ {
379
+ "data": {
380
+ "application/vnd.jupyter.widget-view+json": {
381
+ "model_id": "df785967253f4223ab213e82ab8b468e",
382
+ "version_major": 2,
383
+ "version_minor": 0
384
+ },
385
+ "text/plain": [
386
+ "Upload 2 LFS files: 0%| | 0/2 [00:00<?, ?it/s]"
387
+ ]
388
+ },
389
+ "metadata": {},
390
+ "output_type": "display_data"
391
+ },
392
+ {
393
+ "data": {
394
+ "application/vnd.jupyter.widget-view+json": {
395
+ "model_id": "c511bfb7228c4ee0881c2bf456a8cc10",
396
+ "version_major": 2,
397
+ "version_minor": 0
398
+ },
399
+ "text/plain": [
400
+ "training_args.bin: 0%| | 0.00/4.98k [00:00<?, ?B/s]"
401
+ ]
402
+ },
403
+ "metadata": {},
404
+ "output_type": "display_data"
405
+ },
406
+ {
407
+ "data": {
408
+ "text/plain": [
409
+ "CommitInfo(commit_url='https://huggingface.co/joshcarp/calendar_model/commit/edfcfa8cc6e1ae5fb389894f56f0fb2a6885828a', commit_message='End of training', commit_description='', oid='edfcfa8cc6e1ae5fb389894f56f0fb2a6885828a', pr_url=None, pr_revision=None, pr_num=None)"
410
+ ]
411
+ },
412
+ "execution_count": 32,
413
+ "metadata": {},
414
+ "output_type": "execute_result"
415
+ }
416
+ ],
417
+ "source": [
418
+ "# push to hub\n",
419
+ "trainer.push_to_hub()"
420
+ ]
421
+ },
422
+ {
423
+ "cell_type": "code",
424
+ "execution_count": null,
425
+ "metadata": {},
426
+ "outputs": [
427
+ {
428
+ "name": "stdout",
429
+ "output_type": "stream",
430
+ "text": [
431
+ "extract the calendar event details from a message. The details should be specified in the following json format:\n",
432
+ "{\n",
433
+ " \"datetime\": \"<inferred start time from input text>\",\n",
434
+ " \"description\": \"<description of event from input text>\",\n",
435
+ " \"location\": \"<location of event from input text>\"\n",
436
+ "}\n",
437
+ "\n",
438
+ "Here is an example: \"Reminder: Team meeting on Friday at 10 AM in the conference room.\"\n",
439
+ "\n",
440
+ "For this example the output should be:\n",
441
+ "\n",
442
+ "{\n",
443
+ " \"datetime\": \"2024-03-15T10:00:00\",\n",
444
+ " \"description\": \"Team meeting\",\n",
445
+ " \"location\": \"Conference room\"\n",
446
+ "}\n",
447
+ "\n",
448
+ "\n",
449
+ "Here is the input text: Doctor's appointment on Friday at 9:00 AM.\n",
450
+ "[{'generated_text': 'calendar event details from a message. The details should be specified in json format: \"datetime\": \"inferred start time from input text>\", \"description\": \"description of event from input text>\", \"location\":'}]\n"
451
+ ]
452
+ }
453
+ ],
454
+ "source": [
455
+ "from transformers import pipeline\n",
456
+ "hub_model_id = \"joshcarp/calendar_model\"\n",
457
+ "summarizer = pipeline(\"text2text-generation\", model=hub_model_id)\n",
458
+ "\n",
459
+ "\n",
460
+ "prefix = \"\"\"extract the calendar event details from a message. The details should be specified in the following json format:\n",
461
+ "{\n",
462
+ " \"datetime\": \"<inferred start time from input text>\",\n",
463
+ " \"description\": \"<description of event from input text>\",\n",
464
+ " \"location\": \"<location of event from input text>\"\n",
465
+ "}\n",
466
+ "\n",
467
+ "Here is an example: \"Reminder: Team meeting on Friday at 10 AM in the conference room.\"\n",
468
+ "\n",
469
+ "For this example the output should be:\n",
470
+ "\n",
471
+ "{\n",
472
+ " \"datetime\": \"2024-03-15T10:00:00\",\n",
473
+ " \"description\": \"Team meeting\",\n",
474
+ " \"location\": \"Conference room\"\n",
475
+ "}\n",
476
+ "\n",
477
+ "\n",
478
+ "Here is the input text: \"\"\"\n",
479
+ "\n",
480
+ "text = prefix+\"Doctor's appointment on Friday at 9:00 AM.\"\n",
481
+ "\n",
482
+ "\n",
483
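+ "# truncation=True clips prompts that exceed the model's maximum input length\n",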
+ "summary = summarizer(text, max_length=60, min_length=6, truncation=True)\n",
484
+ "print(text)\n",
485
+ "print(summary)"
486
+ ]
487
+ },
488
+ {
489
+ "cell_type": "code",
490
+ "execution_count": null,
491
+ "metadata": {},
492
+ "outputs": [],
493
+ "source": []
494
+ },
495
+ {
496
+ "cell_type": "code",
497
+ "execution_count": null,
498
+ "metadata": {},
499
+ "outputs": [],
500
+ "source": []
501
+ }
502
+ ],
503
+ "metadata": {
504
+ "kernelspec": {
505
+ "display_name": "Python 3",
506
+ "language": "python",
507
+ "name": "python3"
508
+ },
509
+ "language_info": {
510
+ "codemirror_mode": {
511
+ "name": "ipython",
512
+ "version": 3
513
+ },
514
+ "file_extension": ".py",
515
+ "mimetype": "text/x-python",
516
+ "name": "python",
517
+ "nbconvert_exporter": "python",
518
+ "pygments_lexer": "ipython3",
519
+ "version": "3.12.1"
520
+ }
521
+ },
522
+ "nbformat": 4,
523
+ "nbformat_minor": 2
524
+ }
data-flattened.json ADDED
@@ -0,0 +1,470 @@
1
+ [
2
+ {
3
+ "message": "Let's meet for lunch tomorrow at 12 PM at the Italian restaurant on Main Street.",
4
+ "datetime": "2024-03-12T12:00:00",
5
+ "description": "Lunch meeting",
6
+ "location": "Italian restaurant on Main Street"
7
+ },
8
+ {
9
+ "message": "Reminder: Team meeting on Friday at 10 AM in the conference room.",
10
+ "datetime": "2024-03-15T10:00:00",
11
+ "description": "Team meeting",
12
+ "location": "Conference room"
13
+ },
14
+ {
15
+ "message": "Don't forget the doctor's appointment next Monday at 3:30 PM.",
16
+ "datetime": "2024-03-18T15:30:00",
17
+ "description": "Doctor's appointment",
18
+ "location": ""
19
+ },
20
+ {
21
+ "message": "Dinner with parents this Saturday evening at 7 PM.",
22
+ "datetime": "2024-03-16T19:00:00",
23
+ "description": "Dinner with parents",
24
+ "location": ""
25
+ },
26
+ {
27
+ "message": "Meeting with client next Tuesday at 9:30 AM at their office.",
28
+ "datetime": "2024-03-19T09:30:00",
29
+ "description": "Meeting with client",
30
+ "location": "Client's office"
31
+ },
32
+ {
33
+ "message": "Soccer practice on Wednesday at 4:00 PM at the park.",
34
+ "datetime": "2024-03-13T16:00:00",
35
+ "description": "Soccer practice",
36
+ "location": "Park"
37
+ },
38
+ {
39
+ "message": "Conference call tomorrow at 2:30 PM. Dial-in: 123-456-7890",
40
+ "datetime": "2024-03-12T14:30:00",
41
+ "description": "Conference call",
42
+ "location": ""
43
+ },
44
+ {
45
+ "message": "Pick up groceries on Friday after work.",
46
+ "datetime": "2024-03-15T17:00:00",
47
+ "description": "Pick up groceries",
48
+ "location": ""
49
+ },
50
+ {
51
+ "message": "Movie night with friends on Saturday at 8 PM.",
52
+ "datetime": "2024-03-16T20:00:00",
53
+ "description": "Movie night with friends",
54
+ "location": ""
55
+ },
56
+ {
57
+ "message": "Workout session next Monday morning at the gym.",
58
+ "datetime": "2024-03-18T08:00:00",
59
+ "description": "Workout session",
60
+ "location": "Gym"
61
+ },
62
+ {
63
+ "message": "Team lunch next Wednesday at noon.",
64
+ "datetime": "2024-03-20T12:00:00",
65
+ "description": "Team lunch",
66
+ "location": ""
67
+ },
68
+ {
69
+ "message": "Board meeting on Thursday at 9:00 AM in the boardroom.",
70
+ "datetime": "2024-03-14T09:00:00",
71
+ "description": "Board meeting",
72
+ "location": "Boardroom"
73
+ },
74
+ {
75
+ "message": "Flight to New York City on Friday evening.",
76
+ "datetime": "2024-03-15T18:00:00",
77
+ "description": "Flight to New York City",
78
+ "location": ""
79
+ },
80
+ {
81
+ "message": "Coffee with Jane next Tuesday at 11:30 AM.",
82
+ "datetime": "2024-03-19T11:30:00",
83
+ "description": "Coffee with Jane",
84
+ "location": ""
85
+ },
86
+ {
87
+ "message": "Dentist appointment on Wednesday at 2 PM.",
88
+ "datetime": "2024-03-13T14:00:00",
89
+ "description": "Dentist appointment",
90
+ "location": ""
91
+ },
92
+ {
93
+ "message": "Team outing next Friday afternoon.",
94
+ "datetime": "2024-03-15T12:00:00",
95
+ "description": "Team outing",
96
+ "location": ""
97
+ },
98
+ {
99
+ "message": "Book club meeting on Thursday at 7:30 PM.",
100
+ "datetime": "2024-03-14T19:30:00",
101
+ "description": "Book club meeting",
102
+ "location": ""
103
+ },
104
+ {
105
+ "message": "Conference in Chicago next month from April 10th to April 12th.",
106
+ "datetime": "2024-04-10T00:00:00",
107
+ "end": "2024-04-12T00:00:00",
108
+ "description": "Conference in Chicago",
109
+ "location": ""
110
+ },
111
+ {
112
+ "message": "Parent-teacher meeting on Monday at 4:30 PM.",
113
+ "datetime": "2024-03-18T16:30:00",
114
+ "description": "Parent-teacher meeting",
115
+ "location": ""
116
+ },
117
+ {
118
+ "message": "Dinner with John next Saturday at 6:30 PM at his place.",
119
+ "datetime": "2024-03-16T18:30:00",
120
+ "description": "Dinner with John",
121
+ "location": "John's place"
122
+ },
123
+ {
124
+ "message": "Birthday party for Sarah on Friday night at 8 PM.",
125
+ "datetime": "2024-03-15T20:00:00",
126
+ "description": "Birthday party for Sarah",
127
+ "location": ""
128
+ },
129
+ {
130
+ "message": "Conference call on Thursday at 11:00 AM.",
131
+ "datetime": "2024-03-14T11:00:00",
132
+ "description": "Conference call",
133
+ "location": ""
134
+ },
135
+ {
136
+ "message": "Meeting with HR on Monday morning at 9 AM.",
137
+ "datetime": "2024-03-18T09:00:00",
138
+ "description": "Meeting with HR",
139
+ "location": ""
140
+ },
141
+ {
142
+ "message": "Conference in London next week from April 1st to April 3rd.",
143
+ "datetime": "2024-04-01T00:00:00",
144
+ "end": "2024-04-03T00:00:00",
145
+ "description": "Conference in London",
146
+ "location": ""
147
+ },
148
+ {
149
+ "message": "Lunch with colleagues on Thursday at 12:30 PM.",
150
+ "datetime": "2024-03-14T12:30:00",
151
+ "description": "Lunch with colleagues",
152
+ "location": ""
153
+ },
154
+ {
155
+ "message": "Board meeting next Tuesday at 10 AM.",
156
+ "datetime": "2024-03-19T10:00:00",
157
+ "description": "Board meeting",
158
+ "location": ""
159
+ },
160
+ {
161
+ "message": "Workshop on Saturday morning at 9:30 AM in the auditorium.",
162
+ "datetime": "2024-03-16T09:30:00",
163
+ "description": "Workshop",
164
+ "location": "Auditorium"
165
+ },
166
+ {
167
+ "message": "Dinner party at Mike's place next Friday at 7:00 PM.",
168
+ "datetime": "2024-03-15T19:00:00",
169
+ "description": "Dinner party at Mike's place",
170
+ "location": "Mike's place"
171
+ },
172
+ {
173
+ "message": "Training session on Monday afternoon at 2 PM.",
174
+ "datetime": "2024-03-18T14:00:00",
175
+ "description": "Training session",
176
+ "location": ""
177
+ },
178
+ {
179
+ "message": "Coffee meeting on Wednesday at 10:30 AM.",
180
+ "datetime": "2024-03-13T10:30:00",
181
+ "description": "Coffee meeting",
182
+ "location": ""
183
+ },
184
+ {
185
+ "message": "Flight to Paris on Sunday morning at 9:00 AM.",
186
+ "datetime": "2024-03-17T09:00:00",
187
+ "description": "Flight to Paris",
188
+ "location": ""
189
+ },
190
+ {
191
+ "message": "Client presentation on Thursday at 2:00 PM in the conference room.",
192
+ "datetime": "2024-03-14T14:00:00",
193
+ "description": "Client presentation",
194
+ "location": "Conference room"
195
+ },
196
+ {
197
+ "message": "Dentist appointment on Tuesday at 11:00 AM.",
198
+ "datetime": "2024-03-19T11:00:00",
199
+ "description": "Dentist appointment",
200
+ "location": ""
201
+ },
202
+ {
203
+ "message": "Team building event next Friday at 1:00 PM.",
204
+ "datetime": "2024-03-15T13:00:00",
205
+ "description": "Team building event",
206
+ "location": ""
207
+ },
208
+ {
209
+ "message": "Business trip to San Francisco from April 5th to April 7th.",
210
+ "datetime": "2024-04-05T00:00:00",
211
+ "end": "2024-04-07T00:00:00",
212
+ "description": "Business trip to San Francisco",
213
+ "location": ""
214
+ },
215
+ {
216
+ "message": "Meeting with Sarah on Monday at 4:00 PM.",
217
+ "datetime": "2024-03-18T16:00:00",
218
+ "description": "Meeting with Sarah",
219
+ "location": ""
220
+ },
221
+ {
222
+ "message": "Dinner reservation for two on Friday night at 7:30 PM.",
223
+ "datetime": "2024-03-15T19:30:00",
224
+ "description": "Dinner reservation for two",
225
+ "location": ""
226
+ },
227
+ {
228
+ "message": "Video conference call on Tuesday at 3:00 PM.",
229
+ "datetime": "2024-03-19T15:00:00",
230
+ "description": "Video conference call",
231
+ "location": ""
232
+ },
233
+ {
234
+ "message": "Networking event on Wednesday evening at 6:00 PM.",
235
+ "datetime": "2024-03-13T18:00:00",
236
+ "description": "Networking event",
237
+ "location": ""
238
+ },
239
+ {
240
+ "message": "Pick up dry cleaning on Thursday afternoon.",
241
+ "datetime": "2024-03-14T12:00:00",
242
+ "description": "Pick up dry cleaning",
243
+ "location": ""
244
+ },
245
+ {
246
+ "message": "Coffee catch-up with Mark on Tuesday morning at 10 AM.",
247
+ "datetime": "2024-03-19T10:00:00",
248
+ "description": "Coffee catch-up with Mark",
249
+ "location": ""
250
+ },
251
+ {
252
+ "message": "Volunteer work at the shelter on Saturday afternoon.",
253
+ "datetime": "2024-03-16T12:00:00",
254
+ "description": "Volunteer work at the shelter",
255
+ "location": ""
256
+ },
257
+ {
258
+ "message": "Dinner with the Smiths on Sunday evening at 6:30 PM.",
259
+ "datetime": "2024-03-17T18:30:00",
260
+ "description": "Dinner with the Smiths",
261
+ "location": ""
262
+ },
263
+ {
264
+ "message": "Conference call with investors on Monday at 11:00 AM.",
265
+ "datetime": "2024-03-18T11:00:00",
266
+ "description": "Conference call with investors",
267
+ "location": ""
268
+ },
269
+ {
270
+ "message": "Lunch meeting with client on Thursday at 1:00 PM.",
271
+ "datetime": "2024-03-14T13:00:00",
272
+ "description": "Lunch meeting with client",
273
+ "location": ""
274
+ },
275
+ {
276
+ "message": "Conference in Berlin next month from April 8th to April 10th.",
277
+ "datetime": "2024-04-08T00:00:00",
278
+ "end": "2024-04-10T00:00:00",
279
+ "description": "Conference in Berlin",
280
+ "location": ""
281
+ },
282
+ {
283
+ "message": "Meeting with project team on Monday at 2:00 PM.",
284
+ "datetime": "2024-03-18T14:00:00",
285
+ "description": "Meeting with project team",
286
+ "location": ""
287
+ },
288
+ {
289
+ "message": "Workout session at the gym on Wednesday at 6:00 AM.",
290
+ "datetime": "2024-03-13T06:00:00",
291
+ "description": "Workout session at the gym",
292
+ "location": ""
293
+ },
294
+ {
295
+ "message": "Family dinner on Sunday at 7:00 PM.",
296
+ "datetime": "2024-03-17T19:00:00",
297
+ "description": "Family dinner",
298
+ "location": ""
299
+ },
300
+ {
301
+ "message": "Client meeting on Friday at 2:30 PM in the boardroom.",
302
+ "datetime": "2024-03-15T14:30:00",
303
+ "description": "Client meeting",
304
+ "location": "Boardroom"
305
+ },
306
+ {
307
+ "message": "Doctor's appointment on Monday at 10:00 AM.",
308
+ "datetime": "2024-03-18T10:00:00",
309
+ "description": "Doctor's appointment",
310
+ "location": ""
311
+ },
312
+ {
313
+ "message": "Movie night with friends next Saturday at 8:00 PM.",
314
+ "datetime": "2024-03-16T20:00:00",
315
+ "description": "Movie night with friends",
316
+ "location": ""
317
+ },
318
+ {
319
+ "message": "Conference call with team members on Tuesday at 11:00 AM.",
320
+ "datetime": "2024-03-19T11:00:00",
321
+ "description": "Conference call with team members",
322
+ "location": ""
323
+ },
324
+ {
325
+ "message": "Dinner at the new restaurant on Friday evening at 7:30 PM.",
326
+ "datetime": "2024-03-15T19:30:00",
327
+ "description": "Dinner at the new restaurant",
328
+ "location": ""
329
+ },
330
+ {
331
+ "message": "Meeting with clients on Wednesday at 3:00 PM.",
332
+ "datetime": "2024-03-13T15:00:00",
333
+ "description": "Meeting with clients",
334
+ "location": ""
335
+ },
336
+ {
337
+ "message": "Lunch with colleagues next Thursday at 1:00 PM.",
338
+ "datetime": "2024-03-14T13:00:00",
339
+ "description": "Lunch with colleagues",
340
+ "location": ""
341
+ },
342
+ {
343
+ "message": "Parent-teacher meeting on Monday at 3:00 PM.",
344
+ "datetime": "2024-03-18T15:00:00",
345
+ "description": "Parent-teacher meeting",
346
+ "location": ""
347
+ },
348
+ {
349
+ "message": "Flight to Tokyo next month on April 9th.",
350
+ "datetime": "2024-04-09T00:00:00",
351
+ "description": "Flight to Tokyo",
352
+ "location": ""
353
+ },
354
+ {
355
+ "message": "Meeting with the marketing team on Tuesday at 2:00 PM.",
356
+ "datetime": "2024-03-19T14:00:00",
357
+ "description": "Meeting with the marketing team",
358
+ "location": ""
359
+ },
360
+ {
361
+ "message": "Dinner with friends on Saturday at 7:00 PM.",
362
+ "datetime": "2024-03-16T19:00:00",
363
+ "description": "Dinner with friends",
364
+ "location": ""
365
+ },
366
+ {
367
+ "message": "Team meeting on Monday at 11:00 AM.",
368
+ "datetime": "2024-03-18T11:00:00",
369
+ "description": "Team meeting",
370
+ "location": ""
371
+ },
372
+ {
373
+ "message": "Conference call with the IT department on Thursday at 10:00 AM.",
374
+ "datetime": "2024-03-14T10:00:00",
375
+ "description": "Conference call with the IT department",
376
+ "location": ""
377
+ },
378
+ {
379
+ "message": "Lunch meeting with Jane on Wednesday at 12:00 PM.",
380
+ "datetime": "2024-03-13T12:00:00",
381
+ "description": "Lunch meeting with Jane",
382
+ "location": ""
383
+ },
384
+ {
385
+ "message": "Conference in Paris next month from April 10th to April 12th.",
386
+ "datetime": "2024-04-10T00:00:00",
387
+ "end": "2024-04-12T00:00:00",
388
+ "description": "Conference in Paris",
389
+ "location": ""
390
+ },
391
+ {
392
+ "message": "Workshop on Friday afternoon at 3:00 PM.",
393
+ "datetime": "2024-03-15T15:00:00",
394
+ "description": "Workshop",
395
+ "location": ""
396
+ },
397
+ {
398
+ "message": "Dinner with family next Sunday at 6:00 PM.",
399
+ "datetime": "2024-03-17T18:00:00",
400
+ "description": "Dinner with family",
401
+ "location": ""
402
+ },
403
+ {
404
+ "message": "Conference call with the sales team on Monday at 2:00 PM.",
405
+ "datetime": "2024-03-18T14:00:00",
406
+ "description": "Conference call with the sales team",
407
+ "location": ""
408
+ },
409
+ {
410
+ "message": "Doctor's appointment on Thursday at 10:30 AM.",
411
+ "datetime": "2024-03-14T10:30:00",
412
+ "description": "Doctor's appointment",
413
+ "location": ""
414
+ },
415
+ {
416
+ "message": "Meeting with the CEO on Tuesday at 9:00 AM.",
417
+ "datetime": "2024-03-19T09:00:00",
418
+ "description": "Meeting with the CEO",
419
+ "location": ""
420
+ },
421
+ {
422
+ "message": "Lunch with friends on Friday at 1:00 PM.",
423
+ "datetime": "2024-03-15T13:00:00",
424
+ "description": "Lunch with friends",
425
+ "location": ""
426
+ },
427
+ {
428
+ "message": "Meeting with the legal team on Monday at 3:30 PM.",
429
+ "datetime": "2024-03-18T15:30:00",
430
+ "description": "Meeting with the legal team",
431
+ "location": ""
432
+ },
433
+ {
434
+ "message": "Conference in Tokyo next month from April 9th to April 11th.",
435
+ "datetime": "2024-04-09T00:00:00",
436
+ "end": "2024-04-11T00:00:00",
437
+ "description": "Conference in Tokyo",
438
+ "location": ""
439
+ },
440
+ {
441
+ "message": "Team meeting on Thursday at 11:00 AM.",
442
+ "datetime": "2024-03-14T11:00:00",
443
+ "description": "Team meeting",
444
+ "location": ""
445
+ },
446
+ {
447
+ "message": "Dinner with clients on Wednesday at 7:30 PM.",
448
+ "datetime": "2024-03-13T19:30:00",
449
+ "description": "Dinner with clients",
450
+ "location": ""
451
+ },
452
+ {
453
+ "message": "Doctor's appointment on Friday at 9:00 AM.",
454
+ "datetime": "2024-03-15T09:00:00",
455
+ "description": "Doctor's appointment",
456
+ "location": ""
457
+ },
458
+ {
459
+ "message": "Coffee meeting with Sarah on Tuesday at 10:00 AM.",
460
+ "datetime": "2024-03-19T10:00:00",
461
+ "description": "Coffee meeting with Sarah",
462
+ "location": ""
463
+ },
464
+ {
465
+ "message": "Conference call with clients on Monday at 4:00 PM.",
466
+ "datetime": "2024-03-18T16:00:00",
467
+ "description": "Conference call with clients",
468
+ "location": ""
469
+ }
470
+ ]
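
The flattened file above keeps every label at the top level of each record: `message`, `datetime`, `description`, `location`, plus an optional `end` on multi-day events. A minimal loading-and-validation sketch, assuming only the fields visible in this commit (the checker itself is illustrative, not shipped in the repo):

```python
import json
from datetime import datetime

# Load the flattened dataset committed above.
with open("data-flattened.json") as f:
    events = json.load(f)

for event in events:
    # "datetime" (and the optional "end" on multi-day events) must be ISO 8601;
    # fromisoformat raises ValueError on anything malformed.
    datetime.fromisoformat(event["datetime"])
    if event.get("end"):
        datetime.fromisoformat(event["end"])
    # Every record carries a non-empty description; "location" may be empty.
    assert event["description"]

print(f"validated {len(events)} events")
```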
data.json ADDED
@@ -0,0 +1,620 @@
+ [
+ {
+ "message": "Let's meet for lunch tomorrow at 12 PM at the Italian restaurant on Main Street.",
+ "labels": {"datetime": "2024-03-12T12:00:00", "description": "Lunch meeting", "location": "Italian restaurant on Main Street"}
+ },
+ {
+ "message": "Reminder: Team meeting on Friday at 10 AM in the conference room.",
+ "labels": {
+ "datetime": "2024-03-15T10:00:00",
+ "description": "Team meeting",
+ "location": "Conference room"
+ }
+ },
+ {
+ "message": "Don't forget the doctor's appointment next Monday at 3:30 PM.",
+ "labels": {
+ "datetime": "2024-03-18T15:30:00",
+ "description": "Doctor's appointment",
+ "location": ""
+ }
+ },
+ {
+ "message": "Dinner with parents this Saturday evening at 7 PM.",
+ "labels": {
+ "datetime": "2024-03-16T19:00:00",
+ "description": "Dinner with parents",
+ "location": ""
+ }
+ },
+ {
+ "message": "Meeting with client next Tuesday at 9:30 AM at their office.",
+ "labels": {
+ "datetime": "2024-03-19T09:30:00",
+ "description": "Meeting with client",
+ "location": "Client's office"
+ }
+ },
+ {
+ "message": "Soccer practice on Wednesday at 4:00 PM at the park.",
+ "labels": {
+ "datetime": "2024-03-13T16:00:00",
+ "description": "Soccer practice",
+ "location": "Park"
+ }
+ },
+ {
+ "message": "Conference call tomorrow at 2:30 PM. Dial-in: 123-456-7890",
+ "labels": {
+ "datetime": "2024-03-12T14:30:00",
+ "description": "Conference call",
+ "location": ""
+ }
+ },
+ {
+ "message": "Pick up groceries on Friday after work.",
+ "labels": {
+ "datetime": "2024-03-15T17:00:00",
+ "description": "Pick up groceries",
+ "location": ""
+ }
+ },
+ {
+ "message": "Movie night with friends on Saturday at 8 PM.",
+ "labels": {
+ "datetime": "2024-03-16T20:00:00",
+ "description": "Movie night with friends",
+ "location": ""
+ }
+ },
+ {
+ "message": "Workout session next Monday morning at the gym.",
+ "labels": {
+ "datetime": "2024-03-18T08:00:00",
+ "description": "Workout session",
+ "location": "Gym"
+ }
+ },
+ {
+ "message": "Team lunch next Wednesday at noon.",
+ "labels": {
+ "datetime": "2024-03-20T12:00:00",
+ "description": "Team lunch",
+ "location": ""
+ }
+ },
+ {
+ "message": "Board meeting on Thursday at 9:00 AM in the boardroom.",
+ "labels": {
+ "datetime": "2024-03-14T09:00:00",
+ "description": "Board meeting",
+ "location": "Boardroom"
+ }
+ },
+ {
+ "message": "Flight to New York City on Friday evening.",
+ "labels": {
+ "datetime": "2024-03-15T18:00:00",
+ "description": "Flight to New York City",
+ "location": ""
+ }
+ },
+ {
+ "message": "Coffee with Jane next Tuesday at 11:30 AM.",
+ "labels": {
+ "datetime": "2024-03-19T11:30:00",
+ "description": "Coffee with Jane",
+ "location": ""
+ }
+ },
+ {
+ "message": "Dentist appointment on Wednesday at 2 PM.",
+ "labels": {
+ "datetime": "2024-03-13T14:00:00",
+ "description": "Dentist appointment",
+ "location": ""
+ }
+ },
+ {
+ "message": "Team outing next Friday afternoon.",
+ "labels": {
+ "datetime": "2024-03-15T12:00:00",
+ "description": "Team outing",
+ "location": ""
+ }
+ },
+ {
+ "message": "Book club meeting on Thursday at 7:30 PM.",
+ "labels": {
+ "datetime": "2024-03-14T19:30:00",
+ "description": "Book club meeting",
+ "location": ""
+ }
+ },
+ {
+ "message": "Conference in Chicago next month from April 10th to April 12th.",
+ "labels": {
+ "datetime": "2024-04-10T00:00:00",
+ "end": "2024-04-12T00:00:00",
+ "description": "Conference in Chicago",
+ "location": ""
+ }
+ },
+ {
+ "message": "Parent-teacher meeting on Monday at 4:30 PM.",
+ "labels": {
+ "datetime": "2024-03-18T16:30:00",
+ "description": "Parent-teacher meeting",
+ "location": ""
+ }
+ },
+ {
+ "message": "Dinner with John next Saturday at 6:30 PM at his place.",
+ "labels": {
+ "datetime": "2024-03-16T18:30:00",
+ "description": "Dinner with John",
+ "location": "John's place"
+ }
+ },
+ {
+ "message": "Birthday party for Sarah on Friday night at 8 PM.",
+ "labels": {
+ "datetime": "2024-03-15T20:00:00",
+ "description": "Birthday party for Sarah",
+ "location": ""
+ }
+ },
+ {
+ "message": "Conference call on Thursday at 11:00 AM.",
+ "labels": {
+ "datetime": "2024-03-14T11:00:00",
+ "description": "Conference call",
+ "location": ""
+ }
+ },
+ {
+ "message": "Meeting with HR on Monday morning at 9 AM.",
+ "labels": {
+ "datetime": "2024-03-18T09:00:00",
+ "description": "Meeting with HR",
+ "location": ""
+ }
+ },
+ {
+ "message": "Conference in London next week from April 1st to April 3rd.",
+ "labels": {
+ "datetime": "2024-04-01T00:00:00",
+ "end": "2024-04-03T00:00:00",
+ "description": "Conference in London",
+ "location": ""
+ }
+ },
+ {
+ "message": "Lunch with colleagues on Thursday at 12:30 PM.",
+ "labels": {
+ "datetime": "2024-03-14T12:30:00",
+ "description": "Lunch with colleagues",
+ "location": ""
+ }
+ },
+ {
+ "message": "Board meeting next Tuesday at 10 AM.",
+ "labels": {
+ "datetime": "2024-03-19T10:00:00",
+ "description": "Board meeting",
+ "location": ""
+ }
+ },
+ {
+ "message": "Workshop on Saturday morning at 9:30 AM in the auditorium.",
+ "labels": {
+ "datetime": "2024-03-16T09:30:00",
+ "description": "Workshop",
+ "location": "Auditorium"
+ }
+ },
+ {
+ "message": "Dinner party at Mike's place next Friday at 7:00 PM.",
+ "labels": {
+ "datetime": "2024-03-15T19:00:00",
+ "description": "Dinner party at Mike's place",
+ "location": "Mike's place"
+ }
+ },
+ {
+ "message": "Training session on Monday afternoon at 2 PM.",
+ "labels": {
+ "datetime": "2024-03-18T14:00:00",
+ "description": "Training session",
+ "location": ""
+ }
+ },
+ {
+ "message": "Coffee meeting on Wednesday at 10:30 AM.",
+ "labels": {
+ "datetime": "2024-03-13T10:30:00",
+ "description": "Coffee meeting",
+ "location": ""
+ }
+ },
+ {
+ "message": "Flight to Paris on Sunday morning at 9:00 AM.",
+ "labels": {
+ "datetime": "2024-03-17T09:00:00",
+ "description": "Flight to Paris",
+ "location": ""
+ }
+ },
+ {
+ "message": "Client presentation on Thursday at 2:00 PM in the conference room.",
+ "labels": {
+ "datetime": "2024-03-14T14:00:00",
+ "description": "Client presentation",
+ "location": "Conference room"
+ }
+ },
+ {
+ "message": "Dentist appointment on Tuesday at 11:00 AM.",
+ "labels": {
+ "datetime": "2024-03-19T11:00:00",
+ "description": "Dentist appointment",
+ "location": ""
+ }
+ },
+ {
+ "message": "Team building event next Friday at 1:00 PM.",
+ "labels": {
+ "datetime": "2024-03-15T13:00:00",
+ "description": "Team building event",
+ "location": ""
+ }
+ },
+ {
+ "message": "Business trip to San Francisco from April 5th to April 7th.",
+ "labels": {
+ "datetime": "2024-04-05T00:00:00",
+ "end": "2024-04-07T00:00:00",
+ "description": "Business trip to San Francisco",
+ "location": ""
+ }
+ },
+ {
+ "message": "Meeting with Sarah on Monday at 4:00 PM.",
+ "labels": {
+ "datetime": "2024-03-18T16:00:00",
+ "description": "Meeting with Sarah",
+ "location": ""
+ }
+ },
+ {
+ "message": "Dinner reservation for two on Friday night at 7:30 PM.",
+ "labels": {
+ "datetime": "2024-03-15T19:30:00",
+ "description": "Dinner reservation for two",
+ "location": ""
+ }
+ },
+ {
+ "message": "Video conference call on Tuesday at 3:00 PM.",
+ "labels": {
+ "datetime": "2024-03-19T15:00:00",
+ "description": "Video conference call",
+ "location": ""
+ }
+ },
+ {
+ "message": "Networking event on Wednesday evening at 6:00 PM.",
+ "labels": {
+ "datetime": "2024-03-13T18:00:00",
+ "description": "Networking event",
+ "location": ""
+ }
+ },
+ {
+ "message": "Pick up dry cleaning on Thursday afternoon.",
+ "labels": {
+ "datetime": "2024-03-14T12:00:00",
+ "description": "Pick up dry cleaning",
+ "location": ""
+ }
+ },
+ {
+ "message": "Coffee catch-up with Mark on Tuesday morning at 10 AM.",
+ "labels": {
+ "datetime": "2024-03-19T10:00:00",
+ "description": "Coffee catch-up with Mark",
+ "location": ""
+ }
+ },
+ {
+ "message": "Volunteer work at the shelter on Saturday afternoon.",
+ "labels": {
+ "datetime": "2024-03-16T12:00:00",
+ "description": "Volunteer work at the shelter",
+ "location": ""
+ }
+ },
+ {
+ "message": "Dinner with the Smiths on Sunday evening at 6:30 PM.",
+ "labels": {
+ "datetime": "2024-03-17T18:30:00",
+ "description": "Dinner with the Smiths",
+ "location": ""
+ }
+ },
+ {
+ "message": "Conference call with investors on Monday at 11:00 AM.",
+ "labels": {
+ "datetime": "2024-03-18T11:00:00",
+ "description": "Conference call with investors",
+ "location": ""
+ }
+ },
+ {
+ "message": "Lunch meeting with client on Thursday at 1:00 PM.",
+ "labels": {
+ "datetime": "2024-03-14T13:00:00",
+ "description": "Lunch meeting with client",
+ "location": ""
+ }
+ },
+ {
+ "message": "Conference in Berlin next month from April 8th to April 10th.",
+ "labels": {
+ "datetime": "2024-04-08T00:00:00",
+ "end": "2024-04-10T00:00:00",
+ "description": "Conference in Berlin",
+ "location": ""
+ }
+ },
+ {
+ "message": "Meeting with project team on Monday at 2:00 PM.",
+ "labels": {
+ "datetime": "2024-03-18T14:00:00",
+ "description": "Meeting with project team",
+ "location": ""
+ }
+ },
+ {
+ "message": "Workout session at the gym on Wednesday at 6:00 AM.",
+ "labels": {
+ "datetime": "2024-03-13T06:00:00",
+ "description": "Workout session at the gym",
+ "location": ""
+ }
+ },
+ {
+ "message": "Family dinner on Sunday at 7:00 PM.",
+ "labels": {
+ "datetime": "2024-03-17T19:00:00",
+ "description": "Family dinner",
+ "location": ""
+ }
+ },
+ {
+ "message": "Client meeting on Friday at 2:30 PM in the boardroom.",
+ "labels": {
+ "datetime": "2024-03-15T14:30:00",
+ "description": "Client meeting",
+ "location": "Boardroom"
+ }
+ },
+ {
+ "message": "Doctor's appointment on Monday at 10:00 AM.",
+ "labels": {
+ "datetime": "2024-03-18T10:00:00",
+ "description": "Doctor's appointment",
+ "location": ""
+ }
+ },
+ {
+ "message": "Movie night with friends next Saturday at 8:00 PM.",
+ "labels": {
+ "datetime": "2024-03-16T20:00:00",
+ "description": "Movie night with friends",
+ "location": ""
+ }
+ },
+ {
+ "message": "Conference call with team members on Tuesday at 11:00 AM.",
+ "labels": {
+ "datetime": "2024-03-19T11:00:00",
+ "description": "Conference call with team members",
+ "location": ""
+ }
+ },
+ {
+ "message": "Dinner at the new restaurant on Friday evening at 7:30 PM.",
+ "labels": {
+ "datetime": "2024-03-15T19:30:00",
+ "description": "Dinner at the new restaurant",
+ "location": ""
+ }
+ },
+ {
+ "message": "Meeting with clients on Wednesday at 3:00 PM.",
+ "labels": {
+ "datetime": "2024-03-13T15:00:00",
+ "description": "Meeting with clients",
+ "location": ""
+ }
+ },
+ {
+ "message": "Lunch with colleagues next Thursday at 1:00 PM.",
+ "labels": {
+ "datetime": "2024-03-14T13:00:00",
+ "description": "Lunch with colleagues",
+ "location": ""
+ }
+ },
+ {
+ "message": "Parent-teacher meeting on Monday at 3:00 PM.",
+ "labels": {
+ "datetime": "2024-03-18T15:00:00",
+ "description": "Parent-teacher meeting",
+ "location": ""
+ }
+ },
+ {
+ "message": "Flight to Tokyo next month on April 9th.",
+ "labels": {
+ "datetime": "2024-04-09T00:00:00",
+ "description": "Flight to Tokyo",
+ "location": ""
+ }
+ },
+ {
+ "message": "Meeting with the marketing team on Tuesday at 2:00 PM.",
+ "labels": {
+ "datetime": "2024-03-19T14:00:00",
+ "description": "Meeting with the marketing team",
+ "location": ""
+ }
+ },
+ {
+ "message": "Dinner with friends on Saturday at 7:00 PM.",
+ "labels": {
+ "datetime": "2024-03-16T19:00:00",
+ "description": "Dinner with friends",
+ "location": ""
+ }
+ },
+ {
+ "message": "Team meeting on Monday at 11:00 AM.",
+ "labels": {
+ "datetime": "2024-03-18T11:00:00",
+ "description": "Team meeting",
+ "location": ""
+ }
+ },
+ {
+ "message": "Conference call with the IT department on Thursday at 10:00 AM.",
+ "labels": {
+ "datetime": "2024-03-14T10:00:00",
+ "description": "Conference call with the IT department",
+ "location": ""
+ }
+ },
+ {
+ "message": "Lunch meeting with Jane on Wednesday at 12:00 PM.",
+ "labels": {
+ "datetime": "2024-03-13T12:00:00",
+ "description": "Lunch meeting with Jane",
+ "location": ""
+ }
+ },
+ {
+ "message": "Conference in Paris next month from April 10th to April 12th.",
+ "labels": {
+ "datetime": "2024-04-10T00:00:00",
+ "end": "2024-04-12T00:00:00",
+ "description": "Conference in Paris",
+ "location": ""
+ }
+ },
+ {
+ "message": "Workshop on Friday afternoon at 3:00 PM.",
+ "labels": {
+ "datetime": "2024-03-15T15:00:00",
+ "description": "Workshop",
+ "location": ""
+ }
+ },
+ {
+ "message": "Dinner with family next Sunday at 6:00 PM.",
+ "labels": {
+ "datetime": "2024-03-17T18:00:00",
+ "description": "Dinner with family",
+ "location": ""
+ }
+ },
+ {
+ "message": "Conference call with the sales team on Monday at 2:00 PM.",
+ "labels": {
+ "datetime": "2024-03-18T14:00:00",
+ "description": "Conference call with the sales team",
+ "location": ""
+ }
+ },
+ {
+ "message": "Doctor's appointment on Thursday at 10:30 AM.",
+ "labels": {
+ "datetime": "2024-03-14T10:30:00",
+ "description": "Doctor's appointment",
+ "location": ""
+ }
+ },
+ {
+ "message": "Meeting with the CEO on Tuesday at 9:00 AM.",
+ "labels": {
+ "datetime": "2024-03-19T09:00:00",
+ "description": "Meeting with the CEO",
+ "location": ""
+ }
+ },
+ {
+ "message": "Lunch with friends on Friday at 1:00 PM.",
+ "labels": {
+ "datetime": "2024-03-15T13:00:00",
+ "description": "Lunch with friends",
+ "location": ""
+ }
+ },
+ {
+ "message": "Meeting with the legal team on Monday at 3:30 PM.",
+ "labels": {
+ "datetime": "2024-03-18T15:30:00",
+ "description": "Meeting with the legal team",
+ "location": ""
+ }
+ },
+ {
+ "message": "Conference in Tokyo next month from April 9th to April 11th.",
+ "labels": {
+ "datetime": "2024-04-09T00:00:00",
+ "end": "2024-04-11T00:00:00",
+ "description": "Conference in Tokyo",
+ "location": ""
+ }
+ },
+ {
+ "message": "Team meeting on Thursday at 11:00 AM.",
+ "labels": {
+ "datetime": "2024-03-14T11:00:00",
+ "description": "Team meeting",
+ "location": ""
+ }
+ },
+ {
+ "message": "Dinner with clients on Wednesday at 7:30 PM.",
+ "labels": {
+ "datetime": "2024-03-13T19:30:00",
+ "description": "Dinner with clients",
+ "location": ""
+ }
+ },
+ {
+ "message": "Doctor's appointment on Friday at 9:00 AM.",
+ "labels": {
+ "datetime": "2024-03-15T09:00:00",
+ "description": "Doctor's appointment",
+ "location": ""
+ }
+ },
+ {
+ "message": "Coffee meeting with Sarah on Tuesday at 10:00 AM.",
+ "labels": {
+ "datetime": "2024-03-19T10:00:00",
+ "description": "Coffee meeting with Sarah",
+ "location": ""
+ }
+ },
+ {
+ "message": "Conference call with clients on Monday at 4:00 PM.",
+ "labels": {
+ "datetime": "2024-03-18T16:00:00",
+ "description": "Conference call with clients",
+ "location": ""
+ }
+ }
+ ]
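
`data.json` carries the same examples with the gold fields nested under a `labels` key. One way the flattened variant could be derived from it, sketched under the assumption that every record has exactly the `message` and `labels` keys shown above (this conversion script is not part of the commit):

```python
import json

with open("data.json") as f:
    nested = json.load(f)

# Hoist each record's "labels" dict up to the top level.
flattened = [{"message": ex["message"], **ex["labels"]} for ex in nested]

with open("data-flattened.json", "w") as f:
    json.dump(flattened, f, indent=2)
```

Keeping the nested form as the source of truth and regenerating the flat form this way avoids the two files drifting apart.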
data2.json ADDED
@@ -0,0 +1,310 @@
+ [
+ {
+ "message": "Let's meet for lunch tomorrow at 12 PM at the Italian restaurant on Main Street.",
+ "details": "datetime: 2024-03-12T12:00:00, end: , description: Lunch meeting, location: Italian restaurant on Main Street"
+ },
+ {
+ "message": "Reminder: Team meeting on Friday at 10 AM in the conference room.",
+ "details": "datetime: 2024-03-15T10:00:00, end: , description: Team meeting, location: Conference room"
+ },
+ {
+ "message": "Don't forget the doctor's appointment next Monday at 3:30 PM.",
+ "details": "datetime: 2024-03-18T15:30:00, end: , description: Doctor's appointment, location: "
+ },
+ {
+ "message": "Dinner with parents this Saturday evening at 7 PM.",
+ "details": "datetime: 2024-03-16T19:00:00, end: , description: Dinner with parents, location: "
+ },
+ {
+ "message": "Meeting with client next Tuesday at 9:30 AM at their office.",
+ "details": "datetime: 2024-03-19T09:30:00, end: , description: Meeting with client, location: Client's office"
+ },
+ {
+ "message": "Soccer practice on Wednesday at 4:00 PM at the park.",
+ "details": "datetime: 2024-03-13T16:00:00, end: , description: Soccer practice, location: Park"
+ },
+ {
+ "message": "Conference call tomorrow at 2:30 PM. Dial-in: 123-456-7890",
+ "details": "datetime: 2024-03-12T14:30:00, end: , description: Conference call, location: "
+ },
+ {
+ "message": "Pick up groceries on Friday after work.",
+ "details": "datetime: 2024-03-15T17:00:00, end: , description: Pick up groceries, location: "
+ },
+ {
+ "message": "Movie night with friends on Saturday at 8 PM.",
+ "details": "datetime: 2024-03-16T20:00:00, end: , description: Movie night with friends, location: "
+ },
+ {
+ "message": "Workout session next Monday morning at the gym.",
+ "details": "datetime: 2024-03-18T08:00:00, end: , description: Workout session, location: Gym"
+ },
+ {
+ "message": "Team lunch next Wednesday at noon.",
+ "details": "datetime: 2024-03-20T12:00:00, end: , description: Team lunch, location: "
+ },
+ {
+ "message": "Board meeting on Thursday at 9:00 AM in the boardroom.",
+ "details": "datetime: 2024-03-14T09:00:00, end: , description: Board meeting, location: Boardroom"
+ },
+ {
+ "message": "Flight to New York City on Friday evening.",
+ "details": "datetime: 2024-03-15T18:00:00, end: , description: Flight to New York City, location: "
+ },
+ {
+ "message": "Coffee with Jane next Tuesday at 11:30 AM.",
+ "details": "datetime: 2024-03-19T11:30:00, end: , description: Coffee with Jane, location: "
+ },
+ {
+ "message": "Dentist appointment on Wednesday at 2 PM.",
+ "details": "datetime: 2024-03-13T14:00:00, end: , description: Dentist appointment, location: "
+ },
+ {
+ "message": "Team outing next Friday afternoon.",
+ "details": "datetime: 2024-03-15T12:00:00, end: , description: Team outing, location: "
+ },
+ {
+ "message": "Book club meeting on Thursday at 7:30 PM.",
+ "details": "datetime: 2024-03-14T19:30:00, end: , description: Book club meeting, location: "
+ },
+ {
+ "message": "Conference in Chicago next month from April 10th to April 12th.",
+ "details": "datetime: 2024-04-10T00:00:00, end: 2024-04-12T00:00:00, description: Conference in Chicago, location: "
+ },
+ {
+ "message": "Parent-teacher meeting on Monday at 4:30 PM.",
+ "details": "datetime: 2024-03-18T16:30:00, end: , description: Parent-teacher meeting, location: "
+ },
+ {
+ "message": "Dinner with John next Saturday at 6:30 PM at his place.",
+ "details": "datetime: 2024-03-16T18:30:00, end: , description: Dinner with John, location: John's place"
+ },
+ {
+ "message": "Birthday party for Sarah on Friday night at 8 PM.",
+ "details": "datetime: 2024-03-15T20:00:00, end: , description: Birthday party for Sarah, location: "
+ },
+ {
+ "message": "Conference call on Thursday at 11:00 AM.",
+ "details": "datetime: 2024-03-14T11:00:00, end: , description: Conference call, location: "
+ },
+ {
+ "message": "Meeting with HR on Monday morning at 9 AM.",
+ "details": "datetime: 2024-03-18T09:00:00, end: , description: Meeting with HR, location: "
+ },
+ {
+ "message": "Conference in London next week from April 1st to April 3rd.",
+ "details": "datetime: 2024-04-01T00:00:00, end: 2024-04-03T00:00:00, description: Conference in London, location: "
+ },
+ {
+ "message": "Lunch with colleagues on Thursday at 12:30 PM.",
+ "details": "datetime: 2024-03-14T12:30:00, end: , description: Lunch with colleagues, location: "
+ },
+ {
+ "message": "Board meeting next Tuesday at 10 AM.",
+ "details": "datetime: 2024-03-19T10:00:00, end: , description: Board meeting, location: "
+ },
+ {
+ "message": "Workshop on Saturday morning at 9:30 AM in the auditorium.",
+ "details": "datetime: 2024-03-16T09:30:00, end: , description: Workshop, location: Auditorium"
+ },
+ {
+ "message": "Dinner party at Mike's place next Friday at 7:00 PM.",
+ "details": "datetime: 2024-03-15T19:00:00, end: , description: Dinner party at Mike's place, location: Mike's place"
+ },
+ {
+ "message": "Training session on Monday afternoon at 2 PM.",
+ "details": "datetime: 2024-03-18T14:00:00, end: , description: Training session, location: "
+ },
+ {
+ "message": "Coffee meeting on Wednesday at 10:30 AM.",
+ "details": "datetime: 2024-03-13T10:30:00, end: , description: Coffee meeting, location: "
+ },
+ {
+ "message": "Flight to Paris on Sunday morning at 9:00 AM.",
+ "details": "datetime: 2024-03-17T09:00:00, end: , description: Flight to Paris, location: "
+ },
+ {
+ "message": "Client presentation on Thursday at 2:00 PM in the conference room.",
+ "details": "datetime: 2024-03-14T14:00:00, end: , description: Client presentation, location: Conference room"
+ },
+ {
+ "message": "Dentist appointment on Tuesday at 11:00 AM.",
+ "details": "datetime: 2024-03-19T11:00:00, end: , description: Dentist appointment, location: "
+ },
+ {
+ "message": "Team building event next Friday at 1:00 PM.",
+ "details": "datetime: 2024-03-15T13:00:00, end: , description: Team building event, location: "
+ },
+ {
+ "message": "Business trip to San Francisco from April 5th to April 7th.",
+ "details": "datetime: 2024-04-05T00:00:00, end: 2024-04-07T00:00:00, description: Business trip to San Francisco, location: "
+ },
+ {
+ "message": "Meeting with Sarah on Monday at 4:00 PM.",
+ "details": "datetime: 2024-03-18T16:00:00, end: , description: Meeting with Sarah, location: "
+ },
+ {
+ "message": "Dinner reservation for two on Friday night at 7:30 PM.",
+ "details": "datetime: 2024-03-15T19:30:00, end: , description: Dinner reservation for two, location: "
+ },
+ {
+ "message": "Video conference call on Tuesday at 3:00 PM.",
+ "details": "datetime: 2024-03-19T15:00:00, end: , description: Video conference call, location: "
+ },
+ {
+ "message": "Networking event on Wednesday evening at 6:00 PM.",
+ "details": "datetime: 2024-03-13T18:00:00, end: , description: Networking event, location: "
+ },
+ {
+ "message": "Pick up dry cleaning on Thursday afternoon.",
+ "details": "datetime: 2024-03-14T12:00:00, end: , description: Pick up dry cleaning, location: "
+ },
+ {
+ "message": "Coffee catch-up with Mark on Tuesday morning at 10 AM.",
+ "details": "datetime: 2024-03-19T10:00:00, end: , description: Coffee catch-up with Mark, location: "
+ },
+ {
+ "message": "Volunteer work at the shelter on Saturday afternoon.",
+ "details": "datetime: 2024-03-16T12:00:00, end: , description: Volunteer work at the shelter, location: "
+ },
+ {
+ "message": "Dinner with the Smiths on Sunday evening at 6:30 PM.",
+ "details": "datetime: 2024-03-17T18:30:00, end: , description: Dinner with the Smiths, location: "
+ },
+ {
+ "message": "Conference call with investors on Monday at 11:00 AM.",
+ "details": "datetime: 2024-03-18T11:00:00, end: , description: Conference call with investors, location: "
+ },
+ {
+ "message": "Lunch meeting with client on Thursday at 1:00 PM.",
+ "details": "datetime: 2024-03-14T13:00:00, end: , description: Lunch meeting with client, location: "
+ },
+ {
+ "message": "Conference in Berlin next month from April 8th to April 10th.",
+ "details": "datetime: 2024-04-08T00:00:00, end: 2024-04-10T00:00:00, description: Conference in Berlin, location: "
+ },
+ {
+ "message": "Meeting with project team on Monday at 2:00 PM.",
+ "details": "datetime: 2024-03-18T14:00:00, end: , description: Meeting with project team, location: "
+ },
+ {
+ "message": "Workout session at the gym on Wednesday at 6:00 AM.",
+ "details": "datetime: 2024-03-13T06:00:00, end: , description: Workout session at the gym, location: "
+ },
+ {
+ "message": "Family dinner on Sunday at 7:00 PM.",
+ "details": "datetime: 2024-03-17T19:00:00, end: , description: Family dinner, location: "
+ },
+ {
+ "message": "Client meeting on Friday at 2:30 PM in the boardroom.",
+ "details": "datetime: 2024-03-15T14:30:00, end: , description: Client meeting, location: Boardroom"
+ },
+ {
+ "message": "Doctor's appointment on Monday at 10:00 AM.",
+ "details": "datetime: 2024-03-18T10:00:00, end: , description: Doctor's appointment, location: "
+ },
+ {
+ "message": "Movie night with friends next Saturday at 8:00 PM.",
+ "details": "datetime: 2024-03-16T20:00:00, end: , description: Movie night with friends, location: "
+ },
+ {
+ "message": "Conference call with team members on Tuesday at 11:00 AM.",
+ "details": "datetime: 2024-03-19T11:00:00, end: , description: Conference call with team members, location: "
+ },
+ {
+ "message": "Dinner at the new restaurant on Friday evening at 7:30 PM.",
+ "details": "datetime: 2024-03-15T19:30:00, end: , description: Dinner at the new restaurant, location: "
+ },
+ {
+ "message": "Meeting with clients on Wednesday at 3:00 PM.",
+ "details": "datetime: 2024-03-13T15:00:00, end: , description: Meeting with clients, location: "
+ },
+ {
+ "message": "Lunch with colleagues next Thursday at 1:00 PM.",
+ "details": "datetime: 2024-03-14T13:00:00, end: , description: Lunch with colleagues, location: "
+ },
+ {
+ "message": "Parent-teacher meeting on Monday at 3:00 PM.",
+ "details": "datetime: 2024-03-18T15:00:00, end: , description: Parent-teacher meeting, location: "
+ },
+ {
+ "message": "Flight to Tokyo next month on April 9th.",
+ "details": "datetime: 2024-04-09T00:00:00, end: , description: Flight to Tokyo, location: "
+ },
+ {
+ "message": "Meeting with the marketing team on Tuesday at 2:00 PM.",
+ "details": "datetime: 2024-03-19T14:00:00, end: , description: Meeting with the marketing team, location: "
+ },
+ {
+ "message": "Dinner with friends on Saturday at 7:00 PM.",
+ "details": "datetime: 2024-03-16T19:00:00, end: , description: Dinner with friends, location: "
+ },
+ {
+ "message": "Team meeting on Monday at 11:00 AM.",
+ "details": "datetime: 2024-03-18T11:00:00, end: , description: Team meeting, location: "
+ },
+ {
+ "message": "Conference call with the IT department on Thursday at 10:00 AM.",
+ "details": "datetime: 2024-03-14T10:00:00, end: , description: Conference call with the IT department, location: "
+ },
+ {
+ "message": "Lunch meeting with Jane on Wednesday at 12:00 PM.",
+ "details": "datetime: 2024-03-13T12:00:00, end: , description: Lunch meeting with Jane, location: "
+ },
+ {
+ "message": "Conference in Paris next month from April 10th to April 12th.",
+ "details": "datetime: 2024-04-10T00:00:00, end: 2024-04-12T00:00:00, description: Conference in Paris, location: "
+ },
+ {
+ "message": "Workshop on Friday afternoon at 3:00 PM.",
+ "details": "datetime: 2024-03-15T15:00:00, end: , description: Workshop, location: "
+ },
+ {
+ "message": "Dinner with family next Sunday at 6:00 PM.",
+ "details": "datetime: 2024-03-17T18:00:00, end: , description: Dinner with family, location: "
+ },
+ {
+ "message": "Conference call with the sales team on Monday at 2:00 PM.",
+ "details": "datetime: 2024-03-18T14:00:00, end: , description: Conference call with the sales team, location: "
+ },
+ {
+ "message": "Doctor's appointment on Thursday at 10:30 AM.",
+ "details": "datetime: 2024-03-14T10:30:00, end: , description: Doctor's appointment, location: "
+ },
+ {
+ "message": "Meeting with the CEO on Tuesday at 9:00 AM.",
+ "details": "datetime: 2024-03-19T09:00:00, end: , description: Meeting with the CEO, location: "
+ },
+ {
+ "message": "Lunch with friends on Friday at 1:00 PM.",
+ "details": "datetime: 2024-03-15T13:00:00, end: , description: Lunch with friends, location: "
+ },
+ {
+ "message": "Meeting with the legal team on Monday at 3:30 PM.",
+ "details": "datetime: 2024-03-18T15:30:00, end: , description: Meeting with the legal team, location: "
+ },
+ {
+ "message": "Conference in Tokyo next month from April 9th to April 11th.",
+ "details": "datetime: 2024-04-09T00:00:00, end: 2024-04-11T00:00:00, description: Conference in Tokyo, location: "
+ },
+ {
+ "message": "Team meeting on Thursday at 11:00 AM.",
+ "details": "datetime: 2024-03-14T11:00:00, end: , description: Team meeting, location: "
+ },
+ {
+ "message": "Dinner with clients on Wednesday at 7:30 PM.",
+ "details": "datetime: 2024-03-13T19:30:00, end: , description: Dinner with clients, location: "
+ },
+ {
+ "message": "Doctor's appointment on Friday at 9:00 AM.",
+ "details": "datetime: 2024-03-15T09:00:00, end: , description: Doctor's appointment, location: "
+ },
+ {
+ "message": "Coffee meeting with Sarah on Tuesday at 10:00 AM.",
+ "details": "datetime: 2024-03-19T10:00:00, end: , description: Coffee meeting with Sarah, location: "
+ },
+ {
+ "message": "Conference call with clients on Monday at 4:00 PM.",
+ "details": "datetime: 2024-03-18T16:00:00, end: , description: Conference call with clients, location: "
+ }
+ ]
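
`data2.json` collapses the labels into a single `details` string, which is a convenient flat completion target for an LLM but needs parsing on the way back. A best-effort parser sketch, assuming the four keys always appear in the fixed order shown (values may themselves contain commas, so it splits on the `", key: "` separators rather than on bare commas; `str.removeprefix` requires Python 3.9+):

```python
def parse_details(details: str) -> dict:
    """Parse a data2.json 'details' string back into its four fields."""
    keys = ["datetime", "end", "description", "location"]
    result, rest = {}, details
    for key, nxt in zip(keys, keys[1:] + [None]):
        rest = rest.removeprefix(f"{key}: ")
        if nxt is None:
            result[key] = rest
        else:
            # Split on the next key's separator; re-attach its prefix
            # so the following iteration can strip it uniformly.
            value, rest = rest.split(f", {nxt}: ", 1)
            result[key] = value
            rest = f"{nxt}: {rest}"
    return result

print(parse_details(
    "datetime: 2024-03-12T12:00:00, end: , description: Lunch meeting, "
    "location: Italian restaurant on Main Street"
))
# {'datetime': '2024-03-12T12:00:00', 'end': '', 'description': 'Lunch meeting',
#  'location': 'Italian restaurant on Main Street'}
```

A description that itself contained a literal `", location: "` would still break this; a structured format like the JSON `labels` object avoids the ambiguity entirely.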
data3.jsonl ADDED
@@ -0,0 +1,80 @@
+ [
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Let's meet for lunch tomorrow at 12 PM at the Italian restaurant on Main Street.\nThe Details are as follows: {'datetime': '2024-03-12T12:00:00', 'description': 'Lunch meeting', 'location': 'Italian restaurant on Main Street'}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Reminder: Team meeting on Friday at 10 AM in the conference room.\nThe Details are as follows: {'datetime': '2024-03-15T10:00:00', 'description': 'Team meeting', 'location': 'Conference room'}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Don't forget the doctor's appointment next Monday at 3:30 PM.\nThe Details are as follows: {'datetime': '2024-03-18T15:30:00', 'description': \"Doctor's appointment\", 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Dinner with parents this Saturday evening at 7 PM.\nThe Details are as follows: {'datetime': '2024-03-16T19:00:00', 'description': 'Dinner with parents', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Meeting with client next Tuesday at 9:30 AM at their office.\nThe Details are as follows: {'datetime': '2024-03-19T09:30:00', 'description': 'Meeting with client', 'location': \"Client's office\"}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Soccer practice on Wednesday at 4:00 PM at the park.\nThe Details are as follows: {'datetime': '2024-03-13T16:00:00', 'description': 'Soccer practice', 'location': 'Park'}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Conference call tomorrow at 2:30 PM. Dial-in: 123-456-7890\nThe Details are as follows: {'datetime': '2024-03-12T14:30:00', 'description': 'Conference call', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Pick up groceries on Friday after work.\nThe Details are as follows: {'datetime': '2024-03-15T17:00:00', 'description': 'Pick up groceries', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Movie night with friends on Saturday at 8 PM.\nThe Details are as follows: {'datetime': '2024-03-16T20:00:00', 'description': 'Movie night with friends', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Workout session next Monday morning at the gym.\nThe Details are as follows: {'datetime': '2024-03-18T08:00:00', 'description': 'Workout session', 'location': 'Gym'}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Team lunch next Wednesday at noon.\nThe Details are as follows: {'datetime': '2024-03-20T12:00:00', 'description': 'Team lunch', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Board meeting on Thursday at 9:00 AM in the boardroom.\nThe Details are as follows: {'datetime': '2024-03-14T09:00:00', 'description': 'Board meeting', 'location': 'Boardroom'}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Flight to New York City on Friday evening.\nThe Details are as follows: {'datetime': '2024-03-15T18:00:00', 'description': 'Flight to New York City', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Coffee with Jane next Tuesday at 11:30 AM.\nThe Details are as follows: {'datetime': '2024-03-19T11:30:00', 'description': 'Coffee with Jane', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Dentist appointment on Wednesday at 2 PM.\nThe Details are as follows: {'datetime': '2024-03-13T14:00:00', 'description': 'Dentist appointment', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Team outing next Friday afternoon.\nThe Details are as follows: {'datetime': '2024-03-15T12:00:00', 'description': 'Team outing', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Book club meeting on Thursday at 7:30 PM.\nThe Details are as follows: {'datetime': '2024-03-14T19:30:00', 'description': 'Book club meeting', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Conference in Chicago next month from April 10th to April 12th.\nThe Details are as follows: {'datetime': '2024-04-10T00:00:00', 'end': '2024-04-12T00:00:00', 'description': 'Conference in Chicago', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Parent-teacher meeting on Monday at 4:30 PM.\nThe Details are as follows: {'datetime': '2024-03-18T16:30:00', 'description': 'Parent-teacher meeting', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Dinner with John next Saturday at 6:30 PM at his place.\nThe Details are as follows: {'datetime': '2024-03-16T18:30:00', 'description': 'Dinner with John', 'location': \"John's place\"}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Birthday party for Sarah on Friday night at 8 PM.\nThe Details are as follows: {'datetime': '2024-03-15T20:00:00', 'description': 'Birthday party for Sarah', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Conference call on Thursday at 11:00 AM.\nThe Details are as follows: {'datetime': '2024-03-14T11:00:00', 'description': 'Conference call', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Meeting with HR on Monday morning at 9 AM.\nThe Details are as follows: {'datetime': '2024-03-18T09:00:00', 'description': 'Meeting with HR', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Conference in London next week from April 1st to April 3rd.\nThe Details are as follows: {'datetime': '2024-04-01T00:00:00', 'end': '2024-04-03T00:00:00', 'description': 'Conference in London', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Lunch with colleagues on Thursday at 12:30 PM.\nThe Details are as follows: {'datetime': '2024-03-14T12:30:00', 'description': 'Lunch with colleagues', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Board meeting next Tuesday at 10 AM.\nThe Details are as follows: {'datetime': '2024-03-19T10:00:00', 'description': 'Board meeting', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Workshop on Saturday morning at 9:30 AM in the auditorium.\nThe Details are as follows: {'datetime': '2024-03-16T09:30:00', 'description': 'Workshop', 'location': 'Auditorium'}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Dinner party at Mike's place next Friday at 7:00 PM.\nThe Details are as follows: {'datetime': '2024-03-15T19:00:00', 'description': \"Dinner party at Mike's place\", 'location': \"Mike's place\"}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Training session on Monday afternoon at 2 PM.\nThe Details are as follows: {'datetime': '2024-03-18T14:00:00', 'description': 'Training session', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Coffee meeting on Wednesday at 10:30 AM.\nThe Details are as follows: {'datetime': '2024-03-13T10:30:00', 'description': 'Coffee meeting', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Flight to Paris on Sunday morning at 9:00 AM.\nThe Details are as follows: {'datetime': '2024-03-17T09:00:00', 'description': 'Flight to Paris', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Client presentation on Thursday at 2:00 PM in the conference room.\nThe Details are as follows: {'datetime': '2024-03-14T14:00:00', 'description': 'Client presentation', 'location': 'Conference room'}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Dentist appointment on Tuesday at 11:00 AM.\nThe Details are as follows: {'datetime': '2024-03-19T11:00:00', 'description': 'Dentist appointment', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Team building event next Friday at 1:00 PM.\nThe Details are as follows: {'datetime': '2024-03-15T13:00:00', 'description': 'Team building event', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Business trip to San Francisco from April 5th to April 7th.\nThe Details are as follows: {'datetime': '2024-04-05T00:00:00', 'end': '2024-04-07T00:00:00', 'description': 'Business trip to San Francisco', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Meeting with Sarah on Monday at 4:00 PM.\nThe Details are as follows: {'datetime': '2024-03-18T16:00:00', 'description': 'Meeting with Sarah', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Dinner reservation for two on Friday night at 7:30 PM.\nThe Details are as follows: {'datetime': '2024-03-15T19:30:00', 'description': 'Dinner reservation for two', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Video conference call on Tuesday at 3:00 PM.\nThe Details are as follows: {'datetime': '2024-03-19T15:00:00', 'description': 'Video conference call', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Networking event on Wednesday evening at 6:00 PM.\nThe Details are as follows: {'datetime': '2024-03-13T18:00:00', 'description': 'Networking event', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Pick up dry cleaning on Thursday afternoon.\nThe Details are as follows: {'datetime': '2024-03-14T12:00:00', 'description': 'Pick up dry cleaning', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Coffee catch-up with Mark on Tuesday morning at 10 AM.\nThe Details are as follows: {'datetime': '2024-03-19T10:00:00', 'description': 'Coffee catch-up with Mark', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Volunteer work at the shelter on Saturday afternoon.\nThe Details are as follows: {'datetime': '2024-03-16T12:00:00', 'description': 'Volunteer work at the shelter', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Dinner with the Smiths on Sunday evening at 6:30 PM.\nThe Details are as follows: {'datetime': '2024-03-17T18:30:00', 'description': 'Dinner with the Smiths', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Conference call with investors on Monday at 11:00 AM.\nThe Details are as follows: {'datetime': '2024-03-18T11:00:00', 'description': 'Conference call with investors', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Lunch meeting with client on Thursday at 1:00 PM.\nThe Details are as follows: {'datetime': '2024-03-14T13:00:00', 'description': 'Lunch meeting with client', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Conference in Berlin next month from April 8th to April 10th.\nThe Details are as follows: {'datetime': '2024-04-08T00:00:00', 'end': '2024-04-10T00:00:00', 'description': 'Conference in Berlin', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Meeting with project team on Monday at 2:00 PM.\nThe Details are as follows: {'datetime': '2024-03-18T14:00:00', 'description': 'Meeting with project team', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Workout session at the gym on Wednesday at 6:00 AM.\nThe Details are as follows: {'datetime': '2024-03-13T06:00:00', 'description': 'Workout session at the gym', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Family dinner on Sunday at 7:00 PM.\nThe Details are as follows: {'datetime': '2024-03-17T19:00:00', 'description': 'Family dinner', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Client meeting on Friday at 2:30 PM in the boardroom.\nThe Details are as follows: {'datetime': '2024-03-15T14:30:00', 'description': 'Client meeting', 'location': 'Boardroom'}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Doctor's appointment on Monday at 10:00 AM.\nThe Details are as follows: {'datetime': '2024-03-18T10:00:00', 'description': \"Doctor's appointment\", 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Movie night with friends next Saturday at 8:00 PM.\nThe Details are as follows: {'datetime': '2024-03-16T20:00:00', 'description': 'Movie night with friends', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Conference call with team members on Tuesday at 11:00 AM.\nThe Details are as follows: {'datetime': '2024-03-19T11:00:00', 'description': 'Conference call with team members', 'location': ''}\n",
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Dinner at the new restaurant on Friday evening at 7:30 PM.\nThe Details are as follows: {'datetime': '2024-03-15T19:30:00', 'description': 'Dinner at the new restaurant', 'location': ''}\n",
56
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Meeting with clients on Wednesday at 3:00 PM.\nThe Details are as follows: {'datetime': '2024-03-13T15:00:00', 'description': 'Meeting with clients', 'location': ''}\n",
57
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Lunch with colleagues next Thursday at 1:00 PM.\nThe Details are as follows: {'datetime': '2024-03-14T13:00:00', 'description': 'Lunch with colleagues', 'location': ''}\n",
58
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Parent-teacher meeting on Monday at 3:00 PM.\nThe Details are as follows: {'datetime': '2024-03-18T15:00:00', 'description': 'Parent-teacher meeting', 'location': ''}\n",
59
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Flight to Tokyo next month on April 9th.\nThe Details are as follows: {'datetime': '2024-04-09T00:00:00', 'description': 'Flight to Tokyo', 'location': ''}\n",
60
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Meeting with the marketing team on Tuesday at 2:00 PM.\nThe Details are as follows: {'datetime': '2024-03-19T14:00:00', 'description': 'Meeting with the marketing team', 'location': ''}\n",
61
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Dinner with friends on Saturday at 7:00 PM.\nThe Details are as follows: {'datetime': '2024-03-16T19:00:00', 'description': 'Dinner with friends', 'location': ''}\n",
62
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Team meeting on Monday at 11:00 AM.\nThe Details are as follows: {'datetime': '2024-03-18T11:00:00', 'description': 'Team meeting', 'location': ''}\n",
63
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Conference call with the IT department on Thursday at 10:00 AM.\nThe Details are as follows: {'datetime': '2024-03-14T10:00:00', 'description': 'Conference call with the IT department', 'location': ''}\n",
64
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Lunch meeting with Jane on Wednesday at 12:00 PM.\nThe Details are as follows: {'datetime': '2024-03-13T12:00:00', 'description': 'Lunch meeting with Jane', 'location': ''}\n",
65
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Conference in Paris next month from April 10th to April 12th.\nThe Details are as follows: {'datetime': '2024-04-10T00:00:00', 'end': '2024-04-12T00:00:00', 'description': 'Conference in Paris', 'location': ''}\n",
66
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Workshop on Friday afternoon at 3:00 PM.\nThe Details are as follows: {'datetime': '2024-03-15T15:00:00', 'description': 'Workshop', 'location': ''}\n",
67
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Dinner with family next Sunday at 6:00 PM.\nThe Details are as follows: {'datetime': '2024-03-17T18:00:00', 'description': 'Dinner with family', 'location': ''}\n",
68
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Conference call with the sales team on Monday at 2:00 PM.\nThe Details are as follows: {'datetime': '2024-03-18T14:00:00', 'description': 'Conference call with the sales team', 'location': ''}\n",
69
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Doctor's appointment on Thursday at 10:30 AM.\nThe Details are as follows: {'datetime': '2024-03-14T10:30:00', 'description': \"Doctor's appointment\", 'location': ''}\n",
70
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Meeting with the CEO on Tuesday at 9:00 AM.\nThe Details are as follows: {'datetime': '2024-03-19T09:00:00', 'description': 'Meeting with the CEO', 'location': ''}\n",
71
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Lunch with friends on Friday at 1:00 PM.\nThe Details are as follows: {'datetime': '2024-03-15T13:00:00', 'description': 'Lunch with friends', 'location': ''}\n",
72
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Meeting with the legal team on Monday at 3:30 PM.\nThe Details are as follows: {'datetime': '2024-03-18T15:30:00', 'description': 'Meeting with the legal team', 'location': ''}\n",
73
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Conference in Tokyo next month from April 9th to April 11th.\nThe Details are as follows: {'datetime': '2024-04-09T00:00:00', 'end': '2024-04-11T00:00:00', 'description': 'Conference in Tokyo', 'location': ''}\n",
74
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Team meeting on Thursday at 11:00 AM.\nThe Details are as follows: {'datetime': '2024-03-14T11:00:00', 'description': 'Team meeting', 'location': ''}\n",
75
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Dinner with clients on Wednesday at 7:30 PM.\nThe Details are as follows: {'datetime': '2024-03-13T19:30:00', 'description': 'Dinner with clients', 'location': ''}\n",
76
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Doctor's appointment on Friday at 9:00 AM.\nThe Details are as follows: {'datetime': '2024-03-15T09:00:00', 'description': \"Doctor's appointment\", 'location': ''}\n",
77
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Coffee meeting with Sarah on Tuesday at 10:00 AM.\nThe Details are as follows: {'datetime': '2024-03-19T10:00:00', 'description': 'Coffee meeting with Sarah', 'location': ''}\n",
78
+ "Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Conference call with clients on Monday at 4:00 PM.\nThe Details are as follows: {'datetime': '2024-03-18T16:00:00', 'description': 'Conference call with clients', 'location': ''}\n"
79
+
80
+ ]
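Each training string above follows one template: the extraction instruction, the raw message, and a Python-dict rendering of the labels. As a minimal sketch (the `message` and `event` values below are illustrative, not rows from the dataset), one such string could be assembled like this:

    # Hypothetical record; field names mirror the strings above.
    message = "Conference call with clients on Monday at 4:00 PM."
    event = {'datetime': '2024-03-18T16:00:00', 'description': 'Conference call with clients', 'location': ''}

    prompt = (
        "Extract the calendar events from the following text, the text will contain "
        f"a place, time, and possibly a location. Here is the text: {message}\n"
        f"The Details are as follows: {event}\n"
    )
    print(prompt)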
dataset.csv ADDED
The diff for this file is too large to render. See raw diff
 
elif.ipynb ADDED
@@ -0,0 +1,452 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 94,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "data": {
10
+ "application/vnd.jupyter.widget-view+json": {
11
+ "model_id": "cf86fed9cae54700b31a616cd82b7180",
12
+ "version_major": 2,
13
+ "version_minor": 0
14
+ },
15
+ "text/plain": [
16
+ "Generating train split: 0 examples [00:00, ? examples/s]"
17
+ ]
18
+ },
19
+ "metadata": {},
20
+ "output_type": "display_data"
21
+ }
22
+ ],
23
+ "source": [
24
+ "from datasets import load_dataset\n",
25
+ "# load dataset from data.jsonl file:\n",
26
+ "eli5 = load_dataset(\"json\", data_files=\"data3.jsonl\", split=\"train[:80%]\")"
27
+ ]
28
+ },
29
+ {
30
+ "cell_type": "code",
31
+ "execution_count": 95,
32
+ "metadata": {},
33
+ "outputs": [],
34
+ "source": [
35
+ "eli5 = eli5.train_test_split(test_size=0.2)"
36
+ ]
37
+ },
38
+ {
39
+ "cell_type": "code",
40
+ "execution_count": 96,
41
+ "metadata": {},
42
+ "outputs": [
43
+ {
44
+ "data": {
45
+ "text/plain": [
46
+ "{'text': \"Extract the calendar events from the following text, the text will contain a place, time , land possibly a location. Here is the text: : Board meeting next Tuesday at 10 AM.\\nThe Details are as follows: {'datetime': '2024-03-19T10:00:00', 'description': 'Board meeting', 'location': ''}\\n\"}"
47
+ ]
48
+ },
49
+ "execution_count": 96,
50
+ "metadata": {},
51
+ "output_type": "execute_result"
52
+ }
53
+ ],
54
+ "source": [
55
+ "eli5[\"train\"][0]"
56
+ ]
57
+ },
58
+ {
59
+ "cell_type": "code",
60
+ "execution_count": 97,
61
+ "metadata": {},
62
+ "outputs": [],
63
+ "source": [
64
+ "from transformers import AutoTokenizer\n",
65
+ "\n",
66
+ "tokenizer = AutoTokenizer.from_pretrained(\"distilbert/distilgpt2\")"
67
+ ]
68
+ },
69
+ {
70
+ "cell_type": "code",
71
+ "execution_count": 98,
72
+ "metadata": {},
73
+ "outputs": [],
74
+ "source": [
75
+ "eli5 = eli5.flatten()"
76
+ ]
77
+ },
78
+ {
79
+ "cell_type": "code",
80
+ "execution_count": 99,
81
+ "metadata": {},
82
+ "outputs": [
83
+ {
84
+ "data": {
85
+ "text/plain": [
86
+ "{'text': \"Extract the calendar events from the following text, the text will contain a place, time , land possibly a location. Here is the text: : Board meeting next Tuesday at 10 AM.\\nThe Details are as follows: {'datetime': '2024-03-19T10:00:00', 'description': 'Board meeting', 'location': ''}\\n\"}"
87
+ ]
88
+ },
89
+ "execution_count": 99,
90
+ "metadata": {},
91
+ "output_type": "execute_result"
92
+ }
93
+ ],
94
+ "source": [
95
+ "eli5[\"train\"][0]"
96
+ ]
97
+ },
98
+ {
99
+ "cell_type": "code",
100
+ "execution_count": 100,
101
+ "metadata": {},
102
+ "outputs": [],
103
+ "source": [
104
+ "def preprocess_function(examples):\n",
105
+ " return tokenizer(examples[\"text\"]) # \"text\" is a single string per example"
106
+ ]
107
+ },
108
+ {
109
+ "cell_type": "code",
110
+ "execution_count": 101,
111
+ "metadata": {},
112
+ "outputs": [
113
+ {
114
+ "data": {
115
+ "application/vnd.jupyter.widget-view+json": {
116
+ "model_id": "7d326a1d4117454f98bfd6c7f575120c",
117
+ "version_major": 2,
118
+ "version_minor": 0
119
+ },
120
+ "text/plain": [
121
+ "Map (num_proc=4): 0%| | 0/49 [00:00<?, ? examples/s]"
122
+ ]
123
+ },
124
+ "metadata": {},
125
+ "output_type": "display_data"
126
+ },
127
+ {
128
+ "data": {
129
+ "application/vnd.jupyter.widget-view+json": {
130
+ "model_id": "13f25ef46a43486ea69fec77f62f7c9a",
131
+ "version_major": 2,
132
+ "version_minor": 0
133
+ },
134
+ "text/plain": [
135
+ "Map (num_proc=4): 0%| | 0/13 [00:00<?, ? examples/s]"
136
+ ]
137
+ },
138
+ "metadata": {},
139
+ "output_type": "display_data"
140
+ }
141
+ ],
142
+ "source": [
143
+ "tokenized_eli5 = eli5.map(\n",
144
+ " preprocess_function,\n",
145
+ " batched=True,\n",
146
+ " num_proc=4,\n",
147
+ " remove_columns=eli5[\"train\"].column_names,\n",
148
+ ")"
149
+ ]
150
+ },
151
+ {
152
+ "cell_type": "code",
153
+ "execution_count": 102,
154
+ "metadata": {},
155
+ "outputs": [],
156
+ "source": [
157
+ "block_size = 128\n",
158
+ "\n",
159
+ "\n",
160
+ "def group_texts(examples):\n",
161
+ " # Concatenate all texts.\n",
162
+ " concatenated_examples = {k: sum(examples[k], []) for k in examples.keys()}\n",
163
+ " total_length = len(concatenated_examples[list(examples.keys())[0]])\n",
164
+ " # We drop the small remainder, we could add padding if the model supported it instead of this drop, you can\n",
165
+ " # customize this part to your needs.\n",
166
+ " if total_length >= block_size:\n",
167
+ " total_length = (total_length // block_size) * block_size\n",
168
+ " # Split by chunks of block_size.\n",
169
+ " result = {\n",
170
+ " k: [t[i : i + block_size] for i in range(0, total_length, block_size)]\n",
171
+ " for k, t in concatenated_examples.items()\n",
172
+ " }\n",
173
+ " result[\"labels\"] = result[\"input_ids\"].copy()\n",
174
+ " return result"
175
+ ]
176
+ },
177
+ {
178
+ "cell_type": "code",
179
+ "execution_count": 103,
180
+ "metadata": {},
181
+ "outputs": [
182
+ {
183
+ "data": {
184
+ "application/vnd.jupyter.widget-view+json": {
185
+ "model_id": "a571ed26269640278514bfb2b02b1e03",
186
+ "version_major": 2,
187
+ "version_minor": 0
188
+ },
189
+ "text/plain": [
190
+ "Map (num_proc=4): 0%| | 0/4 [00:00<?, ? examples/s]"
191
+ ]
192
+ },
193
+ "metadata": {},
194
+ "output_type": "display_data"
195
+ },
196
+ {
197
+ "data": {
198
+ "application/vnd.jupyter.widget-view+json": {
199
+ "model_id": "3385544255ec4af79d74e2d131845e07",
200
+ "version_major": 2,
201
+ "version_minor": 0
202
+ },
203
+ "text/plain": [
204
+ "Map (num_proc=4): 0%| | 0/4 [00:00<?, ? examples/s]"
205
+ ]
206
+ },
207
+ "metadata": {},
208
+ "output_type": "display_data"
209
+ }
210
+ ],
211
+ "source": [
212
+ "lm_dataset = tokenized_eli5.map(group_texts, batched=True, num_proc=4)"
213
+ ]
214
+ },
215
+ {
216
+ "cell_type": "code",
217
+ "execution_count": 104,
218
+ "metadata": {},
219
+ "outputs": [],
220
+ "source": [
221
+ "from transformers import DataCollatorForLanguageModeling\n",
222
+ "\n",
223
+ "tokenizer.pad_token = tokenizer.eos_token\n",
224
+ "data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)"
225
+ ]
226
+ },
227
+ {
228
+ "cell_type": "code",
229
+ "execution_count": 105,
230
+ "metadata": {},
231
+ "outputs": [],
232
+ "source": [
233
+ "from transformers import AutoModelForCausalLM, TrainingArguments, Trainer\n",
234
+ "\n",
235
+ "model = AutoModelForCausalLM.from_pretrained(\"distilbert/distilgpt2\")"
236
+ ]
237
+ },
238
+ {
239
+ "cell_type": "code",
240
+ "execution_count": 106,
241
+ "metadata": {},
242
+ "outputs": [
243
+ {
244
+ "name": "stdout",
245
+ "output_type": "stream",
246
+ "text": [
247
+ "Model moved to MPS device\n"
248
+ ]
249
+ }
250
+ ],
251
+ "source": [
252
+ "import torch\n",
253
+ "\n",
254
+ "# Check that MPS is available\n",
255
+ "if not torch.backends.mps.is_available():\n",
256
+ " if not torch.backends.mps.is_built():\n",
257
+ " print(\"MPS not available because the current PyTorch install was not \"\n",
258
+ " \"built with MPS enabled.\")\n",
259
+ " else:\n",
260
+ " print(\"MPS not available because the current MacOS version is not 12.3+ \"\n",
261
+ " \"and/or you do not have an MPS-enabled device on this machine.\")\n",
262
+ "\n",
263
+ "else:\n",
264
+ " mps_device = torch.device(\"mps\")\n",
265
+ " model.to(mps_device)\n",
266
+ " print(\"Model moved to MPS device\")"
267
+ ]
268
+ },
269
+ {
270
+ "cell_type": "code",
271
+ "execution_count": 107,
272
+ "metadata": {},
273
+ "outputs": [
274
+ {
275
+ "data": {
276
+ "application/vnd.jupyter.widget-view+json": {
277
+ "model_id": "a71332654a414bfe87d416ce502c9cdc",
278
+ "version_major": 2,
279
+ "version_minor": 0
280
+ },
281
+ "text/plain": [
282
+ " 0%| | 0/3 [00:00<?, ?it/s]"
283
+ ]
284
+ },
285
+ "metadata": {},
286
+ "output_type": "display_data"
287
+ },
288
+ {
289
+ "data": {
290
+ "application/vnd.jupyter.widget-view+json": {
291
+ "model_id": "eab9e9bcc2814d3e8ba55806dc9d4a4f",
292
+ "version_major": 2,
293
+ "version_minor": 0
294
+ },
295
+ "text/plain": [
296
+ " 0%| | 0/1 [00:00<?, ?it/s]"
297
+ ]
298
+ },
299
+ "metadata": {},
300
+ "output_type": "display_data"
301
+ },
302
+ {
303
+ "name": "stdout",
304
+ "output_type": "stream",
305
+ "text": [
306
+ "{'eval_loss': 6.667893886566162, 'eval_runtime': 0.0262, 'eval_samples_per_second': 152.548, 'eval_steps_per_second': 38.137, 'epoch': 1.0}\n"
307
+ ]
308
+ },
309
+ {
310
+ "data": {
311
+ "application/vnd.jupyter.widget-view+json": {
312
+ "model_id": "b9634b37ea2d436c9700ce311651fdae",
313
+ "version_major": 2,
314
+ "version_minor": 0
315
+ },
316
+ "text/plain": [
317
+ " 0%| | 0/1 [00:00<?, ?it/s]"
318
+ ]
319
+ },
320
+ "metadata": {},
321
+ "output_type": "display_data"
322
+ },
323
+ {
324
+ "name": "stdout",
325
+ "output_type": "stream",
326
+ "text": [
327
+ "{'eval_loss': 6.2145514488220215, 'eval_runtime': 0.1232, 'eval_samples_per_second': 32.47, 'eval_steps_per_second': 8.118, 'epoch': 2.0}\n"
328
+ ]
329
+ },
330
+ {
331
+ "data": {
332
+ "application/vnd.jupyter.widget-view+json": {
333
+ "model_id": "3af25ad29ad04c319677ec04dc22d3d1",
334
+ "version_major": 2,
335
+ "version_minor": 0
336
+ },
337
+ "text/plain": [
338
+ " 0%| | 0/1 [00:00<?, ?it/s]"
339
+ ]
340
+ },
341
+ "metadata": {},
342
+ "output_type": "display_data"
343
+ },
344
+ {
345
+ "name": "stdout",
346
+ "output_type": "stream",
347
+ "text": [
348
+ "{'eval_loss': 5.993268966674805, 'eval_runtime': 0.0204, 'eval_samples_per_second': 196.346, 'eval_steps_per_second': 49.087, 'epoch': 3.0}\n",
349
+ "{'train_runtime': 1.588, 'train_samples_per_second': 7.556, 'train_steps_per_second': 1.889, 'train_loss': 6.412024815877278, 'epoch': 3.0}\n"
350
+ ]
351
+ },
352
+ {
353
+ "data": {
354
+ "text/plain": [
355
+ "TrainOutput(global_step=3, training_loss=6.412024815877278, metrics={'train_runtime': 1.588, 'train_samples_per_second': 7.556, 'train_steps_per_second': 1.889, 'train_loss': 6.412024815877278, 'epoch': 3.0})"
356
+ ]
357
+ },
358
+ "execution_count": 107,
359
+ "metadata": {},
360
+ "output_type": "execute_result"
361
+ }
362
+ ],
363
+ "source": [
364
+ "training_args = TrainingArguments(\n",
365
+ " output_dir=\"my_awesome_eli5_clm-model\",\n",
366
+ " evaluation_strategy=\"epoch\",\n",
367
+ " learning_rate=2e-5,\n",
368
+ " weight_decay=0.01,\n",
369
+ " push_to_hub=True,\n",
370
+ ")\n",
371
+ "\n",
372
+ "trainer = Trainer(\n",
373
+ " model=model,\n",
374
+ " args=training_args,\n",
375
+ " train_dataset=lm_dataset[\"train\"],\n",
376
+ " eval_dataset=lm_dataset[\"test\"],\n",
377
+ " data_collator=data_collator,\n",
378
+ ")\n",
379
+ "\n",
380
+ "trainer.train()"
381
+ ]
382
+ },
383
+ {
384
+ "cell_type": "code",
385
+ "execution_count": 110,
386
+ "metadata": {},
387
+ "outputs": [
388
+ {
389
+ "name": "stderr",
390
+ "output_type": "stream",
391
+ "text": [
392
+ "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
393
+ ]
394
+ },
395
+ {
396
+ "data": {
397
+ "text/plain": [
398
+ "[{'generated_text': \"Extract the calendar events from the following text, the text will contain a place, time , land possibly a location. Here is the text: : Let's meet for lunch tomorrow at 12 PM at the Italian restaurant on Main Street.\\nThe Details are as follows: { if (is_empty(the_time()) : return next_day_long(_.length(this); } } }\\nThe Time is: 12 PM on Sunday 12th at the Italian restaurant on Main Street.\\nTaste: 12 PM on Sunday 8th at the Italian restaurant on Main Street.\\nThe Time is: 11 PM on Monday 9th at the Italian restaurant on Main Street.\\nThe Time is: 11 AM on Monday 9th at the Italian restaurant on Main Street.\\nTaste: 11 AM on Sunday 8th at the Italian restaurant on Main Street.\\nThe Time is: 11 AM on Monday 9th at the Italian restaurant on Main Street.\\nThe Time is: 11 AM\"}]"
399
+ ]
400
+ },
401
+ "execution_count": 110,
402
+ "metadata": {},
403
+ "output_type": "execute_result"
404
+ }
405
+ ],
406
+ "source": [
407
+ "prompt = \"Extract the calendar events from the following text, the text will contain a place, time , land possibly a location. Here is the text: : Let's meet for lunch tomorrow at 12 PM at the Italian restaurant on Main Street.\\nThe Details are as follows: {\"\n",
408
+ "from transformers import pipeline\n",
409
+ "\n",
410
+ "generator = pipeline(\"text-generation\", model=model, tokenizer=tokenizer, max_length=200)\n",
411
+ "generator(prompt)"
412
+ ]
413
+ },
414
+ {
415
+ "cell_type": "code",
416
+ "execution_count": null,
417
+ "metadata": {},
418
+ "outputs": [],
419
+ "source": [
420
+ "from transformers import AutoTokenizer"
421
+ ]
422
+ },
423
+ {
424
+ "cell_type": "code",
425
+ "execution_count": null,
426
+ "metadata": {},
427
+ "outputs": [],
428
+ "source": []
429
+ }
430
+ ],
431
+ "metadata": {
432
+ "kernelspec": {
433
+ "display_name": "Python 3",
434
+ "language": "python",
435
+ "name": "python3"
436
+ },
437
+ "language_info": {
438
+ "codemirror_mode": {
439
+ "name": "ipython",
440
+ "version": 3
441
+ },
442
+ "file_extension": ".py",
443
+ "mimetype": "text/x-python",
444
+ "name": "python",
445
+ "nbconvert_exporter": "python",
446
+ "pygments_lexer": "ipython3",
447
+ "version": "3.12.1"
448
+ }
449
+ },
450
+ "nbformat": 4,
451
+ "nbformat_minor": 2
452
+ }
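The `group_texts` step in `elif.ipynb` concatenates every tokenized example and re-splits the stream into fixed `block_size` chunks, dropping the remainder. A toy, self-contained sketch of that behavior (the token ids and a `block_size` of 4 are made up for illustration; the notebook uses 128):

    # Toy version of the block-wise grouping used in elif.ipynb.
    examples = {"input_ids": [[1, 2, 3], [4, 5, 6, 7], [8, 9]]}
    block_size = 4

    concatenated = {k: sum(examples[k], []) for k in examples}  # [1, 2, ..., 9]
    total_length = (len(concatenated["input_ids"]) // block_size) * block_size  # 8; the remainder is dropped
    result = {
        k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
        for k, t in concatenated.items()
    }
    result["labels"] = [ids.copy() for ids in result["input_ids"]]  # causal LM: labels mirror inputs
    print(result["input_ids"])  # [[1, 2, 3, 4], [5, 6, 7, 8]] -- token 9 is dropped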
foo.py ADDED
@@ -0,0 +1,15 @@
1
+ import json
2
+
3
+ newdata = []
4
+
5
+ with open("data.json", "r", encoding="utf8") as file:
6
+ data = json.load(file)
7
+ for elem in data:
8
+ newdata.append(
9
+ f"Convert the following message to json format: {elem['message']}\n"+
10
+ f"Details: {elem['labels']}\n"
11
+ )
12
+
13
+ # write modified data to data3.json
14
+ with open("data3.json", "w", encoding="utf8") as file:
15
+ json.dump(newdata, file, indent=4)
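Note that `foo.py` writes a single JSON array, while `elif.ipynb` loads `data3.jsonl` with `load_dataset("json", ...)`, which expects one JSON object per line; the `{'text': ...}` record shown in that notebook suggests each line carries a `text` field. A small sketch of that conversion, assuming `data3.json` holds the list written above:

    import json

    # Assumes data3.json contains the list of prompt strings produced by foo.py.
    with open("data3.json", "r", encoding="utf8") as f:
        prompts = json.load(f)

    # One {"text": ...} object per line, the shape load_dataset("json", ...) expects.
    with open("data3.jsonl", "w", encoding="utf8") as f:
        for prompt in prompts:
            f.write(json.dumps({"text": prompt}) + "\n")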
foobar ADDED
File without changes
foobar.txt ADDED
@@ -0,0 +1,25 @@
1
+ Complete the code:
2
+
3
+ from typing import List
4
+
5
+
6
+ def has_close_elements(numbers: List[float], threshold: float) -> bool:
7
+ """ Check if in given list of numbers, are any two numbers closer to each other than
8
+ given threshold.
9
+ >>> has_close_elements([1.0, 2.0, 3.0], 0.5)
10
+ False
11
+ >>> has_close_elements([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3)
12
+ True
13
+ """
14
+
15
+
16
+
17
+
18
+
19
+ def truncate_number(number: float) -> float:
20
+ # Given a positive floating point number, it can be decomposed into
21
+ # an integer part (largest integer smaller than given number) and decimals
22
+ # (leftover part always smaller than 1).
23
+ #
24
+ # Return the decimal part of the number.
25
+ # >>> truncate_number(3.5)  # 0.5
has_closest_elements.evy ADDED
@@ -0,0 +1,38 @@
1
+ func has_close_element:bool nums:[]num threshold:num
2
+ for i := range (len nums)
3
+ for j := range (i + 1) (len nums)
4
+ if (abs nums[i]-nums[j])<threshold
5
+ return true
6
+ end
7
+ end
8
+ end
9
+ return false
10
+ end
11
+
12
+ func abs:num n:num
13
+ if n < 0
14
+ return -n
15
+ end
16
+ return n
17
+ end
18
+
19
+ fails := 0
20
+ total := 0
21
+
22
+ func assert want:any got:any
23
+ total = total + 1
24
+ if want != got
25
+ fails = fails + 1
26
+ printf "want != got: want %v got %v\n" want got
27
+ end
28
+ end
29
+
30
+ func finished
31
+ printf "%v of %v tests passed\n" (total - fails) total
32
+ end
33
+
34
+ // -- Test Cases Start -- //
35
+ assert false (has_close_element [1.0 2.0 3.0] 0.5)
36
+ assert true (has_close_element [1.0 2.8 3.0 4.0 5.0 2.0] 0.3)
37
+ // -- Test Cases End -- //
38
+ finished
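The Evy program above solves the same `has_close_elements` task that appears in `foobar.txt` and HumanEval. For comparison, a straightforward Python version of the identical pairwise check (an illustrative reference, not a file from this repo):

    from typing import List

    def has_close_elements(numbers: List[float], threshold: float) -> bool:
        # Same pairwise comparison as the Evy version above.
        for i in range(len(numbers)):
            for j in range(i + 1, len(numbers)):
                if abs(numbers[i] - numbers[j]) < threshold:
                    return True
        return False

    assert has_close_elements([1.0, 2.0, 3.0], 0.5) is False
    assert has_close_elements([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3) is True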
ner.ipynb ADDED
@@ -0,0 +1,363 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "!pip install svgling\n"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "code",
14
+ "execution_count": null,
15
+ "metadata": {},
16
+ "outputs": [],
17
+ "source": [
18
+ "!pip install transformers datasets evaluate seqeval"
19
+ ]
20
+ },
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": null,
24
+ "metadata": {},
25
+ "outputs": [],
26
+ "source": [
27
+ "from datasets import load_dataset\n",
28
+ "import pandas as pd\n",
29
+ "\n",
30
+ "wnut = load_dataset(\"wnut_17\")\n",
31
+ "df = pd.DataFrame(wnut[\"train\"]) # Assuming 'train' split, you can choose other splits\n",
32
+ "\n",
33
+ "# Save the DataFrame to a CSV file\n",
34
+ "df.to_csv('dataset.csv', index=False) # Change 'dataset.csv' to your desired file name\n"
35
+ ]
36
+ },
37
+ {
38
+ "cell_type": "code",
39
+ "execution_count": null,
40
+ "metadata": {},
41
+ "outputs": [],
42
+ "source": [
43
+ "wnut[\"train\"][0]"
44
+ ]
45
+ },
46
+ {
47
+ "cell_type": "code",
48
+ "execution_count": null,
49
+ "metadata": {},
50
+ "outputs": [],
51
+ "source": []
52
+ },
53
+ {
54
+ "cell_type": "code",
55
+ "execution_count": null,
56
+ "metadata": {},
57
+ "outputs": [],
58
+ "source": []
59
+ },
60
+ {
61
+ "cell_type": "code",
62
+ "execution_count": null,
63
+ "metadata": {},
64
+ "outputs": [],
65
+ "source": [
66
+ "import nltk\n",
67
+ "import ssl\n",
68
+ "\n",
69
+ "try:\n",
70
+ " _create_unverified_https_context = ssl._create_unverified_context\n",
71
+ "except AttributeError:\n",
72
+ " pass\n",
73
+ "else:\n",
74
+ " ssl._create_default_https_context = _create_unverified_https_context\n",
75
+ "\n",
76
+ "nltk.download('punkt')"
77
+ ]
78
+ },
79
+ {
80
+ "cell_type": "code",
81
+ "execution_count": null,
82
+ "metadata": {},
83
+ "outputs": [],
84
+ "source": [
85
+ "label_list = wnut[\"train\"].features[f\"ner_tags\"].feature.names\n",
86
+ "label_list"
87
+ ]
88
+ },
89
+ {
90
+ "cell_type": "code",
91
+ "execution_count": null,
92
+ "metadata": {},
93
+ "outputs": [],
94
+ "source": [
95
+ "from transformers import AutoTokenizer\n",
96
+ "\n",
97
+ "tokenizer = AutoTokenizer.from_pretrained(\"distilbert/distilbert-base-uncased\")"
98
+ ]
99
+ },
100
+ {
101
+ "cell_type": "code",
102
+ "execution_count": null,
103
+ "metadata": {},
104
+ "outputs": [],
105
+ "source": [
106
+ "example = wnut[\"train\"][0]\n",
107
+ "tokenized_input = tokenizer(example[\"tokens\"], is_split_into_words=True)\n",
108
+ "tokens = tokenizer.convert_ids_to_tokens(tokenized_input[\"input_ids\"])\n",
109
+ "tokens"
110
+ ]
111
+ },
112
+ {
113
+ "cell_type": "code",
114
+ "execution_count": null,
115
+ "metadata": {},
116
+ "outputs": [],
117
+ "source": [
118
+ "def tokenize_and_align_labels(examples):\n",
119
+ " tokenized_inputs = tokenizer(examples[\"tokens\"], truncation=True, is_split_into_words=True)\n",
120
+ "\n",
121
+ " labels = []\n",
122
+ " for i, label in enumerate(examples[f\"ner_tags\"]):\n",
123
+ " word_ids = tokenized_inputs.word_ids(batch_index=i) # Map tokens to their respective word.\n",
124
+ " previous_word_idx = None\n",
125
+ " label_ids = []\n",
126
+ " for word_idx in word_ids: # Set the special tokens to -100.\n",
127
+ " if word_idx is None:\n",
128
+ " label_ids.append(-100)\n",
129
+ " elif word_idx != previous_word_idx: # Only label the first token of a given word.\n",
130
+ " label_ids.append(label[word_idx])\n",
131
+ " else:\n",
132
+ " label_ids.append(-100)\n",
133
+ " previous_word_idx = word_idx\n",
134
+ " labels.append(label_ids)\n",
135
+ "\n",
136
+ " tokenized_inputs[\"labels\"] = labels\n",
137
+ " return tokenized_inputs"
138
+ ]
139
+ },
140
+ {
141
+ "cell_type": "code",
142
+ "execution_count": null,
143
+ "metadata": {},
144
+ "outputs": [],
145
+ "source": [
146
+ "tokenized_wnut = wnut.map(tokenize_and_align_labels, batched=True)"
147
+ ]
148
+ },
149
+ {
150
+ "cell_type": "code",
151
+ "execution_count": null,
152
+ "metadata": {},
153
+ "outputs": [],
154
+ "source": [
155
+ "tokenized_wnut = wnut.map(tokenize_and_align_labels, batched=True)"
156
+ ]
157
+ },
158
+ {
159
+ "cell_type": "code",
160
+ "execution_count": null,
161
+ "metadata": {},
162
+ "outputs": [],
163
+ "source": [
164
+ "import evaluate\n",
165
+ "\n",
166
+ "seqeval = evaluate.load(\"seqeval\")"
167
+ ]
168
+ },
169
+ {
170
+ "cell_type": "code",
171
+ "execution_count": null,
172
+ "metadata": {},
173
+ "outputs": [],
174
+ "source": [
175
+ "from transformers import DataCollatorForTokenClassification\n",
176
+ "\n",
177
+ "data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)"
178
+ ]
179
+ },
180
+ {
181
+ "cell_type": "code",
182
+ "execution_count": null,
183
+ "metadata": {},
184
+ "outputs": [],
185
+ "source": [
186
+ "import evaluate\n",
187
+ "\n",
188
+ "seqeval = evaluate.load(\"seqeval\")"
189
+ ]
190
+ },
191
+ {
192
+ "cell_type": "code",
193
+ "execution_count": null,
194
+ "metadata": {},
195
+ "outputs": [],
196
+ "source": [
197
+ "import numpy as np\n",
198
+ "\n",
199
+ "labels = [label_list[i] for i in example[f\"ner_tags\"]]\n",
200
+ "\n",
201
+ "\n",
202
+ "def compute_metrics(p):\n",
203
+ " predictions, labels = p\n",
204
+ " predictions = np.argmax(predictions, axis=2)\n",
205
+ "\n",
206
+ " true_predictions = [\n",
207
+ " [label_list[p] for (p, l) in zip(prediction, label) if l != -100]\n",
208
+ " for prediction, label in zip(predictions, labels)\n",
209
+ " ]\n",
210
+ " true_labels = [\n",
211
+ " [label_list[l] for (p, l) in zip(prediction, label) if l != -100]\n",
212
+ " for prediction, label in zip(predictions, labels)\n",
213
+ " ]\n",
214
+ "\n",
215
+ " results = seqeval.compute(predictions=true_predictions, references=true_labels)\n",
216
+ " return {\n",
217
+ " \"precision\": results[\"overall_precision\"],\n",
218
+ " \"recall\": results[\"overall_recall\"],\n",
219
+ " \"f1\": results[\"overall_f1\"],\n",
220
+ " \"accuracy\": results[\"overall_accuracy\"],\n",
221
+ " }"
222
+ ]
223
+ },
224
+ {
225
+ "cell_type": "code",
226
+ "execution_count": null,
227
+ "metadata": {},
228
+ "outputs": [],
229
+ "source": [
230
+ "id2label = {\n",
231
+ " 0: \"O\",\n",
232
+ " 1: \"B-corporation\",\n",
233
+ " 2: \"I-corporation\",\n",
234
+ " 3: \"B-creative-work\",\n",
235
+ " 4: \"I-creative-work\",\n",
236
+ " 5: \"B-group\",\n",
237
+ " 6: \"I-group\",\n",
238
+ " 7: \"B-location\",\n",
239
+ " 8: \"I-location\",\n",
240
+ " 9: \"B-person\",\n",
241
+ " 10: \"I-person\",\n",
242
+ " 11: \"B-product\",\n",
243
+ " 12: \"I-product\",\n",
244
+ "}\n",
245
+ "label2id = {\n",
246
+ " \"O\": 0,\n",
247
+ " \"B-corporation\": 1,\n",
248
+ " \"I-corporation\": 2,\n",
249
+ " \"B-creative-work\": 3,\n",
250
+ " \"I-creative-work\": 4,\n",
251
+ " \"B-group\": 5,\n",
252
+ " \"I-group\": 6,\n",
253
+ " \"B-location\": 7,\n",
254
+ " \"I-location\": 8,\n",
255
+ " \"B-person\": 9,\n",
256
+ " \"I-person\": 10,\n",
257
+ " \"B-product\": 11,\n",
258
+ " \"I-product\": 12,\n",
259
+ "}"
260
+ ]
261
+ },
262
+ {
263
+ "cell_type": "code",
264
+ "execution_count": null,
265
+ "metadata": {},
266
+ "outputs": [],
267
+ "source": [
268
+ "from transformers import AutoModelForTokenClassification, TrainingArguments, Trainer\n",
269
+ "\n",
270
+ "model = AutoModelForTokenClassification.from_pretrained(\n",
271
+ " \"distilbert/distilbert-base-uncased\", num_labels=13, id2label=id2label, label2id=label2id\n",
272
+ ")"
273
+ ]
274
+ },
275
+ {
276
+ "cell_type": "code",
277
+ "execution_count": null,
278
+ "metadata": {},
279
+ "outputs": [],
280
+ "source": [
281
+ "training_args = TrainingArguments(\n",
282
+ " output_dir=\"my_awesome_wnut_model\",\n",
283
+ " learning_rate=2e-5,\n",
284
+ " per_device_train_batch_size=16,\n",
285
+ " per_device_eval_batch_size=16,\n",
286
+ " num_train_epochs=2,\n",
287
+ " weight_decay=0.01,\n",
288
+ " evaluation_strategy=\"epoch\",\n",
289
+ " save_strategy=\"epoch\",\n",
290
+ " load_best_model_at_end=True,\n",
291
+ " push_to_hub=False,\n",
292
+ ")\n",
293
+ "\n",
294
+ "trainer = Trainer(\n",
295
+ " model=model,\n",
296
+ " args=training_args,\n",
297
+ " train_dataset=tokenized_wnut[\"train\"],\n",
298
+ " eval_dataset=tokenized_wnut[\"test\"],\n",
299
+ " tokenizer=tokenizer,\n",
300
+ " data_collator=data_collator,\n",
301
+ " compute_metrics=compute_metrics,\n",
302
+ ")\n",
303
+ "\n",
304
+ "trainer.train()"
305
+ ]
306
+ },
307
+ {
308
+ "cell_type": "code",
309
+ "execution_count": null,
310
+ "metadata": {},
311
+ "outputs": [],
312
+ "source": []
313
+ },
314
+ {
315
+ "cell_type": "code",
316
+ "execution_count": null,
317
+ "metadata": {},
318
+ "outputs": [],
319
+ "source": [
320
+ "from transformers import pipeline\n",
321
+ "\n",
322
+ "text = \"Let's meet for Lunch Tomorrow at 12 PM at the Italian restaurant on Main Street. Simon\"\n",
323
+ "classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
324
+ "classifier(text)"
325
+ ]
326
+ },
327
+ {
328
+ "cell_type": "code",
329
+ "execution_count": null,
330
+ "metadata": {},
331
+ "outputs": [],
332
+ "source": []
333
+ },
334
+ {
335
+ "cell_type": "code",
336
+ "execution_count": null,
337
+ "metadata": {},
338
+ "outputs": [],
339
+ "source": []
340
+ }
341
+ ],
342
+ "metadata": {
343
+ "kernelspec": {
344
+ "display_name": "Python 3",
345
+ "language": "python",
346
+ "name": "python3"
347
+ },
348
+ "language_info": {
349
+ "codemirror_mode": {
350
+ "name": "ipython",
351
+ "version": 3
352
+ },
353
+ "file_extension": ".py",
354
+ "mimetype": "text/x-python",
355
+ "name": "python",
356
+ "nbconvert_exporter": "python",
357
+ "pygments_lexer": "ipython3",
358
+ "version": "3.12.1"
359
+ }
360
+ },
361
+ "nbformat": 4,
362
+ "nbformat_minor": 2
363
+ }
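The `tokenize_and_align_labels` function in `ner.ipynb` masks every label position that is not the first sub-token of a word with -100 so the loss ignores it. A toy sketch of that alignment (the `word_ids` split shown here is hypothetical, standing in for what the tokenizer would return):

    # Toy illustration of the -100 label alignment in ner.ipynb.
    word_ids = [None, 0, 0, 1, 2, 2, None]  # e.g. [CLS] Let 's meet Tomo ##rrow [SEP]
    ner_tags = [0, 0, 7]                    # one tag per original word

    label_ids, previous = [], None
    for idx in word_ids:
        if idx is None:
            label_ids.append(-100)             # special token
        elif idx != previous:
            label_ids.append(ner_tags[idx])    # first sub-token keeps the word's label
        else:
            label_ids.append(-100)             # later sub-tokens are ignored by the loss
        previous = idx
    print(label_ids)  # [-100, 0, -100, 0, 7, -100, -100]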
nltk.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebook.ipynb ADDED
@@ -0,0 +1,268 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": []
9
+ },
10
+ {
11
+ "cell_type": "code",
12
+ "execution_count": 1,
13
+ "metadata": {},
14
+ "outputs": [],
15
+ "source": [
16
+ "from datasets import load_dataset\n",
17
+ "\n",
18
+ "billsum = load_dataset(\"billsum\", split=\"ca_test\")\n",
19
+ "billsum = billsum.select(range(1000))\n",
20
+ "billsum = billsum.train_test_split(test_size=0.2)"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": 2,
26
+ "metadata": {},
27
+ "outputs": [],
28
+ "source": [
29
+ "from transformers import AutoTokenizer\n",
30
+ "checkpoint = \"google-t5/t5-small\"\n",
31
+ "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
32
+ "prefix = \"summarize: \"\n",
33
+ "\n",
34
+ "def preprocess_function(examples):\n",
35
+ " inputs = [prefix + doc for doc in examples[\"text\"]]\n",
36
+ " model_inputs = tokenizer(inputs, max_length=1024, truncation=True, padding=\"max_length\") \n",
37
+ "\n",
38
+ " labels = tokenizer(text_target=examples[\"summary\"], max_length=128, truncation=True, padding=\"max_length\")\n",
39
+ "\n",
40
+ " model_inputs[\"labels\"] = labels[\"input_ids\"]\n",
41
+ " return model_inputs\n"
42
+ ]
43
+ },
44
+ {
45
+ "cell_type": "code",
46
+ "execution_count": 3,
47
+ "metadata": {},
48
+ "outputs": [
49
+ {
50
+ "data": {
51
+ "application/vnd.jupyter.widget-view+json": {
52
+ "model_id": "4dfbb4c779af4a4ca5398622f2bd887d",
53
+ "version_major": 2,
54
+ "version_minor": 0
55
+ },
56
+ "text/plain": [
57
+ "Map: 0%| | 0/800 [00:00<?, ? examples/s]"
58
+ ]
59
+ },
60
+ "metadata": {},
61
+ "output_type": "display_data"
62
+ },
63
+ {
64
+ "data": {
65
+ "application/vnd.jupyter.widget-view+json": {
66
+ "model_id": "2a4f6446a1e541ed9ef835ca2b2bdfa1",
67
+ "version_major": 2,
68
+ "version_minor": 0
69
+ },
70
+ "text/plain": [
71
+ "Map: 0%| | 0/200 [00:00<?, ? examples/s]"
72
+ ]
73
+ },
74
+ "metadata": {},
75
+ "output_type": "display_data"
76
+ }
77
+ ],
78
+ "source": [
79
+ "tokenized_billsum = billsum.map(preprocess_function, batched=True)"
80
+ ]
81
+ },
82
+ {
83
+ "cell_type": "code",
84
+ "execution_count": 4,
85
+ "metadata": {},
86
+ "outputs": [],
87
+ "source": [
88
+ "from transformers import AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer\n",
89
+ "model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)"
90
+ ]
91
+ },
92
+ {
93
+ "cell_type": "code",
94
+ "execution_count": 5,
95
+ "metadata": {},
96
+ "outputs": [
97
+ {
98
+ "name": "stdout",
99
+ "output_type": "stream",
100
+ "text": [
101
+ "Model moved to MPS device\n"
102
+ ]
103
+ }
104
+ ],
105
+ "source": [
106
+ "import torch\n",
107
+ "\n",
108
+ "# Check that MPS is available\n",
109
+ "if not torch.backends.mps.is_available():\n",
110
+ " if not torch.backends.mps.is_built():\n",
111
+ " print(\"MPS not available because the current PyTorch install was not \"\n",
112
+ " \"built with MPS enabled.\")\n",
113
+ " else:\n",
114
+ " print(\"MPS not available because the current MacOS version is not 12.3+ \"\n",
115
+ " \"and/or you do not have an MPS-enabled device on this machine.\")\n",
116
+ "\n",
117
+ "else:\n",
118
+ " mps_device = torch.device(\"mps\")\n",
119
+ " model.to(mps_device)\n",
120
+ " print(\"Model moved to MPS device\")"
121
+ ]
122
+ },
123
+ {
124
+ "cell_type": "code",
125
+ "execution_count": 6,
126
+ "metadata": {},
127
+ "outputs": [
128
+ {
129
+ "name": "stderr",
130
+ "output_type": "stream",
131
+ "text": [
132
+ "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/transformers/training_args.py:1951: UserWarning: `use_mps_device` is deprecated and will be removed in version 5.0 of 🤗 Transformers. `mps` device will be used by default if available similar to the way `cuda` device is used.Therefore, no action from user is required. \n",
133
+ " warnings.warn(\n"
134
+ ]
135
+ }
136
+ ],
137
+ "source": [
138
+ "training_args = Seq2SeqTrainingArguments(\n",
139
+ " output_dir=\"calendar_model\",\n",
140
+ " evaluation_strategy=\"epoch\",\n",
141
+ " learning_rate=5e-5,\n",
142
+ " per_device_train_batch_size=16,\n",
143
+ " per_device_eval_batch_size=16,\n",
144
+ " weight_decay=0.01,\n",
145
+ " save_total_limit=3,\n",
146
+ " num_train_epochs=1,\n",
147
+ " predict_with_generate=True,\n",
148
+ " use_mps_device=True,\n",
149
+ " # fp16=True,\n",
150
+ " # push_to_hub=True,\n",
151
+ ")"
152
+ ]
153
+ },
154
+ {
155
+ "cell_type": "code",
156
+ "execution_count": 7,
157
+ "metadata": {},
158
+ "outputs": [],
159
+ "source": [
160
+ "import numpy as np\n",
161
+ "import evaluate\n",
162
+ "metric = evaluate.load(\"accuracy\")\n"
163
+ ]
164
+ },
165
+ {
166
+ "cell_type": "code",
167
+ "execution_count": 8,
168
+ "metadata": {},
169
+ "outputs": [],
170
+ "source": [
171
+ "def compute_metrics(eval_pred):\n",
172
+ " logits, labels = eval_pred\n",
173
+ " predictions = np.argmax(logits, axis=-1)\n",
174
+ " return metric.compute(predictions=predictions, references=labels)"
175
+ ]
176
+ },
177
+ {
178
+ "cell_type": "code",
179
+ "execution_count": 9,
180
+ "metadata": {},
181
+ "outputs": [],
182
+ "source": [
183
+ "from transformers import TrainingArguments, Trainer\n",
184
+ "training_args = TrainingArguments(output_dir=\"test_trainer\", evaluation_strategy=\"epoch\")"
185
+ ]
186
+ },
187
+ {
188
+ "cell_type": "code",
189
+ "execution_count": null,
190
+ "metadata": {},
191
+ "outputs": [],
192
+ "source": []
193
+ },
194
+ {
195
+ "cell_type": "code",
196
+ "execution_count": 10,
197
+ "metadata": {},
198
+ "outputs": [],
199
+ "source": [
200
+ "trainer = Trainer(\n",
201
+ " model=model,\n",
202
+ " args=training_args,\n",
203
+ " train_dataset=tokenized_billsum[\"train\"],\n",
204
+ " eval_dataset=tokenized_billsum[\"test\"],\n",
205
+ " compute_metrics=compute_metrics,\n",
206
+ " )"
207
+ ]
208
+ },
209
+ {
210
+ "cell_type": "code",
211
+ "execution_count": 11,
212
+ "metadata": {},
213
+ "outputs": [
214
+ {
215
+ "data": {
216
+ "application/vnd.jupyter.widget-view+json": {
217
+ "model_id": "b8af6446b2b344818e0812c345023f53",
218
+ "version_major": 2,
219
+ "version_minor": 0
220
+ },
221
+ "text/plain": [
222
+ " 0%| | 0/300 [00:00<?, ?it/s]"
223
+ ]
224
+ },
225
+ "metadata": {},
226
+ "output_type": "display_data"
227
+ },
228
+ {
229
+ "ename": "KeyboardInterrupt",
230
+ "evalue": "",
231
+ "output_type": "error",
232
+ "traceback": [
233
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
234
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
235
+ "Cell \u001b[0;32mIn[11], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
236
+ "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/transformers/trainer.py:1624\u001b[0m, in \u001b[0;36mTrainer.train\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 1622\u001b[0m hf_hub_utils\u001b[38;5;241m.\u001b[39menable_progress_bars()\n\u001b[1;32m 1623\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1624\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43minner_training_loop\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1625\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1626\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1627\u001b[0m \u001b[43m \u001b[49m\u001b[43mtrial\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1628\u001b[0m \u001b[43m \u001b[49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1629\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
237
+ "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/transformers/trainer.py:1966\u001b[0m, in \u001b[0;36mTrainer._inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 1960\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maccelerator\u001b[38;5;241m.\u001b[39maccumulate(model):\n\u001b[1;32m 1961\u001b[0m tr_loss_step \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtraining_step(model, inputs)\n\u001b[1;32m 1963\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 1964\u001b[0m args\u001b[38;5;241m.\u001b[39mlogging_nan_inf_filter\n\u001b[1;32m 1965\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_torch_tpu_available()\n\u001b[0;32m-> 1966\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m (torch\u001b[38;5;241m.\u001b[39misnan(tr_loss_step) \u001b[38;5;129;01mor\u001b[39;00m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43misinf\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtr_loss_step\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 1967\u001b[0m ):\n\u001b[1;32m 1968\u001b[0m \u001b[38;5;66;03m# if loss is nan or inf simply add the average of previous logged losses\u001b[39;00m\n\u001b[1;32m 1969\u001b[0m tr_loss \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m tr_loss \u001b[38;5;241m/\u001b[39m (\u001b[38;5;241m1\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mglobal_step \u001b[38;5;241m-\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_globalstep_last_logged)\n\u001b[1;32m 1970\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
238
+ "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
239
+ ]
240
+ }
241
+ ],
242
+ "source": [
243
+ "trainer.train()"
244
+ ]
245
+ }
246
+ ],
247
+ "metadata": {
248
+ "kernelspec": {
249
+ "display_name": "Python 3",
250
+ "language": "python",
251
+ "name": "python3"
252
+ },
253
+ "language_info": {
254
+ "codemirror_mode": {
255
+ "name": "ipython",
256
+ "version": 3
257
+ },
258
+ "file_extension": ".py",
259
+ "mimetype": "text/x-python",
260
+ "name": "python",
261
+ "nbconvert_exporter": "python",
262
+ "pygments_lexer": "ipython3",
263
+ "version": "3.12.1"
264
+ }
265
+ },
266
+ "nbformat": 4,
267
+ "nbformat_minor": 2
268
+ }
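`notebook.ipynb` fine-tunes `google-t5/t5-small` on billsum with a `summarize: ` prefix. A minimal inference sketch under the same assumptions (base checkpoint as in the cells above; the pipeline should pick up T5's `summarize: ` prefix from the model's task-specific config):

    from transformers import pipeline

    # Base checkpoint from the notebook; after training, the saved output_dir
    # (e.g. "test_trainer/checkpoint-...") could be passed instead to exercise
    # the fine-tuned weights.
    summarizer = pipeline("summarization", model="google-t5/t5-small")
    text = "The bill establishes a grant program for local water districts ..."
    print(summarizer(text, max_length=48, min_length=8)[0]["summary_text"])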
nuner.ipynb ADDED
@@ -0,0 +1,124 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 30,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import torch\n",
10
+ "import transformers\n",
11
+ "\n",
12
+ "\n",
13
+ "model = transformers.AutoModel.from_pretrained(\n",
14
+ " 'numind/NuNER-v1.0',\n",
15
+ " output_hidden_states=True\n",
16
+ ")\n",
17
+ "tokenizer = transformers.AutoTokenizer.from_pretrained(\n",
18
+ " 'numind/NuNER-v1.0'\n",
19
+ ")\n",
20
+ "\n",
21
+ "text = [\n",
22
+ " \"NuMind is an AI company based in Paris and USA.\",\n",
23
+ " \"See other models from us on https://huggingface.co/numind\"\n",
24
+ "]\n",
25
+ "encoded_input = tokenizer(\n",
26
+ " text,\n",
27
+ " return_tensors='pt',\n",
28
+ " padding=True,\n",
29
+ " truncation=True\n",
30
+ ")\n",
31
+ "output = model(**encoded_input)\n",
32
+ "\n",
33
+ "# for better quality\n",
34
+ "emb = torch.cat(\n",
35
+ " (output.hidden_states[-1], output.hidden_states[-7]),\n",
36
+ " dim=2\n",
37
+ ")\n",
38
+ "\n",
39
+ "# for better speed\n",
40
+ "# emb = output.hidden_states[-1]\n",
41
+ "\n"
42
+ ]
43
+ },
44
+ {
45
+ "cell_type": "code",
46
+ "execution_count": 36,
47
+ "metadata": {},
48
+ "outputs": [
49
+ {
50
+ "name": "stderr",
51
+ "output_type": "stream",
52
+ "text": [
53
+ "Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at numind/NuNER-v1.0 and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
54
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
55
+ ]
56
+ },
57
+ {
58
+ "ename": "KeyError",
59
+ "evalue": "'tokens'",
60
+ "output_type": "error",
61
+ "traceback": [
62
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
63
+ "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
64
+ "Cell \u001b[0;32mIn[36], line 25\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m result \u001b[38;5;129;01min\u001b[39;00m results:\n\u001b[1;32m 23\u001b[0m \u001b[38;5;66;03m# Access tokens list using the 'tokens' key (dictionary access)\u001b[39;00m\n\u001b[1;32m 24\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m res \u001b[38;5;129;01min\u001b[39;00m result:\n\u001b[0;32m---> 25\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m token \u001b[38;5;129;01min\u001b[39;00m \u001b[43mres\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mtokens\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m:\n\u001b[1;32m 26\u001b[0m \u001b[38;5;66;03m# Remove the special token prefix (if present)\u001b[39;00m\n\u001b[1;32m 27\u001b[0m word \u001b[38;5;241m=\u001b[39m token[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mword\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mstrip(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mĠ\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 28\u001b[0m \u001b[38;5;66;03m# Look up the entity type based on the predicted label\u001b[39;00m\n",
65
+ "\u001b[0;31mKeyError\u001b[0m: 'tokens'"
66
+ ]
67
+ }
68
+ ],
69
+ "source": [
70
+ "import torch\n",
71
+ "import transformers\n",
72
+ "from transformers import pipeline\n",
73
+ "\n",
74
+ "# Load pre-trained NER model (NuNER-v1.0)\n",
75
+ "ner = pipeline(\"ner\", model=\"numind/NuNER-v1.0\")\n",
76
+ "\n",
77
+ "text = [\n",
78
+ " \"NuMind is an AI company based in Paris and USA.\",\n",
79
+ " \"See other models from us on https://huggingface.co/numind\"\n",
80
+ "]\n",
81
+ "\n",
82
+ "# Process the text and get NER predictions\n",
83
+ "results = ner(text)\n",
84
+ "\n",
85
+ "label_map = {\n",
86
+ " \"LABEL_0\": \"ORG\", # Organization\n",
87
+ " \"LABEL_1\": \"LOC\", # Location\n",
88
+ " # You can add more labels and their mappings here\n",
89
+ "}\n",
90
+ "\n",
91
+ "for result in results:\n",
92
+ " # Each result is the list of entity dicts for one input string\n",
93
+ " for res in result:\n",
94
+ " # Remove the special token prefix (if present)\n",
95
+ " word = res['word'].strip('Ġ')\n",
96
+ " # Look up the entity type based on the predicted label\n",
97
+ " entity_type = label_map.get(res['entity'], \"UNKNOWN\")\n",
98
+ " print(f\"Word: {word}, Entity Type: {entity_type}\")\n",
99
+ "\n"
100
+ ]
101
+ }
102
+ ],
103
+ "metadata": {
104
+ "kernelspec": {
105
+ "display_name": "Python 3",
106
+ "language": "python",
107
+ "name": "python3"
108
+ },
109
+ "language_info": {
110
+ "codemirror_mode": {
111
+ "name": "ipython",
112
+ "version": 3
113
+ },
114
+ "file_extension": ".py",
115
+ "mimetype": "text/x-python",
116
+ "name": "python",
117
+ "nbconvert_exporter": "python",
118
+ "pygments_lexer": "ipython3",
119
+ "version": "3.12.1"
120
+ }
121
+ },
122
+ "nbformat": 4,
123
+ "nbformat_minor": 2
124
+ }
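For reference, `pipeline("ner", ...)` returns one list of entity dicts per input string, each with `word`, `entity`, `score`, `start`, and `end` keys, which is why the final loop indexes `res['word']` rather than a `tokens` field (the recorded KeyError came from a run that indexed a nonexistent `tokens` key). A minimal sketch of inspecting that structure, using `dslim/bert-base-NER` purely as a readily available trained checkpoint:

    from transformers import pipeline

    # Any trained token-classification checkpoint shows the same output shape.
    ner = pipeline("ner", model="dslim/bert-base-NER")
    results = ner(["NuMind is an AI company based in Paris and USA."])
    for res in results[0]:
        print(res["word"], res["entity"], round(float(res["score"]), 3))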
prompt.md ADDED
@@ -0,0 +1,469 @@
1
+ Evy is a new language. Below is an example of some Evy code, and following that are some examples of Evy programs.
2
+
3
+ # Syntax by Example
4
+
5
+ The following examples will help you understand the syntax of Evy. For a
6
+ more formal definition of the syntax, see the
7
+ [Language Specification](spec.md). Built-in functions, such as `print`
8
+ and `circle`, are documented in the [Built-ins section](builtins.md).
9
+
10
+ ## Comment
11
+
12
+ // This is a comment
13
+
14
+ ## Declaration
15
+
16
+ x:num // declaration: num, string, bool, any, []num, {}string
17
+ y := 1 // declaration through type inference (num)
18
+
19
+ ## Assignment
20
+
21
+ z = 5
22
+
23
+ ## Expression
24
+
25
+ x := 5 * (y + z) - 2 / 7.6 // arithmetic number expression
26
+ b := !trace and debug or level == "" // bool expressions
27
+
28
+ ## Strings
29
+
30
+ s1 := "quotation mark : \" " // escaping
31
+ s2 := "abc" + "🥪123" // concatenation
32
+ s3 := "newline: \n indentation: \t"
33
+ s4 := s2[0] // "a"
34
+ s5 := s2[1:5] // "bc🥪1"
35
+
36
+ ## `if` statements
37
+
38
+ if z > 0 and x != 0
39
+ print "block 1"
40
+ else if y != 0 or a == "abc"
41
+ print "block 2"
42
+ else
43
+ print "block 3"
44
+ end
45
+
46
+ ### Nested `if`
47
+
48
+ if z > 0 and x != 0
49
+ if startswith str "a"
50
+ print "nested block 1"
51
+ else
52
+ print "nested block 2"
53
+ end
54
+ end
55
+
56
+ ## Loop statements
57
+
58
+ ### `while` loop
59
+
60
+ x := 0
61
+ while x < 10
62
+ print x
63
+ x = x + 1
64
+ end
65
+
66
+ ### `for` … `range` number
67
+
68
+ for x := range 5
69
+ print x // 0 1 2 3 4
70
+ end
71
+
72
+ for x := range 5 10
73
+ print x // 5 6 7 8 9
74
+ end
75
+
76
+ for x := range 1 10 2 // from to step
77
+ print x // 1 3 5 7 9
78
+ end
79
+
80
+ for x := range -10
81
+ print x // nothing. step is 1 by default.
82
+ end
83
+
84
+ ### `for` … `range` array
85
+
86
+ for x := range [1 2 3]
87
+ print x // 1 2 3
88
+ end
89
+
90
+ ### `for` … `range` map
91
+
92
+ m := { name:"Mali" sport:"climbing" }
93
+ for key := range m
94
+ print key m[key]
95
+ end
96
+
97
+ ### `break`
98
+
99
+ x := 0
100
+ while true
101
+ print "tick... "
102
+ sleep 1
103
+ if x > 9
104
+ print "💥"
105
+ break // `break` breaks out of the innermost loop
106
+ end
107
+ x = x + 1
108
+ end
109
+
110
+ ## Function definition
111
+
112
+ func add:num a:num b:num
113
+ return a + b
114
+ end
115
+
116
+ ### No return type
117
+
118
+ func foxprint s:string
119
+ print "🦊 " + s
120
+ end
121
+
122
+ ### Variadic
123
+
124
+ func list args:any...
125
+ for arg := range args[:-1]
126
+ printf "%v, " arg
127
+ end
128
+ printf "%v" args[-1]
129
+ end
130
+
131
+ ### Function calls
132
+
133
+ n := add 1 2 // 3
134
+ foxprint "🐾" // 🦊 🐾
135
+ list 2 true "blue" // 2, true, blue
136
+
137
+ ## Array
138
+
139
+    a1:[]num
140
+    a2:[][]string
141
+    a1 = [1 2 3 4] // type: []num
142
+    a2 = [["1" "2"] ["a" "b"]] // type: [][]string
143
+    a3 := [true false] // type: []bool
144
+    a4 := ["s1" // line break allowed
145
+           "s2"] // type: []string
146
+    a5 := ["chars" 123] // type: []any
147
+    a6:[]any // type: []any
148
+
149
+ ### Array element access
150
+
151
+ a1 := [1 2 3 4]
152
+ a2 := [["1" "2"] ["a" "b"]]
153
+ print a1[1] // 2
154
+ print a2[1][0] // "a"
155
+ print a1[-1] // 4
156
+
157
+ ### Concatenation
158
+
159
+ a := [1 2 3 4]
160
+ a = a + [ 100 ] // [1 2 3 4 100]; optional extra whitespace
161
+ a = [0] + a + [101 102] // [0 1 2 3 4 100 101 102]
162
+
163
+ ### Slicing
164
+
165
+ a := [1 2 3]
166
+ b := a[:2] // [1 2]
167
+ b = a[1:2] // [2]
168
+ b = a[-2:] // [2 3]
169
+
170
+ ## Map
171
+
172
+ m1:{}any // keys used in literals or with `.` must be identifiers.
173
+ m1.name = "fox"
174
+ m1.age = 42
175
+ m1["key with space"] = "🔑🪐"
176
+
177
+ m2 := {letters:"abc" name:"Jill"} // type: {}string
178
+ m3 := {} // type: {}any
179
+ m4 := {
180
+ letters:"abc" // line break allowed
181
+ nums:123
182
+ } // type: {}any
183
+ m5:{}[]num // map of array of numbers
184
+ m5.digits = [1 2 3]
185
+ m6:{}num
186
+ //m6.x = "y" // invalid, only num values allowed
187
+
188
+ ### Map value access
189
+
190
+ m := {letters:"abc" name:"Jill"}
191
+ s := "letters"
192
+ print m.letters // abc
193
+ print m[s] // abc
194
+ print m["letters"] // abc
195
+
196
+ ## `any`
197
+
198
+ x:any // any type, default value: false
199
+ m1:{}any // map with any value type
200
+ m2 := { letter:"a" number:1 }
201
+ arr1:[]any
202
+ arr2 := [ "b" 2 ]
203
+
204
+ ## Type assertion
205
+
206
+ x:any
207
+ x = [ 1 2 3 4 ] // concrete type num[]
208
+ s := x.([]num)
209
+
210
+ ## Type reflection
211
+
212
+ typeof "abc" // "string"
213
+ typeof true // "bool"
214
+ typeof [ 1 2 ] // "[]num"
215
+ typeof [[1 2] [3 4]] // "[][]num"
216
+
217
+ v:any
218
+ v = "🐐"
219
+ if (typeof v) == "string"
220
+ print "v is a string:" v
221
+ s := v.(string) // type assertion
222
+ print s+s // 🐐🐐
223
+ end
224
+
225
+ ## Event handling
226
+
227
+ on key
228
+ print "key pressed"
229
+ end
230
+
231
+ Evy can only handle a limited set of events, such as key presses,
232
+ pointer movements, or periodic screen redraws.
233
+
234
+ ### Event handlers with parameters
235
+
236
+ on key k:string
237
+ printf "%q pressed\n" k
238
+ end
239
+
240
+ # Example evy programs
241
+
242
+ ```evy
244
+ // Easy
248
+ // Hint
249
+ // Given an array of integers nums and an integer target, return indices of the two numbers such that they add up to target.
250
+ // You may assume that each input would have exactly one solution, and you may not use the same element twice.
251
+ // You can return the answer in any order.
252
+ // Example 1:
253
+ // Input: nums = [2,7,11,15], target = 9
254
+ // Output: [0,1]
255
+ // Explanation: Because nums[0] + nums[1] == 9, we return [0, 1].
256
+ // Example 2:
257
+ // Input: nums = [3,2,4], target = 6
258
+ // Output: [1,2]
259
+ // Example 3:
260
+ // Input: nums = [3,3], target = 6
261
+ // Output: [0,1]
262
+ // Constraints:
263
+ // 2 <= nums.length <= 10^4
264
+ // -10^9 <= nums[i] <= 10^9
265
+ // -10^9 <= target <= 10^9
266
+ // Only one valid answer exists.
267
+ // Follow-up: Can you come up with an algorithm that is less than O(n^2) time complexity?
268
+
269
+ func twosum:[]num nums:[]num target:num
270
+ m:{}num
271
+ for i := range (len nums)
272
+ v := nums[i]
273
+ if has m (sprintf "%v" (target - v))
274
+ return [m[sprintf "%v" (target - v)] i]
275
+ end
276
+ m[sprintf "%v" v] = i
277
+ end
278
+ return []
279
+ end
280
+
281
+ fails := 0
282
+ total := 0
283
+
284
+ func assert want:any got:any
285
+ total = total + 1
286
+ if want != got
287
+ fails = fails + 1
288
+ printf "want != got: want %v got %v\n" want got
289
+ end
290
+ end
291
+
292
+ func finished
293
+ printf "%v of %v tests passed\n" (total - fails) total
294
+ end
295
+
296
+ // -- Test Cases Start -- //
297
+ assert [0 1] (twosum [2 7 11 15] 9)
298
+ assert [1 2] (twosum [3 2 4] 6)
299
+ assert [0 1] (twosum [3 3] 6)
300
+ // -- Test Cases End -- //
301
+ finished
302
+ ```
303
+
304
+
305
+ ```evy
306
+ // 199. Binary Tree Right Side View
308
+ // Medium
311
+ // Given the root of a binary tree, imagine yourself standing on the right side of it, return the values of the nodes you can see ordered from top to bottom.
312
+ // Example 1:
313
+ // Input: root = [1,2,3,null,5,null,4]
314
+ // Output: [1,3,4]
315
+ // Example 2:
316
+ // Input: root = [1,null,3]
317
+ // Output: [1,3]
318
+ // Example 3:
319
+ // Input: root = []
320
+ // Output: []
321
+ // Constraints:
322
+ // The number of nodes in the tree is in the range [0, 100].
323
+ // -100 <= Node.val <= 100
324
+
325
+ func rightSideView:[]any treearr:[]any
326
+ root:any
327
+ root = buildBinaryTree treearr
328
+ queue := []
329
+ res := []
330
+ queue = queue + [root]
331
+ while (len queue) > 0
332
+ size := len queue
333
+ for i := range 0 size
334
+ node:{}any
335
+ node = queue[0].({}any)
336
+ queue = queue[1:]
337
+ if (has node "val") and i == size - 1
338
+ res = res + [node["val"]]
339
+ end
340
+ if (has node "left") and node["left"].({}any) != {}
341
+ queue = queue + [node["left"]]
342
+ end
343
+ if (has node "right") and node["right"].({}any) != {}
344
+ queue = queue + [node["right"]]
345
+ end
346
+ end
347
+ end
348
+ return res
349
+ end
350
+
351
+ fails := 0
352
+ total := 0
353
+
354
+ func assert want:any got:any
355
+ total = total + 1
356
+ if want != got
357
+ fails = fails + 1
358
+ printf "want != got: want %v got %v\n" want got
359
+ end
360
+ end
361
+
362
+ func finished
363
+ printf "%v of %v tests passed\n" (total - fails) total
364
+ end
365
+
366
+ func buildBinaryTree:{}any tree:[]any
367
+ root:{}any
368
+ rootany:any
369
+ rootany = root
370
+ queue := [rootany]
371
+ for i := range 0 (len tree)
372
+ if (len queue) == 0
373
+ break
374
+ end
375
+ node:{}any
376
+ node = queue[0].({}any)
377
+ queue = queue[1:]
378
+ anynull:any
379
+ anynull = "null"
380
+ if tree[i] != anynull
381
+ node["val"] = tree[i]
382
+ node["left"] = {}
383
+ node["right"] = {}
384
+ queue = queue + [node["left"]]
385
+ queue = queue + [node["right"]]
386
+ end
387
+ end
388
+ return root
389
+ end
390
+
391
+ // -- Test Cases Start -- //
392
+
393
+ assert [1 3 4 ""][:-1] (rightSideView [1 2 3 "null" 5 "null" 4])
394
+ assert [1 3 ""][:-1] (rightSideView [1 "null" 3])
395
+ assert [] (rightSideView [])
396
+ assert [1 3 4 ""][:-1] (rightSideView [1 2 3 4])
397
+ // -- Test Cases End -- //
398
+ finished
399
+ ```
400
+
401
+ ```evy
402
+ // 412. Fizz Buzz
403
+ // Easy
406
+ // Given an integer n, return a string array answer (1-indexed) where:
407
+ // answer[i] == "FizzBuzz" if i is divisible by 3 and 5.
408
+ // answer[i] == "Fizz" if i is divisible by 3.
409
+ // answer[i] == "Buzz" if i is divisible by 5.
410
+ // answer[i] == i (as a string) if none of the above conditions are true.
411
+ // Example 1:
412
+ // Input: n = 3
413
+ // Output: ["1","2","Fizz"]
414
+ // Example 2:
415
+ // Input: n = 5
416
+ // Output: ["1","2","Fizz","4","Buzz"]
417
+ // Example 3:
418
+ // Input: n = 15
419
+ // Output: ["1","2","Fizz","4","Buzz","Fizz","7","8","Fizz","Buzz","11","Fizz","13","14","FizzBuzz"]
420
+ // Constraints:
421
+ // 1 <= n <= 10^4
422
+
423
+ func fizzbuzz:[]string n:num
424
+ ans:[]string
425
+ for i := range 1 (n + 1)
426
+ s:string
427
+ if i % 3 == 0
428
+ s = s + "Fizz"
429
+ end
430
+ if i % 5 == 0
431
+ s = s + "Buzz"
432
+ end
433
+ if s == ""
434
+ s = sprintf "%v" i
435
+ end
436
+ ans = ans + [s]
437
+ end
438
+ return ans
439
+ end
440
+
441
+ fails := 0
442
+ total := 0
443
+
444
+ func assert want:any got:any
445
+ total = total + 1
446
+ if want != got
447
+ fails = fails + 1
448
+ printf "want != got: want %v got %v\n" want got
449
+ end
450
+ end
451
+
452
+ func finished
453
+ printf "%v of %v tests passed\n" (total - fails) total
454
+ end
455
+
456
+ // -- Test Cases Start -- //
457
+ assert ["1" "2" "Fizz"] (fizzbuzz 3)
458
+ assert ["1" "2" "Fizz" "4" "Buzz"] (fizzbuzz 5)
459
+ assert ["1" "2" "Fizz" "4" "Buzz" "Fizz" "7" "8" "Fizz" "Buzz" "11" "Fizz" "13" "14" "FizzBuzz"] (fizzbuzz 15)
460
+ // -- Test Cases End -- //
461
+ finished
462
+ ```
463
+
464
+
465
+ With all of this, solve the following problem:
466
+
467
+ Write a function has_close_element that checks if, in a given list of numbers, any two numbers are closer to each other than a given threshold.
468
+
469
+ Write the program in evy:
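+
+ A minimal sketch of one possible solution, for illustration only (this is an assumption, not the committed has_closest_elements.evy; it reuses only constructs demonstrated above):
+
+ ```evy
+ func has_close_element:bool nums:[]num threshold:num
+     for i := range (len nums)
+         for j := range (i + 1) (len nums)
+             d := nums[i] - nums[j]
+             if d < 0
+                 d = 0 - d
+             end
+             if d < threshold
+                 return true
+             end
+         end
+     end
+     return false
+ end
+
+ print (has_close_element [1 2 3] 0.5) // false
+ print (has_close_element [1 2.8 3 4 5 2] 0.3) // true
+ ```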
python ADDED
File without changes
sft.ipynb ADDED
@@ -0,0 +1,181 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 2,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stdout",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "Collecting trl\n",
13
+ " Downloading trl-0.8.0-py3-none-any.whl.metadata (11 kB)\n",
14
+ "Requirement already satisfied: torch>=1.4.0 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from trl) (2.2.1)\n",
15
+ "Requirement already satisfied: transformers>=4.31.0 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from trl) (4.38.2)\n",
16
+ "Requirement already satisfied: numpy>=1.18.2 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from trl) (1.26.3)\n",
17
+ "Requirement already satisfied: accelerate in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from trl) (0.27.2)\n",
18
+ "Requirement already satisfied: datasets in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from trl) (2.18.0)\n",
19
+ "Collecting tyro>=0.5.11 (from trl)\n",
20
+ " Downloading tyro-0.7.3-py3-none-any.whl.metadata (7.7 kB)\n",
21
+ "Requirement already satisfied: filelock in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from torch>=1.4.0->trl) (3.13.1)\n",
22
+ "Requirement already satisfied: typing-extensions>=4.8.0 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from torch>=1.4.0->trl) (4.10.0)\n",
23
+ "Requirement already satisfied: sympy in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from torch>=1.4.0->trl) (1.12)\n",
24
+ "Requirement already satisfied: networkx in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from torch>=1.4.0->trl) (3.2.1)\n",
25
+ "Requirement already satisfied: jinja2 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from torch>=1.4.0->trl) (3.1.3)\n",
26
+ "Requirement already satisfied: fsspec in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from torch>=1.4.0->trl) (2024.2.0)\n",
27
+ "Requirement already satisfied: huggingface-hub<1.0,>=0.19.3 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from transformers>=4.31.0->trl) (0.21.3)\n",
28
+ "Requirement already satisfied: packaging>=20.0 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from transformers>=4.31.0->trl) (23.2)\n",
29
+ "Requirement already satisfied: pyyaml>=5.1 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from transformers>=4.31.0->trl) (6.0.1)\n",
30
+ "Requirement already satisfied: regex!=2019.12.17 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from transformers>=4.31.0->trl) (2023.12.25)\n",
31
+ "Requirement already satisfied: requests in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from transformers>=4.31.0->trl) (2.31.0)\n",
32
+ "Requirement already satisfied: tokenizers<0.19,>=0.14 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from transformers>=4.31.0->trl) (0.15.2)\n",
33
+ "Requirement already satisfied: safetensors>=0.4.1 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from transformers>=4.31.0->trl) (0.4.2)\n",
34
+ "Requirement already satisfied: tqdm>=4.27 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from transformers>=4.31.0->trl) (4.66.1)\n",
35
+ "Collecting docstring-parser>=0.14.1 (from tyro>=0.5.11->trl)\n",
36
+ " Downloading docstring_parser-0.16-py3-none-any.whl.metadata (3.0 kB)\n",
37
+ "Requirement already satisfied: rich>=11.1.0 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from tyro>=0.5.11->trl) (13.7.0)\n",
38
+ "Collecting shtab>=1.5.6 (from tyro>=0.5.11->trl)\n",
39
+ " Downloading shtab-1.7.1-py3-none-any.whl.metadata (7.3 kB)\n",
40
+ "Requirement already satisfied: psutil in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from accelerate->trl) (5.9.8)\n",
41
+ "Requirement already satisfied: pyarrow>=12.0.0 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from datasets->trl) (15.0.1)\n",
42
+ "Requirement already satisfied: pyarrow-hotfix in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from datasets->trl) (0.6)\n",
43
+ "Requirement already satisfied: dill<0.3.9,>=0.3.0 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from datasets->trl) (0.3.8)\n",
44
+ "Requirement already satisfied: pandas in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from datasets->trl) (2.2.1)\n",
45
+ "Requirement already satisfied: xxhash in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from datasets->trl) (3.4.1)\n",
46
+ "Requirement already satisfied: multiprocess in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from datasets->trl) (0.70.16)\n",
47
+ "Requirement already satisfied: aiohttp in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from datasets->trl) (3.9.3)\n",
48
+ "Requirement already satisfied: aiosignal>=1.1.2 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from aiohttp->datasets->trl) (1.3.1)\n",
49
+ "Requirement already satisfied: attrs>=17.3.0 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from aiohttp->datasets->trl) (23.2.0)\n",
50
+ "Requirement already satisfied: frozenlist>=1.1.1 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from aiohttp->datasets->trl) (1.4.1)\n",
51
+ "Requirement already satisfied: multidict<7.0,>=4.5 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from aiohttp->datasets->trl) (6.0.5)\n",
52
+ "Requirement already satisfied: yarl<2.0,>=1.0 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from aiohttp->datasets->trl) (1.9.4)\n",
53
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from requests->transformers>=4.31.0->trl) (3.3.2)\n",
54
+ "Requirement already satisfied: idna<4,>=2.5 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from requests->transformers>=4.31.0->trl) (3.6)\n",
55
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from requests->transformers>=4.31.0->trl) (2.2.1)\n",
56
+ "Requirement already satisfied: certifi>=2017.4.17 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from requests->transformers>=4.31.0->trl) (2024.2.2)\n",
57
+ "Requirement already satisfied: markdown-it-py>=2.2.0 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from rich>=11.1.0->tyro>=0.5.11->trl) (3.0.0)\n",
58
+ "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from rich>=11.1.0->tyro>=0.5.11->trl) (2.17.2)\n",
59
+ "Requirement already satisfied: MarkupSafe>=2.0 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from jinja2->torch>=1.4.0->trl) (2.1.5)\n",
60
+ "Requirement already satisfied: python-dateutil>=2.8.2 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from pandas->datasets->trl) (2.9.0.post0)\n",
61
+ "Requirement already satisfied: pytz>=2020.1 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from pandas->datasets->trl) (2024.1)\n",
62
+ "Requirement already satisfied: tzdata>=2022.7 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from pandas->datasets->trl) (2024.1)\n",
63
+ "Requirement already satisfied: mpmath>=0.19 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from sympy->torch>=1.4.0->trl) (1.3.0)\n",
64
+ "Requirement already satisfied: mdurl~=0.1 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from markdown-it-py>=2.2.0->rich>=11.1.0->tyro>=0.5.11->trl) (0.1.2)\n",
65
+ "Requirement already satisfied: six>=1.5 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas->datasets->trl) (1.16.0)\n",
66
+ "Downloading trl-0.8.0-py3-none-any.whl (224 kB)\n",
67
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m225.0/225.0 kB\u001b[0m \u001b[31m9.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
68
+ "\u001b[?25hDownloading tyro-0.7.3-py3-none-any.whl (79 kB)\n",
69
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.8/79.8 kB\u001b[0m \u001b[31m10.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
70
+ "\u001b[?25hDownloading docstring_parser-0.16-py3-none-any.whl (36 kB)\n",
71
+ "Downloading shtab-1.7.1-py3-none-any.whl (14 kB)\n",
72
+ "Installing collected packages: shtab, docstring-parser, tyro, trl\n",
73
+ "Successfully installed docstring-parser-0.16 shtab-1.7.1 trl-0.8.0 tyro-0.7.3\n"
74
+ ]
75
+ }
76
+ ],
77
+ "source": [
78
+ "!pip install trl"
79
+ ]
80
+ },
81
+ {
82
+ "cell_type": "code",
83
+ "execution_count": 3,
84
+ "metadata": {},
85
+ "outputs": [
86
+ {
87
+ "name": "stdout",
88
+ "output_type": "stream",
89
+ "text": [
90
+ "'NoneType' object has no attribute 'cadam32bit_grad_fp32'\n"
91
+ ]
92
+ },
93
+ {
94
+ "name": "stderr",
95
+ "output_type": "stream",
96
+ "text": [
97
+ "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/bitsandbytes/cextension.py:34: UserWarning: The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable.\n",
98
+ " warn(\"The installed version of bitsandbytes was compiled without GPU support. \"\n"
99
+ ]
100
+ },
101
+ {
102
+ "ename": "RuntimeError",
103
+ "evalue": "Failed to import trl.trainer.sft_trainer because of the following error (look up to see its traceback):\ncannot import name 'prepare_model_for_kbit_training' from 'peft' (/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/peft/__init__.py)",
104
+ "output_type": "error",
105
+ "traceback": [
106
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
107
+ "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
108
+ "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/trl/import_utils.py:172\u001b[0m, in \u001b[0;36m_LazyModule._get_module\u001b[0;34m(self, module_name)\u001b[0m\n\u001b[1;32m 171\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 172\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mimportlib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mimport_module\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m.\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mmodule_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;18;43m__name__\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 173\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n",
109
+ "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/importlib/__init__.py:90\u001b[0m, in \u001b[0;36mimport_module\u001b[0;34m(name, package)\u001b[0m\n\u001b[1;32m 89\u001b[0m level \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[0;32m---> 90\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_bootstrap\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_gcd_import\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m[\u001b[49m\u001b[43mlevel\u001b[49m\u001b[43m:\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpackage\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlevel\u001b[49m\u001b[43m)\u001b[49m\n",
110
+ "File \u001b[0;32m<frozen importlib._bootstrap>:1387\u001b[0m, in \u001b[0;36m_gcd_import\u001b[0;34m(name, package, level)\u001b[0m\n",
111
+ "File \u001b[0;32m<frozen importlib._bootstrap>:1360\u001b[0m, in \u001b[0;36m_find_and_load\u001b[0;34m(name, import_)\u001b[0m\n",
112
+ "File \u001b[0;32m<frozen importlib._bootstrap>:1331\u001b[0m, in \u001b[0;36m_find_and_load_unlocked\u001b[0;34m(name, import_)\u001b[0m\n",
113
+ "File \u001b[0;32m<frozen importlib._bootstrap>:935\u001b[0m, in \u001b[0;36m_load_unlocked\u001b[0;34m(spec)\u001b[0m\n",
114
+ "File \u001b[0;32m<frozen importlib._bootstrap_external>:994\u001b[0m, in \u001b[0;36mexec_module\u001b[0;34m(self, module)\u001b[0m\n",
115
+ "File \u001b[0;32m<frozen importlib._bootstrap>:488\u001b[0m, in \u001b[0;36m_call_with_frames_removed\u001b[0;34m(f, *args, **kwds)\u001b[0m\n",
116
+ "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/trl/trainer/sft_trainer.py:53\u001b[0m\n\u001b[1;32m 52\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_peft_available():\n\u001b[0;32m---> 53\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpeft\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m PeftConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training\n\u001b[1;32m 56\u001b[0m \u001b[38;5;28;01mclass\u001b[39;00m \u001b[38;5;21;01mSFTTrainer\u001b[39;00m(Trainer):\n",
117
+ "\u001b[0;31mImportError\u001b[0m: cannot import name 'prepare_model_for_kbit_training' from 'peft' (/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/peft/__init__.py)",
118
+ "\nThe above exception was the direct cause of the following exception:\n",
119
+ "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
120
+ "Cell \u001b[0;32mIn[3], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AutoModelForCausalLM, AutoTokenizer\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdatasets\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m load_dataset\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtrl\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m SFTTrainer, DataCollatorForCompletionOnlyLM\n\u001b[1;32m 5\u001b[0m dataset \u001b[38;5;241m=\u001b[39m load_dataset(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlucasmccabe-lmi/CodeAlpaca-20k\u001b[39m\u001b[38;5;124m\"\u001b[39m, split\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtrain\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 7\u001b[0m model \u001b[38;5;241m=\u001b[39m AutoModelForCausalLM\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfacebook/opt-350m\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
121
+ "File \u001b[0;32m<frozen importlib._bootstrap>:1412\u001b[0m, in \u001b[0;36m_handle_fromlist\u001b[0;34m(module, fromlist, import_, recursive)\u001b[0m\n",
122
+ "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/trl/import_utils.py:163\u001b[0m, in \u001b[0;36m_LazyModule.__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m name \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_class_to_module\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 162\u001b[0m module \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_module(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_class_to_module[name])\n\u001b[0;32m--> 163\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mmodule\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 164\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 165\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodule \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m has no attribute \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n",
123
+ "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/trl/import_utils.py:162\u001b[0m, in \u001b[0;36m_LazyModule.__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 160\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_module(name)\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m name \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_class_to_module\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[0;32m--> 162\u001b[0m module \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_module\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_class_to_module\u001b[49m\u001b[43m[\u001b[49m\u001b[43mname\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 163\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(module, name)\n\u001b[1;32m 164\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
124
+ "File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/trl/import_utils.py:174\u001b[0m, in \u001b[0;36m_LazyModule._get_module\u001b[0;34m(self, module_name)\u001b[0m\n\u001b[1;32m 172\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m importlib\u001b[38;5;241m.\u001b[39mimport_module(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m module_name, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m)\n\u001b[1;32m 173\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m--> 174\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\n\u001b[1;32m 175\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFailed to import \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmodule_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m because of the following error (look up to see its\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 176\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m traceback):\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00me\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 177\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01me\u001b[39;00m\n",
125
+ "\u001b[0;31mRuntimeError\u001b[0m: Failed to import trl.trainer.sft_trainer because of the following error (look up to see its traceback):\ncannot import name 'prepare_model_for_kbit_training' from 'peft' (/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/peft/__init__.py)"
126
+ ]
127
+ }
128
+ ],
129
+ "source": [
130
+ "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
131
+ "from datasets import load_dataset\n",
132
+ "from trl import SFTTrainer, DataCollatorForCompletionOnlyLM\n",
133
+ "\n",
134
+ "dataset = load_dataset(\"lucasmccabe-lmi/CodeAlpaca-20k\", split=\"train\")\n",
135
+ "\n",
136
+ "model = AutoModelForCausalLM.from_pretrained(\"facebook/opt-350m\")\n",
137
+ "tokenizer = AutoTokenizer.from_pretrained(\"facebook/opt-350m\")\n",
138
+ "\n",
139
+ "def formatting_prompts_func(example):\n",
140
+ " output_texts = []\n",
141
+ " for i in range(len(example['instruction'])):\n",
142
+ " text = f\"### Question: {example['instruction'][i]}\\n ### Answer: {example['output'][i]}\"\n",
143
+ " output_texts.append(text)\n",
144
+ " return output_texts\n",
145
+ "\n",
146
+ "response_template = \" ### Answer:\"\n",
147
+ "collator = DataCollatorForCompletionOnlyLM(response_template, tokenizer=tokenizer)\n",
148
+ "\n",
149
+ "trainer = SFTTrainer(\n",
150
+ " model,\n",
151
+ " train_dataset=dataset,\n",
152
+ " formatting_func=formatting_prompts_func,\n",
153
+ " data_collator=collator,\n",
154
+ ")\n",
155
+ "\n",
156
+ "trainer.train()"
157
+ ]
158
+ }
159
+ ],
160
+ "metadata": {
161
+ "kernelspec": {
162
+ "display_name": "Python 3",
163
+ "language": "python",
164
+ "name": "python3"
165
+ },
166
+ "language_info": {
167
+ "codemirror_mode": {
168
+ "name": "ipython",
169
+ "version": 3
170
+ },
171
+ "file_extension": ".py",
172
+ "mimetype": "text/x-python",
173
+ "name": "python",
174
+ "nbconvert_exporter": "python",
175
+ "pygments_lexer": "ipython3",
176
+ "version": "3.12.1"
177
+ }
178
+ },
179
+ "nbformat": 4,
180
+ "nbformat_minor": 2
181
+ }
squash.py ADDED
@@ -0,0 +1,29 @@
1
+ import json
2
+
3
+ def squash_details(data):
4
+ """
5
+ Squashes details field into a single string with key-value pairs.
6
+
7
+ Args:
8
+ data: A list of dictionaries containing message and details fields.
9
+
10
+ Returns:
11
+ A list of dictionaries with the modified details field.
12
+ """
13
+ for item in data:
14
+ details_str = ", ".join([f"{key}: {value}" for key, value in item["details"].items()])
15
+ item["details"] = details_str
16
+ return data
17
+
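+ # Example of the transformation (hypothetical record, for illustration):
+ # {"message": "Lunch", "details": {"start": "12pm", "location": "cafe"}}
+ # becomes {"message": "Lunch", "details": "start: 12pm, location: cafe"}
+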
18
+ # Read data from data.json
19
+ with open("data.json", "r") as file:
20
+ data = json.load(file)
21
+
22
+ # Squash details
23
+ squashed_data = squash_details(data)
24
+
25
+ # Write modified data to data2.json
26
+ with open("data2.json", "w") as file:
27
+ json.dump(squashed_data, file, indent=4) # Add indentation for readability (optional)
28
+
29
+ print("Successfully processed data and wrote to data2.json!")
translate.py ADDED
@@ -0,0 +1,10 @@
1
+ import json
2
+ with open("HumanEval.jsonl", "r", encoding="utf8") as file:
3
+     # read HumanEval.jsonl line by line
4
+     # and parse each line as JSON
5
+ data = [json.loads(line) for line in file]
6
+ for elem in data[:1]:
7
+ print("prompt:\n", elem["prompt"])
8
+ print("entry_point:\n", elem["entry_point"])
9
+ print("canonical_solution:\n", elem["canonical_solution"])
10
+ print("test:\n", elem["test"])
youtube-tutorial.ipynb ADDED
@@ -0,0 +1,24 @@
1
+ {
2
+ "cells": [],
3
+ "metadata": {
4
+ "kernelspec": {
5
+ "display_name": "Python 3",
6
+ "language": "python",
7
+ "name": "python3"
8
+ },
9
+ "language_info": {
10
+ "codemirror_mode": {
11
+ "name": "ipython",
12
+ "version": 3
13
+ },
14
+ "file_extension": ".py",
15
+ "mimetype": "text/x-python",
16
+ "name": "python",
17
+ "nbconvert_exporter": "python",
18
+ "pygments_lexer": "ipython3",
19
+ "version": "3.12.1"
20
+ }
21
+ },
22
+ "nbformat": 4,
23
+ "nbformat_minor": 2
24
+ }