joshcarp committed bcffb9c
Initial huggingface commit
Browse files
- .gitattributes +35 -0
- HumanEval.jsonl +0 -0
- README.md +3 -0
- another.ipynb +112 -0
- calendar copy.ipynb +587 -0
- calendar.ipynb +524 -0
- data-flattened.json +470 -0
- data.json +620 -0
- data2.json +310 -0
- data3.jsonl +80 -0
- dataset.csv +0 -0
- elif.ipynb +452 -0
- foo.py +15 -0
- foobar +0 -0
- foobar.txt +25 -0
- has_closest_elements.evy +38 -0
- ner.ipynb +363 -0
- nltk.ipynb +0 -0
- notebook.ipynb +268 -0
- nuner.ipynb +124 -0
- prompt.md +469 -0
- python +0 -0
- sft.ipynb +181 -0
- squash.py +29 -0
- translate.py +10 -0
- youtube-tutorial.ipynb +24 -0
.gitattributes
ADDED
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

HumanEval.jsonl
ADDED
The diff for this file is too large to render.
See raw diff
README.md
ADDED
@@ -0,0 +1,3 @@
+## Calendar LLM
+
+An experiment in fine-tuning LLMs to generate calendar events from natural language.
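To make the goal concrete: the notebooks in this commit label three event fields (datetime, description, location) and test on messages like the one below. The event schema here is a hypothetical illustration only; the actual format lives in data.json, which is not rendered on this page.

```python
# Hypothetical input -> output pair for the fine-tuning task. The field
# values and their serialization are assumptions for illustration only.
message = "Meeting with John at 2 pm tomorrow in the conference room"

expected_event = {
    "datetime": "tomorrow 14:00",        # assumed serialization of the time
    "description": "Meeting with John",  # assumed
    "location": "conference room",       # assumed
}

print(f"{message!r} -> {expected_event}")
```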
another.ipynb
ADDED
@@ -0,0 +1,112 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Entity: L V | Type: ('Let', 'VB')\n",
+      "Entity: ' P | Type: (\"'s\", 'POS')\n",
+      "Entity: m N | Type: ('meet', 'NN')\n",
+      "Entity: f I | Type: ('for', 'IN')\n",
+      "Entity: l N | Type: ('lunch', 'NN')\n",
+      "Entity: t N | Type: ('tomorrow', 'NN')\n",
+      "Entity: a I | Type: ('at', 'IN')\n",
+      "Entity: 1 C | Type: ('12', 'CD')\n",
+      "Entity: P N | Type: ('PM', 'NNP')\n",
+      "Entity: a I | Type: ('at', 'IN')\n",
+      "Entity: t D | Type: ('the', 'DT')\n",
+      "Entity: Italian | Type: (GPE Italian/JJ)\n",
+      "Entity: r N | Type: ('restaurant', 'NN')\n",
+      "Entity: o I | Type: ('on', 'IN')\n",
+      "Entity: Main Street | Type: (FACILITY Main/NNP Street/NNP)\n",
+      "Entity: . . | Type: ('.', '.')\n"
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "from nltk import ne_chunk, pos_tag\n",
+    "from nltk.tokenize import word_tokenize\n",
+    "\n",
+    "# Sample text for demonstration\n",
+    "text = \"Let's meet for lunch tomorrow at 12 PM at the Italian restaurant on Main Street.\"\n",
+    "\n",
+    "# Tokenize the text into words\n",
+    "tokens = word_tokenize(text)\n",
+    "\n",
+    "# Apply NER using NLTK's pre-trained models\n",
+    "ner_tags = ne_chunk(pos_tag(tokens))\n",
+    "\n",
+    "# Print the named entities\n",
+    "for chunk in ner_tags:\n",
+    "    if hasattr(chunk, 'label'):\n",
+    "        print(f\"Entity: {' '.join(c[0] for c in chunk)} | Type: {chunk}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Entity: Lunch Tomorrow | Type: PERSON\n",
+      "Entity: Italian | Type: GPE\n",
+      "Entity: Main Street | Type: FACILITY\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Apply NER using NLTK's pre-trained models\n",
+    "ner_tags = ne_chunk(pos_tag(tokens))\n",
+    "\n",
+    "# Print the named entities\n",
+    "for chunk in ner_tags:\n",
+    "    if hasattr(chunk, 'label'):\n",
+    "        print(f\"Entity: {' '.join(c[0] for c in chunk)} | Type: {chunk.label()}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
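These two cells are the commit's first NER experiment; the second cell fixes the first by printing chunk.label() instead of the whole chunk, so only true entities are reported. A self-contained version that runs outside the notebook is sketched below; the nltk.download calls are an assumption about what a fresh environment needs (the notebook does not show them), and resource names vary slightly between NLTK versions.

```python
# Standalone sketch of the notebook's NER cells. Assumption: a fresh NLTK
# install needs these resources downloaded first (the notebook omits this);
# exact resource names differ across NLTK versions.
import nltk
from nltk import ne_chunk, pos_tag
from nltk.tokenize import word_tokenize

for resource in ("punkt", "averaged_perceptron_tagger", "maxent_ne_chunker", "words"):
    nltk.download(resource, quiet=True)

text = "Let's meet for lunch tomorrow at 12 PM at the Italian restaurant on Main Street."

tokens = word_tokenize(text)          # split the sentence into word tokens
ner_tags = ne_chunk(pos_tag(tokens))  # POS-tag, then chunk named entities

# Entity subtrees are nltk.Tree objects and carry .label();
# ordinary (token, tag) pairs are plain tuples and are skipped.
for chunk in ner_tags:
    if hasattr(chunk, "label"):
        print(f"Entity: {' '.join(tok for tok, _ in chunk)} | Type: {chunk.label()}")
```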
calendar copy.ipynb
ADDED
@@ -0,0 +1,587 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 52,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from datasets import load_dataset\n",
+    "\n",
+    "dataset = load_dataset(\"json\", data_files=\"data-flattened.json\", split=\"train\")\n",
+    "\n",
+    "labels = [\"datetime\", \"description\", \"location\"]\n",
+    "dataset = dataset.train_test_split(test_size=0.1)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 60,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Some weights of T5ForSequenceClassification were not initialized from the model checkpoint at google-t5/t5-small and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']\n",
+      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "7f612c075ba5465b85b56fa25e5c8e91",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Map: 0%| | 0/69 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "No chat template is defined for this tokenizer - using a default chat template that implements the ChatML format (without BOS/EOS tokens!). If the default is not appropriate for your model, please set `tokenizer.chat_template` to an appropriate template. See https://huggingface.co/docs/transformers/main/chat_templating for more information.\n",
+      "\n"
+     ]
+    },
+    {
+     "ename": "KeyError",
+     "evalue": "'summary'",
+     "output_type": "error",
+     "traceback": [
+      "---------------------------------------------------------------------------",
+      "KeyError                                  Traceback (most recent call last)",
+      "Cell In[60], line 28\n---> 28 tokenized_data_set = dataset.map(preprocess_function, batched=True)",
+      "File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/datasets/dataset_dict.py:869, in DatasetDict.map(...)\n--> 869 k: dataset.map(...)",
+      "File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/datasets/arrow_dataset.py:593, in transmit_tasks.<locals>.wrapper(*args, **kwargs)\n--> 593 out = func(self, *args, **kwargs)",
+      "File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/datasets/arrow_dataset.py:558, in transmit_format.<locals>.wrapper(*args, **kwargs)\n--> 558 out = func(self, *args, **kwargs)",
+      "File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/datasets/arrow_dataset.py:3105, in Dataset.map(...)\n-> 3105 for rank, done, content in Dataset._map_single(**dataset_kwargs):",
+      "File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/datasets/arrow_dataset.py:3482, in Dataset._map_single(...)\n-> 3482 batch = apply_function_on_filtered_inputs(batch, indices, ...)",
+      "File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/datasets/arrow_dataset.py:3361, in Dataset._map_single.<locals>.apply_function_on_filtered_inputs(...)\n-> 3361 processed_inputs = function(*fn_args, *additional_args, **fn_kwargs)",
+      "Cell In[60], line 23, in preprocess_function(examples)\n---> 23 labels = tokenizer(text_target=examples[\"summary\"], max_length=128, truncation=True, padding=\"max_length\")",
+      "File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/datasets/formatting/formatting.py:270, in LazyDict.__getitem__(self, key)\n--> 270 value = self.data[key]",
+      "KeyError: 'summary'"
+     ]
+    }
+   ],
+   "source": [
+    "from transformers import (\n",
+    "    AutoModelForSequenceClassification,\n",
+    "    AutoTokenizer,\n",
+    "    Trainer,\n",
+    "    TextClassificationPipeline,\n",
+    "    TrainingArguments,\n",
+    ")\n",
+    "\n",
+    "# Model and tokenizer selection\n",
+    "checkpoint = \"google-t5/t5-small\" # Ensure correct model name\n",
+    "\n",
+    "\n",
+    "# Configure model for multi-label classification\n",
+    "model = AutoModelForSequenceClassification.from_pretrained(\n",
+    "    checkpoint, num_labels=len(labels)\n",
+    ")\n",
+    "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
+    "\n",
+    "def preprocess_function(examples):\n",
+    "    inputs = [doc for doc in examples[\"message\"]]\n",
+    "    model_inputs = tokenizer(inputs, max_length=1024, truncation=True, padding=\"max_length\")\n",
+    "\n",
+    "    labels = tokenizer(text_target=examples[\"summary\"], max_length=128, truncation=True, padding=\"max_length\")\n",
+    "\n",
+    "    model_inputs[\"labels\"] = labels[\"input_ids\"]\n",
+    "    return model_inputs\n",
+    "\n",
+    "tokenized_data_set = dataset.map(preprocess_function, batched=True)\n",
+    "\n",
+    "# Training setup (assuming you have data in optimal JSON format)\n",
+    "training_args = TrainingArguments(\n",
+    "    output_dir=\"calendar_model\",\n",
+    "    evaluation_strategy=\"epoch\",\n",
+    "    learning_rate=5e-5,\n",
+    "    per_device_train_batch_size=16,\n",
+    "    per_device_eval_batch_size=16,\n",
+    "    weight_decay=0.01,\n",
+    "    save_total_limit=3,\n",
+    "    num_train_epochs=1,\n",
+    "    use_mps_device=True,\n",
+    "    # fp16=True,\n",
+    "    # push_to_hub=True,\n",
+    ")\n",
+    "\n",
+    "# Train the model\n",
+    "trainer = Trainer(\n",
+    "    model=model,\n",
+    "    args=training_args,\n",
+    "    train_dataset=dataset[\"train\"],\n",
+    "    eval_dataset=dataset[\"test\"],\n",
+    ")\n",
+    "trainer.train()\n",
+    "\n",
+    "# Create pipeline for multi-label prediction\n",
+    "pipe = TextClassificationPipeline(model=model, tokenizer=tokenizer, labels=labels)\n",
+    "\n",
+    "# Example usage for multi-label prediction\n",
+    "text = \"Meeting with John at 2 pm tomorrow in the conference room\"\n",
+    "calendar_entry = pipe(text)\n",
+    "\n",
+    "print(calendar_entry) # Output will be a list of dictionaries, one per label\n",
+    "\n",
+    "# Example: Accessing scores for the \"datetime\" label\n",
+    "datetime_predictions = calendar_entry[0]\n",
+    "print(datetime_predictions[\"score\"]) # List of prediction scores for \"datetime\"\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "506a9ad72c324024a186fda4e1fd7156",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Map: 0%| | 0/69 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "ename": "ValueError",
+     "evalue": "text input must be of type `str` (single example), `List[str]` (batch or single pretokenized example) or `List[List[str]]` (batch of pretokenized examples).",
+     "output_type": "error",
+     "traceback": [
+      "---------------------------------------------------------------------------",
+      "ValueError                                Traceback (most recent call last)",
+      "Cell In[6], line 1\n----> 1 tokenized_data_set = data_set.map(preprocess_function, batched=True)",
+      "File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/datasets/dataset_dict.py:869, in DatasetDict.map(...)\n--> 869 k: dataset.map(...)",
+      "File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/datasets/arrow_dataset.py:593, in transmit_tasks.<locals>.wrapper(*args, **kwargs)\n--> 593 out = func(self, *args, **kwargs)",
+      "File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/datasets/arrow_dataset.py:558, in transmit_format.<locals>.wrapper(*args, **kwargs)\n--> 558 out = func(self, *args, **kwargs)",
+      "File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/datasets/arrow_dataset.py:3105, in Dataset.map(...)\n-> 3105 for rank, done, content in Dataset._map_single(**dataset_kwargs):",
+      "File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/datasets/arrow_dataset.py:3482, in Dataset._map_single(...)\n-> 3482 batch = apply_function_on_filtered_inputs(batch, indices, ...)",
+      "File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/datasets/arrow_dataset.py:3361, in Dataset._map_single.<locals>.apply_function_on_filtered_inputs(...)\n-> 3361 processed_inputs = function(*fn_args, *additional_args, **fn_kwargs)",
+      "Cell In[5], line 14, in preprocess_function(examples)\n---> 14 labels = tokenizer(examples[\"labels\"], max_length=128, truncation=True, padding=\"max_length\")",
+      "File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/transformers/tokenization_utils_base.py:2829, in PreTrainedTokenizerBase.__call__(...)\n-> 2829 encodings = self._call_one(text=text, text_pair=text_pair, **all_kwargs)",
+      "File /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/transformers/tokenization_utils_base.py:2887, in PreTrainedTokenizerBase._call_one(...)\n-> 2887 raise ValueError(...)",
+      "ValueError: text input must be of type `str` (single example), `List[str]` (batch or single pretokenized example) or `List[List[str]]` (batch of pretokenized examples)."
+     ]
+    }
+   ],
+   "source": [
+    "tokenized_data_set = data_set.map(preprocess_function, batched=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from transformers import DataCollatorForSeq2Seq"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=checkpoint)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import evaluate"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "rouge = evaluate.load(\"rouge\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "def compute_metrics(eval_pred):\n",
+    "    predictions, labels = eval_pred\n",
+    "    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)\n",
+    "    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)\n",
+    "    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)\n",
+    "\n",
+    "    result = rouge.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)\n",
+    "\n",
+    "    prediction_lens = [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in predictions]\n",
+    "    result[\"gen_len\"] = np.mean(prediction_lens)\n",
+    "\n",
+    "    return {k: round(v, 4) for k, v in result.items()}\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from transformers import AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer\n",
+    "model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Model moved to MPS device\n"
+     ]
+    }
+   ],
+   "source": [
+    "import torch\n",
+    "\n",
+    "# Check that MPS is available\n",
+    "if not torch.backends.mps.is_available():\n",
+    "    if not torch.backends.mps.is_built():\n",
+    "        print(\"MPS not available because the current PyTorch install was not \"\n",
+    "              \"built with MPS enabled.\")\n",
+    "    else:\n",
+    "        print(\"MPS not available because the current MacOS version is not 12.3+ \"\n",
+    "              \"and/or you do not have an MPS-enabled device on this machine.\")\n",
+    "\n",
+    "else:\n",
+    "    mps_device = torch.device(\"mps\")\n",
+    "    model.to(mps_device)\n",
+    "    print(\"Model moved to MPS device\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/transformers/training_args.py:1951: UserWarning: `use_mps_device` is deprecated and will be removed in version 5.0 of 🤗 Transformers. `mps` device will be used by default if available similar to the way `cuda` device is used.Therefore, no action from user is required. \n",
+      "  warnings.warn(\n"
+     ]
+    }
+   ],
+   "source": [
+    "training_args = Seq2SeqTrainingArguments(\n",
+    "    output_dir=\"calendar_model\",\n",
+    "    evaluation_strategy=\"epoch\",\n",
+    "    learning_rate=2e-5,\n",
+    "    per_device_train_batch_size=16,\n",
+    "    per_device_eval_batch_size=16,\n",
+    "    weight_decay=0.01,\n",
+    "    save_total_limit=3,\n",
+    "    num_train_epochs=3,\n",
+    "    predict_with_generate=True,\n",
+    "    use_mps_device=True,\n",
+    "    # fp16=True,\n",
+    "    # push_to_hub=True,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "DatasetDict({\n",
+      "    train: Dataset({\n",
+      "        features: ['details', 'message'],\n",
+      "        num_rows: 69\n",
+      "    })\n",
+      "    test: Dataset({\n",
+      "        features: ['details', 'message'],\n",
+      "        num_rows: 8\n",
+      "    })\n",
|
333 |
+
"})\n"
|
334 |
+
]
|
335 |
+
}
|
336 |
+
],
|
337 |
+
"source": [
|
338 |
+
"print(data_set)"
|
339 |
+
]
|
340 |
+
},
|
341 |
+
{
|
342 |
+
"cell_type": "code",
|
343 |
+
"execution_count": 13,
|
344 |
+
"metadata": {},
|
345 |
+
"outputs": [],
|
346 |
+
"source": [
|
347 |
+
"trainer = Seq2SeqTrainer(\n",
|
348 |
+
" model=model,\n",
|
349 |
+
" args=training_args,\n",
|
350 |
+
" train_dataset=tokenized_data_set[\"train\"],\n",
|
351 |
+
" eval_dataset=tokenized_data_set[\"test\"],\n",
|
352 |
+
" tokenizer=tokenizer,\n",
|
353 |
+
" data_collator=data_collator,\n",
|
354 |
+
" compute_metrics=compute_metrics,\n",
|
355 |
+
")"
|
356 |
+
]
|
357 |
+
},
|
358 |
+
{
|
359 |
+
"cell_type": "code",
|
360 |
+
"execution_count": 14,
|
361 |
+
"metadata": {},
|
362 |
+
"outputs": [
|
363 |
+
{
|
364 |
+
"data": {
|
365 |
+
"application/vnd.jupyter.widget-view+json": {
|
366 |
+
"model_id": "9452caa67e26493eb4c189fd55a68c32",
|
367 |
+
"version_major": 2,
|
368 |
+
"version_minor": 0
|
369 |
+
},
|
370 |
+
"text/plain": [
|
371 |
+
" 0%| | 0/15 [00:00<?, ?it/s]"
|
372 |
+
]
|
373 |
+
},
|
374 |
+
"metadata": {},
|
375 |
+
"output_type": "display_data"
|
376 |
+
},
|
377 |
+
{
|
378 |
+
"name": "stderr",
|
379 |
+
"output_type": "stream",
|
380 |
+
"text": [
|
381 |
+
"/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/transformers/generation/utils.py:1178: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. We recommend setting `max_new_tokens` to control the maximum length of the generation.\n",
|
382 |
+
" warnings.warn(\n"
|
383 |
+
]
|
384 |
+
},
|
385 |
+
{
|
386 |
+
"data": {
|
387 |
+
"application/vnd.jupyter.widget-view+json": {
|
388 |
+
"model_id": "9829c1db68244e7b827c76f106a353a8",
|
389 |
+
"version_major": 2,
|
390 |
+
"version_minor": 0
|
391 |
+
},
|
392 |
+
"text/plain": [
|
393 |
+
" 0%| | 0/1 [00:00<?, ?it/s]"
|
394 |
+
]
|
395 |
+
},
|
396 |
+
"metadata": {},
|
397 |
+
"output_type": "display_data"
|
398 |
+
},
|
399 |
+
{
|
400 |
+
"name": "stdout",
|
401 |
+
"output_type": "stream",
|
402 |
+
"text": [
|
403 |
+
"{'eval_loss': 14.770042419433594, 'eval_rouge1': 0.2492, 'eval_rouge2': 0.132, 'eval_rougeL': 0.2098, 'eval_rougeLsum': 0.2078, 'eval_gen_len': 18.5, 'eval_runtime': 3.1599, 'eval_samples_per_second': 2.532, 'eval_steps_per_second': 0.316, 'epoch': 1.0}\n"
|
404 |
+
]
|
405 |
+
},
|
406 |
+
{
|
407 |
+
"data": {
|
408 |
+
"application/vnd.jupyter.widget-view+json": {
|
409 |
+
"model_id": "3e8f24890d4848e5958d51b2fad39827",
|
410 |
+
"version_major": 2,
|
411 |
+
"version_minor": 0
|
412 |
+
},
|
413 |
+
"text/plain": [
|
414 |
+
" 0%| | 0/1 [00:00<?, ?it/s]"
|
415 |
+
]
|
416 |
+
},
|
417 |
+
"metadata": {},
|
418 |
+
"output_type": "display_data"
|
419 |
+
},
|
420 |
+
{
|
421 |
+
"name": "stdout",
|
422 |
+
"output_type": "stream",
|
423 |
+
"text": [
|
424 |
+
"{'eval_loss': 13.279829978942871, 'eval_rouge1': 0.191, 'eval_rouge2': 0.0841, 'eval_rougeL': 0.171, 'eval_rougeLsum': 0.1669, 'eval_gen_len': 18.5, 'eval_runtime': 0.6868, 'eval_samples_per_second': 11.648, 'eval_steps_per_second': 1.456, 'epoch': 2.0}\n"
|
425 |
+
]
|
426 |
+
},
|
427 |
+
{
|
428 |
+
"data": {
|
429 |
+
"application/vnd.jupyter.widget-view+json": {
|
430 |
+
"model_id": "812b05e2e2234a87ab283f4771f8f615",
|
431 |
+
"version_major": 2,
|
432 |
+
"version_minor": 0
|
433 |
+
},
|
434 |
+
"text/plain": [
|
435 |
+
" 0%| | 0/1 [00:00<?, ?it/s]"
|
436 |
+
]
|
437 |
+
},
|
438 |
+
"metadata": {},
|
439 |
+
"output_type": "display_data"
|
440 |
+
},
|
441 |
+
{
|
442 |
+
"name": "stdout",
|
443 |
+
"output_type": "stream",
|
444 |
+
"text": [
|
445 |
+
"{'eval_loss': 12.672184944152832, 'eval_rouge1': 0.1767, 'eval_rouge2': 0.0792, 'eval_rougeL': 0.1555, 'eval_rougeLsum': 0.1518, 'eval_gen_len': 19.0, 'eval_runtime': 0.6063, 'eval_samples_per_second': 13.195, 'eval_steps_per_second': 1.649, 'epoch': 3.0}\n",
|
446 |
+
"{'train_runtime': 12.159, 'train_samples_per_second': 17.024, 'train_steps_per_second': 1.234, 'train_loss': 12.712192789713542, 'epoch': 3.0}\n"
|
447 |
+
]
|
448 |
+
},
|
449 |
+
{
|
450 |
+
"data": {
|
451 |
+
"text/plain": [
|
452 |
+
"TrainOutput(global_step=15, training_loss=12.712192789713542, metrics={'train_runtime': 12.159, 'train_samples_per_second': 17.024, 'train_steps_per_second': 1.234, 'train_loss': 12.712192789713542, 'epoch': 3.0})"
|
453 |
+
]
|
454 |
+
},
|
455 |
+
"execution_count": 14,
|
456 |
+
"metadata": {},
|
457 |
+
"output_type": "execute_result"
|
458 |
+
}
|
459 |
+
],
|
460 |
+
"source": [
|
461 |
+
"trainer.train()"
|
462 |
+
]
|
463 |
+
},
|
464 |
+
{
|
465 |
+
"cell_type": "code",
|
466 |
+
"execution_count": 15,
|
467 |
+
"metadata": {},
|
468 |
+
"outputs": [
|
469 |
+
{
|
470 |
+
"data": {
|
471 |
+
"application/vnd.jupyter.widget-view+json": {
|
472 |
+
"model_id": "7350298fb1d24de696d2fdce2b167cb7",
|
473 |
+
"version_major": 2,
|
474 |
+
"version_minor": 0
|
475 |
+
},
|
476 |
+
"text/plain": [
|
477 |
+
"Upload 2 LFS files: 0%| | 0/2 [00:00<?, ?it/s]"
|
478 |
+
]
|
479 |
+
},
|
480 |
+
"metadata": {},
|
481 |
+
"output_type": "display_data"
|
482 |
+
},
|
483 |
+
{
|
484 |
+
"data": {
|
485 |
+
"application/vnd.jupyter.widget-view+json": {
|
486 |
+
"model_id": "bdd17cc0c7624ab0babcf12b19157c75",
|
487 |
+
"version_major": 2,
|
488 |
+
"version_minor": 0
|
489 |
+
},
|
490 |
+
"text/plain": [
|
491 |
+
"model.safetensors: 0%| | 0.00/242M [00:00<?, ?B/s]"
|
492 |
+
]
|
493 |
+
},
|
494 |
+
"metadata": {},
|
495 |
+
"output_type": "display_data"
|
496 |
+
},
|
497 |
+
{
|
498 |
+
"data": {
|
499 |
+
"application/vnd.jupyter.widget-view+json": {
|
500 |
+
"model_id": "040b39bf6bdc4e939cf56a47e1f4451e",
|
501 |
+
"version_major": 2,
|
502 |
+
"version_minor": 0
|
503 |
+
},
|
504 |
+
"text/plain": [
|
505 |
+
"training_args.bin: 0%| | 0.00/4.98k [00:00<?, ?B/s]"
|
506 |
+
]
|
507 |
+
},
|
508 |
+
"metadata": {},
|
509 |
+
"output_type": "display_data"
|
510 |
+
},
|
511 |
+
{
|
512 |
+
"data": {
|
513 |
+
"text/plain": [
|
514 |
+
"CommitInfo(commit_url='https://huggingface.co/joshcarp/calendar_model/commit/ef13304ccc7e109ab97007e944f01405ce9b1409', commit_message='End of training', commit_description='', oid='ef13304ccc7e109ab97007e944f01405ce9b1409', pr_url=None, pr_revision=None, pr_num=None)"
|
515 |
+
]
|
516 |
+
},
|
517 |
+
"execution_count": 15,
|
518 |
+
"metadata": {},
|
519 |
+
"output_type": "execute_result"
|
520 |
+
}
|
521 |
+
],
|
522 |
+
"source": [
|
523 |
+
"trainer.push_to_hub()"
|
524 |
+
]
|
525 |
+
},
|
526 |
+
{
|
527 |
+
"cell_type": "code",
|
528 |
+
"execution_count": 24,
|
529 |
+
"metadata": {},
|
530 |
+
"outputs": [
|
531 |
+
{
|
532 |
+
"name": "stdout",
|
533 |
+
"output_type": "stream",
|
534 |
+
"text": [
|
535 |
+
"convert to summary: Doctor's appointment on Friday at 9:00 AM.\n",
|
536 |
+
"[{'generated_text': \"Umgekehrt: Doctor's appointment on Friday at 9:00 AM.\"}]\n"
|
537 |
+
]
|
538 |
+
}
|
539 |
+
],
|
540 |
+
"source": [
|
541 |
+
"from transformers import pipeline\n",
|
542 |
+
"\n",
|
543 |
+
"hub_model_id = \"joshcarp/calendar_model\"\n",
|
544 |
+
"summarizer = pipeline(\"textclassificationpipeline\", model=hub_model_id)\n",
|
545 |
+
"text = \"convert to summary: Doctor's appointment on Friday at 9:00 AM.\"\n",
|
546 |
+
"summary = summarizer(text, max_length=50, min_length=6)\n",
|
547 |
+
"print(text)\n",
|
548 |
+
"print(summary)"
|
549 |
+
]
|
550 |
+
},
|
551 |
+
{
|
552 |
+
"cell_type": "code",
|
553 |
+
"execution_count": null,
|
554 |
+
"metadata": {},
|
555 |
+
"outputs": [],
|
556 |
+
"source": []
|
557 |
+
},
|
558 |
+
{
|
559 |
+
"cell_type": "code",
|
560 |
+
"execution_count": null,
|
561 |
+
"metadata": {},
|
562 |
+
"outputs": [],
|
563 |
+
"source": []
|
564 |
+
}
|
565 |
+
],
|
566 |
+
"metadata": {
|
567 |
+
"kernelspec": {
|
568 |
+
"display_name": "Python 3",
|
569 |
+
"language": "python",
|
570 |
+
"name": "python3"
|
571 |
+
},
|
572 |
+
"language_info": {
|
573 |
+
"codemirror_mode": {
|
574 |
+
"name": "ipython",
|
575 |
+
"version": 3
|
576 |
+
},
|
577 |
+
"file_extension": ".py",
|
578 |
+
"mimetype": "text/x-python",
|
579 |
+
"name": "python",
|
580 |
+
"nbconvert_exporter": "python",
|
581 |
+
"pygments_lexer": "ipython3",
|
582 |
+
"version": "3.12.1"
|
583 |
+
}
|
584 |
+
},
|
585 |
+
"nbformat": 4,
|
586 |
+
"nbformat_minor": 2
|
587 |
+
}
|
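Note on the ValueError recorded at the top of this notebook: `PreTrainedTokenizerBase._call_one` rejects the batch because the tokenizer only accepts `str`, `List[str]`, or `List[List[str]]`, and the `details` column here appears to hold structured JSON objects rather than strings. A minimal sketch of one fix, serializing `details` before tokenizing (the same idea as the commented-out map in calendar.ipynb below; the file name `data3.json` and the column names are taken from these notebooks, not verified elsewhere):

import json
from datasets import load_dataset

data_set = load_dataset("json", data_files="data3.json", split="train")
# Serialize the structured "details" field to a JSON string so the
# tokenizer receives str inputs instead of dicts.
data_set = data_set.map(lambda x: {"details": json.dumps(x["details"])})
data_set = data_set.train_test_split(test_size=0.1)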
calendar.ipynb
ADDED
@@ -0,0 +1,524 @@
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 43,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"from datasets import load_dataset\n",
|
10 |
+
"\n",
|
11 |
+
"data_set = load_dataset(\"json\", data_files=\"data3.json\", split=\"train\")\n",
|
12 |
+
"# convert data_set details field to string\n",
|
13 |
+
"#\n",
|
14 |
+
"# data_set = data_set.map(lambda x: {\"details\": str(x[\"details\"])})\n",
|
15 |
+
"data_set = data_set.train_test_split(test_size=0.1)\n",
|
16 |
+
"# print(data_set.data[\"train\"][0])\n",
|
17 |
+
"# print(type(data_set.data[\"train\"]))"
|
18 |
+
]
|
19 |
+
},
|
20 |
+
{
|
21 |
+
"cell_type": "code",
|
22 |
+
"execution_count": 44,
|
23 |
+
"metadata": {},
|
24 |
+
"outputs": [
|
25 |
+
{
|
26 |
+
"ename": "OSError",
|
27 |
+
"evalue": "flan-t5-small is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'\nIf this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`",
|
28 |
+
"output_type": "error",
|
29 |
+
"traceback": [
|
30 |
+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
31 |
+
"\u001b[0;31mHTTPError\u001b[0m Traceback (most recent call last)",
|
32 |
+
"File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/huggingface_hub/utils/_errors.py:304\u001b[0m, in \u001b[0;36mhf_raise_for_status\u001b[0;34m(response, endpoint_name)\u001b[0m\n\u001b[1;32m 303\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 304\u001b[0m \u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mraise_for_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 305\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m HTTPError \u001b[38;5;28;01mas\u001b[39;00m e:\n",
|
33 |
+
"File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/requests/models.py:1021\u001b[0m, in \u001b[0;36mResponse.raise_for_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1020\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m http_error_msg:\n\u001b[0;32m-> 1021\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m HTTPError(http_error_msg, response\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m)\n",
|
34 |
+
"\u001b[0;31mHTTPError\u001b[0m: 404 Client Error: Not Found for url: https://huggingface.co/flan-t5-small/resolve/main/tokenizer_config.json",
|
35 |
+
"\nThe above exception was the direct cause of the following exception:\n",
|
36 |
+
"\u001b[0;31mRepositoryNotFoundError\u001b[0m Traceback (most recent call last)",
|
37 |
+
"File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/transformers/utils/hub.py:398\u001b[0m, in \u001b[0;36mcached_file\u001b[0;34m(path_or_repo_id, filename, cache_dir, force_download, resume_download, proxies, token, revision, local_files_only, subfolder, repo_type, user_agent, _raise_exceptions_for_gated_repo, _raise_exceptions_for_missing_entries, _raise_exceptions_for_connection_errors, _commit_hash, **deprecated_kwargs)\u001b[0m\n\u001b[1;32m 396\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 397\u001b[0m \u001b[38;5;66;03m# Load from URL or cache if already cached\u001b[39;00m\n\u001b[0;32m--> 398\u001b[0m resolved_file \u001b[38;5;241m=\u001b[39m \u001b[43mhf_hub_download\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 399\u001b[0m \u001b[43m \u001b[49m\u001b[43mpath_or_repo_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 400\u001b[0m \u001b[43m \u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 401\u001b[0m \u001b[43m \u001b[49m\u001b[43msubfolder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43msubfolder\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m==\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43msubfolder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 402\u001b[0m \u001b[43m \u001b[49m\u001b[43mrepo_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrepo_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 403\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 404\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 405\u001b[0m \u001b[43m \u001b[49m\u001b[43muser_agent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muser_agent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 406\u001b[0m \u001b[43m \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 407\u001b[0m \u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 408\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 409\u001b[0m \u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 410\u001b[0m \u001b[43m \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 411\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 412\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m GatedRepoError \u001b[38;5;28;01mas\u001b[39;00m e:\n",
|
38 |
+
"File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/huggingface_hub/utils/_validators.py:118\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.<locals>._inner_fn\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 116\u001b[0m kwargs \u001b[38;5;241m=\u001b[39m smoothly_deprecate_use_auth_token(fn_name\u001b[38;5;241m=\u001b[39mfn\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, has_token\u001b[38;5;241m=\u001b[39mhas_token, kwargs\u001b[38;5;241m=\u001b[39mkwargs)\n\u001b[0;32m--> 118\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
|
39 |
+
"File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/huggingface_hub/file_download.py:1403\u001b[0m, in \u001b[0;36mhf_hub_download\u001b[0;34m(repo_id, filename, subfolder, repo_type, revision, library_name, library_version, cache_dir, local_dir, local_dir_use_symlinks, user_agent, force_download, force_filename, proxies, etag_timeout, resume_download, token, local_files_only, legacy_cache_layout, endpoint)\u001b[0m\n\u001b[1;32m 1401\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(head_call_error, RepositoryNotFoundError) \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(head_call_error, GatedRepoError):\n\u001b[1;32m 1402\u001b[0m \u001b[38;5;66;03m# Repo not found or gated => let's raise the actual error\u001b[39;00m\n\u001b[0;32m-> 1403\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m head_call_error\n\u001b[1;32m 1404\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1405\u001b[0m \u001b[38;5;66;03m# Otherwise: most likely a connection issue or Hub downtime => let's warn the user\u001b[39;00m\n",
|
40 |
+
"File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/huggingface_hub/file_download.py:1261\u001b[0m, in \u001b[0;36mhf_hub_download\u001b[0;34m(repo_id, filename, subfolder, repo_type, revision, library_name, library_version, cache_dir, local_dir, local_dir_use_symlinks, user_agent, force_download, force_filename, proxies, etag_timeout, resume_download, token, local_files_only, legacy_cache_layout, endpoint)\u001b[0m\n\u001b[1;32m 1260\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1261\u001b[0m metadata \u001b[38;5;241m=\u001b[39m \u001b[43mget_hf_file_metadata\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1262\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1263\u001b[0m \u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1264\u001b[0m \u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1265\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43metag_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1266\u001b[0m \u001b[43m \u001b[49m\u001b[43mlibrary_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlibrary_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1267\u001b[0m \u001b[43m \u001b[49m\u001b[43mlibrary_version\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlibrary_version\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1268\u001b[0m \u001b[43m \u001b[49m\u001b[43muser_agent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muser_agent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1269\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1270\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m EntryNotFoundError \u001b[38;5;28;01mas\u001b[39;00m http_error:\n\u001b[1;32m 1271\u001b[0m \u001b[38;5;66;03m# Cache the non-existence of the file and raise\u001b[39;00m\n",
|
41 |
+
"File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/huggingface_hub/utils/_validators.py:118\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.<locals>._inner_fn\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 116\u001b[0m kwargs \u001b[38;5;241m=\u001b[39m smoothly_deprecate_use_auth_token(fn_name\u001b[38;5;241m=\u001b[39mfn\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, has_token\u001b[38;5;241m=\u001b[39mhas_token, kwargs\u001b[38;5;241m=\u001b[39mkwargs)\n\u001b[0;32m--> 118\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
|
42 |
+
"File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/huggingface_hub/file_download.py:1667\u001b[0m, in \u001b[0;36mget_hf_file_metadata\u001b[0;34m(url, token, proxies, timeout, library_name, library_version, user_agent)\u001b[0m\n\u001b[1;32m 1666\u001b[0m \u001b[38;5;66;03m# Retrieve metadata\u001b[39;00m\n\u001b[0;32m-> 1667\u001b[0m r \u001b[38;5;241m=\u001b[39m \u001b[43m_request_wrapper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1668\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mHEAD\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1669\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1670\u001b[0m \u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1671\u001b[0m \u001b[43m \u001b[49m\u001b[43mallow_redirects\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 1672\u001b[0m \u001b[43m \u001b[49m\u001b[43mfollow_relative_redirects\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 1673\u001b[0m \u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1674\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1675\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1676\u001b[0m hf_raise_for_status(r)\n",
|
43 |
+
"File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/huggingface_hub/file_download.py:385\u001b[0m, in \u001b[0;36m_request_wrapper\u001b[0;34m(method, url, follow_relative_redirects, **params)\u001b[0m\n\u001b[1;32m 384\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m follow_relative_redirects:\n\u001b[0;32m--> 385\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43m_request_wrapper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 386\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 387\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 388\u001b[0m \u001b[43m \u001b[49m\u001b[43mfollow_relative_redirects\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 389\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 390\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 392\u001b[0m \u001b[38;5;66;03m# If redirection, we redirect only relative paths.\u001b[39;00m\n\u001b[1;32m 393\u001b[0m \u001b[38;5;66;03m# This is useful in case of a renamed repository.\u001b[39;00m\n",
|
44 |
+
"File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/huggingface_hub/file_download.py:409\u001b[0m, in \u001b[0;36m_request_wrapper\u001b[0;34m(method, url, follow_relative_redirects, **params)\u001b[0m\n\u001b[1;32m 408\u001b[0m response \u001b[38;5;241m=\u001b[39m get_session()\u001b[38;5;241m.\u001b[39mrequest(method\u001b[38;5;241m=\u001b[39mmethod, url\u001b[38;5;241m=\u001b[39murl, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mparams)\n\u001b[0;32m--> 409\u001b[0m \u001b[43mhf_raise_for_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresponse\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 410\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m response\n",
|
45 |
+
"File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/huggingface_hub/utils/_errors.py:352\u001b[0m, in \u001b[0;36mhf_raise_for_status\u001b[0;34m(response, endpoint_name)\u001b[0m\n\u001b[1;32m 344\u001b[0m message \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 345\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;241m.\u001b[39mstatus_code\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m Client Error.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 346\u001b[0m \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 350\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m make sure you are authenticated.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 351\u001b[0m )\n\u001b[0;32m--> 352\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m RepositoryNotFoundError(message, response) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01me\u001b[39;00m\n\u001b[1;32m 354\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m response\u001b[38;5;241m.\u001b[39mstatus_code \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m400\u001b[39m:\n",
|
46 |
+
"\u001b[0;31mRepositoryNotFoundError\u001b[0m: 404 Client Error. (Request ID: Root=1-65f984a1-7346ead41ef1f7332a940212;3782e90a-db2f-4d61-bc70-34daff4938f4)\n\nRepository Not Found for url: https://huggingface.co/flan-t5-small/resolve/main/tokenizer_config.json.\nPlease make sure you specified the correct `repo_id` and `repo_type`.\nIf you are trying to access a private or gated repo, make sure you are authenticated.",
|
47 |
+
"\nThe above exception was the direct cause of the following exception:\n",
|
48 |
+
"\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)",
|
49 |
+
"Cell \u001b[0;32mIn[44], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AutoTokenizer\n\u001b[1;32m 2\u001b[0m checkpoint \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mflan-t5-small\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 3\u001b[0m tokenizer \u001b[38;5;241m=\u001b[39m \u001b[43mAutoTokenizer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcheckpoint\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4\u001b[0m prefix \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\"\"\u001b[39m\u001b[38;5;124mextract the calendar event details from the following message. The details should be specified in the following json format:\u001b[39m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;124m{\u001b[39m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;124m \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdatetime\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m: \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m2024-03-12T12:00:00\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m,\u001b[39m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[38;5;124m}\u001b[39m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;124m\"\"\"\u001b[39m\n\u001b[1;32m 11\u001b[0m prefix \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m\n",
|
50 |
+
"File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/transformers/models/auto/tokenization_auto.py:767\u001b[0m, in \u001b[0;36mAutoTokenizer.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, *inputs, **kwargs)\u001b[0m\n\u001b[1;32m 764\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m tokenizer_class\u001b[38;5;241m.\u001b[39mfrom_pretrained(pretrained_model_name_or_path, \u001b[38;5;241m*\u001b[39minputs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 766\u001b[0m \u001b[38;5;66;03m# Next, let's try to use the tokenizer_config file to get the tokenizer class.\u001b[39;00m\n\u001b[0;32m--> 767\u001b[0m tokenizer_config \u001b[38;5;241m=\u001b[39m \u001b[43mget_tokenizer_config\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 768\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_commit_hash\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m tokenizer_config:\n\u001b[1;32m 769\u001b[0m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_commit_hash\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m tokenizer_config[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_commit_hash\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n",
|
51 |
+
"File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/transformers/models/auto/tokenization_auto.py:600\u001b[0m, in \u001b[0;36mget_tokenizer_config\u001b[0;34m(pretrained_model_name_or_path, cache_dir, force_download, resume_download, proxies, token, revision, local_files_only, subfolder, **kwargs)\u001b[0m\n\u001b[1;32m 597\u001b[0m token \u001b[38;5;241m=\u001b[39m use_auth_token\n\u001b[1;32m 599\u001b[0m commit_hash \u001b[38;5;241m=\u001b[39m kwargs\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_commit_hash\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[0;32m--> 600\u001b[0m resolved_config_file \u001b[38;5;241m=\u001b[39m \u001b[43mcached_file\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 601\u001b[0m \u001b[43m \u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 602\u001b[0m \u001b[43m \u001b[49m\u001b[43mTOKENIZER_CONFIG_FILE\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 603\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 604\u001b[0m \u001b[43m \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 605\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 606\u001b[0m \u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 607\u001b[0m \u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 608\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 609\u001b[0m \u001b[43m \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 610\u001b[0m \u001b[43m \u001b[49m\u001b[43msubfolder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msubfolder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 611\u001b[0m \u001b[43m \u001b[49m\u001b[43m_raise_exceptions_for_gated_repo\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 612\u001b[0m \u001b[43m \u001b[49m\u001b[43m_raise_exceptions_for_missing_entries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 613\u001b[0m \u001b[43m \u001b[49m\u001b[43m_raise_exceptions_for_connection_errors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 614\u001b[0m \u001b[43m \u001b[49m\u001b[43m_commit_hash\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcommit_hash\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 615\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 616\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m resolved_config_file \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 617\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCould not locate the tokenizer configuration file, will try to use the model config 
instead.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
|
52 |
+
"File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/transformers/utils/hub.py:421\u001b[0m, in \u001b[0;36mcached_file\u001b[0;34m(path_or_repo_id, filename, cache_dir, force_download, resume_download, proxies, token, revision, local_files_only, subfolder, repo_type, user_agent, _raise_exceptions_for_gated_repo, _raise_exceptions_for_missing_entries, _raise_exceptions_for_connection_errors, _commit_hash, **deprecated_kwargs)\u001b[0m\n\u001b[1;32m 416\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mEnvironmentError\u001b[39;00m(\n\u001b[1;32m 417\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mYou are trying to access a gated repo.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mMake sure to have access to it at \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 418\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhttps://huggingface.co/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpath_or_repo_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mstr\u001b[39m(e)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 419\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01me\u001b[39;00m\n\u001b[1;32m 420\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m RepositoryNotFoundError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m--> 421\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mEnvironmentError\u001b[39;00m(\n\u001b[1;32m 422\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpath_or_repo_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m is not a local folder and is not a valid model identifier \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 423\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlisted on \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mhttps://huggingface.co/models\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mIf this is a private repository, make sure to pass a token \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 424\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhaving permission to this repo either by logging in with `huggingface-cli login` or by passing \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 425\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m`token=<your_token>`\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 426\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01me\u001b[39;00m\n\u001b[1;32m 427\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m RevisionNotFoundError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 428\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mEnvironmentError\u001b[39;00m(\n\u001b[1;32m 429\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mrevision\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m is not a valid git identifier (branch name, tag name or commit id) that exists \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 430\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfor this model name. 
Check the model page at \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 431\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mhttps://huggingface.co/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpath_or_repo_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m for available revisions.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 432\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01me\u001b[39;00m\n",
|
53 |
+
"\u001b[0;31mOSError\u001b[0m: flan-t5-small is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'\nIf this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`"
|
54 |
+
]
|
55 |
+
}
|
56 |
+
],
|
57 |
+
"source": [
|
58 |
+
"from transformers import AutoTokenizer\n",
|
59 |
+
"checkpoint = \"flan-t5-small\"\n",
|
60 |
+
"tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
|
61 |
+
"prefix = \"\"\"extract the calendar event details from the following message. The details should be specified in the following json format:\n",
|
62 |
+
"{\n",
|
63 |
+
" \"datetime\": \"2024-03-12T12:00:00\",\n",
|
64 |
+
" \"description\": \"Lunch meeting\",\n",
|
65 |
+
" \"location\": \"Italian restaurant on Main Street\"\n",
|
66 |
+
"}\n",
|
67 |
+
"\"\"\"\n",
|
68 |
+
"prefix = \"\"\n",
|
69 |
+
"\n",
|
70 |
+
"def preprocess_function(examples):\n",
|
71 |
+
" inputs = [prefix + doc for doc in examples[\"message\"]]\n",
|
72 |
+
" target = [doc for doc in examples[\"details\"]]\n",
|
73 |
+
" model_inputs = tokenizer(inputs, text_target=target, max_length=1024, truncation=True, padding=\"max_length\") \n",
|
74 |
+
"\n",
|
75 |
+
" # labels = tokenizer(text_target=examples[\"details\"], max_length=128, truncation=True, padding=\"max_length\")\n",
|
76 |
+
"\n",
|
77 |
+
" # model_inputs[\"labels\"] = labels[\"input_ids\"]\n",
|
78 |
+
" return model_inputs\n"
|
79 |
+
]
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"cell_type": "code",
|
83 |
+
"execution_count": null,
|
84 |
+
"metadata": {},
|
85 |
+
"outputs": [
|
86 |
+
{
|
87 |
+
"data": {
|
88 |
+
"application/vnd.jupyter.widget-view+json": {
|
89 |
+
"model_id": "175dd8b79c984d4ab51850288906a808",
|
90 |
+
"version_major": 2,
|
91 |
+
"version_minor": 0
|
92 |
+
},
|
93 |
+
"text/plain": [
|
94 |
+
"Map: 0%| | 0/69 [00:00<?, ? examples/s]"
|
95 |
+
]
|
96 |
+
},
|
97 |
+
"metadata": {},
|
98 |
+
"output_type": "display_data"
|
99 |
+
},
|
100 |
+
{
|
101 |
+
"data": {
|
102 |
+
"application/vnd.jupyter.widget-view+json": {
|
103 |
+
"model_id": "c80154f297054a54b7eada6199f8a3ab",
|
104 |
+
"version_major": 2,
|
105 |
+
"version_minor": 0
|
106 |
+
},
|
107 |
+
"text/plain": [
|
108 |
+
"Map: 0%| | 0/8 [00:00<?, ? examples/s]"
|
109 |
+
]
|
110 |
+
},
|
111 |
+
"metadata": {},
|
112 |
+
"output_type": "display_data"
|
113 |
+
}
|
114 |
+
],
|
115 |
+
"source": [
|
116 |
+
"tokenized_data_set = data_set.map(preprocess_function, batched=True)"
|
117 |
+
]
|
118 |
+
},
|
119 |
+
{
|
120 |
+
"cell_type": "code",
|
121 |
+
"execution_count": null,
|
122 |
+
"metadata": {},
|
123 |
+
"outputs": [],
|
124 |
+
"source": [
|
125 |
+
"from transformers import DataCollatorForSeq2Seq"
|
126 |
+
]
|
127 |
+
},
|
128 |
+
{
|
129 |
+
"cell_type": "code",
|
130 |
+
"execution_count": null,
|
131 |
+
"metadata": {},
|
132 |
+
"outputs": [],
|
133 |
+
"source": [
|
134 |
+
"data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=checkpoint)"
|
135 |
+
]
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"cell_type": "code",
|
139 |
+
"execution_count": null,
|
140 |
+
"metadata": {},
|
141 |
+
"outputs": [],
|
142 |
+
"source": [
|
143 |
+
"import evaluate"
|
144 |
+
]
|
145 |
+
},
|
146 |
+
{
|
147 |
+
"cell_type": "code",
|
148 |
+
"execution_count": null,
|
149 |
+
"metadata": {},
|
150 |
+
"outputs": [],
|
151 |
+
"source": [
|
152 |
+
"rouge = evaluate.load(\"rouge\")"
|
153 |
+
]
|
154 |
+
},
|
155 |
+
{
|
156 |
+
"cell_type": "code",
|
157 |
+
"execution_count": null,
|
158 |
+
"metadata": {},
|
159 |
+
"outputs": [],
|
160 |
+
"source": [
|
161 |
+
"import numpy as np\n",
|
162 |
+
"\n",
|
163 |
+
"def compute_metrics(eval_pred):\n",
|
164 |
+
" predictions, labels = eval_pred.predictions, eval_pred.label_ids\n",
|
165 |
+
" predicted_strings = tokenizer.batch_decode(predictions, skip_special_tokens=True)\n",
|
166 |
+
" actual_strings = tokenizer.batch_decode(labels, skip_special_tokens=True)\n",
|
167 |
+
"\n",
|
168 |
+
" token_diffs = []\n",
|
169 |
+
" for predicted, actual in zip(predicted_strings, actual_strings):\n",
|
170 |
+
" predicted_tokens = tokenizer(predicted)[\"input_ids\"]\n",
|
171 |
+
" actual_tokens = tokenizer(actual)[\"input_ids\"]\n",
|
172 |
+
" token_diff = abs(len(predicted_tokens) - len(actual_tokens))\n",
|
173 |
+
" token_diffs.append(token_diff)\n",
|
174 |
+
"\n",
|
175 |
+
" avg_token_diff = sum(token_diffs) / len(token_diffs)\n",
|
176 |
+
" return {\"average_token_difference\": avg_token_diff}\n",
|
177 |
+
"\n"
|
178 |
+
]
|
179 |
+
},
|
180 |
+
{
|
181 |
+
"cell_type": "code",
|
182 |
+
"execution_count": null,
|
183 |
+
"metadata": {},
|
184 |
+
"outputs": [],
|
185 |
+
"source": [
|
186 |
+
"from transformers import AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer\n",
|
187 |
+
"model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)"
|
188 |
+
]
|
189 |
+
},
|
190 |
+
{
|
191 |
+
"cell_type": "code",
|
192 |
+
"execution_count": null,
|
193 |
+
"metadata": {},
|
194 |
+
"outputs": [
|
195 |
+
{
|
196 |
+
"name": "stdout",
|
197 |
+
"output_type": "stream",
|
198 |
+
"text": [
|
199 |
+
"Model moved to MPS device\n"
|
200 |
+
]
|
201 |
+
}
|
202 |
+
],
|
203 |
+
"source": [
|
204 |
+
"import torch\n",
|
205 |
+
"\n",
|
206 |
+
"# Check that MPS is available\n",
|
207 |
+
"if not torch.backends.mps.is_available():\n",
|
208 |
+
" if not torch.backends.mps.is_built():\n",
|
209 |
+
" print(\"MPS not available because the current PyTorch install was not \"\n",
|
210 |
+
" \"built with MPS enabled.\")\n",
|
211 |
+
" else:\n",
|
212 |
+
" print(\"MPS not available because the current MacOS version is not 12.3+ \"\n",
|
213 |
+
" \"and/or you do not have an MPS-enabled device on this machine.\")\n",
|
214 |
+
"\n",
|
215 |
+
"else:\n",
|
216 |
+
" mps_device = torch.device(\"mps\")\n",
|
217 |
+
" model.to(mps_device)\n",
|
218 |
+
" print(\"Model moved to MPS device\")"
|
219 |
+
]
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"cell_type": "code",
|
223 |
+
"execution_count": null,
|
224 |
+
"metadata": {},
|
225 |
+
"outputs": [
|
226 |
+
{
|
227 |
+
"name": "stderr",
|
228 |
+
"output_type": "stream",
|
229 |
+
"text": [
|
230 |
+
"/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/transformers/training_args.py:1951: UserWarning: `use_mps_device` is deprecated and will be removed in version 5.0 of 🤗 Transformers. `mps` device will be used by default if available similar to the way `cuda` device is used.Therefore, no action from user is required. \n",
|
231 |
+
" warnings.warn(\n"
|
232 |
+
]
|
233 |
+
}
|
234 |
+
],
|
235 |
+
"source": [
|
236 |
+
"training_args = Seq2SeqTrainingArguments(\n",
|
237 |
+
" output_dir=\"calendar_model\",\n",
|
238 |
+
" evaluation_strategy=\"epoch\",\n",
|
239 |
+
" learning_rate=5e-5,\n",
|
240 |
+
" per_device_train_batch_size=8,\n",
|
241 |
+
" per_device_eval_batch_size=8,\n",
|
242 |
+
" weight_decay=0.01,\n",
|
243 |
+
" save_total_limit=3,\n",
|
244 |
+
" num_train_epochs=1,\n",
|
245 |
+
" predict_with_generate=True,\n",
|
246 |
+
" use_mps_device=True,\n",
|
247 |
+
" # fp16=True,\n",
|
248 |
+
" # push_to_hub=True,\n",
|
249 |
+
")"
|
250 |
+
]
|
251 |
+
},
|
252 |
+
{
|
253 |
+
"cell_type": "code",
|
254 |
+
"execution_count": null,
|
255 |
+
"metadata": {},
|
256 |
+
"outputs": [
|
257 |
+
{
|
258 |
+
"name": "stdout",
|
259 |
+
"output_type": "stream",
|
260 |
+
"text": [
|
261 |
+
"DatasetDict({\n",
|
262 |
+
" train: Dataset({\n",
|
263 |
+
" features: ['details', 'message'],\n",
|
264 |
+
" num_rows: 69\n",
|
265 |
+
" })\n",
|
266 |
+
" test: Dataset({\n",
|
267 |
+
" features: ['details', 'message'],\n",
|
268 |
+
" num_rows: 8\n",
|
269 |
+
" })\n",
|
270 |
+
"})\n"
|
271 |
+
]
|
272 |
+
}
|
273 |
+
],
|
274 |
+
"source": [
|
275 |
+
"print(data_set)"
|
276 |
+
]
|
277 |
+
},
|
278 |
+
{
|
279 |
+
"cell_type": "code",
|
280 |
+
"execution_count": null,
|
281 |
+
"metadata": {},
|
282 |
+
"outputs": [],
|
283 |
+
"source": [
|
284 |
+
"trainer = Seq2SeqTrainer(\n",
|
285 |
+
" model=model,\n",
|
286 |
+
" args=training_args,\n",
|
287 |
+
" train_dataset=tokenized_data_set[\"train\"],\n",
|
288 |
+
" eval_dataset=tokenized_data_set[\"test\"],\n",
|
289 |
+
" tokenizer=tokenizer,\n",
|
290 |
+
" data_collator=data_collator,\n",
|
291 |
+
" compute_metrics=compute_metrics,\n",
|
292 |
+
")"
|
293 |
+
]
|
294 |
+
},
|
295 |
+
{
|
296 |
+
"cell_type": "code",
|
297 |
+
"execution_count": null,
|
298 |
+
"metadata": {},
|
299 |
+
"outputs": [
|
300 |
+
{
|
301 |
+
"data": {
|
302 |
+
"application/vnd.jupyter.widget-view+json": {
|
303 |
+
"model_id": "fddcfbbfca9944309199f434f94b8577",
|
304 |
+
"version_major": 2,
|
305 |
+
"version_minor": 0
|
306 |
+
},
|
307 |
+
"text/plain": [
|
308 |
+
" 0%| | 0/9 [00:00<?, ?it/s]"
|
309 |
+
]
|
310 |
+
},
|
311 |
+
"metadata": {},
|
312 |
+
"output_type": "display_data"
|
313 |
+
},
|
314 |
+
{
|
315 |
+
"name": "stderr",
|
316 |
+
"output_type": "stream",
|
317 |
+
"text": [
|
318 |
+
"/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/transformers/generation/utils.py:1178: UserWarning: Using the model-agnostic default `max_length` (=20) to control the generation length. We recommend setting `max_new_tokens` to control the maximum length of the generation.\n",
|
319 |
+
" warnings.warn(\n"
|
320 |
+
]
|
321 |
+
},
|
322 |
+
{
|
323 |
+
"data": {
|
324 |
+
"application/vnd.jupyter.widget-view+json": {
|
325 |
+
"model_id": "71611216935e4ddca1f16114070609f9",
|
326 |
+
"version_major": 2,
|
327 |
+
"version_minor": 0
|
328 |
+
},
|
329 |
+
"text/plain": [
|
330 |
+
" 0%| | 0/1 [00:00<?, ?it/s]"
|
331 |
+
]
|
332 |
+
},
|
333 |
+
"metadata": {},
|
334 |
+
"output_type": "display_data"
|
335 |
+
},
|
336 |
+
{
|
337 |
+
"name": "stdout",
|
338 |
+
"output_type": "stream",
|
339 |
+
"text": [
|
340 |
+
"{'eval_loss': 9.526269912719727, 'eval_average_token_difference': 9.875, 'eval_runtime': 3.2711, 'eval_samples_per_second': 2.446, 'eval_steps_per_second': 0.306, 'epoch': 1.0}\n",
|
341 |
+
"{'train_runtime': 270.5513, 'train_samples_per_second': 0.255, 'train_steps_per_second': 0.033, 'train_loss': 10.85148451063368, 'epoch': 1.0}\n"
|
342 |
+
]
|
343 |
+
},
|
344 |
+
{
|
345 |
+
"data": {
|
346 |
+
"text/plain": [
|
347 |
+
"TrainOutput(global_step=9, training_loss=10.85148451063368, metrics={'train_runtime': 270.5513, 'train_samples_per_second': 0.255, 'train_steps_per_second': 0.033, 'train_loss': 10.85148451063368, 'epoch': 1.0})"
|
348 |
+
]
|
349 |
+
},
|
350 |
+
"execution_count": 31,
|
351 |
+
"metadata": {},
|
352 |
+
"output_type": "execute_result"
|
353 |
+
}
|
354 |
+
],
|
355 |
+
"source": [
|
356 |
+
"trainer.train()"
|
357 |
+
]
|
358 |
+
},
|
359 |
+
{
|
360 |
+
"cell_type": "code",
|
361 |
+
"execution_count": null,
|
362 |
+
"metadata": {},
|
363 |
+
"outputs": [
|
364 |
+
{
|
365 |
+
"data": {
|
366 |
+
"application/vnd.jupyter.widget-view+json": {
|
367 |
+
"model_id": "f6f6f4eab8b44af285b2921106c718ae",
|
368 |
+
"version_major": 2,
|
369 |
+
"version_minor": 0
|
370 |
+
},
|
371 |
+
"text/plain": [
|
372 |
+
"model.safetensors: 0%| | 0.00/242M [00:00<?, ?B/s]"
|
373 |
+
]
|
374 |
+
},
|
375 |
+
"metadata": {},
|
376 |
+
"output_type": "display_data"
|
377 |
+
},
|
378 |
+
{
|
379 |
+
"data": {
|
380 |
+
"application/vnd.jupyter.widget-view+json": {
|
381 |
+
"model_id": "df785967253f4223ab213e82ab8b468e",
|
382 |
+
"version_major": 2,
|
383 |
+
"version_minor": 0
|
384 |
+
},
|
385 |
+
"text/plain": [
|
386 |
+
"Upload 2 LFS files: 0%| | 0/2 [00:00<?, ?it/s]"
|
387 |
+
]
|
388 |
+
},
|
389 |
+
"metadata": {},
|
390 |
+
"output_type": "display_data"
|
391 |
+
},
|
392 |
+
{
|
393 |
+
"data": {
|
394 |
+
"application/vnd.jupyter.widget-view+json": {
|
395 |
+
"model_id": "c511bfb7228c4ee0881c2bf456a8cc10",
|
396 |
+
"version_major": 2,
|
397 |
+
"version_minor": 0
|
398 |
+
},
|
399 |
+
"text/plain": [
|
400 |
+
"training_args.bin: 0%| | 0.00/4.98k [00:00<?, ?B/s]"
|
401 |
+
]
|
402 |
+
},
|
403 |
+
"metadata": {},
|
404 |
+
"output_type": "display_data"
|
405 |
+
},
|
406 |
+
{
|
407 |
+
"data": {
|
408 |
+
"text/plain": [
|
409 |
+
"CommitInfo(commit_url='https://huggingface.co/joshcarp/calendar_model/commit/edfcfa8cc6e1ae5fb389894f56f0fb2a6885828a', commit_message='End of training', commit_description='', oid='edfcfa8cc6e1ae5fb389894f56f0fb2a6885828a', pr_url=None, pr_revision=None, pr_num=None)"
|
410 |
+
]
|
411 |
+
},
|
412 |
+
"execution_count": 32,
|
413 |
+
"metadata": {},
|
414 |
+
"output_type": "execute_result"
|
415 |
+
}
|
416 |
+
],
|
417 |
+
"source": [
|
418 |
+
"# push to hub\n",
|
419 |
+
"trainer.push_to_hub()"
|
420 |
+
]
|
421 |
+
},
|
422 |
+
{
|
423 |
+
"cell_type": "code",
|
424 |
+
"execution_count": null,
|
425 |
+
"metadata": {},
|
426 |
+
"outputs": [
|
427 |
+
{
|
428 |
+
"name": "stdout",
|
429 |
+
"output_type": "stream",
|
430 |
+
"text": [
|
431 |
+
"extract the calendar event details from a message. The details should be specified in the following json format:\n",
|
432 |
+
"{\n",
|
433 |
+
" \"datetime\": \"<inferred start time from input text>\",\n",
|
434 |
+
" \"description\": \"<description of event from input text>\",\n",
|
435 |
+
" \"location\": \"<location of event from input text>\"\n",
|
436 |
+
"}\n",
|
437 |
+
"\n",
|
438 |
+
"Here is an example: \"Reminder: Team meeting on Friday at 10 AM in the conference room.\"\n",
|
439 |
+
"\n",
|
440 |
+
"For this example the output should be:\n",
|
441 |
+
"\n",
|
442 |
+
"{\n",
|
443 |
+
" \"datetime\": \"2024-03-15T10:00:00\",\n",
|
444 |
+
" \"description\": \"Team meeting\",\n",
|
445 |
+
" \"location\": \"Conference room\"\n",
|
446 |
+
"}\n",
|
447 |
+
"\n",
|
448 |
+
"\n",
|
449 |
+
"Here is the input text: Doctor's appointment on Friday at 9:00 AM.\n",
|
450 |
+
"[{'generated_text': 'calendar event details from a message. The details should be specified in json format: \"datetime\": \"inferred start time from input text>\", \"description\": \"description of event from input text>\", \"location\":'}]\n"
|
451 |
+
]
|
452 |
+
}
|
453 |
+
],
|
454 |
+
"source": [
|
455 |
+
"from transformers import pipeline\n",
|
456 |
+
"hub_model_id = \"joshcarp/calendar_model\"\n",
|
457 |
+
"summarizer = pipeline(\"text2text-generation\", model=hub_model_id)\n",
|
458 |
+
"\n",
|
459 |
+
"\n",
|
460 |
+
"prefix = \"\"\"extract the calendar event details from a message. The details should be specified in the following json format:\n",
|
461 |
+
"{\n",
|
462 |
+
" \"datetime\": \"<inferred start time from input text>\",\n",
|
463 |
+
" \"description\": \"<description of event from input text>\",\n",
|
464 |
+
" \"location\": \"<location of event from input text>\"\n",
|
465 |
+
"}\n",
|
466 |
+
"\n",
|
467 |
+
"Here is an example: \"Reminder: Team meeting on Friday at 10 AM in the conference room.\"\n",
|
468 |
+
"\n",
|
469 |
+
"For this example the output should be:\n",
|
470 |
+
"\n",
|
471 |
+
"{\n",
|
472 |
+
" \"datetime\": \"2024-03-15T10:00:00\",\n",
|
473 |
+
" \"description\": \"Team meeting\",\n",
|
474 |
+
" \"location\": \"Conference room\"\n",
|
475 |
+
"}\n",
|
476 |
+
"\n",
|
477 |
+
"\n",
|
478 |
+
"Here is the input text: \"\"\"\n",
|
479 |
+
"\n",
|
480 |
+
"text = prefix+\"Doctor's appointment on Friday at 9:00 AM.\"\n",
|
481 |
+
"\n",
|
482 |
+
"\n",
|
483 |
+
"summary = summarizer(text, max_length=60, min_length=6, truncation=True)\n",
|
484 |
+
"print(text)\n",
|
485 |
+
"print(summary)"
|
486 |
+
]
|
487 |
+
},
|
488 |
+
{
|
489 |
+
"cell_type": "code",
|
490 |
+
"execution_count": null,
|
491 |
+
"metadata": {},
|
492 |
+
"outputs": [],
|
493 |
+
"source": []
|
494 |
+
},
|
495 |
+
{
|
496 |
+
"cell_type": "code",
|
497 |
+
"execution_count": null,
|
498 |
+
"metadata": {},
|
499 |
+
"outputs": [],
|
500 |
+
"source": []
|
501 |
+
}
|
502 |
+
],
|
503 |
+
"metadata": {
|
504 |
+
"kernelspec": {
|
505 |
+
"display_name": "Python 3",
|
506 |
+
"language": "python",
|
507 |
+
"name": "python3"
|
508 |
+
},
|
509 |
+
"language_info": {
|
510 |
+
"codemirror_mode": {
|
511 |
+
"name": "ipython",
|
512 |
+
"version": 3
|
513 |
+
},
|
514 |
+
"file_extension": ".py",
|
515 |
+
"mimetype": "text/x-python",
|
516 |
+
"name": "python",
|
517 |
+
"nbconvert_exporter": "python",
|
518 |
+
"pygments_lexer": "ipython3",
|
519 |
+
"version": "3.12.1"
|
520 |
+
}
|
521 |
+
},
|
522 |
+
"nbformat": 4,
|
523 |
+
"nbformat_minor": 2
|
524 |
+
}
|
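The final cell above prints the raw pipeline output, which in this run echoes the prompt rather than emitting the requested JSON. A minimal sketch (not part of this commit) of how the generated text could be parsed back into event fields, assuming the same joshcarp/calendar_model checkpoint and an abbreviated prompt in place of the full few-shot prefix:

import json
from transformers import pipeline

generator = pipeline("text2text-generation", model="joshcarp/calendar_model")

# Abbreviated stand-in for the full prefix built in the cell above.
prompt = ("extract the calendar event details from a message as JSON. "
          "Here is the input text: Doctor's appointment on Friday at 9:00 AM.")

out = generator(prompt, max_length=60, min_length=6, truncation=True)
raw = out[0]["generated_text"]

try:
    # Expected shape: {"datetime": ..., "description": ..., "location": ...}
    event = json.loads(raw)
    print(event["datetime"], event["description"], event["location"])
except json.JSONDecodeError:
    # The captured output above is not valid JSON, so this branch is the likely one.
    print("Model did not return JSON:", raw)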
data-flattened.json
ADDED
@@ -0,0 +1,470 @@
[
 {"message": "Let's meet for lunch tomorrow at 12 PM at the Italian restaurant on Main Street.", "datetime": "2024-03-12T12:00:00", "description": "Lunch meeting", "location": "Italian restaurant on Main Street"},
 {"message": "Reminder: Team meeting on Friday at 10 AM in the conference room.", "datetime": "2024-03-15T10:00:00", "description": "Team meeting", "location": "Conference room"},
 {"message": "Don't forget the doctor's appointment next Monday at 3:30 PM.", "datetime": "2024-03-18T15:30:00", "description": "Doctor's appointment", "location": ""},
 {"message": "Dinner with parents this Saturday evening at 7 PM.", "datetime": "2024-03-16T19:00:00", "description": "Dinner with parents", "location": ""},
 {"message": "Meeting with client next Tuesday at 9:30 AM at their office.", "datetime": "2024-03-19T09:30:00", "description": "Meeting with client", "location": "Client's office"},
 {"message": "Soccer practice on Wednesday at 4:00 PM at the park.", "datetime": "2024-03-13T16:00:00", "description": "Soccer practice", "location": "Park"},
 {"message": "Conference call tomorrow at 2:30 PM. Dial-in: 123-456-7890", "datetime": "2024-03-12T14:30:00", "description": "Conference call", "location": ""},
 {"message": "Pick up groceries on Friday after work.", "datetime": "2024-03-15T17:00:00", "description": "Pick up groceries", "location": ""},
 {"message": "Movie night with friends on Saturday at 8 PM.", "datetime": "2024-03-16T20:00:00", "description": "Movie night with friends", "location": ""},
 {"message": "Workout session next Monday morning at the gym.", "datetime": "2024-03-18T08:00:00", "description": "Workout session", "location": "Gym"},
 {"message": "Team lunch next Wednesday at noon.", "datetime": "2024-03-20T12:00:00", "description": "Team lunch", "location": ""},
 {"message": "Board meeting on Thursday at 9:00 AM in the boardroom.", "datetime": "2024-03-14T09:00:00", "description": "Board meeting", "location": "Boardroom"},
 {"message": "Flight to New York City on Friday evening.", "datetime": "2024-03-15T18:00:00", "description": "Flight to New York City", "location": ""},
 {"message": "Coffee with Jane next Tuesday at 11:30 AM.", "datetime": "2024-03-19T11:30:00", "description": "Coffee with Jane", "location": ""},
 {"message": "Dentist appointment on Wednesday at 2 PM.", "datetime": "2024-03-13T14:00:00", "description": "Dentist appointment", "location": ""},
 {"message": "Team outing next Friday afternoon.", "datetime": "2024-03-15T12:00:00", "description": "Team outing", "location": ""},
 {"message": "Book club meeting on Thursday at 7:30 PM.", "datetime": "2024-03-14T19:30:00", "description": "Book club meeting", "location": ""},
 {"message": "Conference in Chicago next month from April 10th to April 12th.", "datetime": "2024-04-10T00:00:00", "end": "2024-04-12T00:00:00", "description": "Conference in Chicago", "location": ""},
 {"message": "Parent-teacher meeting on Monday at 4:30 PM.", "datetime": "2024-03-18T16:30:00", "description": "Parent-teacher meeting", "location": ""},
 {"message": "Dinner with John next Saturday at 6:30 PM at his place.", "datetime": "2024-03-16T18:30:00", "description": "Dinner with John", "location": "John's place"},
 {"message": "Birthday party for Sarah on Friday night at 8 PM.", "datetime": "2024-03-15T20:00:00", "description": "Birthday party for Sarah", "location": ""},
 {"message": "Conference call on Thursday at 11:00 AM.", "datetime": "2024-03-14T11:00:00", "description": "Conference call", "location": ""},
 {"message": "Meeting with HR on Monday morning at 9 AM.", "datetime": "2024-03-18T09:00:00", "description": "Meeting with HR", "location": ""},
 {"message": "Conference in London next week from April 1st to April 3rd.", "datetime": "2024-04-01T00:00:00", "end": "2024-04-03T00:00:00", "description": "Conference in London", "location": ""},
 {"message": "Lunch with colleagues on Thursday at 12:30 PM.", "datetime": "2024-03-14T12:30:00", "description": "Lunch with colleagues", "location": ""},
 {"message": "Board meeting next Tuesday at 10 AM.", "datetime": "2024-03-19T10:00:00", "description": "Board meeting", "location": ""},
 {"message": "Workshop on Saturday morning at 9:30 AM in the auditorium.", "datetime": "2024-03-16T09:30:00", "description": "Workshop", "location": "Auditorium"},
 {"message": "Dinner party at Mike's place next Friday at 7:00 PM.", "datetime": "2024-03-15T19:00:00", "description": "Dinner party at Mike's place", "location": "Mike's place"},
 {"message": "Training session on Monday afternoon at 2 PM.", "datetime": "2024-03-18T14:00:00", "description": "Training session", "location": ""},
 {"message": "Coffee meeting on Wednesday at 10:30 AM.", "datetime": "2024-03-13T10:30:00", "description": "Coffee meeting", "location": ""},
 {"message": "Flight to Paris on Sunday morning at 9:00 AM.", "datetime": "2024-03-17T09:00:00", "description": "Flight to Paris", "location": ""},
 {"message": "Client presentation on Thursday at 2:00 PM in the conference room.", "datetime": "2024-03-14T14:00:00", "description": "Client presentation", "location": "Conference room"},
 {"message": "Dentist appointment on Tuesday at 11:00 AM.", "datetime": "2024-03-19T11:00:00", "description": "Dentist appointment", "location": ""},
 {"message": "Team building event next Friday at 1:00 PM.", "datetime": "2024-03-15T13:00:00", "description": "Team building event", "location": ""},
 {"message": "Business trip to San Francisco from April 5th to April 7th.", "datetime": "2024-04-05T00:00:00", "end": "2024-04-07T00:00:00", "description": "Business trip to San Francisco", "location": ""},
 {"message": "Meeting with Sarah on Monday at 4:00 PM.", "datetime": "2024-03-18T16:00:00", "description": "Meeting with Sarah", "location": ""},
 {"message": "Dinner reservation for two on Friday night at 7:30 PM.", "datetime": "2024-03-15T19:30:00", "description": "Dinner reservation for two", "location": ""},
 {"message": "Video conference call on Tuesday at 3:00 PM.", "datetime": "2024-03-19T15:00:00", "description": "Video conference call", "location": ""},
 {"message": "Networking event on Wednesday evening at 6:00 PM.", "datetime": "2024-03-13T18:00:00", "description": "Networking event", "location": ""},
 {"message": "Pick up dry cleaning on Thursday afternoon.", "datetime": "2024-03-14T12:00:00", "description": "Pick up dry cleaning", "location": ""},
 {"message": "Coffee catch-up with Mark on Tuesday morning at 10 AM.", "datetime": "2024-03-19T10:00:00", "description": "Coffee catch-up with Mark", "location": ""},
 {"message": "Volunteer work at the shelter on Saturday afternoon.", "datetime": "2024-03-16T12:00:00", "description": "Volunteer work at the shelter", "location": ""},
 {"message": "Dinner with the Smiths on Sunday evening at 6:30 PM.", "datetime": "2024-03-17T18:30:00", "description": "Dinner with the Smiths", "location": ""},
 {"message": "Conference call with investors on Monday at 11:00 AM.", "datetime": "2024-03-18T11:00:00", "description": "Conference call with investors", "location": ""},
 {"message": "Lunch meeting with client on Thursday at 1:00 PM.", "datetime": "2024-03-14T13:00:00", "description": "Lunch meeting with client", "location": ""},
 {"message": "Conference in Berlin next month from April 8th to April 10th.", "datetime": "2024-04-08T00:00:00", "end": "2024-04-10T00:00:00", "description": "Conference in Berlin", "location": ""},
 {"message": "Meeting with project team on Monday at 2:00 PM.", "datetime": "2024-03-18T14:00:00", "description": "Meeting with project team", "location": ""},
 {"message": "Workout session at the gym on Wednesday at 6:00 AM.", "datetime": "2024-03-13T06:00:00", "description": "Workout session at the gym", "location": ""},
 {"message": "Family dinner on Sunday at 7:00 PM.", "datetime": "2024-03-17T19:00:00", "description": "Family dinner", "location": ""},
 {"message": "Client meeting on Friday at 2:30 PM in the boardroom.", "datetime": "2024-03-15T14:30:00", "description": "Client meeting", "location": "Boardroom"},
 {"message": "Doctor's appointment on Monday at 10:00 AM.", "datetime": "2024-03-18T10:00:00", "description": "Doctor's appointment", "location": ""},
 {"message": "Movie night with friends next Saturday at 8:00 PM.", "datetime": "2024-03-16T20:00:00", "description": "Movie night with friends", "location": ""},
 {"message": "Conference call with team members on Tuesday at 11:00 AM.", "datetime": "2024-03-19T11:00:00", "description": "Conference call with team members", "location": ""},
 {"message": "Dinner at the new restaurant on Friday evening at 7:30 PM.", "datetime": "2024-03-15T19:30:00", "description": "Dinner at the new restaurant", "location": ""},
 {"message": "Meeting with clients on Wednesday at 3:00 PM.", "datetime": "2024-03-13T15:00:00", "description": "Meeting with clients", "location": ""},
 {"message": "Lunch with colleagues next Thursday at 1:00 PM.", "datetime": "2024-03-14T13:00:00", "description": "Lunch with colleagues", "location": ""},
 {"message": "Parent-teacher meeting on Monday at 3:00 PM.", "datetime": "2024-03-18T15:00:00", "description": "Parent-teacher meeting", "location": ""},
 {"message": "Flight to Tokyo next month on April 9th.", "datetime": "2024-04-09T00:00:00", "description": "Flight to Tokyo", "location": ""},
 {"message": "Meeting with the marketing team on Tuesday at 2:00 PM.", "datetime": "2024-03-19T14:00:00", "description": "Meeting with the marketing team", "location": ""},
 {"message": "Dinner with friends on Saturday at 7:00 PM.", "datetime": "2024-03-16T19:00:00", "description": "Dinner with friends", "location": ""},
 {"message": "Team meeting on Monday at 11:00 AM.", "datetime": "2024-03-18T11:00:00", "description": "Team meeting", "location": ""},
 {"message": "Conference call with the IT department on Thursday at 10:00 AM.", "datetime": "2024-03-14T10:00:00", "description": "Conference call with the IT department", "location": ""},
 {"message": "Lunch meeting with Jane on Wednesday at 12:00 PM.", "datetime": "2024-03-13T12:00:00", "description": "Lunch meeting with Jane", "location": ""},
 {"message": "Conference in Paris next month from April 10th to April 12th.", "datetime": "2024-04-10T00:00:00", "end": "2024-04-12T00:00:00", "description": "Conference in Paris", "location": ""},
 {"message": "Workshop on Friday afternoon at 3:00 PM.", "datetime": "2024-03-15T15:00:00", "description": "Workshop", "location": ""},
 {"message": "Dinner with family next Sunday at 6:00 PM.", "datetime": "2024-03-17T18:00:00", "description": "Dinner with family", "location": ""},
 {"message": "Conference call with the sales team on Monday at 2:00 PM.", "datetime": "2024-03-18T14:00:00", "description": "Conference call with the sales team", "location": ""},
 {"message": "Doctor's appointment on Thursday at 10:30 AM.", "datetime": "2024-03-14T10:30:00", "description": "Doctor's appointment", "location": ""},
 {"message": "Meeting with the CEO on Tuesday at 9:00 AM.", "datetime": "2024-03-19T09:00:00", "description": "Meeting with the CEO", "location": ""},
 {"message": "Lunch with friends on Friday at 1:00 PM.", "datetime": "2024-03-15T13:00:00", "description": "Lunch with friends", "location": ""},
 {"message": "Meeting with the legal team on Monday at 3:30 PM.", "datetime": "2024-03-18T15:30:00", "description": "Meeting with the legal team", "location": ""},
 {"message": "Conference in Tokyo next month from April 9th to April 11th.", "datetime": "2024-04-09T00:00:00", "end": "2024-04-11T00:00:00", "description": "Conference in Tokyo", "location": ""},
 {"message": "Team meeting on Thursday at 11:00 AM.", "datetime": "2024-03-14T11:00:00", "description": "Team meeting", "location": ""},
 {"message": "Dinner with clients on Wednesday at 7:30 PM.", "datetime": "2024-03-13T19:30:00", "description": "Dinner with clients", "location": ""},
 {"message": "Doctor's appointment on Friday at 9:00 AM.", "datetime": "2024-03-15T09:00:00", "description": "Doctor's appointment", "location": ""},
 {"message": "Coffee meeting with Sarah on Tuesday at 10:00 AM.", "datetime": "2024-03-19T10:00:00", "description": "Coffee meeting with Sarah", "location": ""},
 {"message": "Conference call with clients on Monday at 4:00 PM.", "datetime": "2024-03-18T16:00:00", "description": "Conference call with clients", "location": ""}
]
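A minimal sketch (not part of this commit) of reading data-flattened.json and pairing each message with its target fields; the key names match the entries above, and every key other than "message" is treated as part of the label:

import json

with open("data-flattened.json") as f:
    records = json.load(f)

for rec in records[:3]:
    # Everything except the input message is what the model should produce.
    target = {k: v for k, v in rec.items() if k != "message"}
    print(rec["message"], "->", json.dumps(target))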
data.json
ADDED
@@ -0,0 +1,620 @@
[
 {"message": "Let's meet for lunch tomorrow at 12 PM at the Italian restaurant on Main Street.", "labels": {"datetime": "2024-03-12T12:00:00", "description": "Lunch meeting", "location": "Italian restaurant on Main Street"}},
 {"message": "Reminder: Team meeting on Friday at 10 AM in the conference room.", "labels": {"datetime": "2024-03-15T10:00:00", "description": "Team meeting", "location": "Conference room"}},
 {"message": "Don't forget the doctor's appointment next Monday at 3:30 PM.", "labels": {"datetime": "2024-03-18T15:30:00", "description": "Doctor's appointment", "location": ""}},
 {"message": "Dinner with parents this Saturday evening at 7 PM.", "labels": {"datetime": "2024-03-16T19:00:00", "description": "Dinner with parents", "location": ""}},
 {"message": "Meeting with client next Tuesday at 9:30 AM at their office.", "labels": {"datetime": "2024-03-19T09:30:00", "description": "Meeting with client", "location": "Client's office"}},
 {"message": "Soccer practice on Wednesday at 4:00 PM at the park.", "labels": {"datetime": "2024-03-13T16:00:00", "description": "Soccer practice", "location": "Park"}},
 {"message": "Conference call tomorrow at 2:30 PM. Dial-in: 123-456-7890", "labels": {"datetime": "2024-03-12T14:30:00", "description": "Conference call", "location": ""}},
 {"message": "Pick up groceries on Friday after work.", "labels": {"datetime": "2024-03-15T17:00:00", "description": "Pick up groceries", "location": ""}},
 {"message": "Movie night with friends on Saturday at 8 PM.", "labels": {"datetime": "2024-03-16T20:00:00", "description": "Movie night with friends", "location": ""}},
 {"message": "Workout session next Monday morning at the gym.", "labels": {"datetime": "2024-03-18T08:00:00", "description": "Workout session", "location": "Gym"}},
 {"message": "Team lunch next Wednesday at noon.", "labels": {"datetime": "2024-03-20T12:00:00", "description": "Team lunch", "location": ""}},
 {"message": "Board meeting on Thursday at 9:00 AM in the boardroom.", "labels": {"datetime": "2024-03-14T09:00:00", "description": "Board meeting", "location": "Boardroom"}},
 {"message": "Flight to New York City on Friday evening.", "labels": {"datetime": "2024-03-15T18:00:00", "description": "Flight to New York City", "location": ""}},
 {"message": "Coffee with Jane next Tuesday at 11:30 AM.", "labels": {"datetime": "2024-03-19T11:30:00", "description": "Coffee with Jane", "location": ""}},
 {"message": "Dentist appointment on Wednesday at 2 PM.", "labels": {"datetime": "2024-03-13T14:00:00", "description": "Dentist appointment", "location": ""}},
 {"message": "Team outing next Friday afternoon.", "labels": {"datetime": "2024-03-15T12:00:00", "description": "Team outing", "location": ""}},
 {"message": "Book club meeting on Thursday at 7:30 PM.", "labels": {"datetime": "2024-03-14T19:30:00", "description": "Book club meeting", "location": ""}},
 {"message": "Conference in Chicago next month from April 10th to April 12th.", "labels": {"datetime": "2024-04-10T00:00:00", "end": "2024-04-12T00:00:00", "description": "Conference in Chicago", "location": ""}},
 {"message": "Parent-teacher meeting on Monday at 4:30 PM.", "labels": {"datetime": "2024-03-18T16:30:00", "description": "Parent-teacher meeting", "location": ""}},
 {"message": "Dinner with John next Saturday at 6:30 PM at his place.", "labels": {"datetime": "2024-03-16T18:30:00", "description": "Dinner with John", "location": "John's place"}},
 {"message": "Birthday party for Sarah on Friday night at 8 PM.", "labels": {"datetime": "2024-03-15T20:00:00", "description": "Birthday party for Sarah", "location": ""}},
 {"message": "Conference call on Thursday at 11:00 AM.", "labels": {"datetime": "2024-03-14T11:00:00", "description": "Conference call", "location": ""}},
 {"message": "Meeting with HR on Monday morning at 9 AM.", "labels": {"datetime": "2024-03-18T09:00:00", "description": "Meeting with HR", "location": ""}},
 {"message": "Conference in London next week from April 1st to April 3rd.", "labels": {"datetime": "2024-04-01T00:00:00", "end": "2024-04-03T00:00:00", "description": "Conference in London", "location": ""}},
 {"message": "Lunch with colleagues on Thursday at 12:30 PM.", "labels": {"datetime": "2024-03-14T12:30:00", "description": "Lunch with colleagues", "location": ""}},
 {"message": "Board meeting next Tuesday at 10 AM.", "labels": {"datetime": "2024-03-19T10:00:00", "description": "Board meeting", "location": ""}},
 {"message": "Workshop on Saturday morning at 9:30 AM in the auditorium.", "labels": {"datetime": "2024-03-16T09:30:00", "description": "Workshop", "location": "Auditorium"}},
 {"message": "Dinner party at Mike's place next Friday at 7:00 PM.", "labels": {"datetime": "2024-03-15T19:00:00", "description": "Dinner party at Mike's place", "location": "Mike's place"}},
 {"message": "Training session on Monday afternoon at 2 PM.", "labels": {"datetime": "2024-03-18T14:00:00", "description": "Training session", "location": ""}},
 {"message": "Coffee meeting on Wednesday at 10:30 AM.", "labels": {"datetime": "2024-03-13T10:30:00", "description": "Coffee meeting", "location": ""}},
 {"message": "Flight to Paris on Sunday morning at 9:00 AM.", "labels": {"datetime": "2024-03-17T09:00:00", "description": "Flight to Paris", "location": ""}},
 {"message": "Client presentation on Thursday at 2:00 PM in the conference room.", "labels": {"datetime": "2024-03-14T14:00:00", "description": "Client presentation", "location": "Conference room"}},
 {"message": "Dentist appointment on Tuesday at 11:00 AM.", "labels": {"datetime": "2024-03-19T11:00:00", "description": "Dentist appointment", "location": ""}},
 {"message": "Team building event next Friday at 1:00 PM.", "labels": {"datetime": "2024-03-15T13:00:00", "description": "Team building event", "location": ""}},
 {"message": "Business trip to San Francisco from April 5th to April 7th.", "labels": {"datetime": "2024-04-05T00:00:00", "end": "2024-04-07T00:00:00", "description": "Business trip to San Francisco", "location": ""}},
 {"message": "Meeting with Sarah on Monday at 4:00 PM.", "labels": {"datetime": "2024-03-18T16:00:00", "description": "Meeting with Sarah", "location": ""}},
 {"message": "Dinner reservation for two on Friday night at 7:30 PM.", "labels": {"datetime": "2024-03-15T19:30:00", "description": "Dinner reservation for two", "location": ""}},
 {"message": "Video conference call on Tuesday at 3:00 PM.", "labels": {"datetime": "2024-03-19T15:00:00", "description": "Video conference call", "location": ""}},
 {"message": "Networking event on Wednesday evening at 6:00 PM.", "labels": {"datetime": "2024-03-13T18:00:00", "description": "Networking event", "location": ""}},
 {"message": "Pick up dry cleaning on Thursday afternoon.", "labels": {"datetime": "2024-03-14T12:00:00", "description": "Pick up dry cleaning", "location": ""}},
 {"message": "Coffee catch-up with Mark on Tuesday morning at 10 AM.", "labels": {"datetime": "2024-03-19T10:00:00", "description": "Coffee catch-up with Mark", "location": ""}},
 {"message": "Volunteer work at the shelter on Saturday afternoon.", "labels": {"datetime": "2024-03-16T12:00:00", "description": "Volunteer work at the shelter", "location": ""}},
 {"message": "Dinner with the Smiths on Sunday evening at 6:30 PM.", "labels": {"datetime": "2024-03-17T18:30:00", "description": "Dinner with the Smiths", "location": ""}},
 {"message": "Conference call with investors on Monday at 11:00 AM.", "labels": {"datetime": "2024-03-18T11:00:00", "description": "Conference call with investors", "location": ""}},
 {"message": "Lunch meeting with client on Thursday at 1:00 PM.", "labels": {"datetime": "2024-03-14T13:00:00", "description": "Lunch meeting with client", "location": ""}},
 {"message": "Conference in Berlin next month from April 8th to April 10th.", "labels": {"datetime": "2024-04-08T00:00:00", "end": "2024-04-10T00:00:00", "description": "Conference in Berlin", "location": ""}},
 {"message": "Meeting with project team on Monday at 2:00 PM.", "labels": {"datetime": "2024-03-18T14:00:00", "description": "Meeting with project team", "location": ""}},
 {"message": "Workout session at the gym on Wednesday at 6:00 AM.", "labels": {"datetime": "2024-03-13T06:00:00", "description": "Workout session at the gym", "location": ""}},
 {"message": "Family dinner on Sunday at 7:00 PM.", "labels": {"datetime": "2024-03-17T19:00:00", "description": "Family dinner", "location": ""}},
 {"message": "Client meeting on Friday at 2:30 PM in the boardroom.", "labels": {"datetime": "2024-03-15T14:30:00", "description": "Client meeting", "location": "Boardroom"}},
 {"message": "Doctor's appointment on Monday at 10:00 AM.", "labels": {"datetime": "2024-03-18T10:00:00", "description": "Doctor's appointment", "location": ""}},
 {"message": "Movie night with friends next Saturday at 8:00 PM.", "labels": {"datetime": "2024-03-16T20:00:00", "description": "Movie night with friends", "location": ""}},
 {"message": "Conference call with team members on Tuesday at 11:00 AM.", "labels": {"datetime": "2024-03-19T11:00:00", "description": "Conference call with team members", "location": ""}},
 {"message": "Dinner at the new restaurant on Friday evening at 7:30 PM.", "labels": {"datetime": "2024-03-15T19:30:00", "description": "Dinner at the new restaurant", "location": ""}},
 {"message": "Meeting with clients on Wednesday at 3:00 PM.", "labels": {"datetime": "2024-03-13T15:00:00", "description": "Meeting with clients", "location": ""}},
 {"message": "Lunch with colleagues next Thursday at 1:00 PM.", "labels": {"datetime": "2024-03-14T13:00:00", "description": "Lunch with colleagues", "location": ""}},
 {"message": "Parent-teacher meeting on Monday at 3:00 PM.", "labels": {"datetime": "2024-03-18T15:00:00", "description": "Parent-teacher meeting", "location": ""}},
 {"message": "Flight to Tokyo next month on April 9th.", "labels": {"datetime": "2024-04-09T00:00:00", "description": "Flight to Tokyo", "location": ""}},
 {"message": "Meeting with the marketing team on Tuesday at 2:00 PM.", "labels": {"datetime": "2024-03-19T14:00:00", "description": "Meeting with the marketing team", "location": ""}},
 {"message": "Dinner with friends on Saturday at 7:00 PM.", "labels": {"datetime": "2024-03-16T19:00:00", "description": "Dinner with friends", "location": ""}},
 {"message": "Team meeting on Monday at 11:00 AM.", "labels": {"datetime": "2024-03-18T11:00:00", "description": "Team meeting", "location": ""}},
 {"message": "Conference call with the IT department on Thursday at 10:00 AM.", "labels": {"datetime": "2024-03-14T10:00:00", "description": "Conference call with the IT department", "location": ""}},
 {"message": "Lunch meeting with Jane on Wednesday at 12:00 PM.", "labels": {"datetime": "2024-03-13T12:00:00", "description": "Lunch meeting with Jane", "location": ""}},
 {"message": "Conference in Paris next month from April 10th to April 12th.", "labels": {"datetime": "2024-04-10T00:00:00", "end": "2024-04-12T00:00:00", "description": "Conference in Paris", "location": ""}},
 {"message": "Workshop on Friday afternoon at 3:00 PM.", "labels": {"datetime": "2024-03-15T15:00:00", "description": "Workshop", "location": ""}},
 {"message": "Dinner with family next Sunday at 6:00 PM.", "labels": {"datetime": "2024-03-17T18:00:00", "description": "Dinner with family", "location": ""}},
 {"message": "Conference call with the sales team on Monday at 2:00 PM.", "labels": {"datetime": "2024-03-18T14:00:00", "description": "Conference call with the sales team", "location": ""}},
 {"message": "Doctor's appointment on Thursday at 10:30 AM.", "labels": {"datetime": "2024-03-14T10:30:00", "description": "Doctor's appointment", "location": ""}},
 {"message": "Meeting with the CEO on Tuesday at 9:00 AM.", "labels": {"datetime": "2024-03-19T09:00:00", "description": "Meeting with the CEO", "location": ""}},
 {"message": "Lunch with friends on Friday at 1:00 PM.", "labels": {"datetime": "2024-03-15T13:00:00", "description": "Lunch with friends", "location": ""}},
 {"message": "Meeting with the legal team on Monday at 3:30 PM.", "labels": {"datetime": "2024-03-18T15:30:00", "description": "Meeting with the legal team", "location": ""}},
 {"message": "Conference in Tokyo next month from April 9th to April 11th.", "labels": {"datetime": "2024-04-09T00:00:00", "end": "2024-04-11T00:00:00", "description": "Conference in Tokyo", "location": ""}},
 {"message": "Team meeting on Thursday at 11:00 AM.", "labels": {"datetime": "2024-03-14T11:00:00", "description": "Team meeting", "location": ""}},
 {"message": "Dinner with clients on Wednesday at 7:30 PM.", "labels": {"datetime": "2024-03-13T19:30:00", "description": "Dinner with clients", "location": ""}},
 {"message": "Doctor's appointment on Friday at 9:00 AM.", "labels": {"datetime": "2024-03-15T09:00:00", "description": "Doctor's appointment", "location": ""}},
 {"message": "Coffee meeting with Sarah on Tuesday at 10:00 AM.", "labels": {"datetime": "2024-03-19T10:00:00", "description": "Coffee meeting with Sarah", "location": ""}},
 {"message": "Conference call with clients on Monday at 4:00 PM.", "labels": {"datetime": "2024-03-18T16:00:00", "description": "Conference call with clients", "location": ""}}
]
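data.json stores the same examples with the label fields nested under a "labels" key. A minimal sketch of flattening it into the data-flattened.json shape above; this is an assumption about how the two files relate, not necessarily what the repo's own scripts (e.g. squash.py) do:

import json

with open("data.json") as f:
    nested = json.load(f)

# Merge each entry's "labels" dict into the top level alongside "message".
flattened = [{"message": rec["message"], **rec["labels"]} for rec in nested]

with open("data-flattened.json", "w") as f:
    json.dump(flattened, f, indent=1)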
data2.json
ADDED
@@ -0,0 +1,310 @@
[
 {"message": "Let's meet for lunch tomorrow at 12 PM at the Italian restaurant on Main Street.", "details": "datetime: 2024-03-12T12:00:00, end: , description: Lunch meeting, location: Italian restaurant on Main Street"},
 {"message": "Reminder: Team meeting on Friday at 10 AM in the conference room.", "details": "datetime: 2024-03-15T10:00:00, end: , description: Team meeting, location: Conference room"},
 {"message": "Don't forget the doctor's appointment next Monday at 3:30 PM.", "details": "datetime: 2024-03-18T15:30:00, end: , description: Doctor's appointment, location: "},
 {"message": "Dinner with parents this Saturday evening at 7 PM.", "details": "datetime: 2024-03-16T19:00:00, end: , description: Dinner with parents, location: "},
 {"message": "Meeting with client next Tuesday at 9:30 AM at their office.", "details": "datetime: 2024-03-19T09:30:00, end: , description: Meeting with client, location: Client's office"},
 {"message": "Soccer practice on Wednesday at 4:00 PM at the park.", "details": "datetime: 2024-03-13T16:00:00, end: , description: Soccer practice, location: Park"},
 {"message": "Conference call tomorrow at 2:30 PM. Dial-in: 123-456-7890", "details": "datetime: 2024-03-12T14:30:00, end: , description: Conference call, location: "},
 {"message": "Pick up groceries on Friday after work.", "details": "datetime: 2024-03-15T17:00:00, end: , description: Pick up groceries, location: "},
 {"message": "Movie night with friends on Saturday at 8 PM.", "details": "datetime: 2024-03-16T20:00:00, end: , description: Movie night with friends, location: "},
 {"message": "Workout session next Monday morning at the gym.", "details": "datetime: 2024-03-18T08:00:00, end: , description: Workout session, location: Gym"},
 {"message": "Team lunch next Wednesday at noon.", "details": "datetime: 2024-03-20T12:00:00, end: , description: Team lunch, location: "},
 {"message": "Board meeting on Thursday at 9:00 AM in the boardroom.", "details": "datetime: 2024-03-14T09:00:00, end: , description: Board meeting, location: Boardroom"},
 {"message": "Flight to New York City on Friday evening.", "details": "datetime: 2024-03-15T18:00:00, end: , description: Flight to New York City, location: "},
 {"message": "Coffee with Jane next Tuesday at 11:30 AM.", "details": "datetime: 2024-03-19T11:30:00, end: , description: Coffee with Jane, location: "},
 {"message": "Dentist appointment on Wednesday at 2 PM.", "details": "datetime: 2024-03-13T14:00:00, end: , description: Dentist appointment, location: "},
 {"message": "Team outing next Friday afternoon.", "details": "datetime: 2024-03-15T12:00:00, end: , description: Team outing, location: "},
 {"message": "Book club meeting on Thursday at 7:30 PM.", "details": "datetime: 2024-03-14T19:30:00, end: , description: Book club meeting, location: "},
 {"message": "Conference in Chicago next month from April 10th to April 12th.", "details": "datetime: 2024-04-10T00:00:00, end: 2024-04-12T00:00:00, description: Conference in Chicago, location: "},
 {"message": "Parent-teacher meeting on Monday at 4:30 PM.", "details": "datetime: 2024-03-18T16:30:00, end: , description: Parent-teacher meeting, location: "},
 {"message": "Dinner with John next Saturday at 6:30 PM at his place.", "details": "datetime: 2024-03-16T18:30:00, end: , description: Dinner with John, location: John's place"},
 {"message": "Birthday party for Sarah on Friday night at 8 PM.", "details": "datetime: 2024-03-15T20:00:00, end: , description: Birthday party for Sarah, location: "},
 {"message": "Conference call on Thursday at 11:00 AM.", "details": "datetime: 2024-03-14T11:00:00, end: , description: Conference call, location: "},
 {"message": "Meeting with HR on Monday morning at 9 AM.", "details": "datetime: 2024-03-18T09:00:00, end: , description: Meeting with HR, location: "},
 {"message": "Conference in London next week from April 1st to April 3rd.", "details": "datetime: 2024-04-01T00:00:00, end: 2024-04-03T00:00:00, description: Conference in London, location: "},
 {"message": "Lunch with colleagues on Thursday at 12:30 PM.", "details": "datetime: 2024-03-14T12:30:00, end: , description: Lunch with colleagues, location: "},
 {"message": "Board meeting next Tuesday at 10 AM.", "details": "datetime: 2024-03-19T10:00:00, end: , description: Board meeting, location: "},
 {"message": "Workshop on Saturday morning at 9:30 AM in the auditorium.", "details": "datetime: 2024-03-16T09:30:00, end: , description: Workshop, location: Auditorium"},
 {"message": "Dinner party at Mike's place next Friday at 7:00 PM.", "details": "datetime: 2024-03-15T19:00:00, end: , description: Dinner party at Mike's place, location: Mike's place"},
 {"message": "Training session on Monday afternoon at 2 PM.", "details": "datetime: 2024-03-18T14:00:00, end: , description: Training session, location: "},
 {"message": "Coffee meeting on Wednesday at 10:30 AM.", "details": "datetime: 2024-03-13T10:30:00, end: , description: Coffee meeting, location: "},
 {"message": "Flight to Paris on Sunday morning at 9:00 AM.", "details": "datetime: 2024-03-17T09:00:00, end: , description: Flight to Paris, location: "},
 {"message": "Client presentation on Thursday at 2:00 PM in the conference room.", "details": "datetime: 2024-03-14T14:00:00, end: , description: Client presentation, location: Conference room"},
 {"message": "Dentist appointment on Tuesday at 11:00 AM.", "details": "datetime: 2024-03-19T11:00:00, end: , description: Dentist appointment, location: "},
 {"message": "Team building event next Friday at 1:00 PM.", "details": "datetime: 2024-03-15T13:00:00, end: , description: Team building event, location: "},
 {"message": "Business trip to San Francisco from April 5th to April 7th.", "details": "datetime: 2024-04-05T00:00:00, end: 2024-04-07T00:00:00, description: Business trip to San Francisco, location: "},
 {"message": "Meeting with Sarah on Monday at 4:00 PM.", "details": "datetime: 2024-03-18T16:00:00, end: , description: Meeting with Sarah, location: "},
 {"message": "Dinner reservation for two on Friday night at 7:30 PM.", "details": "datetime: 2024-03-15T19:30:00, end: , description: Dinner reservation for two, location: "},
 {"message": "Video conference call on Tuesday at 3:00 PM.", "details": "datetime: 2024-03-19T15:00:00, end: , description: Video conference call, location: "},
 {"message": "Networking event on Wednesday evening at 6:00 PM.", "details": "datetime: 2024-03-13T18:00:00, end: , description: Networking event, location: "},
 {"message": "Pick up dry cleaning on Thursday afternoon.", "details": "datetime: 2024-03-14T12:00:00, end: , description: Pick up dry cleaning, location: "},
 {"message": "Coffee catch-up with Mark on Tuesday morning at 10 AM.", "details": "datetime: 2024-03-19T10:00:00, end: , description: Coffee catch-up with Mark, location: "},
 {"message": "Volunteer work at the shelter on Saturday afternoon.", "details": "datetime: 2024-03-16T12:00:00, end: , description: Volunteer work at the shelter, location: "},
 {"message": "Dinner with the Smiths on Sunday evening at 6:30 PM.", "details": "datetime: 2024-03-17T18:30:00, end: , description: Dinner with the Smiths, location: "},
 {"message": "Conference call with investors on Monday at 11:00 AM.", "details": "datetime: 2024-03-18T11:00:00, end: , description: Conference call with investors, location: "},
 {"message": "Lunch meeting with client on Thursday at 1:00 PM.", "details": "datetime: 2024-03-14T13:00:00, end: , description: Lunch meeting with client, location: "},
 {"message": "Conference in Berlin next month from April 8th to April 10th.", "details": "datetime: 2024-04-08T00:00:00, end: 2024-04-10T00:00:00, description: Conference in Berlin, location: "},
 {"message": "Meeting with project team on Monday at 2:00 PM.", "details": "datetime: 2024-03-18T14:00:00, end: , description: Meeting with project team, location: "},
 {"message": "Workout session at the gym on Wednesday at 6:00 AM.", "details": "datetime: 2024-03-13T06:00:00, end: , description: Workout session at the gym, location: "},
 {"message": "Family dinner on Sunday at 7:00 PM.", "details": "datetime: 2024-03-17T19:00:00, end: , description: Family dinner, location: "},
 {"message": "Client meeting on Friday at 2:30 PM in the boardroom.", "details": "datetime: 2024-03-15T14:30:00, end: , description: Client meeting, location: Boardroom"},
 {"message": "Doctor's appointment on Monday at 10:00 AM.", "details": "datetime: 2024-03-18T10:00:00, end: , description: Doctor's appointment, location: "},
 {"message": "Movie night with friends next Saturday at 8:00 PM.", "details": "datetime: 2024-03-16T20:00:00, end: , description: Movie night with friends, location: "},
 {"message": "Conference call with team members on Tuesday at 11:00 AM.", "details": "datetime: 2024-03-19T11:00:00, end: , description: Conference call with team members, location: "},
 {"message": "Dinner at the new restaurant on Friday evening at 7:30 PM.", "details": "datetime: 2024-03-15T19:30:00, end: , description: Dinner at the new restaurant, location: "},
 {"message": "Meeting with clients on Wednesday at 3:00 PM.", "details": "datetime: 2024-03-13T15:00:00, end: , description: Meeting with clients, location: "},
 {"message": "Lunch with colleagues next Thursday at 1:00 PM.", "details": "datetime: 2024-03-14T13:00:00, end: , description: Lunch with colleagues, location: "},
 {"message": "Parent-teacher meeting on Monday at 3:00 PM.", "details": "datetime: 2024-03-18T15:00:00, end: , description: Parent-teacher meeting, location: "},
 {"message": "Flight to Tokyo next month on April 9th.", "details": "datetime: 2024-04-09T00:00:00, end: , description: Flight to Tokyo, location: "},
 {"message": "Meeting with the marketing team on Tuesday at 2:00 PM.", "details": "datetime: 2024-03-19T14:00:00, end: , description: Meeting with the marketing team, location: "},
 {"message": "Dinner with friends on Saturday at 7:00 PM.", "details": "datetime: 2024-03-16T19:00:00, end: , description: Dinner with friends, location: "},
 {"message": "Team meeting on Monday at 11:00 AM.", "details": "datetime: 2024-03-18T11:00:00, end: , description: Team meeting, location: "},
 {"message": "Conference call with the IT department on Thursday at 10:00 AM.", "details": "datetime: 2024-03-14T10:00:00, end: , description: Conference call with the IT department, location: "},
 {"message": "Lunch meeting with Jane on Wednesday at 12:00 PM.", "details": "datetime: 2024-03-13T12:00:00, end: , description: Lunch meeting with Jane, location: "},
 {"message": "Conference in Paris next month from April 10th to April 12th.", "details": "datetime: 2024-04-10T00:00:00, end: 2024-04-12T00:00:00, description: Conference in Paris, location: "},
 {"message": "Workshop on Friday afternoon at 3:00 PM.", "details": "datetime: 2024-03-15T15:00:00, end: , description: Workshop, location: "},
 {"message": "Dinner with family next Sunday at 6:00 PM.", "details": "datetime: 2024-03-17T18:00:00, end: , description: Dinner with family, location: "},
 {"message": "Conference call with the sales team on Monday at 2:00 PM.", "details": "datetime: 2024-03-18T14:00:00, end: , description: Conference call with the sales team, location: "},
 {"message": "Doctor's appointment on Thursday at 10:30 AM.", "details": "datetime: 2024-03-14T10:30:00, end: , description: Doctor's appointment, location: "},
 {"message": "Meeting with the CEO on Tuesday at 9:00 AM.", "details": "datetime: 2024-03-19T09:00:00, end: , description: Meeting with the CEO, location: "},
 {"message": "Lunch with friends on Friday at 1:00 PM.", "details": "datetime: 2024-03-15T13:00:00, end: , description: Lunch with friends, location: "},
 {"message": "Meeting with the legal team on Monday at 3:30 PM.", "details": "datetime: 2024-03-18T15:30:00, end: , description: Meeting with the legal team, location: "},
 {"message": "Conference in Tokyo next month from April 9th to April 11th.", "details": "datetime: 2024-04-09T00:00:00, end: 2024-04-11T00:00:00, description: Conference in Tokyo, location: "},
 {"message": "Team meeting on Thursday at 11:00 AM.", "details": "datetime: 2024-03-14T11:00:00, end: , description: Team meeting, location: "},
 {"message": "Dinner with clients on Wednesday at 7:30 PM.", "details": "datetime: 2024-03-13T19:30:00, end: , description: Dinner with clients, location: "},
 {"message": "Doctor's appointment on Friday at 9:00 AM.", "details": "datetime: 2024-03-15T09:00:00, end: , description: Doctor's appointment, location: "},
 {"message": "Coffee meeting with Sarah on Tuesday at 10:00 AM.", "details": "datetime: 2024-03-19T10:00:00, end: , description: Coffee meeting with Sarah, location: "},
 {"message": "Conference call with clients on Monday at 4:00 PM.", "details": "datetime: 2024-03-18T16:00:00, end: , description: Conference call with clients, location: "}
]
data3.jsonl
ADDED
@@ -0,0 +1,80 @@
[
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Let's meet for lunch tomorrow at 12 PM at the Italian restaurant on Main Street.\nThe Details are as follows: {'datetime': '2024-03-12T12:00:00', 'description': 'Lunch meeting', 'location': 'Italian restaurant on Main Street'}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Reminder: Team meeting on Friday at 10 AM in the conference room.\nThe Details are as follows: {'datetime': '2024-03-15T10:00:00', 'description': 'Team meeting', 'location': 'Conference room'}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Don't forget the doctor's appointment next Monday at 3:30 PM.\nThe Details are as follows: {'datetime': '2024-03-18T15:30:00', 'description': \"Doctor's appointment\", 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Dinner with parents this Saturday evening at 7 PM.\nThe Details are as follows: {'datetime': '2024-03-16T19:00:00', 'description': 'Dinner with parents', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Meeting with client next Tuesday at 9:30 AM at their office.\nThe Details are as follows: {'datetime': '2024-03-19T09:30:00', 'description': 'Meeting with client', 'location': \"Client's office\"}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Soccer practice on Wednesday at 4:00 PM at the park.\nThe Details are as follows: {'datetime': '2024-03-13T16:00:00', 'description': 'Soccer practice', 'location': 'Park'}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Conference call tomorrow at 2:30 PM. Dial-in: 123-456-7890\nThe Details are as follows: {'datetime': '2024-03-12T14:30:00', 'description': 'Conference call', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Pick up groceries on Friday after work.\nThe Details are as follows: {'datetime': '2024-03-15T17:00:00', 'description': 'Pick up groceries', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Movie night with friends on Saturday at 8 PM.\nThe Details are as follows: {'datetime': '2024-03-16T20:00:00', 'description': 'Movie night with friends', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Workout session next Monday morning at the gym.\nThe Details are as follows: {'datetime': '2024-03-18T08:00:00', 'description': 'Workout session', 'location': 'Gym'}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Team lunch next Wednesday at noon.\nThe Details are as follows: {'datetime': '2024-03-20T12:00:00', 'description': 'Team lunch', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Board meeting on Thursday at 9:00 AM in the boardroom.\nThe Details are as follows: {'datetime': '2024-03-14T09:00:00', 'description': 'Board meeting', 'location': 'Boardroom'}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Flight to New York City on Friday evening.\nThe Details are as follows: {'datetime': '2024-03-15T18:00:00', 'description': 'Flight to New York City', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Coffee with Jane next Tuesday at 11:30 AM.\nThe Details are as follows: {'datetime': '2024-03-19T11:30:00', 'description': 'Coffee with Jane', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Dentist appointment on Wednesday at 2 PM.\nThe Details are as follows: {'datetime': '2024-03-13T14:00:00', 'description': 'Dentist appointment', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Team outing next Friday afternoon.\nThe Details are as follows: {'datetime': '2024-03-15T12:00:00', 'description': 'Team outing', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Book club meeting on Thursday at 7:30 PM.\nThe Details are as follows: {'datetime': '2024-03-14T19:30:00', 'description': 'Book club meeting', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Conference in Chicago next month from April 10th to April 12th.\nThe Details are as follows: {'datetime': '2024-04-10T00:00:00', 'end': '2024-04-12T00:00:00', 'description': 'Conference in Chicago', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Parent-teacher meeting on Monday at 4:30 PM.\nThe Details are as follows: {'datetime': '2024-03-18T16:30:00', 'description': 'Parent-teacher meeting', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Dinner with John next Saturday at 6:30 PM at his place.\nThe Details are as follows: {'datetime': '2024-03-16T18:30:00', 'description': 'Dinner with John', 'location': \"John's place\"}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Birthday party for Sarah on Friday night at 8 PM.\nThe Details are as follows: {'datetime': '2024-03-15T20:00:00', 'description': 'Birthday party for Sarah', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Conference call on Thursday at 11:00 AM.\nThe Details are as follows: {'datetime': '2024-03-14T11:00:00', 'description': 'Conference call', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Meeting with HR on Monday morning at 9 AM.\nThe Details are as follows: {'datetime': '2024-03-18T09:00:00', 'description': 'Meeting with HR', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Conference in London next week from April 1st to April 3rd.\nThe Details are as follows: {'datetime': '2024-04-01T00:00:00', 'end': '2024-04-03T00:00:00', 'description': 'Conference in London', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Lunch with colleagues on Thursday at 12:30 PM.\nThe Details are as follows: {'datetime': '2024-03-14T12:30:00', 'description': 'Lunch with colleagues', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Board meeting next Tuesday at 10 AM.\nThe Details are as follows: {'datetime': '2024-03-19T10:00:00', 'description': 'Board meeting', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Workshop on Saturday morning at 9:30 AM in the auditorium.\nThe Details are as follows: {'datetime': '2024-03-16T09:30:00', 'description': 'Workshop', 'location': 'Auditorium'}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Dinner party at Mike's place next Friday at 7:00 PM.\nThe Details are as follows: {'datetime': '2024-03-15T19:00:00', 'description': \"Dinner party at Mike's place\", 'location': \"Mike's place\"}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Training session on Monday afternoon at 2 PM.\nThe Details are as follows: {'datetime': '2024-03-18T14:00:00', 'description': 'Training session', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Coffee meeting on Wednesday at 10:30 AM.\nThe Details are as follows: {'datetime': '2024-03-13T10:30:00', 'description': 'Coffee meeting', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Flight to Paris on Sunday morning at 9:00 AM.\nThe Details are as follows: {'datetime': '2024-03-17T09:00:00', 'description': 'Flight to Paris', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Client presentation on Thursday at 2:00 PM in the conference room.\nThe Details are as follows: {'datetime': '2024-03-14T14:00:00', 'description': 'Client presentation', 'location': 'Conference room'}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Dentist appointment on Tuesday at 11:00 AM.\nThe Details are as follows: {'datetime': '2024-03-19T11:00:00', 'description': 'Dentist appointment', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Team building event next Friday at 1:00 PM.\nThe Details are as follows: {'datetime': '2024-03-15T13:00:00', 'description': 'Team building event', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Business trip to San Francisco from April 5th to April 7th.\nThe Details are as follows: {'datetime': '2024-04-05T00:00:00', 'end': '2024-04-07T00:00:00', 'description': 'Business trip to San Francisco', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Meeting with Sarah on Monday at 4:00 PM.\nThe Details are as follows: {'datetime': '2024-03-18T16:00:00', 'description': 'Meeting with Sarah', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Dinner reservation for two on Friday night at 7:30 PM.\nThe Details are as follows: {'datetime': '2024-03-15T19:30:00', 'description': 'Dinner reservation for two', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Video conference call on Tuesday at 3:00 PM.\nThe Details are as follows: {'datetime': '2024-03-19T15:00:00', 'description': 'Video conference call', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Networking event on Wednesday evening at 6:00 PM.\nThe Details are as follows: {'datetime': '2024-03-13T18:00:00', 'description': 'Networking event', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Pick up dry cleaning on Thursday afternoon.\nThe Details are as follows: {'datetime': '2024-03-14T12:00:00', 'description': 'Pick up dry cleaning', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Coffee catch-up with Mark on Tuesday morning at 10 AM.\nThe Details are as follows: {'datetime': '2024-03-19T10:00:00', 'description': 'Coffee catch-up with Mark', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Volunteer work at the shelter on Saturday afternoon.\nThe Details are as follows: {'datetime': '2024-03-16T12:00:00', 'description': 'Volunteer work at the shelter', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Dinner with the Smiths on Sunday evening at 6:30 PM.\nThe Details are as follows: {'datetime': '2024-03-17T18:30:00', 'description': 'Dinner with the Smiths', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Conference call with investors on Monday at 11:00 AM.\nThe Details are as follows: {'datetime': '2024-03-18T11:00:00', 'description': 'Conference call with investors', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Lunch meeting with client on Thursday at 1:00 PM.\nThe Details are as follows: {'datetime': '2024-03-14T13:00:00', 'description': 'Lunch meeting with client', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Conference in Berlin next month from April 8th to April 10th.\nThe Details are as follows: {'datetime': '2024-04-08T00:00:00', 'end': '2024-04-10T00:00:00', 'description': 'Conference in Berlin', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Meeting with project team on Monday at 2:00 PM.\nThe Details are as follows: {'datetime': '2024-03-18T14:00:00', 'description': 'Meeting with project team', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Workout session at the gym on Wednesday at 6:00 AM.\nThe Details are as follows: {'datetime': '2024-03-13T06:00:00', 'description': 'Workout session at the gym', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Family dinner on Sunday at 7:00 PM.\nThe Details are as follows: {'datetime': '2024-03-17T19:00:00', 'description': 'Family dinner', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Client meeting on Friday at 2:30 PM in the boardroom.\nThe Details are as follows: {'datetime': '2024-03-15T14:30:00', 'description': 'Client meeting', 'location': 'Boardroom'}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Doctor's appointment on Monday at 10:00 AM.\nThe Details are as follows: {'datetime': '2024-03-18T10:00:00', 'description': \"Doctor's appointment\", 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Movie night with friends next Saturday at 8:00 PM.\nThe Details are as follows: {'datetime': '2024-03-16T20:00:00', 'description': 'Movie night with friends', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Conference call with team members on Tuesday at 11:00 AM.\nThe Details are as follows: {'datetime': '2024-03-19T11:00:00', 'description': 'Conference call with team members', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Dinner at the new restaurant on Friday evening at 7:30 PM.\nThe Details are as follows: {'datetime': '2024-03-15T19:30:00', 'description': 'Dinner at the new restaurant', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Meeting with clients on Wednesday at 3:00 PM.\nThe Details are as follows: {'datetime': '2024-03-13T15:00:00', 'description': 'Meeting with clients', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Lunch with colleagues next Thursday at 1:00 PM.\nThe Details are as follows: {'datetime': '2024-03-14T13:00:00', 'description': 'Lunch with colleagues', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Parent-teacher meeting on Monday at 3:00 PM.\nThe Details are as follows: {'datetime': '2024-03-18T15:00:00', 'description': 'Parent-teacher meeting', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Flight to Tokyo next month on April 9th.\nThe Details are as follows: {'datetime': '2024-04-09T00:00:00', 'description': 'Flight to Tokyo', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Meeting with the marketing team on Tuesday at 2:00 PM.\nThe Details are as follows: {'datetime': '2024-03-19T14:00:00', 'description': 'Meeting with the marketing team', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Dinner with friends on Saturday at 7:00 PM.\nThe Details are as follows: {'datetime': '2024-03-16T19:00:00', 'description': 'Dinner with friends', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Team meeting on Monday at 11:00 AM.\nThe Details are as follows: {'datetime': '2024-03-18T11:00:00', 'description': 'Team meeting', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Conference call with the IT department on Thursday at 10:00 AM.\nThe Details are as follows: {'datetime': '2024-03-14T10:00:00', 'description': 'Conference call with the IT department', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Lunch meeting with Jane on Wednesday at 12:00 PM.\nThe Details are as follows: {'datetime': '2024-03-13T12:00:00', 'description': 'Lunch meeting with Jane', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Conference in Paris next month from April 10th to April 12th.\nThe Details are as follows: {'datetime': '2024-04-10T00:00:00', 'end': '2024-04-12T00:00:00', 'description': 'Conference in Paris', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Workshop on Friday afternoon at 3:00 PM.\nThe Details are as follows: {'datetime': '2024-03-15T15:00:00', 'description': 'Workshop', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Dinner with family next Sunday at 6:00 PM.\nThe Details are as follows: {'datetime': '2024-03-17T18:00:00', 'description': 'Dinner with family', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Conference call with the sales team on Monday at 2:00 PM.\nThe Details are as follows: {'datetime': '2024-03-18T14:00:00', 'description': 'Conference call with the sales team', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Doctor's appointment on Thursday at 10:30 AM.\nThe Details are as follows: {'datetime': '2024-03-14T10:30:00', 'description': \"Doctor's appointment\", 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Meeting with the CEO on Tuesday at 9:00 AM.\nThe Details are as follows: {'datetime': '2024-03-19T09:00:00', 'description': 'Meeting with the CEO', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Lunch with friends on Friday at 1:00 PM.\nThe Details are as follows: {'datetime': '2024-03-15T13:00:00', 'description': 'Lunch with friends', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Meeting with the legal team on Monday at 3:30 PM.\nThe Details are as follows: {'datetime': '2024-03-18T15:30:00', 'description': 'Meeting with the legal team', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Conference in Tokyo next month from April 9th to April 11th.\nThe Details are as follows: {'datetime': '2024-04-09T00:00:00', 'end': '2024-04-11T00:00:00', 'description': 'Conference in Tokyo', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Team meeting on Thursday at 11:00 AM.\nThe Details are as follows: {'datetime': '2024-03-14T11:00:00', 'description': 'Team meeting', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Dinner with clients on Wednesday at 7:30 PM.\nThe Details are as follows: {'datetime': '2024-03-13T19:30:00', 'description': 'Dinner with clients', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Doctor's appointment on Friday at 9:00 AM.\nThe Details are as follows: {'datetime': '2024-03-15T09:00:00', 'description': \"Doctor's appointment\", 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Coffee meeting with Sarah on Tuesday at 10:00 AM.\nThe Details are as follows: {'datetime': '2024-03-19T10:00:00', 'description': 'Coffee meeting with Sarah', 'location': ''}\n",
"Extract the calendar events from the following text, the text will contain a place, time, and possibly a location. Here is the text: Conference call with clients on Monday at 4:00 PM.\nThe Details are as follows: {'datetime': '2024-03-18T16:00:00', 'description': 'Conference call with clients', 'location': ''}\n"

]
dataset.csv
ADDED
The diff for this file is too large to render.
See raw diff
elif.ipynb
ADDED
@@ -0,0 +1,452 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "cf86fed9cae54700b31a616cd82b7180",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating train split: 0 examples [00:00, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from datasets import load_dataset\n",
    "# load dataset from data3.jsonl file:\n",
    "eli5 = load_dataset(\"json\", data_files=\"data3.jsonl\", split=\"train[:80%]\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 95,
   "metadata": {},
   "outputs": [],
   "source": [
    "eli5 = eli5.train_test_split(test_size=0.2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'text': \"Extract the calendar events from the following text, the text will contain a place, time , land possibly a location. Here is the text: : Board meeting next Tuesday at 10 AM.\\nThe Details are as follows: {'datetime': '2024-03-19T10:00:00', 'description': 'Board meeting', 'location': ''}\\n\"}"
      ]
     },
     "execution_count": 96,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "eli5[\"train\"][0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import AutoTokenizer\n",
    "\n",
    "tokenizer = AutoTokenizer.from_pretrained(\"distilbert/distilgpt2\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {},
   "outputs": [],
   "source": [
    "eli5 = eli5.flatten()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'text': \"Extract the calendar events from the following text, the text will contain a place, time , land possibly a location. Here is the text: : Board meeting next Tuesday at 10 AM.\\nThe Details are as follows: {'datetime': '2024-03-19T10:00:00', 'description': 'Board meeting', 'location': ''}\\n\"}"
      ]
     },
     "execution_count": 99,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "eli5[\"train\"][0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [],
   "source": [
    "def preprocess_function(examples):\n",
    "    # tokenize the \"text\" column of each batch\n",
    "    return tokenizer(examples[\"text\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "7d326a1d4117454f98bfd6c7f575120c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Map (num_proc=4): 0%| | 0/49 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "13f25ef46a43486ea69fec77f62f7c9a",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Map (num_proc=4): 0%| | 0/13 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "tokenized_eli5 = eli5.map(\n",
    "    preprocess_function,\n",
    "    batched=True,\n",
    "    num_proc=4,\n",
    "    remove_columns=eli5[\"train\"].column_names,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {},
   "outputs": [],
   "source": [
    "block_size = 128\n",
    "\n",
    "\n",
    "def group_texts(examples):\n",
    "    # Concatenate all texts.\n",
    "    concatenated_examples = {k: sum(examples[k], []) for k in examples.keys()}\n",
    "    total_length = len(concatenated_examples[list(examples.keys())[0]])\n",
    "    # We drop the small remainder, we could add padding if the model supported it instead of this drop, you can\n",
    "    # customize this part to your needs.\n",
    "    if total_length >= block_size:\n",
    "        total_length = (total_length // block_size) * block_size\n",
    "    # Split by chunks of block_size.\n",
    "    result = {\n",
    "        k: [t[i : i + block_size] for i in range(0, total_length, block_size)]\n",
    "        for k, t in concatenated_examples.items()\n",
    "    }\n",
    "    result[\"labels\"] = result[\"input_ids\"].copy()\n",
    "    return result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a571ed26269640278514bfb2b02b1e03",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Map (num_proc=4): 0%| | 0/4 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3385544255ec4af79d74e2d131845e07",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Map (num_proc=4): 0%| | 0/4 [00:00<?, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "lm_dataset = tokenized_eli5.map(group_texts, batched=True, num_proc=4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import DataCollatorForLanguageModeling\n",
    "\n",
    "tokenizer.pad_token = tokenizer.eos_token\n",
    "data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import AutoModelForCausalLM, TrainingArguments, Trainer\n",
    "\n",
    "model = AutoModelForCausalLM.from_pretrained(\"distilbert/distilgpt2\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Model moved to MPS device\n"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "\n",
    "# Check that MPS is available\n",
    "if not torch.backends.mps.is_available():\n",
    "    if not torch.backends.mps.is_built():\n",
    "        print(\"MPS not available because the current PyTorch install was not \"\n",
    "              \"built with MPS enabled.\")\n",
    "    else:\n",
    "        print(\"MPS not available because the current MacOS version is not 12.3+ \"\n",
    "              \"and/or you do not have an MPS-enabled device on this machine.\")\n",
    "\n",
    "else:\n",
    "    mps_device = torch.device(\"mps\")\n",
    "    model.to(mps_device)\n",
    "    print(\"Model moved to MPS device\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a71332654a414bfe87d416ce502c9cdc",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       " 0%| | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "eab9e9bcc2814d3e8ba55806dc9d4a4f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       " 0%| | 0/1 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'eval_loss': 6.667893886566162, 'eval_runtime': 0.0262, 'eval_samples_per_second': 152.548, 'eval_steps_per_second': 38.137, 'epoch': 1.0}\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b9634b37ea2d436c9700ce311651fdae",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       " 0%| | 0/1 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'eval_loss': 6.2145514488220215, 'eval_runtime': 0.1232, 'eval_samples_per_second': 32.47, 'eval_steps_per_second': 8.118, 'epoch': 2.0}\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3af25ad29ad04c319677ec04dc22d3d1",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       " 0%| | 0/1 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'eval_loss': 5.993268966674805, 'eval_runtime': 0.0204, 'eval_samples_per_second': 196.346, 'eval_steps_per_second': 49.087, 'epoch': 3.0}\n",
      "{'train_runtime': 1.588, 'train_samples_per_second': 7.556, 'train_steps_per_second': 1.889, 'train_loss': 6.412024815877278, 'epoch': 3.0}\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "TrainOutput(global_step=3, training_loss=6.412024815877278, metrics={'train_runtime': 1.588, 'train_samples_per_second': 7.556, 'train_steps_per_second': 1.889, 'train_loss': 6.412024815877278, 'epoch': 3.0})"
      ]
     },
     "execution_count": 107,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "training_args = TrainingArguments(\n",
    "    output_dir=\"my_awesome_eli5_clm-model\",\n",
    "    evaluation_strategy=\"epoch\",\n",
    "    learning_rate=2e-5,\n",
    "    weight_decay=0.01,\n",
    "    push_to_hub=True,\n",
    ")\n",
    "\n",
    "trainer = Trainer(\n",
    "    model=model,\n",
    "    args=training_args,\n",
    "    train_dataset=lm_dataset[\"train\"],\n",
    "    eval_dataset=lm_dataset[\"test\"],\n",
    "    data_collator=data_collator,\n",
    ")\n",
    "\n",
    "trainer.train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[{'generated_text': \"Extract the calendar events from the following text, the text will contain a place, time , land possibly a location. Here is the text: : Let's meet for lunch tomorrow at 12 PM at the Italian restaurant on Main Street.\\nThe Details are as follows: { if (is_empty(the_time()) : return next_day_long(_.length(this); } } }\\nThe Time is: 12 PM on Sunday 12th at the Italian restaurant on Main Street.\\nTaste: 12 PM on Sunday 8th at the Italian restaurant on Main Street.\\nThe Time is: 11 PM on Monday 9th at the Italian restaurant on Main Street.\\nThe Time is: 11 AM on Monday 9th at the Italian restaurant on Main Street.\\nTaste: 11 AM on Sunday 8th at the Italian restaurant on Main Street.\\nThe Time is: 11 AM on Monday 9th at the Italian restaurant on Main Street.\\nThe Time is: 11 AM\"}]"
      ]
     },
     "execution_count": 110,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "prompt = \"Extract the calendar events from the following text, the text will contain a place, time , land possibly a location. Here is the text: : Let's meet for lunch tomorrow at 12 PM at the Italian restaurant on Main Street.\\nThe Details are as follows: {\"\n",
    "from transformers import pipeline\n",
    "\n",
    "generator = pipeline(\"text-generation\", model=model, tokenizer=tokenizer, max_length=200)\n",
    "generator(prompt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import AutoTokenizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
foo.py
ADDED
@@ -0,0 +1,15 @@
import json

newdata = []

with open("data.json", "r", encoding="utf8") as file:
    data = json.load(file)
    for elem in data:
        newdata.append(
            f"Convert the following message to json format: {elem['message']}\n" +
            f"Details: {elem['labels']}\n"
        )

# write modified data to data3.json
with open("data3.json", "w", encoding="utf8") as file:
    json.dump(newdata, file, indent=4)
foobar
ADDED
File without changes
foobar.txt
ADDED
@@ -0,0 +1,25 @@
Complete the code:

from typing import List


def has_close_elements(numbers: List[float], threshold: float) -> bool:
    """ Check if in given list of numbers, are any two numbers closer to each other than
    given threshold.
    >>> has_close_elements([1.0, 2.0, 3.0], 0.5)
    False
    >>> has_close_elements([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3)
    True
    """





def truncate_number(number: float) -> float:
    // Given a positive floating point number, it can be decomposed into
    // and integer part (largest integer smaller than given number) and decimals
    // (leftover part always smaller than 1).
    //
    // Return the decimal part of the number.
    // >>> truncate_number 3.5 // 0.5
has_closest_elements.evy
ADDED
@@ -0,0 +1,38 @@
func has_close_element:bool nums:[]num threshold:num
    for i := range (len nums)
        for j := range (i + 1) (len nums)
            if (abs nums[i]-nums[j]) < threshold
                return true
            end
        end
    end
    return false
end

func abs:num n:num
    if n < 0
        return -n
    end
    return n
end

fails := 0
total := 0

func assert want:any got:any
    total = total + 1
    if want != got
        fails = fails + 1
        printf "want != got: want %v got %v\n" want got
    end
end

func finished
    printf "%v of %v tests passed\n" (total - fails) total
end

// -- Test Cases Start -- //
assert false (has_close_element [1.0 2.0 3.0] 0.5)
assert true (has_close_element [1.0 2.8 3.0 4.0 5.0 2.0] 0.3)
// -- Test Cases End -- //
finished
ner.ipynb
ADDED
@@ -0,0 +1,363 @@
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": null,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"!pip install svgling\n"
|
10 |
+
]
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"cell_type": "code",
|
14 |
+
"execution_count": null,
|
15 |
+
"metadata": {},
|
16 |
+
"outputs": [],
|
17 |
+
"source": [
|
18 |
+
"!pip install transformers datasets evaluate seqeval"
|
19 |
+
]
|
20 |
+
},
|
21 |
+
{
|
22 |
+
"cell_type": "code",
|
23 |
+
"execution_count": null,
|
24 |
+
"metadata": {},
|
25 |
+
"outputs": [],
|
26 |
+
"source": [
|
27 |
+
"from datasets import load_dataset\n",
|
28 |
+
"import pandas as pd\n",
|
29 |
+
"\n",
|
30 |
+
"wnut = load_dataset(\"wnut_17\")\n",
|
31 |
+
"df = pd.DataFrame(wnut[\"train\"]) # Assuming 'train' split, you can choose other splits\n",
|
32 |
+
"\n",
|
33 |
+
"# Save the DataFrame to a CSV file\n",
|
34 |
+
"df.to_csv('dataset.csv', index=False) # Change 'dataset.csv' to your desired file name\n"
|
35 |
+
]
|
36 |
+
},
|
37 |
+
{
|
38 |
+
"cell_type": "code",
|
39 |
+
"execution_count": null,
|
40 |
+
"metadata": {},
|
41 |
+
"outputs": [],
|
42 |
+
"source": [
|
43 |
+
"wnut[\"train\"][0]"
|
44 |
+
]
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"cell_type": "code",
|
48 |
+
"execution_count": null,
|
49 |
+
"metadata": {},
|
50 |
+
"outputs": [],
|
51 |
+
"source": []
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"cell_type": "code",
|
55 |
+
"execution_count": null,
|
56 |
+
"metadata": {},
|
57 |
+
"outputs": [],
|
58 |
+
"source": []
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"cell_type": "code",
|
62 |
+
"execution_count": null,
|
63 |
+
"metadata": {},
|
64 |
+
"outputs": [],
|
65 |
+
"source": [
|
66 |
+
"import nltk\n",
|
67 |
+
"import ssl\n",
|
68 |
+
"\n",
|
69 |
+
"try:\n",
|
70 |
+
" _create_unverified_https_context = ssl._create_unverified_context\n",
|
71 |
+
"except AttributeError:\n",
|
72 |
+
" pass\n",
|
73 |
+
"else:\n",
|
74 |
+
" ssl._create_default_https_context = _create_unverified_https_context\n",
|
75 |
+
"\n",
|
76 |
+
"nltk.download('punkt')"
|
77 |
+
]
|
78 |
+
},
|
79 |
+
{
|
80 |
+
"cell_type": "code",
|
81 |
+
"execution_count": null,
|
82 |
+
"metadata": {},
|
83 |
+
"outputs": [],
|
84 |
+
"source": [
|
85 |
+
"label_list = wnut[\"train\"].features[f\"ner_tags\"].feature.names\n",
|
86 |
+
"label_list"
|
87 |
+
]
|
88 |
+
},
|
89 |
+
{
|
90 |
+
"cell_type": "code",
|
91 |
+
"execution_count": null,
|
92 |
+
"metadata": {},
|
93 |
+
"outputs": [],
|
94 |
+
"source": [
|
95 |
+
"from transformers import AutoTokenizer\n",
|
96 |
+
"\n",
|
97 |
+
"tokenizer = AutoTokenizer.from_pretrained(\"distilbert/distilbert-base-uncased\")"
|
98 |
+
]
|
99 |
+
},
|
100 |
+
{
|
101 |
+
"cell_type": "code",
|
102 |
+
"execution_count": null,
|
103 |
+
"metadata": {},
|
104 |
+
"outputs": [],
|
105 |
+
"source": [
|
106 |
+
"example = wnut[\"train\"][0]\n",
|
107 |
+
"tokenized_input = tokenizer(example[\"tokens\"], is_split_into_words=True)\n",
|
108 |
+
"tokens = tokenizer.convert_ids_to_tokens(tokenized_input[\"input_ids\"])\n",
|
109 |
+
"tokens"
|
110 |
+
]
|
111 |
+
},
|
112 |
+
{
|
113 |
+
"cell_type": "code",
|
114 |
+
"execution_count": null,
|
115 |
+
"metadata": {},
|
116 |
+
"outputs": [],
|
117 |
+
"source": [
|
118 |
+
"def tokenize_and_align_labels(examples):\n",
|
119 |
+
" tokenized_inputs = tokenizer(examples[\"tokens\"], truncation=True, is_split_into_words=True)\n",
|
120 |
+
"\n",
|
121 |
+
" labels = []\n",
|
122 |
+
" for i, label in enumerate(examples[f\"ner_tags\"]):\n",
|
123 |
+
" word_ids = tokenized_inputs.word_ids(batch_index=i) # Map tokens to their respective word.\n",
|
124 |
+
" previous_word_idx = None\n",
|
125 |
+
" label_ids = []\n",
|
126 |
+
" for word_idx in word_ids: # Set the special tokens to -100.\n",
|
127 |
+
" if word_idx is None:\n",
|
128 |
+
" label_ids.append(-100)\n",
|
129 |
+
" elif word_idx != previous_word_idx: # Only label the first token of a given word.\n",
|
130 |
+
" label_ids.append(label[word_idx])\n",
|
131 |
+
" else:\n",
|
132 |
+
" label_ids.append(-100)\n",
|
133 |
+
" previous_word_idx = word_idx\n",
|
134 |
+
" labels.append(label_ids)\n",
|
135 |
+
"\n",
|
136 |
+
" tokenized_inputs[\"labels\"] = labels\n",
|
137 |
+
" return tokenized_inputs"
|
138 |
+
]
|
139 |
+
},
|
140 |
+
{
|
141 |
+
"cell_type": "code",
|
142 |
+
"execution_count": null,
|
143 |
+
"metadata": {},
|
144 |
+
"outputs": [],
|
145 |
+
"source": [
|
146 |
+
"tokenized_wnut = wnut.map(tokenize_and_align_labels, batched=True)"
|
147 |
+
]
|
148 |
+
},
|
149 |
+
{
|
150 |
+
"cell_type": "code",
|
151 |
+
"execution_count": null,
|
152 |
+
"metadata": {},
|
153 |
+
"outputs": [],
|
154 |
+
"source": [
|
155 |
+
"tokenized_wnut = wnut.map(tokenize_and_align_labels, batched=True)"
|
156 |
+
]
|
157 |
+
},
|
158 |
+
{
|
159 |
+
"cell_type": "code",
|
160 |
+
"execution_count": null,
|
161 |
+
"metadata": {},
|
162 |
+
"outputs": [],
|
163 |
+
"source": [
|
164 |
+
"import evaluate\n",
|
165 |
+
"\n",
|
166 |
+
"seqeval = evaluate.load(\"seqeval\")"
|
167 |
+
]
|
168 |
+
},
|
169 |
+
{
|
170 |
+
"cell_type": "code",
|
171 |
+
"execution_count": null,
|
172 |
+
"metadata": {},
|
173 |
+
"outputs": [],
|
174 |
+
"source": [
|
175 |
+
"from transformers import DataCollatorForTokenClassification\n",
|
176 |
+
"\n",
|
177 |
+
"data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)"
|
178 |
+
]
|
179 |
+
},
|
180 |
+
{
|
181 |
+
"cell_type": "code",
|
182 |
+
"execution_count": null,
|
183 |
+
"metadata": {},
|
184 |
+
"outputs": [],
|
185 |
+
"source": [
|
186 |
+
"import evaluate\n",
|
187 |
+
"\n",
|
188 |
+
"seqeval = evaluate.load(\"seqeval\")"
|
189 |
+
]
|
190 |
+
},
|
191 |
+
{
|
192 |
+
"cell_type": "code",
|
193 |
+
"execution_count": null,
|
194 |
+
"metadata": {},
|
195 |
+
"outputs": [],
|
196 |
+
"source": [
|
197 |
+
"import numpy as np\n",
|
198 |
+
"\n",
|
199 |
+
"labels = [label_list[i] for i in example[f\"ner_tags\"]]\n",
|
200 |
+
"\n",
|
201 |
+
"\n",
|
202 |
+
"def compute_metrics(p):\n",
|
203 |
+
" predictions, labels = p\n",
|
204 |
+
" predictions = np.argmax(predictions, axis=2)\n",
|
205 |
+
"\n",
|
206 |
+
" true_predictions = [\n",
|
207 |
+
" [label_list[p] for (p, l) in zip(prediction, label) if l != -100]\n",
|
208 |
+
" for prediction, label in zip(predictions, labels)\n",
|
209 |
+
" ]\n",
|
210 |
+
" true_labels = [\n",
|
211 |
+
" [label_list[l] for (p, l) in zip(prediction, label) if l != -100]\n",
|
212 |
+
" for prediction, label in zip(predictions, labels)\n",
|
213 |
+
" ]\n",
|
214 |
+
"\n",
|
215 |
+
" results = seqeval.compute(predictions=true_predictions, references=true_labels)\n",
|
216 |
+
" return {\n",
|
217 |
+
" \"precision\": results[\"overall_precision\"],\n",
|
218 |
+
" \"recall\": results[\"overall_recall\"],\n",
|
219 |
+
" \"f1\": results[\"overall_f1\"],\n",
|
220 |
+
" \"accuracy\": results[\"overall_accuracy\"],\n",
|
221 |
+
" }"
|
222 |
+
]
|
223 |
+
},
|
224 |
+
{
|
225 |
+
"cell_type": "code",
|
226 |
+
"execution_count": null,
|
227 |
+
"metadata": {},
|
228 |
+
"outputs": [],
|
229 |
+
"source": [
|
230 |
+
"id2label = {\n",
|
231 |
+
" 0: \"O\",\n",
|
232 |
+
" 1: \"B-corporation\",\n",
|
233 |
+
" 2: \"I-corporation\",\n",
|
234 |
+
" 3: \"B-creative-work\",\n",
|
235 |
+
" 4: \"I-creative-work\",\n",
|
236 |
+
" 5: \"B-group\",\n",
|
237 |
+
" 6: \"I-group\",\n",
|
238 |
+
" 7: \"B-location\",\n",
|
239 |
+
" 8: \"I-location\",\n",
|
240 |
+
" 9: \"B-person\",\n",
|
241 |
+
" 10: \"I-person\",\n",
|
242 |
+
" 11: \"B-product\",\n",
|
243 |
+
" 12: \"I-product\",\n",
|
244 |
+
"}\n",
|
245 |
+
"label2id = {\n",
|
246 |
+
" \"O\": 0,\n",
|
247 |
+
" \"B-corporation\": 1,\n",
|
248 |
+
" \"I-corporation\": 2,\n",
|
249 |
+
" \"B-creative-work\": 3,\n",
|
250 |
+
" \"I-creative-work\": 4,\n",
|
251 |
+
" \"B-group\": 5,\n",
|
252 |
+
" \"I-group\": 6,\n",
|
253 |
+
" \"B-location\": 7,\n",
|
254 |
+
" \"I-location\": 8,\n",
|
255 |
+
" \"B-person\": 9,\n",
|
256 |
+
" \"I-person\": 10,\n",
|
257 |
+
" \"B-product\": 11,\n",
|
258 |
+
" \"I-product\": 12,\n",
|
259 |
+
"}"
|
260 |
+
]
|
261 |
+
},
|
262 |
+
{
|
263 |
+
"cell_type": "code",
|
264 |
+
"execution_count": null,
|
265 |
+
"metadata": {},
|
266 |
+
"outputs": [],
|
267 |
+
"source": [
|
268 |
+
"from transformers import AutoModelForTokenClassification, TrainingArguments, Trainer\n",
|
269 |
+
"\n",
|
270 |
+
"model = AutoModelForTokenClassification.from_pretrained(\n",
|
271 |
+
" \"distilbert/distilbert-base-uncased\", num_labels=13, id2label=id2label, label2id=label2id\n",
|
272 |
+
")"
|
273 |
+
]
|
274 |
+
},
|
275 |
+
{
|
276 |
+
"cell_type": "code",
|
277 |
+
"execution_count": null,
|
278 |
+
"metadata": {},
|
279 |
+
"outputs": [],
|
280 |
+
"source": [
|
281 |
+
"training_args = TrainingArguments(\n",
|
282 |
+
" output_dir=\"my_awesome_wnut_model\",\n",
|
283 |
+
" learning_rate=2e-5,\n",
|
284 |
+
" per_device_train_batch_size=16,\n",
|
285 |
+
" per_device_eval_batch_size=16,\n",
|
286 |
+
" num_train_epochs=2,\n",
|
287 |
+
" weight_decay=0.01,\n",
|
288 |
+
" evaluation_strategy=\"epoch\",\n",
|
289 |
+
" save_strategy=\"epoch\",\n",
|
290 |
+
" load_best_model_at_end=True,\n",
|
291 |
+
" push_to_hub=False,\n",
|
292 |
+
")\n",
|
293 |
+
"\n",
|
294 |
+
"trainer = Trainer(\n",
|
295 |
+
" model=model,\n",
|
296 |
+
" args=training_args,\n",
|
297 |
+
" train_dataset=tokenized_wnut[\"train\"],\n",
|
298 |
+
" eval_dataset=tokenized_wnut[\"test\"],\n",
|
299 |
+
" tokenizer=tokenizer,\n",
|
300 |
+
" data_collator=data_collator,\n",
|
301 |
+
" compute_metrics=compute_metrics,\n",
|
302 |
+
")\n",
|
303 |
+
"\n",
|
304 |
+
"trainer.train()"
|
305 |
+
]
|
306 |
+
},
|
307 |
+
{
|
308 |
+
"cell_type": "code",
|
309 |
+
"execution_count": null,
|
310 |
+
"metadata": {},
|
311 |
+
"outputs": [],
|
312 |
+
"source": []
|
313 |
+
},
|
314 |
+
{
|
315 |
+
"cell_type": "code",
|
316 |
+
"execution_count": null,
|
317 |
+
"metadata": {},
|
318 |
+
"outputs": [],
|
319 |
+
"source": [
|
320 |
+
"from transformers import pipeline\n",
|
321 |
+
"\n",
|
322 |
+
"text = \"Let's meet for Lunch Tomorrow at 12 PM at the Italian restaurant on Main Street. Simon\"\n",
|
323 |
+
"classifier = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n",
|
324 |
+
"classifier(text)"
|
325 |
+
]
|
326 |
+
},
|
327 |
+
{
|
328 |
+
"cell_type": "code",
|
329 |
+
"execution_count": null,
|
330 |
+
"metadata": {},
|
331 |
+
"outputs": [],
|
332 |
+
"source": []
|
333 |
+
},
|
334 |
+
{
|
335 |
+
"cell_type": "code",
|
336 |
+
"execution_count": null,
|
337 |
+
"metadata": {},
|
338 |
+
"outputs": [],
|
339 |
+
"source": []
|
340 |
+
}
|
341 |
+
],
|
342 |
+
"metadata": {
|
343 |
+
"kernelspec": {
|
344 |
+
"display_name": "Python 3",
|
345 |
+
"language": "python",
|
346 |
+
"name": "python3"
|
347 |
+
},
|
348 |
+
"language_info": {
|
349 |
+
"codemirror_mode": {
|
350 |
+
"name": "ipython",
|
351 |
+
"version": 3
|
352 |
+
},
|
353 |
+
"file_extension": ".py",
|
354 |
+
"mimetype": "text/x-python",
|
355 |
+
"name": "python",
|
356 |
+
"nbconvert_exporter": "python",
|
357 |
+
"pygments_lexer": "ipython3",
|
358 |
+
"version": "3.12.1"
|
359 |
+
}
|
360 |
+
},
|
361 |
+
"nbformat": 4,
|
362 |
+
"nbformat_minor": 2
|
363 |
+
}
|
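ner.ipynb maps `tokenize_and_align_labels` over WNUT but defines the function earlier in the file, outside this excerpt. As a reference point, here is a minimal sketch in the shape of the standard Hugging Face token-classification recipe; it assumes `tokenizer` is the fast DistilBERT tokenizer loaded earlier in the notebook, and it labels only the first sub-token of each word, masking the rest with -100 so the loss ignores them:

```python
def tokenize_and_align_labels(examples):
    # Tokenize pre-split words; a fast tokenizer is required for word_ids().
    tokenized_inputs = tokenizer(examples["tokens"], truncation=True, is_split_into_words=True)

    labels = []
    for i, label in enumerate(examples["ner_tags"]):
        word_ids = tokenized_inputs.word_ids(batch_index=i)
        previous_word_idx = None
        label_ids = []
        for word_idx in word_ids:
            if word_idx is None:
                # Special tokens ([CLS], [SEP]) get -100 so the loss skips them.
                label_ids.append(-100)
            elif word_idx != previous_word_idx:
                # Label only the first sub-token of each word.
                label_ids.append(label[word_idx])
            else:
                label_ids.append(-100)
            previous_word_idx = word_idx
        labels.append(label_ids)

    tokenized_inputs["labels"] = labels
    return tokenized_inputs
```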
nltk.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
notebook.ipynb
ADDED
@@ -0,0 +1,268 @@
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": null,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": []
|
9 |
+
},
|
10 |
+
{
|
11 |
+
"cell_type": "code",
|
12 |
+
"execution_count": 1,
|
13 |
+
"metadata": {},
|
14 |
+
"outputs": [],
|
15 |
+
"source": [
|
16 |
+
"from datasets import load_dataset\n",
|
17 |
+
"\n",
|
18 |
+
"billsum = load_dataset(\"billsum\", split=\"ca_test\")\n",
|
19 |
+
"billsum = billsum.select(range(1000))\n",
|
20 |
+
"billsum = billsum.train_test_split(test_size=0.2)"
|
21 |
+
]
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"cell_type": "code",
|
25 |
+
"execution_count": 2,
|
26 |
+
"metadata": {},
|
27 |
+
"outputs": [],
|
28 |
+
"source": [
|
29 |
+
"from transformers import AutoTokenizer\n",
|
30 |
+
"checkpoint = \"google-t5/t5-small\"\n",
|
31 |
+
"tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
|
32 |
+
"prefix = \"summarize: \"\n",
|
33 |
+
"\n",
|
34 |
+
"def preprocess_function(examples):\n",
|
35 |
+
" inputs = [prefix + doc for doc in examples[\"text\"]]\n",
|
36 |
+
" model_inputs = tokenizer(inputs, max_length=1024, truncation=True, padding=\"max_length\") \n",
|
37 |
+
"\n",
|
38 |
+
" labels = tokenizer(text_target=examples[\"summary\"], max_length=128, truncation=True, padding=\"max_length\")\n",
|
39 |
+
"\n",
|
40 |
+
" model_inputs[\"labels\"] = labels[\"input_ids\"]\n",
|
41 |
+
" return model_inputs\n"
|
42 |
+
]
|
43 |
+
},
|
44 |
+
{
|
45 |
+
"cell_type": "code",
|
46 |
+
"execution_count": 3,
|
47 |
+
"metadata": {},
|
48 |
+
"outputs": [
|
49 |
+
{
|
50 |
+
"data": {
|
51 |
+
"application/vnd.jupyter.widget-view+json": {
|
52 |
+
"model_id": "4dfbb4c779af4a4ca5398622f2bd887d",
|
53 |
+
"version_major": 2,
|
54 |
+
"version_minor": 0
|
55 |
+
},
|
56 |
+
"text/plain": [
|
57 |
+
"Map: 0%| | 0/800 [00:00<?, ? examples/s]"
|
58 |
+
]
|
59 |
+
},
|
60 |
+
"metadata": {},
|
61 |
+
"output_type": "display_data"
|
62 |
+
},
|
63 |
+
{
|
64 |
+
"data": {
|
65 |
+
"application/vnd.jupyter.widget-view+json": {
|
66 |
+
"model_id": "2a4f6446a1e541ed9ef835ca2b2bdfa1",
|
67 |
+
"version_major": 2,
|
68 |
+
"version_minor": 0
|
69 |
+
},
|
70 |
+
"text/plain": [
|
71 |
+
"Map: 0%| | 0/200 [00:00<?, ? examples/s]"
|
72 |
+
]
|
73 |
+
},
|
74 |
+
"metadata": {},
|
75 |
+
"output_type": "display_data"
|
76 |
+
}
|
77 |
+
],
|
78 |
+
"source": [
|
79 |
+
"tokenized_billsum = billsum.map(preprocess_function, batched=True)"
|
80 |
+
]
|
81 |
+
},
|
82 |
+
{
|
83 |
+
"cell_type": "code",
|
84 |
+
"execution_count": 4,
|
85 |
+
"metadata": {},
|
86 |
+
"outputs": [],
|
87 |
+
"source": [
|
88 |
+
"from transformers import AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer\n",
|
89 |
+
"model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)"
|
90 |
+
]
|
91 |
+
},
|
92 |
+
{
|
93 |
+
"cell_type": "code",
|
94 |
+
"execution_count": 5,
|
95 |
+
"metadata": {},
|
96 |
+
"outputs": [
|
97 |
+
{
|
98 |
+
"name": "stdout",
|
99 |
+
"output_type": "stream",
|
100 |
+
"text": [
|
101 |
+
"Model moved to MPS device\n"
|
102 |
+
]
|
103 |
+
}
|
104 |
+
],
|
105 |
+
"source": [
|
106 |
+
"import torch\n",
|
107 |
+
"\n",
|
108 |
+
"# Check that MPS is available\n",
|
109 |
+
"if not torch.backends.mps.is_available():\n",
|
110 |
+
" if not torch.backends.mps.is_built():\n",
|
111 |
+
" print(\"MPS not available because the current PyTorch install was not \"\n",
|
112 |
+
" \"built with MPS enabled.\")\n",
|
113 |
+
" else:\n",
|
114 |
+
" print(\"MPS not available because the current MacOS version is not 12.3+ \"\n",
|
115 |
+
" \"and/or you do not have an MPS-enabled device on this machine.\")\n",
|
116 |
+
"\n",
|
117 |
+
"else:\n",
|
118 |
+
" mps_device = torch.device(\"mps\")\n",
|
119 |
+
" model.to(mps_device)\n",
|
120 |
+
" print(\"Model moved to MPS device\")"
|
121 |
+
]
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"cell_type": "code",
|
125 |
+
"execution_count": 6,
|
126 |
+
"metadata": {},
|
127 |
+
"outputs": [
|
128 |
+
{
|
129 |
+
"name": "stderr",
|
130 |
+
"output_type": "stream",
|
131 |
+
"text": [
|
132 |
+
"/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/transformers/training_args.py:1951: UserWarning: `use_mps_device` is deprecated and will be removed in version 5.0 of 🤗 Transformers. `mps` device will be used by default if available similar to the way `cuda` device is used.Therefore, no action from user is required. \n",
|
133 |
+
" warnings.warn(\n"
|
134 |
+
]
|
135 |
+
}
|
136 |
+
],
|
137 |
+
"source": [
|
138 |
+
"training_args = Seq2SeqTrainingArguments(\n",
|
139 |
+
" output_dir=\"calendar_model\",\n",
|
140 |
+
" evaluation_strategy=\"epoch\",\n",
|
141 |
+
" learning_rate=5e-5,\n",
|
142 |
+
" per_device_train_batch_size=16,\n",
|
143 |
+
" per_device_eval_batch_size=16,\n",
|
144 |
+
" weight_decay=0.01,\n",
|
145 |
+
" save_total_limit=3,\n",
|
146 |
+
" num_train_epochs=1,\n",
|
147 |
+
" predict_with_generate=True,\n",
|
148 |
+
" use_mps_device=True,\n",
|
149 |
+
" # fp16=True,\n",
|
150 |
+
" # push_to_hub=True,\n",
|
151 |
+
")"
|
152 |
+
]
|
153 |
+
},
|
154 |
+
{
|
155 |
+
"cell_type": "code",
|
156 |
+
"execution_count": 7,
|
157 |
+
"metadata": {},
|
158 |
+
"outputs": [],
|
159 |
+
"source": [
|
160 |
+
"import numpy as np\n",
|
161 |
+
"import evaluate\n",
|
162 |
+
"metric = evaluate.load(\"accuracy\")\n"
|
163 |
+
]
|
164 |
+
},
|
165 |
+
{
|
166 |
+
"cell_type": "code",
|
167 |
+
"execution_count": 8,
|
168 |
+
"metadata": {},
|
169 |
+
"outputs": [],
|
170 |
+
"source": [
|
171 |
+
"def compute_metrics(eval_pred):\n",
|
172 |
+
" logits, labels = eval_pred\n",
|
173 |
+
" predictions = np.argmax(logits, axis=-1)\n",
|
174 |
+
" return metric.compute(predictions=predictions, references=labels)"
|
175 |
+
]
|
176 |
+
},
|
177 |
+
{
|
178 |
+
"cell_type": "code",
|
179 |
+
"execution_count": 9,
|
180 |
+
"metadata": {},
|
181 |
+
"outputs": [],
|
182 |
+
"source": [
|
183 |
+
"from transformers import TrainingArguments, Trainer\n",
|
184 |
+
"training_args = TrainingArguments(output_dir=\"test_trainer\", evaluation_strategy=\"epoch\")"
|
185 |
+
]
|
186 |
+
},
|
187 |
+
{
|
188 |
+
"cell_type": "code",
|
189 |
+
"execution_count": null,
|
190 |
+
"metadata": {},
|
191 |
+
"outputs": [],
|
192 |
+
"source": []
|
193 |
+
},
|
194 |
+
{
|
195 |
+
"cell_type": "code",
|
196 |
+
"execution_count": 10,
|
197 |
+
"metadata": {},
|
198 |
+
"outputs": [],
|
199 |
+
"source": [
|
200 |
+
"trainer = Trainer(\n",
|
201 |
+
" model=model,\n",
|
202 |
+
" args=training_args,\n",
|
203 |
+
" train_dataset=tokenized_billsum[\"train\"],\n",
|
204 |
+
" eval_dataset=tokenized_billsum[\"test\"],\n",
|
205 |
+
" compute_metrics=compute_metrics,\n",
|
206 |
+
" )"
|
207 |
+
]
|
208 |
+
},
|
209 |
+
{
|
210 |
+
"cell_type": "code",
|
211 |
+
"execution_count": 11,
|
212 |
+
"metadata": {},
|
213 |
+
"outputs": [
|
214 |
+
{
|
215 |
+
"data": {
|
216 |
+
"application/vnd.jupyter.widget-view+json": {
|
217 |
+
"model_id": "b8af6446b2b344818e0812c345023f53",
|
218 |
+
"version_major": 2,
|
219 |
+
"version_minor": 0
|
220 |
+
},
|
221 |
+
"text/plain": [
|
222 |
+
" 0%| | 0/300 [00:00<?, ?it/s]"
|
223 |
+
]
|
224 |
+
},
|
225 |
+
"metadata": {},
|
226 |
+
"output_type": "display_data"
|
227 |
+
},
|
228 |
+
{
|
229 |
+
"ename": "KeyboardInterrupt",
|
230 |
+
"evalue": "",
|
231 |
+
"output_type": "error",
|
232 |
+
"traceback": [
|
233 |
+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
234 |
+
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
|
235 |
+
"Cell \u001b[0;32mIn[11], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
|
236 |
+
"File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/transformers/trainer.py:1624\u001b[0m, in \u001b[0;36mTrainer.train\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 1622\u001b[0m hf_hub_utils\u001b[38;5;241m.\u001b[39menable_progress_bars()\n\u001b[1;32m 1623\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1624\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43minner_training_loop\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1625\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1626\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1627\u001b[0m \u001b[43m \u001b[49m\u001b[43mtrial\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1628\u001b[0m \u001b[43m \u001b[49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1629\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
|
237 |
+
"File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/transformers/trainer.py:1966\u001b[0m, in \u001b[0;36mTrainer._inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 1960\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maccelerator\u001b[38;5;241m.\u001b[39maccumulate(model):\n\u001b[1;32m 1961\u001b[0m tr_loss_step \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtraining_step(model, inputs)\n\u001b[1;32m 1963\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 1964\u001b[0m args\u001b[38;5;241m.\u001b[39mlogging_nan_inf_filter\n\u001b[1;32m 1965\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_torch_tpu_available()\n\u001b[0;32m-> 1966\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m (torch\u001b[38;5;241m.\u001b[39misnan(tr_loss_step) \u001b[38;5;129;01mor\u001b[39;00m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43misinf\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtr_loss_step\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 1967\u001b[0m ):\n\u001b[1;32m 1968\u001b[0m \u001b[38;5;66;03m# if loss is nan or inf simply add the average of previous logged losses\u001b[39;00m\n\u001b[1;32m 1969\u001b[0m tr_loss \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m tr_loss \u001b[38;5;241m/\u001b[39m (\u001b[38;5;241m1\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mglobal_step \u001b[38;5;241m-\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_globalstep_last_logged)\n\u001b[1;32m 1970\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
|
238 |
+
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
|
239 |
+
]
|
240 |
+
}
|
241 |
+
],
|
242 |
+
"source": [
|
243 |
+
"trainer.train()"
|
244 |
+
]
|
245 |
+
}
|
246 |
+
],
|
247 |
+
"metadata": {
|
248 |
+
"kernelspec": {
|
249 |
+
"display_name": "Python 3",
|
250 |
+
"language": "python",
|
251 |
+
"name": "python3"
|
252 |
+
},
|
253 |
+
"language_info": {
|
254 |
+
"codemirror_mode": {
|
255 |
+
"name": "ipython",
|
256 |
+
"version": 3
|
257 |
+
},
|
258 |
+
"file_extension": ".py",
|
259 |
+
"mimetype": "text/x-python",
|
260 |
+
"name": "python",
|
261 |
+
"nbconvert_exporter": "python",
|
262 |
+
"pygments_lexer": "ipython3",
|
263 |
+
"version": "3.12.1"
|
264 |
+
}
|
265 |
+
},
|
266 |
+
"nbformat": 4,
|
267 |
+
"nbformat_minor": 2
|
268 |
+
}
|
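notebook.ipynb scores the t5-small summarizer with token-level accuracy via `np.argmax`, and the second, plain `TrainingArguments` (output_dir "test_trainer") silently replaces the Seq2Seq configuration built just above it. For summarization, ROUGE over decoded generations is the more usual signal. A sketch, assuming the `Seq2SeqTrainer` path with `predict_with_generate=True` is used and `tokenizer` is the t5-small tokenizer from above; `evaluate.load("rouge")` additionally needs the `rouge_score` package installed:

```python
import numpy as np
import evaluate

rouge = evaluate.load("rouge")

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    # Replace any -100 masking in the labels before decoding.
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
    result = rouge.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)
    return {k: round(v, 4) for k, v in result.items()}
```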
nuner.ipynb
ADDED
@@ -0,0 +1,124 @@
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 30,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"import torch\n",
|
10 |
+
"import transformers\n",
|
11 |
+
"\n",
|
12 |
+
"\n",
|
13 |
+
"model = transformers.AutoModel.from_pretrained(\n",
|
14 |
+
" 'numind/NuNER-v1.0',\n",
|
15 |
+
" output_hidden_states=True\n",
|
16 |
+
")\n",
|
17 |
+
"tokenizer = transformers.AutoTokenizer.from_pretrained(\n",
|
18 |
+
" 'numind/NuNER-v1.0'\n",
|
19 |
+
")\n",
|
20 |
+
"\n",
|
21 |
+
"text = [\n",
|
22 |
+
" \"NuMind is an AI company based in Paris and USA.\",\n",
|
23 |
+
" \"See other models from us on https://huggingface.co/numind\"\n",
|
24 |
+
"]\n",
|
25 |
+
"encoded_input = tokenizer(\n",
|
26 |
+
" text,\n",
|
27 |
+
" return_tensors='pt',\n",
|
28 |
+
" padding=True,\n",
|
29 |
+
" truncation=True\n",
|
30 |
+
")\n",
|
31 |
+
"output = model(**encoded_input)\n",
|
32 |
+
"\n",
|
33 |
+
"# for better quality\n",
|
34 |
+
"emb = torch.cat(\n",
|
35 |
+
" (output.hidden_states[-1], output.hidden_states[-7]),\n",
|
36 |
+
" dim=2\n",
|
37 |
+
")\n",
|
38 |
+
"\n",
|
39 |
+
"# for better speed\n",
|
40 |
+
"# emb = output.hidden_states[-1]\n",
|
41 |
+
"\n"
|
42 |
+
]
|
43 |
+
},
|
44 |
+
{
|
45 |
+
"cell_type": "code",
|
46 |
+
"execution_count": 36,
|
47 |
+
"metadata": {},
|
48 |
+
"outputs": [
|
49 |
+
{
|
50 |
+
"name": "stderr",
|
51 |
+
"output_type": "stream",
|
52 |
+
"text": [
|
53 |
+
"Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at numind/NuNER-v1.0 and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
|
54 |
+
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
|
55 |
+
]
|
56 |
+
},
|
57 |
+
{
|
58 |
+
"ename": "KeyError",
|
59 |
+
"evalue": "'tokens'",
|
60 |
+
"output_type": "error",
|
61 |
+
"traceback": [
|
62 |
+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
63 |
+
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
|
64 |
+
"Cell \u001b[0;32mIn[36], line 25\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m result \u001b[38;5;129;01min\u001b[39;00m results:\n\u001b[1;32m 23\u001b[0m \u001b[38;5;66;03m# Access tokens list using the 'tokens' key (dictionary access)\u001b[39;00m\n\u001b[1;32m 24\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m res \u001b[38;5;129;01min\u001b[39;00m result:\n\u001b[0;32m---> 25\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m token \u001b[38;5;129;01min\u001b[39;00m \u001b[43mres\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mtokens\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m:\n\u001b[1;32m 26\u001b[0m \u001b[38;5;66;03m# Remove the special token prefix (if present)\u001b[39;00m\n\u001b[1;32m 27\u001b[0m word \u001b[38;5;241m=\u001b[39m token[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mword\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mstrip(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mĠ\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 28\u001b[0m \u001b[38;5;66;03m# Look up the entity type based on the predicted label\u001b[39;00m\n",
|
65 |
+
"\u001b[0;31mKeyError\u001b[0m: 'tokens'"
|
66 |
+
]
|
67 |
+
}
|
68 |
+
],
|
69 |
+
"source": [
|
70 |
+
"import torch\n",
|
71 |
+
"import transformers\n",
|
72 |
+
"from transformers import pipeline\n",
|
73 |
+
"\n",
|
74 |
+
"# Load pre-trained NER model (NuNER-v1.0)\n",
|
75 |
+
"ner = pipeline(\"ner\", model=\"numind/NuNER-v1.0\")\n",
|
76 |
+
"\n",
|
77 |
+
"text = [\n",
|
78 |
+
" \"NuMind is an AI company based in Paris and USA.\",\n",
|
79 |
+
" \"See other models from us on https://huggingface.co/numind\"\n",
|
80 |
+
"]\n",
|
81 |
+
"\n",
|
82 |
+
"# Process the text and get NER predictions\n",
|
83 |
+
"results = ner(text)\n",
|
84 |
+
"\n",
|
85 |
+
"label_map = {\n",
|
86 |
+
" \"LABEL_0\": \"ORG\", # Organization\n",
|
87 |
+
" \"LABEL_1\": \"LOC\", # Location\n",
|
88 |
+
" # You can add more labels and their mappings here\n",
|
89 |
+
"}\n",
|
90 |
+
"\n",
|
91 |
+
"for result in results:\n",
|
92 |
+
" # Access tokens list using the 'tokens' key (dictionary access)\n",
|
93 |
+
" for res in result:\n",
|
94 |
+
" # Remove the special token prefix (if present)\n",
|
95 |
+
" word = res['word'].strip('Ġ')\n",
|
96 |
+
" # Look up the entity type based on the predicted label\n",
|
97 |
+
" entity_type = label_map.get(res['entity'], \"UNKNOWN\")\n",
|
98 |
+
" print(f\"Word: {word}, Entity Type: {entity_type}\")\n",
|
99 |
+
"\n"
|
100 |
+
]
|
101 |
+
}
|
102 |
+
],
|
103 |
+
"metadata": {
|
104 |
+
"kernelspec": {
|
105 |
+
"display_name": "Python 3",
|
106 |
+
"language": "python",
|
107 |
+
"name": "python3"
|
108 |
+
},
|
109 |
+
"language_info": {
|
110 |
+
"codemirror_mode": {
|
111 |
+
"name": "ipython",
|
112 |
+
"version": 3
|
113 |
+
},
|
114 |
+
"file_extension": ".py",
|
115 |
+
"mimetype": "text/x-python",
|
116 |
+
"name": "python",
|
117 |
+
"nbconvert_exporter": "python",
|
118 |
+
"pygments_lexer": "ipython3",
|
119 |
+
"version": "3.12.1"
|
120 |
+
}
|
121 |
+
},
|
122 |
+
"nbformat": 4,
|
123 |
+
"nbformat_minor": 2
|
124 |
+
}
|
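Two things stand out in nuner.ipynb's output: the warning that `numind/NuNER-v1.0` ships as a backbone whose token-classification head is freshly initialized (so `pipeline("ner", ...)` on it returns untrained `LABEL_N` scores), and the earlier `KeyError` showing that pipeline results are flat lists of dicts, not objects with a `'tokens'` key. For comparison, a sketch of grouped predictions with a model that already has a trained head; `dslim/bert-base-NER` is only an illustrative stand-in, not something this repo uses:

```python
from transformers import pipeline

# An already fine-tuned NER model; dslim/bert-base-NER is an illustrative
# choice here, not a dependency of this repo.
ner = pipeline("ner", model="dslim/bert-base-NER", aggregation_strategy="simple")

for entity in ner("NuMind is an AI company based in Paris and USA."):
    # Each prediction is a flat dict: entity_group, score, word, start, end.
    print(entity["word"], entity["entity_group"], round(float(entity["score"]), 3))
```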
prompt.md
ADDED
@@ -0,0 +1,469 @@
1 |
+
Evy is a new language. Below is an example of some Evy code, followed by some example Evy programs.
|
2 |
+
|
3 |
+
# Syntax by Example
|
4 |
+
|
5 |
+
The following examples will help you understand the syntax of Evy. For a
|
6 |
+
more formal definition of the syntax, see the
|
7 |
+
[Language Specification](spec.md). Built-in functions, such as `print`
|
8 |
+
and `circle`, are documented in the [Built-ins section](builtins.md).
|
9 |
+
|
10 |
+
## Comment
|
11 |
+
|
12 |
+
// This is a comment
|
13 |
+
|
14 |
+
## Declaration
|
15 |
+
|
16 |
+
x:num // declaration: num, string, bool, any, []num, {}string
|
17 |
+
y := 1 // declaration through type inference (num)
|
18 |
+
|
19 |
+
## Assignment
|
20 |
+
|
21 |
+
z = 5
|
22 |
+
|
23 |
+
## Expression
|
24 |
+
|
25 |
+
x := 5 * (y + z) - 2 / 7.6 // arithmetic number expression
|
26 |
+
b := !trace and debug or level == "" // bool expressions
|
27 |
+
|
28 |
+
## Strings
|
29 |
+
|
30 |
+
s1 := "quotation mark : \" " // escaping
|
31 |
+
s2 := "abc" + "🥪123" // concatenation
|
32 |
+
s3 := "newline: \n indentation: \t"
|
33 |
+
s4 := s2[0] // "a"
|
34 |
+
s5 := s2[1:5] // "bc🥪1"
|
35 |
+
|
36 |
+
## `if` statements
|
37 |
+
|
38 |
+
if z > 0 and x != 0
|
39 |
+
print "block 1"
|
40 |
+
else if y != 0 or a == "abc"
|
41 |
+
print "block 2"
|
42 |
+
else
|
43 |
+
print "block 3"
|
44 |
+
end
|
45 |
+
|
46 |
+
### Nested `if`
|
47 |
+
|
48 |
+
if z > 0 and x != 0
|
49 |
+
if startswith str "a"
|
50 |
+
print "nested block 1"
|
51 |
+
else
|
52 |
+
print "nested block 2"
|
53 |
+
end
|
54 |
+
end
|
55 |
+
|
56 |
+
## Loop statements
|
57 |
+
|
58 |
+
### `while` loop
|
59 |
+
|
60 |
+
x := 0
|
61 |
+
while x < 10
|
62 |
+
print x
|
63 |
+
x = x + 1
|
64 |
+
end
|
65 |
+
|
66 |
+
### `for` … `range` number
|
67 |
+
|
68 |
+
for x := range 5
|
69 |
+
print x // 0 1 2 3 4
|
70 |
+
end
|
71 |
+
|
72 |
+
for x := range 5 10
|
73 |
+
print x // 5 6 7 8 9
|
74 |
+
end
|
75 |
+
|
76 |
+
for x := range 1 10 2 // from to step
|
77 |
+
print x // 1 3 5 7 9
|
78 |
+
end
|
79 |
+
|
80 |
+
for x := range -10
|
81 |
+
print x // nothing. step is 1 by default.
|
82 |
+
end
|
83 |
+
|
84 |
+
### `for` … `range` array
|
85 |
+
|
86 |
+
for x := range [1 2 3]
|
87 |
+
print x // 1 2 3
|
88 |
+
end
|
89 |
+
|
90 |
+
### `for` … `range` map
|
91 |
+
|
92 |
+
m := { name:"Mali" sport:"climbing" }
|
93 |
+
for key := range m
|
94 |
+
print key m[key]
|
95 |
+
end
|
96 |
+
|
97 |
+
### `break`
|
98 |
+
|
99 |
+
x := 0
|
100 |
+
while true
|
101 |
+
print "tick... "
|
102 |
+
sleep 1
|
103 |
+
if x > 9
|
104 |
+
print "💥"
|
105 |
+
break // `break` breaks out of the innermost loop
|
106 |
+
end
|
107 |
+
x = x + 1
|
108 |
+
end
|
109 |
+
|
110 |
+
## Function definition
|
111 |
+
|
112 |
+
func add:num a:num b:num
|
113 |
+
return a + b
|
114 |
+
end
|
115 |
+
|
116 |
+
### No return type
|
117 |
+
|
118 |
+
func foxprint s:string
|
119 |
+
print "🦊 " + s
|
120 |
+
end
|
121 |
+
|
122 |
+
### Variadic
|
123 |
+
|
124 |
+
func list args:any...
|
125 |
+
for arg := range args[:-1]
|
126 |
+
printf "%v, " arg
|
127 |
+
end
|
128 |
+
printf "%v" args[-1]
|
129 |
+
end
|
130 |
+
|
131 |
+
### Function calls
|
132 |
+
|
133 |
+
n := add 1 2 // 3
|
134 |
+
foxprint "🐾" // 🦊 🐾
|
135 |
+
list 2 true "blue" // 2, true, blue
|
136 |
+
|
137 |
+
## Array
|
138 |
+
|
139 |
+
a1:[]num
|
140 |
+
a2:[][]string
|
141 |
+
a1 = [1 2 3 4] // type: num[]
|
142 |
+
a2 = [["1" "2"] ["a" "b"]] // type: string[][]
|
143 |
+
a3 := [true false] // type: bool[]
|
144 |
+
a4 := ["s1" // line break allowed
|
145 |
+
"s2"] // type: string[]
|
146 |
+
a5 := ["chars" 123] // type: any[]
|
147 |
+
a6:[]any // type: any[]
|
148 |
+
|
149 |
+
### Array element access
|
150 |
+
|
151 |
+
a1 := [1 2 3 4]
|
152 |
+
a2 := [["1" "2"] ["a" "b"]]
|
153 |
+
print a1[1] // 2
|
154 |
+
print a2[1][0] // "a"
|
155 |
+
print a1[-1] // 4
|
156 |
+
|
157 |
+
### Concatenation
|
158 |
+
|
159 |
+
a := [1 2 3 4]
|
160 |
+
a = a + [ 100 ] // [1 2 3 4 100]; optional extra whitespace
|
161 |
+
a = [0] + a + [101 102] // [0 1 2 3 4 100 101 102]
|
162 |
+
|
163 |
+
### Slicing
|
164 |
+
|
165 |
+
a := [1 2 3]
|
166 |
+
b := a[:2] // [1 2]
|
167 |
+
b = a[1:2] // [2]
|
168 |
+
b = a[-2:] // [2 3]
|
169 |
+
|
170 |
+
## Map
|
171 |
+
|
172 |
+
m1:{}any // keys used in literals or with `.` must be identifiers.
|
173 |
+
m1.name = "fox"
|
174 |
+
m1.age = 42
|
175 |
+
m1["key with space"] = "🔑🪐"
|
176 |
+
|
177 |
+
m2 := {letters:"abc" name:"Jill"} // type: {}string
|
178 |
+
m3 := {} // type: {}any
|
179 |
+
m4 := {
|
180 |
+
letters:"abc" // line break allowed
|
181 |
+
nums:123
|
182 |
+
} // type: {}any
|
183 |
+
m5:{}[]num // map of array of numbers
|
184 |
+
m5.digits = [1 2 3]
|
185 |
+
m6:{}num
|
186 |
+
//m6.x = "y" // invalid, only num values allowed
|
187 |
+
|
188 |
+
### Map value access
|
189 |
+
|
190 |
+
m := {letters:"abc" name:"Jill"}
|
191 |
+
s := "letters"
|
192 |
+
print m.letters // abc
|
193 |
+
print m[s] // abc
|
194 |
+
print m["letters"] // abc
|
195 |
+
|
196 |
+
## `any`
|
197 |
+
|
198 |
+
x:any // any type, default value: false
|
199 |
+
m1:{}any // map with any value type
|
200 |
+
m2 := { letter:"a" number:1 }
|
201 |
+
arr1:[]any
|
202 |
+
arr2 := [ "b" 2 ]
|
203 |
+
|
204 |
+
## Type assertion
|
205 |
+
|
206 |
+
x:any
|
207 |
+
x = [ 1 2 3 4 ] // concrete type num[]
|
208 |
+
s := x.([]num)
|
209 |
+
|
210 |
+
## Type reflection
|
211 |
+
|
212 |
+
typeof "abc" // "string"
|
213 |
+
typeof true // "bool"
|
214 |
+
typeof [ 1 2 ] // "[]num"
|
215 |
+
typeof [[1 2] [3 4]] // "[][]num"
|
216 |
+
|
217 |
+
v:any
|
218 |
+
v = "🐐"
|
219 |
+
if (typeof v) == "string"
|
220 |
+
print "v is a string:" v
|
221 |
+
s := v.(string) // type assertion
|
222 |
+
print s+s // 🐐🐐
|
223 |
+
end
|
224 |
+
|
225 |
+
## Event handling
|
226 |
+
|
227 |
+
on key
|
228 |
+
print "key pressed"
|
229 |
+
end
|
230 |
+
|
231 |
+
Evy can only handle a limited set of events, such as key presses,
|
232 |
+
pointer movements, or periodic screen redraws.
|
233 |
+
|
234 |
+
### Event handlers with parameters
|
235 |
+
|
236 |
+
on key k:string
|
237 |
+
printf "%q pressed\n" k
|
238 |
+
end
|
239 |
+
|
240 |
+
# Example evy programs
|
241 |
+
|
242 |
+
```evy
|
243 |
+
// 1. Two Sum
|
244 |
+
// Solved
|
245 |
+
// Easy
|
246 |
+
// Topics
|
247 |
+
// Companies
|
248 |
+
// Hint
|
249 |
+
// Given an array of integers nums and an integer target, return indices of the two numbers such that they add up to target.
|
250 |
+
// You may assume that each input would have exactly one solution, and you may not use the same element twice.
|
251 |
+
// You can return the answer in any order.
|
252 |
+
// Example 1:
|
253 |
+
// Input: nums = [2,7,11,15], target = 9
|
254 |
+
// Output: [0,1]
|
255 |
+
// Explanation: Because nums[0] + nums[1] == 9, we return [0, 1].
|
256 |
+
// Example 2:
|
257 |
+
// Input: nums = [3,2,4], target = 6
|
258 |
+
// Output: [1,2]
|
259 |
+
// Example 3:
|
260 |
+
// Input: nums = [3,3], target = 6
|
261 |
+
// Output: [0,1]
|
262 |
+
// Constraints:
|
263 |
+
// 2 <= nums.length <= 104
|
264 |
+
// -109 <= nums[i] <= 109
|
265 |
+
// -109 <= target <= 109
|
266 |
+
// Only one valid answer exists.
|
267 |
+
// Follow-up: Can you come up with an algorithm that is less than O(n2) time complexity?
|
268 |
+
|
269 |
+
func twosum:[]num nums:[]num target:num
|
270 |
+
m:{}num
|
271 |
+
for i := range (len nums)
|
272 |
+
v := nums[i]
|
273 |
+
if has m (sprintf "%v" (target - v))
|
274 |
+
return [m[sprintf "%v" (target - v)] i]
|
275 |
+
end
|
276 |
+
m[sprintf "%v" v] = i
|
277 |
+
end
|
278 |
+
return []
|
279 |
+
end
|
280 |
+
|
281 |
+
fails := 0
|
282 |
+
total := 0
|
283 |
+
|
284 |
+
func assert want:any got:any
|
285 |
+
total = total + 1
|
286 |
+
if want != got
|
287 |
+
fails = fails + 1
|
288 |
+
printf "want != got: want %v got %v\n" want got
|
289 |
+
end
|
290 |
+
end
|
291 |
+
|
292 |
+
func finished
|
293 |
+
printf "%v of %v tests passed\n" (total - fails) total
|
294 |
+
end
|
295 |
+
|
296 |
+
// -- Test Cases Start -- //
|
297 |
+
assert [0 1] (twosum [2 7 11 15] 9)
|
298 |
+
assert [1 2] (twosum [3 2 4] 6)
|
299 |
+
assert [0 1] (twosum [3 3] 6)
|
300 |
+
// -- Test Cases End -- //
|
301 |
+
finished
|
302 |
+
```
|
303 |
+
|
304 |
+
|
305 |
+
```evy
|
306 |
+
// 199. Binary Tree Right Side View
|
307 |
+
// Solved
|
308 |
+
// Medium
|
309 |
+
// Topics
|
310 |
+
// Companies
|
311 |
+
// Given the root of a binary tree, imagine yourself standing on the right side of it, return the values of the nodes you can see ordered from top to bottom.
|
312 |
+
// Example 1:
|
313 |
+
// Input: root = [1,2,3,null,5,null,4]
|
314 |
+
// Output: [1,3,4]
|
315 |
+
// Example 2:
|
316 |
+
// Input: root = [1,null,3]
|
317 |
+
// Output: [1,3]
|
318 |
+
// Example 3:
|
319 |
+
// Input: root = []
|
320 |
+
// Output: []
|
321 |
+
// Constraints:
|
322 |
+
// The number of nodes in the tree is in the range [0, 100].
|
323 |
+
// -100 <= Node.val <= 100
|
324 |
+
|
325 |
+
func rightSideView:[]any treearr:[]any
|
326 |
+
root:any
|
327 |
+
root = buildBinaryTree treearr
|
328 |
+
queue := []
|
329 |
+
res := []
|
330 |
+
queue = queue + [root]
|
331 |
+
while (len queue) > 0
|
332 |
+
size := len queue
|
333 |
+
for i := range 0 size
|
334 |
+
node:{}any
|
335 |
+
node = queue[0].({}any)
|
336 |
+
queue = queue[1:]
|
337 |
+
if (has node "val") and i == size - 1
|
338 |
+
res = res + [node["val"]]
|
339 |
+
end
|
340 |
+
if (has node "left") and node["left"].({}any) != {}
|
341 |
+
queue = queue + [node["left"]]
|
342 |
+
end
|
343 |
+
if (has node "right") and node["right"].({}any) != {}
|
344 |
+
queue = queue + [node["right"]]
|
345 |
+
end
|
346 |
+
end
|
347 |
+
end
|
348 |
+
return res
|
349 |
+
end
|
350 |
+
|
351 |
+
fails := 0
|
352 |
+
total := 0
|
353 |
+
|
354 |
+
func assert want:any got:any
|
355 |
+
total = total + 1
|
356 |
+
if want != got
|
357 |
+
fails = fails + 1
|
358 |
+
printf "want != got: want %v got %v\n" want got
|
359 |
+
end
|
360 |
+
end
|
361 |
+
|
362 |
+
func finished
|
363 |
+
printf "%v of %v tests passed\n" (total - fails) total
|
364 |
+
end
|
365 |
+
|
366 |
+
func buildBinaryTree:{}any tree:[]any
|
367 |
+
root:{}any
|
368 |
+
rootany:any
|
369 |
+
rootany = root
|
370 |
+
queue := [rootany]
|
371 |
+
for i := range 0 (len tree)
|
372 |
+
if (len queue) == 0
|
373 |
+
break
|
374 |
+
end
|
375 |
+
node:{}any
|
376 |
+
node = queue[0].({}any)
|
377 |
+
queue = queue[1:]
|
378 |
+
anynull:any
|
379 |
+
anynull = "null"
|
380 |
+
if tree[i] != anynull
|
381 |
+
node["val"] = tree[i]
|
382 |
+
node["left"] = {}
|
383 |
+
node["right"] = {}
|
384 |
+
queue = queue + [node["left"]]
|
385 |
+
queue = queue + [node["right"]]
|
386 |
+
end
|
387 |
+
end
|
388 |
+
return root
|
389 |
+
end
|
390 |
+
|
391 |
+
// -- Test Cases Start -- //
|
392 |
+
|
393 |
+
assert [1 3 4 ""][:-1] (rightSideView [1 2 3 "null" 5 "null" 4])
|
394 |
+
assert [1 3 ""][:-1] (rightSideView [1 "null" 3])
|
395 |
+
assert [] (rightSideView [])
|
396 |
+
assert [1 3 4 ""][:-1] (rightSideView [1 2 3 4])
|
397 |
+
// -- Test Cases End -- //
|
398 |
+
finished
|
399 |
+
```
|
400 |
+
|
401 |
+
```evy
|
402 |
+
// 412. Fizz Buzz
|
403 |
+
// Easy
|
404 |
+
// Topics
|
405 |
+
// Companies
|
406 |
+
// Given an integer n, return a string array answer (1-indexed) where:
|
407 |
+
// answer[i] == "FizzBuzz" if i is divisible by 3 and 5.
|
408 |
+
// answer[i] == "Fizz" if i is divisible by 3.
|
409 |
+
// answer[i] == "Buzz" if i is divisible by 5.
|
410 |
+
// answer[i] == i (as a string) if none of the above conditions are true.
|
411 |
+
// Example 1:
|
412 |
+
// Input: n = 3
|
413 |
+
// Output: ["1","2","Fizz"]
|
414 |
+
// Example 2:
|
415 |
+
// Input: n = 5
|
416 |
+
// Output: ["1","2","Fizz","4","Buzz"]
|
417 |
+
// Example 3:
|
418 |
+
// Input: n = 15
|
419 |
+
// Output: ["1","2","Fizz","4","Buzz","Fizz","7","8","Fizz","Buzz","11","Fizz","13","14","FizzBuzz"]
|
420 |
+
// Constraints:
|
421 |
+
// 1 <= n <= 104
|
422 |
+
|
423 |
+
func fizzbuzz:[]string n:num
|
424 |
+
ans:[]string
|
425 |
+
for i := range 1 (n + 1)
|
426 |
+
s:string
|
427 |
+
if i % 3 == 0
|
428 |
+
s = s + "Fizz"
|
429 |
+
end
|
430 |
+
if i % 5 == 0
|
431 |
+
s = s + "Buzz"
|
432 |
+
end
|
433 |
+
if s == ""
|
434 |
+
s = sprintf "%v" i
|
435 |
+
end
|
436 |
+
ans = ans + [s]
|
437 |
+
end
|
438 |
+
return ans
|
439 |
+
end
|
440 |
+
|
441 |
+
fails := 0
|
442 |
+
total := 0
|
443 |
+
|
444 |
+
func assert want:any got:any
|
445 |
+
total = total + 1
|
446 |
+
if want != got
|
447 |
+
fails = fails + 1
|
448 |
+
printf "want != got: want %v got %v\n" want got
|
449 |
+
end
|
450 |
+
end
|
451 |
+
|
452 |
+
func finished
|
453 |
+
printf "%v of %v tests passed\n" (total - fails) total
|
454 |
+
end
|
455 |
+
|
456 |
+
// -- Test Cases Start -- //
|
457 |
+
assert ["1" "2" "Fizz"] (fizzbuzz 3)
|
458 |
+
assert ["1" "2" "Fizz" "4" "Buzz"] (fizzbuzz 5)
|
459 |
+
assert ["1" "2" "Fizz" "4" "Buzz" "Fizz" "7" "8" "Fizz" "Buzz" "11" "Fizz" "13" "14" "FizzBuzz"] (fizzbuzz 15)
|
460 |
+
// -- Test Cases End -- //
|
461 |
+
finished
|
462 |
+
```
|
463 |
+
|
464 |
+
|
465 |
+
With All of this, solve the following problem:
|
466 |
+
|
467 |
+
Write a function has_close_element that checks whether, in a given list of numbers, any two numbers are closer to each other than a given threshold.
|
468 |
+
|
469 |
+
Write the program in Evy:
|
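prompt.md deliberately ends with the open request above. For reference, a minimal Evy sketch of the requested function in the style of the guide's own examples, using a quadratic pairwise scan; the repo's `has_closest_elements.evy` may solve it differently:

```evy
func has_close_element:bool numbers:[]num threshold:num
    for i := range (len numbers)
        for j := range (i + 1) (len numbers)
            diff := numbers[i] - numbers[j]
            if diff < 0
                diff = -diff
            end
            if diff < threshold
                return true
            end
        end
    end
    return false
end

print (has_close_element [1 2 3] 0.5) // false
print (has_close_element [1 2.8 3] 0.3) // true
```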
python
ADDED
File without changes
|
sft.ipynb
ADDED
@@ -0,0 +1,181 @@
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 2,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [
|
8 |
+
{
|
9 |
+
"name": "stdout",
|
10 |
+
"output_type": "stream",
|
11 |
+
"text": [
|
12 |
+
"Collecting trl\n",
|
13 |
+
" Downloading trl-0.8.0-py3-none-any.whl.metadata (11 kB)\n",
|
14 |
+
"Requirement already satisfied: torch>=1.4.0 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from trl) (2.2.1)\n",
|
15 |
+
"Requirement already satisfied: transformers>=4.31.0 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from trl) (4.38.2)\n",
|
16 |
+
"Requirement already satisfied: numpy>=1.18.2 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from trl) (1.26.3)\n",
|
17 |
+
"Requirement already satisfied: accelerate in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from trl) (0.27.2)\n",
|
18 |
+
"Requirement already satisfied: datasets in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from trl) (2.18.0)\n",
|
19 |
+
"Collecting tyro>=0.5.11 (from trl)\n",
|
20 |
+
" Downloading tyro-0.7.3-py3-none-any.whl.metadata (7.7 kB)\n",
|
21 |
+
"Requirement already satisfied: filelock in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from torch>=1.4.0->trl) (3.13.1)\n",
|
22 |
+
"Requirement already satisfied: typing-extensions>=4.8.0 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from torch>=1.4.0->trl) (4.10.0)\n",
|
23 |
+
"Requirement already satisfied: sympy in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from torch>=1.4.0->trl) (1.12)\n",
|
24 |
+
"Requirement already satisfied: networkx in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from torch>=1.4.0->trl) (3.2.1)\n",
|
25 |
+
"Requirement already satisfied: jinja2 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from torch>=1.4.0->trl) (3.1.3)\n",
|
26 |
+
"Requirement already satisfied: fsspec in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from torch>=1.4.0->trl) (2024.2.0)\n",
|
27 |
+
"Requirement already satisfied: huggingface-hub<1.0,>=0.19.3 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from transformers>=4.31.0->trl) (0.21.3)\n",
|
28 |
+
"Requirement already satisfied: packaging>=20.0 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from transformers>=4.31.0->trl) (23.2)\n",
|
29 |
+
"Requirement already satisfied: pyyaml>=5.1 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from transformers>=4.31.0->trl) (6.0.1)\n",
|
30 |
+
"Requirement already satisfied: regex!=2019.12.17 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from transformers>=4.31.0->trl) (2023.12.25)\n",
|
31 |
+
"Requirement already satisfied: requests in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from transformers>=4.31.0->trl) (2.31.0)\n",
|
32 |
+
"Requirement already satisfied: tokenizers<0.19,>=0.14 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from transformers>=4.31.0->trl) (0.15.2)\n",
|
33 |
+
"Requirement already satisfied: safetensors>=0.4.1 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from transformers>=4.31.0->trl) (0.4.2)\n",
|
34 |
+
"Requirement already satisfied: tqdm>=4.27 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from transformers>=4.31.0->trl) (4.66.1)\n",
|
35 |
+
"Collecting docstring-parser>=0.14.1 (from tyro>=0.5.11->trl)\n",
|
36 |
+
" Downloading docstring_parser-0.16-py3-none-any.whl.metadata (3.0 kB)\n",
|
37 |
+
"Requirement already satisfied: rich>=11.1.0 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from tyro>=0.5.11->trl) (13.7.0)\n",
|
38 |
+
"Collecting shtab>=1.5.6 (from tyro>=0.5.11->trl)\n",
|
39 |
+
" Downloading shtab-1.7.1-py3-none-any.whl.metadata (7.3 kB)\n",
|
40 |
+
"Requirement already satisfied: psutil in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from accelerate->trl) (5.9.8)\n",
|
41 |
+
"Requirement already satisfied: pyarrow>=12.0.0 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from datasets->trl) (15.0.1)\n",
|
42 |
+
"Requirement already satisfied: pyarrow-hotfix in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from datasets->trl) (0.6)\n",
|
43 |
+
"Requirement already satisfied: dill<0.3.9,>=0.3.0 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from datasets->trl) (0.3.8)\n",
|
44 |
+
"Requirement already satisfied: pandas in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from datasets->trl) (2.2.1)\n",
|
45 |
+
"Requirement already satisfied: xxhash in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from datasets->trl) (3.4.1)\n",
|
46 |
+
"Requirement already satisfied: multiprocess in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from datasets->trl) (0.70.16)\n",
|
47 |
+
"Requirement already satisfied: aiohttp in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from datasets->trl) (3.9.3)\n",
|
48 |
+
"Requirement already satisfied: aiosignal>=1.1.2 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from aiohttp->datasets->trl) (1.3.1)\n",
|
49 |
+
"Requirement already satisfied: attrs>=17.3.0 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from aiohttp->datasets->trl) (23.2.0)\n",
|
50 |
+
"Requirement already satisfied: frozenlist>=1.1.1 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from aiohttp->datasets->trl) (1.4.1)\n",
|
51 |
+
"Requirement already satisfied: multidict<7.0,>=4.5 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from aiohttp->datasets->trl) (6.0.5)\n",
|
52 |
+
"Requirement already satisfied: yarl<2.0,>=1.0 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from aiohttp->datasets->trl) (1.9.4)\n",
|
53 |
+
"Requirement already satisfied: charset-normalizer<4,>=2 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from requests->transformers>=4.31.0->trl) (3.3.2)\n",
|
54 |
+
"Requirement already satisfied: idna<4,>=2.5 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from requests->transformers>=4.31.0->trl) (3.6)\n",
|
55 |
+
"Requirement already satisfied: urllib3<3,>=1.21.1 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from requests->transformers>=4.31.0->trl) (2.2.1)\n",
|
56 |
+
"Requirement already satisfied: certifi>=2017.4.17 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from requests->transformers>=4.31.0->trl) (2024.2.2)\n",
|
57 |
+
"Requirement already satisfied: markdown-it-py>=2.2.0 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from rich>=11.1.0->tyro>=0.5.11->trl) (3.0.0)\n",
|
58 |
+
"Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from rich>=11.1.0->tyro>=0.5.11->trl) (2.17.2)\n",
|
59 |
+
"Requirement already satisfied: MarkupSafe>=2.0 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from jinja2->torch>=1.4.0->trl) (2.1.5)\n",
|
60 |
+
"Requirement already satisfied: python-dateutil>=2.8.2 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from pandas->datasets->trl) (2.9.0.post0)\n",
|
61 |
+
"Requirement already satisfied: pytz>=2020.1 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from pandas->datasets->trl) (2024.1)\n",
|
62 |
+
"Requirement already satisfied: tzdata>=2022.7 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from pandas->datasets->trl) (2024.1)\n",
|
63 |
+
"Requirement already satisfied: mpmath>=0.19 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from sympy->torch>=1.4.0->trl) (1.3.0)\n",
|
64 |
+
"Requirement already satisfied: mdurl~=0.1 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from markdown-it-py>=2.2.0->rich>=11.1.0->tyro>=0.5.11->trl) (0.1.2)\n",
|
65 |
+
"Requirement already satisfied: six>=1.5 in /Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas->datasets->trl) (1.16.0)\n",
|
66 |
+
"Downloading trl-0.8.0-py3-none-any.whl (224 kB)\n",
|
67 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m225.0/225.0 kB\u001b[0m \u001b[31m9.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
68 |
+
"\u001b[?25hDownloading tyro-0.7.3-py3-none-any.whl (79 kB)\n",
|
69 |
+
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.8/79.8 kB\u001b[0m \u001b[31m10.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
|
70 |
+
"\u001b[?25hDownloading docstring_parser-0.16-py3-none-any.whl (36 kB)\n",
|
71 |
+
"Downloading shtab-1.7.1-py3-none-any.whl (14 kB)\n",
|
72 |
+
"Installing collected packages: shtab, docstring-parser, tyro, trl\n",
|
73 |
+
"Successfully installed docstring-parser-0.16 shtab-1.7.1 trl-0.8.0 tyro-0.7.3\n"
|
74 |
+
]
|
75 |
+
}
|
76 |
+
],
|
77 |
+
"source": [
|
78 |
+
"!pip install trl"
|
79 |
+
]
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"cell_type": "code",
|
83 |
+
"execution_count": 3,
|
84 |
+
"metadata": {},
|
85 |
+
"outputs": [
|
86 |
+
{
|
87 |
+
"name": "stdout",
|
88 |
+
"output_type": "stream",
|
89 |
+
"text": [
|
90 |
+
"'NoneType' object has no attribute 'cadam32bit_grad_fp32'\n"
|
91 |
+
]
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"name": "stderr",
|
95 |
+
"output_type": "stream",
|
96 |
+
"text": [
|
97 |
+
"/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/bitsandbytes/cextension.py:34: UserWarning: The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable.\n",
|
98 |
+
" warn(\"The installed version of bitsandbytes was compiled without GPU support. \"\n"
|
99 |
+
]
|
100 |
+
},
|
101 |
+
{
|
102 |
+
"ename": "RuntimeError",
|
103 |
+
"evalue": "Failed to import trl.trainer.sft_trainer because of the following error (look up to see its traceback):\ncannot import name 'prepare_model_for_kbit_training' from 'peft' (/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/peft/__init__.py)",
|
104 |
+
"output_type": "error",
|
105 |
+
"traceback": [
|
106 |
+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
107 |
+
"\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
|
108 |
+
"File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/trl/import_utils.py:172\u001b[0m, in \u001b[0;36m_LazyModule._get_module\u001b[0;34m(self, module_name)\u001b[0m\n\u001b[1;32m 171\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 172\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mimportlib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mimport_module\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m.\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mmodule_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;18;43m__name__\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 173\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n",
|
109 |
+
"File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/importlib/__init__.py:90\u001b[0m, in \u001b[0;36mimport_module\u001b[0;34m(name, package)\u001b[0m\n\u001b[1;32m 89\u001b[0m level \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[0;32m---> 90\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_bootstrap\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_gcd_import\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m[\u001b[49m\u001b[43mlevel\u001b[49m\u001b[43m:\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpackage\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlevel\u001b[49m\u001b[43m)\u001b[49m\n",
|
110 |
+
"File \u001b[0;32m<frozen importlib._bootstrap>:1387\u001b[0m, in \u001b[0;36m_gcd_import\u001b[0;34m(name, package, level)\u001b[0m\n",
|
111 |
+
"File \u001b[0;32m<frozen importlib._bootstrap>:1360\u001b[0m, in \u001b[0;36m_find_and_load\u001b[0;34m(name, import_)\u001b[0m\n",
|
112 |
+
"File \u001b[0;32m<frozen importlib._bootstrap>:1331\u001b[0m, in \u001b[0;36m_find_and_load_unlocked\u001b[0;34m(name, import_)\u001b[0m\n",
|
113 |
+
"File \u001b[0;32m<frozen importlib._bootstrap>:935\u001b[0m, in \u001b[0;36m_load_unlocked\u001b[0;34m(spec)\u001b[0m\n",
|
114 |
+
"File \u001b[0;32m<frozen importlib._bootstrap_external>:994\u001b[0m, in \u001b[0;36mexec_module\u001b[0;34m(self, module)\u001b[0m\n",
|
115 |
+
"File \u001b[0;32m<frozen importlib._bootstrap>:488\u001b[0m, in \u001b[0;36m_call_with_frames_removed\u001b[0;34m(f, *args, **kwds)\u001b[0m\n",
|
116 |
+
"File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/trl/trainer/sft_trainer.py:53\u001b[0m\n\u001b[1;32m 52\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_peft_available():\n\u001b[0;32m---> 53\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpeft\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m PeftConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training\n\u001b[1;32m 56\u001b[0m \u001b[38;5;28;01mclass\u001b[39;00m \u001b[38;5;21;01mSFTTrainer\u001b[39;00m(Trainer):\n",
|
117 |
+
"\u001b[0;31mImportError\u001b[0m: cannot import name 'prepare_model_for_kbit_training' from 'peft' (/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/peft/__init__.py)",
|
118 |
+
"\nThe above exception was the direct cause of the following exception:\n",
|
119 |
+
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
|
120 |
+
"Cell \u001b[0;32mIn[3], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AutoModelForCausalLM, AutoTokenizer\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdatasets\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m load_dataset\n\u001b[0;32m----> 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtrl\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m SFTTrainer, DataCollatorForCompletionOnlyLM\n\u001b[1;32m 5\u001b[0m dataset \u001b[38;5;241m=\u001b[39m load_dataset(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlucasmccabe-lmi/CodeAlpaca-20k\u001b[39m\u001b[38;5;124m\"\u001b[39m, split\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtrain\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 7\u001b[0m model \u001b[38;5;241m=\u001b[39m AutoModelForCausalLM\u001b[38;5;241m.\u001b[39mfrom_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfacebook/opt-350m\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
|
121 |
+
"File \u001b[0;32m<frozen importlib._bootstrap>:1412\u001b[0m, in \u001b[0;36m_handle_fromlist\u001b[0;34m(module, fromlist, import_, recursive)\u001b[0m\n",
|
122 |
+
"File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/trl/import_utils.py:163\u001b[0m, in \u001b[0;36m_LazyModule.__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m name \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_class_to_module\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 162\u001b[0m module \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_module(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_class_to_module[name])\n\u001b[0;32m--> 163\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mmodule\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 164\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 165\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodule \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m has no attribute \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n",
|
123 |
+
"File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/trl/import_utils.py:162\u001b[0m, in \u001b[0;36m_LazyModule.__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 160\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_module(name)\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m name \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_class_to_module\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[0;32m--> 162\u001b[0m module \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_module\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_class_to_module\u001b[49m\u001b[43m[\u001b[49m\u001b[43mname\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 163\u001b[0m value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(module, name)\n\u001b[1;32m 164\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
|
124 |
+
"File \u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/trl/import_utils.py:174\u001b[0m, in \u001b[0;36m_LazyModule._get_module\u001b[0;34m(self, module_name)\u001b[0m\n\u001b[1;32m 172\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m importlib\u001b[38;5;241m.\u001b[39mimport_module(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m module_name, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m)\n\u001b[1;32m 173\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m--> 174\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\n\u001b[1;32m 175\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFailed to import \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmodule_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m because of the following error (look up to see its\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 176\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m traceback):\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00me\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 177\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01me\u001b[39;00m\n",
|
125 |
+
"\u001b[0;31mRuntimeError\u001b[0m: Failed to import trl.trainer.sft_trainer because of the following error (look up to see its traceback):\ncannot import name 'prepare_model_for_kbit_training' from 'peft' (/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/peft/__init__.py)"
|
126 |
+
]
|
127 |
+
}
|
128 |
+
],
|
129 |
+
"source": [
|
130 |
+
"from transformers import AutoModelForCausalLM, AutoTokenizer\n",
|
131 |
+
"from datasets import load_dataset\n",
|
132 |
+
"from trl import SFTTrainer, DataCollatorForCompletionOnlyLM\n",
|
133 |
+
"\n",
|
134 |
+
"dataset = load_dataset(\"lucasmccabe-lmi/CodeAlpaca-20k\", split=\"train\")\n",
|
135 |
+
"\n",
|
136 |
+
"model = AutoModelForCausalLM.from_pretrained(\"facebook/opt-350m\")\n",
|
137 |
+
"tokenizer = AutoTokenizer.from_pretrained(\"facebook/opt-350m\")\n",
|
138 |
+
"\n",
|
139 |
+
"def formatting_prompts_func(example):\n",
|
140 |
+
" output_texts = []\n",
|
141 |
+
" for i in range(len(example['instruction'])):\n",
|
142 |
+
" text = f\"### Question: {example['instruction'][i]}\\n ### Answer: {example['output'][i]}\"\n",
|
143 |
+
" output_texts.append(text)\n",
|
144 |
+
" return output_texts\n",
|
145 |
+
"\n",
|
146 |
+
"response_template = \" ### Answer:\"\n",
|
147 |
+
"collator = DataCollatorForCompletionOnlyLM(response_template, tokenizer=tokenizer)\n",
|
148 |
+
"\n",
|
149 |
+
"trainer = SFTTrainer(\n",
|
150 |
+
" model,\n",
|
151 |
+
" train_dataset=dataset,\n",
|
152 |
+
" formatting_func=formatting_prompts_func,\n",
|
153 |
+
" data_collator=collator,\n",
|
154 |
+
")\n",
|
155 |
+
"\n",
|
156 |
+
"trainer.train()"
|
157 |
+
]
|
158 |
+
}
|
159 |
+
],
|
160 |
+
"metadata": {
|
161 |
+
"kernelspec": {
|
162 |
+
"display_name": "Python 3",
|
163 |
+
"language": "python",
|
164 |
+
"name": "python3"
|
165 |
+
},
|
166 |
+
"language_info": {
|
167 |
+
"codemirror_mode": {
|
168 |
+
"name": "ipython",
|
169 |
+
"version": 3
|
170 |
+
},
|
171 |
+
"file_extension": ".py",
|
172 |
+
"mimetype": "text/x-python",
|
173 |
+
"name": "python",
|
174 |
+
"nbconvert_exporter": "python",
|
175 |
+
"pygments_lexer": "ipython3",
|
176 |
+
"version": "3.12.1"
|
177 |
+
}
|
178 |
+
},
|
179 |
+
"nbformat": 4,
|
180 |
+
"nbformat_minor": 2
|
181 |
+
}
|
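
The traceback above is itself informative: `trl` lazily imports its trainers, and the chain fails because the installed `peft` does not export `prepare_model_for_kbit_training`, a helper that only exists in newer `peft` releases. A minimal diagnostic sketch (not part of this commit), assuming the cause is that trl/peft version mismatch:

```python
# Hedged diagnostic sketch, assuming the RuntimeError above stems from an
# outdated peft install rather than a broken trl package.
import peft

print(peft.__version__)

# On a peft release that predates prepare_model_for_kbit_training, this line
# raises the same ImportError that trl surfaces; upgrading both packages
# (e.g. `pip install -U peft trl`) is the usual fix.
from peft import prepare_model_for_kbit_training
```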
squash.py
ADDED
@@ -0,0 +1,29 @@
1 | + import json
2 | +
3 | + def squash_details(data):
4 | +     """
5 | +     Squashes the details field into a single string of key-value pairs.
6 | +
7 | +     Args:
8 | +         data: A list of dictionaries containing message and details fields.
9 | +
10 | +     Returns:
11 | +         A list of dictionaries with the modified details field.
12 | +     """
13 | +     for item in data:
14 | +         details_str = ", ".join([f"{key}: {value}" for key, value in item["details"].items()])
15 | +         item["details"] = details_str
16 | +     return data
17 | +
18 | + # Read data from data.json
19 | + with open("data.json", "r") as file:
20 | +     data = json.load(file)
21 | +
22 | + # Squash details
23 | + squashed_data = squash_details(data)
24 | +
25 | + # Write modified data to data2.json
26 | + with open("data2.json", "w") as file:
27 | +     json.dump(squashed_data, file, indent=4)  # Add indentation for readability (optional)
28 | +
29 | + print("Successfully processed data and wrote to data2.json!")
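
For context, `squash_details` flattens each record's nested `details` dict into one `"key: value, key: value"` string, so `data2.json` stays a single level deep. A hypothetical record (invented for illustration; only the `message`/`details` field names come from the docstring above) shows the transformation:

```python
# Hypothetical record, invented for illustration.
item = {"message": "Lunch with Sam",
        "details": {"date": "2024-03-01", "time": "12:30"}}

# The same flattening squash_details applies to every record in data.json.
item["details"] = ", ".join(f"{key}: {value}" for key, value in item["details"].items())

print(item)
# {'message': 'Lunch with Sam', 'details': 'date: 2024-03-01, time: 12:30'}
```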
translate.py
ADDED
@@ -0,0 +1,10 @@
1 | + import json
2 | + with open("HumanEval.jsonl", "r", encoding="utf8") as file:
3 | +     # read HumanEval.jsonl line by line
4 | +     # and parse each line as JSON
5 | +     data = [json.loads(line) for line in file]
6 | + for elem in data[:1]:
7 | +     print("prompt:\n", elem["prompt"])
8 | +     print("entry_point:\n", elem["entry_point"])
9 | +     print("canonical_solution:\n", elem["canonical_solution"])
10 | +     print("test:\n", elem["test"])
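
The four fields printed above fit together: `prompt` plus `canonical_solution` forms the complete reference function, and `test` defines a `check(candidate)` that exercises it via `entry_point`. A sketch of running one record end to end, assuming the standard HumanEval schema (not part of translate.py):

```python
# Sketch under the standard HumanEval schema, where `test` defines
# check(candidate); verifies the canonical solution against its own tests.
import json

with open("HumanEval.jsonl", "r", encoding="utf8") as file:
    elem = json.loads(file.readline())

namespace = {}
exec(elem["prompt"] + elem["canonical_solution"] + "\n" + elem["test"], namespace)

namespace["check"](namespace[elem["entry_point"]])  # raises on failure
print("ok:", elem["entry_point"])
```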
youtube-tutorial.ipynb
ADDED
@@ -0,0 +1,24 @@
1 | + {
2 | + "cells": [],
3 | + "metadata": {
4 | + "kernelspec": {
5 | + "display_name": "Python 3",
6 | + "language": "python",
7 | + "name": "python3"
8 | + },
9 | + "language_info": {
10 | + "codemirror_mode": {
11 | + "name": "ipython",
12 | + "version": 3
13 | + },
14 | + "file_extension": ".py",
15 | + "mimetype": "text/x-python",
16 | + "name": "python",
17 | + "nbconvert_exporter": "python",
18 | + "pygments_lexer": "ipython3",
19 | + "version": "3.12.1"
20 | + }
21 | + },
22 | + "nbformat": 4,
23 | + "nbformat_minor": 2
24 | + }