davanstrien (HF staff) committed
Commit f1f701a
1 Parent(s): 4a255f8

implement fixes

generate_collection_using_huggingface_hub.ipynb CHANGED
@@ -46,58 +46,23 @@
46
  "output_type": "stream",
47
  "text": [
48
  "Collecting git+https://github.com/huggingface/huggingface_hub\n",
49
- " Cloning https://github.com/huggingface/huggingface_hub to /private/var/folders/gf/nk18mwt53sb4d0zpvjzs40bw0000gn/T/pip-req-build-bdjiy2_a\n",
50
- " Running command git clone --filter=blob:none --quiet https://github.com/huggingface/huggingface_hub /private/var/folders/gf/nk18mwt53sb4d0zpvjzs40bw0000gn/T/pip-req-build-bdjiy2_a\n",
51
  " Resolved https://github.com/huggingface/huggingface_hub to commit c32d4b31b679c9e91b906709631901f6aa85324d\n",
52
  " Installing build dependencies ... \u001b[?25ldone\n",
53
  "\u001b[?25h Getting requirements to build wheel ... \u001b[?25ldone\n",
54
  "\u001b[?25h Preparing metadata (pyproject.toml) ... \u001b[?25ldone\n",
55
- "\u001b[?25hCollecting filelock (from huggingface-hub==0.18.0.dev0)\n",
56
- " Obtaining dependency information for filelock from https://files.pythonhosted.org/packages/5e/5d/97afbafd9d584ff1b45fcb354a479a3609bd97f912f8f1f6c563cb1fae21/filelock-3.12.4-py3-none-any.whl.metadata\n",
57
- " Using cached filelock-3.12.4-py3-none-any.whl.metadata (2.8 kB)\n",
58
- "Collecting fsspec>=2023.5.0 (from huggingface-hub==0.18.0.dev0)\n",
59
- " Obtaining dependency information for fsspec>=2023.5.0 from https://files.pythonhosted.org/packages/fe/d3/e1aa96437d944fbb9cc95d0316e25583886e9cd9e6adc07baad943524eda/fsspec-2023.9.2-py3-none-any.whl.metadata\n",
60
- " Using cached fsspec-2023.9.2-py3-none-any.whl.metadata (6.7 kB)\n",
61
- "Collecting requests (from huggingface-hub==0.18.0.dev0)\n",
62
- " Obtaining dependency information for requests from https://files.pythonhosted.org/packages/70/8e/0e2d847013cb52cd35b38c009bb167a1a26b2ce6cd6965bf26b47bc0bf44/requests-2.31.0-py3-none-any.whl.metadata\n",
63
- " Using cached requests-2.31.0-py3-none-any.whl.metadata (4.6 kB)\n",
64
- "Collecting tqdm>=4.42.1 (from huggingface-hub==0.18.0.dev0)\n",
65
- " Obtaining dependency information for tqdm>=4.42.1 from https://files.pythonhosted.org/packages/00/e5/f12a80907d0884e6dff9c16d0c0114d81b8cd07dc3ae54c5e962cc83037e/tqdm-4.66.1-py3-none-any.whl.metadata\n",
66
- " Using cached tqdm-4.66.1-py3-none-any.whl.metadata (57 kB)\n",
67
- "Collecting pyyaml>=5.1 (from huggingface-hub==0.18.0.dev0)\n",
68
- " Obtaining dependency information for pyyaml>=5.1 from https://files.pythonhosted.org/packages/28/09/55f715ddbf95a054b764b547f617e22f1d5e45d83905660e9a088078fe67/PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl.metadata\n",
69
- " Using cached PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl.metadata (2.1 kB)\n",
70
- "Collecting typing-extensions>=3.7.4.3 (from huggingface-hub==0.18.0.dev0)\n",
71
- " Obtaining dependency information for typing-extensions>=3.7.4.3 from https://files.pythonhosted.org/packages/24/21/7d397a4b7934ff4028987914ac1044d3b7d52712f30e2ac7a2ae5bc86dd0/typing_extensions-4.8.0-py3-none-any.whl.metadata\n",
72
- " Using cached typing_extensions-4.8.0-py3-none-any.whl.metadata (3.0 kB)\n",
73
  "Requirement already satisfied: packaging>=20.9 in ./.venv/lib/python3.11/site-packages (from huggingface-hub==0.18.0.dev0) (23.1)\n",
74
- "Collecting charset-normalizer<4,>=2 (from requests->huggingface-hub==0.18.0.dev0)\n",
75
- " Obtaining dependency information for charset-normalizer<4,>=2 from https://files.pythonhosted.org/packages/91/e6/8fa919fc84a106e9b04109de62bdf8526899e2754a64da66e1cd50ac1faa/charset_normalizer-3.2.0-cp311-cp311-macosx_11_0_arm64.whl.metadata\n",
76
- " Using cached charset_normalizer-3.2.0-cp311-cp311-macosx_11_0_arm64.whl.metadata (31 kB)\n",
77
- "Collecting idna<4,>=2.5 (from requests->huggingface-hub==0.18.0.dev0)\n",
78
- " Using cached idna-3.4-py3-none-any.whl (61 kB)\n",
79
- "Collecting urllib3<3,>=1.21.1 (from requests->huggingface-hub==0.18.0.dev0)\n",
80
- " Obtaining dependency information for urllib3<3,>=1.21.1 from https://files.pythonhosted.org/packages/37/dc/399e63f5d1d96bb643404ee830657f4dfcf8503f5ba8fa3c6d465d0c57fe/urllib3-2.0.5-py3-none-any.whl.metadata\n",
81
- " Using cached urllib3-2.0.5-py3-none-any.whl.metadata (6.6 kB)\n",
82
- "Collecting certifi>=2017.4.17 (from requests->huggingface-hub==0.18.0.dev0)\n",
83
- " Obtaining dependency information for certifi>=2017.4.17 from https://files.pythonhosted.org/packages/4c/dd/2234eab22353ffc7d94e8d13177aaa050113286e93e7b40eae01fbf7c3d9/certifi-2023.7.22-py3-none-any.whl.metadata\n",
84
- " Using cached certifi-2023.7.22-py3-none-any.whl.metadata (2.2 kB)\n",
85
- "Using cached fsspec-2023.9.2-py3-none-any.whl (173 kB)\n",
86
- "Using cached PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl (167 kB)\n",
87
- "Using cached tqdm-4.66.1-py3-none-any.whl (78 kB)\n",
88
- "Using cached typing_extensions-4.8.0-py3-none-any.whl (31 kB)\n",
89
- "Using cached filelock-3.12.4-py3-none-any.whl (11 kB)\n",
90
- "Using cached requests-2.31.0-py3-none-any.whl (62 kB)\n",
91
- "Using cached certifi-2023.7.22-py3-none-any.whl (158 kB)\n",
92
- "Using cached charset_normalizer-3.2.0-cp311-cp311-macosx_11_0_arm64.whl (122 kB)\n",
93
- "Using cached urllib3-2.0.5-py3-none-any.whl (123 kB)\n",
94
- "Building wheels for collected packages: huggingface-hub\n",
95
- " Building wheel for huggingface-hub (pyproject.toml) ... \u001b[?25ldone\n",
96
- "\u001b[?25h Created wheel for huggingface-hub: filename=huggingface_hub-0.18.0.dev0-py3-none-any.whl size=298588 sha256=88b09ea2b9f009a9aeae12440af109575fc5b82e58a29b0b250cc9a95eaff3aa\n",
97
- " Stored in directory: /private/var/folders/gf/nk18mwt53sb4d0zpvjzs40bw0000gn/T/pip-ephem-wheel-cache-5yfewvyz/wheels/0d/44/01/c6da8315f53a5f367cd4bb3e00643c462c8df2065b29a67f4f\n",
98
- "Successfully built huggingface-hub\n",
99
- "Installing collected packages: urllib3, typing-extensions, tqdm, pyyaml, idna, fsspec, filelock, charset-normalizer, certifi, requests, huggingface-hub\n",
100
- "Successfully installed certifi-2023.7.22 charset-normalizer-3.2.0 filelock-3.12.4 fsspec-2023.9.2 huggingface-hub-0.18.0.dev0 idna-3.4 pyyaml-6.0.1 requests-2.31.0 tqdm-4.66.1 typing-extensions-4.8.0 urllib3-2.0.5\n",
101
  "Note: you may need to restart the kernel to use updated packages.\n"
102
  ]
103
  }
@@ -112,23 +77,23 @@
112
  "source": [
113
  "## Authenticate\n",
114
  "\n",
115
- "In order to create and manage collections, you need to be authenticated. You can do this via the `huggingface_hub` library using the `notebook_login` function if you're using a notebook, or the `login` function if you're using a script. "
116
  ]
117
  },
118
  {
119
  "cell_type": "code",
120
- "execution_count": 5,
121
  "metadata": {
122
  "id": "Qn9p5Bsz2NN5"
123
  },
124
  "outputs": [],
125
  "source": [
126
- "from huggingface_hub import notebook_login"
127
  ]
128
  },
129
  {
130
  "cell_type": "code",
131
- "execution_count": 7,
132
  "metadata": {
133
  "colab": {
134
  "base_uri": "https://localhost:8080/",
@@ -175,7 +140,7 @@
175
  {
176
  "data": {
177
  "application/vnd.jupyter.widget-view+json": {
178
- "model_id": "79b9c67a0334432bad65c411b7560672",
179
  "version_major": 2,
180
  "version_minor": 0
181
  },
@@ -188,7 +153,7 @@
188
  }
189
  ],
190
  "source": [
191
- "notebook_login()"
192
  ]
193
  },
194
  {
@@ -202,7 +167,7 @@
202
  },
203
  {
204
  "cell_type": "code",
205
- "execution_count": 8,
206
  "metadata": {
207
  "id": "SfV6YRentLI8"
208
  },
@@ -220,7 +185,7 @@
220
  },
221
  {
222
  "cell_type": "code",
223
- "execution_count": 9,
224
  "metadata": {},
225
  "outputs": [],
226
  "source": [
@@ -236,7 +201,7 @@
236
  },
237
  {
238
  "cell_type": "code",
239
- "execution_count": 10,
240
  "metadata": {},
241
  "outputs": [
242
  {
@@ -245,7 +210,7 @@
245
  "generator"
246
  ]
247
  },
248
- "execution_count": 10,
249
  "metadata": {},
250
  "output_type": "execute_result"
251
  }
@@ -263,7 +228,7 @@
263
  },
264
  {
265
  "cell_type": "code",
266
- "execution_count": 11,
267
  "metadata": {},
268
  "outputs": [],
269
  "source": [
@@ -290,7 +255,7 @@
290
  },
291
  {
292
  "cell_type": "code",
293
- "execution_count": 12,
294
  "metadata": {},
295
  "outputs": [
296
  {
@@ -316,7 +281,7 @@
316
  "}"
317
  ]
318
  },
319
- "execution_count": 12,
320
  "metadata": {},
321
  "output_type": "execute_result"
322
  }
@@ -334,7 +299,7 @@
334
  },
335
  {
336
  "cell_type": "code",
337
- "execution_count": 13,
338
  "metadata": {},
339
  "outputs": [],
340
  "source": [
@@ -352,7 +317,7 @@
352
  },
353
  {
354
  "cell_type": "code",
355
- "execution_count": 14,
356
  "metadata": {
357
  "id": "Q3JCU5lj9dU3"
358
  },
@@ -364,7 +329,7 @@
364
  },
365
  {
366
  "cell_type": "code",
367
- "execution_count": 15,
368
  "metadata": {
369
  "id": "wKuyNPK09YJ5"
370
  },
@@ -378,7 +343,7 @@
378
  },
379
  {
380
  "cell_type": "code",
381
- "execution_count": 16,
382
  "metadata": {
383
  "id": "9mrqhvLq_0Yk"
384
  },
@@ -389,7 +354,7 @@
389
  "10"
390
  ]
391
  },
392
- "execution_count": 16,
393
  "metadata": {},
394
  "output_type": "execute_result"
395
  }
@@ -408,7 +373,7 @@
408
  },
409
  {
410
  "cell_type": "code",
411
- "execution_count": 17,
412
  "metadata": {
413
  "id": "YguMpWpt7rlD"
414
  },
@@ -419,7 +384,7 @@
419
  },
420
  {
421
  "cell_type": "code",
422
- "execution_count": 18,
423
  "metadata": {},
424
  "outputs": [
425
  {
@@ -428,7 +393,7 @@
428
  "13"
429
  ]
430
  },
431
- "execution_count": 18,
432
  "metadata": {},
433
  "output_type": "execute_result"
434
  }
@@ -447,12 +412,12 @@
447
  "\n",
448
  "We can to this using the `create_collection` function. This function allows us to create a Collection programmatically. We must pass in a `title` and we can also specify a `description` and a `namespace`. If you don't specify a namespace, the collection will be created in your personal namespace but since I want to add this collection to the `librarian-bots` organization I'll specify it explicitly here. \n",
449
  "\n",
450
- "The `existed_ok` parameter allows us to specify what to do if a collection with the same title already exists. If we set this to `True` then the function will return the existing collection. If we set this to `False` then the function will raise an error if a collection with the same title already exists."
451
  ]
452
  },
453
  {
454
  "cell_type": "code",
455
- "execution_count": 19,
456
  "metadata": {},
457
  "outputs": [],
458
  "source": [
@@ -475,7 +440,7 @@
475
  },
476
  {
477
  "cell_type": "code",
478
- "execution_count": 20,
479
  "metadata": {},
480
  "outputs": [
481
  {
@@ -485,18 +450,18 @@
485
  " {'description': \"Collects datasets with 'instruction' in the name and more than 1 download and in the top 10% for the \"\n",
486
  " 'number of likes',\n",
487
  " 'items': [],\n",
488
- " 'last_updated': datetime.datetime(2023, 9, 25, 11, 52, 53, 545000, tzinfo=datetime.timezone.utc),\n",
489
  " 'owner': 'librarian-bots',\n",
490
  " 'position': 0,\n",
491
  " 'private': False,\n",
492
- " 'slug': 'librarian-bots/top-10-instruction-tuning-datasets-65117495134fd906b070c410',\n",
493
  " 'theme': 'indigo',\n",
494
  " 'title': 'Top 10% instruction tuning datasets',\n",
495
- " 'url': 'https://huggingface.co/collections/librarian-bots/top-10-instruction-tuning-datasets-65117495134fd906b070c410'}\n",
496
  "}"
497
  ]
498
  },
499
- "execution_count": 20,
500
  "metadata": {},
501
  "output_type": "execute_result"
502
  }
@@ -523,7 +488,7 @@
523
  },
524
  {
525
  "cell_type": "code",
526
- "execution_count": 21,
527
  "metadata": {
528
  "id": "8a9U5Bc376qD"
529
  },
@@ -541,7 +506,7 @@
541
  },
542
  {
543
  "cell_type": "code",
544
- "execution_count": 22,
545
  "metadata": {
546
  "colab": {
547
  "base_uri": "https://localhost:8080/"
@@ -598,7 +563,7 @@
598
  " }]"
599
  ]
600
  },
601
- "execution_count": 22,
602
  "metadata": {},
603
  "output_type": "execute_result"
604
  }
@@ -618,7 +583,7 @@
618
  },
619
  {
620
  "cell_type": "code",
621
- "execution_count": 23,
622
  "metadata": {},
623
  "outputs": [],
624
  "source": [
@@ -627,7 +592,7 @@
627
  },
628
  {
629
  "cell_type": "code",
630
- "execution_count": 24,
631
  "metadata": {},
632
  "outputs": [
633
  {
@@ -703,23 +668,27 @@
703
  "\n",
704
  "We also need to specify the `item_id` of the item we want to add. For datasets we can access the `id` from the `DatasetInfo` object to get this value. Additionally we need to specify the type of the item we want to add. This should be one of `dataset`, `model`, `space`, or `paper`. \n",
705
  "\n",
706
- "We can optionally add a note which we could use to store some additional information about the item. For example, we could use this to store the reason why we added this item to the collection. In this case we'll store the number of likes and downloads for the dataset."
707
  ]
708
  },
709
  {
710
  "cell_type": "code",
711
- "execution_count": 25,
712
  "metadata": {
713
  "id": "mp2GYws46kBD"
714
  },
715
  "outputs": [],
716
  "source": [
717
  "for dataset in datasets:\n",
718
  " add_collection_item(\n",
719
  " collection.slug,\n",
720
  " item_id=dataset.id,\n",
721
  " item_type=\"dataset\",\n",
722
- " note=f\"Dataset has {dataset.downloads} downloads and {dataset.likes} likes\",\n",
723
  " )"
724
  ]
725
  },
@@ -734,7 +703,7 @@
734
  },
735
  {
736
  "cell_type": "code",
737
- "execution_count": 29,
738
  "metadata": {},
739
  "outputs": [],
740
  "source": [
@@ -750,7 +719,7 @@
750
  },
751
  {
752
  "cell_type": "code",
753
- "execution_count": 31,
754
  "metadata": {},
755
  "outputs": [
756
  {
@@ -762,11 +731,13 @@
762
  " 'gated': False,\n",
763
  " 'isLikedByUser': False,\n",
764
  " 'item_id': 'Muennighoff/natural-instructions',\n",
765
- " 'item_object_id': '6511749fbb66f847cc57a04f',\n",
766
  " 'item_type': 'dataset',\n",
767
  " 'lastModified': '2022-12-23T20:08:44.000Z',\n",
768
  " 'likes': 18,\n",
769
- " 'note': 'Dataset has 313 downloads and 18 likes',\n",
770
  " 'position': 0,\n",
771
  " 'private': False,\n",
772
  " 'repoType': 'dataset',\n",
@@ -778,11 +749,11 @@
778
  " 'gated': False,\n",
779
  " 'isLikedByUser': False,\n",
780
  " 'item_id': 'qwedsacf/grade-school-math-instructions',\n",
781
- " 'item_object_id': '6511749f01307d048b987e74',\n",
782
  " 'item_type': 'dataset',\n",
783
  " 'lastModified': '2023-02-11T01:59:26.000Z',\n",
784
  " 'likes': 21,\n",
785
- " 'note': 'Dataset has 225 downloads and 21 likes',\n",
786
  " 'position': 1,\n",
787
  " 'private': False,\n",
788
  " 'repoType': 'dataset',\n",
@@ -790,7 +761,7 @@
790
  " }]"
791
  ]
792
  },
793
- "execution_count": 31,
794
  "metadata": {},
795
  "output_type": "execute_result"
796
  }
@@ -809,7 +780,7 @@
809
  },
810
  {
811
  "cell_type": "code",
812
- "execution_count": 34,
813
  "metadata": {},
814
  "outputs": [
815
  {
@@ -818,7 +789,7 @@
818
  "502.6923076923077"
819
  ]
820
  },
821
- "execution_count": 34,
822
  "metadata": {},
823
  "output_type": "execute_result"
824
  }
@@ -838,7 +809,7 @@
838
  },
839
  {
840
  "cell_type": "code",
841
- "execution_count": 46,
842
  "metadata": {},
843
  "outputs": [],
844
  "source": [
@@ -847,7 +818,7 @@
847
  },
848
  {
849
  "cell_type": "code",
850
- "execution_count": 47,
851
  "metadata": {},
852
  "outputs": [],
853
  "source": [
@@ -860,7 +831,7 @@
860
  },
861
  {
862
  "cell_type": "code",
863
- "execution_count": 48,
864
  "metadata": {},
865
  "outputs": [
866
  {
@@ -881,7 +852,7 @@
881
  " None]"
882
  ]
883
  },
884
- "execution_count": 48,
885
  "metadata": {},
886
  "output_type": "execute_result"
887
  }
@@ -899,7 +870,7 @@
899
  },
900
  {
901
  "cell_type": "code",
902
- "execution_count": 52,
903
  "metadata": {},
904
  "outputs": [
905
  {
@@ -937,16 +908,16 @@
937
  },
938
  {
939
  "cell_type": "code",
940
- "execution_count": 53,
941
  "metadata": {},
942
  "outputs": [
943
  {
944
  "data": {
945
  "text/plain": [
946
- "'https://huggingface.co/collections/librarian-bots/top-10-instruction-tuning-datasets-65117495134fd906b070c410'"
947
  ]
948
  },
949
- "execution_count": 53,
950
  "metadata": {},
951
  "output_type": "execute_result"
952
  }
 
46
  "output_type": "stream",
47
  "text": [
48
  "Collecting git+https://github.com/huggingface/huggingface_hub\n",
49
+ " Cloning https://github.com/huggingface/huggingface_hub to /private/var/folders/gf/nk18mwt53sb4d0zpvjzs40bw0000gn/T/pip-req-build-hs4ssvjo\n",
50
+ " Running command git clone --filter=blob:none --quiet https://github.com/huggingface/huggingface_hub /private/var/folders/gf/nk18mwt53sb4d0zpvjzs40bw0000gn/T/pip-req-build-hs4ssvjo\n",
51
  " Resolved https://github.com/huggingface/huggingface_hub to commit c32d4b31b679c9e91b906709631901f6aa85324d\n",
52
  " Installing build dependencies ... \u001b[?25ldone\n",
53
  "\u001b[?25h Getting requirements to build wheel ... \u001b[?25ldone\n",
54
  "\u001b[?25h Preparing metadata (pyproject.toml) ... \u001b[?25ldone\n",
55
+ "\u001b[?25hRequirement already satisfied: filelock in ./.venv/lib/python3.11/site-packages (from huggingface-hub==0.18.0.dev0) (3.12.4)\n",
56
+ "Requirement already satisfied: fsspec>=2023.5.0 in ./.venv/lib/python3.11/site-packages (from huggingface-hub==0.18.0.dev0) (2023.9.2)\n",
57
+ "Requirement already satisfied: requests in ./.venv/lib/python3.11/site-packages (from huggingface-hub==0.18.0.dev0) (2.31.0)\n",
58
+ "Requirement already satisfied: tqdm>=4.42.1 in ./.venv/lib/python3.11/site-packages (from huggingface-hub==0.18.0.dev0) (4.66.1)\n",
59
+ "Requirement already satisfied: pyyaml>=5.1 in ./.venv/lib/python3.11/site-packages (from huggingface-hub==0.18.0.dev0) (6.0.1)\n",
60
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in ./.venv/lib/python3.11/site-packages (from huggingface-hub==0.18.0.dev0) (4.8.0)\n",
61
  "Requirement already satisfied: packaging>=20.9 in ./.venv/lib/python3.11/site-packages (from huggingface-hub==0.18.0.dev0) (23.1)\n",
62
+ "Requirement already satisfied: charset-normalizer<4,>=2 in ./.venv/lib/python3.11/site-packages (from requests->huggingface-hub==0.18.0.dev0) (3.2.0)\n",
63
+ "Requirement already satisfied: idna<4,>=2.5 in ./.venv/lib/python3.11/site-packages (from requests->huggingface-hub==0.18.0.dev0) (3.4)\n",
64
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in ./.venv/lib/python3.11/site-packages (from requests->huggingface-hub==0.18.0.dev0) (2.0.5)\n",
65
+ "Requirement already satisfied: certifi>=2017.4.17 in ./.venv/lib/python3.11/site-packages (from requests->huggingface-hub==0.18.0.dev0) (2023.7.22)\n",
66
  "Note: you may need to restart the kernel to use updated packages.\n"
67
  ]
68
  }
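The installation output in the hunk above comes from installing `huggingface_hub` directly from GitHub; the cell that produces it is not shown in the diff, but it is presumably a notebook `%pip` cell along these lines:

```python
# Presumed install cell (not shown in this hunk): install huggingface_hub
# from the main branch so that the collections API is available.
%pip install git+https://github.com/huggingface/huggingface_hub
```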
 
77
  "source": [
78
  "## Authenticate\n",
79
  "\n",
80
+ "In order to create and manage collections, you need to be authenticated. You can do this via the `huggingface_hub` library using the `login` function. This function will detect where you are running your code and suggest the best way to authenticate."
81
  ]
82
  },
83
  {
84
  "cell_type": "code",
85
+ "execution_count": 2,
86
  "metadata": {
87
  "id": "Qn9p5Bsz2NN5"
88
  },
89
  "outputs": [],
90
  "source": [
91
+ "from huggingface_hub import login"
92
  ]
93
  },
94
  {
95
  "cell_type": "code",
96
+ "execution_count": 3,
97
  "metadata": {
98
  "colab": {
99
  "base_uri": "https://localhost:8080/",
 
140
  {
141
  "data": {
142
  "application/vnd.jupyter.widget-view+json": {
143
+ "model_id": "b3c0b966dfeb400c953e3e13689d5a0d",
144
  "version_major": 2,
145
  "version_minor": 0
146
  },
 
153
  }
154
  ],
155
  "source": [
156
+ "login()"
157
  ]
158
  },
159
  {
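The updated cells above replace `notebook_login()` with `login()`, which works in notebooks and scripts alike. Pulled out of the JSON escaping, the authentication flow is simply:

```python
from huggingface_hub import login

# Prompts for a Hugging Face token (via a widget in notebooks, a text prompt
# in terminals) and stores it for subsequent calls to the Hub.
login()

# Alternatively, a token can be passed directly: login(token="hf_...")
```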
 
167
  },
168
  {
169
  "cell_type": "code",
170
+ "execution_count": 4,
171
  "metadata": {
172
  "id": "SfV6YRentLI8"
173
  },
 
185
  },
186
  {
187
  "cell_type": "code",
188
+ "execution_count": 5,
189
  "metadata": {},
190
  "outputs": [],
191
  "source": [
 
201
  },
202
  {
203
  "cell_type": "code",
204
+ "execution_count": 6,
205
  "metadata": {},
206
  "outputs": [
207
  {
 
210
  "generator"
211
  ]
212
  },
213
+ "execution_count": 6,
214
  "metadata": {},
215
  "output_type": "execute_result"
216
  }
 
228
  },
229
  {
230
  "cell_type": "code",
231
+ "execution_count": 7,
232
  "metadata": {},
233
  "outputs": [],
234
  "source": [
 
255
  },
256
  {
257
  "cell_type": "code",
258
+ "execution_count": 8,
259
  "metadata": {},
260
  "outputs": [
261
  {
 
281
  "}"
282
  ]
283
  },
284
+ "execution_count": 8,
285
  "metadata": {},
286
  "output_type": "execute_result"
287
  }
 
299
  },
300
  {
301
  "cell_type": "code",
302
+ "execution_count": 9,
303
  "metadata": {},
304
  "outputs": [],
305
  "source": [
 
317
  },
318
  {
319
  "cell_type": "code",
320
+ "execution_count": 30,
321
  "metadata": {
322
  "id": "Q3JCU5lj9dU3"
323
  },
 
329
  },
330
  {
331
  "cell_type": "code",
332
+ "execution_count": 11,
333
  "metadata": {
334
  "id": "wKuyNPK09YJ5"
335
  },
 
343
  },
344
  {
345
  "cell_type": "code",
346
+ "execution_count": 12,
347
  "metadata": {
348
  "id": "9mrqhvLq_0Yk"
349
  },
 
354
  "10"
355
  ]
356
  },
357
+ "execution_count": 12,
358
  "metadata": {},
359
  "output_type": "execute_result"
360
  }
 
373
  },
374
  {
375
  "cell_type": "code",
376
+ "execution_count": 13,
377
  "metadata": {
378
  "id": "YguMpWpt7rlD"
379
  },
 
384
  },
385
  {
386
  "cell_type": "code",
387
+ "execution_count": 14,
388
  "metadata": {},
389
  "outputs": [
390
  {
 
393
  "13"
394
  ]
395
  },
396
+ "execution_count": 14,
397
  "metadata": {},
398
  "output_type": "execute_result"
399
  }
 
412
  "\n",
413
  "We can to this using the `create_collection` function. This function allows us to create a Collection programmatically. We must pass in a `title` and we can also specify a `description` and a `namespace`. If you don't specify a namespace, the collection will be created in your personal namespace but since I want to add this collection to the `librarian-bots` organization I'll specify it explicitly here. \n",
414
  "\n",
415
+ "The `exists_ok` parameter allows us to specify what to do if a collection with the same title already exists. If we set this to `True` then the function will return the existing collection. If we set this to `False` then the function will raise an error if a collection with the same title already exists."
416
  ]
417
  },
418
  {
419
  "cell_type": "code",
420
+ "execution_count": 15,
421
  "metadata": {},
422
  "outputs": [],
423
  "source": [
 
440
  },
441
  {
442
  "cell_type": "code",
443
+ "execution_count": 16,
444
  "metadata": {},
445
  "outputs": [
446
  {
 
450
  " {'description': \"Collects datasets with 'instruction' in the name and more than 1 download and in the top 10% for the \"\n",
451
  " 'number of likes',\n",
452
  " 'items': [],\n",
453
+ " 'last_updated': datetime.datetime(2023, 9, 25, 12, 36, 58, 301000, tzinfo=datetime.timezone.utc),\n",
454
  " 'owner': 'librarian-bots',\n",
455
  " 'position': 0,\n",
456
  " 'private': False,\n",
457
+ " 'slug': 'librarian-bots/top-10-instruction-tuning-datasets-65117eeaca29f41ae7ae39fe',\n",
458
  " 'theme': 'indigo',\n",
459
  " 'title': 'Top 10% instruction tuning datasets',\n",
460
+ " 'url': 'https://huggingface.co/collections/librarian-bots/top-10-instruction-tuning-datasets-65117eeaca29f41ae7ae39fe'}\n",
461
  "}"
462
  ]
463
  },
464
+ "execution_count": 16,
465
  "metadata": {},
466
  "output_type": "execute_result"
467
  }
 
488
  },
489
  {
490
  "cell_type": "code",
491
+ "execution_count": 17,
492
  "metadata": {
493
  "id": "8a9U5Bc376qD"
494
  },
 
506
  },
507
  {
508
  "cell_type": "code",
509
+ "execution_count": 18,
510
  "metadata": {
511
  "colab": {
512
  "base_uri": "https://localhost:8080/"
 
563
  " }]"
564
  ]
565
  },
566
+ "execution_count": 18,
567
  "metadata": {},
568
  "output_type": "execute_result"
569
  }
 
583
  },
584
  {
585
  "cell_type": "code",
586
+ "execution_count": 19,
587
  "metadata": {},
588
  "outputs": [],
589
  "source": [
 
592
  },
593
  {
594
  "cell_type": "code",
595
+ "execution_count": 20,
596
  "metadata": {},
597
  "outputs": [
598
  {
 
668
  "\n",
669
  "We also need to specify the `item_id` of the item we want to add. For datasets we can access the `id` from the `DatasetInfo` object to get this value. Additionally we need to specify the type of the item we want to add. This should be one of `dataset`, `model`, `space`, or `paper`. \n",
670
  "\n",
671
+ "We can optionally add a note which we could use to store some additional information about the item. For example, we could use this to store the reason why we added this item to the collection. In this case we'll store any tags that the dataset has."
672
  ]
673
  },
674
  {
675
  "cell_type": "code",
676
+ "execution_count": 21,
677
  "metadata": {
678
  "id": "mp2GYws46kBD"
679
  },
680
  "outputs": [],
681
  "source": [
682
  "for dataset in datasets:\n",
683
+ " if dataset.tags is not None:\n",
684
+ " note = f\"Dataset has the following tags: {dataset.tags}\"\n",
685
+ " else:\n",
686
+ " note = \"Dataset does not have any tags\"\n",
687
  " add_collection_item(\n",
688
  " collection.slug,\n",
689
  " item_id=dataset.id,\n",
690
  " item_type=\"dataset\",\n",
691
+ " note=note,\n",
692
  " )"
693
  ]
694
  },
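For readability, here is the updated loop from the hunk above reassembled from the escaped JSON source lines; `datasets` is assumed to be the list of `DatasetInfo` objects built in the earlier filtering cells, and `collection` the object returned by `create_collection`:

```python
from huggingface_hub import add_collection_item

for dataset in datasets:
    # Build a note from the dataset's tags, falling back to a fixed message.
    if dataset.tags is not None:
        note = f"Dataset has the following tags: {dataset.tags}"
    else:
        note = "Dataset does not have any tags"
    add_collection_item(
        collection.slug,          # which collection to add to
        item_id=dataset.id,       # repo id of the dataset
        item_type="dataset",      # one of: "model", "dataset", "space", "paper"
        note=note,                # free-text note shown alongside the item
    )
```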
 
703
  },
704
  {
705
  "cell_type": "code",
706
+ "execution_count": 22,
707
  "metadata": {},
708
  "outputs": [],
709
  "source": [
 
719
  },
720
  {
721
  "cell_type": "code",
722
+ "execution_count": 23,
723
  "metadata": {},
724
  "outputs": [
725
  {
 
731
  " 'gated': False,\n",
732
  " 'isLikedByUser': False,\n",
733
  " 'item_id': 'Muennighoff/natural-instructions',\n",
734
+ " 'item_object_id': '65117eeaec7fac9ec2fcaec1',\n",
735
  " 'item_type': 'dataset',\n",
736
  " 'lastModified': '2022-12-23T20:08:44.000Z',\n",
737
  " 'likes': 18,\n",
738
+ " 'note': \"Dataset has the following tags: ['task_categories:other', 'annotations_creators:crowdsourced', \"\n",
739
+ " \"'annotations_creators:expert-generated', 'multilinguality:monolingual', 'size_categories:100M<n<1B', \"\n",
740
+ " \"'language:en', 'region:us']\",\n",
741
  " 'position': 0,\n",
742
  " 'private': False,\n",
743
  " 'repoType': 'dataset',\n",
 
749
  " 'gated': False,\n",
750
  " 'isLikedByUser': False,\n",
751
  " 'item_id': 'qwedsacf/grade-school-math-instructions',\n",
752
+ " 'item_object_id': '65117eeb3368c9f41c835e6a',\n",
753
  " 'item_type': 'dataset',\n",
754
  " 'lastModified': '2023-02-11T01:59:26.000Z',\n",
755
  " 'likes': 21,\n",
756
+ " 'note': \"Dataset has the following tags: ['region:us']\",\n",
757
  " 'position': 1,\n",
758
  " 'private': False,\n",
759
  " 'repoType': 'dataset',\n",
 
761
  " }]"
762
  ]
763
  },
764
+ "execution_count": 23,
765
  "metadata": {},
766
  "output_type": "execute_result"
767
  }
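The output above lists the collection items together with their notes. The cell that produces it is not shown in this diff, but with the same API the items can be inspected roughly as follows (a sketch; the notebook may fetch them differently):

```python
from huggingface_hub import get_collection

# Re-fetch the collection by slug and walk its items.
refreshed = get_collection(collection.slug)
for item in refreshed.items:
    print(item.item_id, item.item_type, item.note)
```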
 
780
  },
781
  {
782
  "cell_type": "code",
783
+ "execution_count": 24,
784
  "metadata": {},
785
  "outputs": [
786
  {
 
789
  "502.6923076923077"
790
  ]
791
  },
792
+ "execution_count": 24,
793
  "metadata": {},
794
  "output_type": "execute_result"
795
  }
 
809
  },
810
  {
811
  "cell_type": "code",
812
+ "execution_count": 25,
813
  "metadata": {},
814
  "outputs": [],
815
  "source": [
 
818
  },
819
  {
820
  "cell_type": "code",
821
+ "execution_count": 26,
822
  "metadata": {},
823
  "outputs": [],
824
  "source": [
 
831
  },
832
  {
833
  "cell_type": "code",
834
+ "execution_count": 27,
835
  "metadata": {},
836
  "outputs": [
837
  {
 
852
  " None]"
853
  ]
854
  },
855
+ "execution_count": 27,
856
  "metadata": {},
857
  "output_type": "execute_result"
858
  }
 
870
  },
871
  {
872
  "cell_type": "code",
873
+ "execution_count": 28,
874
  "metadata": {},
875
  "outputs": [
876
  {
 
908
  },
909
  {
910
  "cell_type": "code",
911
+ "execution_count": 29,
912
  "metadata": {},
913
  "outputs": [
914
  {
915
  "data": {
916
  "text/plain": [
917
+ "'https://huggingface.co/collections/librarian-bots/top-10-instruction-tuning-datasets-65117eeaca29f41ae7ae39fe'"
918
  ]
919
  },
920
+ "execution_count": 29,
921
  "metadata": {},
922
  "output_type": "execute_result"
923
  }