Mohamed-BC commited on
Commit
4cb4a9d
β€’
1 Parent(s): 66f5c36

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. demo.ipynb +91 -1
  2. recommend.py +2 -1
demo.ipynb CHANGED
@@ -283,7 +283,97 @@
283
  "name": "stdout",
284
  "output_type": "stream",
285
  "text": [
286
- "Consider using `hf_transfer` for faster uploads. This solution comes with some limitations. See https://huggingface.co/docs/huggingface_hub/hf_transfer for more details.\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
  ]
288
  }
289
  ],
 
283
  "name": "stdout",
284
  "output_type": "stream",
285
  "text": [
286
+ "Consider using `hf_transfer` for faster uploads. This solution comes with some limitations. See https://huggingface.co/docs/huggingface_hub/hf_transfer for more details.\n",
287
+ "articles_embeddings.pkl: 0%| | 0.00/666M [00:00<?, ?B/s]\n",
288
+ "medium_articles.csv: 0%| | 0.00/1.04G [00:00<?, ?B/s]\u001b[A\n",
289
+ "\n",
290
+ "articles_embeddings.pkl: 1%| | 4.33M/666M [00:00<00:16, 40.2MB/s]\u001b[A\u001b[A\n",
291
+ "articles_embeddings.pkl: 1%|▏ | 9.91M/666M [00:00<00:14, 46.6MB/s]\u001b[A\n",
292
+ "articles_embeddings.pkl: 2%|β–Ž | 15.6M/666M [00:00<00:12, 51.2MB/s]\u001b[A\n",
293
+ "articles_embeddings.pkl: 3%|▍ | 20.8M/666M [00:00<00:22, 28.7MB/s]\u001b[A\n",
294
+ "articles_embeddings.pkl: 4%|β–Œ | 29.7M/666M [00:00<00:19, 32.4MB/s]\u001b[A\n",
295
+ "medium_articles.csv: 2%|▍ | 23.0M/1.04G [00:00<00:40, 24.9MB/s]\u001b[A\n",
296
+ "articles_embeddings.pkl: 5%|β–‹ | 33.4M/666M [00:01<00:33, 19.0MB/s]\u001b[A\n",
297
+ "articles_embeddings.pkl: 9%|β–ˆβ–Ž | 60.0M/666M [00:02<00:23, 25.6MB/s]\u001b[A\n",
298
+ "articles_embeddings.pkl: 10%|β–ˆβ–Ž | 63.5M/666M [00:02<00:22, 26.6MB/s]\u001b[A\n",
299
+ "articles_embeddings.pkl: 10%|β–ˆβ– | 66.7M/666M [00:02<00:31, 19.1MB/s]\u001b[A\n",
300
+ "medium_articles.csv: 7%|β–ˆβ– | 72.4M/1.04G [00:02<00:29, 32.9MB/s]\u001b[A\n",
301
+ "articles_embeddings.pkl: 14%|β–ˆβ–ˆ | 95.3M/666M [00:03<00:14, 38.7MB/s]\u001b[A\n",
302
+ "medium_articles.csv: 8%|β–ˆβ–Ž | 80.8M/1.04G [00:03<00:41, 23.4MB/s]\u001b[A\n",
303
+ "medium_articles.csv: 8%|β–ˆβ– | 88.1M/1.04G [00:03<00:33, 28.5MB/s]\u001b[A\n",
304
+ "articles_embeddings.pkl: 15%|β–ˆβ–ˆ | 99.8M/666M [00:03<00:27, 20.3MB/s]\u001b[A\n",
305
+ "medium_articles.csv: 9%|β–ˆβ–Œ | 96.0M/1.04G [00:03<00:48, 19.4MB/s]\u001b[A\n",
306
+ "articles_embeddings.pkl: 17%|β–ˆβ–ˆβ–Œ | 112M/666M [00:04<00:26, 21.0MB/s]\u001b[A\n",
307
+ "articles_embeddings.pkl: 19%|β–ˆβ–ˆβ–‰ | 128M/666M [00:04<00:18, 29.5MB/s]\u001b[A\n",
308
+ "articles_embeddings.pkl: 24%|β–ˆβ–ˆβ–ˆβ–Œ | 160M/666M [00:05<00:13, 37.5MB/s]\u001b[A\n",
309
+ "articles_embeddings.pkl: 26%|β–ˆβ–ˆβ–ˆβ–‰ | 176M/666M [00:05<00:11, 42.2MB/s]\u001b[A\n",
310
+ "articles_embeddings.pkl: 29%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 192M/666M [00:05<00:10, 44.5MB/s]\u001b[A\n",
311
+ "articles_embeddings.pkl: 30%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 200M/666M [00:06<00:09, 48.2MB/s]\u001b[A\n",
312
+ "medium_articles.csv: 18%|β–ˆβ–ˆβ–ˆβ– | 185M/1.04G [00:06<00:19, 43.7MB/s]\u001b[A\n",
313
+ "articles_embeddings.pkl: 31%|β–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 205M/666M [00:06<00:11, 39.0MB/s]\u001b[A\n",
314
+ "medium_articles.csv: 19%|β–ˆβ–ˆβ–ˆβ–Ž | 195M/1.04G [00:06<00:27, 31.0MB/s]\u001b[A\n",
315
+ "articles_embeddings.pkl: 32%|β–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 210M/666M [00:06<00:17, 26.4MB/s]\u001b[A\n",
316
+ "articles_embeddings.pkl: 36%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 240M/666M [00:07<00:11, 37.8MB/s]\u001b[A\n",
317
+ "articles_embeddings.pkl: 38%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 256M/666M [00:07<00:09, 42.4MB/s]\u001b[A\n",
318
+ "articles_embeddings.pkl: 41%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 272M/666M [00:08<00:09, 42.8MB/s]\u001b[A\n",
319
+ "articles_embeddings.pkl: 44%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 292M/666M [00:08<00:07, 47.1MB/s]\u001b[A\n",
320
+ "medium_articles.csv: 25%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 256M/1.04G [00:08<00:19, 40.3MB/s]\u001b[A\n",
321
+ "articles_embeddings.pkl: 46%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 304M/666M [00:08<00:08, 41.9MB/s]\u001b[A\n",
322
+ "articles_embeddings.pkl: 50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 336M/666M [00:09<00:06, 47.9MB/s]\u001b[A\n",
323
+ "articles_embeddings.pkl: 53%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 352M/666M [00:09<00:06, 50.1MB/s]\u001b[A\n",
324
+ "articles_embeddings.pkl: 55%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 368M/666M [00:10<00:06, 47.3MB/s]\u001b[A\n",
325
+ "medium_articles.csv: 32%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 336M/1.04G [00:10<00:15, 46.1MB/s]\u001b[A\n",
326
+ "articles_embeddings.pkl: 60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 400M/666M [00:10<00:05, 47.6MB/s]\u001b[A\n",
327
+ "medium_articles.csv: 35%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 368M/1.04G [00:10<00:14, 47.8MB/s]\u001b[A\n",
328
+ "articles_embeddings.pkl: 62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 416M/666M [00:11<00:05, 44.7MB/s]\u001b[A\n",
329
+ "articles_embeddings.pkl: 65%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 432M/666M [00:11<00:04, 47.4MB/s]\u001b[A\n",
330
+ "articles_embeddings.pkl: 70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 464M/666M [00:12<00:04, 49.1MB/s]\u001b[A\n",
331
+ "articles_embeddings.pkl: 72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 480M/666M [00:12<00:03, 48.2MB/s]\u001b[A\n",
332
+ "articles_embeddings.pkl: 75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 496M/666M [00:12<00:03, 47.4MB/s]\u001b[A\n",
333
+ "medium_articles.csv: 45%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 464M/1.04G [00:12<00:13, 44.2MB/s]\u001b[A\n",
334
+ "articles_embeddings.pkl: 77%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 512M/666M [00:13<00:03, 44.4MB/s]\u001b[A\n",
335
+ "articles_embeddings.pkl: 79%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 528M/666M [00:13<00:03, 38.0MB/s]\u001b[A\n",
336
+ "articles_embeddings.pkl: 82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 544M/666M [00:14<00:03, 40.4MB/s]\u001b[A\n",
337
+ "articles_embeddings.pkl: 84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 560M/666M [00:14<00:02, 41.1MB/s]\u001b[A\n",
338
+ "articles_embeddings.pkl: 87%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 576M/666M [00:14<00:01, 46.1MB/s]\u001b[A\n",
339
+ "articles_embeddings.pkl: 89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 592M/666M [00:15<00:01, 44.4MB/s]\u001b[A\n",
340
+ "medium_articles.csv: 55%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 576M/1.04G [00:15<00:09, 48.7MB/s]\u001b[A\n",
341
+ "articles_embeddings.pkl: 91%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 608M/666M [00:15<00:01, 38.4MB/s]\u001b[A\n",
342
+ "articles_embeddings.pkl: 94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 624M/666M [00:16<00:01, 38.8MB/s]\u001b[A\n",
343
+ "medium_articles.csv: 60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 624M/1.04G [00:16<00:08, 50.4MB/s]\u001b[A\n",
344
+ "articles_embeddings.pkl: 96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 640M/666M [00:16<00:00, 39.6MB/s]\u001b[A\n",
345
+ "articles_embeddings.pkl: 99%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 656M/666M [00:16<00:00, 42.1MB/s]\u001b[A\n",
346
+ "articles_embeddings.pkl: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 666M/666M [00:17<00:00, 39.0MB/s]\u001b[A\n",
347
+ "\n",
348
+ "medium_articles.csv: 66%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 688M/1.04G [00:17<00:06, 52.4MB/s]\u001b[A\n",
349
+ "\n",
350
+ "Upload 2 LFS files: 50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 1/2 [00:17<00:17, 17.43s/it]\u001b[A\u001b[A\n",
351
+ "medium_articles.csv: 68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 704M/1.04G [00:17<00:07, 43.1MB/s]\u001b[A\n",
352
+ "medium_articles.csv: 69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 720M/1.04G [00:18<00:07, 41.5MB/s]\u001b[A\n",
353
+ "medium_articles.csv: 71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 736M/1.04G [00:18<00:06, 46.2MB/s]\u001b[A\n",
354
+ "medium_articles.csv: 72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 752M/1.04G [00:18<00:06, 48.3MB/s]\u001b[A\n",
355
+ "medium_articles.csv: 74%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 768M/1.04G [00:19<00:05, 48.9MB/s]\u001b[A\n",
356
+ "medium_articles.csv: 75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 784M/1.04G [00:19<00:05, 49.2MB/s]\u001b[A\n",
357
+ "medium_articles.csv: 77%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 800M/1.04G [00:19<00:05, 47.3MB/s]\u001b[A\n",
358
+ "medium_articles.csv: 78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 816M/1.04G [00:20<00:04, 48.0MB/s]\u001b[A\n",
359
+ "medium_articles.csv: 80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 832M/1.04G [00:20<00:04, 47.6MB/s]\u001b[A\n",
360
+ "medium_articles.csv: 81%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 848M/1.04G [00:20<00:03, 51.5MB/s]\u001b[A\n",
361
+ "medium_articles.csv: 83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 864M/1.04G [00:21<00:03, 48.1MB/s]\u001b[A\n",
362
+ "medium_articles.csv: 84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 880M/1.04G [00:21<00:03, 47.9MB/s]\u001b[A\n",
363
+ "medium_articles.csv: 86%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 896M/1.04G [00:21<00:03, 46.6MB/s]\u001b[A\n",
364
+ "medium_articles.csv: 87%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 912M/1.04G [00:22<00:02, 48.3MB/s]\u001b[A\n",
365
+ "medium_articles.csv: 89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 928M/1.04G [00:22<00:02, 49.1MB/s]\u001b[A\n",
366
+ "medium_articles.csv: 91%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 944M/1.04G [00:22<00:02, 45.7MB/s]\u001b[A\n",
367
+ "medium_articles.csv: 92%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 960M/1.04G [00:23<00:01, 45.0MB/s]\u001b[A\n",
368
+ "medium_articles.csv: 94%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 976M/1.04G [00:23<00:01, 46.9MB/s]\u001b[A\n",
369
+ "medium_articles.csv: 95%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 992M/1.04G [00:23<00:01, 47.4MB/s]\u001b[A\n",
370
+ "medium_articles.csv: 97%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 1.01G/1.04G [00:24<00:00, 47.9MB/s]\u001b[A\n",
371
+ "medium_articles.csv: 98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹| 1.02G/1.04G [00:24<00:00, 49.3MB/s]\u001b[A\n",
372
+ "medium_articles.csv: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1.04G/1.04G [00:24<00:00, 41.8MB/s]\u001b[A\n",
373
+ "\n",
374
+ "\n",
375
+ "Upload 2 LFS files: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 2/2 [00:25<00:00, 12.59s/it]\u001b[A\u001b[A\n",
376
+ "https://huggingface.co/Mohamed-BC/articles_recommender_system/tree/main/.\n"
377
  ]
378
  }
379
  ],
recommend.py CHANGED
@@ -8,7 +8,8 @@ def recommend(query, n=5):
8
  # Load the model
9
  model = SentenceTransformer('all-MiniLM-L6-v2', device='cpu')
10
  # Load the data
11
- data = pd.read_csv('data/medium_articles.csv')
 
12
  # get the embeddings
13
  a_embeddings = pkl.load(open('data/articles_embeddings.pkl', 'rb'))
14
  # Encode the query
 
8
  # Load the model
9
  model = SentenceTransformer('all-MiniLM-L6-v2', device='cpu')
10
  # Load the data
11
+ # data = pd.read_csv('data/medium_articles.csv')
12
+ data = load_dataset('Mohamed-BC/Articles')['train'].to_pandas()
13
  # get the embeddings
14
  a_embeddings = pkl.load(open('data/articles_embeddings.pkl', 'rb'))
15
  # Encode the query