Mohamed-BC
committed on
Commit
•
4cb4a9d
1
Parent(s):
66f5c36
Upload folder using huggingface_hub
Browse files
- demo.ipynb +91 -1
- recommend.py +2 -1
demo.ipynb
CHANGED
@@ -283,7 +283,97 @@
|
|
283 |
"name": "stdout",
|
284 |
"output_type": "stream",
|
285 |
"text": [
|
286 |
-
"Consider using `hf_transfer` for faster uploads. This solution comes with some limitations. See https://huggingface.co/docs/huggingface_hub/hf_transfer for more details.\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
287 |
]
|
288 |
}
|
289 |
],
|
|
|
283 |
"name": "stdout",
|
284 |
"output_type": "stream",
|
285 |
"text": [
|
286 |
+
"Consider using `hf_transfer` for faster uploads. This solution comes with some limitations. See https://huggingface.co/docs/huggingface_hub/hf_transfer for more details.\n",
|
287 |
+
"articles_embeddings.pkl: 0%| | 0.00/666M [00:00<?, ?B/s]\n",
|
288 |
+
"medium_articles.csv: 0%| | 0.00/1.04G [00:00<?, ?B/s]\u001b[A\n",
|
289 |
+
"\n",
|
290 |
+
"articles_embeddings.pkl: 1%| | 4.33M/666M [00:00<00:16, 40.2MB/s]\u001b[A\u001b[A\n",
|
291 |
+
"articles_embeddings.pkl: 1%|β | 9.91M/666M [00:00<00:14, 46.6MB/s]\u001b[A\n",
|
292 |
+
"articles_embeddings.pkl: 2%|β | 15.6M/666M [00:00<00:12, 51.2MB/s]\u001b[A\n",
|
293 |
+
"articles_embeddings.pkl: 3%|β | 20.8M/666M [00:00<00:22, 28.7MB/s]\u001b[A\n",
|
294 |
+
"articles_embeddings.pkl: 4%|β | 29.7M/666M [00:00<00:19, 32.4MB/s]\u001b[A\n",
|
295 |
+
"medium_articles.csv: 2%|β | 23.0M/1.04G [00:00<00:40, 24.9MB/s]\u001b[A\n",
|
296 |
+
"articles_embeddings.pkl: 5%|β | 33.4M/666M [00:01<00:33, 19.0MB/s]\u001b[A\n",
|
297 |
+
"articles_embeddings.pkl: 9%|ββ | 60.0M/666M [00:02<00:23, 25.6MB/s]\u001b[A\n",
|
298 |
+
"articles_embeddings.pkl: 10%|ββ | 63.5M/666M [00:02<00:22, 26.6MB/s]\u001b[A\n",
|
299 |
+
"articles_embeddings.pkl: 10%|ββ | 66.7M/666M [00:02<00:31, 19.1MB/s]\u001b[A\n",
|
300 |
+
"medium_articles.csv: 7%|ββ | 72.4M/1.04G [00:02<00:29, 32.9MB/s]\u001b[A\n",
|
301 |
+
"articles_embeddings.pkl: 14%|ββ | 95.3M/666M [00:03<00:14, 38.7MB/s]\u001b[A\n",
|
302 |
+
"medium_articles.csv: 8%|ββ | 80.8M/1.04G [00:03<00:41, 23.4MB/s]\u001b[A\n",
|
303 |
+
"medium_articles.csv: 8%|ββ | 88.1M/1.04G [00:03<00:33, 28.5MB/s]\u001b[A\n",
|
304 |
+
"articles_embeddings.pkl: 15%|ββ | 99.8M/666M [00:03<00:27, 20.3MB/s]\u001b[A\n",
|
305 |
+
"medium_articles.csv: 9%|ββ | 96.0M/1.04G [00:03<00:48, 19.4MB/s]\u001b[A\n",
|
306 |
+
"articles_embeddings.pkl: 17%|βββ | 112M/666M [00:04<00:26, 21.0MB/s]\u001b[A\n",
|
307 |
+
"articles_embeddings.pkl: 19%|βββ | 128M/666M [00:04<00:18, 29.5MB/s]\u001b[A\n",
|
308 |
+
"articles_embeddings.pkl: 24%|ββββ | 160M/666M [00:05<00:13, 37.5MB/s]\u001b[A\n",
|
309 |
+
"articles_embeddings.pkl: 26%|ββββ | 176M/666M [00:05<00:11, 42.2MB/s]\u001b[A\n",
|
310 |
+
"articles_embeddings.pkl: 29%|βββββ | 192M/666M [00:05<00:10, 44.5MB/s]\u001b[A\n",
|
311 |
+
"articles_embeddings.pkl: 30%|βββββ | 200M/666M [00:06<00:09, 48.2MB/s]\u001b[A\n",
|
312 |
+
"medium_articles.csv: 18%|ββββ | 185M/1.04G [00:06<00:19, 43.7MB/s]\u001b[A\n",
|
313 |
+
"articles_embeddings.pkl: 31%|βββββ | 205M/666M [00:06<00:11, 39.0MB/s]\u001b[A\n",
|
314 |
+
"medium_articles.csv: 19%|ββββ | 195M/1.04G [00:06<00:27, 31.0MB/s]\u001b[A\n",
|
315 |
+
"articles_embeddings.pkl: 32%|βββββ | 210M/666M [00:06<00:17, 26.4MB/s]\u001b[A\n",
|
316 |
+
"articles_embeddings.pkl: 36%|ββββββ | 240M/666M [00:07<00:11, 37.8MB/s]\u001b[A\n",
|
317 |
+
"articles_embeddings.pkl: 38%|ββββββ | 256M/666M [00:07<00:09, 42.4MB/s]\u001b[A\n",
|
318 |
+
"articles_embeddings.pkl: 41%|βββββββ | 272M/666M [00:08<00:09, 42.8MB/s]\u001b[A\n",
|
319 |
+
"articles_embeddings.pkl: 44%|βββββββ | 292M/666M [00:08<00:07, 47.1MB/s]\u001b[A\n",
|
320 |
+
"medium_articles.csv: 25%|βββββ | 256M/1.04G [00:08<00:19, 40.3MB/s]\u001b[A\n",
|
321 |
+
"articles_embeddings.pkl: 46%|βββββββ | 304M/666M [00:08<00:08, 41.9MB/s]\u001b[A\n",
|
322 |
+
"articles_embeddings.pkl: 50%|ββββββββ | 336M/666M [00:09<00:06, 47.9MB/s]\u001b[A\n",
|
323 |
+
"articles_embeddings.pkl: 53%|ββββββββ | 352M/666M [00:09<00:06, 50.1MB/s]\u001b[A\n",
|
324 |
+
"articles_embeddings.pkl: 55%|βββββββββ | 368M/666M [00:10<00:06, 47.3MB/s]\u001b[A\n",
|
325 |
+
"medium_articles.csv: 32%|ββββββ | 336M/1.04G [00:10<00:15, 46.1MB/s]\u001b[A\n",
|
326 |
+
"articles_embeddings.pkl: 60%|βββββββββ | 400M/666M [00:10<00:05, 47.6MB/s]\u001b[A\n",
|
327 |
+
"medium_articles.csv: 35%|βββββββ | 368M/1.04G [00:10<00:14, 47.8MB/s]\u001b[A\n",
|
328 |
+
"articles_embeddings.pkl: 62%|ββββββββββ | 416M/666M [00:11<00:05, 44.7MB/s]\u001b[A\n",
|
329 |
+
"articles_embeddings.pkl: 65%|ββββββββββ | 432M/666M [00:11<00:04, 47.4MB/s]\u001b[A\n",
|
330 |
+
"articles_embeddings.pkl: 70%|βββββββββββ | 464M/666M [00:12<00:04, 49.1MB/s]\u001b[A\n",
|
331 |
+
"articles_embeddings.pkl: 72%|βββββββββββ | 480M/666M [00:12<00:03, 48.2MB/s]\u001b[A\n",
|
332 |
+
"articles_embeddings.pkl: 75%|ββββββββββββ | 496M/666M [00:12<00:03, 47.4MB/s]\u001b[A\n",
|
333 |
+
"medium_articles.csv: 45%|ββββββββ | 464M/1.04G [00:12<00:13, 44.2MB/s]\u001b[A\n",
|
334 |
+
"articles_embeddings.pkl: 77%|ββββββββββββ | 512M/666M [00:13<00:03, 44.4MB/s]\u001b[A\n",
|
335 |
+
"articles_embeddings.pkl: 79%|ββββββββββββ | 528M/666M [00:13<00:03, 38.0MB/s]\u001b[A\n",
|
336 |
+
"articles_embeddings.pkl: 82%|βββββββββββββ | 544M/666M [00:14<00:03, 40.4MB/s]\u001b[A\n",
|
337 |
+
"articles_embeddings.pkl: 84%|βββββββββββββ | 560M/666M [00:14<00:02, 41.1MB/s]\u001b[A\n",
|
338 |
+
"articles_embeddings.pkl: 87%|βββββββββββββ | 576M/666M [00:14<00:01, 46.1MB/s]\u001b[A\n",
|
339 |
+
"articles_embeddings.pkl: 89%|ββββββββββββββ | 592M/666M [00:15<00:01, 44.4MB/s]\u001b[A\n",
|
340 |
+
"medium_articles.csv: 55%|ββββββββββ | 576M/1.04G [00:15<00:09, 48.7MB/s]\u001b[A\n",
|
341 |
+
"articles_embeddings.pkl: 91%|ββββββββββββββ | 608M/666M [00:15<00:01, 38.4MB/s]\u001b[A\n",
|
342 |
+
"articles_embeddings.pkl: 94%|ββββββββββββββ | 624M/666M [00:16<00:01, 38.8MB/s]\u001b[A\n",
|
343 |
+
"medium_articles.csv: 60%|βββββββββββ | 624M/1.04G [00:16<00:08, 50.4MB/s]\u001b[A\n",
|
344 |
+
"articles_embeddings.pkl: 96%|βββββββββββββββ| 640M/666M [00:16<00:00, 39.6MB/s]\u001b[A\n",
|
345 |
+
"articles_embeddings.pkl: 99%|βββββββββββββββ| 656M/666M [00:16<00:00, 42.1MB/s]\u001b[A\n",
|
346 |
+
"articles_embeddings.pkl: 100%|βββββββββββββββ| 666M/666M [00:17<00:00, 39.0MB/s]\u001b[A\n",
|
347 |
+
"\n",
|
348 |
+
"medium_articles.csv: 66%|ββββββββββββ | 688M/1.04G [00:17<00:06, 52.4MB/s]\u001b[A\n",
|
349 |
+
"\n",
|
350 |
+
"Upload 2 LFS files: 50%|βββββββββββββ | 1/2 [00:17<00:17, 17.43s/it]\u001b[A\u001b[A\n",
|
351 |
+
"medium_articles.csv: 68%|βββββββββββββ | 704M/1.04G [00:17<00:07, 43.1MB/s]\u001b[A\n",
|
352 |
+
"medium_articles.csv: 69%|βββββββββββββ | 720M/1.04G [00:18<00:07, 41.5MB/s]\u001b[A\n",
|
353 |
+
"medium_articles.csv: 71%|βββββββββββββ | 736M/1.04G [00:18<00:06, 46.2MB/s]\u001b[A\n",
|
354 |
+
"medium_articles.csv: 72%|βββββββββββββ | 752M/1.04G [00:18<00:06, 48.3MB/s]\u001b[A\n",
|
355 |
+
"medium_articles.csv: 74%|ββββββββββββββ | 768M/1.04G [00:19<00:05, 48.9MB/s]\u001b[A\n",
|
356 |
+
"medium_articles.csv: 75%|ββββββββββββββ | 784M/1.04G [00:19<00:05, 49.2MB/s]\u001b[A\n",
|
357 |
+
"medium_articles.csv: 77%|ββββββββββββββ | 800M/1.04G [00:19<00:05, 47.3MB/s]\u001b[A\n",
|
358 |
+
"medium_articles.csv: 78%|ββββββββββββββ | 816M/1.04G [00:20<00:04, 48.0MB/s]\u001b[A\n",
|
359 |
+
"medium_articles.csv: 80%|βββββββββββββββ | 832M/1.04G [00:20<00:04, 47.6MB/s]\u001b[A\n",
|
360 |
+
"medium_articles.csv: 81%|βββββββββββββββ | 848M/1.04G [00:20<00:03, 51.5MB/s]\u001b[A\n",
|
361 |
+
"medium_articles.csv: 83%|βββββββββββββββ | 864M/1.04G [00:21<00:03, 48.1MB/s]\u001b[A\n",
|
362 |
+
"medium_articles.csv: 84%|ββββββββββββββββ | 880M/1.04G [00:21<00:03, 47.9MB/s]\u001b[A\n",
|
363 |
+
"medium_articles.csv: 86%|ββββββββββββββββ | 896M/1.04G [00:21<00:03, 46.6MB/s]\u001b[A\n",
|
364 |
+
"medium_articles.csv: 87%|ββββββββββββββββ | 912M/1.04G [00:22<00:02, 48.3MB/s]\u001b[A\n",
|
365 |
+
"medium_articles.csv: 89%|ββββββββββββββββ | 928M/1.04G [00:22<00:02, 49.1MB/s]\u001b[A\n",
|
366 |
+
"medium_articles.csv: 91%|βββββββββββββββββ | 944M/1.04G [00:22<00:02, 45.7MB/s]\u001b[A\n",
|
367 |
+
"medium_articles.csv: 92%|βββββββββββββββββ | 960M/1.04G [00:23<00:01, 45.0MB/s]\u001b[A\n",
|
368 |
+
"medium_articles.csv: 94%|βββββββββββββββββ | 976M/1.04G [00:23<00:01, 46.9MB/s]\u001b[A\n",
|
369 |
+
"medium_articles.csv: 95%|ββββββββββββββββββ| 992M/1.04G [00:23<00:01, 47.4MB/s]\u001b[A\n",
|
370 |
+
"medium_articles.csv: 97%|βββββββββββββββββ| 1.01G/1.04G [00:24<00:00, 47.9MB/s]\u001b[A\n",
|
371 |
+
"medium_articles.csv: 98%|βββββββββββββββββ| 1.02G/1.04G [00:24<00:00, 49.3MB/s]\u001b[A\n",
|
372 |
+
"medium_articles.csv: 100%|βββββββββββββββββ| 1.04G/1.04G [00:24<00:00, 41.8MB/s]\u001b[A\n",
|
373 |
+
"\n",
|
374 |
+
"\n",
|
375 |
+
"Upload 2 LFS files: 100%|βββββββββββββββββββββββββ| 2/2 [00:25<00:00, 12.59s/it]\u001b[A\u001b[A\n",
|
376 |
+
"https://huggingface.co/Mohamed-BC/articles_recommender_system/tree/main/.\n"
|
377 |
]
|
378 |
}
|
379 |
],
|
recommend.py
CHANGED
@@ -8,7 +8,8 @@ def recommend(query, n=5):
|
|
8 |
# Load the model
|
9 |
model = SentenceTransformer('all-MiniLM-L6-v2', device='cpu')
|
10 |
# Load the data
|
11 |
-
data = pd.read_csv('data/medium_articles.csv')
|
|
|
12 |
# get the embeddings
|
13 |
a_embeddings = pkl.load(open('data/articles_embeddings.pkl', 'rb'))
|
14 |
# Encode the query
|
|
|
8 |
# Load the model
|
9 |
model = SentenceTransformer('all-MiniLM-L6-v2', device='cpu')
|
10 |
# Load the data
|
11 |
+
# data = pd.read_csv('data/medium_articles.csv')
|
12 |
+
data = load_dataset('Mohamed-BC/Articles')['train'].to_pandas()
|
13 |
# get the embeddings
|
14 |
a_embeddings = pkl.load(open('data/articles_embeddings.pkl', 'rb'))
|
15 |
# Encode the query
|