{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/codespace/.python/current/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] } ], "source": [ "from datasets import load_dataset" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Downloading data: 100%|██████████| 1.74G/1.74G [00:27<00:00, 62.8MB/s]\n", "Generating train split: 100%|██████████| 192363/192363 [00:31<00:00, 6170.02 examples/s]\n" ] } ], "source": [ "data = load_dataset(\"Mohamed-BC/Articles\")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "app.py\tdemo.ipynb recommend.py requirements.txt user.py\n" ] } ], "source": [ "!ls" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "!mkdir -p data" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Dataset URL: https://www.kaggle.com/datasets/fabiochiusano/medium-articles\n", "License(s): CC0-1.0\n", "Downloading medium-articles.zip to /workspaces/codespaces-blank\n", " 99%|███████████████████████████████████████▊| 367M/369M [00:14<00:00, 42.9MB/s]\n", "100%|████████████████████████████████████████| 369M/369M [00:14<00:00, 27.5MB/s]\n" ] } ], "source": [ "!kaggle datasets download -d fabiochiusano/medium-articles" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Archive: medium-articles.zip\n", " inflating: data/medium_articles.csv \n" ] } ], "source": [ "!unzip -o medium-articles.zip -d data\n", 
"!rm -f medium-articles.zip" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Cloning into 'articles_embeddings'...\n", "remote: Enumerating objects: 6, done.\u001b[K\n", "remote: Counting objects: 100% (3/3), done.\u001b[K\n", "remote: Compressing objects: 100% (3/3), done.\u001b[K\n", "remote: Total 6 (delta 0), reused 0 (delta 0), pack-reused 3 (from 1)\u001b[K\n", "Unpacking objects: 100% (6/6), 2.11 KiB | 1.06 MiB/s, done.\n" ] } ], "source": [ "!git clone https://huggingface.co/Mohamed-BC/articles_embeddings" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "!mv articles_embeddings/articles_embeddings.pkl data\n", "!rm -rf articles_embeddings" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "# SECURITY: unpickling can execute arbitrary code; only load pickle files from a source you trust.\n", "emb = pd.read_pickle('data/articles_embeddings.pkl')" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(192363,)" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "emb.shape" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "from recommend import recommend" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/codespace/.python/current/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n", " warnings.warn(\n" ] }, { "ename": "", "evalue": "", "output_type": "error", "traceback": [ "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. 
\n", "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n", "\u001b[1;31mClick here for more info. \n", "\u001b[1;31mView Jupyter log for further details." ] } ], "source": [ "query = \"How to train a model in PyTorch?\"\n", "recommend(query=query)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", " _| _| _| _| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _|_|_|_| _|_| _|_|_| _|_|_|_|\n", " _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|\n", " _|_|_|_| _| _| _| _|_| _| _|_| _| _| _| _| _| _|_| _|_|_| _|_|_|_| _| _|_|_|\n", " _| _| _| _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|\n", " _| _| _|_| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _| _| _| _|_|_| _|_|_|_|\n", "\n", " To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .\n", "Enter your token (input will not be visible): Traceback (most recent call last):\n", " File \"/home/codespace/.python/current/bin/huggingface-cli\", line 8, in \n", " sys.exit(main())\n", " File \"/usr/local/python/3.10.13/lib/python3.10/site-packages/huggingface_hub/commands/huggingface_cli.py\", line 51, in main\n", " service.run()\n", " File \"/usr/local/python/3.10.13/lib/python3.10/site-packages/huggingface_hub/commands/user.py\", line 98, in run\n", " login(token=self.args.token, add_to_git_credential=self.args.add_to_git_credential)\n", " File \"/usr/local/python/3.10.13/lib/python3.10/site-packages/huggingface_hub/_login.py\", line 115, in login\n", " interpreter_login(new_session=new_session, write_permission=write_permission)\n", " File \"/usr/local/python/3.10.13/lib/python3.10/site-packages/huggingface_hub/_login.py\", line 191, in interpreter_login\n", " token = getpass(\"Enter your token (input will not be visible): \")\n", " File \"/usr/local/python/3.10.13/lib/python3.10/getpass.py\", line 77, in unix_getpass\n", " passwd = 
_raw_input(prompt, stream, input=input)\n", " File \"/usr/local/python/3.10.13/lib/python3.10/getpass.py\", line 146, in _raw_input\n", " line = input.readline()\n", " File \"/usr/local/python/3.10.13/lib/python3.10/codecs.py\", line 319, in decode\n", " def decode(self, input, final=False):\n", "KeyboardInterrupt\n" ] } ], "source": [ "!huggingface-cli login" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[90mgit version 2.44.0\u001b[0m\n", "\u001b[90mgit-lfs/3.5.1 (GitHub; linux amd64; go 1.21.8)\u001b[0m\n", "\n", "You are about to create \u001b[1mspaces/Mohamed-BC/articles_recommender_system\u001b[0m\n", "Proceed? [Y/n] ^C\n", "Traceback (most recent call last):\n", " File \"/home/codespace/.python/current/bin/huggingface-cli\", line 8, in \n", " sys.exit(main())\n", " File \"/usr/local/python/3.10.13/lib/python3.10/site-packages/huggingface_hub/commands/huggingface_cli.py\", line 51, in main\n", " service.run()\n", " File \"/usr/local/python/3.10.13/lib/python3.10/site-packages/huggingface_hub/commands/user.py\", line 169, in run\n", " choice = input(\"Proceed? [Y/n] \").lower()\n", "KeyboardInterrupt\n" ] } ], "source": [ "!huggingface-cli repo create articles_recommender_system --type space" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Consider using `hf_transfer` for faster uploads. This solution comes with some limitations. See https://huggingface.co/docs/huggingface_hub/hf_transfer for more details.\n", "articles_embeddings.pkl: 0%| | 0.00/666M [00:00