Mohamed-BC committed
Commit: 66f5c36
Parent(s): 7ad11fc
Upload folder using huggingface_hub
Browse files
- .gitattributes +1 -0
- __pycache__/recommend.cpython-310.pyc +0 -0
- app.py +49 -0
- data/articles_embeddings.pkl +3 -0
- data/medium_articles.csv +3 -0
- demo.ipynb +323 -0
- recommend.py +20 -0
- requirements.txt +6 -0
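
Editor's note: the commit message says the folder was pushed with huggingface_hub. A minimal sketch of such an upload, assuming an already-configured access token and the Space id used in demo.ipynb below (the exact call is not recorded in this commit):

    # Sketch only: one way to reproduce "Upload folder using huggingface_hub".
    # Assumes `huggingface-cli login` (or HF_TOKEN) has already been set up;
    # the repo id is taken from demo.ipynb in this commit, not from the diff itself.
    from huggingface_hub import HfApi

    api = HfApi()
    api.upload_folder(
        folder_path=".",                                    # local working copy
        repo_id="Mohamed-BC/articles_recommender_system",   # target Space
        repo_type="space",
        commit_message="Upload folder using huggingface_hub",
    )

The notebook below does the equivalent from the shell with `huggingface-cli upload Mohamed-BC/articles_recommender_system .`.
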
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+data/medium_articles.csv filter=lfs diff=lfs merge=lfs -text
__pycache__/recommend.cpython-310.pyc
ADDED
Binary file (974 Bytes).
app.py
ADDED
@@ -0,0 +1,49 @@
+# Streamlit app script
+import streamlit as st
+from recommend import recommend
+# A simple function to check login credentials (for demonstration purposes)
+def check_login(username, password):
+    # Hardcoding a simple example username and password
+    user = "admin"
+    pwd = "pass123"
+    return username == user and password == pwd
+
+# Main application code
+def main():
+    # Initialize session state for login status
+    if "logged_in" not in st.session_state:
+        st.session_state.logged_in = False
+
+    # If not logged in, display login form
+    if not st.session_state.logged_in:
+        st.title("Login Page")
+        username = st.text_input("Username")
+        password = st.text_input("Password", type="password")
+        if st.button("Login"):
+            if check_login(username, password):
+                # Update session state to indicate user is logged in
+                # st.session_state.username = username
+                st.session_state.logged_in = True
+                st.rerun()  # Rerun the script to reflect the new state
+            else:
+                st.error("Invalid credentials. Please try again.")
+
+    # If logged in, redirect to another page or show different content
+    else:
+        # This can be another Streamlit page, or a condition to render a different view
+        st.title(f"Welcome :)!")
+        cols = st.columns([3,1])
+        with cols[0]:
+            query = st.text_input('Search here', placeholder="Describe what you're looking for", label_visibility="collapsed")
+        with cols[1]:
+            btn = st.button('Search')
+        if btn and query:
+            with st.spinner('Searching...'):
+                st.write_stream(recommend(query))
+        # Example: Provide a logout button
+        if st.sidebar.button("Logout"):
+            st.session_state.logged_in = False
+            st.rerun()
+
+if __name__ == "__main__":
+    main()
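
Editor's note: the `st.write_stream(recommend(query))` call above streams chunks from a generator or iterator, while recommend() (added later in this commit) returns a pandas DataFrame. A possible adapter, sketched here as an assumption rather than part of the commit, with `title`/`url` column names assumed from the Kaggle medium-articles dataset used in demo.ipynb:

    # Sketch (not part of the commit): yield one markdown line per recommended
    # article so the Streamlit app can stream results with st.write_stream.
    from recommend import recommend

    def stream_recommendations(query, n=5):
        results = recommend(query, n=n)          # DataFrame with the top-n articles
        for _, row in results.iterrows():
            yield f"- [{row['title']}]({row['url']})\n"   # column names assumed

    # In app.py this would correspond to
    # st.write_stream(stream_recommendations(query)).
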
data/articles_embeddings.pkl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb9b2d170c8857dfb76178505ea4b1232d1a7c5fdd904d4d2cc5465879d96d0f
+size 665668376
data/medium_articles.csv
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bba7b1022b2450cfcad0cdccae82ad29714e1fa8812f786fd01b302a7cb12a5c
+size 1042340506
demo.ipynb
ADDED
@@ -0,0 +1,323 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/codespace/.python/current/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      " from .autonotebook import tqdm as notebook_tqdm\n"
+     ]
+    }
+   ],
+   "source": [
+    "from datasets import load_dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Downloading data: 100%|██████████| 1.74G/1.74G [00:27<00:00, 62.8MB/s]\n",
+      "Generating train split: 100%|██████████| 192363/192363 [00:31<00:00, 6170.02 examples/s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "data = load_dataset(\"Mohamed-BC/Articles\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "app.py\tdemo.ipynb recommend.py requirements.txt user.py\n"
+     ]
+    }
+   ],
+   "source": [
+    "!ls"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!mkdir -p data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Dataset URL: https://www.kaggle.com/datasets/fabiochiusano/medium-articles\n",
+      "License(s): CC0-1.0\n",
+      "Downloading medium-articles.zip to /workspaces/codespaces-blank\n",
+      " 99%|███████████████████████████████████████▊| 367M/369M [00:14<00:00, 42.9MB/s]\n",
+      "100%|████████████████████████████████████████| 369M/369M [00:14<00:00, 27.5MB/s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "!kaggle datasets download -d fabiochiusano/medium-articles"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Archive: medium-articles.zip\n",
+      " inflating: data/medium_articles.csv \n"
+     ]
+    }
+   ],
+   "source": [
+    "!unzip medium-articles.zip -d data\n",
+    "!rm medium-articles.zip"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Cloning into 'articles_embeddings'...\n",
+      "remote: Enumerating objects: 6, done.\u001b[K\n",
+      "remote: Counting objects: 100% (3/3), done.\u001b[K\n",
+      "remote: Compressing objects: 100% (3/3), done.\u001b[K\n",
+      "remote: Total 6 (delta 0), reused 0 (delta 0), pack-reused 3 (from 1)\u001b[K\n",
+      "Unpacking objects: 100% (6/6), 2.11 KiB | 1.06 MiB/s, done.\n"
+     ]
+    }
+   ],
+   "source": [
+    "!git clone https://huggingface.co/Mohamed-BC/articles_embeddings "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!mv articles_embeddings/articles_embeddings.pkl data\n",
+    "!rm -rf articles_embeddings"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "emb = pd.read_pickle('data/articles_embeddings.pkl')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(192363,)"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "emb.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from recommend import recommend"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/codespace/.python/current/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
+      " warnings.warn(\n"
+     ]
+    },
+    {
+     "ename": "",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
+      "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
+      "\u001b[1;31mClick <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. \n",
+      "\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
+     ]
+    }
+   ],
+   "source": [
+    "query = \"How to train a model in PyTorch?\"\n",
+    "recommend(query=\"How to train a model in PyTorch?\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      " _| _| _| _| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _|_|_|_| _|_| _|_|_| _|_|_|_|\n",
+      " _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|\n",
+      " _|_|_|_| _| _| _| _|_| _| _|_| _| _| _| _| _| _|_| _|_|_| _|_|_|_| _| _|_|_|\n",
+      " _| _| _| _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|\n",
+      " _| _| _|_| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _| _| _| _|_|_| _|_|_|_|\n",
+      "\n",
+      " To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .\n",
+      "Enter your token (input will not be visible): Traceback (most recent call last):\n",
+      " File \"/home/codespace/.python/current/bin/huggingface-cli\", line 8, in <module>\n",
+      " sys.exit(main())\n",
+      " File \"/usr/local/python/3.10.13/lib/python3.10/site-packages/huggingface_hub/commands/huggingface_cli.py\", line 51, in main\n",
+      " service.run()\n",
+      " File \"/usr/local/python/3.10.13/lib/python3.10/site-packages/huggingface_hub/commands/user.py\", line 98, in run\n",
+      " login(token=self.args.token, add_to_git_credential=self.args.add_to_git_credential)\n",
+      " File \"/usr/local/python/3.10.13/lib/python3.10/site-packages/huggingface_hub/_login.py\", line 115, in login\n",
+      " interpreter_login(new_session=new_session, write_permission=write_permission)\n",
+      " File \"/usr/local/python/3.10.13/lib/python3.10/site-packages/huggingface_hub/_login.py\", line 191, in interpreter_login\n",
+      " token = getpass(\"Enter your token (input will not be visible): \")\n",
+      " File \"/usr/local/python/3.10.13/lib/python3.10/getpass.py\", line 77, in unix_getpass\n",
+      " passwd = _raw_input(prompt, stream, input=input)\n",
+      " File \"/usr/local/python/3.10.13/lib/python3.10/getpass.py\", line 146, in _raw_input\n",
+      " line = input.readline()\n",
+      " File \"/usr/local/python/3.10.13/lib/python3.10/codecs.py\", line 319, in decode\n",
+      " def decode(self, input, final=False):\n",
+      "KeyboardInterrupt\n"
+     ]
+    }
+   ],
+   "source": [
+    "!huggingface-cli login"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[90mgit version 2.44.0\u001b[0m\n",
+      "\u001b[90mgit-lfs/3.5.1 (GitHub; linux amd64; go 1.21.8)\u001b[0m\n",
+      "\n",
+      "You are about to create \u001b[1mspaces/Mohamed-BC/articles_recommender_system\u001b[0m\n",
+      "Proceed? [Y/n] ^C\n",
+      "Traceback (most recent call last):\n",
+      " File \"/home/codespace/.python/current/bin/huggingface-cli\", line 8, in <module>\n",
+      " sys.exit(main())\n",
+      " File \"/usr/local/python/3.10.13/lib/python3.10/site-packages/huggingface_hub/commands/huggingface_cli.py\", line 51, in main\n",
+      " service.run()\n",
+      " File \"/usr/local/python/3.10.13/lib/python3.10/site-packages/huggingface_hub/commands/user.py\", line 169, in run\n",
+      " choice = input(\"Proceed? [Y/n] \").lower()\n",
+      "KeyboardInterrupt\n"
+     ]
+    }
+   ],
+   "source": [
+    "!huggingface-cli repo create articles_recommender_system --type space"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Consider using `hf_transfer` for faster uploads. This solution comes with some limitations. See https://huggingface.co/docs/huggingface_hub/hf_transfer for more details.\n"
+     ]
+    }
+   ],
+   "source": [
+    "!huggingface-cli upload Mohamed-BC/articles_recommender_system ."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
recommend.py
ADDED
@@ -0,0 +1,20 @@
+from sentence_transformers import SentenceTransformer
+from scipy.spatial.distance import cosine
+import numpy as np
+import pandas as pd
+from datasets import load_dataset
+import pickle as pkl
+def recommend(query, n=5):
+    # Load the model
+    model = SentenceTransformer('all-MiniLM-L6-v2', device='cpu')
+    # Load the data
+    data = pd.read_csv('data/medium_articles.csv')
+    # get the embeddings
+    a_embeddings = pkl.load(open('data/articles_embeddings.pkl', 'rb'))
+    # Encode the query
+    q_embedding = model.encode(query)
+    # Calculate the cosine similarity
+    cos_sim = np.array([1 - cosine(q_embedding, emb) for emb in a_embeddings[:1000]])
+    # Get the top n recommendations
+    top_n = np.argsort(cos_sim)[-n:]
+    return data.iloc[top_n]
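
Editor's note: demo.ipynb above calls this function directly. A minimal usage sketch, assuming paths relative to the repo root with both data files already in place, and with `title`/`url` column names assumed from the Kaggle medium-articles dataset:

    # Sketch of calling recommend() directly, mirroring the cell in demo.ipynb.
    # Requires data/medium_articles.csv and data/articles_embeddings.pkl locally.
    from recommend import recommend

    top = recommend("How to train a model in PyTorch?", n=5)  # pandas DataFrame
    print(top[["title", "url"]])  # column names assumed, not defined in this commit
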
requirements.txt
ADDED
@@ -0,0 +1,6 @@
+streamlit
+pandas
+numpy
+sentence-transformers
+datasets
+huggingface-hub