Mohamed-BC committed
Commit: 66f5c36
Parent(s): 7ad11fc
Upload folder using huggingface_hub
Browse files
- .gitattributes +1 -0
- __pycache__/recommend.cpython-310.pyc +0 -0
- app.py +49 -0
- data/articles_embeddings.pkl +3 -0
- data/medium_articles.csv +3 -0
- demo.ipynb +323 -0
- recommend.py +20 -0
- requirements.txt +6 -0
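
Editor's note: the commit message says the folder was pushed with huggingface_hub. A minimal sketch of such an upload, assuming an already-configured access token and the Space id used in demo.ipynb below (the exact call is not recorded in this commit):

    # Sketch only: one way to reproduce "Upload folder using huggingface_hub".
    # Assumes `huggingface-cli login` (or HF_TOKEN) has already been set up;
    # the repo id is taken from demo.ipynb in this commit, not from the diff itself.
    from huggingface_hub import HfApi

    api = HfApi()
    api.upload_folder(
        folder_path=".",                                    # local working copy
        repo_id="Mohamed-BC/articles_recommender_system",   # target Space
        repo_type="space",
        commit_message="Upload folder using huggingface_hub",
    )

The notebook below does the equivalent from the shell with `huggingface-cli upload Mohamed-BC/articles_recommender_system .`.
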
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+data/medium_articles.csv filter=lfs diff=lfs merge=lfs -text
__pycache__/recommend.cpython-310.pyc
ADDED
Binary file (974 Bytes).
app.py
ADDED
@@ -0,0 +1,49 @@
+# Streamlit app script
+import streamlit as st
+from recommend import recommend
+# A simple function to check login credentials (for demonstration purposes)
+def check_login(username, password):
+    # Hardcoding a simple example username and password
+    user = "admin"
+    pwd = "pass123"
+    return username == user and password == pwd
+
+# Main application code
+def main():
+    # Initialize session state for login status
+    if "logged_in" not in st.session_state:
+        st.session_state.logged_in = False
+
+    # If not logged in, display login form
+    if not st.session_state.logged_in:
+        st.title("Login Page")
+        username = st.text_input("Username")
+        password = st.text_input("Password", type="password")
+        if st.button("Login"):
+            if check_login(username, password):
+                # Update session state to indicate user is logged in
+                # st.session_state.username = username
+                st.session_state.logged_in = True
+                st.rerun()  # Rerun the script to reflect the new state
+            else:
+                st.error("Invalid credentials. Please try again.")
+
+    # If logged in, redirect to another page or show different content
+    else:
+        # This can be another Streamlit page, or a condition to render a different view
+        st.title(f"Welcome :)!")
+        cols = st.columns([3,1])
+        with cols[0]:
+            query = st.text_input('Search here', placeholder="Describe what you're looking for", label_visibility="collapsed")
+        with cols[1]:
+            btn = st.button('Search')
+        if btn and query:
+            with st.spinner('Searching...'):
+                st.write_stream(recommend(query))
+        # Example: Provide a logout button
+        if st.sidebar.button("Logout"):
+            st.session_state.logged_in = False
+            st.rerun()
+
+if __name__ == "__main__":
+    main()
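
Editor's note: the `st.write_stream(recommend(query))` call above streams chunks from a generator or iterator, while recommend() (added later in this commit) returns a pandas DataFrame. A possible adapter, sketched here as an assumption rather than part of the commit, with `title`/`url` column names assumed from the Kaggle medium-articles dataset used in demo.ipynb:

    # Sketch (not part of the commit): yield one markdown line per recommended
    # article so the Streamlit app can stream results with st.write_stream.
    from recommend import recommend

    def stream_recommendations(query, n=5):
        results = recommend(query, n=n)          # DataFrame with the top-n articles
        for _, row in results.iterrows():
            yield f"- [{row['title']}]({row['url']})\n"   # column names assumed

    # In app.py this would correspond to
    # st.write_stream(stream_recommendations(query)).
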
data/articles_embeddings.pkl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb9b2d170c8857dfb76178505ea4b1232d1a7c5fdd904d4d2cc5465879d96d0f
+size 665668376
data/medium_articles.csv
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bba7b1022b2450cfcad0cdccae82ad29714e1fa8812f786fd01b302a7cb12a5c
+size 1042340506
demo.ipynb
ADDED
@@ -0,0 +1,323 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/codespace/.python/current/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      " from .autonotebook import tqdm as notebook_tqdm\n"
+     ]
+    }
+   ],
+   "source": [
+    "from datasets import load_dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Downloading data: 100%|██████████| 1.74G/1.74G [00:27<00:00, 62.8MB/s]\n",
+      "Generating train split: 100%|██████████| 192363/192363 [00:31<00:00, 6170.02 examples/s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "data = load_dataset(\"Mohamed-BC/Articles\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "app.py\tdemo.ipynb recommend.py requirements.txt user.py\n"
+     ]
+    }
+   ],
+   "source": [
+    "!ls"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!mkdir -p data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Dataset URL: https://www.kaggle.com/datasets/fabiochiusano/medium-articles\n",
+      "License(s): CC0-1.0\n",
+      "Downloading medium-articles.zip to /workspaces/codespaces-blank\n",
+      " 99%|███████████████████████████████████████▊| 367M/369M [00:14<00:00, 42.9MB/s]\n",
+      "100%|████████████████████████████████████████| 369M/369M [00:14<00:00, 27.5MB/s]\n"
+     ]
+    }
+   ],
+   "source": [
+    "!kaggle datasets download -d fabiochiusano/medium-articles"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Archive: medium-articles.zip\n",
+      " inflating: data/medium_articles.csv \n"
+     ]
+    }
+   ],
+   "source": [
+    "!unzip medium-articles.zip -d data\n",
+    "!rm medium-articles.zip"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Cloning into 'articles_embeddings'...\n",
+      "remote: Enumerating objects: 6, done.\u001b[K\n",
+      "remote: Counting objects: 100% (3/3), done.\u001b[K\n",
+      "remote: Compressing objects: 100% (3/3), done.\u001b[K\n",
+      "remote: Total 6 (delta 0), reused 0 (delta 0), pack-reused 3 (from 1)\u001b[K\n",
+      "Unpacking objects: 100% (6/6), 2.11 KiB | 1.06 MiB/s, done.\n"
+     ]
+    }
+   ],
+   "source": [
+    "!git clone https://huggingface.co/Mohamed-BC/articles_embeddings "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!mv articles_embeddings/articles_embeddings.pkl data\n",
+    "!rm -rf articles_embeddings"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "emb = pd.read_pickle('data/articles_embeddings.pkl')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(192363,)"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "emb.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from recommend import recommend"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/codespace/.python/current/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
+      " warnings.warn(\n"
+     ]
+    },
+    {
+     "ename": "",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
+      "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
+      "\u001b[1;31mClick <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. \n",
+      "\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
+     ]
+    }
+   ],
+   "source": [
+    "query = \"How to train a model in PyTorch?\"\n",
+    "recommend(query=\"How to train a model in PyTorch?\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      " _| _| _| _| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _|_|_|_| _|_| _|_|_| _|_|_|_|\n",
+      " _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|\n",
+      " _|_|_|_| _| _| _| _|_| _| _|_| _| _| _| _| _| _|_| _|_|_| _|_|_|_| _| _|_|_|\n",
+      " _| _| _| _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|\n",
+      " _| _| _|_| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _| _| _| _|_|_| _|_|_|_|\n",
+      "\n",
+      " To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .\n",
+      "Enter your token (input will not be visible): Traceback (most recent call last):\n",
+      " File \"/home/codespace/.python/current/bin/huggingface-cli\", line 8, in <module>\n",
+      " sys.exit(main())\n",
+      " File \"/usr/local/python/3.10.13/lib/python3.10/site-packages/huggingface_hub/commands/huggingface_cli.py\", line 51, in main\n",
+      " service.run()\n",
+      " File \"/usr/local/python/3.10.13/lib/python3.10/site-packages/huggingface_hub/commands/user.py\", line 98, in run\n",
+      " login(token=self.args.token, add_to_git_credential=self.args.add_to_git_credential)\n",
+      " File \"/usr/local/python/3.10.13/lib/python3.10/site-packages/huggingface_hub/_login.py\", line 115, in login\n",
+      " interpreter_login(new_session=new_session, write_permission=write_permission)\n",
+      " File \"/usr/local/python/3.10.13/lib/python3.10/site-packages/huggingface_hub/_login.py\", line 191, in interpreter_login\n",
+      " token = getpass(\"Enter your token (input will not be visible): \")\n",
+      " File \"/usr/local/python/3.10.13/lib/python3.10/getpass.py\", line 77, in unix_getpass\n",
+      " passwd = _raw_input(prompt, stream, input=input)\n",
+      " File \"/usr/local/python/3.10.13/lib/python3.10/getpass.py\", line 146, in _raw_input\n",
+      " line = input.readline()\n",
+      " File \"/usr/local/python/3.10.13/lib/python3.10/codecs.py\", line 319, in decode\n",
+      " def decode(self, input, final=False):\n",
+      "KeyboardInterrupt\n"
+     ]
+    }
+   ],
+   "source": [
+    "!huggingface-cli login"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[90mgit version 2.44.0\u001b[0m\n",
+      "\u001b[90mgit-lfs/3.5.1 (GitHub; linux amd64; go 1.21.8)\u001b[0m\n",
+      "\n",
+      "You are about to create \u001b[1mspaces/Mohamed-BC/articles_recommender_system\u001b[0m\n",
+      "Proceed? [Y/n] ^C\n",
+      "Traceback (most recent call last):\n",
+      " File \"/home/codespace/.python/current/bin/huggingface-cli\", line 8, in <module>\n",
+      " sys.exit(main())\n",
+      " File \"/usr/local/python/3.10.13/lib/python3.10/site-packages/huggingface_hub/commands/huggingface_cli.py\", line 51, in main\n",
+      " service.run()\n",
+      " File \"/usr/local/python/3.10.13/lib/python3.10/site-packages/huggingface_hub/commands/user.py\", line 169, in run\n",
+      " choice = input(\"Proceed? [Y/n] \").lower()\n",
+      "KeyboardInterrupt\n"
+     ]
+    }
+   ],
+   "source": [
+    "!huggingface-cli repo create articles_recommender_system --type space"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Consider using `hf_transfer` for faster uploads. This solution comes with some limitations. See https://huggingface.co/docs/huggingface_hub/hf_transfer for more details.\n"
+     ]
+    }
+   ],
+   "source": [
+    "!huggingface-cli upload Mohamed-BC/articles_recommender_system ."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
recommend.py
ADDED
@@ -0,0 +1,20 @@
+from sentence_transformers import SentenceTransformer
+from scipy.spatial.distance import cosine
+import numpy as np
+import pandas as pd
+from datasets import load_dataset
+import pickle as pkl
+def recommend(query, n=5):
+    # Load the model
+    model = SentenceTransformer('all-MiniLM-L6-v2', device='cpu')
+    # Load the data
+    data = pd.read_csv('data/medium_articles.csv')
+    # get the embeddings
+    a_embeddings = pkl.load(open('data/articles_embeddings.pkl', 'rb'))
+    # Encode the query
+    q_embedding = model.encode(query)
+    # Calculate the cosine similarity
+    cos_sim = np.array([1 - cosine(q_embedding, emb) for emb in a_embeddings[:1000]])
+    # Get the top n recommendations
+    top_n = np.argsort(cos_sim)[-n:]
+    return data.iloc[top_n]
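
Editor's note: demo.ipynb above calls this function directly. A minimal usage sketch, assuming paths relative to the repo root with both data files already in place, and with `title`/`url` column names assumed from the Kaggle medium-articles dataset:

    # Sketch of calling recommend() directly, mirroring the cell in demo.ipynb.
    # Requires data/medium_articles.csv and data/articles_embeddings.pkl locally.
    from recommend import recommend

    top = recommend("How to train a model in PyTorch?", n=5)  # pandas DataFrame
    print(top[["title", "url"]])  # column names assumed, not defined in this commit
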
requirements.txt
ADDED
@@ -0,0 +1,6 @@
+streamlit
+pandas
+numpy
+sentence-transformers
+datasets
+huggingface-hub