Pushkar02-n committed on
Commit
d906298
·
verified ·
1 Parent(s): 0701749

Delete notebooks

Browse files
notebooks/01_embedding_experiments.ipynb DELETED
@@ -1,347 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 2,
6
- "id": "8e40675b",
7
- "metadata": {},
8
- "outputs": [],
9
- "source": [
10
- "from sentence_transformers import SentenceTransformer\n",
11
- "import chromadb"
12
- ]
13
- },
14
- {
15
- "cell_type": "code",
16
- "execution_count": 3,
17
- "id": "bac36205",
18
- "metadata": {},
19
- "outputs": [
20
- {
21
- "name": "stdout",
22
- "output_type": "stream",
23
- "text": [
24
- "/home/pushkar/AllDocuments/EtE_projects/anime_rag_system\n"
25
- ]
26
- }
27
- ],
28
- "source": [
29
- "from pathlib import Path\n",
30
- "\n",
31
- "# BASE_DIR = Path(__file__).resolve().parent  # use this instead when running as a .py script\n",
32
- "BASE_DIR = Path.cwd().parent\n",
33
- "print(BASE_DIR)"
34
- ]
35
- },
36
- {
37
- "cell_type": "code",
38
- "execution_count": 4,
39
- "id": "2878800c",
40
- "metadata": {},
41
- "outputs": [],
42
- "source": [
43
- "client = chromadb.PersistentClient(path=BASE_DIR / \"data/embeddings/chroma_db\")\n",
44
- "collection = client.get_collection(name=\"anime_collection\")"
45
- ]
46
- },
47
- {
48
- "cell_type": "code",
49
- "execution_count": 5,
50
- "id": "70b38ecc",
51
- "metadata": {},
52
- "outputs": [],
53
- "source": [
54
- "queries = [\n",
55
- " \"psychological thriller anime\",\n",
56
- " \"comedy slice of life school anime\",\n",
57
- " \"anime like Attack on Titan\",\n",
58
- " \"sad anime that will make me cry\"\n",
59
- "]"
60
- ]
61
- },
62
- {
63
- "cell_type": "code",
64
- "execution_count": 6,
65
- "id": "672b383c",
66
- "metadata": {},
67
- "outputs": [
68
- {
69
- "name": "stdout",
70
- "output_type": "stream",
71
- "text": [
72
- "Query: psychological thriller anime: \n",
73
- "1. Kimetsu no Yaiba Movie 1: Mugenjou-hen - Akaza Sairai (distance: 0.902)\n",
74
- "2. Hoozuki no Reitetsu 2nd Season (distance: 0.912)\n",
75
- "3. Mob Psycho 100 II (distance: 0.961)\n",
76
- "4. Mob Psycho 100 III (distance: 1.003)\n",
77
- "5. Akira (distance: 1.037)\n",
78
- "6. Mob Psycho 100 (distance: 1.041)\n",
79
- "7. Fujimoto Tatsuki 17-26 (distance: 1.056)\n",
80
- "8. Guimi Zhi Zhu: Xiaochou Pian (distance: 1.057)\n",
81
- "Query: comedy slice of life school anime: \n",
82
- "1. 3-nen Z-gumi Ginpachi-sensei (distance: 0.759)\n",
83
- "2. Kimi to Boku. 2 (distance: 0.793)\n",
84
- "3. School Rumble (distance: 0.914)\n",
85
- "4. Non Non Biyori Repeat (distance: 0.926)\n",
86
- "5. Hidamari Sketch: Sae Hiro Sotsugyou-hen (distance: 0.930)\n",
87
- "6. 5-toubun no Hanayome∽ (distance: 0.933)\n",
88
- "7. Danshi Koukousei no Nichijou (distance: 0.935)\n",
89
- "8. Kono Subarashii Sekai ni Shukufuku wo! 3: Bonus Stage (distance: 0.940)\n",
90
- "Query: anime like Attack on Titan: \n",
91
- "1. Shingeki no Kyojin Movie: Kanketsu-hen - The Last Attack (distance: 0.719)\n",
92
- "2. Kidou Senshi Zeta Gundam (distance: 0.886)\n",
93
- "3. Shingeki no Kyojin: The Final Season (distance: 0.937)\n",
94
- "4. Shingeki no Kyojin: The Final Season Part 2 (distance: 0.941)\n",
95
- "5. Shingeki no Kyojin (distance: 0.971)\n",
96
- "6. Shingeki no Kyojin Season 2 (distance: 0.994)\n",
97
- "7. Shingeki no Kyojin Season 3 Part 2 (distance: 1.011)\n",
98
- "8. Shingeki no Kyojin OVA (distance: 1.034)\n",
99
- "Query: sad anime that will make me cry: \n",
100
- "1. Fujimoto Tatsuki 17-26 (distance: 0.990)\n",
101
- "2. Osomatsu-san Movie (distance: 1.051)\n",
102
- "3. Gintama: Dai Hanseikai (distance: 1.052)\n",
103
- "4. Yuuki Yuuna wa Yuusha de Aru: Washio Sumi no Shou 3 - Yakusoku (distance: 1.094)\n",
104
- "5. Tengoku Daimakyou (distance: 1.096)\n",
105
- "6. Kimetsu no Yaiba Movie 1: Mugenjou-hen - Akaza Sairai (distance: 1.107)\n",
106
- "7. Girls Band Cry (distance: 1.111)\n",
107
- "8. Mahou Shoujo Madoka★Magica Movie 3: Hangyaku no Monogatari (distance: 1.121)\n"
108
- ]
109
- }
110
- ],
111
- "source": [
112
- "for query in queries:\n",
113
- "    print(f\"Query: {query}: \")\n",
114
- "    # Ask the collection for the 8 nearest entries to this query text.\n",
115
- "    results = collection.query(query_texts=[query], n_results=8)\n",
116
- "\n",
117
- "    # Only one query text was sent, so index 0 selects its result lists.\n",
118
- "    titles = [m[\"title\"] for m in results[\"metadatas\"][0]]\n",
119
- "    distances = results[\"distances\"][0]\n",
120
- "\n",
121
- "    # Number the hits from 1, in the order they were returned.\n",
122
- "    for i, (title, distance) in enumerate(zip(titles, distances)):\n",
123
- "        print(f\"{i+1}. {title} (distance: {distance:.3f})\")\n"
124
- ]
125
- },
126
- {
127
- "cell_type": "code",
128
- "execution_count": 7,
129
- "id": "a9843de5",
130
- "metadata": {},
131
- "outputs": [],
132
- "source": [
133
- "def show_results(results):\n",
134
- "    \"\"\"Print numbered titles with their distances for the first query in ``results``.\"\"\"\n",
135
- "    titles = [m[\"title\"] for m in results[\"metadatas\"][0]]\n",
136
- "    distances = results[\"distances\"][0]\n",
137
- "    for i, (title, distance) in enumerate(zip(titles, distances)):\n",
138
- "        print(f\"{i+1}. {title} (distance: {distance:.3f})\")"
139
- ]
140
- },
141
- {
142
- "cell_type": "code",
143
- "execution_count": null,
144
- "id": "453ed4e7",
145
- "metadata": {},
146
- "outputs": [],
147
- "source": [
148
- "# One probe query, shared by the unfiltered and the filtered search below.\n",
149
- "que = [\"Anime about a mage who goes on adventures\"]\n",
150
- "\n",
151
- "# Unfiltered search: the 20 nearest entries.\n",
152
- "res1 = collection.query(query_texts=que, n_results=20)\n",
153
- "\n",
154
- "# Filtered search: the 100 nearest entries whose \"score\" metadata matches $gt 0.\n",
155
- "res2 = collection.query(\n",
156
- "    query_texts=que, n_results=100, where={\"score\": {\"$gt\": 0}}\n",
157
- ")"
158
- ]
159
- },
160
- {
161
- "cell_type": "code",
162
- "execution_count": 13,
163
- "id": "985018d1",
164
- "metadata": {},
165
- "outputs": [
166
- {
167
- "name": "stdout",
168
- "output_type": "stream",
169
- "text": [
170
- "Results without metadata filtering: \n",
171
- "1. Meitantei Conan: Episode One - Chiisaku Natta Meitantei (distance: 1.009)\n",
172
- "2. Magi: The Kingdom of Magic (distance: 1.035)\n",
173
- "3. Guimi Zhi Zhu: Xiaochou Pian (distance: 1.072)\n",
174
- "4. 3-nen Z-gumi Ginpachi-sensei (distance: 1.075)\n",
175
- "5. Mahoutsukai no Yome: Hoshi Matsu Hito (distance: 1.088)\n",
176
- "6. Magi: The Labyrinth of Magic (distance: 1.090)\n",
177
- "7. Akagami no Shirayuki-hime 2nd Season (distance: 1.091)\n",
178
- "8. Kimetsu no Yaiba Movie 1: Mugenjou-hen - Akaza Sairai (distance: 1.103)\n",
179
- "9. Silent Witch: Chinmoku no Majo no Kakushigoto (distance: 1.117)\n",
180
- "10. Meitantei Conan Movie 13: Shikkoku no Chaser (distance: 1.121)\n",
181
- "11. Yuru Yuri San☆Hai! (distance: 1.126)\n",
182
- "12. Gintama: Dai Hanseikai (distance: 1.139)\n",
183
- "13. Zhu Xian 2nd Season (distance: 1.142)\n",
184
- "14. Yuri!!! on Ice: Yuri Plisetsky GPF in Barcelona EX - Welcome to The Madness (distance: 1.144)\n",
185
- "15. Mushoku Tensei: Isekai Ittara Honki Dasu (distance: 1.148)\n",
186
- "16. Kono Subarashii Sekai ni Shukufuku wo! 3: Bonus Stage (distance: 1.149)\n",
187
- "17. Lupin III vs. Meitantei Conan: The Movie (distance: 1.154)\n",
188
- "18. xxxHOLiC Movie: Manatsu no Yoru no Yume (distance: 1.155)\n",
189
- "19. Hoozuki no Reitetsu 2nd Season (distance: 1.156)\n",
190
- "20. Tsubasa: Shunraiki (distance: 1.157)\n",
191
- "Results with metadata filtering: \n",
192
- "1 Meitantei Conan: Episode One - Chiisaku Natta Meitantei, \n",
193
- "Genre: Adventure, Comedy, Mystery \n",
194
- "Distance: 1.0094857215881348\n",
195
- "1 Magi: The Kingdom of Magic, \n",
196
- "Genre: Action, Adventure, Fantasy \n",
197
- "Distance: 1.0348175764083862\n",
198
- "1 Magi: The Labyrinth of Magic, \n",
199
- "Genre: Action, Adventure, Fantasy \n",
200
- "Distance: 1.0896832942962646\n",
201
- "1 Zhu Xian 2nd Season, \n",
202
- "Genre: Action, Adventure, Fantasy \n",
203
- "Distance: 1.1424976587295532\n",
204
- "1 Mushoku Tensei: Isekai Ittara Honki Dasu, \n",
205
- "Genre: Adventure, Drama, Fantasy, Ecchi \n",
206
- "Distance: 1.1479932069778442\n",
207
- "1 Kono Subarashii Sekai ni Shukufuku wo! 3: Bonus Stage, \n",
208
- "Genre: Adventure, Comedy, Fantasy \n",
209
- "Distance: 1.1492812633514404\n",
210
- "1 Tsubasa: Shunraiki, \n",
211
- "Genre: Action, Adventure, Drama, Fantasy, Romance \n",
212
- "Distance: 1.1567258834838867\n",
213
- "1 Wanmei Shijie, \n",
214
- "Genre: Action, Adventure, Fantasy \n",
215
- "Distance: 1.1588501930236816\n",
216
- "1 Meitantei Conan OVA 09: 10-nengo no Stranger, \n",
217
- "Genre: Adventure, Comedy, Mystery \n",
218
- "Distance: 1.1683908700942993\n",
219
- "1 Meitantei Conan Movie 03: Seikimatsu no Majutsushi, \n",
220
- "Genre: Adventure, Comedy, Mystery \n",
221
- "Distance: 1.183741807937622\n",
222
- "1 Golden Boy, \n",
223
- "Genre: Adventure, Comedy, Ecchi \n",
224
- "Distance: 1.1899913549423218\n",
225
- "1 Mirai Shounen Conan, \n",
226
- "Genre: Adventure, Drama, Sci-Fi \n",
227
- "Distance: 1.192782998085022\n",
228
- "1 Mushoku Tensei: Isekai Ittara Honki Dasu Part 2, \n",
229
- "Genre: Adventure, Drama, Fantasy, Ecchi \n",
230
- "Distance: 1.2001222372055054\n",
231
- "1 Majo no Takkyuubin, \n",
232
- "Genre: Adventure, Award Winning, Comedy, Drama, Fantasy \n",
233
- "Distance: 1.2021398544311523\n",
234
- "1 Naruto, \n",
235
- "Genre: Action, Adventure, Fantasy \n",
236
- "Distance: 1.2085628509521484\n",
237
- "1 Sword Art Online: Progressive Movie - Hoshi Naki Yoru no Aria, \n",
238
- "Genre: Action, Adventure, Fantasy \n",
239
- "Distance: 1.2172694206237793\n",
240
- "1 Ginga Sengoku Gunyuuden Rai, \n",
241
- "Genre: Adventure, Romance, Sci-Fi \n",
242
- "Distance: 1.2243961095809937\n",
243
- "1 Xian Ni, \n",
244
- "Genre: Action, Adventure, Fantasy \n",
245
- "Distance: 1.2341101169586182\n",
246
- "1 Overlord, \n",
247
- "Genre: Action, Adventure, Fantasy \n",
248
- "Distance: 1.2380468845367432\n",
249
- "1 Akatsuki no Yona OVA, \n",
250
- "Genre: Adventure, Fantasy \n",
251
- "Distance: 1.2383925914764404\n",
252
- "1 Mushishi Zoku Shou: Suzu no Shizuku, \n",
253
- "Genre: Adventure, Mystery, Slice of Life, Supernatural \n",
254
- "Distance: 1.2392939329147339\n",
255
- "1 Shen Yin Wangzuo Movie: Yi Lai Ke Si Chuanqi, \n",
256
- "Genre: Action, Adventure, Fantasy \n",
257
- "Distance: 1.239490270614624\n",
258
- "1 Dragon Ball, \n",
259
- "Genre: Action, Adventure, Comedy, Fantasy \n",
260
- "Distance: 1.2412399053573608\n",
261
- "1 Wu Liuqi: Xuanwu Guo Pian, \n",
262
- "Genre: Action, Adventure, Comedy, Drama, Mystery \n",
263
- "Distance: 1.242012858390808\n",
264
- "1 Meitantei Conan, \n",
265
- "Genre: Adventure, Comedy, Mystery \n",
266
- "Distance: 1.2530790567398071\n",
267
- "1 Kono Subarashii Sekai ni Shukufuku wo! 2, \n",
268
- "Genre: Adventure, Comedy, Fantasy \n",
269
- "Distance: 1.2592735290527344\n",
270
- "1 Fanren Xiu Xian Chuan: Xinghai Feichi Prologue, \n",
271
- "Genre: Action, Adventure, Fantasy \n",
272
- "Distance: 1.264496922492981\n",
273
- "1 Saint Seiya: The Lost Canvas - Meiou Shinwa 2, \n",
274
- "Genre: Action, Adventure, Fantasy \n",
275
- "Distance: 1.267714023590088\n",
276
- "1 Meitantei Conan Movie 08: Ginyoku no Magician, \n",
277
- "Genre: Adventure, Comedy, Mystery \n",
278
- "Distance: 1.2713207006454468\n",
279
- "1 Fullmetal Alchemist: Brotherhood Specials, \n",
280
- "Genre: Action, Adventure, Drama, Fantasy \n",
281
- "Distance: 1.273901343345642\n",
282
- "1 Naruto: Shippuuden, \n",
283
- "Genre: Action, Adventure, Fantasy \n",
284
- "Distance: 1.2760953903198242\n",
285
- "1 Tenkuu no Shiro Laputa, \n",
286
- "Genre: Adventure, Award Winning, Fantasy \n",
287
- "Distance: 1.278309941291809\n",
288
- "1 Ie Naki Ko Remy, \n",
289
- "Genre: Adventure, Drama \n",
290
- "Distance: 1.2802643775939941\n",
291
- "1 Yi Nian Yong Heng 3rd Season, \n",
292
- "Genre: Action, Adventure, Comedy, Fantasy \n",
293
- "Distance: 1.284270167350769\n"
294
- ]
295
- }
296
- ],
297
- "source": [
298
- "print(\"Results without metadata filtering: \")\n",
299
- "show_results(res1)\n",
300
- "\n",
301
- "print(\"Results with metadata filtering: \")\n",
302
- "# Keep only adventure entries, then number them from 1 as they are printed.\n",
303
- "rows = zip(res2[\"metadatas\"][0], res2[\"distances\"][0])\n",
304
- "hits = [(m, d) for m, d in rows if \"adventure\" in m.get(\"genres\", \"Unknown\").lower()]\n",
305
- "for rank, (m, d) in enumerate(hits, start=1):\n",
306
- "    print(f\"{rank} {m['title']}, \\nGenre: {m.get('genres', 'Unknown')} \\nDistance: {d}\")"
307
- ]
308
- },
309
- {
310
- "cell_type": "code",
311
- "execution_count": null,
312
- "id": "b6e41bbf",
313
- "metadata": {},
314
- "outputs": [],
315
- "source": []
316
- },
317
- {
318
- "cell_type": "code",
319
- "execution_count": null,
320
- "id": "42413036",
321
- "metadata": {},
322
- "outputs": [],
323
- "source": []
324
- }
325
- ],
326
- "metadata": {
327
- "kernelspec": {
328
- "display_name": "anime-rag-system",
329
- "language": "python",
330
- "name": "python3"
331
- },
332
- "language_info": {
333
- "codemirror_mode": {
334
- "name": "ipython",
335
- "version": 3
336
- },
337
- "file_extension": ".py",
338
- "mimetype": "text/x-python",
339
- "name": "python",
340
- "nbconvert_exporter": "python",
341
- "pygments_lexer": "ipython3",
342
- "version": "3.14.0"
343
- }
344
- },
345
- "nbformat": 4,
346
- "nbformat_minor": 5
347
- }