henryhyunwookim committed on
Commit
b47611f
·
verified ·
1 Parent(s): 70c8cf6

Upload 15 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ vectore_storage/chroma/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ log/
2
+ __pycache__
README.md CHANGED
@@ -1,13 +1 @@
1
- ---
2
- title: GTA Multimodal RAG
3
- emoji: 🚀
4
- colorFrom: pink
5
- colorTo: green
6
- sdk: gradio
7
- sdk_version: 4.32.0
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ # GrandTheftAuto-multimodal-RAG-application
 
 
 
 
 
 
 
 
 
 
 
 
app.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from utils.utils import get_logger, initialization, get_result
2
+ import gradio as gr
3
+ import logging
4
+
5
+
6
+ logger = get_logger()
7
+ collection = None
8
+
9
+
10
def main(query):
    """Handle a single search request coming from the Gradio interface.

    Args:
        query: Text entered by the user. The literal string "exit" is
            treated as a request to end the search rather than a search term.

    Returns:
        A 2-tuple ``(image, text)`` unpacked from the value returned by
        ``get_result`` for the query, or ``(None, "Search terminated.")``
        when ``query`` is "exit".
    """
    logger = logging.getLogger(__name__)
    separator = "-------------------------------------------------------"

    print("Starting search...")
    logger.info("Starting search...")
    print(separator)
    logger.info(separator)

    # Guard clause: every call handles exactly one query, so no loop or
    # termination flag is needed (the old flag also shadowed builtin exit).
    if query == "exit":
        print(separator)
        logger.info(separator)
        print("Search terminated.")
        logger.info("Search terminated.")
        return None, "Search terminated."

    # collection, data_set and model are module-level names that the
    # __main__ block assigns before the interface is launched.
    image, text = get_result(collection, data_set, query, model, n_results=2)
    return image, text
35
+
36
+
37
if __name__ == "__main__":
    # Script entry point: initialise the search backend once, then serve
    # the Gradio UI that routes each text query through main().
    try:
        # Compare to None by identity, not equality.
        if collection is None:
            collection, data_set, model, logger = initialization(logger)
        app = gr.Interface(
            fn=main,
            inputs=["text"],
            outputs=["image", "text"],
            title="Search for a scene in the world of GTA!",
        )
        app.launch(share=True)
    except Exception as e:
        # Record the failure in the log, then re-raise the same exception.
        logger.exception(e)
        raise
data/data_set.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3b259221c8f9df17cde27c4e0913b9ce110c768910ff81e40c3db443196b68c
3
+ size 3229
log/20240530.log ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-05-30 15:47:21 INFO Initializing...
2
+ 2024-05-30 15:47:21 INFO -------------------------------------------------------
3
+ 2024-05-30 15:47:21 INFO Importing functions...
4
+ 2024-05-30 15:47:29 INFO Set directories...
5
+ 2024-05-30 15:47:29 INFO Loading data...
6
+ 2024-05-30 15:47:30 INFO Loading CLIP model...
7
+ 2024-05-30 15:47:30 INFO Load pretrained SentenceTransformer: sentence-transformers/clip-ViT-L-14
8
+ 2024-05-30 15:47:34 INFO Use pytorch device_name: cpu
9
+ 2024-05-30 15:47:34 INFO Getting vector embeddings...
10
+ 2024-05-30 15:48:33 INFO Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information.
11
+ 2024-05-30 15:48:33 INFO Collection image_vectors is not created.
12
+ 2024-05-30 15:48:34 INFO -------------------------------------------------------
13
+ 2024-05-30 15:48:34 INFO Initialization completed! Ready for search.
14
+ 2024-05-30 15:48:35 INFO HTTP Request: GET https://checkip.amazonaws.com/ "HTTP/1.1 200 "
15
+ 2024-05-30 15:48:35 INFO HTTP Request: GET http://127.0.0.1:7860/startup-events "HTTP/1.1 200 OK"
16
+ 2024-05-30 15:48:35 INFO HTTP Request: HEAD http://127.0.0.1:7860/ "HTTP/1.1 200 OK"
17
+ 2024-05-30 15:48:36 INFO HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
18
+ 2024-05-30 15:48:36 INFO HTTP Request: GET https://api.gradio.app/v2/tunnel-request "HTTP/1.1 200 OK"
19
+ 2024-05-30 15:49:30 INFO Starting search...
20
+ 2024-05-30 15:49:30 INFO -------------------------------------------------------
21
+ 2024-05-30 15:52:06 INFO Starting search...
22
+ 2024-05-30 15:52:06 INFO -------------------------------------------------------
23
+ 2024-05-30 15:53:34 INFO Starting search...
24
+ 2024-05-30 15:53:34 INFO -------------------------------------------------------
25
+ 2024-05-30 15:53:34 INFO -------------------------------------------------------
26
+ 2024-05-30 15:53:34 INFO Search terminated.
27
+ 2024-05-30 15:54:41 INFO Initializing...
28
+ 2024-05-30 15:54:41 INFO -------------------------------------------------------
29
+ 2024-05-30 15:54:41 INFO Importing functions...
30
+ 2024-05-30 15:54:50 INFO Set directories...
31
+ 2024-05-30 15:54:50 INFO Loading data...
32
+ 2024-05-30 15:54:51 INFO Loading CLIP model...
33
+ 2024-05-30 15:54:51 INFO Load pretrained SentenceTransformer: sentence-transformers/clip-ViT-L-14
34
+ 2024-05-30 15:54:55 INFO Use pytorch device_name: cpu
35
+ 2024-05-30 15:54:55 INFO Getting vector embeddings...
36
+ 2024-05-30 15:55:59 INFO Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information.
37
+ 2024-05-30 15:56:00 INFO Collection image_vectors is not created.
38
+ 2024-05-30 15:56:01 INFO -------------------------------------------------------
39
+ 2024-05-30 15:56:01 INFO Initialization completed! Ready for search.
40
+ 2024-05-30 15:56:02 INFO HTTP Request: GET https://checkip.amazonaws.com/ "HTTP/1.1 200 "
41
+ 2024-05-30 15:56:02 INFO HTTP Request: GET http://127.0.0.1:7860/startup-events "HTTP/1.1 200 OK"
42
+ 2024-05-30 15:56:02 INFO HTTP Request: HEAD http://127.0.0.1:7860/ "HTTP/1.1 200 OK"
43
+ 2024-05-30 15:56:02 INFO HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
44
+ 2024-05-30 15:56:03 INFO HTTP Request: GET https://api.gradio.app/v2/tunnel-request "HTTP/1.1 200 OK"
45
+ 2024-05-30 15:56:34 INFO Starting search...
46
+ 2024-05-30 15:56:34 INFO -------------------------------------------------------
47
+ 2024-05-30 15:57:55 INFO Starting search...
48
+ 2024-05-30 15:57:55 INFO -------------------------------------------------------
49
+ 2024-05-30 15:57:55 INFO -------------------------------------------------------
50
+ 2024-05-30 15:57:55 INFO Search terminated.
51
+ 2024-05-30 16:11:29 INFO Initializing...
52
+ 2024-05-30 16:11:29 INFO -------------------------------------------------------
53
+ 2024-05-30 16:11:29 INFO Importing functions...
54
+ 2024-05-30 16:11:37 INFO Set directories...
55
+ 2024-05-30 16:11:37 INFO Loading data...
56
+ 2024-05-30 16:11:38 INFO Loading CLIP model...
57
+ 2024-05-30 16:11:38 INFO Load pretrained SentenceTransformer: sentence-transformers/clip-ViT-L-14
58
+ 2024-05-30 16:11:42 INFO Use pytorch device_name: cpu
59
+ 2024-05-30 16:11:42 INFO Getting vector embeddings...
60
+ 2024-05-30 16:12:38 INFO Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information.
61
+ 2024-05-30 16:12:38 INFO Collection image_vectors is not created.
62
+ 2024-05-30 16:12:39 INFO -------------------------------------------------------
63
+ 2024-05-30 16:12:39 INFO Initialization completed! Ready for search.
64
+ 2024-05-30 16:12:40 INFO HTTP Request: GET https://checkip.amazonaws.com/ "HTTP/1.1 200 "
65
+ 2024-05-30 16:12:40 INFO HTTP Request: GET http://127.0.0.1:7860/startup-events "HTTP/1.1 200 OK"
66
+ 2024-05-30 16:12:40 INFO HTTP Request: HEAD http://127.0.0.1:7860/ "HTTP/1.1 200 OK"
67
+ 2024-05-30 16:12:40 INFO HTTP Request: GET https://api.gradio.app/pkg-version "HTTP/1.1 200 OK"
68
+ 2024-05-30 16:12:41 INFO HTTP Request: GET https://api.gradio.app/v2/tunnel-request "HTTP/1.1 200 OK"
69
+ 2024-05-30 16:14:12 INFO Starting search...
70
+ 2024-05-30 16:14:12 INFO -------------------------------------------------------
71
+ 2024-05-30 16:15:31 INFO Starting search...
72
+ 2024-05-30 16:15:31 INFO -------------------------------------------------------
73
+ 2024-05-30 16:15:31 INFO -------------------------------------------------------
74
+ 2024-05-30 16:15:31 INFO Search terminated.
notebook_1.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebook_2.ipynb ADDED
@@ -0,0 +1,1148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stdout",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "Data set already exists in the local drive. Loading it.\n"
13
+ ]
14
+ }
15
+ ],
16
+ "source": [
17
+ "import os\n",
18
+ "from pathlib import Path\n",
19
+ "import pickle\n",
20
+ "from datasets import load_dataset\n",
21
+ "\n",
22
+ "curr_dir = Path(os.getcwd())\n",
23
+ "data_dir = curr_dir / 'data'\n",
24
+ "if not os.path.exists(data_dir):\n",
25
+ " os.mkdir(data_dir)\n",
26
+ "data_pickle_path = data_dir / 'data_set.pkl'\n",
27
+ "\n",
28
+ "if not os.path.exists(data_pickle_path):\n",
29
+ " print(f\"Data set hasn't been loaded. Loading from the datasets library and save it as a pickle.\")\n",
30
+ " data_set = load_dataset(\"vipulmaheshwari/GTA-Image-Captioning-Dataset\")\n",
31
+ " with open(data_pickle_path, 'wb') as outfile:\n",
32
+ " pickle.dump(data_set, outfile)\n",
33
+ "else:\n",
34
+ " print(f\"Data set already exists in the local drive. Loading it.\")\n",
35
+ " with open(data_pickle_path, 'rb') as infile:\n",
36
+ " data_set = pickle.load(infile)"
37
+ ]
38
+ },
39
+ {
40
+ "cell_type": "code",
41
+ "execution_count": 17,
42
+ "metadata": {},
43
+ "outputs": [],
44
+ "source": [
45
+ "# print(data_set)\n",
46
+ "# len(data_set['train']['image']), len(data_set['train']['text'])"
47
+ ]
48
+ },
49
+ {
50
+ "cell_type": "code",
51
+ "execution_count": 44,
52
+ "metadata": {},
53
+ "outputs": [],
54
+ "source": [
55
+ "# Source: https://huggingface.co/sentence-transformers/clip-ViT-L-14\n",
56
+ "\n",
57
+ "from sentence_transformers import SentenceTransformer, util\n",
58
+ "# from PIL import Image\n",
59
+ "\n",
60
+ "#Load CLIP model\n",
61
+ "model = SentenceTransformer(\"sentence-transformers/clip-ViT-L-14\") # SentenceTransformer('clip-ViT-L-14')\n",
62
+ "\n",
63
+ "#Encode an image:\n",
64
+ "# img_emb = model.encode(image) # Image.open('two_dogs_in_snow.jpg')\n",
65
+ "\n",
66
+ "# #Encode text descriptions\n",
67
+ "# text_emb = model.encode(text) # ['Two dogs in the snow', 'A cat on a table', 'A picture of London at night']\n",
68
+ "\n",
69
+ "# #Compute cosine similarities \n",
70
+ "# cos_scores = util.cos_sim(img_emb, text_emb)\n",
71
+ "# print(cos_scores)"
72
+ ]
73
+ },
74
+ {
75
+ "cell_type": "code",
76
+ "execution_count": null,
77
+ "metadata": {},
78
+ "outputs": [],
79
+ "source": [
80
+ "img_embeddings = []\n",
81
+ "for image in tqdm(data_set['train']['image'][:2]):\n",
82
+ " img_embedding = model.encode(image)\n",
83
+ " img_embeddings.append(img_embedding)"
84
+ ]
85
+ },
86
+ {
87
+ "cell_type": "code",
88
+ "execution_count": null,
89
+ "metadata": {},
90
+ "outputs": [],
91
+ "source": []
92
+ },
93
+ {
94
+ "cell_type": "code",
95
+ "execution_count": null,
96
+ "metadata": {},
97
+ "outputs": [],
98
+ "source": []
99
+ },
100
+ {
101
+ "cell_type": "code",
102
+ "execution_count": null,
103
+ "metadata": {},
104
+ "outputs": [],
105
+ "source": []
106
+ },
107
+ {
108
+ "cell_type": "markdown",
109
+ "metadata": {},
110
+ "source": [
111
+ "# Try FAISS, Chroma, Pinecone (check the GAFS project)"
112
+ ]
113
+ },
114
+ {
115
+ "cell_type": "code",
116
+ "execution_count": null,
117
+ "metadata": {},
118
+ "outputs": [],
119
+ "source": []
120
+ },
121
+ {
122
+ "cell_type": "code",
123
+ "execution_count": null,
124
+ "metadata": {},
125
+ "outputs": [],
126
+ "source": []
127
+ },
128
+ {
129
+ "cell_type": "code",
130
+ "execution_count": null,
131
+ "metadata": {},
132
+ "outputs": [],
133
+ "source": []
134
+ },
135
+ {
136
+ "cell_type": "code",
137
+ "execution_count": null,
138
+ "metadata": {},
139
+ "outputs": [],
140
+ "source": []
141
+ },
142
+ {
143
+ "cell_type": "code",
144
+ "execution_count": null,
145
+ "metadata": {},
146
+ "outputs": [],
147
+ "source": []
148
+ },
149
+ {
150
+ "cell_type": "code",
151
+ "execution_count": null,
152
+ "metadata": {},
153
+ "outputs": [],
154
+ "source": []
155
+ },
156
+ {
157
+ "cell_type": "code",
158
+ "execution_count": null,
159
+ "metadata": {},
160
+ "outputs": [],
161
+ "source": [
162
+ "import pyarrow as pa\n",
163
+ "import lancedb\n",
164
+ "\n",
165
+ "db = lancedb.connect('./data/tables')\n",
166
+ "schema = pa.schema(\n",
167
+ " [\n",
168
+ " pa.field(\"vector\", pa.list_(pa.float32())),\n",
169
+ " # pa.field(\"text\", pa.string()),\n",
170
+ " # pa.field(\"id\", pa.int32())\n",
171
+ " ])\n",
172
+ "# tbl = db.create_table(\"gta_data\", schema=schema, mode=\"overwrite\")"
173
+ ]
174
+ },
175
+ {
176
+ "cell_type": "code",
177
+ "execution_count": 60,
178
+ "metadata": {},
179
+ "outputs": [
180
+ {
181
+ "name": "stderr",
182
+ "output_type": "stream",
183
+ "text": [
184
+ "100%|██████████| 2/2 [00:15<00:00, 7.65s/it]\n"
185
+ ]
186
+ }
187
+ ],
188
+ "source": [
189
+ "from tqdm import tqdm\n",
190
+ "import numpy as np\n",
191
+ "\n",
192
+ "img_embeddings = []\n",
193
+ "for image in tqdm(data_set['train']['image'][:2]):\n",
194
+ " img_embedding = model.encode(image)\n",
195
+ " img_embeddings.append(img_embedding)\n",
196
+ "\n",
197
+ "tbl_data = pa.Table.from_arrays([pa.array(img_embeddings)], [\"vector\"])\n",
198
+ "tbl = db.create_table(\"gta_data\", tbl_data, schema=schema, mode=\"overwrite\")\n",
199
+ "\n",
200
+ "# tbl.add(img_embeddings)\n",
201
+ "# tbl.to_pandas()"
202
+ ]
203
+ },
204
+ {
205
+ "cell_type": "code",
206
+ "execution_count": 63,
207
+ "metadata": {},
208
+ "outputs": [
209
+ {
210
+ "ename": "TypeError",
211
+ "evalue": "Query column vector must be a vector. Got list<item: float>.",
212
+ "output_type": "error",
213
+ "traceback": [
214
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
215
+ "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
216
+ "Cell \u001b[1;32mIn[63], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[43mtbl\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msearch\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencode\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43ma road with a stop\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvector_column_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mvector\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlimit\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m3\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_pandas\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 2\u001b[0m res\n",
217
+ "File \u001b[1;32mc:\\Users\\Admin\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\grandtheftauto-multimodal-rag-application-ufxwo2j--py3.11\\Lib\\site-packages\\lancedb\\query.py:262\u001b[0m, in \u001b[0;36mLanceQueryBuilder.to_pandas\u001b[1;34m(self, flatten)\u001b[0m\n\u001b[0;32m 247\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mto_pandas\u001b[39m(\u001b[38;5;28mself\u001b[39m, flatten: Optional[Union[\u001b[38;5;28mint\u001b[39m, \u001b[38;5;28mbool\u001b[39m]] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpd.DataFrame\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m 248\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 249\u001b[0m \u001b[38;5;124;03m Execute the query and return the results as a pandas DataFrame.\u001b[39;00m\n\u001b[0;32m 250\u001b[0m \u001b[38;5;124;03m In addition to the selected columns, LanceDB also returns a vector\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 260\u001b[0m \u001b[38;5;124;03m If unspecified, do not flatten the nested columns.\u001b[39;00m\n\u001b[0;32m 261\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 262\u001b[0m tbl \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_arrow\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 263\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m flatten \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m 264\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n",
218
+ "File \u001b[1;32mc:\\Users\\Admin\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\grandtheftauto-multimodal-rag-application-ufxwo2j--py3.11\\Lib\\site-packages\\lancedb\\query.py:527\u001b[0m, in \u001b[0;36mLanceVectorQueryBuilder.to_arrow\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 518\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mto_arrow\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m pa\u001b[38;5;241m.\u001b[39mTable:\n\u001b[0;32m 519\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 520\u001b[0m \u001b[38;5;124;03m Execute the query and return the results as an\u001b[39;00m\n\u001b[0;32m 521\u001b[0m \u001b[38;5;124;03m [Apache Arrow Table](https://arrow.apache.org/docs/python/generated/pyarrow.Table.html#pyarrow.Table).\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 525\u001b[0m \u001b[38;5;124;03m vector and the returned vectors.\u001b[39;00m\n\u001b[0;32m 526\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 527\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_batches\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mread_all()\n",
219
+ "File \u001b[1;32mc:\\Users\\Admin\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\grandtheftauto-multimodal-rag-application-ufxwo2j--py3.11\\Lib\\site-packages\\lancedb\\query.py:557\u001b[0m, in \u001b[0;36mLanceVectorQueryBuilder.to_batches\u001b[1;34m(self, batch_size)\u001b[0m\n\u001b[0;32m 544\u001b[0m vector \u001b[38;5;241m=\u001b[39m [v\u001b[38;5;241m.\u001b[39mtolist() \u001b[38;5;28;01mfor\u001b[39;00m v \u001b[38;5;129;01min\u001b[39;00m vector]\n\u001b[0;32m 545\u001b[0m query \u001b[38;5;241m=\u001b[39m Query(\n\u001b[0;32m 546\u001b[0m vector\u001b[38;5;241m=\u001b[39mvector,\n\u001b[0;32m 547\u001b[0m \u001b[38;5;28mfilter\u001b[39m\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_where,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 555\u001b[0m with_row_id\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_with_row_id,\n\u001b[0;32m 556\u001b[0m )\n\u001b[1;32m--> 557\u001b[0m result_set \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_table\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_execute_query\u001b[49m\u001b[43m(\u001b[49m\u001b[43mquery\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbatch_size\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 558\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_reranker \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 559\u001b[0m rs_table \u001b[38;5;241m=\u001b[39m result_set\u001b[38;5;241m.\u001b[39mread_all()\n",
220
+ "File \u001b[1;32mc:\\Users\\Admin\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\grandtheftauto-multimodal-rag-application-ufxwo2j--py3.11\\Lib\\site-packages\\lancedb\\table.py:1616\u001b[0m, in \u001b[0;36mLanceTable._execute_query\u001b[1;34m(self, query, batch_size)\u001b[0m\n\u001b[0;32m 1612\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_execute_query\u001b[39m(\n\u001b[0;32m 1613\u001b[0m \u001b[38;5;28mself\u001b[39m, query: Query, batch_size: Optional[\u001b[38;5;28mint\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m 1614\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m pa\u001b[38;5;241m.\u001b[39mRecordBatchReader:\n\u001b[0;32m 1615\u001b[0m ds \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mto_lance()\n\u001b[1;32m-> 1616\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mds\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mscanner\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 1617\u001b[0m \u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mquery\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1618\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mfilter\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mquery\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfilter\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1619\u001b[0m \u001b[43m \u001b[49m\u001b[43mprefilter\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mquery\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprefilter\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1620\u001b[0m \u001b[43m \u001b[49m\u001b[43mnearest\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\n\u001b[0;32m 1621\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcolumn\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mquery\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvector_column\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1622\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mq\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mquery\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvector\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1623\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mk\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mquery\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mk\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1624\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmetric\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mquery\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmetric\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1625\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mnprobes\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mquery\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnprobes\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1626\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrefine_factor\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mquery\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrefine_factor\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1627\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1628\u001b[0m \u001b[43m \u001b[49m\u001b[43mwith_row_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mquery\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwith_row_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1629\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mbatch_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbatch_size\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 1630\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mto_reader()\n",
221
+ "File \u001b[1;32mc:\\Users\\Admin\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\grandtheftauto-multimodal-rag-application-ufxwo2j--py3.11\\Lib\\site-packages\\lance\\dataset.py:321\u001b[0m, in \u001b[0;36mLanceDataset.scanner\u001b[1;34m(self, columns, filter, limit, offset, nearest, batch_size, batch_readahead, fragment_readahead, scan_in_order, fragments, prefilter, with_row_id, use_stats)\u001b[0m\n\u001b[0;32m 305\u001b[0m builder \u001b[38;5;241m=\u001b[39m (\n\u001b[0;32m 306\u001b[0m ScannerBuilder(\u001b[38;5;28mself\u001b[39m)\n\u001b[0;32m 307\u001b[0m \u001b[38;5;241m.\u001b[39mcolumns(columns)\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 318\u001b[0m \u001b[38;5;241m.\u001b[39muse_stats(use_stats)\n\u001b[0;32m 319\u001b[0m )\n\u001b[0;32m 320\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m nearest \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m--> 321\u001b[0m builder \u001b[38;5;241m=\u001b[39m \u001b[43mbuilder\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnearest\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mnearest\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 322\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m builder\u001b[38;5;241m.\u001b[39mto_scanner()\n",
222
+ "File \u001b[1;32mc:\\Users\\Admin\\AppData\\Local\\pypoetry\\Cache\\virtualenvs\\grandtheftauto-multimodal-rag-application-ufxwo2j--py3.11\\Lib\\site-packages\\lance\\dataset.py:2049\u001b[0m, in \u001b[0;36mScannerBuilder.nearest\u001b[1;34m(self, column, q, k, metric, nprobes, refine_factor, use_index)\u001b[0m\n\u001b[0;32m 2047\u001b[0m column_type \u001b[38;5;241m=\u001b[39m column_type\u001b[38;5;241m.\u001b[39mstorage_type\n\u001b[0;32m 2048\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m pa\u001b[38;5;241m.\u001b[39mtypes\u001b[38;5;241m.\u001b[39mis_fixed_size_list(column_type):\n\u001b[1;32m-> 2049\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\n\u001b[0;32m 2050\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mQuery column \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcolumn\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m must be a vector. Got \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcolumn_field\u001b[38;5;241m.\u001b[39mtype\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 2051\u001b[0m )\n\u001b[0;32m 2052\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(q) \u001b[38;5;241m!=\u001b[39m column_type\u001b[38;5;241m.\u001b[39mlist_size:\n\u001b[0;32m 2053\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 2054\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mQuery vector size \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlen\u001b[39m(q)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m does not match index column size\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 2055\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m 
\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcolumn_type\u001b[38;5;241m.\u001b[39mlist_size\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 2056\u001b[0m )\n",
223
+ "\u001b[1;31mTypeError\u001b[0m: Query column vector must be a vector. Got list<item: float>."
224
+ ]
225
+ }
226
+ ],
227
+ "source": [
228
+ "res = tbl.search(model.encode(\"a road with a stop\"), vector_column_name=\"vector\").limit(3).to_pandas()\n",
229
+ "res"
230
+ ]
231
+ },
232
+ {
233
+ "cell_type": "code",
234
+ "execution_count": null,
235
+ "metadata": {},
236
+ "outputs": [],
237
+ "source": []
238
+ },
239
+ {
240
+ "cell_type": "code",
241
+ "execution_count": null,
242
+ "metadata": {},
243
+ "outputs": [],
244
+ "source": []
245
+ },
246
+ {
247
+ "cell_type": "code",
248
+ "execution_count": null,
249
+ "metadata": {},
250
+ "outputs": [],
251
+ "source": [
252
+ "# https://huggingface.co/openai/clip-vit-large-patch14"
253
+ ]
254
+ },
255
+ {
256
+ "cell_type": "code",
257
+ "execution_count": 24,
258
+ "metadata": {},
259
+ "outputs": [],
260
+ "source": [
261
+ "import clip\n",
262
+ "import torch\n",
263
+ "import os\n",
264
+ "from datasets import load_dataset\n",
265
+ "\n",
266
+ "# ds = load_dataset(\"vipulmaheshwari/GTA-Image-Captioning-Dataset\")\n",
267
+ "# device = torch.device(\"mps\")\n",
268
+ "model, preprocess = clip.load(\"ViT-L/14\") # , device=device"
269
+ ]
270
+ },
271
+ {
272
+ "cell_type": "code",
273
+ "execution_count": 15,
274
+ "metadata": {},
275
+ "outputs": [
276
+ {
277
+ "data": {
278
+ "text/plain": [
279
+ "768"
280
+ ]
281
+ },
282
+ "execution_count": 15,
283
+ "metadata": {},
284
+ "output_type": "execute_result"
285
+ }
286
+ ],
287
+ "source": [
288
+ "def embed_txt(txt):\n",
289
+ " tokenized_text = clip.tokenize([txt])\n",
290
+ " embeddings = model.encode_text(tokenized_text)\n",
291
+ " \n",
292
+ " # Detach from the autograd graph, convert to a numpy array, and extract the first element as a list\n",
293
+ " result = embeddings.detach().numpy()[0].tolist()\n",
294
+ " return result\n",
295
+ "\n",
296
+ "len(embed_txt(\"a road with a stop\"))"
297
+ ]
298
+ },
299
+ {
300
+ "cell_type": "code",
301
+ "execution_count": 11,
302
+ "metadata": {},
303
+ "outputs": [
304
+ {
305
+ "data": {
306
+ "text/plain": [
307
+ "[1.172108769416809,\n",
308
+ " 0.5741956830024719,\n",
309
+ " -0.11420677602291107,\n",
310
+ " -0.5107784271240234,\n",
311
+ " -0.7742195725440979,\n",
312
+ " 0.7895426750183105,\n",
313
+ " 0.31811264157295227,\n",
314
+ " 0.5389135479927063,\n",
315
+ " 0.17074763774871826,\n",
316
+ " -1.0352754592895508,\n",
317
+ " -0.013449656777083874,\n",
318
+ " -0.5795634388923645,\n",
319
+ " -0.37020763754844666,\n",
320
+ " -0.7534741163253784,\n",
321
+ " 0.6788989901542664,\n",
322
+ " -0.1245330423116684,\n",
323
+ " 1.0375893115997314,\n",
324
+ " -0.08196641504764557,\n",
325
+ " 0.169560506939888,\n",
326
+ " -0.3306411802768707,\n",
327
+ " 0.6850194931030273,\n",
328
+ " -0.4113234281539917,\n",
329
+ " -0.3725243806838989,\n",
330
+ " -0.8902166485786438,\n",
331
+ " -0.2419223040342331,\n",
332
+ " 0.33643779158592224,\n",
333
+ " 0.18724264204502106,\n",
334
+ " 0.6745221018791199,\n",
335
+ " 0.00899740681052208,\n",
336
+ " -0.29769381880760193,\n",
337
+ " 0.6830898523330688,\n",
338
+ " 0.7002785205841064,\n",
339
+ " 0.5598942041397095,\n",
340
+ " -0.27884775400161743,\n",
341
+ " 0.29804039001464844,\n",
342
+ " 0.4663200378417969,\n",
343
+ " -0.40516427159309387,\n",
344
+ " -0.2796509861946106,\n",
345
+ " -0.3568377196788788,\n",
346
+ " 0.7982958555221558,\n",
347
+ " 1.0218019485473633,\n",
348
+ " -0.3191905915737152,\n",
349
+ " -0.8690600395202637,\n",
350
+ " -0.5986450910568237,\n",
351
+ " 0.6520456671714783,\n",
352
+ " 0.8482719659805298,\n",
353
+ " 0.45436325669288635,\n",
354
+ " -0.24868743121623993,\n",
355
+ " -0.22428922355175018,\n",
356
+ " -0.3995105028152466,\n",
357
+ " 0.1387435346841812,\n",
358
+ " 0.030430370941758156,\n",
359
+ " 0.1954972743988037,\n",
360
+ " 0.36345618963241577,\n",
361
+ " 0.23408269882202148,\n",
362
+ " 0.030055442824959755,\n",
363
+ " -0.13948054611682892,\n",
364
+ " -0.6816356778144836,\n",
365
+ " -0.2554306387901306,\n",
366
+ " -0.8186500668525696,\n",
367
+ " 0.0802079439163208,\n",
368
+ " -0.28623825311660767,\n",
369
+ " 0.889072060585022,\n",
370
+ " 0.3205733895301819,\n",
371
+ " 1.4578713178634644,\n",
372
+ " 0.5289382934570312,\n",
373
+ " -0.9107804894447327,\n",
374
+ " -0.1899547427892685,\n",
375
+ " -0.39814451336860657,\n",
376
+ " 0.07741428166627884,\n",
377
+ " 0.00696764700114727,\n",
378
+ " 0.8374080657958984,\n",
379
+ " 0.17547933757305145,\n",
380
+ " -0.6835469007492065,\n",
381
+ " 0.44190704822540283,\n",
382
+ " -0.258558452129364,\n",
383
+ " -0.16306370496749878,\n",
384
+ " 0.17053553462028503,\n",
385
+ " 0.8770076036453247,\n",
386
+ " 0.2896091341972351,\n",
387
+ " -0.2233574390411377,\n",
388
+ " -0.30297425389289856,\n",
389
+ " -0.7410178780555725,\n",
390
+ " 0.010058385320007801,\n",
391
+ " -0.7731197476387024,\n",
392
+ " -0.2569619417190552,\n",
393
+ " 0.05559535324573517,\n",
394
+ " 0.6135262846946716,\n",
395
+ " -0.5267459154129028,\n",
396
+ " -0.14416567981243134,\n",
397
+ " 0.3300650715827942,\n",
398
+ " 0.3322101831436157,\n",
399
+ " 0.260479211807251,\n",
400
+ " -0.6002621054649353,\n",
401
+ " 0.033296529203653336,\n",
402
+ " 0.5030784010887146,\n",
403
+ " -0.5291236042976379,\n",
404
+ " 0.11839054524898529,\n",
405
+ " -0.2279912680387497,\n",
406
+ " -0.24884033203125,\n",
407
+ " -0.27888786792755127,\n",
408
+ " -0.1304142028093338,\n",
409
+ " 0.1286783516407013,\n",
410
+ " 0.15377336740493774,\n",
411
+ " 0.5802848935127258,\n",
412
+ " -0.3416184186935425,\n",
413
+ " -0.41235557198524475,\n",
414
+ " 0.04911366105079651,\n",
415
+ " 0.28588297963142395,\n",
416
+ " 1.097459316253662,\n",
417
+ " 0.8836804628372192,\n",
418
+ " -0.06680312007665634,\n",
419
+ " 0.5119672417640686,\n",
420
+ " 0.1433386206626892,\n",
421
+ " 0.3975537121295929,\n",
422
+ " 0.751021683216095,\n",
423
+ " -0.5127158761024475,\n",
424
+ " -1.0673898458480835,\n",
425
+ " -0.810725212097168,\n",
426
+ " -0.9325631260871887,\n",
427
+ " 0.28165996074676514,\n",
428
+ " -1.1700552701950073,\n",
429
+ " -0.6979520916938782,\n",
430
+ " 0.09645866602659225,\n",
431
+ " -0.15432433784008026,\n",
432
+ " -0.6545705199241638,\n",
433
+ " -0.2297753095626831,\n",
434
+ " 0.9147917628288269,\n",
435
+ " -0.3901214897632599,\n",
436
+ " -0.08340626955032349,\n",
437
+ " -0.0342048779129982,\n",
438
+ " 0.4271363615989685,\n",
439
+ " 0.3410806655883789,\n",
440
+ " -0.14932666718959808,\n",
441
+ " 0.05415431410074234,\n",
442
+ " -0.5995809435844421,\n",
443
+ " -0.33829835057258606,\n",
444
+ " -0.23623280227184296,\n",
445
+ " -0.5740441679954529,\n",
446
+ " 0.3325800895690918,\n",
447
+ " -0.18519632518291473,\n",
448
+ " -0.26904159784317017,\n",
449
+ " 0.03128799423575401,\n",
450
+ " 0.15838740766048431,\n",
451
+ " -0.003409828059375286,\n",
452
+ " -0.2664038836956024,\n",
453
+ " -0.6785658597946167,\n",
454
+ " 0.4431314170360565,\n",
455
+ " -0.38189026713371277,\n",
456
+ " 0.5427551865577698,\n",
457
+ " 0.5074883103370667,\n",
458
+ " -0.186558797955513,\n",
459
+ " 0.08342668414115906,\n",
460
+ " 0.04791847988963127,\n",
461
+ " -0.1341174989938736,\n",
462
+ " 0.8764032125473022,\n",
463
+ " -0.10158982127904892,\n",
464
+ " 0.9622796177864075,\n",
465
+ " -0.058163080364465714,\n",
466
+ " -1.0029855966567993,\n",
467
+ " -0.22422465682029724,\n",
468
+ " 1.2381765842437744,\n",
469
+ " 0.17981192469596863,\n",
470
+ " 0.034056372940540314,\n",
471
+ " -0.2695963978767395,\n",
472
+ " -0.21056877076625824,\n",
473
+ " -0.3712306320667267,\n",
474
+ " 0.17336499691009521,\n",
475
+ " 0.5278773903846741,\n",
476
+ " 0.7908108234405518,\n",
477
+ " -1.034334659576416,\n",
478
+ " -0.5650461912155151,\n",
479
+ " -0.7466263175010681,\n",
480
+ " -0.16805803775787354,\n",
481
+ " 0.39045724272727966,\n",
482
+ " -0.5074604749679565,\n",
483
+ " 0.29658886790275574,\n",
484
+ " -0.1186276450753212,\n",
485
+ " 0.7888982892036438,\n",
486
+ " -0.00017159162962343544,\n",
487
+ " 0.9989897608757019,\n",
488
+ " 0.21528062224388123,\n",
489
+ " 0.3544112741947174,\n",
490
+ " -0.18352235853672028,\n",
491
+ " -0.5933219790458679,\n",
492
+ " -0.4221193492412567,\n",
493
+ " 0.20716431736946106,\n",
494
+ " 0.026883812621235847,\n",
495
+ " 1.2931787967681885,\n",
496
+ " 0.3020362854003906,\n",
497
+ " 0.26052647829055786,\n",
498
+ " 0.056001197546720505,\n",
499
+ " -0.5442985892295837,\n",
500
+ " -0.24692402780056,\n",
501
+ " -0.04342973232269287,\n",
502
+ " 0.32930392026901245,\n",
503
+ " -0.7617244124412537,\n",
504
+ " 0.26960083842277527,\n",
505
+ " 0.29244083166122437,\n",
506
+ " -0.2099844217300415,\n",
507
+ " 0.2785693407058716,\n",
508
+ " 0.07669660449028015,\n",
509
+ " -0.1421067714691162,\n",
510
+ " 0.46162599325180054,\n",
511
+ " 0.3855959475040436,\n",
512
+ " 0.27650055289268494,\n",
513
+ " -0.44994688034057617,\n",
514
+ " -0.28603509068489075,\n",
515
+ " -0.5041812062263489,\n",
516
+ " -0.3805933892726898,\n",
517
+ " 0.5895918011665344,\n",
518
+ " 0.6383715867996216,\n",
519
+ " -0.08397688716650009,\n",
520
+ " 0.22880668938159943,\n",
521
+ " -0.25133225321769714,\n",
522
+ " 0.2853071093559265,\n",
523
+ " -0.0931459441781044,\n",
524
+ " 0.3020959496498108,\n",
525
+ " 0.24055352807044983,\n",
526
+ " 0.18953140079975128,\n",
527
+ " -0.17559008300304413,\n",
528
+ " 0.11638100445270538,\n",
529
+ " 0.5736441612243652,\n",
530
+ " 0.34651291370391846,\n",
531
+ " 0.0011261674808338284,\n",
532
+ " 0.6858928203582764,\n",
533
+ " -0.3585776090621948,\n",
534
+ " 0.21113723516464233,\n",
535
+ " -0.451948344707489,\n",
536
+ " -0.6812528371810913,\n",
537
+ " -0.37171897292137146,\n",
538
+ " -0.11487153172492981,\n",
539
+ " -0.7819438576698303,\n",
540
+ " 0.2523130476474762,\n",
541
+ " -0.006692436058074236,\n",
542
+ " 0.5665392279624939,\n",
543
+ " -0.5619456768035889,\n",
544
+ " 0.06306441873311996,\n",
545
+ " 0.21295419335365295,\n",
546
+ " 0.5865535140037537,\n",
547
+ " 0.27423301339149475,\n",
548
+ " 0.2840102016925812,\n",
549
+ " -0.37136274576187134,\n",
550
+ " 0.016866570338606834,\n",
551
+ " 0.2263607531785965,\n",
552
+ " 0.43608683347702026,\n",
553
+ " -0.4567808508872986,\n",
554
+ " 0.9201197028160095,\n",
555
+ " -0.28868433833122253,\n",
556
+ " 0.2835354208946228,\n",
557
+ " 0.5691022276878357,\n",
558
+ " -0.24377702176570892,\n",
559
+ " 0.5043097138404846,\n",
560
+ " -0.41853949427604675,\n",
561
+ " 0.03636287525296211,\n",
562
+ " -0.07350795716047287,\n",
563
+ " -0.06902104616165161,\n",
564
+ " 0.32698169350624084,\n",
565
+ " -0.24132660031318665,\n",
566
+ " 0.0912783071398735,\n",
567
+ " -1.047544002532959,\n",
568
+ " -0.8717364072799683,\n",
569
+ " -0.8879557847976685,\n",
570
+ " 0.301925927400589,\n",
571
+ " -1.2747677564620972,\n",
572
+ " 0.10643213242292404,\n",
573
+ " 0.050040390342473984,\n",
574
+ " -0.6990651488304138,\n",
575
+ " 0.4598444104194641,\n",
576
+ " -0.2630557417869568,\n",
577
+ " 0.3260715901851654,\n",
578
+ " 0.15428033471107483,\n",
579
+ " 0.10122397541999817,\n",
580
+ " 0.07699556648731232,\n",
581
+ " 0.06605273485183716,\n",
582
+ " -0.2160506695508957,\n",
583
+ " -0.1665394902229309,\n",
584
+ " -0.5145867466926575,\n",
585
+ " -0.8410879373550415,\n",
586
+ " -0.3635564148426056,\n",
587
+ " -0.14213085174560547,\n",
588
+ " -0.3718281686306,\n",
589
+ " -0.2025422751903534,\n",
590
+ " -0.45895904302597046,\n",
591
+ " 0.16690057516098022,\n",
592
+ " -0.29905644059181213,\n",
593
+ " 0.03865504637360573,\n",
594
+ " 0.23067855834960938,\n",
595
+ " 0.23403894901275635,\n",
596
+ " -0.3748420774936676,\n",
597
+ " -0.4377340078353882,\n",
598
+ " -0.6237973570823669,\n",
599
+ " -0.5650405287742615,\n",
600
+ " -0.12215842306613922,\n",
601
+ " -0.23550915718078613,\n",
602
+ " -0.030611969530582428,\n",
603
+ " 0.1457085907459259,\n",
604
+ " 0.39134201407432556,\n",
605
+ " 0.7538257241249084,\n",
606
+ " -0.5013869404792786,\n",
607
+ " -0.22639918327331543,\n",
608
+ " 0.324470579624176,\n",
609
+ " 0.2524488568305969,\n",
610
+ " -0.6817197799682617,\n",
611
+ " -0.1683609038591385,\n",
612
+ " 0.09771472215652466,\n",
613
+ " -0.324865460395813,\n",
614
+ " 0.38337022066116333,\n",
615
+ " -0.148436039686203,\n",
616
+ " 0.7256155610084534,\n",
617
+ " -0.9280087947845459,\n",
618
+ " -0.6846877336502075,\n",
619
+ " -0.37772396206855774,\n",
620
+ " 0.03854738548398018,\n",
621
+ " -0.5223367214202881,\n",
622
+ " 0.04659451171755791,\n",
623
+ " -1.2525877952575684,\n",
624
+ " 0.15308304131031036,\n",
625
+ " -0.2739616334438324,\n",
626
+ " 0.07301849126815796,\n",
627
+ " 0.7795864939689636,\n",
628
+ " -0.2228480577468872,\n",
629
+ " -0.35411256551742554,\n",
630
+ " -0.6261951923370361,\n",
631
+ " 0.20154286921024323,\n",
632
+ " -0.02966398000717163,\n",
633
+ " -0.7075097560882568,\n",
634
+ " -0.45100030303001404,\n",
635
+ " -0.5318045020103455,\n",
636
+ " 0.22182771563529968,\n",
637
+ " 0.08000355958938599,\n",
638
+ " 0.16378679871559143,\n",
639
+ " 0.33453676104545593,\n",
640
+ " -0.20498014986515045,\n",
641
+ " -0.5192173719406128,\n",
642
+ " 0.3957352936267853,\n",
643
+ " -0.21540209650993347,\n",
644
+ " -0.26865679025650024,\n",
645
+ " -0.9579092264175415,\n",
646
+ " 0.29295825958251953,\n",
647
+ " 0.07182762026786804,\n",
648
+ " 0.2812371850013733,\n",
649
+ " 0.5159787535667419,\n",
650
+ " -0.1598782241344452,\n",
651
+ " -0.02911016158759594,\n",
652
+ " 0.10978005081415176,\n",
653
+ " -1.152063012123108,\n",
654
+ " -1.075944423675537,\n",
655
+ " -0.19859834015369415,\n",
656
+ " 0.48424282670021057,\n",
657
+ " -0.3020830452442169,\n",
658
+ " 0.0681198462843895,\n",
659
+ " -0.03712642937898636,\n",
660
+ " -0.26295045018196106,\n",
661
+ " 0.23075002431869507,\n",
662
+ " 0.03392830863595009,\n",
663
+ " 0.5592344999313354,\n",
664
+ " 0.27158620953559875,\n",
665
+ " 0.08701741695404053,\n",
666
+ " -0.2469501793384552,\n",
667
+ " 0.7389507293701172,\n",
668
+ " 0.3184473216533661,\n",
669
+ " -0.5283591151237488,\n",
670
+ " -0.35726648569107056,\n",
671
+ " 0.2647046446800232,\n",
672
+ " 0.06684468686580658,\n",
673
+ " -0.4558630883693695,\n",
674
+ " -0.3814390301704407,\n",
675
+ " 0.6464404463768005,\n",
676
+ " -0.3603093922138214,\n",
677
+ " -0.7406730651855469,\n",
678
+ " -0.06739675253629684,\n",
679
+ " 0.3286390006542206,\n",
680
+ " 0.07030770927667618,\n",
681
+ " 0.20259763300418854,\n",
682
+ " -0.18537510931491852,\n",
683
+ " 0.39111021161079407,\n",
684
+ " -0.1252942532300949,\n",
685
+ " 0.1268956959247589,\n",
686
+ " -0.10496045649051666,\n",
687
+ " 1.1690759658813477,\n",
688
+ " 0.23655962944030762,\n",
689
+ " 0.2556387782096863,\n",
690
+ " -0.30134761333465576,\n",
691
+ " -0.3626421391963959,\n",
692
+ " -0.35505855083465576,\n",
693
+ " -0.22458982467651367,\n",
694
+ " -0.40729954838752747,\n",
695
+ " -0.40974897146224976,\n",
696
+ " 0.028972748667001724,\n",
697
+ " 0.6284871101379395,\n",
698
+ " 0.3097871243953705,\n",
699
+ " -0.1652112752199173,\n",
700
+ " 1.0627437829971313,\n",
701
+ " -0.6887637376785278,\n",
702
+ " -0.031500522047281265,\n",
703
+ " -0.0873744785785675,\n",
704
+ " -0.9616701006889343,\n",
705
+ " 0.3587159216403961,\n",
706
+ " 0.1391131579875946,\n",
707
+ " -0.19815994799137115,\n",
708
+ " 0.7807681560516357,\n",
709
+ " 0.2649019658565521,\n",
710
+ " -0.48934823274612427,\n",
711
+ " -0.7037213444709778,\n",
712
+ " -0.39783185720443726,\n",
713
+ " -0.36193808913230896,\n",
714
+ " -0.6811600923538208,\n",
715
+ " -0.18488575518131256,\n",
716
+ " 0.6047443151473999,\n",
717
+ " -0.17012985050678253,\n",
718
+ " -0.11221067607402802,\n",
719
+ " -0.11349140107631683,\n",
720
+ " -7.79653263092041,\n",
721
+ " -0.03174687176942825,\n",
722
+ " -0.5907049179077148,\n",
723
+ " -0.0845143049955368,\n",
724
+ " 0.6719594597816467,\n",
725
+ " -0.6047013998031616,\n",
726
+ " -0.4621417820453644,\n",
727
+ " 0.4189649224281311,\n",
728
+ " 0.2606521546840668,\n",
729
+ " -0.5251185894012451,\n",
730
+ " 0.656951904296875,\n",
731
+ " -0.14103704690933228,\n",
732
+ " -0.724404513835907,\n",
733
+ " 0.032266344875097275,\n",
734
+ " -0.38332653045654297,\n",
735
+ " 0.2214561551809311,\n",
736
+ " -0.11025898903608322,\n",
737
+ " 0.2219904512166977,\n",
738
+ " -0.16805943846702576,\n",
739
+ " -0.22911910712718964,\n",
740
+ " 0.40065279603004456,\n",
741
+ " 0.8264251947402954,\n",
742
+ " -0.25879043340682983,\n",
743
+ " -0.4252917170524597,\n",
744
+ " -0.1860014647245407,\n",
745
+ " 0.21712413430213928,\n",
746
+ " 0.852258026599884,\n",
747
+ " 1.1114447116851807,\n",
748
+ " 0.03458324819803238,\n",
749
+ " -0.42567503452301025,\n",
750
+ " -0.4035224914550781,\n",
751
+ " 0.5391470789909363,\n",
752
+ " 0.6653061509132385,\n",
753
+ " -0.15112830698490143,\n",
754
+ " 0.20673374831676483,\n",
755
+ " 0.5916152596473694,\n",
756
+ " 0.10783706605434418,\n",
757
+ " 0.06303859502077103,\n",
758
+ " -0.6804474592208862,\n",
759
+ " 0.46267828345298767,\n",
760
+ " -0.8944555521011353,\n",
761
+ " -0.20007365942001343,\n",
762
+ " -0.18524183332920074,\n",
763
+ " -0.25279444456100464,\n",
764
+ " 0.013942774385213852,\n",
765
+ " -0.227418452501297,\n",
766
+ " -0.5019238591194153,\n",
767
+ " -0.259070485830307,\n",
768
+ " -0.4195726811885834,\n",
769
+ " -0.2565968334674835,\n",
770
+ " 0.08592142164707184,\n",
771
+ " -0.4816386103630066,\n",
772
+ " -0.7389425039291382,\n",
773
+ " 0.384757936000824,\n",
774
+ " 1.148498773574829,\n",
775
+ " -0.08795226365327835,\n",
776
+ " -0.7781391143798828,\n",
777
+ " -0.18237966299057007,\n",
778
+ " 0.27100449800491333,\n",
779
+ " 0.7376315593719482,\n",
780
+ " -0.2066810131072998,\n",
781
+ " -0.042161568999290466,\n",
782
+ " 0.14717990159988403,\n",
783
+ " -0.25498059391975403,\n",
784
+ " 0.33164745569229126,\n",
785
+ " -0.3789907693862915,\n",
786
+ " -0.702992856502533,\n",
787
+ " -0.46402469277381897,\n",
788
+ " -0.47181829810142517,\n",
789
+ " -0.530529260635376,\n",
790
+ " 0.08136516064405441,\n",
791
+ " 0.3396340608596802,\n",
792
+ " -0.21239398419857025,\n",
793
+ " 0.38136026263237,\n",
794
+ " -0.9020550847053528,\n",
795
+ " -0.41401106119155884,\n",
796
+ " -0.47626185417175293,\n",
797
+ " -0.34683799743652344,\n",
798
+ " -0.3377147912979126,\n",
799
+ " -0.6628923416137695,\n",
800
+ " 0.2143520712852478,\n",
801
+ " 0.31117284297943115,\n",
802
+ " 0.43092554807662964,\n",
803
+ " 0.12191533297300339,\n",
804
+ " -0.017828848212957382,\n",
805
+ " -0.12583602964878082,\n",
806
+ " 0.33957740664482117,\n",
807
+ " -0.09169825166463852,\n",
808
+ " 0.24532632529735565,\n",
809
+ " 0.5283830165863037,\n",
810
+ " 0.7038718461990356,\n",
811
+ " 0.6268500089645386,\n",
812
+ " 0.00923143420368433,\n",
813
+ " 0.8284425139427185,\n",
814
+ " 0.6025779247283936,\n",
815
+ " 0.5495515465736389,\n",
816
+ " -0.34349843859672546,\n",
817
+ " 0.3288527727127075,\n",
818
+ " 0.1823807954788208,\n",
819
+ " 0.2601393759250641,\n",
820
+ " -0.01894410327076912,\n",
821
+ " 0.535849928855896,\n",
822
+ " -0.07729293406009674,\n",
823
+ " -0.05701117962598801,\n",
824
+ " -0.5398024320602417,\n",
825
+ " -0.2532539665699005,\n",
826
+ " -0.02206384763121605,\n",
827
+ " -0.5667169690132141,\n",
828
+ " -0.1217791885137558,\n",
829
+ " 0.37247171998023987,\n",
830
+ " -0.11095214635133743,\n",
831
+ " -0.615912914276123,\n",
832
+ " 0.32324957847595215,\n",
833
+ " 0.45441827178001404,\n",
834
+ " 0.23056231439113617,\n",
835
+ " -2.3405637741088867,\n",
836
+ " -0.3898467421531677,\n",
837
+ " -0.03767596557736397,\n",
838
+ " -0.17562665045261383,\n",
839
+ " 0.40651726722717285,\n",
840
+ " -0.45753777027130127,\n",
841
+ " 1.0350662469863892,\n",
842
+ " -0.45301544666290283,\n",
843
+ " 0.5571080446243286,\n",
844
+ " -0.7762919068336487,\n",
845
+ " -0.2582171857357025,\n",
846
+ " -0.8123776316642761,\n",
847
+ " 0.027839435264468193,\n",
848
+ " 0.021091900765895844,\n",
849
+ " -0.3034447133541107,\n",
850
+ " 0.34992972016334534,\n",
851
+ " -0.6623353958129883,\n",
852
+ " -0.2909213602542877,\n",
853
+ " -0.18953290581703186,\n",
854
+ " -0.5997650623321533,\n",
855
+ " 0.8640273213386536,\n",
856
+ " -0.24815954267978668,\n",
857
+ " -0.29709047079086304,\n",
858
+ " 0.8860780000686646,\n",
859
+ " 0.04529644176363945,\n",
860
+ " 1.1951236724853516,\n",
861
+ " -1.1161422729492188,\n",
862
+ " -0.04289549961686134,\n",
863
+ " -1.6880977153778076,\n",
864
+ " -0.16583313047885895,\n",
865
+ " -0.4640212059020996,\n",
866
+ " 0.03880169615149498,\n",
867
+ " -0.4149312973022461,\n",
868
+ " 0.5659136772155762,\n",
869
+ " -0.07184366881847382,\n",
870
+ " 0.6438769102096558,\n",
871
+ " -1.1572128534317017,\n",
872
+ " 0.32702523469924927,\n",
873
+ " 0.19401556253433228,\n",
874
+ " -0.36513882875442505,\n",
875
+ " -0.1496993601322174,\n",
876
+ " 0.5544662475585938,\n",
877
+ " -0.10601028800010681,\n",
878
+ " 0.2943094074726105,\n",
879
+ " -0.9837754368782043,\n",
880
+ " -0.14144904911518097,\n",
881
+ " 0.7259737253189087,\n",
882
+ " 0.05785682797431946,\n",
883
+ " 0.8584915995597839,\n",
884
+ " -0.27259302139282227,\n",
885
+ " -0.6073381900787354,\n",
886
+ " -0.22768571972846985,\n",
887
+ " 0.7255773544311523,\n",
888
+ " 0.1539279967546463,\n",
889
+ " -0.6805699467658997,\n",
890
+ " -1.0378549098968506,\n",
891
+ " -0.597703754901886,\n",
892
+ " -0.6462168097496033,\n",
893
+ " 1.1171226501464844,\n",
894
+ " -0.21000456809997559,\n",
895
+ " -0.7443035244941711,\n",
896
+ " -0.16614656150341034,\n",
897
+ " 0.03670107200741768,\n",
898
+ " 0.23261283338069916,\n",
899
+ " -0.5053027272224426,\n",
900
+ " -1.0062577724456787,\n",
901
+ " 0.028607431799173355,\n",
902
+ " 0.6196390986442566,\n",
903
+ " 0.11939772218465805,\n",
904
+ " 0.16041713953018188,\n",
905
+ " 0.012548833154141903,\n",
906
+ " -0.6940840482711792,\n",
907
+ " -1.0390965938568115,\n",
908
+ " 0.3209550082683563,\n",
909
+ " -0.5268062353134155,\n",
910
+ " 0.5799688696861267,\n",
911
+ " -0.3353428542613983,\n",
912
+ " -0.3517853319644928,\n",
913
+ " -0.38189470767974854,\n",
914
+ " 0.23297882080078125,\n",
915
+ " 0.045969072729349136,\n",
916
+ " 0.6408992409706116,\n",
917
+ " -0.23498287796974182,\n",
918
+ " -0.2744370400905609,\n",
919
+ " -0.3386567234992981,\n",
920
+ " 0.16898459196090698,\n",
921
+ " 0.4274075925350189,\n",
922
+ " -0.4734047055244446,\n",
923
+ " -0.02491043135523796,\n",
924
+ " -0.5023868680000305,\n",
925
+ " -0.1599859893321991,\n",
926
+ " -0.28793132305145264,\n",
927
+ " 0.45987895131111145,\n",
928
+ " 0.12111934274435043,\n",
929
+ " 0.695939838886261,\n",
930
+ " 0.18703705072402954,\n",
931
+ " 0.11010603606700897,\n",
932
+ " -0.0493675135076046,\n",
933
+ " 0.2681659758090973,\n",
934
+ " 0.6883248090744019,\n",
935
+ " 0.14249111711978912,\n",
936
+ " -0.3902900516986847,\n",
937
+ " 0.02434423565864563,\n",
938
+ " 0.8115938305854797,\n",
939
+ " 0.31366243958473206,\n",
940
+ " 0.1475793719291687,\n",
941
+ " 0.8607581853866577,\n",
942
+ " 1.106387972831726,\n",
943
+ " -0.12984894216060638,\n",
944
+ " 0.6475292444229126,\n",
945
+ " 0.4389672875404358,\n",
946
+ " -0.14565706253051758,\n",
947
+ " -0.29327720403671265,\n",
948
+ " 0.19903028011322021,\n",
949
+ " 0.44643306732177734,\n",
950
+ " -0.055179595947265625,\n",
951
+ " 8.315621376037598,\n",
952
+ " -0.08598960936069489,\n",
953
+ " 0.7728097438812256,\n",
954
+ " 0.1960563361644745,\n",
955
+ " 0.7582479119300842,\n",
956
+ " -0.6882674098014832,\n",
957
+ " -0.22637659311294556,\n",
958
+ " 0.5025527477264404,\n",
959
+ " -0.07177169620990753,\n",
960
+ " -0.03814778849482536,\n",
961
+ " 1.0206265449523926,\n",
962
+ " -0.4750046730041504,\n",
963
+ " 0.015179314650595188,\n",
964
+ " -0.6247814297676086,\n",
965
+ " 0.4034382998943329,\n",
966
+ " 1.700039029121399,\n",
967
+ " -0.30730658769607544,\n",
968
+ " 0.28762733936309814,\n",
969
+ " 0.63616544008255,\n",
970
+ " -0.23646242916584015,\n",
971
+ " 0.2806755304336548,\n",
972
+ " 0.4410918056964874,\n",
973
+ " 0.14614292979240417,\n",
974
+ " 0.4948270916938782,\n",
975
+ " 0.43732860684394836,\n",
976
+ " 1.0119167566299438,\n",
977
+ " 0.9210423827171326,\n",
978
+ " -0.35212814807891846,\n",
979
+ " 0.32403385639190674,\n",
980
+ " -0.44126105308532715,\n",
981
+ " -0.18103229999542236,\n",
982
+ " -0.31492364406585693,\n",
983
+ " -0.503863513469696,\n",
984
+ " -0.26293063163757324,\n",
985
+ " 0.21797089278697968,\n",
986
+ " -0.9694619178771973,\n",
987
+ " 0.021304313093423843,\n",
988
+ " 0.44222936034202576,\n",
989
+ " -0.36141523718833923,\n",
990
+ " -0.463960736989975,\n",
991
+ " -0.24528658390045166,\n",
992
+ " 0.11174631118774414,\n",
993
+ " 0.09441330283880234,\n",
994
+ " 0.18713852763175964,\n",
995
+ " 0.36507827043533325,\n",
996
+ " 0.7508949041366577,\n",
997
+ " -0.15697608888149261,\n",
998
+ " 0.4001035690307617,\n",
999
+ " 1.323508620262146,\n",
1000
+ " -0.20196901261806488,\n",
1001
+ " 0.292355477809906,\n",
1002
+ " 0.34666717052459717,\n",
1003
+ " -0.11999291181564331,\n",
1004
+ " -0.6510916352272034,\n",
1005
+ " 0.4462094306945801,\n",
1006
+ " -0.45647361874580383,\n",
1007
+ " -0.14198175072669983,\n",
1008
+ " -0.4045391082763672,\n",
1009
+ " 0.7035051584243774,\n",
1010
+ " 0.3213372826576233,\n",
1011
+ " 0.5096818804740906,\n",
1012
+ " 0.6800979971885681,\n",
1013
+ " -0.008764655329287052,\n",
1014
+ " -0.19463925063610077,\n",
1015
+ " -0.7179383635520935,\n",
1016
+ " 0.2567158043384552,\n",
1017
+ " 0.07364790141582489,\n",
1018
+ " -0.222466841340065,\n",
1019
+ " 0.022669780999422073,\n",
1020
+ " 0.8473037481307983,\n",
1021
+ " -0.034888043999671936,\n",
1022
+ " -0.07169658690690994,\n",
1023
+ " -0.05516548082232475,\n",
1024
+ " -0.06913617253303528,\n",
1025
+ " -0.530577540397644,\n",
1026
+ " -0.6640213131904602,\n",
1027
+ " -0.34023773670196533,\n",
1028
+ " -0.5658687949180603,\n",
1029
+ " -0.4476564824581146,\n",
1030
+ " -2.571279287338257,\n",
1031
+ " -0.12790530920028687,\n",
1032
+ " 0.9560791850090027,\n",
1033
+ " -0.6428014039993286,\n",
1034
+ " -0.4189566671848297,\n",
1035
+ " -0.20985344052314758,\n",
1036
+ " 0.47335946559906006,\n",
1037
+ " -0.11219882220029831,\n",
1038
+ " -0.10753587633371353,\n",
1039
+ " 0.14247222244739532,\n",
1040
+ " 1.059354305267334,\n",
1041
+ " 0.3302377462387085,\n",
1042
+ " -0.3935352563858032,\n",
1043
+ " -0.058758582919836044,\n",
1044
+ " 0.648691713809967,\n",
1045
+ " 0.30499130487442017,\n",
1046
+ " -0.27360308170318604,\n",
1047
+ " -0.25764214992523193,\n",
1048
+ " 0.015458552166819572,\n",
1049
+ " 0.6662879586219788,\n",
1050
+ " 0.3119010329246521,\n",
1051
+ " -0.15479373931884766,\n",
1052
+ " 0.028574924916028976,\n",
1053
+ " -0.1503346860408783,\n",
1054
+ " 0.06127818673849106,\n",
1055
+ " -0.0910576581954956,\n",
1056
+ " 0.0481022410094738,\n",
1057
+ " 0.9771047234535217,\n",
1058
+ " 0.7927762866020203,\n",
1059
+ " 0.023048892617225647,\n",
1060
+ " 0.30974704027175903,\n",
1061
+ " 0.33901262283325195,\n",
1062
+ " -0.07123278081417084,\n",
1063
+ " 0.34432730078697205,\n",
1064
+ " -0.12369780987501144,\n",
1065
+ " 0.2354590892791748,\n",
1066
+ " 0.38229313492774963,\n",
1067
+ " -0.8465576767921448,\n",
1068
+ " -0.2445705085992813,\n",
1069
+ " -0.16847288608551025,\n",
1070
+ " 0.5078030824661255,\n",
1071
+ " -0.4897501766681671,\n",
1072
+ " 0.07203903794288635,\n",
1073
+ " 0.6503809690475464,\n",
1074
+ " -0.08006825298070908]"
1075
+ ]
1076
+ },
1077
+ "execution_count": 11,
1078
+ "metadata": {},
1079
+ "output_type": "execute_result"
1080
+ }
1081
+ ],
1082
+ "source": [
1083
+ "# https://vipul-maheshwari.github.io/2024/03/03/multimodal-rag-application\n",
1084
+ "\n",
1085
+ "def embed_image(img):\n",
1086
+ " processed_image = preprocess(img)\n",
1087
+ " unsqueezed_image = processed_image.unsqueeze(0)\n",
1088
+ " embeddings = model.encode_image(unsqueezed_image)\n",
1089
+ " \n",
1090
+ " # Detach, move to CPU, convert to numpy array, and extract the first element as a list\n",
1091
+ " result = embeddings.detach().numpy()[0].tolist()\n",
1092
+ " return result\n",
1093
+ "\n",
1094
+ "len(embed_image(image))"
1095
+ ]
1096
+ },
1097
+ {
1098
+ "cell_type": "code",
1099
+ "execution_count": null,
1100
+ "metadata": {},
1101
+ "outputs": [],
1102
+ "source": [
1103
+ "def embed_txt(txt):\n",
1104
+ " tokenized_text = clip.tokenize([txt]).to(device)\n",
1105
+ " embeddings = model.encode_text(tokenized_text)\n",
1106
+ " \n",
1107
+ " # Detach, move to CPU, convert to numpy array, and extract the first element as a list\n",
1108
+ " result = embeddings.detach().cpu().numpy()[0].tolist()\n",
1109
+ " return result\n",
1110
+ "\n",
1111
+ "res = tbl.search(embed_txt(\"a road with a stop\")).limit(3).to_pandas()\n",
1112
+ "res"
1113
+ ]
1114
+ },
1115
+ {
1116
+ "cell_type": "code",
1117
+ "execution_count": null,
1118
+ "metadata": {},
1119
+ "outputs": [],
1120
+ "source": [
1121
+ "https://blog.lancedb.com/lancedb-polars-2d5eb32a8aa3/\n",
1122
+ "\n",
1123
+ "https://github.com/lancedb/lancedb"
1124
+ ]
1125
+ }
1126
+ ],
1127
+ "metadata": {
1128
+ "kernelspec": {
1129
+ "display_name": "Python 3",
1130
+ "language": "python",
1131
+ "name": "python3"
1132
+ },
1133
+ "language_info": {
1134
+ "codemirror_mode": {
1135
+ "name": "ipython",
1136
+ "version": 3
1137
+ },
1138
+ "file_extension": ".py",
1139
+ "mimetype": "text/x-python",
1140
+ "name": "python",
1141
+ "nbconvert_exporter": "python",
1142
+ "pygments_lexer": "ipython3",
1143
+ "version": "3.11.9"
1144
+ }
1145
+ },
1146
+ "nbformat": 4,
1147
+ "nbformat_minor": 2
1148
+ }
notebook_3.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebook_4.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
poetry.lock ADDED
The diff for this file is too large to render. See raw diff
 
pyproject.toml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [tool.poetry]
2
+ name = "grandtheftauto-multimodal-rag-application"
3
+ version = "0.1.0"
4
+ description = ""
5
+ authors = ["henryhyunwookim <44928790+henryhyunwookim@users.noreply.github.com>"]
6
+ readme = "README.md"
7
+
8
+ [tool.poetry.dependencies]
9
+ python = "^3.11"
10
+ pillow = "^10.3.0"
11
+ datasets = "^2.19.0"
12
+ ipykernel = "^6.29.4"
13
+ jupyter = "^1.0.0"
14
+ ipywidgets = "^8.1.2"
15
+ matplotlib = "^3.8.4"
16
+ sentence-transformers = "^2.7.0"
17
+ lancedb = "^0.6.11"
18
+ torch = "^2.3.0"
19
+ clip = {git = "https://github.com/openai/CLIP.git"}
20
+ chromadb = "^0.5.0"
21
+ gradio = "^4.32.0"
22
+
23
+
24
+ [tool.poetry.group.dev.dependencies]
25
+ ipykernel = "^6.29.4"
26
+
27
+ [build-system]
28
+ requires = ["poetry-core"]
29
+ build-backend = "poetry.core.masonry.api"
requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pillow==10.3.0
2
+ datasets==2.19.0
3
+ ipykernel==6.29.4
4
+ jupyter==1.0.0
5
+ ipywidgets==8.1.2
6
+ matplotlib==3.8.4
7
+ sentence-transformers==2.7.0
8
+ lancedb==0.6.11
9
+ torch==2.3.0
10
+ clip @ git+https://github.com/openai/CLIP.git
11
+ chromadb==0.5.0
12
+ gradio==4.32.0
13
+
14
+ # Development dependencies
15
+ ipykernel==6.29.4
utils/__pycache__/utils.cpython-311.pyc ADDED
Binary file (8.03 kB). View file
 
utils/utils.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import logging
3
+ from datetime import datetime
4
+ from pathlib import Path
5
+ import pickle
6
+ from tqdm import tqdm
7
+ from datasets import load_dataset
8
+ import chromadb
9
+ import matplotlib.pyplot as plt
10
+
11
+
12
def set_directories():
    """Create the working directories under the current directory.

    The following layout is ensured relative to the current working
    directory::

        data/
        vectore_storage/
        vectore_storage/chroma/

    Returns:
        A ``(data_pickle_path, chroma_dir)`` tuple of ``pathlib.Path``
        objects: the pickle file used to cache the data set (the file
        itself is not created here) and the Chroma persistence directory.
    """
    curr_dir = Path(os.getcwd())

    data_dir = curr_dir / 'data'
    data_pickle_path = data_dir / 'data_set.pkl'

    # NOTE: 'vectore_storage' (sic) is kept verbatim; it is an on-disk path.
    vectordb_dir = curr_dir / 'vectore_storage'
    chroma_dir = vectordb_dir / 'chroma'

    for directory in (data_dir, vectordb_dir, chroma_dir):
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        directory.mkdir(parents=True, exist_ok=True)

    return data_pickle_path, chroma_dir
26
+
27
+
28
def load_data(data_pickle_path, dataset="vipulmaheshwari/GTA-Image-Captioning-Dataset"):
    """Return the data set, using a local pickle file as a cache.

    Args:
        data_pickle_path: Location of the pickle cache file.
        dataset: Identifier handed to ``load_dataset`` when no cache file
            exists yet.

    Returns:
        The object unpickled from ``data_pickle_path`` when that file
        exists; otherwise the result of ``load_dataset(dataset)``, which is
        also pickled to ``data_pickle_path`` before being returned.
    """
    if os.path.exists(data_pickle_path):
        print(f"Data set already exists in the local drive. Loading it.")
        # NOTE: unpickling can execute arbitrary code, so the cache file
        # must come from a trusted source.
        with open(data_pickle_path, 'rb') as cached:
            return pickle.load(cached)

    print(f"Data set hasn't been loaded. Loading from the datasets library and save it as a pickle.")
    fetched = load_dataset(dataset)
    with open(data_pickle_path, 'wb') as sink:
        pickle.dump(fetched, sink)
    return fetched
40
+
41
+
42
def get_embeddings(data, model):
    """Encode every item of *data* and pair it with a generated id.

    Args:
        data: Sized sequence of items (images, per the caller) to encode.
        model: Object whose ``encode(item)`` returns a value exposing
            ``tolist()``.

    Returns:
        A ``(ids, embeddings)`` tuple of equal-length lists. Ids have the
        form ``"image <index>"``; each embedding is the ``tolist()`` form of
        the corresponding ``model.encode`` result.
    """
    ids = []
    embeddings = []
    # total= lets tqdm show real progress; enumerate() alone has no length.
    for index, image in tqdm(enumerate(data), total=len(data)):
        ids.append(f"image {index}")
        embeddings.append(model.encode(image).tolist())

    return ids, embeddings
53
+
54
+
55
def get_collection(chroma_dir, model, collection_name, data):
    """Open (or create) a persistent Chroma collection and fill it if needed.

    Args:
        chroma_dir: Directory in which Chroma persists its data.
        model: Encoder forwarded to ``get_embeddings``.
        collection_name: Name of the collection to fetch or create.
        data: Sized sequence of items whose embeddings belong in the
            collection.

    Returns:
        The collection. Embeddings are computed and added only when the
        collection's item count differs from ``len(data)``.
    """
    store = chromadb.PersistentClient(path=str(chroma_dir))
    collection = store.get_or_create_collection(name=collection_name)

    # Matching counts are taken to mean the collection is already filled.
    if collection.count() == len(data):
        print("Embeddings are already added to the collection.")
        return collection

    print("Adding embeddings to the collection.")
    ids, embeddings = get_embeddings(data, model)
    collection.add(ids=ids, embeddings=embeddings)
    return collection
70
+
71
+
72
def get_result(collection, data_set, query, model, n_results=2):
    """Return the image and caption that best match *query*.

    Args:
        collection: Vector store exposing
            ``query(query_embeddings=..., n_results=...)`` and returning a
            mapping whose ``'ids'`` entry is a list of id lists.
        data_set: Mapping with a ``'train'`` split that provides ``'image'``
            and ``'text'`` columns indexable by integer position.
        query: Text to search for.
        model: Encoder; ``model.encode([query])`` supplies the query
            embeddings.
        n_results: Number of neighbours requested from the vector store.
            Only the top hit is returned.

    Returns:
        An ``(image, text)`` tuple for the closest match.
    """
    # Query the vector store, honouring the caller's n_results (it was
    # previously hard-coded to 2, so the argument had no effect).
    results = collection.query(
        query_embeddings=model.encode([query]),
        n_results=n_results
    )

    # Ids look like "image <index>"; recover the index of the top hit.
    img_id = int(results['ids'][0][0].split('image ')[-1])

    # Fetch the image and its caption from the training split.
    train_split = data_set['train']
    image = train_split['image'][img_id]
    text = train_split['text'][img_id]

    return image, text
87
+
88
+
89
def show_image(image, text, query):
    """Display *image* with pyplot and print the query and its caption.

    Args:
        image: Image object accepted by ``plt.imshow``.
        text: Original description stored alongside the image.
        query: The user query that produced this result.
    """
    # The pyplot calls keep their original order on purpose.
    plt.ion()
    plt.axis("off")
    plt.imshow(image)
    plt.show()

    emit = print
    emit(f"User query: {query}")
    emit(f"Original description: {text}\n")
96
+
97
+
98
def get_logger():
    """Configure file logging under ``./log/`` and return a module logger.

    The log file is named after the current UTC date (``YYYYMMDD.log``).
    ``logging.basicConfig`` is used for the setup, so the file handler is
    only installed if the root logger has no handlers yet.

    Returns:
        The ``logging.Logger`` named after this module.
    """
    # Local import: only `datetime` itself is imported at file level.
    from datetime import timezone

    log_path = "./log/"
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(log_path, exist_ok=True)

    # datetime.utcnow() is deprecated since Python 3.12; an aware UTC
    # timestamp yields the same date string.
    cur_date = datetime.now(timezone.utc).strftime("%Y%m%d")
    log_filename = f"{log_path}{cur_date}.log"

    logging.basicConfig(
        filename=log_filename,
        level=logging.INFO,
        format="%(asctime)s %(levelname)-8s %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S")

    return logging.getLogger(__name__)
115
+
116
+
117
def _announce(logger, message):
    """Echo *message* to stdout and record it on *logger* at INFO level."""
    print(message)
    logger.info(message)


def initialization(logger):
    """Prepare everything the search needs and report progress as it goes.

    Steps: create the working directories, load the data set, load the
    CLIP sentence-transformers model, and open the ``'image_vectors'``
    collection (filling it with image embeddings when needed). Each step is
    printed and logged.

    Args:
        logger: Logger that receives the progress messages.

    Returns:
        A ``(collection, data_set, model, logger)`` tuple.
    """
    divider = "-------------------------------------------------------"

    _announce(logger, "Initializing...")
    _announce(logger, divider)

    _announce(logger, "Importing functions...")
    # Imported lazily so the import happens only when initialization runs.
    # The helpers used below live in this module, so the former
    # `from utils.utils import ...` self-import is not needed.
    from sentence_transformers import SentenceTransformer

    _announce(logger, "Set directories...")
    data_pickle_path, chroma_dir = set_directories()

    _announce(logger, "Loading data...")
    data_set = load_data(data_pickle_path)

    _announce(logger, "Loading CLIP model...")
    model = SentenceTransformer("sentence-transformers/clip-ViT-L-14")

    _announce(logger, "Getting vector embeddings...")
    collection = get_collection(
        chroma_dir,
        model,
        collection_name='image_vectors',
        data=data_set['train']['image'],
    )

    _announce(logger, divider)
    _announce(logger, "Initialization completed! Ready for search.")

    return collection, data_set, model, logger
vectore_storage/chroma/chroma.sqlite3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34a5e8e1ac1cff55f102ec9eeb2fb556494f2d1d5c496e76641d5f4aab4feda5
3
+ size 3473408