GoodML committed on
Commit
8d144da
·
verified ·
1 Parent(s): e444da0

Upload 5 files

Browse files
Files changed (5) hide show
  1. .env +2 -0
  2. .gitattributes +35 -35
  3. .gitignore +5 -0
  4. requirements.txt +95 -0
  5. trials.ipynb +924 -0
.env ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ PINECONE_API_KEY = "<REDACTED_PINECONE_API_KEY>"
2
+ HUGGING_FACE_TOKEN = "<REDACTED_HUGGING_FACE_TOKEN>"
.gitattributes CHANGED
@@ -1,35 +1,35 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ mchatbot/
2
+ model/llama-2-7b-chat.ggmlv3.q4_0.bin
3
+ .env
4
+
5
+ data/
requirements.txt ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiohttp==3.9.5
2
+ aiosignal==1.3.1
3
+ altair==5.3.0
4
+ annotated-types==0.7.0
5
+ attrs==23.2.0
6
+ blinker==1.8.2
7
+ cachetools==5.3.3
8
+ certifi==2024.6.2
9
+ charset-normalizer==3.3.2
10
+ click==8.1.7
11
+ colorama==0.4.6
12
+ ctransformers==0.2.27
13
+ dataclasses-json==0.6.7
14
+ filelock==3.15.4
15
+ Flask==3.0.3
16
+ frozenlist==1.4.1
17
+ fsspec==2024.6.1
18
+ gitdb==4.0.11
19
+ GitPython==3.1.43
20
+ greenlet==3.0.3
21
+ huggingface-hub==0.23.4
22
+ idna==3.7
23
+ intel-openmp==2021.4.0
24
+ itsdangerous==2.2.0
25
+ Jinja2==3.1.4
26
+ joblib==1.4.2
27
+ jsonpatch==1.33
28
+ jsonpointer==3.0.0
29
+ jsonschema==4.22.0
30
+ jsonschema-specifications==2023.12.1
31
+ langchain==0.2.6
32
+ langchain-community==0.2.6
33
+ langchain-core==0.2.10
34
+ langchain-pinecone==0.1.1
35
+ langchain-text-splitters==0.2.2
36
+ langsmith==0.1.82
37
+ markdown-it-py==3.0.0
38
+ MarkupSafe==2.1.5
39
+ marshmallow==3.21.3
40
+ mdurl==0.1.2
41
+ mkl==2021.4.0
42
+ mpmath==1.3.0
43
+ multidict==6.0.5
44
+ mypy-extensions==1.0.0
45
+ networkx==3.3
46
+ numpy==1.26.4
47
+ orjson==3.10.5
48
+ packaging==24.1
49
+ pandas==2.2.2
50
+ pillow==10.3.0
51
+ pinecone==4.0.0
52
+ pinecone-client==3.2.2
53
+ protobuf==5.27.2
54
+ py-cpuinfo==9.0.0
55
+ pyarrow==16.1.0
56
+ pydantic==2.7.4
57
+ pydantic_core==2.18.4
58
+ pydeck==0.9.1
59
+ Pygments==2.18.0
60
+ pypdf==4.2.0
61
+ python-dateutil==2.9.0.post0
62
+ python-dotenv==1.0.1
63
+ pytz==2024.1
64
+ PyYAML==6.0.1
65
+ referencing==0.35.1
66
+ regex==2024.5.15
67
+ requests==2.32.3
68
+ rich==13.7.1
69
+ rpds-py==0.18.1
70
+ safetensors==0.4.3
71
+ scikit-learn==1.5.0
72
+ scipy==1.14.0
73
+ sentence-transformers==3.0.1
74
+ six==1.16.0
75
+ smmap==5.0.1
76
+ SQLAlchemy==2.0.31
77
+ streamlit==1.36.0
78
+ sympy==1.12.1
79
+ tbb==2021.13.0
80
+ tenacity==8.4.2
81
+ threadpoolctl==3.5.0
82
+ tokenizers==0.19.1
83
+ toml==0.10.2
84
+ toolz==0.12.1
85
+ torch==2.3.1
86
+ tornado==6.4.1
87
+ tqdm==4.66.4
88
+ transformers==4.42.3
89
+ typing-inspect==0.9.0
90
+ typing_extensions==4.12.2
91
+ tzdata==2024.1
92
+ urllib3==2.2.2
93
+ watchdog==4.0.1
94
+ Werkzeug==3.0.3
95
+ yarl==1.9.4
trials.ipynb ADDED
@@ -0,0 +1,924 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 68,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "from langchain import PromptTemplate\n",
10
+ "from langchain.chains import RetrievalQA\n",
11
+ "from langchain.embeddings import HuggingFaceEmbeddings\n",
12
+ "from langchain.vectorstores import Pinecone\n",
13
+ "\n",
14
+ "import pinecone\n",
15
+ "from langchain.document_loaders import PyPDFLoader, DirectoryLoader\n",
16
+ "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
17
+ "from langchain_core.prompts import PromptTemplate\n",
18
+ "from langchain.llms import CTransformers\n",
19
+ "from tqdm.autonotebook import tqdm"
20
+ ]
21
+ },
22
+ {
23
+ "cell_type": "code",
24
+ "execution_count": 2,
25
+ "metadata": {},
26
+ "outputs": [],
27
+ "source": [
28
+ "PINECONE_API_KEY = \"<REDACTED_PINECONE_API_KEY>\""
29
+ ]
30
+ },
31
+ {
32
+ "cell_type": "code",
33
+ "execution_count": 3,
34
+ "metadata": {},
35
+ "outputs": [],
36
+ "source": [
37
+ "def load_pdf(data):\n",
38
+ " loader = DirectoryLoader(data, glob=\"*.pdf\", loader_cls=PyPDFLoader)\n",
39
+ " documents = loader.load()\n",
40
+ " return documents"
41
+ ]
42
+ },
43
+ {
44
+ "cell_type": "code",
45
+ "execution_count": 4,
46
+ "metadata": {},
47
+ "outputs": [],
48
+ "source": [
49
+ "extracted_data = load_pdf(\"data/\")"
50
+ ]
51
+ },
52
+ {
53
+ "cell_type": "code",
54
+ "execution_count": 5,
55
+ "metadata": {},
56
+ "outputs": [],
57
+ "source": [
58
+ "# Data is extracted from the PDFs\n",
59
+ "# Now form chunks out of it"
60
+ ]
61
+ },
62
+ {
63
+ "cell_type": "code",
64
+ "execution_count": 6,
65
+ "metadata": {},
66
+ "outputs": [],
67
+ "source": [
68
+ "def text_split(extracted_data):\n",
69
+ " splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)\n",
70
+ " chunks = splitter.split_documents(extracted_data)\n",
71
+ " return chunks\n"
72
+ ]
73
+ },
74
+ {
75
+ "cell_type": "code",
76
+ "execution_count": 7,
77
+ "metadata": {},
78
+ "outputs": [],
79
+ "source": [
80
+ "chunks = text_split(extracted_data)"
81
+ ]
82
+ },
83
+ {
84
+ "cell_type": "code",
85
+ "execution_count": 8,
86
+ "metadata": {},
87
+ "outputs": [
88
+ {
89
+ "data": {
90
+ "text/plain": [
91
+ "Document(page_content='TheGALE\\nENCYCLOPEDIA\\nofMEDICINE\\nSECOND EDITION', metadata={'source': 'data\\\\Medical_book.pdf', 'page': 1})"
92
+ ]
93
+ },
94
+ "execution_count": 8,
95
+ "metadata": {},
96
+ "output_type": "execute_result"
97
+ }
98
+ ],
99
+ "source": [
100
+ "# len(chunks)\n",
101
+ "chunks[0]\n",
102
+ "# Chunks formation is done\n",
103
+ "# Now, convert the chunks into embeddings\n",
104
+ "# Then store these embeddings in the Pinecone vector DB"
105
+ ]
106
+ },
107
+ {
108
+ "cell_type": "code",
109
+ "execution_count": 9,
110
+ "metadata": {},
111
+ "outputs": [],
112
+ "source": [
113
+ "def download_hugging_face_embeddings():\n",
114
+ " embeddings = HuggingFaceEmbeddings(model_name=\"sentence-transformers/all-MiniLM-L6-v2\")\n",
115
+ " return embeddings"
116
+ ]
117
+ },
118
+ {
119
+ "cell_type": "code",
120
+ "execution_count": 10,
121
+ "metadata": {},
122
+ "outputs": [
123
+ {
124
+ "name": "stderr",
125
+ "output_type": "stream",
126
+ "text": [
127
+ "c:\\Users\\Aniket\\miniconda3\\Lib\\site-packages\\langchain_core\\_api\\deprecation.py:139: LangChainDeprecationWarning: The class `HuggingFaceEmbeddings` was deprecated in LangChain 0.2.2 and will be removed in 0.3.0. An updated version of the class exists in the langchain-huggingface package and should be used instead. To use it run `pip install -U langchain-huggingface` and import as `from langchain_huggingface import HuggingFaceEmbeddings`.\n",
128
+ " warn_deprecated(\n"
129
+ ]
130
+ }
131
+ ],
132
+ "source": [
133
+ "embeddings = download_hugging_face_embeddings()"
134
+ ]
135
+ },
136
+ {
137
+ "cell_type": "code",
138
+ "execution_count": 12,
139
+ "metadata": {},
140
+ "outputs": [],
141
+ "source": [
142
+ "\n",
143
+ "chunk_embeddings = []\n",
144
+ "for i in range(0, len(chunks)):\n",
145
+ " chunk_embeddings.append(embeddings.embed_query(chunks[i].page_content))\n",
146
+ "\n"
147
+ ]
148
+ },
149
+ {
150
+ "cell_type": "code",
151
+ "execution_count": 13,
152
+ "metadata": {},
153
+ "outputs": [
154
+ {
155
+ "data": {
156
+ "text/plain": [
157
+ "Document(page_content='TheGALE\\nENCYCLOPEDIA\\nofMEDICINE\\nSECOND EDITION', metadata={'source': 'data\\\\Medical_book.pdf', 'page': 1})"
158
+ ]
159
+ },
160
+ "execution_count": 13,
161
+ "metadata": {},
162
+ "output_type": "execute_result"
163
+ }
164
+ ],
165
+ "source": [
166
+ "len(chunk_embeddings)\n",
167
+ "chunks[0]"
168
+ ]
169
+ },
170
+ {
171
+ "cell_type": "code",
172
+ "execution_count": 14,
173
+ "metadata": {},
174
+ "outputs": [],
175
+ "source": [
176
+ "chunk_content = [chunks[i].page_content for i in range(0 , len(chunks))]"
177
+ ]
178
+ },
179
+ {
180
+ "cell_type": "code",
181
+ "execution_count": 15,
182
+ "metadata": {},
183
+ "outputs": [],
184
+ "source": [
185
+ "\n",
186
+ "chunk_ids = [str(i+1) for i in range(0, len(chunks))]\n"
187
+ ]
188
+ },
189
+ {
190
+ "cell_type": "code",
191
+ "execution_count": 16,
192
+ "metadata": {},
193
+ "outputs": [
194
+ {
195
+ "name": "stdout",
196
+ "output_type": "stream",
197
+ "text": [
198
+ "1\n",
199
+ "[0.0017460489179939032, -0.033502884209156036, -0.03290388733148575, 0.007168094161897898, -0.01460327859967947, 0.010261928662657738, -0.01151528861373663, 0.22930213809013367, -0.023232396692037582, 0.004120402969419956, -0.036560822278261185, 0.08592110127210617, 0.012972140684723854, 0.05221788212656975, -0.10232618451118469, -0.003139043692499399, -0.012686969712376595, 0.000471863109851256, -0.02848585695028305, -0.050259195268154144, 0.01155101228505373, 0.0778065174818039, 0.09282823652029037, -0.0137972766533494, -0.016935130581259727, -0.025955867022275925, -0.04956510663032532, -0.046131301671266556, 0.00729052210226655, -0.013553328812122345, 0.038439445197582245, 0.06280472129583359, 0.018353812396526337, 0.008242843672633171, 0.0017155527602881193, -0.039861857891082764, -0.011638614349067211, 0.016446180641651154, 0.025595590472221375, 0.09104609489440918, 0.029672738164663315, -0.05416030064225197, -0.04576560854911804, -0.013853926211595535, 0.02577359229326248, 0.010323088616132736, -0.05363088846206665, 0.021221553906798363, 0.01702778786420822, 0.11612221598625183, -0.06963177025318146, -0.0957275778055191, -0.03983991965651512, 0.052369263023138046, 0.02526022121310234, -0.031274523586034775, -0.07005152851343155, -0.05956605449318886, -0.09544864296913147, -0.05412125587463379, -0.0002130465436493978, 0.0002552172227296978, 0.012184805236756802, 0.036847226321697235, -0.09168802946805954, -0.016031526029109955, 0.05677250772714615, -0.061103083193302155, 0.05796805024147034, -0.03652450442314148, -0.021421410143375397, -0.047219835221767426, 0.03454083576798439, 0.12064339220523834, -0.013788564130663872, -0.06848538666963577, 0.012004958465695381, -0.059728946536779404, -0.05643806606531143, -0.101061150431633, 0.05889026075601578, -0.02077680453658104, 0.09746569395065308, 0.07813995331525803, -0.035233210772275925, -0.014866253361105919, 0.040357090532779694, 0.07460875064134598, -0.013026430271565914, -0.02844160981476307, 
0.10370924323797226, 0.019506312906742096, 0.029694247990846634, 0.007636230438947678, 0.005692535545676947, -0.0007858198950998485, -0.0431533046066761, 0.007750411983579397, -0.01790708862245083, 0.06111792102456093, -0.02531685307621956, -0.10494077950716019, -0.053424857556819916, 0.00989371445029974, 0.01446546521037817, -0.06589170545339584, 0.009222910739481449, -0.13625681400299072, 0.021162521094083786, -0.01161885168403387, 0.034509241580963135, 0.06049511581659317, 0.01565300114452839, -0.012806619517505169, -0.007194896228611469, 0.055682480335235596, 0.07992612570524216, 0.05983540043234825, 0.09587424248456955, 0.018204279243946075, 0.023596445098519325, -0.08910958468914032, -0.007128870114684105, -0.09076816588640213, 0.047233086079359055, 0.004134080838412046, 0.00339791108854115, -2.0226589867935018e-33, 0.015145692974328995, -0.0040165213868021965, 0.046035610139369965, 0.06628156453371048, 0.08750341087579727, 0.03237520158290863, -0.013098624534904957, -0.06530202180147171, 0.0794229656457901, -0.1063862293958664, -0.07034210115671158, 0.03889153152704239, 0.014388104900717735, 0.05448484793305397, -0.1063254252076149, 0.0015779227251186967, -0.07627759128808975, 0.02941804938018322, -0.020254988223314285, -0.010341203771531582, 0.0077161630615592, 0.015365850180387497, -0.030866345390677452, 0.03806672990322113, -0.08467362076044083, 0.061196524649858475, -0.006315906532108784, 0.02108607068657875, 0.0948043167591095, -0.02897580713033676, -0.02469867840409279, -0.02624206617474556, 0.010229134000837803, -0.04816938936710358, -0.05085073038935661, 0.06431731581687927, -0.06250055879354477, -0.017944322898983955, -0.0032577342353761196, -0.002177971415221691, 0.0040510413236916065, 0.05613983795046806, 0.02165955677628517, -0.028459111228585243, 0.06688956916332245, -0.018423588946461678, -0.1386348307132721, -6.533067062264308e-05, 0.09188717603683472, -0.05782890319824219, 0.051801200956106186, -0.0333990603685379, 0.0635690987110138, 
-0.03353029489517212, 0.003560281591489911, 0.05478646978735924, -0.0592782236635685, 0.02796691469848156, 0.01981506496667862, 0.03064817003905773, 0.09453834593296051, 0.07339683920145035, 0.0220828615128994, -0.034395359456539154, -0.008398751728236675, 0.016077488660812378, -0.1019863560795784, -0.10767035186290741, -0.04845403507351875, -0.04698591306805611, -0.12305164337158203, 0.01794605329632759, 0.02716820128262043, 0.04150307923555374, -0.012137887999415398, 0.016134945675730705, 0.007807416841387749, -0.030696384608745575, -0.03695548698306084, -0.07249006628990173, -0.06594054400920868, -0.052304696291685104, 0.022644832730293274, 0.10688149929046631, -0.03437632694840431, -0.007189977914094925, 0.004040680360049009, -0.012765718623995781, -0.035733193159103394, -0.009521384723484516, -0.037608999758958817, 0.0257247406989336, -0.07330161333084106, 0.018399816006422043, -0.013152354396879673, -2.4391491249173157e-33, -0.036498475819826126, -0.03898495435714722, -0.02466081827878952, 0.05277742072939873, 0.057153843343257904, 0.08485236018896103, -0.047778353095054626, 0.04885183647274971, 0.0936603769659996, 0.024455295875668526, 0.10703857988119125, -0.044878069311380386, 0.013439536094665527, 0.010380103252828121, -0.03215843066573143, 0.03756462037563324, -0.024516936391592026, -0.015364289283752441, -0.08388064056634903, 0.07285160571336746, -0.08349563926458359, 0.05313438922166824, -0.0502142570912838, 0.021773921325802803, 0.09023481607437134, 0.014781351201236248, 0.039709243923425674, -0.01778602786362171, -0.01812116988003254, -0.05736514553427696, -0.014358995482325554, -0.019216574728488922, -0.07116208970546722, -0.03887920826673508, -0.06919412314891815, 0.015993399545550346, 0.03935178369283676, -0.06669557094573975, -0.02723979577422142, -0.041191909462213516, 0.08880122750997543, -0.03152484819293022, 0.020989220589399338, 0.007525313645601273, 0.006864176597446203, -0.00016016913286875933, -0.03395673632621765, -0.01977171003818512, 
-0.013901437632739544, -0.02347422204911709, 0.05024150386452675, 0.004815816413611174, 0.0030840442050248384, 0.013711294159293175, 0.04268831014633179, -0.010319440625607967, -0.01561632938683033, -0.13140302896499634, 0.03932563588023186, 0.0034621721133589745, 0.004625402390956879, -0.022172948345541954, -0.07998225092887878, 0.07068227231502533, 0.013251686468720436, 0.013216551393270493, -0.031048668548464775, 0.01629941165447235, 0.07485268265008926, -0.020337823778390884, -0.03757909685373306, 0.0027204568032175303, -0.07825025916099548, -0.07273514568805695, -0.0015075246337801218, -0.0015040460275486112, -0.04806789010763168, -0.02269686758518219, 0.00234704976901412, -0.10254422575235367, -0.01062675192952156, -0.05898876115679741, -0.028257478028535843, 0.018367942422628403, -0.02433406002819538, 0.006143547594547272, 0.058125562965869904, 0.006210033316165209, -0.004125489853322506, 0.004883211106061935, -0.051727477461099625, -0.002652254654094577, -0.08568170666694641, 0.04144113138318062, -0.007155539933592081, -2.368167706379154e-08, 0.05613090097904205, -0.014770290814340115, -0.0016940254718065262, -0.0011319591430947185, -0.004112910944968462, -0.07847736030817032, -0.03272825479507446, 0.08840586245059967, -0.009142806753516197, 0.08083547651767731, -0.06821239739656448, 0.08852605521678925, 0.018825415521860123, -0.004044109955430031, 0.01947229728102684, 0.05469341203570366, -0.004570759367197752, 0.050132788717746735, -0.020886147394776344, -0.0043810331262648106, 0.012420092709362507, -0.040249839425086975, -0.017629530280828476, -0.07264230400323868, 0.027827059850096703, 0.026226632297039032, 0.009220139123499393, 0.02573232352733612, -0.009361241944134235, -0.09865128993988037, -0.03130580857396126, 0.051363445818424225, 0.011322762817144394, -0.0496138371527195, -0.04716363921761513, -0.0054739126935601234, 0.06890673190355301, -0.040705155581235886, 0.05423299968242645, -0.019241807982325554, 0.01434497069567442, -0.04443489387631416, 
-0.02567482925951481, 0.031354621052742004, 0.06115218624472618, -0.026838727295398712, 0.04913513734936714, 0.05644834786653519, 0.027938084676861763, -0.08463852107524872, -0.005661568138748407, 0.007060948293656111, 0.10265117883682251, -0.019236600026488304, -0.08145822584629059, 0.07125057280063629, 0.01803840510547161, -0.017572075128555298, -0.00043016800191253424, -0.10405920445919037, 0.0028415117412805557, -0.00555642182007432, 0.10660912096500397, 0.050997212529182434]\n"
200
+ ]
201
+ },
202
+ {
203
+ "data": {
204
+ "text/plain": [
205
+ "'TheGALE\\nENCYCLOPEDIA\\nofMEDICINE\\nSECOND EDITION'"
206
+ ]
207
+ },
208
+ "execution_count": 16,
209
+ "metadata": {},
210
+ "output_type": "execute_result"
211
+ }
212
+ ],
213
+ "source": [
214
+ "print(chunk_ids[0])\n",
215
+ "print(chunk_embeddings[0])\n",
216
+ "chunk_content[0]"
217
+ ]
218
+ },
219
+ {
220
+ "cell_type": "code",
221
+ "execution_count": 41,
222
+ "metadata": {},
223
+ "outputs": [],
224
+ "source": [
225
+ "index_name = \"medical-chatbot\"\n",
226
+ "index=pinecone.Index(api_key=PINECONE_API_KEY, host=\"https://medical-chatbot-pv4ded8.svc.aped-4627-b74a.pinecone.io\")\n",
227
+ "# from http import client\n",
228
+ "pc = Pinecone(embedding=embeddings, text_key=chunk_content, index=index)\n",
229
+ "# Replace with your index name\n"
230
+ ]
231
+ },
232
+ {
233
+ "cell_type": "code",
234
+ "execution_count": 42,
235
+ "metadata": {},
236
+ "outputs": [],
237
+ "source": [
238
+ "upsert_vectors = [\n",
239
+ " {\n",
240
+ " \"id\": chunk_id,\n",
241
+ " \"values\": embedding,\n",
242
+ " \"metadata\": {\"text\": content} # Replace with actual metadata if available\n",
243
+ " }\n",
244
+ " for chunk_id, embedding, content in zip(chunk_ids, chunk_embeddings, chunk_content)\n",
245
+ "]\n"
246
+ ]
247
+ },
248
+ {
249
+ "cell_type": "code",
250
+ "execution_count": 43,
251
+ "metadata": {},
252
+ "outputs": [
253
+ {
254
+ "data": {
255
+ "text/plain": [
256
+ "{'id': '1',\n",
257
+ " 'values': [0.0017460489179939032,\n",
258
+ " -0.033502884209156036,\n",
259
+ " -0.03290388733148575,\n",
260
+ " 0.007168094161897898,\n",
261
+ " -0.01460327859967947,\n",
262
+ " 0.010261928662657738,\n",
263
+ " -0.01151528861373663,\n",
264
+ " 0.22930213809013367,\n",
265
+ " -0.023232396692037582,\n",
266
+ " 0.004120402969419956,\n",
267
+ " -0.036560822278261185,\n",
268
+ " 0.08592110127210617,\n",
269
+ " 0.012972140684723854,\n",
270
+ " 0.05221788212656975,\n",
271
+ " -0.10232618451118469,\n",
272
+ " -0.003139043692499399,\n",
273
+ " -0.012686969712376595,\n",
274
+ " 0.000471863109851256,\n",
275
+ " -0.02848585695028305,\n",
276
+ " -0.050259195268154144,\n",
277
+ " 0.01155101228505373,\n",
278
+ " 0.0778065174818039,\n",
279
+ " 0.09282823652029037,\n",
280
+ " -0.0137972766533494,\n",
281
+ " -0.016935130581259727,\n",
282
+ " -0.025955867022275925,\n",
283
+ " -0.04956510663032532,\n",
284
+ " -0.046131301671266556,\n",
285
+ " 0.00729052210226655,\n",
286
+ " -0.013553328812122345,\n",
287
+ " 0.038439445197582245,\n",
288
+ " 0.06280472129583359,\n",
289
+ " 0.018353812396526337,\n",
290
+ " 0.008242843672633171,\n",
291
+ " 0.0017155527602881193,\n",
292
+ " -0.039861857891082764,\n",
293
+ " -0.011638614349067211,\n",
294
+ " 0.016446180641651154,\n",
295
+ " 0.025595590472221375,\n",
296
+ " 0.09104609489440918,\n",
297
+ " 0.029672738164663315,\n",
298
+ " -0.05416030064225197,\n",
299
+ " -0.04576560854911804,\n",
300
+ " -0.013853926211595535,\n",
301
+ " 0.02577359229326248,\n",
302
+ " 0.010323088616132736,\n",
303
+ " -0.05363088846206665,\n",
304
+ " 0.021221553906798363,\n",
305
+ " 0.01702778786420822,\n",
306
+ " 0.11612221598625183,\n",
307
+ " -0.06963177025318146,\n",
308
+ " -0.0957275778055191,\n",
309
+ " -0.03983991965651512,\n",
310
+ " 0.052369263023138046,\n",
311
+ " 0.02526022121310234,\n",
312
+ " -0.031274523586034775,\n",
313
+ " -0.07005152851343155,\n",
314
+ " -0.05956605449318886,\n",
315
+ " -0.09544864296913147,\n",
316
+ " -0.05412125587463379,\n",
317
+ " -0.0002130465436493978,\n",
318
+ " 0.0002552172227296978,\n",
319
+ " 0.012184805236756802,\n",
320
+ " 0.036847226321697235,\n",
321
+ " -0.09168802946805954,\n",
322
+ " -0.016031526029109955,\n",
323
+ " 0.05677250772714615,\n",
324
+ " -0.061103083193302155,\n",
325
+ " 0.05796805024147034,\n",
326
+ " -0.03652450442314148,\n",
327
+ " -0.021421410143375397,\n",
328
+ " -0.047219835221767426,\n",
329
+ " 0.03454083576798439,\n",
330
+ " 0.12064339220523834,\n",
331
+ " -0.013788564130663872,\n",
332
+ " -0.06848538666963577,\n",
333
+ " 0.012004958465695381,\n",
334
+ " -0.059728946536779404,\n",
335
+ " -0.05643806606531143,\n",
336
+ " -0.101061150431633,\n",
337
+ " 0.05889026075601578,\n",
338
+ " -0.02077680453658104,\n",
339
+ " 0.09746569395065308,\n",
340
+ " 0.07813995331525803,\n",
341
+ " -0.035233210772275925,\n",
342
+ " -0.014866253361105919,\n",
343
+ " 0.040357090532779694,\n",
344
+ " 0.07460875064134598,\n",
345
+ " -0.013026430271565914,\n",
346
+ " -0.02844160981476307,\n",
347
+ " 0.10370924323797226,\n",
348
+ " 0.019506312906742096,\n",
349
+ " 0.029694247990846634,\n",
350
+ " 0.007636230438947678,\n",
351
+ " 0.005692535545676947,\n",
352
+ " -0.0007858198950998485,\n",
353
+ " -0.0431533046066761,\n",
354
+ " 0.007750411983579397,\n",
355
+ " -0.01790708862245083,\n",
356
+ " 0.06111792102456093,\n",
357
+ " -0.02531685307621956,\n",
358
+ " -0.10494077950716019,\n",
359
+ " -0.053424857556819916,\n",
360
+ " 0.00989371445029974,\n",
361
+ " 0.01446546521037817,\n",
362
+ " -0.06589170545339584,\n",
363
+ " 0.009222910739481449,\n",
364
+ " -0.13625681400299072,\n",
365
+ " 0.021162521094083786,\n",
366
+ " -0.01161885168403387,\n",
367
+ " 0.034509241580963135,\n",
368
+ " 0.06049511581659317,\n",
369
+ " 0.01565300114452839,\n",
370
+ " -0.012806619517505169,\n",
371
+ " -0.007194896228611469,\n",
372
+ " 0.055682480335235596,\n",
373
+ " 0.07992612570524216,\n",
374
+ " 0.05983540043234825,\n",
375
+ " 0.09587424248456955,\n",
376
+ " 0.018204279243946075,\n",
377
+ " 0.023596445098519325,\n",
378
+ " -0.08910958468914032,\n",
379
+ " -0.007128870114684105,\n",
380
+ " -0.09076816588640213,\n",
381
+ " 0.047233086079359055,\n",
382
+ " 0.004134080838412046,\n",
383
+ " 0.00339791108854115,\n",
384
+ " -2.0226589867935018e-33,\n",
385
+ " 0.015145692974328995,\n",
386
+ " -0.0040165213868021965,\n",
387
+ " 0.046035610139369965,\n",
388
+ " 0.06628156453371048,\n",
389
+ " 0.08750341087579727,\n",
390
+ " 0.03237520158290863,\n",
391
+ " -0.013098624534904957,\n",
392
+ " -0.06530202180147171,\n",
393
+ " 0.0794229656457901,\n",
394
+ " -0.1063862293958664,\n",
395
+ " -0.07034210115671158,\n",
396
+ " 0.03889153152704239,\n",
397
+ " 0.014388104900717735,\n",
398
+ " 0.05448484793305397,\n",
399
+ " -0.1063254252076149,\n",
400
+ " 0.0015779227251186967,\n",
401
+ " -0.07627759128808975,\n",
402
+ " 0.02941804938018322,\n",
403
+ " -0.020254988223314285,\n",
404
+ " -0.010341203771531582,\n",
405
+ " 0.0077161630615592,\n",
406
+ " 0.015365850180387497,\n",
407
+ " -0.030866345390677452,\n",
408
+ " 0.03806672990322113,\n",
409
+ " -0.08467362076044083,\n",
410
+ " 0.061196524649858475,\n",
411
+ " -0.006315906532108784,\n",
412
+ " 0.02108607068657875,\n",
413
+ " 0.0948043167591095,\n",
414
+ " -0.02897580713033676,\n",
415
+ " -0.02469867840409279,\n",
416
+ " -0.02624206617474556,\n",
417
+ " 0.010229134000837803,\n",
418
+ " -0.04816938936710358,\n",
419
+ " -0.05085073038935661,\n",
420
+ " 0.06431731581687927,\n",
421
+ " -0.06250055879354477,\n",
422
+ " -0.017944322898983955,\n",
423
+ " -0.0032577342353761196,\n",
424
+ " -0.002177971415221691,\n",
425
+ " 0.0040510413236916065,\n",
426
+ " 0.05613983795046806,\n",
427
+ " 0.02165955677628517,\n",
428
+ " -0.028459111228585243,\n",
429
+ " 0.06688956916332245,\n",
430
+ " -0.018423588946461678,\n",
431
+ " -0.1386348307132721,\n",
432
+ " -6.533067062264308e-05,\n",
433
+ " 0.09188717603683472,\n",
434
+ " -0.05782890319824219,\n",
435
+ " 0.051801200956106186,\n",
436
+ " -0.0333990603685379,\n",
437
+ " 0.0635690987110138,\n",
438
+ " -0.03353029489517212,\n",
439
+ " 0.003560281591489911,\n",
440
+ " 0.05478646978735924,\n",
441
+ " -0.0592782236635685,\n",
442
+ " 0.02796691469848156,\n",
443
+ " 0.01981506496667862,\n",
444
+ " 0.03064817003905773,\n",
445
+ " 0.09453834593296051,\n",
446
+ " 0.07339683920145035,\n",
447
+ " 0.0220828615128994,\n",
448
+ " -0.034395359456539154,\n",
449
+ " -0.008398751728236675,\n",
450
+ " 0.016077488660812378,\n",
451
+ " -0.1019863560795784,\n",
452
+ " -0.10767035186290741,\n",
453
+ " -0.04845403507351875,\n",
454
+ " -0.04698591306805611,\n",
455
+ " -0.12305164337158203,\n",
456
+ " 0.01794605329632759,\n",
457
+ " 0.02716820128262043,\n",
458
+ " 0.04150307923555374,\n",
459
+ " -0.012137887999415398,\n",
460
+ " 0.016134945675730705,\n",
461
+ " 0.007807416841387749,\n",
462
+ " -0.030696384608745575,\n",
463
+ " -0.03695548698306084,\n",
464
+ " -0.07249006628990173,\n",
465
+ " -0.06594054400920868,\n",
466
+ " -0.052304696291685104,\n",
467
+ " 0.022644832730293274,\n",
468
+ " 0.10688149929046631,\n",
469
+ " -0.03437632694840431,\n",
470
+ " -0.007189977914094925,\n",
471
+ " 0.004040680360049009,\n",
472
+ " -0.012765718623995781,\n",
473
+ " -0.035733193159103394,\n",
474
+ " -0.009521384723484516,\n",
475
+ " -0.037608999758958817,\n",
476
+ " 0.0257247406989336,\n",
477
+ " -0.07330161333084106,\n",
478
+ " 0.018399816006422043,\n",
479
+ " -0.013152354396879673,\n",
480
+ " -2.4391491249173157e-33,\n",
481
+ " -0.036498475819826126,\n",
482
+ " -0.03898495435714722,\n",
483
+ " -0.02466081827878952,\n",
484
+ " 0.05277742072939873,\n",
485
+ " 0.057153843343257904,\n",
486
+ " 0.08485236018896103,\n",
487
+ " -0.047778353095054626,\n",
488
+ " 0.04885183647274971,\n",
489
+ " 0.0936603769659996,\n",
490
+ " 0.024455295875668526,\n",
491
+ " 0.10703857988119125,\n",
492
+ " -0.044878069311380386,\n",
493
+ " 0.013439536094665527,\n",
494
+ " 0.010380103252828121,\n",
495
+ " -0.03215843066573143,\n",
496
+ " 0.03756462037563324,\n",
497
+ " -0.024516936391592026,\n",
498
+ " -0.015364289283752441,\n",
499
+ " -0.08388064056634903,\n",
500
+ " 0.07285160571336746,\n",
501
+ " -0.08349563926458359,\n",
502
+ " 0.05313438922166824,\n",
503
+ " -0.0502142570912838,\n",
504
+ " 0.021773921325802803,\n",
505
+ " 0.09023481607437134,\n",
506
+ " 0.014781351201236248,\n",
507
+ " 0.039709243923425674,\n",
508
+ " -0.01778602786362171,\n",
509
+ " -0.01812116988003254,\n",
510
+ " -0.05736514553427696,\n",
511
+ " -0.014358995482325554,\n",
512
+ " -0.019216574728488922,\n",
513
+ " -0.07116208970546722,\n",
514
+ " -0.03887920826673508,\n",
515
+ " -0.06919412314891815,\n",
516
+ " 0.015993399545550346,\n",
517
+ " 0.03935178369283676,\n",
518
+ " -0.06669557094573975,\n",
519
+ " -0.02723979577422142,\n",
520
+ " -0.041191909462213516,\n",
521
+ " 0.08880122750997543,\n",
522
+ " -0.03152484819293022,\n",
523
+ " 0.020989220589399338,\n",
524
+ " 0.007525313645601273,\n",
525
+ " 0.006864176597446203,\n",
526
+ " -0.00016016913286875933,\n",
527
+ " -0.03395673632621765,\n",
528
+ " -0.01977171003818512,\n",
529
+ " -0.013901437632739544,\n",
530
+ " -0.02347422204911709,\n",
531
+ " 0.05024150386452675,\n",
532
+ " 0.004815816413611174,\n",
533
+ " 0.0030840442050248384,\n",
534
+ " 0.013711294159293175,\n",
535
+ " 0.04268831014633179,\n",
536
+ " -0.010319440625607967,\n",
537
+ " -0.01561632938683033,\n",
538
+ " -0.13140302896499634,\n",
539
+ " 0.03932563588023186,\n",
540
+ " 0.0034621721133589745,\n",
541
+ " 0.004625402390956879,\n",
542
+ " -0.022172948345541954,\n",
543
+ " -0.07998225092887878,\n",
544
+ " 0.07068227231502533,\n",
545
+ " 0.013251686468720436,\n",
546
+ " 0.013216551393270493,\n",
547
+ " -0.031048668548464775,\n",
548
+ " 0.01629941165447235,\n",
549
+ " 0.07485268265008926,\n",
550
+ " -0.020337823778390884,\n",
551
+ " -0.03757909685373306,\n",
552
+ " 0.0027204568032175303,\n",
553
+ " -0.07825025916099548,\n",
554
+ " -0.07273514568805695,\n",
555
+ " -0.0015075246337801218,\n",
556
+ " -0.0015040460275486112,\n",
557
+ " -0.04806789010763168,\n",
558
+ " -0.02269686758518219,\n",
559
+ " 0.00234704976901412,\n",
560
+ " -0.10254422575235367,\n",
561
+ " -0.01062675192952156,\n",
562
+ " -0.05898876115679741,\n",
563
+ " -0.028257478028535843,\n",
564
+ " 0.018367942422628403,\n",
565
+ " -0.02433406002819538,\n",
566
+ " 0.006143547594547272,\n",
567
+ " 0.058125562965869904,\n",
568
+ " 0.006210033316165209,\n",
569
+ " -0.004125489853322506,\n",
570
+ " 0.004883211106061935,\n",
571
+ " -0.051727477461099625,\n",
572
+ " -0.002652254654094577,\n",
573
+ " -0.08568170666694641,\n",
574
+ " 0.04144113138318062,\n",
575
+ " -0.007155539933592081,\n",
576
+ " -2.368167706379154e-08,\n",
577
+ " 0.05613090097904205,\n",
578
+ " -0.014770290814340115,\n",
579
+ " -0.0016940254718065262,\n",
580
+ " -0.0011319591430947185,\n",
581
+ " -0.004112910944968462,\n",
582
+ " -0.07847736030817032,\n",
583
+ " -0.03272825479507446,\n",
584
+ " 0.08840586245059967,\n",
585
+ " -0.009142806753516197,\n",
586
+ " 0.08083547651767731,\n",
587
+ " -0.06821239739656448,\n",
588
+ " 0.08852605521678925,\n",
589
+ " 0.018825415521860123,\n",
590
+ " -0.004044109955430031,\n",
591
+ " 0.01947229728102684,\n",
592
+ " 0.05469341203570366,\n",
593
+ " -0.004570759367197752,\n",
594
+ " 0.050132788717746735,\n",
595
+ " -0.020886147394776344,\n",
596
+ " -0.0043810331262648106,\n",
597
+ " 0.012420092709362507,\n",
598
+ " -0.040249839425086975,\n",
599
+ " -0.017629530280828476,\n",
600
+ " -0.07264230400323868,\n",
601
+ " 0.027827059850096703,\n",
602
+ " 0.026226632297039032,\n",
603
+ " 0.009220139123499393,\n",
604
+ " 0.02573232352733612,\n",
605
+ " -0.009361241944134235,\n",
606
+ " -0.09865128993988037,\n",
607
+ " -0.03130580857396126,\n",
608
+ " 0.051363445818424225,\n",
609
+ " 0.011322762817144394,\n",
610
+ " -0.0496138371527195,\n",
611
+ " -0.04716363921761513,\n",
612
+ " -0.0054739126935601234,\n",
613
+ " 0.06890673190355301,\n",
614
+ " -0.040705155581235886,\n",
615
+ " 0.05423299968242645,\n",
616
+ " -0.019241807982325554,\n",
617
+ " 0.01434497069567442,\n",
618
+ " -0.04443489387631416,\n",
619
+ " -0.02567482925951481,\n",
620
+ " 0.031354621052742004,\n",
621
+ " 0.06115218624472618,\n",
622
+ " -0.026838727295398712,\n",
623
+ " 0.04913513734936714,\n",
624
+ " 0.05644834786653519,\n",
625
+ " 0.027938084676861763,\n",
626
+ " -0.08463852107524872,\n",
627
+ " -0.005661568138748407,\n",
628
+ " 0.007060948293656111,\n",
629
+ " 0.10265117883682251,\n",
630
+ " -0.019236600026488304,\n",
631
+ " -0.08145822584629059,\n",
632
+ " 0.07125057280063629,\n",
633
+ " 0.01803840510547161,\n",
634
+ " -0.017572075128555298,\n",
635
+ " -0.00043016800191253424,\n",
636
+ " -0.10405920445919037,\n",
637
+ " 0.0028415117412805557,\n",
638
+ " -0.00555642182007432,\n",
639
+ " 0.10660912096500397,\n",
640
+ " 0.050997212529182434],\n",
641
+ " 'metadata': {'text': 'TheGALE\\nENCYCLOPEDIA\\nofMEDICINE\\nSECOND EDITION'}}"
642
+ ]
643
+ },
644
+ "execution_count": 43,
645
+ "metadata": {},
646
+ "output_type": "execute_result"
647
+ }
648
+ ],
649
+ "source": [
650
+ "upsert_vectors[0]"
651
+ ]
652
+ },
653
+ {
654
+ "cell_type": "code",
655
+ "execution_count": 45,
656
+ "metadata": {},
657
+ "outputs": [],
658
+ "source": [
659
+ "# docsearch = Pinecone.from_texts([t.page_content for t in chunks], embeddings, index_name)\n",
660
+ "\n",
661
+ "# Upsert the chunks into Pinecone\n",
662
+ "# index.upsert(vectors=upsert_vectors)\n",
663
+ "batch_size = 500 # Adjust as necessary based on your data size and Pinecone limits\n",
664
+ "for i in range(0, len(upsert_vectors), batch_size):\n",
665
+ " batch_vectors = upsert_vectors[i:i + batch_size]\n",
666
+ " index.upsert(vectors=batch_vectors)\n",
667
+ "\n"
668
+ ]
669
+ },
670
+ {
671
+ "cell_type": "code",
672
+ "execution_count": 46,
673
+ "metadata": {},
674
+ "outputs": [
675
+ {
676
+ "name": "stdout",
677
+ "output_type": "stream",
678
+ "text": [
679
+ "384\n"
680
+ ]
681
+ }
682
+ ],
683
+ "source": [
684
+ "query_embedding = embeddings.embed_query(\"What are allergies\")\n",
685
+ "print(len(query_embedding))\n",
686
+ "# Perform query to retrieve similar vectors\n",
687
+ "results = index.query(vector=[query_embedding], top_k=3, include_values=False, include_metadata=True)\n"
688
+ ]
689
+ },
690
+ {
691
+ "cell_type": "code",
692
+ "execution_count": 47,
693
+ "metadata": {},
694
+ "outputs": [
695
+ {
696
+ "name": "stdout",
697
+ "output_type": "stream",
698
+ "text": [
699
+ "{'matches': [{'id': '1373',\n",
700
+ " 'metadata': {'text': 'GALE ENCYCLOPEDIA OF MEDICINE 2 '\n",
701
+ " '117Allergies\\n'\n",
702
+ " 'Allergic rhinitis is commonly triggered '\n",
703
+ " 'by\\n'\n",
704
+ " 'exposure to household dust, animal fur,or '\n",
705
+ " 'pollen. The foreign substance thattriggers '\n",
706
+ " 'an allergic reaction is calledan '\n",
707
+ " 'allergen.\\n'\n",
708
+ " 'The presence of an allergen causes the\\n'\n",
709
+ " \"body's lymphocytes to begin producingIgE \"\n",
710
+ " 'antibodies. The lymphocytes of an allergy '\n",
711
+ " 'sufferer produce an unusuallylarge amount '\n",
712
+ " 'of IgE.\\n'\n",
713
+ " 'IgE molecules attach to mast\\n'\n",
714
+ " 'cells, which contain '\n",
715
+ " 'histamine.HistaminePollen grains\\n'\n",
716
+ " 'Lymphocyte\\n'\n",
717
+ " 'FIRST EXPOSURE'},\n",
718
+ " 'score': 0.682266653,\n",
719
+ " 'values': []},\n",
720
+ " {'id': '1356',\n",
721
+ " 'metadata': {'text': 'allergens are the following:\\n'\n",
722
+ " '• plant pollens\\n'\n",
723
+ " '• animal fur and dander\\n'\n",
724
+ " '• body parts from house mites (microscopic '\n",
725
+ " 'creatures\\n'\n",
726
+ " 'found in all houses)\\n'\n",
727
+ " '• house dust• mold spores• cigarette '\n",
728
+ " 'smoke• solvents• cleaners\\n'\n",
729
+ " 'Common food allergens include the '\n",
730
+ " 'following:\\n'\n",
731
+ " '• nuts, especially peanuts, walnuts, and '\n",
732
+ " 'brazil nuts\\n'\n",
733
+ " '• fish, mollusks, and shellfish• eggs• '\n",
734
+ " 'wheat• milk• food additives and '\n",
735
+ " 'preservatives\\n'\n",
736
+ " 'The following types of drugs commonly '\n",
737
+ " 'cause aller-\\n'\n",
738
+ " 'gic reactions:\\n'\n",
739
+ " '• penicillin or other antibiotics'},\n",
740
+ " 'score': 0.678239942,\n",
741
+ " 'values': []},\n",
742
+ " {'id': '1306',\n",
743
+ " 'metadata': {'text': 'itchy, scratchy nose, eyes, and throat '\n",
744
+ " 'common in aller-gic rhinitis.\\n'\n",
745
+ " 'The number of possible airborne allergens '\n",
746
+ " 'is enor-'},\n",
747
+ " 'score': 0.676807582,\n",
748
+ " 'values': []}],\n",
749
+ " 'namespace': '',\n",
750
+ " 'usage': {'read_units': 6}}\n"
751
+ ]
752
+ }
753
+ ],
754
+ "source": [
755
+ "print(results)"
756
+ ]
757
+ },
758
+ {
759
+ "cell_type": "code",
760
+ "execution_count": 48,
761
+ "metadata": {},
762
+ "outputs": [
763
+ {
764
+ "name": "stdout",
765
+ "output_type": "stream",
766
+ "text": [
767
+ "['1373', '1356', '1306']\n"
768
+ ]
769
+ }
770
+ ],
771
+ "source": [
772
+ "matched_ids = [match['id'] for match in results['matches']]\n",
773
+ "print(matched_ids)"
774
+ ]
775
+ },
776
+ {
777
+ "cell_type": "code",
778
+ "execution_count": 49,
779
+ "metadata": {},
780
+ "outputs": [
781
+ {
782
+ "data": {
783
+ "text/plain": [
784
+ "'mous. Seasonal AR is most commonly caused by grassand tree pollens, since their pollen is produced in largeamounts and is dispersed by the wind. Showy flowers,like roses or lilacs, that attract insects produce a stickypollen which is less likely to become airborne. Differentplants release their pollen at different times of the year,so seasonal AR sufferers may be most affected in spring,summer, or fall, depending on which plants provoke aresponse. The amount of pollen in the air is reflected'"
785
+ ]
786
+ },
787
+ "execution_count": 49,
788
+ "metadata": {},
789
+ "output_type": "execute_result"
790
+ }
791
+ ],
792
+ "source": [
793
+ "chunks[1306].page_content\n",
794
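+ "# Next, the text of these top matches is sent to the LLM as context so it can generate the answer. NOTE(review): id '1306' above holds the text that precedes chunks[1306], so ids appear offset by one from chunk indices - confirm."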
795
+ ]
796
+ },
797
+ {
798
+ "cell_type": "code",
799
+ "execution_count": 50,
800
+ "metadata": {},
801
+ "outputs": [],
802
+ "source": [
803
+ "prompt_template = \"\"\"\n",
804
+ "Use the following pieces of information to answer the user's question.\n",
805
+ "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n",
806
+ "\n",
807
+ "Context: {context}\n",
808
+ "Question: {question}\n",
809
+ "\n",
810
+ "Only return the helpful answer below nothing else.\n",
811
+ "Helpful Answer: \n",
812
+ "\"\"\""
813
+ ]
814
+ },
815
+ {
816
+ "cell_type": "code",
817
+ "execution_count": 51,
818
+ "metadata": {},
819
+ "outputs": [],
820
+ "source": [
821
+ "PROMPT = PromptTemplate(template = prompt_template, input_variables=[\"context\", \"question\"])\n",
822
+ "chain_type_kwargs = {\"prompt\":PROMPT}"
823
+ ]
824
+ },
825
+ {
826
+ "cell_type": "code",
827
+ "execution_count": 52,
828
+ "metadata": {},
829
+ "outputs": [],
830
+ "source": [
831
+ "llm = CTransformers(model=\"model/llama-2-7b-chat.ggmlv3.q4_0.bin\", model_type=\"llama\", config={'max_new_tokens': 512, 'temperature': 1})"
832
+ ]
833
+ },
834
+ {
835
+ "cell_type": "code",
836
+ "execution_count": 61,
837
+ "metadata": {},
838
+ "outputs": [],
839
+ "source": [
840
+ "# Create the Pinecone vector store (the retriever is derived from it later via as_retriever)\n",
841
+ "vector_store = Pinecone(index, embeddings, text_key=\"text\")\n"
842
+ ]
843
+ },
844
+ {
845
+ "cell_type": "code",
846
+ "execution_count": 76,
847
+ "metadata": {},
848
+ "outputs": [
849
+ {
850
+ "name": "stdout",
851
+ "output_type": "stream",
852
+ "text": [
853
+ "[Document(page_content='tively or by altering the skin of the scalp. One exampleis thyroid disorders. Hyperthyroidism (too much thy-\\nroid hormone) causes hair to become thin and fine.\\nGALE ENCYCLOPEDIA OF MEDICINE 2 125Alopecia\\nTop of balding male’s head. (Photograph by Kelly A. Quin.\\nReproduced by permission.)GEM - 0001 to 0432 - A 10/22/03 1:42 PM Page 125'), Document(page_content='plugs of skin, each containing one to several hairs,from the back side of the scalp. The bald sections arethen implanted with the plugs. Research completed in2000 looked at the new technique of hair grafting, andfound that micrografts (one to two hairs transplantedper follicle) resulted in fewer complications and thebest results\\nAnother surgical procedure used to treat androgenic'), Document(page_content='multitude of hair replacement methods performed byboth physicians and non-physicians. They range fromsimply weaving someone else’s hair in with the remainsof your own to surgically transplanting thousands of hairfollicles one at a time.\\nHair transplantation is completed by taking tiny')]\n"
854
+ ]
855
+ }
856
+ ],
857
+ "source": [
858
+ "\n",
859
+ "# Example query\n",
860
+ "query = \"How to Strengthen Hairs?\"\n",
861
+ "answer = vector_store.similarity_search(query, k=3)\n",
862
+ "print(answer)"
863
+ ]
864
+ },
865
+ {
866
+ "cell_type": "code",
867
+ "execution_count": 63,
868
+ "metadata": {},
869
+ "outputs": [],
870
+ "source": [
871
+ "\n",
872
+ "qa = RetrievalQA.from_chain_type(llm, chain_type=\"stuff\",retriever = vector_store.as_retriever(search_kwargs={\"k\": 2}), chain_type_kwargs=chain_type_kwargs)"
873
+ ]
874
+ },
875
+ {
876
+ "cell_type": "code",
877
+ "execution_count": 77,
878
+ "metadata": {},
879
+ "outputs": [
880
+ {
881
+ "name": "stdout",
882
+ "output_type": "stream",
883
+ "text": [
884
+ "{'query': 'How to cure AIDS?', 'result': 'Unfortunately, there is no known cure for HIV or AIDS at this time. While advances have been made in treating the symptoms and slowing the progression of the disease, a cure has not yet been discovered. Research continues to be conducted on new treatments and potential cures, but as of now, there is no known way to completely eliminate the virus from the body or to restore the immune system to its full function.'}\n"
885
+ ]
886
+ }
887
+ ],
888
+ "source": [
889
+ "# Example query\n",
890
+ "query = \"How to cure AIDS?\"\n",
891
+ "answer = qa.invoke({\"query\":query})\n",
892
+ "print(answer)\n"
893
+ ]
894
+ },
895
+ {
896
+ "cell_type": "code",
897
+ "execution_count": null,
898
+ "metadata": {},
899
+ "outputs": [],
900
+ "source": []
901
+ }
902
+ ],
903
+ "metadata": {
904
+ "kernelspec": {
905
+ "display_name": "Python 3",
906
+ "language": "python",
907
+ "name": "python3"
908
+ },
909
+ "language_info": {
910
+ "codemirror_mode": {
911
+ "name": "ipython",
912
+ "version": 3
913
+ },
914
+ "file_extension": ".py",
915
+ "mimetype": "text/x-python",
916
+ "name": "python",
917
+ "nbconvert_exporter": "python",
918
+ "pygments_lexer": "ipython3",
919
+ "version": "3.11.5"
920
+ }
921
+ },
922
+ "nbformat": 4,
923
+ "nbformat_minor": 2
924
+ }