Spaces:
Running
Running
lingyit1108
committed on
Commit
•
70e2d85
1
Parent(s):
5ea4259
resolved pdf reader issue
Browse files
- config/model_config_advanced.yml +1 -1
- database/mock_qna.sqlite +1 -1
- models/chroma_db_advanced_corrected/7ffa7a70-bc3b-4a97-858f-3c8223492b3e/data_level0.bin +3 -0
- models/chroma_db_advanced_corrected/7ffa7a70-bc3b-4a97-858f-3c8223492b3e/header.bin +3 -0
- models/chroma_db_advanced_corrected/7ffa7a70-bc3b-4a97-858f-3c8223492b3e/index_metadata.pickle +3 -0
- models/chroma_db_advanced_corrected/7ffa7a70-bc3b-4a97-858f-3c8223492b3e/length.bin +3 -0
- models/chroma_db_advanced_corrected/7ffa7a70-bc3b-4a97-858f-3c8223492b3e/link_lists.bin +3 -0
- models/chroma_db_advanced_corrected/chroma.sqlite3 +3 -0
- notebooks/002_persisted-embedding-model-advanced.ipynb +118 -148
- notebooks/007_test_hi_content_engine.ipynb +41 -45
- requirements.txt +1 -0
config/model_config_advanced.yml
CHANGED
@@ -11,7 +11,7 @@ embeddings:
|
|
11 |
fine_tuned_embedding_model: 'local:models/fine-tuned-embeddings-advanced'
|
12 |
|
13 |
vector_store:
|
14 |
-
persisted_path: './models/
|
15 |
|
16 |
questionaire_data:
|
17 |
db_path: './database/mock_qna.sqlite'
|
|
|
11 |
fine_tuned_embedding_model: 'local:models/fine-tuned-embeddings-advanced'
|
12 |
|
13 |
vector_store:
|
14 |
+
persisted_path: './models/chroma_db_advanced_corrected'
|
15 |
|
16 |
questionaire_data:
|
17 |
db_path: './database/mock_qna.sqlite'
|
database/mock_qna.sqlite
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 40960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d7f3f8c146d46df19f3dd8a4846ccbf63f88e6dd914b67a0c5c689eba21a558d
|
3 |
size 40960
|
models/chroma_db_advanced_corrected/7ffa7a70-bc3b-4a97-858f-3c8223492b3e/data_level0.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3c7deba9397301d1ea3f7f5edcb06162bc4797984100c456b128303c58b95c79
|
3 |
+
size 31844000
|
models/chroma_db_advanced_corrected/7ffa7a70-bc3b-4a97-858f-3c8223492b3e/header.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1fdac383bac7a3236c814029cee7525e8018d396f9cd0d15b97a22a3af9090d8
|
3 |
+
size 100
|
models/chroma_db_advanced_corrected/7ffa7a70-bc3b-4a97-858f-3c8223492b3e/index_metadata.pickle
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:175eec7edc141af44f996bba0295ddb71d3fd54c39b1352539ede6753f00e834
|
3 |
+
size 1100226
|
models/chroma_db_advanced_corrected/7ffa7a70-bc3b-4a97-858f-3c8223492b3e/length.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:68f24a06f63a85c1b082283b0be703475bf9023e9f4f7e8b3bd4bca276af3b8f
|
3 |
+
size 76000
|
models/chroma_db_advanced_corrected/7ffa7a70-bc3b-4a97-858f-3c8223492b3e/link_lists.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:224e61d8e99cd63b57c6e9204f3136aba4a43f9bb15d7dcd5eb181c9378829f8
|
3 |
+
size 167188
|
models/chroma_db_advanced_corrected/chroma.sqlite3
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cf347334a74a91a9d24d298ad1b12fa043579fe0b915cf7bc558f3d2acafca5c
|
3 |
+
size 299061248
|
notebooks/002_persisted-embedding-model-advanced.ipynb
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
},
|
11 |
{
|
12 |
"cell_type": "code",
|
13 |
-
"execution_count":
|
14 |
"id": "7de9c591-5a77-4bbe-80f1-4897e15f0b97",
|
15 |
"metadata": {},
|
16 |
"outputs": [],
|
@@ -33,24 +33,16 @@
|
|
33 |
"import nest_asyncio\n",
|
34 |
"nest_asyncio.apply()\n",
|
35 |
"\n",
|
36 |
-
"import time"
|
|
|
37 |
]
|
38 |
},
|
39 |
{
|
40 |
"cell_type": "code",
|
41 |
-
"execution_count":
|
42 |
"id": "978152ce-4d87-44b5-b521-dbaff60b32b0",
|
43 |
"metadata": {},
|
44 |
-
"outputs": [
|
45 |
-
{
|
46 |
-
"name": "stderr",
|
47 |
-
"output_type": "stream",
|
48 |
-
"text": [
|
49 |
-
"199it [00:00, 8821.71it/s]\n",
|
50 |
-
"200it [00:00, 12584.17it/s]\n"
|
51 |
-
]
|
52 |
-
}
|
53 |
-
],
|
54 |
"source": [
|
55 |
"split_content(filepath=\"../raw_documents/answers.txt\", \n",
|
56 |
" separator=\"\\n\\n\", \n",
|
@@ -63,7 +55,7 @@
|
|
63 |
},
|
64 |
{
|
65 |
"cell_type": "code",
|
66 |
-
"execution_count":
|
67 |
"id": "d925371b-8777-4f5b-a7f2-ec3f228ef266",
|
68 |
"metadata": {},
|
69 |
"outputs": [],
|
@@ -84,41 +76,64 @@
|
|
84 |
{
|
85 |
"cell_type": "code",
|
86 |
"execution_count": null,
|
87 |
-
"id": "
|
88 |
"metadata": {},
|
89 |
"outputs": [],
|
90 |
"source": []
|
91 |
},
|
92 |
{
|
93 |
"cell_type": "code",
|
94 |
-
"execution_count":
|
95 |
"id": "3e65dff6-77b6-4be8-8857-5cecf3a035bb",
|
96 |
"metadata": {},
|
97 |
"outputs": [],
|
98 |
"source": [
|
99 |
"# load some documents\n",
|
100 |
-
"
|
101 |
-
"
|
102 |
-
"
|
103 |
-
"
|
104 |
-
"
|
105 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
]
|
107 |
},
|
108 |
{
|
109 |
"cell_type": "code",
|
110 |
-
"execution_count":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
"id": "bd86b3f5-1dfc-4257-bd9c-86d34f02398d",
|
112 |
"metadata": {},
|
113 |
"outputs": [],
|
114 |
"source": [
|
115 |
"# initialize client, setting path to save data\n",
|
116 |
-
"db = chromadb.PersistentClient(path=\"../models/
|
117 |
]
|
118 |
},
|
119 |
{
|
120 |
"cell_type": "code",
|
121 |
-
"execution_count":
|
122 |
"id": "f568ce7b-bcbf-455c-acf1-6c2cae129fed",
|
123 |
"metadata": {},
|
124 |
"outputs": [],
|
@@ -129,7 +144,7 @@
|
|
129 |
},
|
130 |
{
|
131 |
"cell_type": "code",
|
132 |
-
"execution_count":
|
133 |
"id": "ed0b018e-1982-46b2-b1b4-04f5c0ce8672",
|
134 |
"metadata": {},
|
135 |
"outputs": [],
|
@@ -148,18 +163,10 @@
|
|
148 |
},
|
149 |
{
|
150 |
"cell_type": "code",
|
151 |
-
"execution_count":
|
152 |
"id": "0946b6ce-96ab-44de-ad75-e424a8429f67",
|
153 |
"metadata": {},
|
154 |
-
"outputs": [
|
155 |
-
{
|
156 |
-
"name": "stdout",
|
157 |
-
"output_type": "stream",
|
158 |
-
"text": [
|
159 |
-
"LLM is explicitly disabled. Using MockLLM.\n"
|
160 |
-
]
|
161 |
-
}
|
162 |
-
],
|
163 |
"source": [
|
164 |
"Settings.llm = None\n",
|
165 |
"Settings.chunk_size = 1024\n",
|
@@ -169,31 +176,20 @@
|
|
169 |
},
|
170 |
{
|
171 |
"cell_type": "code",
|
172 |
-
"execution_count":
|
173 |
"id": "b8c73a2c-1129-406a-8046-085afcaf9cbb",
|
174 |
"metadata": {},
|
175 |
"outputs": [],
|
176 |
"source": [
|
177 |
-
"nodes = Settings.node_parser.get_nodes_from_documents(
|
178 |
]
|
179 |
},
|
180 |
{
|
181 |
"cell_type": "code",
|
182 |
-
"execution_count":
|
183 |
"id": "75f1c76f-d3e5-4b69-818c-98865adb1457",
|
184 |
"metadata": {},
|
185 |
-
"outputs": [
|
186 |
-
{
|
187 |
-
"data": {
|
188 |
-
"text/plain": [
|
189 |
-
"6814"
|
190 |
-
]
|
191 |
-
},
|
192 |
-
"execution_count": 13,
|
193 |
-
"metadata": {},
|
194 |
-
"output_type": "execute_result"
|
195 |
-
}
|
196 |
-
],
|
197 |
"source": [
|
198 |
"len(nodes)"
|
199 |
]
|
@@ -208,7 +204,7 @@
|
|
208 |
},
|
209 |
{
|
210 |
"cell_type": "code",
|
211 |
-
"execution_count":
|
212 |
"id": "dab4c6f3-ef67-4d90-b3d5-e290c5d1b6f4",
|
213 |
"metadata": {},
|
214 |
"outputs": [],
|
@@ -218,7 +214,7 @@
|
|
218 |
},
|
219 |
{
|
220 |
"cell_type": "code",
|
221 |
-
"execution_count":
|
222 |
"id": "6a764113-ad7e-4674-aa57-ebbf405902a8",
|
223 |
"metadata": {},
|
224 |
"outputs": [],
|
@@ -236,7 +232,7 @@
|
|
236 |
},
|
237 |
{
|
238 |
"cell_type": "code",
|
239 |
-
"execution_count":
|
240 |
"id": "e492ed4a-23a3-47d6-8b50-51fb48b3aa05",
|
241 |
"metadata": {},
|
242 |
"outputs": [],
|
@@ -246,7 +242,7 @@
|
|
246 |
},
|
247 |
{
|
248 |
"cell_type": "code",
|
249 |
-
"execution_count":
|
250 |
"id": "cbd11b89-9b83-4f08-bb30-160f750f2ffb",
|
251 |
"metadata": {},
|
252 |
"outputs": [],
|
@@ -256,18 +252,10 @@
|
|
256 |
},
|
257 |
{
|
258 |
"cell_type": "code",
|
259 |
-
"execution_count":
|
260 |
"id": "d3bd848d-9985-4a3d-bdc4-ec340cc69ef3",
|
261 |
"metadata": {},
|
262 |
-
"outputs": [
|
263 |
-
{
|
264 |
-
"name": "stdout",
|
265 |
-
"output_type": "stream",
|
266 |
-
"text": [
|
267 |
-
"Indexing time: 2.3 mins\n"
|
268 |
-
]
|
269 |
-
}
|
270 |
-
],
|
271 |
"source": [
|
272 |
"indexing_cost = time.time() - start_time\n",
|
273 |
"indexing_cost = indexing_cost / 60\n",
|
@@ -276,7 +264,7 @@
|
|
276 |
},
|
277 |
{
|
278 |
"cell_type": "code",
|
279 |
-
"execution_count":
|
280 |
"id": "f16cca33-71fb-437d-a033-671b9fd44054",
|
281 |
"metadata": {},
|
282 |
"outputs": [],
|
@@ -286,28 +274,28 @@
|
|
286 |
},
|
287 |
{
|
288 |
"cell_type": "code",
|
289 |
-
"execution_count":
|
290 |
"id": "3290e870-41d7-49c4-9c4f-cb16bd1f469e",
|
291 |
"metadata": {
|
292 |
"scrolled": true
|
293 |
},
|
294 |
-
"outputs": [
|
295 |
-
{
|
296 |
-
"data": {
|
297 |
-
"text/plain": [
|
298 |
-
"Response(response='Context information is below.\\n---------------------\\nfile_path: ../raw_documents/answers_temp/answers_050.txt\\n\\nQuestion: The fundamental principle of Singapore healthcare financing is ____________.\\nAnswer: The answer is \"Individual Savings\".\\n\\nfile_path: ../raw_documents/qna_temp/qna_050.txt\\n\\nC1/5\\nQuestion: The fundamental principle of Singapore healthcare financing is ____________.\\nA. The 3’s M. That is Medisave, Medishield, Medifund.\\nB. Means Testing and Casemix.\\nC. Individual Savings.\\nD. Tax based subsidies and government subvention.\\nAnswer: C. The answer is \"Individual Savings\".\\n---------------------\\nGiven the context information and not prior knowledge, answer the query.\\nQuery: Healthcare System in Singapore consists of?\\nAnswer: ', source_nodes=[NodeWithScore(node=TextNode(id_='536fef67-6a3f-4054-a94a-cc9143599510', embedding=None, metadata={'file_path': '../raw_documents/answers_temp/answers_050.txt', 'file_name': 'answers_050.txt', 'file_type': 'text/plain', 'file_size': 130, 'creation_date': '2024-02-24', 'last_modified_date': '2024-02-24', 'last_accessed_date': '2024-02-24'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='2b0f7dad-c532-4abd-8c42-f53383a4fc76', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'file_path': '../raw_documents/answers_temp/answers_050.txt', 'file_name': 'answers_050.txt', 'file_type': 'text/plain', 'file_size': 130, 'creation_date': '2024-02-24', 'last_modified_date': '2024-02-24', 'last_accessed_date': '2024-02-24'}, hash='5b1d1dc729a663e4ccfacc0f18adf0f6644a2a7d2991490fd962d1550c83f2ff'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='6d93c092-b4cc-4b5b-b379-080d777d3908', 
node_type=<ObjectType.TEXT: '1'>, metadata={'file_path': '../raw_documents/answers_temp/answers_044.txt', 'file_name': 'answers_044.txt', 'file_type': 'text/plain', 'file_size': 164, 'creation_date': '2024-02-24', 'last_modified_date': '2024-02-24', 'last_accessed_date': '2024-02-24'}, hash='caeb59043b8daa56ed472941882947570abff951f64aa0498672aba5921fac1d'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='859a9958-6f5d-4581-95d0-39edfc950ef5', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='8416454b2fbad3e6122c5151d2b3d1eadf0afde3514ba09374c71e96baf712bc')}, text='Question: The fundamental principle of Singapore healthcare financing is ____________.\\nAnswer: The answer is \"Individual Savings\".', start_char_idx=0, end_char_idx=130, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n'), score=0.4159636550867191), NodeWithScore(node=TextNode(id_='472000ae-a0aa-4464-a200-72fe67a3fbde', embedding=None, metadata={'file_path': '../raw_documents/qna_temp/qna_050.txt', 'file_name': 'qna_050.txt', 'file_type': 'text/plain', 'file_size': 297, 'creation_date': '2024-02-24', 'last_modified_date': '2024-02-24', 'last_accessed_date': '2024-02-24'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='506fb715-d3b0-4ca7-b7ca-011a1e1a1f0d', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'file_path': '../raw_documents/qna_temp/qna_050.txt', 'file_name': 'qna_050.txt', 'file_type': 'text/plain', 'file_size': 297, 'creation_date': '2024-02-24', 'last_modified_date': '2024-02-24', 'last_accessed_date': '2024-02-24'}, hash='7461ffa12ff6729003131976b82995b7254ab10f8dc7d79c65988ec9e3b7b631'), <NodeRelationship.PREVIOUS: '2'>: 
RelatedNodeInfo(node_id='d8232b90-d641-4966-b98f-4ca0821db773', node_type=<ObjectType.TEXT: '1'>, metadata={'file_path': '../raw_documents/qna_temp/qna_044.txt', 'file_name': 'qna_044.txt', 'file_type': 'text/plain', 'file_size': 383, 'creation_date': '2024-02-24', 'last_modified_date': '2024-02-24', 'last_accessed_date': '2024-02-24'}, hash='cbeb00c29c6130548466697a862fee43ab2be92d84158cc0b69c2f5c7bbe68b1'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='e772e623-cf91-41cd-a516-50acb894eb54', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='a7583b0fd46f98d0118c712632277d81f417b779f8bcc100ab2558dae6317cde')}, text='C1/5\\nQuestion: The fundamental principle of Singapore healthcare financing is ____________.\\nA. The 3’s M. That is Medisave, Medishield, Medifund.\\nB. Means Testing and Casemix.\\nC. Individual Savings.\\nD. Tax based subsidies and government subvention.\\nAnswer: C. The answer is \"Individual Savings\".', start_char_idx=0, end_char_idx=295, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n'), score=0.4126648577998099)], metadata={'536fef67-6a3f-4054-a94a-cc9143599510': {'file_path': '../raw_documents/answers_temp/answers_050.txt', 'file_name': 'answers_050.txt', 'file_type': 'text/plain', 'file_size': 130, 'creation_date': '2024-02-24', 'last_modified_date': '2024-02-24', 'last_accessed_date': '2024-02-24'}, '472000ae-a0aa-4464-a200-72fe67a3fbde': {'file_path': '../raw_documents/qna_temp/qna_050.txt', 'file_name': 'qna_050.txt', 'file_type': 'text/plain', 'file_size': 297, 'creation_date': '2024-02-24', 'last_modified_date': '2024-02-24', 'last_accessed_date': '2024-02-24'}})"
|
299 |
-
]
|
300 |
-
},
|
301 |
-
"execution_count": 20,
|
302 |
-
"metadata": {},
|
303 |
-
"output_type": "execute_result"
|
304 |
-
}
|
305 |
-
],
|
306 |
"source": [
|
307 |
"response = vector_query_engine.query(\"Healthcare System in Singapore consists of?\")\n",
|
308 |
"response"
|
309 |
]
|
310 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
311 |
{
|
312 |
"cell_type": "code",
|
313 |
"execution_count": null,
|
@@ -318,7 +306,7 @@
|
|
318 |
},
|
319 |
{
|
320 |
"cell_type": "code",
|
321 |
-
"execution_count":
|
322 |
"id": "1bb75b04-6a62-43a4-8728-d2e52e49f1c0",
|
323 |
"metadata": {},
|
324 |
"outputs": [],
|
@@ -329,7 +317,7 @@
|
|
329 |
},
|
330 |
{
|
331 |
"cell_type": "code",
|
332 |
-
"execution_count":
|
333 |
"id": "0ed920fb-6456-49ac-8b63-08bd86b5b39c",
|
334 |
"metadata": {},
|
335 |
"outputs": [],
|
@@ -364,7 +352,7 @@
|
|
364 |
},
|
365 |
{
|
366 |
"cell_type": "code",
|
367 |
-
"execution_count":
|
368 |
"id": "c1a42c35-5f57-423c-8fb7-7d18b3b466b5",
|
369 |
"metadata": {},
|
370 |
"outputs": [],
|
@@ -381,7 +369,14 @@
|
|
381 |
"from llama_index.llms.openai import OpenAI\n",
|
382 |
"from llama_index.core.memory import ChatMemoryBuffer\n",
|
383 |
"\n",
|
384 |
-
"import time"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
385 |
]
|
386 |
},
|
387 |
{
|
@@ -394,7 +389,7 @@
|
|
394 |
},
|
395 |
{
|
396 |
"cell_type": "code",
|
397 |
-
"execution_count":
|
398 |
"id": "d38dc953-b923-4128-86a1-c8c6f69af0ed",
|
399 |
"metadata": {},
|
400 |
"outputs": [],
|
@@ -404,7 +399,7 @@
|
|
404 |
},
|
405 |
{
|
406 |
"cell_type": "code",
|
407 |
-
"execution_count":
|
408 |
"id": "4c83c613-2cfc-4871-9d07-c82f77a3bd5e",
|
409 |
"metadata": {},
|
410 |
"outputs": [],
|
@@ -414,7 +409,7 @@
|
|
414 |
},
|
415 |
{
|
416 |
"cell_type": "code",
|
417 |
-
"execution_count":
|
418 |
"id": "0583e9b0-d977-488c-8331-46dfa749924c",
|
419 |
"metadata": {},
|
420 |
"outputs": [],
|
@@ -433,17 +428,17 @@
|
|
433 |
},
|
434 |
{
|
435 |
"cell_type": "code",
|
436 |
-
"execution_count":
|
437 |
"id": "2159a2b6-494b-41b9-ac54-dd342bfb74ba",
|
438 |
"metadata": {},
|
439 |
"outputs": [],
|
440 |
"source": [
|
441 |
-
"db = chromadb.PersistentClient(path=\"../models/
|
442 |
]
|
443 |
},
|
444 |
{
|
445 |
"cell_type": "code",
|
446 |
-
"execution_count":
|
447 |
"id": "1b385644-b46e-4d13-88fa-9f4af39db405",
|
448 |
"metadata": {},
|
449 |
"outputs": [],
|
@@ -453,7 +448,7 @@
|
|
453 |
},
|
454 |
{
|
455 |
"cell_type": "code",
|
456 |
-
"execution_count":
|
457 |
"id": "93cb53d1-6b8c-4b2d-a839-53501c0d54b2",
|
458 |
"metadata": {},
|
459 |
"outputs": [],
|
@@ -465,7 +460,7 @@
|
|
465 |
},
|
466 |
{
|
467 |
"cell_type": "code",
|
468 |
-
"execution_count":
|
469 |
"id": "c40d59e1-6d42-41f0-8c9b-70aa026093ae",
|
470 |
"metadata": {},
|
471 |
"outputs": [],
|
@@ -487,20 +482,7 @@
|
|
487 |
},
|
488 |
{
|
489 |
"cell_type": "code",
|
490 |
-
"execution_count":
|
491 |
-
"id": "1a506940-c2b4-4d14-ad93-fd451331c582",
|
492 |
-
"metadata": {},
|
493 |
-
"outputs": [],
|
494 |
-
"source": [
|
495 |
-
"system_content = (\"You are a helpful study assistant. \"\n",
|
496 |
-
" \"You do not respond as 'User' or pretend to be 'User'. \"\n",
|
497 |
-
" \"You only respond once as 'Assistant'.\"\n",
|
498 |
-
")"
|
499 |
-
]
|
500 |
-
},
|
501 |
-
{
|
502 |
-
"cell_type": "code",
|
503 |
-
"execution_count": 10,
|
504 |
"id": "3f592848-8536-4b4d-b34a-adc32d043432",
|
505 |
"metadata": {},
|
506 |
"outputs": [],
|
@@ -510,7 +492,7 @@
|
|
510 |
},
|
511 |
{
|
512 |
"cell_type": "code",
|
513 |
-
"execution_count":
|
514 |
"id": "6c7df81a-fd2f-42bf-b09c-46d7750f7252",
|
515 |
"metadata": {},
|
516 |
"outputs": [],
|
@@ -524,7 +506,7 @@
|
|
524 |
},
|
525 |
{
|
526 |
"cell_type": "code",
|
527 |
-
"execution_count":
|
528 |
"id": "c3106dff-dd6f-47a9-9454-1e61775e7539",
|
529 |
"metadata": {},
|
530 |
"outputs": [],
|
@@ -532,7 +514,7 @@
|
|
532 |
"hi_engine = index.as_query_engine(\n",
|
533 |
" memory=memory,\n",
|
534 |
" system_prompt=system_content,\n",
|
535 |
-
" similarity_top_k=
|
536 |
" streaming=True\n",
|
537 |
")"
|
538 |
]
|
@@ -547,7 +529,7 @@
|
|
547 |
},
|
548 |
{
|
549 |
"cell_type": "code",
|
550 |
-
"execution_count":
|
551 |
"id": "434f0caf-8b1f-40c6-b9ec-b039cd1ca612",
|
552 |
"metadata": {},
|
553 |
"outputs": [],
|
@@ -563,21 +545,14 @@
|
|
563 |
},
|
564 |
{
|
565 |
"cell_type": "code",
|
566 |
-
"execution_count":
|
567 |
"id": "a1c83dff-50d1-47b1-b7e9-4fc5cd08e1e8",
|
568 |
"metadata": {},
|
569 |
-
"outputs": [
|
570 |
-
{
|
571 |
-
"name": "stdout",
|
572 |
-
"output_type": "stream",
|
573 |
-
"text": [
|
574 |
-
"D. To provide for the care of employees\n"
|
575 |
-
]
|
576 |
-
}
|
577 |
-
],
|
578 |
"source": [
|
579 |
-
"
|
580 |
-
"
|
|
|
581 |
]
|
582 |
},
|
583 |
{
|
@@ -591,39 +566,34 @@
|
|
591 |
{
|
592 |
"cell_type": "code",
|
593 |
"execution_count": null,
|
594 |
-
"id": "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
595 |
"metadata": {},
|
596 |
"outputs": [],
|
597 |
"source": []
|
598 |
},
|
599 |
{
|
600 |
"cell_type": "code",
|
601 |
-
"execution_count":
|
602 |
-
"id": "
|
603 |
-
"metadata": {},
|
604 |
-
"outputs": [
|
605 |
-
|
606 |
-
"name": "stderr",
|
607 |
-
"output_type": "stream",
|
608 |
-
"text": [
|
609 |
-
"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
|
610 |
-
"To disable this warning, you can either:\n",
|
611 |
-
"\t- Avoid using `tokenizers` before the fork if possible\n",
|
612 |
-
"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
|
613 |
-
]
|
614 |
-
},
|
615 |
-
{
|
616 |
-
"name": "stdout",
|
617 |
-
"output_type": "stream",
|
618 |
-
"text": [
|
619 |
-
"The correct answer is \"Deductibles apply for all treatments\".\n"
|
620 |
-
]
|
621 |
-
}
|
622 |
-
],
|
623 |
-
"source": [
|
624 |
-
"res = chat_engine.chat(prompt)\n",
|
625 |
-
"print(res.response)"
|
626 |
-
]
|
627 |
},
|
628 |
{
|
629 |
"cell_type": "code",
|
|
|
10 |
},
|
11 |
{
|
12 |
"cell_type": "code",
|
13 |
+
"execution_count": null,
|
14 |
"id": "7de9c591-5a77-4bbe-80f1-4897e15f0b97",
|
15 |
"metadata": {},
|
16 |
"outputs": [],
|
|
|
33 |
"import nest_asyncio\n",
|
34 |
"nest_asyncio.apply()\n",
|
35 |
"\n",
|
36 |
+
"import time\n",
|
37 |
+
"import PyPDF2"
|
38 |
]
|
39 |
},
|
40 |
{
|
41 |
"cell_type": "code",
|
42 |
+
"execution_count": null,
|
43 |
"id": "978152ce-4d87-44b5-b521-dbaff60b32b0",
|
44 |
"metadata": {},
|
45 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
"source": [
|
47 |
"split_content(filepath=\"../raw_documents/answers.txt\", \n",
|
48 |
" separator=\"\\n\\n\", \n",
|
|
|
55 |
},
|
56 |
{
|
57 |
"cell_type": "code",
|
58 |
+
"execution_count": null,
|
59 |
"id": "d925371b-8777-4f5b-a7f2-ec3f228ef266",
|
60 |
"metadata": {},
|
61 |
"outputs": [],
|
|
|
76 |
{
|
77 |
"cell_type": "code",
|
78 |
"execution_count": null,
|
79 |
+
"id": "a83b4fd8-5075-4c52-820c-a3ac7ee7f0c8",
|
80 |
"metadata": {},
|
81 |
"outputs": [],
|
82 |
"source": []
|
83 |
},
|
84 |
{
|
85 |
"cell_type": "code",
|
86 |
+
"execution_count": null,
|
87 |
"id": "3e65dff6-77b6-4be8-8857-5cecf3a035bb",
|
88 |
"metadata": {},
|
89 |
"outputs": [],
|
90 |
"source": [
|
91 |
"# load some documents\n",
|
92 |
+
"if False:\n",
|
93 |
+
" documents = SimpleDirectoryReader(input_files=[\n",
|
94 |
+
" \"../raw_documents/HI Chapter Summary Version 1.3.pdf\",\n",
|
95 |
+
" \"../raw_documents/conversation_examples.txt\",\n",
|
96 |
+
" \"../raw_documents/HI_Knowledge_Base.pdf\",\n",
|
97 |
+
" ] + answers_temp_files + qna_temp_files ).load_data()\n",
|
98 |
+
"else:\n",
|
99 |
+
" reader_summary = PyPDF2.PdfReader(\"../raw_documents/HI Chapter Summary Version 1.3.pdf\")\n",
|
100 |
+
" documents_summary = [ p.extract_text() for p in reader_summary.pages ]\n",
|
101 |
+
"\n",
|
102 |
+
" reader_base = PyPDF2.PdfReader(\"../raw_documents/HI_Knowledge_Base.pdf\")\n",
|
103 |
+
" documents_base = [ p.extract_text() for p in reader_base.pages ]\n",
|
104 |
+
" \n",
|
105 |
+
" documents_txt = SimpleDirectoryReader(input_files=[\n",
|
106 |
+
" \"../raw_documents/conversation_examples.txt\",\n",
|
107 |
+
" \"../raw_documents/qna.txt\",\n",
|
108 |
+
" \"../raw_documents/answers.txt\"\n",
|
109 |
+
" ] ).load_data()\n",
|
110 |
+
" documents_txt = [doc.text for doc in documents_txt]\n",
|
111 |
+
"\n",
|
112 |
+
"document = Document(text=\"\\n\\n\".join(documents_summary + documents_base + documents_txt))"
|
113 |
]
|
114 |
},
|
115 |
{
|
116 |
"cell_type": "code",
|
117 |
+
"execution_count": null,
|
118 |
+
"id": "e485f801-1829-4b50-b6b2-52803203853b",
|
119 |
+
"metadata": {},
|
120 |
+
"outputs": [],
|
121 |
+
"source": []
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"cell_type": "code",
|
125 |
+
"execution_count": null,
|
126 |
"id": "bd86b3f5-1dfc-4257-bd9c-86d34f02398d",
|
127 |
"metadata": {},
|
128 |
"outputs": [],
|
129 |
"source": [
|
130 |
"# initialize client, setting path to save data\n",
|
131 |
+
"db = chromadb.PersistentClient(path=\"../models/chroma_db_advanced_corrected\")"
|
132 |
]
|
133 |
},
|
134 |
{
|
135 |
"cell_type": "code",
|
136 |
+
"execution_count": null,
|
137 |
"id": "f568ce7b-bcbf-455c-acf1-6c2cae129fed",
|
138 |
"metadata": {},
|
139 |
"outputs": [],
|
|
|
144 |
},
|
145 |
{
|
146 |
"cell_type": "code",
|
147 |
+
"execution_count": null,
|
148 |
"id": "ed0b018e-1982-46b2-b1b4-04f5c0ce8672",
|
149 |
"metadata": {},
|
150 |
"outputs": [],
|
|
|
163 |
},
|
164 |
{
|
165 |
"cell_type": "code",
|
166 |
+
"execution_count": null,
|
167 |
"id": "0946b6ce-96ab-44de-ad75-e424a8429f67",
|
168 |
"metadata": {},
|
169 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
170 |
"source": [
|
171 |
"Settings.llm = None\n",
|
172 |
"Settings.chunk_size = 1024\n",
|
|
|
176 |
},
|
177 |
{
|
178 |
"cell_type": "code",
|
179 |
+
"execution_count": null,
|
180 |
"id": "b8c73a2c-1129-406a-8046-085afcaf9cbb",
|
181 |
"metadata": {},
|
182 |
"outputs": [],
|
183 |
"source": [
|
184 |
+
"nodes = Settings.node_parser.get_nodes_from_documents([document])"
|
185 |
]
|
186 |
},
|
187 |
{
|
188 |
"cell_type": "code",
|
189 |
+
"execution_count": null,
|
190 |
"id": "75f1c76f-d3e5-4b69-818c-98865adb1457",
|
191 |
"metadata": {},
|
192 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
193 |
"source": [
|
194 |
"len(nodes)"
|
195 |
]
|
|
|
204 |
},
|
205 |
{
|
206 |
"cell_type": "code",
|
207 |
+
"execution_count": null,
|
208 |
"id": "dab4c6f3-ef67-4d90-b3d5-e290c5d1b6f4",
|
209 |
"metadata": {},
|
210 |
"outputs": [],
|
|
|
214 |
},
|
215 |
{
|
216 |
"cell_type": "code",
|
217 |
+
"execution_count": null,
|
218 |
"id": "6a764113-ad7e-4674-aa57-ebbf405902a8",
|
219 |
"metadata": {},
|
220 |
"outputs": [],
|
|
|
232 |
},
|
233 |
{
|
234 |
"cell_type": "code",
|
235 |
+
"execution_count": null,
|
236 |
"id": "e492ed4a-23a3-47d6-8b50-51fb48b3aa05",
|
237 |
"metadata": {},
|
238 |
"outputs": [],
|
|
|
242 |
},
|
243 |
{
|
244 |
"cell_type": "code",
|
245 |
+
"execution_count": null,
|
246 |
"id": "cbd11b89-9b83-4f08-bb30-160f750f2ffb",
|
247 |
"metadata": {},
|
248 |
"outputs": [],
|
|
|
252 |
},
|
253 |
{
|
254 |
"cell_type": "code",
|
255 |
+
"execution_count": null,
|
256 |
"id": "d3bd848d-9985-4a3d-bdc4-ec340cc69ef3",
|
257 |
"metadata": {},
|
258 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
259 |
"source": [
|
260 |
"indexing_cost = time.time() - start_time\n",
|
261 |
"indexing_cost = indexing_cost / 60\n",
|
|
|
264 |
},
|
265 |
{
|
266 |
"cell_type": "code",
|
267 |
+
"execution_count": null,
|
268 |
"id": "f16cca33-71fb-437d-a033-671b9fd44054",
|
269 |
"metadata": {},
|
270 |
"outputs": [],
|
|
|
274 |
},
|
275 |
{
|
276 |
"cell_type": "code",
|
277 |
+
"execution_count": null,
|
278 |
"id": "3290e870-41d7-49c4-9c4f-cb16bd1f469e",
|
279 |
"metadata": {
|
280 |
"scrolled": true
|
281 |
},
|
282 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
283 |
"source": [
|
284 |
"response = vector_query_engine.query(\"Healthcare System in Singapore consists of?\")\n",
|
285 |
"response"
|
286 |
]
|
287 |
},
|
288 |
+
{
|
289 |
+
"cell_type": "code",
|
290 |
+
"execution_count": null,
|
291 |
+
"id": "d83e2938-61fa-4d02-920d-0ae88a437abc",
|
292 |
+
"metadata": {},
|
293 |
+
"outputs": [],
|
294 |
+
"source": [
|
295 |
+
"response = vector_query_engine.query(\"what is integrated shield plan\")\n",
|
296 |
+
"response"
|
297 |
+
]
|
298 |
+
},
|
299 |
{
|
300 |
"cell_type": "code",
|
301 |
"execution_count": null,
|
|
|
306 |
},
|
307 |
{
|
308 |
"cell_type": "code",
|
309 |
+
"execution_count": null,
|
310 |
"id": "1bb75b04-6a62-43a4-8728-d2e52e49f1c0",
|
311 |
"metadata": {},
|
312 |
"outputs": [],
|
|
|
317 |
},
|
318 |
{
|
319 |
"cell_type": "code",
|
320 |
+
"execution_count": null,
|
321 |
"id": "0ed920fb-6456-49ac-8b63-08bd86b5b39c",
|
322 |
"metadata": {},
|
323 |
"outputs": [],
|
|
|
352 |
},
|
353 |
{
|
354 |
"cell_type": "code",
|
355 |
+
"execution_count": null,
|
356 |
"id": "c1a42c35-5f57-423c-8fb7-7d18b3b466b5",
|
357 |
"metadata": {},
|
358 |
"outputs": [],
|
|
|
369 |
"from llama_index.llms.openai import OpenAI\n",
|
370 |
"from llama_index.core.memory import ChatMemoryBuffer\n",
|
371 |
"\n",
|
372 |
+
"import time\n",
|
373 |
+
"\n",
|
374 |
+
"from prompt_engineering import (\n",
|
375 |
+
" system_content, \n",
|
376 |
+
" textbook_content, \n",
|
377 |
+
" winnie_the_pooh_prompt, \n",
|
378 |
+
" introduction_line\n",
|
379 |
+
")"
|
380 |
]
|
381 |
},
|
382 |
{
|
|
|
389 |
},
|
390 |
{
|
391 |
"cell_type": "code",
|
392 |
+
"execution_count": null,
|
393 |
"id": "d38dc953-b923-4128-86a1-c8c6f69af0ed",
|
394 |
"metadata": {},
|
395 |
"outputs": [],
|
|
|
399 |
},
|
400 |
{
|
401 |
"cell_type": "code",
|
402 |
+
"execution_count": null,
|
403 |
"id": "4c83c613-2cfc-4871-9d07-c82f77a3bd5e",
|
404 |
"metadata": {},
|
405 |
"outputs": [],
|
|
|
409 |
},
|
410 |
{
|
411 |
"cell_type": "code",
|
412 |
+
"execution_count": null,
|
413 |
"id": "0583e9b0-d977-488c-8331-46dfa749924c",
|
414 |
"metadata": {},
|
415 |
"outputs": [],
|
|
|
428 |
},
|
429 |
{
|
430 |
"cell_type": "code",
|
431 |
+
"execution_count": null,
|
432 |
"id": "2159a2b6-494b-41b9-ac54-dd342bfb74ba",
|
433 |
"metadata": {},
|
434 |
"outputs": [],
|
435 |
"source": [
|
436 |
+
"db = chromadb.PersistentClient(path=\"../models/chroma_db_advanced_corrected\")"
|
437 |
]
|
438 |
},
|
439 |
{
|
440 |
"cell_type": "code",
|
441 |
+
"execution_count": null,
|
442 |
"id": "1b385644-b46e-4d13-88fa-9f4af39db405",
|
443 |
"metadata": {},
|
444 |
"outputs": [],
|
|
|
448 |
},
|
449 |
{
|
450 |
"cell_type": "code",
|
451 |
+
"execution_count": null,
|
452 |
"id": "93cb53d1-6b8c-4b2d-a839-53501c0d54b2",
|
453 |
"metadata": {},
|
454 |
"outputs": [],
|
|
|
460 |
},
|
461 |
{
|
462 |
"cell_type": "code",
|
463 |
+
"execution_count": null,
|
464 |
"id": "c40d59e1-6d42-41f0-8c9b-70aa026093ae",
|
465 |
"metadata": {},
|
466 |
"outputs": [],
|
|
|
482 |
},
|
483 |
{
|
484 |
"cell_type": "code",
|
485 |
+
"execution_count": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
486 |
"id": "3f592848-8536-4b4d-b34a-adc32d043432",
|
487 |
"metadata": {},
|
488 |
"outputs": [],
|
|
|
492 |
},
|
493 |
{
|
494 |
"cell_type": "code",
|
495 |
+
"execution_count": null,
|
496 |
"id": "6c7df81a-fd2f-42bf-b09c-46d7750f7252",
|
497 |
"metadata": {},
|
498 |
"outputs": [],
|
|
|
506 |
},
|
507 |
{
|
508 |
"cell_type": "code",
|
509 |
+
"execution_count": null,
|
510 |
"id": "c3106dff-dd6f-47a9-9454-1e61775e7539",
|
511 |
"metadata": {},
|
512 |
"outputs": [],
|
|
|
514 |
"hi_engine = index.as_query_engine(\n",
|
515 |
" memory=memory,\n",
|
516 |
" system_prompt=system_content,\n",
|
517 |
+
" similarity_top_k=20,\n",
|
518 |
" streaming=True\n",
|
519 |
")"
|
520 |
]
|
|
|
529 |
},
|
530 |
{
|
531 |
"cell_type": "code",
|
532 |
+
"execution_count": null,
|
533 |
"id": "434f0caf-8b1f-40c6-b9ec-b039cd1ca612",
|
534 |
"metadata": {},
|
535 |
"outputs": [],
|
|
|
545 |
},
|
546 |
{
|
547 |
"cell_type": "code",
|
548 |
+
"execution_count": null,
|
549 |
"id": "a1c83dff-50d1-47b1-b7e9-4fc5cd08e1e8",
|
550 |
"metadata": {},
|
551 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
552 |
"source": [
|
553 |
+
"response = hi_engine.query(prompt)\n",
|
554 |
+
"for res in response.response_gen:\n",
|
555 |
+
" print(res, end=\"\")"
|
556 |
]
|
557 |
},
|
558 |
{
|
|
|
566 |
{
|
567 |
"cell_type": "code",
|
568 |
"execution_count": null,
|
569 |
+
"id": "91821a22-c1c4-46a6-90f0-c00651afb0f6",
|
570 |
+
"metadata": {},
|
571 |
+
"outputs": [],
|
572 |
+
"source": [
|
573 |
+
"# query_string = \"tell me more about integrated shield plans\"\n",
|
574 |
+
"# query_string = \"how to use CPF\"\n",
|
575 |
+
"query_string = \"what is MediSave\"\n",
|
576 |
+
"\n",
|
577 |
+
"response = hi_engine.query(query_string)\n",
|
578 |
+
"for res in response.response_gen:\n",
|
579 |
+
" print(res, end=\"\")"
|
580 |
+
]
|
581 |
+
},
|
582 |
+
{
|
583 |
+
"cell_type": "code",
|
584 |
+
"execution_count": null,
|
585 |
+
"id": "07969feb-2667-4d7d-a769-953082138988",
|
586 |
"metadata": {},
|
587 |
"outputs": [],
|
588 |
"source": []
|
589 |
},
|
590 |
{
|
591 |
"cell_type": "code",
|
592 |
+
"execution_count": null,
|
593 |
+
"id": "ec53dfcf-d4c0-4d10-a24e-be2004a83656",
|
594 |
+
"metadata": {},
|
595 |
+
"outputs": [],
|
596 |
+
"source": []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
597 |
},
|
598 |
{
|
599 |
"cell_type": "code",
|
notebooks/007_test_hi_content_engine.ipynb
CHANGED
@@ -34,6 +34,12 @@
|
|
34 |
"\n",
|
35 |
"from vision_api import get_transcribed_text\n",
|
36 |
"from qna_prompting import get_qna_question_tool, evaluate_qna_answer_tool\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
"\n",
|
38 |
"import nest_asyncio\n",
|
39 |
"nest_asyncio.apply()"
|
@@ -106,18 +112,20 @@
|
|
106 |
"\n",
|
107 |
" index = VectorStoreIndex(nodes, storage_context=storage_context)\n",
|
108 |
" \n",
|
109 |
-
" memory = ChatMemoryBuffer.from_defaults(token_limit=
|
110 |
" hi_content_engine = index.as_query_engine(\n",
|
111 |
" memory=memory,\n",
|
112 |
" system_prompt=system_content,\n",
|
113 |
-
" similarity_top_k=
|
|
|
114 |
" streaming=True\n",
|
115 |
" )\n",
|
116 |
" hi_textbook_query_description = \"\"\"\n",
|
117 |
-
" Use this tool to extract content from
|
|
|
118 |
" that has 15 chapters in total. When user wants to learn more about a \n",
|
119 |
" particular chapter, this tool will help to assist user to get better\n",
|
120 |
-
" understanding of the content of the textbook
|
121 |
" \"\"\"\n",
|
122 |
" \n",
|
123 |
" hi_query_tool = QueryEngineTool.from_defaults(\n",
|
@@ -195,32 +203,10 @@
|
|
195 |
"input_files = [\"./raw_documents/HI Chapter Summary Version 1.3.pdf\",\n",
|
196 |
" \"./raw_documents/qna.txt\"]\n",
|
197 |
"embedding_model = \"BAAI/bge-small-en-v1.5\"\n",
|
198 |
-
"persisted_vector_db = \"../models/
|
199 |
-
"
|
200 |
-
"
|
201 |
-
"
|
202 |
-
" \"You do not respond as 'User' or pretend to be 'User'. \"\n",
|
203 |
-
" \"You only respond once as 'Assistant'.\"\n",
|
204 |
-
")\n",
|
205 |
-
"textbook_content = (\n",
|
206 |
-
" \"The content of the textbook `Health Insurance 7th Edition` are as follows,\"\n",
|
207 |
-
" \"- Chapter 1: Overview Of Healthcare Environment In Singapore\"\n",
|
208 |
-
" \"- Chapter 2: Medical Expense Insurance\"\n",
|
209 |
-
" \"- Chapter 3: Group Medical Expense Insurance\"\n",
|
210 |
-
" \"- Chapter 4: Disability Income Insurance\"\n",
|
211 |
-
" \"- Chapter 5: Long-Term Care Insurance \"\n",
|
212 |
-
" \"- Chapter 6: Critical Illness Insurance\"\n",
|
213 |
-
" \"- Chapter 7: Other Types Of Health Insurance\"\n",
|
214 |
-
" \"- Chapter 8: Managed Healthcare\"\n",
|
215 |
-
" \"- Chapter 9: Part I Healthcare Financing\"\n",
|
216 |
-
" \"- Chapter 9: Part II Healthcare Financing\"\n",
|
217 |
-
" \"- Chapter 10: Common Policy Provisions\"\n",
|
218 |
-
" \"- Chapter 11: Health Insurance Pricing\"\n",
|
219 |
-
" \"- Chapter 12: Health Insurance Underwriting\"\n",
|
220 |
-
" \"- Chapter 13: Notice No: MAS 120 Disclosure And Advisory Process - Requirements For Accident And Health Insurance Products\"\n",
|
221 |
-
" \"- Chapter 14: Financial Needs Analysis\"\n",
|
222 |
-
" \"- Chapter 15: Case Studies\"\n",
|
223 |
-
")"
|
224 |
]
|
225 |
},
|
226 |
{
|
@@ -292,6 +278,14 @@
|
|
292 |
")"
|
293 |
]
|
294 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
295 |
{
|
296 |
"cell_type": "code",
|
297 |
"execution_count": null,
|
@@ -338,36 +332,39 @@
|
|
338 |
{
|
339 |
"cell_type": "code",
|
340 |
"execution_count": null,
|
341 |
-
"id": "
|
342 |
"metadata": {},
|
343 |
"outputs": [],
|
344 |
"source": [
|
345 |
-
"
|
|
|
|
|
346 |
]
|
347 |
},
|
348 |
{
|
349 |
"cell_type": "code",
|
350 |
"execution_count": null,
|
351 |
-
"id": "
|
352 |
"metadata": {},
|
353 |
"outputs": [],
|
354 |
-
"source": [
|
355 |
-
"for res in response.response_gen:\n",
|
356 |
-
" print(res, end=\"\")"
|
357 |
-
]
|
358 |
},
|
359 |
{
|
360 |
"cell_type": "code",
|
361 |
"execution_count": null,
|
362 |
-
"id": "
|
363 |
"metadata": {},
|
364 |
"outputs": [],
|
365 |
-
"source": [
|
|
|
|
|
|
|
|
|
366 |
},
|
367 |
{
|
368 |
"cell_type": "code",
|
369 |
"execution_count": null,
|
370 |
-
"id": "
|
371 |
"metadata": {},
|
372 |
"outputs": [],
|
373 |
"source": []
|
@@ -379,7 +376,9 @@
|
|
379 |
"metadata": {},
|
380 |
"outputs": [],
|
381 |
"source": [
|
382 |
-
"response = agent.stream_chat(
|
|
|
|
|
383 |
]
|
384 |
},
|
385 |
{
|
@@ -388,10 +387,7 @@
|
|
388 |
"id": "eff8bb8d-a2d1-428a-9c3d-193389378288",
|
389 |
"metadata": {},
|
390 |
"outputs": [],
|
391 |
-
"source": [
|
392 |
-
"for res in response.response_gen:\n",
|
393 |
-
" print(res, end=\"\")"
|
394 |
-
]
|
395 |
},
|
396 |
{
|
397 |
"cell_type": "code",
|
|
|
34 |
"\n",
|
35 |
"from vision_api import get_transcribed_text\n",
|
36 |
"from qna_prompting import get_qna_question_tool, evaluate_qna_answer_tool\n",
|
37 |
+
"from prompt_engineering import (\n",
|
38 |
+
" system_content, \n",
|
39 |
+
" textbook_content, \n",
|
40 |
+
" winnie_the_pooh_prompt, \n",
|
41 |
+
" introduction_line\n",
|
42 |
+
")\n",
|
43 |
"\n",
|
44 |
"import nest_asyncio\n",
|
45 |
"nest_asyncio.apply()"
|
|
|
112 |
"\n",
|
113 |
" index = VectorStoreIndex(nodes, storage_context=storage_context)\n",
|
114 |
" \n",
|
115 |
+
" memory = ChatMemoryBuffer.from_defaults(token_limit=100_000)\n",
|
116 |
" hi_content_engine = index.as_query_engine(\n",
|
117 |
" memory=memory,\n",
|
118 |
" system_prompt=system_content,\n",
|
119 |
+
" similarity_top_k=10,\n",
|
120 |
+
" verbose=True,\n",
|
121 |
" streaming=True\n",
|
122 |
" )\n",
|
123 |
" hi_textbook_query_description = \"\"\"\n",
|
124 |
+
" Use this tool to extract content from the query engine,\n",
|
125 |
+
" which is built by ingesting textbook content from `Health Insurance 7th Edition`,\n",
|
126 |
" that has 15 chapters in total. When user wants to learn more about a \n",
|
127 |
" particular chapter, this tool will help to assist user to get better\n",
|
128 |
+
" understanding of the content of the textbook. \n",
|
129 |
" \"\"\"\n",
|
130 |
" \n",
|
131 |
" hi_query_tool = QueryEngineTool.from_defaults(\n",
|
|
|
203 |
"input_files = [\"./raw_documents/HI Chapter Summary Version 1.3.pdf\",\n",
|
204 |
" \"./raw_documents/qna.txt\"]\n",
|
205 |
"embedding_model = \"BAAI/bge-small-en-v1.5\"\n",
|
206 |
+
"persisted_vector_db = \"../models/chroma_db_advanced_corrected\"\n",
|
207 |
+
"\n",
|
208 |
+
"# fine_tuned_path = \"local:../models/fine-tuned-embeddings\"\n",
|
209 |
+
"fine_tuned_path = \"local:../models/fine-tuned-embeddings-advanced\""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
210 |
]
|
211 |
},
|
212 |
{
|
|
|
278 |
")"
|
279 |
]
|
280 |
},
|
281 |
+
{
|
282 |
+
"cell_type": "code",
|
283 |
+
"execution_count": null,
|
284 |
+
"id": "a49ed30e-a631-4618-a79e-adab02114d8d",
|
285 |
+
"metadata": {},
|
286 |
+
"outputs": [],
|
287 |
+
"source": []
|
288 |
+
},
|
289 |
{
|
290 |
"cell_type": "code",
|
291 |
"execution_count": null,
|
|
|
332 |
{
|
333 |
"cell_type": "code",
|
334 |
"execution_count": null,
|
335 |
+
"id": "66c8881d-fc57-4e95-ad86-110c4818e2fe",
|
336 |
"metadata": {},
|
337 |
"outputs": [],
|
338 |
"source": [
|
339 |
+
"# query_string = \"tell me more about integrated shield plans\"\n",
|
340 |
+
"query_string = \"how to use CPF\"\n",
|
341 |
+
"# query_string = \"what is MediSave\""
|
342 |
]
|
343 |
},
|
344 |
{
|
345 |
"cell_type": "code",
|
346 |
"execution_count": null,
|
347 |
+
"id": "9cbd338b-bee5-4c06-9934-a0e27fd518d3",
|
348 |
"metadata": {},
|
349 |
"outputs": [],
|
350 |
+
"source": []
|
|
|
|
|
|
|
351 |
},
|
352 |
{
|
353 |
"cell_type": "code",
|
354 |
"execution_count": null,
|
355 |
+
"id": "5902ffd2-2f66-4b89-bf7f-a05e3fdeccaa",
|
356 |
"metadata": {},
|
357 |
"outputs": [],
|
358 |
+
"source": [
|
359 |
+
"response = hi_content_engine.query(query_string)\n",
|
360 |
+
"for res in response.response_gen:\n",
|
361 |
+
" print(res, end=\"\")"
|
362 |
+
]
|
363 |
},
|
364 |
{
|
365 |
"cell_type": "code",
|
366 |
"execution_count": null,
|
367 |
+
"id": "0e75453b-85c7-4e1c-8683-6df45a13cacb",
|
368 |
"metadata": {},
|
369 |
"outputs": [],
|
370 |
"source": []
|
|
|
376 |
"metadata": {},
|
377 |
"outputs": [],
|
378 |
"source": [
|
379 |
+
"response = agent.stream_chat(query_string, tool_choice=\"auto\")\n",
|
380 |
+
"for res in response.response_gen:\n",
|
381 |
+
" print(res, end=\"\")"
|
382 |
]
|
383 |
},
|
384 |
{
|
|
|
387 |
"id": "eff8bb8d-a2d1-428a-9c3d-193389378288",
|
388 |
"metadata": {},
|
389 |
"outputs": [],
|
390 |
+
"source": []
|
|
|
|
|
|
|
391 |
},
|
392 |
{
|
393 |
"cell_type": "code",
|
requirements.txt
CHANGED
@@ -185,6 +185,7 @@ PyMuPDF==1.23.22
|
|
185 |
PyMuPDFb==1.23.22
|
186 |
pyparsing==3.1.1
|
187 |
pypdf==4.0.1
|
|
|
188 |
PyPika==0.48.9
|
189 |
pyproject_hooks==1.0.0
|
190 |
python-dateutil==2.8.2
|
|
|
185 |
PyMuPDFb==1.23.22
|
186 |
pyparsing==3.1.1
|
187 |
pypdf==4.0.1
|
188 |
+
PyPDF2==3.0.1
|
189 |
PyPika==0.48.9
|
190 |
pyproject_hooks==1.0.0
|
191 |
python-dateutil==2.8.2
|