ionosphere committed on
Commit
68ed2d8
1 Parent(s): 56f059f
Files changed (6)
  1. .gitignore +11 -0
  2. RAG_basic.ipynb +561 -0
  3. RAG_basic_no_langchain.ipynb +468 -0
  4. README.md +28 -1
  5. app.py +100 -0
  6. requirements.txt +5 -0
.gitignore ADDED
@@ -0,0 +1,11 @@
1
+ # See https://help.github.com/articles/ignoring-files for more about ignoring
2
+ # files.
3
+ #
4
+ # If you find yourself ignoring temporary files generated by your text editor
5
+ # or operating system, you probably want to add a global ignore instead:
6
+ # git config --global core.excludesfile '~/.gitignore_global'
7
+
8
+ .env
9
+ /gradio_cached_examples
10
+
11
+
RAG_basic.ipynb ADDED
@@ -0,0 +1,561 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "b880d1ed-3db0-45a1-807e-1b47e9ce1320",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "! pip install faiss-cpu, mistralai"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 40,
16
+ "id": "851612c3-ee93-42e3-a1fb-481f89c9410f",
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": [
20
+ "from mistralai.client import MistralClient, ChatMessage\n",
21
+ "import requests\n",
22
+ "import numpy as np\n",
23
+ "import faiss\n",
24
+ "import os\n",
25
+ "\n",
26
+ "api_key=os.environ[\"MISTRAL_API_KEY\"]\n",
27
+ "client = MistralClient(api_key=api_key)"
28
+ ]
29
+ },
30
+ {
31
+ "cell_type": "markdown",
32
+ "id": "01b27964-b40f-41d5-ba20-cec93ca25dc5",
33
+ "metadata": {},
34
+ "source": [
35
+ "# 1. RAG from scratch"
36
+ ]
37
+ },
38
+ {
39
+ "cell_type": "markdown",
40
+ "id": "fe8609d5-9f27-4202-b0be-36db34412998",
41
+ "metadata": {},
42
+ "source": [
43
+ "## Get data"
44
+ ]
45
+ },
46
+ {
47
+ "cell_type": "code",
48
+ "execution_count": 3,
49
+ "id": "c4c01740-72b4-482c-b61e-e272a734f01f",
50
+ "metadata": {},
51
+ "outputs": [],
52
+ "source": [
53
+ "response = requests.get('https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt')\n",
54
+ "text = response.text"
55
+ ]
56
+ },
57
+ {
58
+ "cell_type": "code",
59
+ "execution_count": 130,
60
+ "id": "f03f47af-a20b-4122-a114-74b9748ff543",
61
+ "metadata": {},
62
+ "outputs": [
63
+ {
64
+ "data": {
65
+ "text/plain": [
66
+ "75014"
67
+ ]
68
+ },
69
+ "execution_count": 130,
70
+ "metadata": {},
71
+ "output_type": "execute_result"
72
+ }
73
+ ],
74
+ "source": [
75
+ "len(text)"
76
+ ]
77
+ },
78
+ {
79
+ "cell_type": "markdown",
80
+ "id": "aad1aa61-9e1c-46c8-ae5e-61855df440f9",
81
+ "metadata": {},
82
+ "source": [
83
+ "## Split document into chunks"
84
+ ]
85
+ },
86
+ {
87
+ "cell_type": "code",
88
+ "execution_count": 4,
89
+ "id": "8494655e-bd87-49de-8f1d-69ffbc1c256e",
90
+ "metadata": {},
91
+ "outputs": [],
92
+ "source": [
93
+ "chunk_size = 512\n",
94
+ "chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]"
95
+ ]
96
+ },
97
+ {
98
+ "cell_type": "markdown",
99
+ "id": "4176cbe3-9b15-4d17-afb1-665011d09bb7",
100
+ "metadata": {},
101
+ "source": [
102
+ "## Create embeddings for each text chunk"
103
+ ]
104
+ },
105
+ {
106
+ "cell_type": "code",
107
+ "execution_count": 5,
108
+ "id": "e77d9805-7a53-4210-9f80-f4de52285588",
109
+ "metadata": {},
110
+ "outputs": [],
111
+ "source": [
112
+ "def get_text_embedding(input):\n",
113
+ " embeddings_batch_response = client.embeddings(\n",
114
+ " model=\"mistral-embed\",\n",
115
+ " input=input\n",
116
+ " )\n",
117
+ " return embeddings_batch_response.data[0].embedding"
118
+ ]
119
+ },
120
+ {
121
+ "cell_type": "code",
122
+ "execution_count": 6,
123
+ "id": "46503830-6ad5-493e-a629-152721e2d88e",
124
+ "metadata": {},
125
+ "outputs": [],
126
+ "source": [
127
+ "text_embeddings = np.array([get_text_embedding(chunk) for chunk in chunks])"
128
+ ]
129
+ },
130
+ {
131
+ "cell_type": "code",
132
+ "execution_count": 9,
133
+ "id": "ca875993-fe6d-42df-811e-a43891cd0350",
134
+ "metadata": {},
135
+ "outputs": [
136
+ {
137
+ "data": {
138
+ "text/plain": [
139
+ "(147, 1024)"
140
+ ]
141
+ },
142
+ "execution_count": 9,
143
+ "metadata": {},
144
+ "output_type": "execute_result"
145
+ }
146
+ ],
147
+ "source": [
148
+ "text_embeddings.shape"
149
+ ]
150
+ },
151
+ {
152
+ "cell_type": "code",
153
+ "execution_count": 10,
154
+ "id": "55396758-c3f3-45b3-b6e7-d4912c0899f2",
155
+ "metadata": {},
156
+ "outputs": [
157
+ {
158
+ "data": {
159
+ "text/plain": [
160
+ "array([[-0.04849243, 0.07305908, 0.01568604, ..., -0.0234375 ,\n",
161
+ " -0.02072144, -0.01068115],\n",
162
+ " [-0.04660034, 0.04846191, -0.00045729, ..., -0.00754929,\n",
163
+ " -0.00577545, 0.01355743],\n",
164
+ " [-0.02139282, 0.0625 , 0.00907898, ..., -0.02233887,\n",
165
+ " -0.00765228, -0.00793457],\n",
166
+ " ...,\n",
167
+ " [-0.02787781, 0.04260254, 0.00785828, ..., -0.00067568,\n",
168
+ " -0.01176453, -0.02828979],\n",
169
+ " [-0.02966309, 0.06292725, 0.03979492, ..., -0.01296997,\n",
170
+ " -0.00264549, -0.03845215],\n",
171
+ " [-0.06185913, 0.05847168, 0.03988647, ..., -0.04724121,\n",
172
+ " -0.01289368, -0.02728271]])"
173
+ ]
174
+ },
175
+ "execution_count": 10,
176
+ "metadata": {},
177
+ "output_type": "execute_result"
178
+ }
179
+ ],
180
+ "source": [
181
+ "text_embeddings"
182
+ ]
183
+ },
184
+ {
185
+ "cell_type": "markdown",
186
+ "id": "1cba33c7-9d1d-44d8-a01e-e30f16be1aac",
187
+ "metadata": {},
188
+ "source": [
189
+ "## Load into a vector database"
190
+ ]
191
+ },
192
+ {
193
+ "cell_type": "code",
194
+ "execution_count": 11,
195
+ "id": "6a5b1877-b113-4527-9055-cae9049fef08",
196
+ "metadata": {},
197
+ "outputs": [],
198
+ "source": [
199
+ "d = text_embeddings.shape[1]\n",
200
+ "index = faiss.IndexFlatL2(d)\n",
201
+ "index.add(text_embeddings)"
202
+ ]
203
+ },
204
+ {
205
+ "cell_type": "markdown",
206
+ "id": "5ee023ab-b26c-4df5-8a7b-7dd660bfad86",
207
+ "metadata": {},
208
+ "source": [
209
+ "## Create embeddings for a question"
210
+ ]
211
+ },
212
+ {
213
+ "cell_type": "code",
214
+ "execution_count": 12,
215
+ "id": "894d9764-9da9-4629-8f2a-c9dcaf6ceb8d",
216
+ "metadata": {},
217
+ "outputs": [
218
+ {
219
+ "data": {
220
+ "text/plain": [
221
+ "(1, 1024)"
222
+ ]
223
+ },
224
+ "execution_count": 12,
225
+ "metadata": {},
226
+ "output_type": "execute_result"
227
+ }
228
+ ],
229
+ "source": [
230
+ "question = \"What were the two main things the author worked on before college?\"\n",
231
+ "question_embeddings = np.array([get_text_embedding(question)])\n",
232
+ "question_embeddings.shape"
233
+ ]
234
+ },
235
+ {
236
+ "cell_type": "code",
237
+ "execution_count": 13,
238
+ "id": "9c4948cc-6d8b-449f-bc00-abb3591c7222",
239
+ "metadata": {},
240
+ "outputs": [
241
+ {
242
+ "data": {
243
+ "text/plain": [
244
+ "array([[-0.05456543, 0.03518677, 0.03723145, ..., -0.02763367,\n",
245
+ " -0.00327873, 0.00323677]])"
246
+ ]
247
+ },
248
+ "execution_count": 13,
249
+ "metadata": {},
250
+ "output_type": "execute_result"
251
+ }
252
+ ],
253
+ "source": [
254
+ "question_embeddings"
255
+ ]
256
+ },
257
+ {
258
+ "cell_type": "markdown",
259
+ "id": "15989e10-d0ec-41be-b6be-fa317565a926",
260
+ "metadata": {},
261
+ "source": [
262
+ "## Retrieve similar chunks from the vector database"
263
+ ]
264
+ },
265
+ {
266
+ "cell_type": "code",
267
+ "execution_count": 35,
268
+ "id": "c930b378-7aac-434c-881b-ab69d3edb93d",
269
+ "metadata": {},
270
+ "outputs": [
271
+ {
272
+ "name": "stdout",
273
+ "output_type": "stream",
274
+ "text": [
275
+ "[[ 0 90]]\n"
276
+ ]
277
+ }
278
+ ],
279
+ "source": [
280
+ "D, I = index.search(question_embeddings, k=2) \n",
281
+ "print(I)"
282
+ ]
283
+ },
284
+ {
285
+ "cell_type": "code",
286
+ "execution_count": 43,
287
+ "id": "73aab584-1dbf-4532-b41e-0403eeeeb567",
288
+ "metadata": {},
289
+ "outputs": [
290
+ {
291
+ "name": "stdout",
292
+ "output_type": "stream",
293
+ "text": [
294
+ "['\\n\\nWhat I Worked On\\n\\nFebruary 2021\\n\\nBefore college the two main things I worked on, outside of school, were writing and programming. I didn\\'t write essays. I wrote what beginning writers were supposed to write then, and probably still are: short stories. My stories were awful. They had hardly any plot, just characters with strong feelings, which I imagined made them deep.\\n\\nThe first programs I tried writing were on the IBM 1401 that our school district used for what was then called \"data processing.\" This wa', \"king on things that aren't prestigious doesn't guarantee you're on the right track, it at least guarantees you're not on the most common type of wrong one.\\n\\nOver the next several years I wrote lots of essays about all kinds of different topics. O'Reilly reprinted a collection of them as a book, called Hackers & Painters after one of the essays in it. I also worked on spam filters, and did some more painting. I used to have dinners for a group of friends every thursday night, which taught me how to cook for \"]\n"
295
+ ]
296
+ }
297
+ ],
298
+ "source": [
299
+ "retrieved_chunk = [chunks[i] for i in I.tolist()[0]]\n",
300
+ "print(retrieved_chunk)"
301
+ ]
302
+ },
303
+ {
304
+ "cell_type": "markdown",
305
+ "id": "4b417a59-021a-411d-a491-cb31815192cd",
306
+ "metadata": {},
307
+ "source": [
308
+ "## Combine context and question in a prompt and generate response"
309
+ ]
310
+ },
311
+ {
312
+ "cell_type": "code",
313
+ "execution_count": 37,
314
+ "id": "da042a53-4564-4057-9a60-9b57dffff6a1",
315
+ "metadata": {},
316
+ "outputs": [],
317
+ "source": [
318
+ "prompt = f\"\"\"\n",
319
+ "Context information is below.\n",
320
+ "---------------------\n",
321
+ "{retrieved_chunk}\n",
322
+ "---------------------\n",
323
+ "Given the context information and not prior knowledge, answer the query.\n",
324
+ "Query: {question}\n",
325
+ "Answer:\n",
326
+ "\"\"\""
327
+ ]
328
+ },
329
+ {
330
+ "cell_type": "code",
331
+ "execution_count": 41,
332
+ "id": "e77d975b-5f69-4e9c-8b94-97214517eac7",
333
+ "metadata": {},
334
+ "outputs": [],
335
+ "source": [
336
+ "def run_mistral(user_message, model=\"mistral-medium\"):\n",
337
+ " messages = [\n",
338
+ " ChatMessage(role=\"user\", content=user_message)\n",
339
+ " ]\n",
340
+ " chat_response = client.chat(\n",
341
+ " model=model,\n",
342
+ " messages=messages,\n",
343
+ " temperature=1, \n",
344
+ " # max_tokens=100\n",
345
+ " )\n",
346
+ " return (chat_response.choices[0].message.content)"
347
+ ]
348
+ },
349
+ {
350
+ "cell_type": "code",
351
+ "execution_count": 42,
352
+ "id": "1c5c20aa-6673-4105-9c10-886a1e18da8a",
353
+ "metadata": {},
354
+ "outputs": [
355
+ {
356
+ "data": {
357
+ "text/plain": [
358
+ "'The two main things the author worked on before college were writing and programming. Specifically, the author wrote short stories and tried writing programs on an IBM 1401.'"
359
+ ]
360
+ },
361
+ "execution_count": 42,
362
+ "metadata": {},
363
+ "output_type": "execute_result"
364
+ }
365
+ ],
366
+ "source": [
367
+ "run_mistral(prompt)"
368
+ ]
369
+ },
370
+ {
371
+ "cell_type": "markdown",
372
+ "id": "4e3b531c-4730-4108-ae8a-8de6563e085b",
373
+ "metadata": {},
374
+ "source": [
375
+ "# 2. LlamaIndex"
376
+ ]
377
+ },
378
+ {
379
+ "cell_type": "code",
380
+ "execution_count": null,
381
+ "id": "152c2a1e-9564-459c-9ea9-5208da519a90",
382
+ "metadata": {},
383
+ "outputs": [],
384
+ "source": [
385
+ "!pip install llama-index"
386
+ ]
387
+ },
388
+ {
389
+ "cell_type": "code",
390
+ "execution_count": 73,
391
+ "id": "96003762-acac-4886-964b-2d6a67f6f724",
392
+ "metadata": {},
393
+ "outputs": [
394
+ {
395
+ "name": "stdout",
396
+ "output_type": "stream",
397
+ "text": [
398
+ "--2024-01-16 18:40:06-- https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt\n",
399
+ "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.109.133, 185.199.108.133, ...\n",
400
+ "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n",
401
+ "HTTP request sent, awaiting response... 200 OK\n",
402
+ "Length: 75042 (73K) [text/plain]\n",
403
+ "Saving to: ‘pg_essay.txt’\n",
404
+ "\n",
405
+ "pg_essay.txt 100%[===================>] 73,28K --.-KB/s in 0,01s \n",
406
+ "\n",
407
+ "2024-01-16 18:40:07 (5,45 MB/s) - ‘pg_essay.txt’ saved [75042/75042]\n",
408
+ "\n",
409
+ "The two main things the author worked on before college, outside of school, were writing and programming. In terms of writing, they wrote short stories, which they described as having hardly any plot and mainly featuring characters with strong feelings. As for programming, they tried writing programs on an IBM 1401 in 9th grade using an early version of Fortran. They typed programs on punch cards and stacked them in the card reader to load the program into memory and run it. However, they couldn't figure out what to do with the 1401 and didn't have any data stored on punched cards, so they didn't do much with it.\n"
410
+ ]
411
+ }
412
+ ],
413
+ "source": [
414
+ "from llama_index import VectorStoreIndex, SimpleDirectoryReader\n",
415
+ "from llama_index.llms import MistralAI\n",
416
+ "from llama_index.embeddings import MistralAIEmbedding\n",
417
+ "from llama_index import ServiceContext\n",
418
+ "from llama_index.query_engine import RetrieverQueryEngine\n",
419
+ "\n",
420
+ "# Load data\n",
421
+ "!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt' -O pg_essay.txt\n",
422
+ "reader = SimpleDirectoryReader(input_files=[\"pg_essay.txt\"])\n",
423
+ "documents = reader.load_data()\n",
424
+ "# Define LLM and embedding model \n",
425
+ "llm = MistralAI(api_key=api_key,model=\"mistral-medium\")\n",
426
+ "embed_model = MistralAIEmbedding(model_name='mistral-embed', api_key=api_key)\n",
427
+ "service_context = ServiceContext.from_defaults(llm=llm,embed_model=embed_model)\n",
428
+ "# Create vector store index \n",
429
+ "index = VectorStoreIndex.from_documents(documents, service_context=service_context)\n",
430
+ "# Create query engine\n",
431
+ "query_engine = index.as_query_engine(similarity_top_k=2)\n",
432
+ "response = query_engine.query(\n",
433
+ " \"What were the two main things the author worked on before college?\"\n",
434
+ ")\n",
435
+ "print(str(response))"
436
+ ]
437
+ },
438
+ {
439
+ "cell_type": "markdown",
440
+ "id": "3c8f1701-897f-43ff-8101-6ec503995e23",
441
+ "metadata": {},
442
+ "source": [
443
+ "# 3. LangChain"
444
+ ]
445
+ },
446
+ {
447
+ "cell_type": "code",
448
+ "execution_count": null,
449
+ "id": "edcb2ef1-f7aa-4b49-96a6-850cda03bf6d",
450
+ "metadata": {},
451
+ "outputs": [],
452
+ "source": [
453
+ "! pip install langchain, langchain_mistralai"
454
+ ]
455
+ },
456
+ {
457
+ "cell_type": "code",
458
+ "execution_count": 134,
459
+ "id": "2e9a4f96-7bcf-452d-85b8-fe89990a5dbc",
460
+ "metadata": {},
461
+ "outputs": [
462
+ {
463
+ "name": "stdout",
464
+ "output_type": "stream",
465
+ "text": [
466
+ "--2024-01-19 17:46:24-- https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt\n",
467
+ "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.111.133, 185.199.110.133, ...\n",
468
+ "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.\n",
469
+ "HTTP request sent, awaiting response... 200 OK\n",
470
+ "Length: 75042 (73K) [text/plain]\n",
471
+ "Saving to: ‘pg_essay.txt’\n",
472
+ "\n",
473
+ "pg_essay.txt 100%[===================>] 73,28K --.-KB/s in 0,009s \n",
474
+ "\n",
475
+ "2024-01-19 17:46:25 (7,57 MB/s) - ‘pg_essay.txt’ saved [75042/75042]\n",
476
+ "\n",
477
+ "The two main things the author worked on before college were writing and programming. Specifically, they wrote short stories and created spam filters, and they also did some painting.\n"
478
+ ]
479
+ }
480
+ ],
481
+ "source": [
482
+ "from langchain_community.document_loaders import TextLoader\n",
483
+ "from langchain_mistralai.chat_models import ChatMistralAI\n",
484
+ "from langchain_mistralai.embeddings import MistralAIEmbeddings\n",
485
+ "from langchain_community.vectorstores import FAISS\n",
486
+ "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
487
+ "from langchain.chains.combine_documents import create_stuff_documents_chain\n",
488
+ "from langchain_core.prompts import ChatPromptTemplate\n",
489
+ "from langchain.chains import create_retrieval_chain\n",
490
+ "from langchain.text_splitter import CharacterTextSplitter\n",
491
+ "\n",
492
+ "!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt' -O pg_essay.txt\n",
493
+ "loader = TextLoader(\"pg_essay.txt\")\n",
494
+ "docs = loader.load()\n",
495
+ "\n",
496
+ "# text_splitter = RecursiveCharacterTextSplitter()\n",
497
+ "text_splitter = CharacterTextSplitter(\n",
498
+ " separator=\"\\n\\n\",\n",
499
+ " chunk_size=500,\n",
500
+ " chunk_overlap=200,\n",
501
+ " length_function=len,\n",
502
+ " is_separator_regex=False,\n",
503
+ ")\n",
504
+ "documents = text_splitter.split_documents(docs)\n",
505
+ "\n",
506
+ "# Our embedding model doesnt work :( \n",
507
+ "# embeddings = MistralAIEmbeddings(model=\"mistral-embed\", mistral_api_key=api_key)\n",
508
+ "import os\n",
509
+ "embeddings = OpenAIEmbeddings()\n",
510
+ "\n",
511
+ "\n",
512
+ "vector = FAISS.from_documents(documents, embeddings)\n",
513
+ "retriever = vector.as_retriever()\n",
514
+ "model = ChatMistralAI(mistral_api_key=api_key)\n",
515
+ "\n",
516
+ "prompt = ChatPromptTemplate.from_template(\"\"\"Answer the following question based only on the provided context:\n",
517
+ "\n",
518
+ "<context>\n",
519
+ "{context}\n",
520
+ "</context>\n",
521
+ "\n",
522
+ "Question: {input}\"\"\")\n",
523
+ "\n",
524
+ "document_chain = create_stuff_documents_chain(model, prompt)\n",
526
+ "retrieval_chain = create_retrieval_chain(retriever, document_chain)\n",
527
+ "response = retrieval_chain.invoke({\"input\": \"What were the two main things the author worked on before college?\"})\n",
528
+ "print(response[\"answer\"])\n"
529
+ ]
530
+ },
531
+ {
532
+ "cell_type": "code",
533
+ "execution_count": null,
534
+ "id": "ec63c4a9-9c04-4707-a92f-e339b14054fd",
535
+ "metadata": {},
536
+ "outputs": [],
537
+ "source": []
538
+ }
539
+ ],
540
+ "metadata": {
541
+ "kernelspec": {
542
+ "display_name": "Python 3 (ipykernel)",
543
+ "language": "python",
544
+ "name": "python3"
545
+ },
546
+ "language_info": {
547
+ "codemirror_mode": {
548
+ "name": "ipython",
549
+ "version": 3
550
+ },
551
+ "file_extension": ".py",
552
+ "mimetype": "text/x-python",
553
+ "name": "python",
554
+ "nbconvert_exporter": "python",
555
+ "pygments_lexer": "ipython3",
556
+ "version": "3.10.13"
557
+ }
558
+ },
559
+ "nbformat": 4,
560
+ "nbformat_minor": 5
561
+ }
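For quick reference, the whole "RAG from scratch" walkthrough above collapses into one short script. This is a minimal sketch, assuming the same pre-1.0 `mistralai` client API the notebook itself uses (`MistralClient`, `client.embeddings`, `client.chat`); the explicit `float32` casts are an addition, since FAISS indexes expect 32-bit vectors:

```python
import os

import faiss
import numpy as np
import requests
from mistralai.client import MistralClient
from mistralai.models.chat_completion import ChatMessage

client = MistralClient(api_key=os.environ["MISTRAL_API_KEY"])

# 1. Get the essay and split it into fixed-size character chunks.
text = requests.get(
    "https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt"
).text
chunk_size = 512
chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]

def embed(s):
    # mistral-embed returns 1024-dimensional vectors
    return client.embeddings(model="mistral-embed", input=s).data[0].embedding

# 2. Index the chunk embeddings in a flat L2 FAISS index.
vectors = np.array([embed(c) for c in chunks], dtype="float32")
index = faiss.IndexFlatL2(vectors.shape[1])
index.add(vectors)

# 3. Embed the question and retrieve the two nearest chunks.
question = "What were the two main things the author worked on before college?"
_, ids = index.search(np.array([embed(question)], dtype="float32"), k=2)
context = "\n".join(chunks[i] for i in ids[0])

# 4. Stuff the retrieved context into the prompt and generate an answer.
prompt = f"""
Context information is below.
---------------------
{context}
---------------------
Given the context information and not prior knowledge, answer the query.
Query: {question}
Answer:
"""
print(client.chat(
    model="mistral-medium",
    messages=[ChatMessage(role="user", content=prompt)],
).choices[0].message.content)
```

Embedding the 147 chunks one request at a time is the slow part; the embeddings endpoint also accepts a list of inputs, so batching the chunks is the natural optimization.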
RAG_basic_no_langchain.ipynb ADDED
@@ -0,0 +1,468 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "b880d1ed-3db0-45a1-807e-1b47e9ce1320",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "! pip install faiss-cpu, mistralai"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 40,
16
+ "id": "851612c3-ee93-42e3-a1fb-481f89c9410f",
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": [
20
+ "from mistralai.client import MistralClient, ChatMessage\n",
21
+ "import requests\n",
22
+ "import numpy as np\n",
23
+ "import faiss\n",
24
+ "import os\n",
25
+ "\n",
26
+ "api_key=os.environ[\"MISTRAL_API_KEY\"]\n",
27
+ "client = MistralClient(api_key=api_key)"
28
+ ]
29
+ },
30
+ {
31
+ "cell_type": "markdown",
32
+ "id": "01b27964-b40f-41d5-ba20-cec93ca25dc5",
33
+ "metadata": {},
34
+ "source": [
35
+ "# 1. RAG from scratch"
36
+ ]
37
+ },
38
+ {
39
+ "cell_type": "markdown",
40
+ "id": "fe8609d5-9f27-4202-b0be-36db34412998",
41
+ "metadata": {},
42
+ "source": [
43
+ "## Get data"
44
+ ]
45
+ },
46
+ {
47
+ "cell_type": "code",
48
+ "execution_count": 3,
49
+ "id": "c4c01740-72b4-482c-b61e-e272a734f01f",
50
+ "metadata": {},
51
+ "outputs": [],
52
+ "source": [
53
+ "response = requests.get('https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt')\n",
54
+ "text = response.text"
55
+ ]
56
+ },
57
+ {
58
+ "cell_type": "code",
59
+ "execution_count": 130,
60
+ "id": "f03f47af-a20b-4122-a114-74b9748ff543",
61
+ "metadata": {},
62
+ "outputs": [
63
+ {
64
+ "data": {
65
+ "text/plain": [
66
+ "75014"
67
+ ]
68
+ },
69
+ "execution_count": 130,
70
+ "metadata": {},
71
+ "output_type": "execute_result"
72
+ }
73
+ ],
74
+ "source": [
75
+ "len(text)"
76
+ ]
77
+ },
78
+ {
79
+ "cell_type": "markdown",
80
+ "id": "aad1aa61-9e1c-46c8-ae5e-61855df440f9",
81
+ "metadata": {},
82
+ "source": [
83
+ "## Split document into chunks"
84
+ ]
85
+ },
86
+ {
87
+ "cell_type": "code",
88
+ "execution_count": 4,
89
+ "id": "8494655e-bd87-49de-8f1d-69ffbc1c256e",
90
+ "metadata": {},
91
+ "outputs": [],
92
+ "source": [
93
+ "chunk_size = 512\n",
94
+ "chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]"
95
+ ]
96
+ },
97
+ {
98
+ "cell_type": "markdown",
99
+ "id": "4176cbe3-9b15-4d17-afb1-665011d09bb7",
100
+ "metadata": {},
101
+ "source": [
102
+ "## Create embeddings for each text chunk"
103
+ ]
104
+ },
105
+ {
106
+ "cell_type": "code",
107
+ "execution_count": 5,
108
+ "id": "e77d9805-7a53-4210-9f80-f4de52285588",
109
+ "metadata": {},
110
+ "outputs": [],
111
+ "source": [
112
+ "def get_text_embedding(input):\n",
113
+ " embeddings_batch_response = client.embeddings(\n",
114
+ " model=\"mistral-embed\",\n",
115
+ " input=input\n",
116
+ " )\n",
117
+ " return embeddings_batch_response.data[0].embedding"
118
+ ]
119
+ },
120
+ {
121
+ "cell_type": "code",
122
+ "execution_count": 6,
123
+ "id": "46503830-6ad5-493e-a629-152721e2d88e",
124
+ "metadata": {},
125
+ "outputs": [],
126
+ "source": [
127
+ "text_embeddings = np.array([get_text_embedding(chunk) for chunk in chunks])"
128
+ ]
129
+ },
130
+ {
131
+ "cell_type": "code",
132
+ "execution_count": 9,
133
+ "id": "ca875993-fe6d-42df-811e-a43891cd0350",
134
+ "metadata": {},
135
+ "outputs": [
136
+ {
137
+ "data": {
138
+ "text/plain": [
139
+ "(147, 1024)"
140
+ ]
141
+ },
142
+ "execution_count": 9,
143
+ "metadata": {},
144
+ "output_type": "execute_result"
145
+ }
146
+ ],
147
+ "source": [
148
+ "text_embeddings.shape"
149
+ ]
150
+ },
151
+ {
152
+ "cell_type": "code",
153
+ "execution_count": 10,
154
+ "id": "55396758-c3f3-45b3-b6e7-d4912c0899f2",
155
+ "metadata": {},
156
+ "outputs": [
157
+ {
158
+ "data": {
159
+ "text/plain": [
160
+ "array([[-0.04849243, 0.07305908, 0.01568604, ..., -0.0234375 ,\n",
161
+ " -0.02072144, -0.01068115],\n",
162
+ " [-0.04660034, 0.04846191, -0.00045729, ..., -0.00754929,\n",
163
+ " -0.00577545, 0.01355743],\n",
164
+ " [-0.02139282, 0.0625 , 0.00907898, ..., -0.02233887,\n",
165
+ " -0.00765228, -0.00793457],\n",
166
+ " ...,\n",
167
+ " [-0.02787781, 0.04260254, 0.00785828, ..., -0.00067568,\n",
168
+ " -0.01176453, -0.02828979],\n",
169
+ " [-0.02966309, 0.06292725, 0.03979492, ..., -0.01296997,\n",
170
+ " -0.00264549, -0.03845215],\n",
171
+ " [-0.06185913, 0.05847168, 0.03988647, ..., -0.04724121,\n",
172
+ " -0.01289368, -0.02728271]])"
173
+ ]
174
+ },
175
+ "execution_count": 10,
176
+ "metadata": {},
177
+ "output_type": "execute_result"
178
+ }
179
+ ],
180
+ "source": [
181
+ "text_embeddings"
182
+ ]
183
+ },
184
+ {
185
+ "cell_type": "markdown",
186
+ "id": "1cba33c7-9d1d-44d8-a01e-e30f16be1aac",
187
+ "metadata": {},
188
+ "source": [
189
+ "## Load into a vector database"
190
+ ]
191
+ },
192
+ {
193
+ "cell_type": "code",
194
+ "execution_count": 11,
195
+ "id": "6a5b1877-b113-4527-9055-cae9049fef08",
196
+ "metadata": {},
197
+ "outputs": [],
198
+ "source": [
199
+ "d = text_embeddings.shape[1]\n",
200
+ "index = faiss.IndexFlatL2(d)\n",
201
+ "index.add(text_embeddings)"
202
+ ]
203
+ },
204
+ {
205
+ "cell_type": "markdown",
206
+ "id": "5ee023ab-b26c-4df5-8a7b-7dd660bfad86",
207
+ "metadata": {},
208
+ "source": [
209
+ "## Create embeddings for a question"
210
+ ]
211
+ },
212
+ {
213
+ "cell_type": "code",
214
+ "execution_count": 12,
215
+ "id": "894d9764-9da9-4629-8f2a-c9dcaf6ceb8d",
216
+ "metadata": {},
217
+ "outputs": [
218
+ {
219
+ "data": {
220
+ "text/plain": [
221
+ "(1, 1024)"
222
+ ]
223
+ },
224
+ "execution_count": 12,
225
+ "metadata": {},
226
+ "output_type": "execute_result"
227
+ }
228
+ ],
229
+ "source": [
230
+ "question = \"What were the two main things the author worked on before college?\"\n",
231
+ "question_embeddings = np.array([get_text_embedding(question)])\n",
232
+ "question_embeddings.shape"
233
+ ]
234
+ },
235
+ {
236
+ "cell_type": "code",
237
+ "execution_count": 13,
238
+ "id": "9c4948cc-6d8b-449f-bc00-abb3591c7222",
239
+ "metadata": {},
240
+ "outputs": [
241
+ {
242
+ "data": {
243
+ "text/plain": [
244
+ "array([[-0.05456543, 0.03518677, 0.03723145, ..., -0.02763367,\n",
245
+ " -0.00327873, 0.00323677]])"
246
+ ]
247
+ },
248
+ "execution_count": 13,
249
+ "metadata": {},
250
+ "output_type": "execute_result"
251
+ }
252
+ ],
253
+ "source": [
254
+ "question_embeddings"
255
+ ]
256
+ },
257
+ {
258
+ "cell_type": "markdown",
259
+ "id": "15989e10-d0ec-41be-b6be-fa317565a926",
260
+ "metadata": {},
261
+ "source": [
262
+ "## Retrieve similar chunks from the vector database"
263
+ ]
264
+ },
265
+ {
266
+ "cell_type": "code",
267
+ "execution_count": 35,
268
+ "id": "c930b378-7aac-434c-881b-ab69d3edb93d",
269
+ "metadata": {},
270
+ "outputs": [
271
+ {
272
+ "name": "stdout",
273
+ "output_type": "stream",
274
+ "text": [
275
+ "[[ 0 90]]\n"
276
+ ]
277
+ }
278
+ ],
279
+ "source": [
280
+ "D, I = index.search(question_embeddings, k=2) \n",
281
+ "print(I)"
282
+ ]
283
+ },
284
+ {
285
+ "cell_type": "code",
286
+ "execution_count": 43,
287
+ "id": "73aab584-1dbf-4532-b41e-0403eeeeb567",
288
+ "metadata": {},
289
+ "outputs": [
290
+ {
291
+ "name": "stdout",
292
+ "output_type": "stream",
293
+ "text": [
294
+ "['\\n\\nWhat I Worked On\\n\\nFebruary 2021\\n\\nBefore college the two main things I worked on, outside of school, were writing and programming. I didn\\'t write essays. I wrote what beginning writers were supposed to write then, and probably still are: short stories. My stories were awful. They had hardly any plot, just characters with strong feelings, which I imagined made them deep.\\n\\nThe first programs I tried writing were on the IBM 1401 that our school district used for what was then called \"data processing.\" This wa', \"king on things that aren't prestigious doesn't guarantee you're on the right track, it at least guarantees you're not on the most common type of wrong one.\\n\\nOver the next several years I wrote lots of essays about all kinds of different topics. O'Reilly reprinted a collection of them as a book, called Hackers & Painters after one of the essays in it. I also worked on spam filters, and did some more painting. I used to have dinners for a group of friends every thursday night, which taught me how to cook for \"]\n"
295
+ ]
296
+ }
297
+ ],
298
+ "source": [
299
+ "retrieved_chunk = [chunks[i] for i in I.tolist()[0]]\n",
300
+ "print(retrieved_chunk)"
301
+ ]
302
+ },
303
+ {
304
+ "cell_type": "markdown",
305
+ "id": "4b417a59-021a-411d-a491-cb31815192cd",
306
+ "metadata": {},
307
+ "source": [
308
+ "## Combine context and question in a prompt and generate response"
309
+ ]
310
+ },
311
+ {
312
+ "cell_type": "code",
313
+ "execution_count": 37,
314
+ "id": "da042a53-4564-4057-9a60-9b57dffff6a1",
315
+ "metadata": {},
316
+ "outputs": [],
317
+ "source": [
318
+ "prompt = f\"\"\"\n",
319
+ "Context information is below.\n",
320
+ "---------------------\n",
321
+ "{retrieved_chunk}\n",
322
+ "---------------------\n",
323
+ "Given the context information and not prior knowledge, answer the query.\n",
324
+ "Query: {question}\n",
325
+ "Answer:\n",
326
+ "\"\"\""
327
+ ]
328
+ },
329
+ {
330
+ "cell_type": "code",
331
+ "execution_count": 41,
332
+ "id": "e77d975b-5f69-4e9c-8b94-97214517eac7",
333
+ "metadata": {},
334
+ "outputs": [],
335
+ "source": [
336
+ "def run_mistral(user_message, model=\"mistral-medium\"):\n",
337
+ " messages = [\n",
338
+ " ChatMessage(role=\"user\", content=user_message)\n",
339
+ " ]\n",
340
+ " chat_response = client.chat(\n",
341
+ " model=model,\n",
342
+ " messages=messages,\n",
343
+ " temperature=1, \n",
344
+ " # max_tokens=100\n",
345
+ " )\n",
346
+ " return (chat_response.choices[0].message.content)"
347
+ ]
348
+ },
349
+ {
350
+ "cell_type": "code",
351
+ "execution_count": 42,
352
+ "id": "1c5c20aa-6673-4105-9c10-886a1e18da8a",
353
+ "metadata": {},
354
+ "outputs": [
355
+ {
356
+ "data": {
357
+ "text/plain": [
358
+ "'The two main things the author worked on before college were writing and programming. Specifically, the author wrote short stories and tried writing programs on an IBM 1401.'"
359
+ ]
360
+ },
361
+ "execution_count": 42,
362
+ "metadata": {},
363
+ "output_type": "execute_result"
364
+ }
365
+ ],
366
+ "source": [
367
+ "run_mistral(prompt)"
368
+ ]
369
+ },
370
+ {
371
+ "cell_type": "markdown",
372
+ "id": "4e3b531c-4730-4108-ae8a-8de6563e085b",
373
+ "metadata": {},
374
+ "source": [
375
+ "# 2. LlamaIndex"
376
+ ]
377
+ },
378
+ {
379
+ "cell_type": "code",
380
+ "execution_count": null,
381
+ "id": "152c2a1e-9564-459c-9ea9-5208da519a90",
382
+ "metadata": {},
383
+ "outputs": [],
384
+ "source": [
385
+ "!pip install llama-index"
386
+ ]
387
+ },
388
+ {
389
+ "cell_type": "code",
390
+ "execution_count": 73,
391
+ "id": "96003762-acac-4886-964b-2d6a67f6f724",
392
+ "metadata": {},
393
+ "outputs": [
394
+ {
395
+ "name": "stdout",
396
+ "output_type": "stream",
397
+ "text": [
398
+ "--2024-01-16 18:40:06-- https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt\n",
399
+ "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.109.133, 185.199.108.133, ...\n",
400
+ "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n",
401
+ "HTTP request sent, awaiting response... 200 OK\n",
402
+ "Length: 75042 (73K) [text/plain]\n",
403
+ "Saving to: ‘pg_essay.txt’\n",
404
+ "\n",
405
+ "pg_essay.txt 100%[===================>] 73,28K --.-KB/s in 0,01s \n",
406
+ "\n",
407
+ "2024-01-16 18:40:07 (5,45 MB/s) - ‘pg_essay.txt’ saved [75042/75042]\n",
408
+ "\n",
409
+ "The two main things the author worked on before college, outside of school, were writing and programming. In terms of writing, they wrote short stories, which they described as having hardly any plot and mainly featuring characters with strong feelings. As for programming, they tried writing programs on an IBM 1401 in 9th grade using an early version of Fortran. They typed programs on punch cards and stacked them in the card reader to load the program into memory and run it. However, they couldn't figure out what to do with the 1401 and didn't have any data stored on punched cards, so they didn't do much with it.\n"
410
+ ]
411
+ }
412
+ ],
413
+ "source": [
414
+ "from llama_index import VectorStoreIndex, SimpleDirectoryReader\n",
415
+ "from llama_index.llms import MistralAI\n",
416
+ "from llama_index.embeddings import MistralAIEmbedding\n",
417
+ "from llama_index import ServiceContext\n",
418
+ "from llama_index.query_engine import RetrieverQueryEngine\n",
419
+ "\n",
420
+ "# Load data\n",
421
+ "!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt' -O pg_essay.txt\n",
422
+ "reader = SimpleDirectoryReader(input_files=[\"pg_essay.txt\"])\n",
423
+ "documents = reader.load_data()\n",
424
+ "# Define LLM and embedding model \n",
425
+ "llm = MistralAI(api_key=api_key,model=\"mistral-medium\")\n",
426
+ "embed_model = MistralAIEmbedding(model_name='mistral-embed', api_key=api_key)\n",
427
+ "service_context = ServiceContext.from_defaults(llm=llm,embed_model=embed_model)\n",
428
+ "# Create vector store index \n",
429
+ "index = VectorStoreIndex.from_documents(documents, service_context=service_context)\n",
430
+ "# Create query engine\n",
431
+ "query_engine = index.as_query_engine(similarity_top_k=2)\n",
432
+ "response = query_engine.query(\n",
433
+ " \"What were the two main things the author worked on before college?\"\n",
434
+ ")\n",
435
+ "print(str(response))"
436
+ ]
437
+ },
438
+ {
439
+ "cell_type": "code",
440
+ "execution_count": null,
441
+ "id": "ec63c4a9-9c04-4707-a92f-e339b14054fd",
442
+ "metadata": {},
443
+ "outputs": [],
444
+ "source": []
445
+ }
446
+ ],
447
+ "metadata": {
448
+ "kernelspec": {
449
+ "display_name": "Python 3 (ipykernel)",
450
+ "language": "python",
451
+ "name": "python3"
452
+ },
453
+ "language_info": {
454
+ "codemirror_mode": {
455
+ "name": "ipython",
456
+ "version": 3
457
+ },
458
+ "file_extension": ".py",
459
+ "mimetype": "text/x-python",
460
+ "name": "python",
461
+ "nbconvert_exporter": "python",
462
+ "pygments_lexer": "ipython3",
463
+ "version": "3.11.5"
464
+ }
465
+ },
466
+ "nbformat": 4,
467
+ "nbformat_minor": 5
468
+ }
README.md CHANGED
@@ -1,5 +1,5 @@
  ---
- title: Chatbot G Rag
+ title: GAIA Chatbot - level 2
  emoji: 🌍
  colorFrom: red
  colorTo: pink
@@ -9,5 +9,32 @@ app_file: app.py
  pinned: false
  license: mit
  ---
+ # Run on a Space
  
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+ Simply push your code to a Hugging Face Space.
+
+ # Run locally
+
+ You need [Python 3.8](https://www.python.org/downloads/).
+
+ Check https://www.gradio.app/guides/quickstart for more details about Gradio.
+
+ ## Install dependencies
+
+ `pip install gradio`
+
+ `pip install -r requirements.txt`
+
+ ## Add your Mistral API key to your environment variables
+
+ In `~/.profile` or `~/.bashrc`:
+
+ `export MISTRAL_API_KEY=YOUR_API_KEY`
+
+ ## Run your code
+
+ `python3 app.py`
+
+ ## Open your browser at `http://127.0.0.1:7860`
app.py ADDED
@@ -0,0 +1,100 @@
+ import gradio as gr
+ from mistralai.client import MistralClient
+ from mistralai.models.chat_completion import ChatMessage
+ import requests
+ import numpy as np
+ import faiss
+ import os
+ from llama_index import VectorStoreIndex, SimpleDirectoryReader, SummaryIndex
+ from llama_index.readers import SimpleWebPageReader
+ from llama_index.llms import MistralAI
+ from llama_index.embeddings import MistralAIEmbedding
+ from llama_index import ServiceContext
+ from llama_index.query_engine import RetrieverQueryEngine
+ 
+ title = "Gaia Mistral Chat Demo with RAG"
+ description = "Example assistant built with Gradio and the Mistral AI API"
+ placeholder = "Ask me a question about agriculture"
+ placeholder_url = "Give me a URL to use as additional agricultural context"
+ examples = ["How do you produce corn?", "Write me a letter to apply for an internship on a farm", "How do you take over a farm?"]
+ 
+ api_key = os.environ.get("MISTRAL_API_KEY")
+ client = MistralClient(api_key=api_key)
+ chat_model = 'mistral-small'
+ 
+ llm = MistralAI(api_key=api_key, model="mistral-medium")
+ embed_model = MistralAIEmbedding(model_name='mistral-embed', api_key=api_key)
+ service_context = ServiceContext.from_defaults(chunk_size=512, llm=llm, embed_model=embed_model)
+ 
+ # build a vector database from the document behind a URL
+ def setup_db_with_url(url):
+     global query_engine
+     documents = SimpleWebPageReader(html_to_text=True).load_data([url])
+     # insert in DB
+     index = VectorStoreIndex.from_documents(documents, service_context=service_context)
+     query_engine = index.as_query_engine(similarity_top_k=2)
+     # from-scratch alternative kept for reference:
+     # response = requests.get(url)
+     # text = response.text
+     # chunk_size = 512
+     # split in chunks
+     # chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
+     # embed with Mistral to get vectors
+     # text_embeddings = np.array([get_text_embedding(chunk) for chunk in chunks])
+     # d = text_embeddings.shape[1]
+     return f"Context loaded from {url}"
+ 
+ # vector data with Mistral
+ def get_text_embedding(input):
+     embeddings_batch_response = client.embeddings(
+         model="mistral-embed",
+         input=input
+     )
+     return embeddings_batch_response.data[0].embedding
+ 
+ # build a prompt around the retrieved context
+ def build_prompt(user_input):
+     retrieved_chunk = query_engine.query(user_input)
+     prompt = f"""
+ Context information is below.
+ ---------------------
+ {retrieved_chunk}
+ ---------------------
+ Given the context information and not prior knowledge, answer the query.
+ Query: {user_input}
+ Answer:
+ """
+     return prompt
+ 
+ def chat_with_mistral(user_input, history):
+     prompt = build_prompt(user_input)
+     messages = [ChatMessage(role="user", content=prompt)]
+ 
+     chat_response = client.chat(model=chat_model, messages=messages)
+     mistral_content = chat_response.choices[0].message.content
+     histories = history + [(user_input, mistral_content)]
+     return ["", histories]
+ 
+ with gr.Blocks() as iface:
+ 
+     with gr.Row():
+         gr.Markdown("# Mixtral-8x7B Playground Space!")
+     with gr.Row():
+         url_msg = gr.Textbox(placeholder=placeholder_url, container=False, scale=7)
+         url_btn = gr.Button(value="🔄", interactive=True)
+     with gr.Row():
+         url_return = gr.Textbox(value='', container=False, scale=7)
+     url_btn.click(setup_db_with_url, url_msg, url_return)
+     with gr.Row():
+         chatbot = gr.Chatbot(height=300)
+     with gr.Row():
+         msg = gr.Textbox(placeholder=placeholder, container=False, scale=7)
+         msg_btn = gr.Button("Send")
+ 
+     msg_btn.click(chat_with_mistral, [msg, chatbot], [msg, chatbot])
+ 
+ iface.title = title
+ 
+ iface.launch(share=True)
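One detail of `app.py` worth flagging: `build_prompt` calls `query_engine.query(user_input)`, which already runs retrieval *and* LLM synthesis, so an already-generated answer gets wrapped in a second RAG prompt and sent to the model again. If the intent is to inject the raw retrieved chunks instead, a variant along these lines could work. This is a hypothetical sketch against the same pre-0.10 `llama_index` API used above; `build_prompt_from_chunks` and the explicit `index` argument are illustrative names, not part of the commit:

```python
# Hypothetical variant of build_prompt(): fetch raw chunks with a retriever
# (no LLM call) instead of a fully synthesized query_engine answer.
def build_prompt_from_chunks(index, user_input):
    retriever = index.as_retriever(similarity_top_k=2)
    nodes = retriever.retrieve(user_input)  # list of scored nodes, no generation
    context = "\n---\n".join(node.get_content() for node in nodes)
    return f"""
Context information is below.
---------------------
{context}
---------------------
Given the context information and not prior knowledge, answer the query.
Query: {user_input}
Answer:
"""
```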
requirements.txt ADDED
@@ -0,0 +1,5 @@
1
+ mistralai
2
+ faiss-cpu
3
+ numpy
4
+ html2text
5
+ llama-index
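Note that `app.py` imports `gradio`, which this file omits (the README installs it separately, and Spaces provide it via the SDK). Since the code targets the pre-1.0 `mistralai` client (`MistralClient`) and the pre-0.10 `llama_index` import layout (`from llama_index import ...`), capping those versions would keep future installs compatible; the caps below are an editorial suggestion, not part of the commit:

```
mistralai<1.0
faiss-cpu
numpy
html2text
llama-index<0.10
```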